├── .gitmodules
├── LICENSE
├── README.md
├── matlab
    ├── ComputeVarianceScatterPlot.m
    ├── PlotResultsEachMovieMarlinCompareKernels.m
    ├── PlotResultsEachMovieMarlinWeak.m
    ├── PlotResultsMovielens.m
    ├── PlotResultsMovielensMarlinCompareKernels.m
    ├── PlotResultsMovielensMarlinWeak.m
    ├── PlotResultsMovielensWeak.m
    ├── RecomputeMovieLens10MWeakPartialScript1.m
    ├── RecomputeMovieLens10MWeakScript1.m
    ├── RecomputeResults.m
    ├── changeNameMovielens.m
    ├── collabComponentPosteriorMeanVar.m
    ├── collabComputeM.m
    ├── collabComputeS.m
    ├── collabCreate.m
    ├── collabCreateTensor.m
    ├── collabDisplay.m
    ├── collabEstep.m
    ├── collabExpandParam.m
    ├── collabExtractParam.m
    ├── collabInitS.m
    ├── collabLoadData.m
    ├── collabLogLikeGradients.m
    ├── collabLogLikelihood.m
    ├── collabOptimise.m
    ├── collabOptimiseEachMovie.m
    ├── collabOptimiseOptions.m
    ├── collabOptions.m
    ├── collabOptionsTensor.m
    ├── collabPosteriorMeanVar.m
    ├── collabPosteriorMeanVarCell.m
    ├── collabTest.m
    ├── collabToolboxes.m
    ├── collabUpdateKernels.m
    ├── computeMeanVarianceWeak.m
    ├── computePredictionsErrorWeak.m
    ├── computeTestErrorEnsemblesWeak.m
    ├── computeTestErrorStrong.m
    ├── computeTestErrorWeak.m
    ├── computeTestErrorWeakCell.m
    ├── demAistats1.m
    ├── demEachMovie1.m
    ├── demEachMovie7.m
    ├── demEachMovieMarlinStrongLinearScript1.m
    ├── demEachMovieMarlinStrongScript1.m
    ├── demEachMovieMarlinWeakEnsemScript1.m
    ├── demEachMovieMarlinWeakLinear.m
    ├── demEachMovieMarlinWeakScript1.m
    ├── demEachMovieMarlinWeakTest1.m
    ├── demEachMovieMarlinWeakTestCustom1.m
    ├── demMixtoydata1.m
    ├── demMovieLens10MLetterWeakScript1.m
    ├── demMovieLens10MWeakScript1.m
    ├── demMovieLensMarlinStrongAdditionalScript1.m
    ├── demMovieLensMarlinStrongLinearRBFScript1.m
    ├── demMovieLensMarlinStrongLinearScript1.m
    ├── demMovieLensMarlinStrongScript1.m
    ├── demMovieLensMarlinWeakAdditionalScript1.m
    ├── demMovieLensMarlinWeakEnsemScript1.m
    ├── demMovieLensMarlinWeakLinearRBFScript1.m
    ├── demMovieLensMarlinWeakLinearScript1.m
    ├── demMovieLensMarlinWeakMLPScript1.m
    ├── demMovieLensMarlinWeakScript1.m
    ├── demMovielens1.m
    ├── demMovielens2.m
    ├── demMovielens3.m
    ├── demMovielens3Script.m
    ├── demMovielens4.m
    ├── demMovielens5.m
    ├── demMovielens6.m
    ├── demMovielens6Script.m
    ├── demMovielens7.m
    ├── demMovielens7Script.m
    ├── demMovielensOrdered1.m
    ├── demMovielensSmall1.m
    ├── demMovielensSmallHetero1.m
    ├── demMovielensSmallMix1.m
    ├── demMovielensSmallMixFromSingleScript1.m
    ├── demNetflix1.m
    ├── demNetflix2.m
    ├── demNetflix3.m
    ├── demNetflix4.m
    ├── demNetflix5.m
    ├── em-weak.m
    ├── generateCteNMAE.m
    ├── kernAdditionalKernCompute.m
    ├── loadAverageVariance.m
    ├── loadNetflix.m
    ├── loadNetflix2.m
    ├── lvmLoadData.m
    ├── ml-weak.m
    ├── netflixTest.m
    ├── rbfadditionalKernCompute.m
    ├── rbfadditionalKernDiagCompute.m
    ├── rbfadditionalKernDiagGradX.m
    ├── rbfadditionalKernExpandParam.m
    ├── rbfadditionalKernExtractParam.m
    ├── rbfadditionalKernGradX.m
    ├── rbfadditionalKernGradient.m
    ├── rbfadditionalKernParamInit.m
    ├── readEachMovieMarlinStrong.m
    ├── readEachMovieMarlinWeak.m
    ├── readEachMovieWeak.m
    ├── readMovieLens.m
    ├── readMovieLens10M.m
    ├── readMovieLens10MCell.m
    ├── readMovieLens10MCellLetter.m
    ├── readMovieLensMarlinStrong.m
    ├── readMovieLensMarlinWeak.m
    ├── readMovieLensStrong.m
    ├── readMovieLensWeak.m
    ├── restartNetflix1.m
    ├── restartNetflix5.m
    └── splitProbeData.m
└── python
    ├── collab.py
    ├── demNetflix1.py
    ├── demNetflix10.py
    ├── demNetflix2.py
    ├── demNetflix3.py
    ├── demNetflix4.py
    ├── demNetflix5.py
    ├── demNetflix6.py
    ├── demNetflix7.py
    ├── demNetflix8.py
    ├── demNetflix9.py
    ├── restartNetflix10.py
    ├── restartNetflix2.py
    ├── restartNetflix3.py
    ├── restartNetflix4.py
    ├── restartNetflix5.py
    ├── restartNetflix6.py
    ├── restartNetflix7.py
    ├── restartNetflix8.py
    └── restartNetflix9.py


/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "python/ndlml"]
2 | 	path = python/ndlml
3 | 	url = https://github.com/SheffieldML/ndlml
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015, Sheffield Machine Learning Software (ML@SITraN)
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of collab nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # collab
 2 | Collaborative filtering with the GP-LVM
 3 | 
 4 | This repository contains the code for the paper:
 5 | 
 6 | ["Non-linear Matrix Factorization with Gaussian Processes"](http://www.machinelearning.org/archive/icml2009/papers/384.pdf) by Neil D. Lawrence and Raquel Urtasun. It was published at ICML 2009.
 7 | 
 8 | The main code used in the paper is in the matlab subdirectory.
 9 | 
 10 | We also worked on some Netflix experiments with a C++/Python variant of the code. These weren't finished in time for the paper, but the code is included here, although it is not well documented. The code makes use of the SWIG wrappers around [GPc](https://github.com/SheffieldML/GPc) for creating Python objects. These are included as a submodule through [ndlml](https://github.com/SheffieldML/ndlml).
11 | 
12 | 


--------------------------------------------------------------------------------
/matlab/PlotResultsEachMovieMarlinCompareKernels.m:
--------------------------------------------------------------------------------
  1 | function [] = PlotResultsEachMovieMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print)
  2 | %
  3 |   %function [] = PlotResultsEachMovieMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print)
  4 | 
  5 |   if strcmp(type,'weak')
  6 | 
  7 | % directories are order to be
  8 |   directories{1} = 'marlin_eachmovie'; 
  9 | directories{2} = 'marlin_eachmovie_linear'; kern_type{2} = 'linear';
 10 | %  directories{3} = 'marlin_eachmovie_metadata'; kern_type{3} = 'additional';
 11 | %  directories{4} = 'marlin_eachmovie_linear_RBF'; kern_type{4} = 'linear_RBF';
 12 | 
 13 | end
 14 | 
 15 | if strcmp(type,'strong')
 16 |   directories{1} = 'marlin_eachmovie_strong'; 
 17 | directories{2} = 'marlin_eachmovie_strong_linear'; kern_type{2} = 'linear';
 18 | %  directories{3} = 'marlin_eachmovie_strong_metadata'; kern_type{3} = 'additional';
 19 | %  directories{4} = 'marlin_eachmovie_strong_linear_RBF'; kern_type{4} = 'linear_RBF';
 20 | 
 21 | end
 22 | 
 23 | 
 24 | for i=1:length(directories)
 25 | 
 26 |   cd (['../',directories{i}]);
 27 | 
 28 | latentDim_v_p = latentDim_v;
 29 | if strcmp(kern_type{i},'additional')
 30 |   latentDim_v_p = latentDim_v_p+1;
 31 | end
 32 | 
 33 | 
 34 | if (length(kern_type{i})>0)
 35 | 
 36 |   [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted, kern_type{i});
 37 | 
 38 | 
 39 |     else
 40 | 
 41 |       [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted);
 42 | end
 43 | 
 44 | %
 45 | 
 46 | mean_NMAE_round_error(:,i) = mean(NMAE_round_error_T{i}')';
 47 | std_NMAE_round_error(:,i) = std(NMAE_round_error_T{i}')';
 48 | 				mean_L2_error(:,i) = mean(L2_error_T{i}')';
 49 | std_L2_error(:,i) = std(L2_error_T{i}')';
 50 | 
 51 | end
 52 | 
 53 | if if_plot
 54 | 				figure(1);
 55 | 				clf;
 56 | 
 57 | font_size = 16;
 58 | set(gca,'FontSize',font_size);
 59 | set(get(gca,'Title'),'FontSize',font_size);
 60 | set(get(gca,'Xlabel'),'FontSize',font_size);
 61 | set(get(gca,'Ylabel'),'FontSize',font_size);
 62 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 63 | 				for i=1:length(directories)
 64 | 				hold on;
 65 | 				plot(latentDim_v,mean_NMAE_round_error(:,i),[getColor(i),'s'],'lineWidth',3,'markersize',12);
 66 | 			%errorbar(latentDim_v,mean_NMAE_round_error(:,i),std_NMAE_round_error(:,i),[getColor(i),'o'],'lineWidth',2,'markersize',12);
 67 | end
 68 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 69 | 
 70 | 				xlabel('latent dimensionality');
 71 | 				ylabel('NMAE error')
 72 | 				set(gca,'XTick',latentDim_v);
 73 | 
 74 | figure(2);
 75 | 				clf;
 76 | 
 77 | font_size = 16;
 78 | set(gca,'FontSize',font_size);
 79 | set(get(gca,'Title'),'FontSize',font_size);
 80 | set(get(gca,'Xlabel'),'FontSize',font_size);
 81 | set(get(gca,'Ylabel'),'FontSize',font_size);
 82 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 83 | 				for i=1:length(directories)
 84 | 				hold on;
 85 | 				plot(latentDim_v,mean_L2_error(:,i),[getColor(i),'s'],'lineWidth',3,'markersize',12);
 86 | end
 87 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 88 | 
 89 | 				xlabel('latent dimensionality');
 90 | 				ylabel('RMSE error')
 91 | 				set(gca,'XTick',latentDim_v);
 92 | 
 93 | end
 94 | 
 95 | 				nameFile = ['compare_kernels_',type,'_em'];
 96 | nameFileRMSE = ['compare_kernels_',type,'_em'];
 97 | 
 98 | if if_print 
 99 | 			figure(1);				
100 | saveas(gcf,[nameFile,'.fig']);
101 | 				nameFile = [nameFile,'.eps'];
102 | 				print('-depsc',nameFile); 
103 | figure(2);				
104 | saveas(gcf,[nameFileRMSE,'_RMSE.fig']);
105 | 				nameFileRMSE = [nameFileRMSE,'_RMSE.eps'];
106 | 				print('-depsc',nameFileRMSE); 
107 | 
108 | end
109 | 
110 | 				keyboard;
111 | 
112 | end
113 | 
114 | 
function [value] = getColor(index)
% GETCOLOR Line-spec prefix (colour and line style) for the index-th curve.
%
% index --> positive integer curve number
% value --> 'r-', 'b-', 'g--', 'm--' or 'k-' for index 1..5; larger indices
%           wrap around instead of leaving the output unassigned
styles = {'r-', 'b-', 'g--', 'm--', 'k-'};
value = styles{mod(index - 1, numel(styles)) + 1};
end
129 | 


--------------------------------------------------------------------------------
/matlab/PlotResultsEachMovieMarlinWeak.m:
--------------------------------------------------------------------------------
 1 | function [L2_error_T,NMAE_error_T,NMAE_round_error_T] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, type, inverted, kern_type)
 2 | %
 3 |   % [L2_error,NMAE_error,NMAE_round_error] = PlotResultsEachMovieMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, kern_type)
 4 | %
 5 | % substract_mean --> bool if substract the mean
 6 | % partNo_v --> vector with the partitions to compute results
 7 | % latentDim_v --> vector with the latent dimensionalities to compute results
 8 |   % iters --> number of iterations
 9 | % type --> strong or weak
10 |   % inverted --> if it is inverted and we learn latent for subjects, not movies
11 | % kern_type --> '' for RBF 'linear', 'MLP'
12 | 
13 | 
14 |   numDim = length(latentDim_v);
15 | numPartNo = length(partNo_v);
16 | 
17 | L2_error_T = -ones(numDim,numPartNo);
18 | NMAE_error_T = -ones(numDim,numPartNo);
19 | NMAE_round_error_T = -ones(numDim,numPartNo);
20 | 
21 | for i_latent=1:numDim
22 |     q = latentDim_v(i_latent);
23 |     for i_part=1:numPartNo
24 |         partNo = partNo_v(i_part);
25 | 
26 | dataSetName = ['eachmovie_marlin_',type,'_',num2str(partNo)];
27 |         
28 | 
29 | 
30 |         % Save the results.
31 |         capName = dataSetName;
32 |         capName(1) = upper(capName(1));
33 | if (nargin>6)    
34 |   loadResults = [capName,'_',kern_type,'_inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
35 |  else
36 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
37 |   end
38 |       disp(['Loading ... ',loadResults]);
39 | try
40 |         load(loadResults);
41 | catch
42 | continue;
43 | end
44 | L2_error_T(i_latent,i_part) = L2_error;
45 | NMAE_error_T(i_latent,i_part) = NMAE_error * 1.6/1.944;
46 | NMAE_round_error_T(i_latent,i_part) = NMAE_round_error*1.6/1.944;
47 |     end
48 | end
49 | 
50 | % plot the results
51 | 
52 | mean_L2 = mean(L2_error_T,2);
53 | mean_NMAE = mean(NMAE_error_T,2);
54 | mean_NMAE_round = mean(NMAE_round_error_T,2);
55 | %keyboard;
56 |   for j=1:size(mean_L2,2)
57 |     std_L2(j) = std(permute(L2_error_T(j,:),[2 1]));
58 | std_NMAE(j) = std(permute(NMAE_error_T(j,:),[2 1]));
59 | std_NMAE_round(j) = std(permute(NMAE_round_error_T(j,:),[2 1]));
60 | end
61 | 
62 | %figure(1)
63 | %  clf;
64 | %hold on;
65 | %for i=1:length(latentDim_v)
66 |   % plot(perc_train_v/100,mean_NMAE_round(:,i),[getColor(i),'x']);
67 | %errorbar(perc_train_v/100,mean_NMAE_round(:,i),std_NMAE_round(:,i),[getColor(i),'x']);
68 | %toLeg{i} = ['Dimension ',num2str(latentDim_v(i))];
69 | %end
70 | %xlabel('percentage database');
71 | %ylabel('NMAE round error');
72 | %legend(toLeg);
73 | 
74 | end
75 | 
76 | 
function [value] = getColor(index)
% GETCOLOR Line-spec prefix (colour and line style) for the index-th curve.
%
% index --> positive integer curve number
% value --> 'r-', 'b-', 'g--', 'm--' or 'k-' for index 1..5; larger indices
%           wrap around instead of leaving the output unassigned
styles = {'r-', 'b-', 'g--', 'm--', 'k-'};
value = styles{mod(index - 1, numel(styles)) + 1};
end
91 | 


--------------------------------------------------------------------------------
/matlab/PlotResultsMovielensMarlinCompareKernels.m:
--------------------------------------------------------------------------------
  1 | function [] = PlotResultsMovielensMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print, new_iters)
  2 | %
  3 |   %function [] = PlotResultsMovielensMarlinCompareKernels(substract_mean, partNo_v, latentDim_v, iters, type, inverted, directories, if_plot, if_print, new_iters)
  4 | 
  5 |   if strcmp(type,'weak')
  6 | 
  7 | % directories are order to be
  8 |   directories{1} = 'marlin_movielens'; 
  9 | directories{2} = 'marlin_movielens_linear'; kern_type{2} = 'linear';
 10 |   directories{3} = 'marlin_movielens_metadata'; kern_type{3} = 'additional';
 11 |   %directories{4} = 'marlin_movielens_linear_RBF'; kern_type{4} = 'linear_RBF';
 12 | 
 13 | end
 14 | 
 15 | if strcmp(type,'strong')
 16 |   directories{1} = 'marlin_movielens_strong'; 
 17 | directories{2} = 'marlin_movielens_linear_strong'; kern_type{2} = 'linear';
 18 |   directories{3} = 'marlin_movielens_strong_metadata'; kern_type{3} = 'additional';
 19 |  %directories{4} = 'marlin_movielens_strong_linear_RBF'; kern_type{4} = 'linear_RBF';
 20 | 
 21 | end
 22 | 
 23 | 
 24 | for i=1:length(directories)
 25 | 
 26 |   cd (['../',directories{i}]);
 27 | 
 28 | latentDim_v_p = latentDim_v;
 29 | if strcmp(kern_type{i},'additional')
 30 |   latentDim_v_p = latentDim_v_p+1;
 31 | end
 32 | 
 33 | 
 34 | if (length(kern_type{i})>0)
 35 | if (strcmp(kern_type{i},'linear_RBF'))
 36 |   [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted, kern_type{i},new_iters);
 37 | 
 38 | 
 39 | else
 40 | 
 41 | 
 42 |   [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted, kern_type{i});
 43 | 
 44 | end
 45 |  else
 46 | 
 47 |    
 48 |       [L2_error_T{i},NMAE_error_T{i},NMAE_round_error_T{i}] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v_p, iters, type, inverted);
 49 | end
 50 | 
 51 | 
 52 | %
 53 | 
 54 | mean_NMAE_round_error(:,i) = mean(NMAE_round_error_T{i}')';
 55 | std_NMAE_round_error(:,i) = std(NMAE_round_error_T{i}')';
 56 | mean_L2_error(:,i) = mean(L2_error_T{i}')';
 57 | std_L2_error(:,i) = std(L2_error_T{i}')';
 58 | 
 59 | end
 60 | 
 61 | if if_plot
 62 | 				figure(1);
 63 | 				clf;
 64 | 
 65 | font_size = 16;
 66 | set(gca,'FontSize',font_size);
 67 | set(get(gca,'Title'),'FontSize',font_size);
 68 | set(get(gca,'Xlabel'),'FontSize',font_size);
 69 | set(get(gca,'Ylabel'),'FontSize',font_size);
 70 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 71 | 				for i=1:length(directories)
 72 | 				hold on;
 73 | 				plot(latentDim_v,mean_NMAE_round_error(:,i),[getColor(i),'o'],'lineWidth',3,'markersize',12);
 74 | end
 75 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 76 | 
 77 | 				xlabel('latent dimensionality');
 78 | 				ylabel('NMAE error');
 79 | 
 80 | 				set(gca,'XTick',latentDim_v);
 81 | 
 82 | 				figure(2);
 83 | 				clf;
 84 | 
 85 | font_size = 16;
 86 | set(gca,'FontSize',font_size);
 87 | set(get(gca,'Title'),'FontSize',font_size);
 88 | set(get(gca,'Xlabel'),'FontSize',font_size);
 89 | set(get(gca,'Ylabel'),'FontSize',font_size);
 90 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 91 | 				for i=1:length(directories)
 92 | 				hold on;
 93 | 				plot(latentDim_v,mean_L2_error(:,i),[getColor(i),'o'],'lineWidth',3,'markersize',12);
 94 | end
 95 | 				legend([{'RBF'},{'linear'},{'metadata'},{'RBF+data'}]);
 96 | 
 97 | 				xlabel('latent dimensionality');
 98 | 				ylabel('RMSE error');
 99 | 
100 | 				set(gca,'XTick',latentDim_v);
101 | 
102 | 
103 | end
104 | 
105 | 				nameFile = ['compare_kernels_',type];
106 | 				nameFileRMSE = nameFile;
107 | 
108 | if if_print 
109 | 				figure(1)
110 | 				saveas(gcf,[nameFile,'.fig']);
111 | 				nameFile = [nameFile,'.eps'];
112 | 				print('-depsc',nameFile); 
113 | 				figure(2)
114 | 				saveas(gcf,[nameFileRMSE,'_RMSE.fig']);
115 | 				nameFileRMSE = [nameFileRMSE,'_RMSE.eps'];
116 | 				print('-depsc',nameFileRMSE); 
117 | 
118 | end
119 | 
120 | 				keyboard;
121 | 
122 | end
123 | 
124 | 
function [value] = getColor(index)
% GETCOLOR Line-spec prefix (colour and line style) for the index-th curve.
%
% index --> positive integer curve number
% value --> 'r-', 'b-.', 'g--', 'm--' or 'k-' for index 1..5; larger indices
%           wrap around instead of leaving the output unassigned
styles = {'r-', 'b-.', 'g--', 'm--', 'k-'};
value = styles{mod(index - 1, numel(styles)) + 1};
end
139 | 


--------------------------------------------------------------------------------
/matlab/PlotResultsMovielensMarlinWeak.m:
--------------------------------------------------------------------------------
  1 | function [L2_error_T,NMAE_error_T,NMAE_round_error_T] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, type, inverted, kernel_type, new_iters)
  2 | %
  3 |   % [L2_error,NMAE_error,NMAE_round_error] = PlotResultsMovielensMarlinWeak(substract_mean,partNo_v,latentDim_v, iters, type, inverted, kernel_type)
  4 | %
  5 | % substract_mean --> bool if substract the mean
  6 | % partNo_v --> vector with the partitions to compute results
  7 | % latentDim_v --> vector with the latent dimensionalities to compute results
  8 |   % iters --> number of iterations
  9 | % type --> strong or weak
 10 |   % inverted --> if it is inverted and we learn latent for subjects, not movies
 11 |   % kernel type --> '' for RBF 'linear', 'MLP'
 12 | 
 13 | 
 14 |   numDim = length(latentDim_v);
 15 | numPartNo = length(partNo_v);
 16 | 
 17 | L2_error_T = -ones(numDim,numPartNo);
 18 | NMAE_error_T = -ones(numDim,numPartNo);
 19 | NMAE_round_error_T = -ones(numDim,numPartNo);
 20 | 
 21 | for i_latent=1:numDim
 22 |     q = latentDim_v(i_latent);
 23 |     for i_part=1:numPartNo
 24 |         partNo = partNo_v(i_part);
 25 | 
 26 | dataSetName = ['movielens_marlin_',type,'_',num2str(partNo)];
 27 |         
 28 | 
 29 | 
 30 |         % Save the results.
 31 |         capName = dataSetName;
 32 |         capName(1) = upper(capName(1));
 33 | 
 34 | 
 35 |   if (nargin>6)
 36 | 
 37 |     if (nargin>7)
 38 |       loadResults = [capName,'_',kernel_type,'_inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'_newiters_',num2str(new_iters),'.mat'];
 39 | 
 40 | else
 41 |             
 42 |     loadResults = [capName,'_',kernel_type,'_inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
 43 | end
 44 | else
 45 |         
 46 | loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
 47 | end
 48 |         disp(['Loading ... ',loadResults]);
 49 | try
 50 |         load(loadResults);
 51 | catch
 52 | continue;
 53 | end
 54 | L2_error_T(i_latent,i_part) = L2_error;
 55 | NMAE_error_T(i_latent,i_part) = NMAE_error;
 56 | NMAE_round_error_T(i_latent,i_part) = NMAE_round_error;
 57 |     end
 58 | end
 59 | 
 60 | % plot the results
 61 | 
 62 | mean_L2 = mean(L2_error_T,2);
 63 | mean_NMAE = mean(NMAE_error_T,2);
 64 | mean_NMAE_round = mean(NMAE_round_error_T,2);
 65 | %keyboard;
 66 |   for j=1:size(mean_L2,1)
 67 |     std_L2(j) = std(permute(L2_error_T(j,:),[2 1]));
 68 | std_NMAE(j) = std(permute(NMAE_error_T(j,:),[2 1]));
 69 | std_NMAE_round(j) = std(permute(NMAE_round_error_T(j,:),[2 1]));
 70 | end
 71 | 
 72 | %keyboard;
 73 | 
 74 | %figure(1)
 75 | %  clf;
 76 | %hold on;
 77 | %for i=1:length(latentDim_v)
 78 |   % plot(perc_train_v/100,mean_NMAE_round(:,i),[getColor(i),'x']);
 79 | %errorbar(perc_train_v/100,mean_NMAE_round(:,i),std_NMAE_round(:,i),[getColor(i),'x']);
 80 | %toLeg{i} = ['Dimension ',num2str(latentDim_v(i))];
 81 | %end
 82 | %xlabel('percentage database');
 83 | %ylabel('NMAE error');
 84 | %legend(toLeg);
 85 | 
 86 | end
 87 | 
 88 | 
 89 | function [value] = getColor(index)
 90 | switch index
 91 |   case 1
 92 | value = 'r-';
 93 | case 2
 94 | value = 'b-';
 95 | case 3
 96 | value = 'g--'
 97 |   case 4
 98 | value = 'm--';
 99 | case 5
100 | value = 'k-'
101 | end
102 | end
103 | 


--------------------------------------------------------------------------------
/matlab/PlotResultsMovielensWeak.m:
--------------------------------------------------------------------------------
 1 | function [L2_error_T,NMAE_error_T,NMAE_round_error_T] = PlotResultsMovielensWeak(substract_mean,partNo_v,latentDim_v, iters, type)
 2 | %
 3 |   % [L2_error,NMAE_error,NMAE_round_error] = PlotResultsMovielensWeak(substract_mean,partNo_v,latentDim_v, iters)
 4 | %
 5 | % substract_mean --> bool if substract the mean
 6 | % partNo_v --> vector with the partitions to compute results
 7 | % latentDim_v --> vector with the latent dimensionalities to compute results
 8 |   % iters --> number of iterations
 9 | % type --> strong or weak
10 | 
11 |   numDim = length(latentDim_v);
12 | numPartNo = length(partNo_v);
13 | 
14 | L2_error_T = -ones(numDim,numPartNo);
15 | NMAE_error_T = -ones(numDim,numPartNo);
16 | NMAE_round_error_T = -ones(numDim,numPartNo);
17 | 
18 | for i_latent=1:numDim
19 |     q = latentDim_v(i_latent);
20 |     for i_part=1:numPartNo
21 |         partNo = partNo_v(i_part);
22 | 
23 | dataSetName = ['movielens_',type,'_',num2str(partNo)];
24 |         
25 | 
26 | 
27 |         % Save the results.
28 |         capName = dataSetName;
29 |         capName(1) = upper(capName(1));
30 |         
31 | loadResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
32 |         disp(['Loading ... ',loadResults]);
33 | try
34 |         load(loadResults);
35 | catch
36 | continue;
37 | end
38 | L2_error_T(i_latent,i_part) = L2_error;
39 | NMAE_error_T(i_latent,i_part) = NMAE_error;
40 | NMAE_round_error_T(i_latent,i_part) = NMAE_round_error;
41 |     end
42 | end
43 | 
44 | % plot the results
45 | 
46 | mean_L2 = mean(L2_error_T,2);
47 | mean_NMAE = mean(NMAE_error_T,2);
48 | mean_NMAE_round = mean(NMAE_round_error_T,2);
49 | %keyboard;
50 |   for j=1:size(mean_L2,2)
51 |     std_L2(j) = std(permute(L2_error_T(j,:),[2 1]));
52 | std_NMAE(j) = std(permute(NMAE_error_T(j,:),[2 1]));
53 | std_NMAE_round(j) = std(permute(NMAE_round_error_T(j,:),[2 1]));
54 | end
55 | 
56 | %figure(1)
57 | %  clf;
58 | %hold on;
59 | %for i=1:length(latentDim_v)
60 |   % plot(perc_train_v/100,mean_NMAE_round(:,i),[getColor(i),'x']);
61 | %errorbar(perc_train_v/100,mean_NMAE_round(:,i),std_NMAE_round(:,i),[getColor(i),'x']);
62 | %toLeg{i} = ['Dimension ',num2str(latentDim_v(i))];
63 | %end
64 | %xlabel('percentage database');
65 | %ylabel('NMAE round error');
66 | %legend(toLeg);
67 | 
68 | end
69 | 
70 | 
function [value] = getColor(index)
% GETCOLOR Line-spec prefix (colour and line style) for the index-th curve.
%
% index --> positive integer curve number
% value --> 'r-', 'b-', 'g--', 'm--' or 'k-' for index 1..5; larger indices
%           wrap around instead of leaving the output unassigned
styles = {'r-', 'b-', 'g--', 'm--', 'k-'};
value = styles{mod(index - 1, numel(styles)) + 1};
end
85 | 


--------------------------------------------------------------------------------
/matlab/RecomputeMovieLens10MWeakPartialScript1.m:
--------------------------------------------------------------------------------
 1 | function [] = RecomputeMovieLens10MWeakPartialScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
 2 | % RECOMPUTEMOVIELENS10MWEAKPARTIALSCRIPT1 Recompute the test error for the 10M Movielens database
 3 | % where the weak movielens experiment
 4 | %
 5 |   % RecomputeMovieLens10MWeakPartialScript1(substract_mean, partNo_v,
 6 |   % latentDim_v,iters, inverted)
 7 | %
 8 | % substract_mean --> bool if substract the mean
 9 | % partNo_v --> vector with the partitions to compute results
10 | % latentDim_v --> vector with the latent dimensionalities to compute results
11 | % iters --> number of iterations
12 | % if inverted = true, then learn users as examples and not movies
13 | 
14 | randn('seed', 1e5);
15 | rand('seed', 1e5);
16 | 
17 | experimentNo = 3;
18 | 
19 | 
20 | partLetter_v = 'ab';
21 | 
22 | 
23 | for i_latent=1:length(latentDim_v)
24 |     q = latentDim_v(i_latent);
25 |     for i_part=1:length(partNo_v)
26 |         partNo = partNo_v(i_part);
27 | 
28 |       partLetter = partLetter_v(partNo_v(i_part));
29 |         
30 |         dataSetName = ['movielens_10M_',partLetter];
31 |         
32 |         disp(['Reading ... ',dataSetName]);
33 |         
34 |         [Y, void, Ytest] = collabLoadData(dataSetName);
35 | 
36 |         if (inverted)
37 |             Y = Y';
38 |             Ytest = Y';
39 |         end
40 |         
41 | 	numFilms = size(Y,1);
42 |         numUsers = size(Y,2);
43 |         meanFilms = zeros(numFilms,1);
44 |         stdFilms = ones(numFilms,1);
45 |         
46 | 
47 | % Save the results.
48 |         capName = dataSetName;
49 |         capName(1) = upper(capName(1));
50 |         
51 | 
52 | saveResults = ['dem',capName,num2str(experimentNo),'_Iters',num2str(iters),'.mat'];
53 |         
54 | disp(['Loading ... ',saveResults]);
55 | load(saveResults);
56 |         
57 | 	% compute the test error
58 | 	  disp('Computing test error');
59 | 
60 | 
61 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest)
62 | 
63 | 
64 |   
65 | 
66 |         % Save the results.
67 |         capName = dataSetName;
68 |         capName(1) = upper(capName(1));
69 | 
70 | saveResults = ['dem',capName,num2str(experimentNo),'_Iters',num2str(iters),'_error.mat'];
71 | 
72 |         %saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
73 |         disp(['Saving ... ',saveResults]);
74 |         save(saveResults, 'model', 'L2_error','NMAE_error','NMAE_round_error');
75 |     end
76 | end
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/matlab/RecomputeMovieLens10MWeakScript1.m:
--------------------------------------------------------------------------------
 1 | function [] = RecomputeMovieLens10MWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
 2 | % RECOMPUTEMOVIELENS10MWEAKSCRIPT1 Recompute the test error for the 10M Movielens database
 3 | % where the weak movielens experiment
 4 | %
 5 |   % RecomputeMovieLens10MWeakScript1(substract_mean, partNo_v,
 6 |   % latentDim_v,iters, inverted)
 7 | %
 8 | % substract_mean --> bool if substract the mean
 9 | % partNo_v --> vector with the partitions to compute results
10 | % latentDim_v --> vector with the latent dimensionalities to compute results
11 | % iters --> number of iterations
12 | % if inverted = true, then learn users as examples and not movies
13 | 
14 | randn('seed', 1e5);
15 | rand('seed', 1e5);
16 | 
17 | experimentNo = 3;
18 | 
19 | 
20 | for i_latent=1:length(latentDim_v)
21 |     q = latentDim_v(i_latent);
22 |     for i_part=1:length(partNo_v)
23 |         partNo = partNo_v(i_part);
24 |         
25 |         dataSetName = ['movielens_10M_',num2str(partNo)];
26 |         
27 |         disp(['Reading ... ',dataSetName]);
28 |         
29 |         [Y, void, Ytest] = collabLoadData(dataSetName);
30 | 
31 |         if (inverted)
32 |             Y = Y';
33 |             Ytest = Y';
34 |         end
35 |         
36 | 	numFilms = size(Y,1);
37 |         numUsers = size(Y,2);
38 |         meanFilms = zeros(numFilms,1);
39 |         stdFilms = ones(numFilms,1);
40 |         
41 | 
42 | % Save the results.
43 |         capName = dataSetName;
44 |         capName(1) = upper(capName(1));
45 |         
46 |         loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
47 |         disp(['Loading ... ',saveResults]);
48 | load(saveResults);, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
49 |    
50 |         
51 | 	% compute the test error
52 | 	  disp('Computing test error');
53 | 
54 | 
55 | [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest)
56 | 
57 | 
58 |         % Save the results.
59 |         capName = dataSetName;
60 |         capName(1) = upper(capName(1));
61 |         
62 |         saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
63 |         disp(['Saving ... ',saveResults]);
64 |         save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
65 |     end
66 | end
67 | 
68 | 
69 | 
70 | 


--------------------------------------------------------------------------------
/matlab/RecomputeResults.m:
--------------------------------------------------------------------------------
  1 | function[] = RecomputeResults(perc_train, substract_mean, partNo_v, latentDim_v)
  2 | % RECOMPUTERESULTS Recompute the test errors of stored movielens models.
  3 | %
  4 | % RecomputeResults(perc_train, substract_mean, partNo_v, latentDim_v)
  5 | %
  6 | % For every latent dimensionality and partition, the stored results file
  7 | % is loaded, the L2 and NMAE test errors are recomputed from the stored
  8 | % model and displayed next to the errors found in the file. Nothing is
  9 | % written back to disk.
 10 | %
 11 | % perc_train -> percentage of training (part of the data set name)
 12 | % substract_mean --> bool, if true the training ratings are normalised with
 13 | %                    the mean and std of all (training and test) ratings
 14 | % partNo_v --> vector with the partitions to compute results
 15 | % latentDim_v --> vector with the latent dimensionalities to compute results
 16 | 
 17 | randn('seed', 1e5);
 18 | rand('seed', 1e5);
 19 | 
 20 | % Divisor that turns the mean absolute error into the NMAE.
 21 | % NOTE(review): presumably the expected MAE of random predictions on a
 22 | % 1-5 rating scale -- confirm.
 23 | nmaeNormaliser = 1.6;
 24 | 
 25 | for i_latent=1:length(latentDim_v)
 26 |     q = latentDim_v(i_latent);
 27 |     for i_part=1:length(partNo_v)
 28 |         partNo = partNo_v(i_part);
 29 | 
 30 |         dataSetName = ['movielens_',num2str(perc_train),'_',num2str(partNo)];
 31 | 
 32 |         disp(['Reading ... ',dataSetName]);
 33 | 
 34 |         [Y, void, Ytest] = collabLoadData(dataSetName);
 35 | 
 36 |         if (substract_mean)
 37 |             % Normalise the observed training ratings with the statistics
 38 |             % of all ratings.
 39 |             ratings = [nonzeros(Y); nonzeros(Ytest)];
 40 |             meanY = mean(ratings);
 41 |             stdY = std(ratings);
 42 |             index = find(Y);
 43 |             Y(index) = Y(index) - meanY;
 44 |             Y(index) = Y(index) / stdY;
 45 |         end
 46 | 
 47 |         % Load the stored model and the errors stored with it.
 48 |         capName = dataSetName;
 49 |         capName(1) = upper(capName(1));
 50 | 
 51 |         loadResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'.mat'];
 52 |         disp(['Loading ... ',loadResults]);
 53 |         load(loadResults);
 54 |         L2_error_before = L2_error;
 55 |         NMAE_error_before = NMAE_error;
 56 |         NMAE_round_error_before = NMAE_round_error;
 57 | 
 58 |         val_L2 = 0;
 59 |         val_NMAE = 0;
 60 |         val_round_NMAE = 0;
 61 |         val_round_NMAE_2 = 0;
 62 |         tot = 0;
 63 | 
 64 |         disp('Computing results');
 65 |         numCols = size(Y, 2);
 66 |         absErrors = cell(numCols, 1);
 67 |         absErrorsRound = cell(numCols, 1);
 68 |         for i = 1:numCols
 69 |             ind = find(Ytest(:, i));
 70 |             % Test rows beyond the rows of model.X get no prediction.
 71 |             outside = ind > size(model.X, 1);
 72 |             tind = ind(~outside);
 73 |             elim = ind(outside);
 74 |             [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
 75 | 
 76 |             % Map the predictions back with the scale and offset of the model.
 77 |             mu = mu*model.sd(1);
 78 |             mu = mu+model.mu(1);
 79 | 
 80 |             % Rows without a prediction contribute their rating as error.
 81 |             % elim holds row numbers of Ytest, not positions inside ind.
 82 |             a = Ytest(tind, i) - mu;
 83 |             a = [a; Ytest(elim, i)];
 84 |             a_round = Ytest(tind, i) - round(mu);
 85 |             a_round = [a_round; Ytest(elim, i)];
 86 | 
 87 |             val_L2 = val_L2 + a'*a;
 88 |             val_NMAE = val_NMAE + sum(abs(a));
 89 |             val_round_NMAE = val_round_NMAE + sum(abs(round(a)));
 90 |             val_round_NMAE_2 = val_round_NMAE_2 + sum(abs(a_round));
 91 |             tot = tot + length(a);
 92 | 
 93 |             absErrors{i} = full(abs(a));
 94 |             absErrorsRound{i} = full(abs(a_round));
 95 |         end
 96 |         % Concatenate once instead of growing the vectors inside the loop.
 97 |         ErrorValues = vertcat(absErrors{:});
 98 |         ErrorValues_round = vertcat(absErrorsRound{:});
 99 | 
100 |         L2_error = sqrt(val_L2/tot);
101 |         NMAE_error = (val_NMAE/tot)/nmaeNormaliser;
102 |         NMAE_round_error = (val_round_NMAE/tot)/nmaeNormaliser;
103 |         NMAE_round_error_2 = (val_round_NMAE_2/tot)/nmaeNormaliser;
104 | 
105 |         % Display the recomputed errors next to the stored ones.
106 |         [L2_error L2_error_before]
107 |         [NMAE_error NMAE_error_before]
108 |         [NMAE_round_error NMAE_round_error_before]
109 |         NMAE_round_error_2
110 |         mean(ErrorValues)
111 |         std(ErrorValues)
112 | 
113 |         % Saving the recomputed results is disabled.
114 |         %saveResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'.mat'];
115 |         %disp(['Saving ... ',saveResults]);
116 |         %save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
117 |     end
118 | end
119 | 
120 | 


--------------------------------------------------------------------------------
/matlab/changeNameMovielens.m:
--------------------------------------------------------------------------------
 1 | function [] = changeNameMovielens(mean_substraction, perc_train_v, latentDim_v, partNo_v, iters)
 2 | % CHANGENAMEMOVIELENS Re-save movielens result files under names with the iteration count.
 3 | %
 4 | % changeNameMovielens(mean_substraction, perc_train_v, latentDim_v, partNo_v, iters)
 5 | %
 6 | % For every combination of training percentage, latent dimension and
 7 | % partition, the results file without the '_iters_' suffix is loaded and
 8 | % its results are saved again under the same name extended by
 9 | % '_iters_<iters>'. Files that cannot be loaded are reported and skipped.
10 | %
11 | % mean_substraction --> value used in the '_norm_' part of the file names
12 | % perc_train_v --> vector with the training percentages
13 | % latentDim_v --> vector with the latent dimensionalities
14 | % partNo_v --> vector with the partitions
15 | % iters --> number of iterations to put into the new file names
16 | 
17 | numDim = length(latentDim_v);
18 | numPerc = length(perc_train_v);
19 | numPart = length(partNo_v);
20 | 
21 | for i_perc=1:numPerc
22 |   perc_train = perc_train_v(i_perc);
23 |   for i_latent=1:numDim
24 |     latentDim = latentDim_v(i_latent);
25 |     for i_part = 1:numPart
26 |       partNo = partNo_v(i_part);
27 | 
28 |       baseName = ['Movielens_',num2str(perc_train),'_',num2str(partNo),'_norm_',num2str(mean_substraction),'_',num2str(latentDim),'_',num2str(partNo)];
29 |       toLoad = [baseName,'.mat'];
30 |       toSave = [baseName,'_iters_',num2str(iters),'.mat'];
31 | 
32 |       disp(['Loading ... ',toLoad]);
33 | 
34 |       % Load into a structure: the file contents can then neither overwrite
35 |       % the loop variables nor be mixed with results of a previous file.
36 |       try
37 |         results = load(toLoad);
38 |       catch
39 |         disp(['Skipping ... ',toLoad]);
40 |         continue;
41 |       end
42 | 
43 |       disp(['Saving ... ',toSave]);
44 |       save(toSave,'-struct','results','options','model','L2_error','NMAE_error','NMAE_round_error');
45 | 
46 |     end
47 |   end
48 | end
49 | 


--------------------------------------------------------------------------------
/matlab/collabComponentPosteriorMeanVar.m:
--------------------------------------------------------------------------------
 1 | function [mu, varsigma, secondMoment] = collabComponentPosteriorMeanVar(model, X)
 2 | % COLLABCOMPONENTPOSTERIORMEANVAR Compute the posterior mean and variance for each component.
 3 | % FORMAT
 4 | % DESC computes the posterior mean and variance associated with each
 5 | % component of the mixture model.
 6 | % ARG model : the model for which means and variances are computed. Only
 7 | % the rows where model.m is non-zero are used as targets.
 8 | % ARG X : optional input argument where means and variances are to be
 9 | % computed. If not provided the stored kernel matrix model.K is used.
10 | % RETURN mu : the mean associated with each of the components as a cell
11 | % array.
12 | % RETURN varsigma : the variance associated with each of the components
13 | % as a cell array.
14 | % RETURN secondMoment : the second moment associated with each of the
15 | % components as a cell array (only computed when requested).
16 | %
17 | % SEEALSO : collabCreate
18 | % 
19 | % COPYRIGHT : Neil D. Lawrence, 2009
20 |   
21 | % COLLAB
22 | 
23 |   
24 |   % Work out component means and variances.
25 |   ind = find(model.m);
26 |   if nargin > 1
27 |     Kx = kernCompute(model.kern, model.X(ind, :), X);
28 |     diagK = kernDiagCompute(model.kern, X);
29 |   else 
30 |     Kx = model.K;
31 |     diagK = diag(model.K);
32 |   end
33 |   targets = model.m(ind);
34 | 
35 |   % Allocate the outputs up front so they exist for any number of components.
36 |   mu = cell(1, model.M);
37 |   varsigma = cell(1, model.M);
38 |   if nargout > 2
39 |     secondMoment = cell(1, model.M);
40 |   end
41 |   for m = 1:model.M
42 |     Kinvk = model.invK{m}*Kx;
43 |     mu{m} = Kinvk'*targets;
44 |     varsigma{m} = diagK - sum(Kx.*Kinvk, 1)';
45 |     if nargout > 2
46 |       secondMoment{m} = varsigma{m} + mu{m}.*mu{m};
47 |     end
48 |   end
49 | end
50 |   


--------------------------------------------------------------------------------
/matlab/collabComputeM.m:
--------------------------------------------------------------------------------
 1 | function m = collabComputeM(model, currentOut)
 2 |   
 3 | % COLLABCOMPUTEM Computes target values inside model.
 4 | % FORMAT
 5 | % DESC takes in a model and an output user and computes the target values
 6 | % for that user: the non-zero ratings of the user, shifted by model.mu and
 7 | % scaled by model.sd.
 8 | % ARG model : the model for which the values of m are to be computed. The
 9 | % field currentOut should be set to which user is to be taken from the data.
10 | % model.y is either a matrix with one column per user or a cell array whose
11 | % first column holds the row indices and whose second column holds the
12 | % values of each user.
13 | % ARG currentOut : optional user index, overrides model.currentOut.
14 | % RETURN m : column vector with the normalised target values.
15 | %
16 | % SEEALSO : collabCreate
17 | %
18 | % COPYRIGHT : Neil D. Lawrence, 2009
19 | 
20 | % COLLAB
21 |   
22 |   if nargin > 1
23 |     model.currentOut = currentOut;
24 |   end
25 |   if iscell(model.y)
26 |     rows = model.y{model.currentOut, 1};
27 |     m = spalloc(model.N, 1, length(rows));
28 |     % The values are read from model.y (there is no variable y in scope).
29 |     m(rows, :) = double(model.y{model.currentOut, 2});
30 |   else
31 |     m = model.y(:, model.currentOut);
32 |   end
33 |   % Only observed (non-zero) entries are normalised.
34 |   ind = find(m);
35 |   m(ind) = m(ind) - model.mu(ind);
36 |   m(ind) = m(ind)./model.sd(ind);
37 | 
38 | end


--------------------------------------------------------------------------------
/matlab/collabComputeS.m:
--------------------------------------------------------------------------------
 1 | function [s, numer] = collabComputeS(model)
 2 | 
 3 | % COLLABCOMPUTES Compute the responsibilities for the mixture model.
 4 | % FORMAT
 5 | % DESC computes the responsibilities for the mixture model.
 6 | % ARG model : the model for which the responsibilities are required. The
 7 | % expectations model.expectation.f and model.expectation.varf must hold one
 8 | % entry per component for the non-zero rows of model.m.
 9 | % RETURN s : the responsibilities associated with the components and the
10 | % data, as a sparse matrix with model.N rows and model.M columns.
11 | % RETURN numer : the numerator when the expectations are computed.
12 | %
13 | % SEEALSO : collabCreate, collabEstep
14 | %
15 | % COPYRIGHT : Neil D. Lawrence, 2009
16 | 
17 | % COLLAB
18 |   
19 |   % update the expected value of the components.
20 |   ind = find(model.m);
21 |   lognumer = zeros(size(ind, 1), model.M);
22 |   for m = 1:model.M
23 |     yhat = (model.m(ind) - model.expectation.f{m});
24 |     y2 = yhat.*yhat + model.expectation.varf{m};
25 |     % Log of numerator of s.
26 |     if model.heteroNoise
27 |       lognumer(:, m) = log(model.pi(m)) + (-.5*y2./model.diagvar(ind));
28 |     else
29 |       lognumer(:, m) = log(model.pi(m)) + (-.5*y2/model.sigma2);
30 |     end
31 |   end
32 |   % The normalisation needs all columns of lognumer, so it is done once
33 |   % after the loop over the components.
34 |   % subtract maximum value from log numerator to keep numerically stable.
35 |   numer = exp(lognumer - repmat(max(lognumer, [], 2), 1, model.M)); 
36 |   numer = numer + 1e-6;
37 |   s = spalloc(model.N, model.M, length(ind)*model.M);
38 |   % normalize to obtain the expectations.
39 |   s(ind, :) = (numer)./repmat(sum(numer, 2), 1, model.M);
40 | end
41 | 


--------------------------------------------------------------------------------
/matlab/collabCreate.m:
--------------------------------------------------------------------------------
 1 | function model = collabCreate(q, d, y, options);
 2 | 
 3 | % COLLABCREATE Create a COLLAB model with inducing varibles/pseudo-inputs.
 4 | % FORMAT
 5 | % DESC creates a collaborative filter structure with a latent space of q.
 6 | % ARG q : input data dimension.
 7 | % ARG d : the number of processes (i.e. output data dimension).
 8 | % ARG y : the data. The number of rows gives the number of latent points.
 9 | % ARG options : options structure as defined by collabOptions.m.
10 | % RETURN model : model structure containing the GP collaborative filter.
11 | %
12 | % SEEALSO : collabOptions, modelCreate
13 | %
14 | % COPYRIGHT : Neil D. Lawrence, 2008
15 | 
16 | % COLLAB
17 | 
18 |   
19 |   model.type = 'collab';
20 |   
21 |   model.q = q;
22 |   model.d = d;
23 |   model.N = size(y, 1);
24 |   model.y = y;
25 |   model.mu = zeros(model.N, 1);
26 |   model.sd = ones(model.N, 1);
27 |   model.currentOut = 1;
28 |   model.m = collabComputeM(model);
29 |   model.numParams = model.N*model.q;
30 |   model.kern = kernCreate(q, options.kern);
31 |   model.numParams = model.numParams + model.kern.nParams;
32 |   model.X = randn(model.N, q)*0.001;
33 |   model.change = zeros(size(model.X));
34 |   model.changeParam = zeros(1, model.kern.nParams);
35 |   model.heteroNoise = options.heteroNoise; % Whether or not to have diagonal
36 |                                            % noise variance.
37 |   model.noiseTransform = optimiDefaultConstraint('positive');
38 |   model.M = options.numComps;
39 |   if model.M > 1
40 |     model.pi = repmat(1/model.M, 1, model.M);
41 |     model.sigma2 = exp(-2);
42 |     model.lnsigma2Change = 0;
43 |     model = collabInitS(model);
44 |   end
45 |   % Count the noise parameters the same way collabExtractParam and
46 |   % collabExpandParam lay them out: one per data point for diagonal noise,
47 |   % otherwise the single sigma2 of a mixture model.
48 |   if model.heteroNoise
49 |     model.diagvar = repmat(exp(-2), model.N, 1);
50 |     model.lndiagChange = zeros(model.N, 1);
51 |     model.numParams = model.numParams + model.N;
52 |   elseif model.M > 1
53 |     model.numParams = model.numParams + 1;
54 |   end
55 |   % Extract and expand once to force the kernel computation.
56 |   initParams = collabExtractParam(model);
57 |   model = collabExpandParam(model, initParams);
58 | end
59 | 


--------------------------------------------------------------------------------
/matlab/collabCreateTensor.m:
--------------------------------------------------------------------------------
 1 | function model = collabCreateTensor(q, d, Y, options);
 2 | 
 3 | % COLLABCREATETENSOR Create a COLLAB model whose last latent column holds labels.
 4 | % FORMAT
 5 | % DESC creates a collaborative filter structure with a latent space of q
 6 | % in which the last latent dimension is set to the index of each point.
 7 | % ARG q : input data dimension.
 8 | % ARG d : the number of processes (i.e. output data dimension).
 9 | % ARG Y : either the data (its number of rows gives the number of points)
10 | % or, when it has a single row and a single column, the number of points.
11 | % ARG options : options structure as defined by collabOptionsTensor.m.
12 | % RETURN model : model structure containing the GP collaborative filter.
13 | %
14 | % SEEALSO : collabOptionsTensor, modelCreate
15 | %
16 | % COPYRIGHT : Raquel Urtasun, 2008
17 | 
18 | % COLLAB
19 | 
20 | 
21 | model.type = 'collab';
22 | model.q = q;
23 | model.d = d;
24 | 
25 | % A 1x1 Y is taken as the number of points itself.
26 | if isequal([size(Y, 1), size(Y, 2)], [1 1])
27 |   model.N = Y;
28 | else
29 |   model.N = size(Y, 1);
30 | end
31 | 
32 | model.kern = kernCreate(q, options.kern);
33 | 
34 | % Small random latent positions; the last column carries the labels 1..N.
35 | latent = randn(model.N, q)*0.001;
36 | latent(:, end) = (1:model.N)';
37 | model.X = latent;
38 | 
39 | model.change = zeros(size(latent));
40 | model.changeParam = zeros(1, model.kern.nParams);
41 | model.mu = zeros(model.N, 1);
42 | model.sd = ones(model.N, 1);
43 | 


--------------------------------------------------------------------------------
/matlab/collabDisplay.m:
--------------------------------------------------------------------------------
 1 | function collabDisplay(model, spaceNum)
 2 |   
 3 | % COLLABDISPLAY Print a summary of a collaborative filter model.
 4 | % FORMAT
 5 | % DESC prints the number of data points, the input dimension, the number
 6 | % of processes, the noise and mixture settings (when present) and the
 7 | % kernel of the given collaborative filter model.
 8 | % ARG model : the model to display.
 9 | % ARG spaceNum : number of spaces to indent display (default 0).
10 | %
11 | % SEEALSO : modelDisplay
12 | %
13 | % COPYRIGHT : Neil D. Lawrence, 2008
14 | 
15 | % COLLAB
16 | 
17 |   if nargin < 2
18 |     spaceNum = 0;
19 |   end
20 |   % Every line of the summary starts with the same run of blanks.
21 |   indent = repmat(' ', 1, spaceNum);
22 | 
23 |   fprintf([indent 'Collaborative filter GPLVM:\n']);
24 |   fprintf([indent '  Number of data points: %d\n'], model.N);
25 |   fprintf([indent '  Input dimension: %d\n'], model.q);
26 |   fprintf([indent '  Number of processes: %d\n'], model.d);
27 | 
28 |   if model.heteroNoise
29 |     noiseMean = mean(model.diagvar);
30 |     noiseSd = sqrt(var(model.diagvar));
31 |     fprintf([indent '  Heteroschodastic noise model, mean %2.4f, sd %2.4f\n'], noiseMean, noiseSd);
32 |   end
33 | 
34 |   if model.M > 1
35 |     fprintf([indent '  Mixture model with %d components.\n'], model.M);
36 |     fprintf([indent '    Output variance %2.4f.\n'], model.sigma2);
37 |   end
38 | 
39 |   fprintf([indent '  Kernel:\n']);
40 |   % The kernel summary is indented two further spaces.
41 |   kernDisplay(model.kern, spaceNum+2)
42 | end


--------------------------------------------------------------------------------
/matlab/collabEstep.m:
--------------------------------------------------------------------------------
 1 | function model = collabEstep(model, maxIters)
 2 |   
 3 | % COLLABESTEP Do E step updates and compute resulting Kinv for each component.
 4 | % FORMAT
 5 | % DESC alternates between updating the responsibilities of the current
 6 | % user, updating the kernels and recomputing the posterior means and
 7 | % variances of each component of the mixture model.
 8 | % ARG model : the model for which the expectations are to be updated. The
 9 | % field currentOut selects the user whose responsibilities are stored.
10 | % ARG maxIters : number of update iterations to run (default 100). All
11 | % iterations are always run, there is no convergence test.
12 | % RETURN model : the model with updated model.expectation.f,
13 | % model.expectation.varf and model.expectation.s{model.currentOut}.
14 | %
15 | % SEEALSO : collabLogLikeGradients, collabComputeS, collabUpdateKernels
16 | %
17 | % COPYRIGHT : Neil D. Lawrence, 2009
18 | 
19 | % COLLAB
20 | 
21 |   if nargin < 2
22 |     maxIters = 100;
23 |   end
24 |   
25 |   [model.expectation.f, model.expectation.varf] = collabComponentPosteriorMeanVar(model);
26 |   for i = 1:maxIters
27 |     model.expectation.s{model.currentOut} = collabComputeS(model);
28 |     model = collabUpdateKernels(model);
29 |     [model.expectation.f, model.expectation.varf] = collabComponentPosteriorMeanVar(model);
30 |   end
31 | 
32 | end
33 | 


--------------------------------------------------------------------------------
/matlab/collabExpandParam.m:
--------------------------------------------------------------------------------
 1 | function model = collabExpandParam(model, params)
 2 | 
 3 | % COLLABEXPANDPARAM Expand a parameter vector into a COLLAB model.
 4 | % FORMAT
 5 | % DESC writes the entries of a parameter vector into the model structure
 6 | % and then updates the stored kernel representations. The vector holds, in
 7 | % this order, the latent points (model.N*model.q values), the kernel
 8 | % parameters and the noise parameters (model.N values for diagonal noise,
 9 | % otherwise one value when the model has more than one component).
10 | % ARG model : the model structure for which parameters are to be
11 | % updated.
12 | % ARG params : a vector of parameters for placing in the model
13 | % structure.
14 | % RETURN model : a returned model structure containing the updated
15 | % parameters.
16 | % 
17 | % SEEALSO : collabCreate, collabExtractParam, modelExtractParam
18 | %
19 | % COPYRIGHT : Neil D. Lawrence, 2009
20 | 
21 | % COLLAB
22 | 
23 | 
24 |   % Latent points.
25 |   numLatent = model.N*model.q;
26 |   model.X = reshape(params(1:numLatent), model.N, model.q);
27 |   used = numLatent;
28 | 
29 |   % Kernel parameters.
30 |   numKern = model.kern.nParams;
31 |   model.kern = kernExpandParam(model.kern, params(used+1:used+numKern));
32 |   used = used + numKern;
33 | 
34 |   % Noise parameters are passed through the noise transform ('atox').
35 |   noiseTransform = str2func([model.noiseTransform 'Transform']);
36 |   hasDiagNoise = isfield(model, 'heteroNoise') && model.heteroNoise;
37 |   if hasDiagNoise
38 |     model.diagvar = noiseTransform(params(used+1:used+model.N), 'atox')';
39 |   elseif model.M>1
40 |     model.sigma2 = noiseTransform(params(used+1:used+1), 'atox');
41 |   end
42 | 
43 |   model = collabUpdateKernels(model);
44 | end


--------------------------------------------------------------------------------
/matlab/collabExtractParam.m:
--------------------------------------------------------------------------------
 1 | function [params, names] = collabExtractParam(model)
 2 | 
 3 | % COLLABEXTRACTPARAM Extract a parameter vector from a COLLAB model.
 4 | % FORMAT
 5 | % DESC collects the parameters of the model in a single row vector: first
 6 | % the latent points (stacked column by column), then the kernel parameters
 7 | % and finally the noise parameters (one per data point for diagonal noise,
 8 | % otherwise a single one when the model has more than one component).
 9 | % ARG model : the model structure containing the information about
10 | % the model.
11 | % RETURN params : a vector of parameters from the model.
12 | %
13 | % DESC does the same as above, but also returns parameter names.
14 | % ARG model : the model structure containing the information about
15 | % the model.
16 | % RETURN params : a vector of parameters from the model.
17 | % RETURN names : cell array of parameter names.
18 | %
19 | % SEEALSO : collabCreate, collabExpandParam, modelExtractParam
20 | %
21 | % COPYRIGHT : Neil D. Lawrence, 2009
22 | 
23 | % COLLAB
24 | 
25 | wantNames = nargout > 1;
26 | 
27 | % Kernel parameters; their names get the prefix 'Kernel, '.
28 | if wantNames
29 |   [kernParams, names] = kernExtractParam(model.kern);
30 |   for k = 1:length(names)
31 |     names{k} = ['Kernel, ' names{k}];
32 |   end
33 | else
34 |   kernParams = kernExtractParam(model.kern);
35 | end
36 | 
37 | % The latent points go in front of the kernel parameters.
38 | params = [model.X(:)' kernParams];
39 | if wantNames
40 |   for col = 1:size(model.X, 2)
41 |     for row = 1:size(model.X, 1)
42 |       Xnames{row, col} = ['X(' num2str(row) ', ' num2str(col) ')'];
43 |     end
44 |   end
45 |   names = {Xnames{:}, names{:}};
46 | end
47 | 
48 | % Noise parameters are passed through the noise transform ('xtoa').
49 | noiseTransform = str2func([model.noiseTransform 'Transform']);
50 | if model.heteroNoise
51 |   params = [params noiseTransform(model.diagvar, 'xtoa')'];
52 |   if wantNames
53 |     for k = 1:model.N
54 |       sigNames{k} = ['Sigma2(' num2str(k) ')'];
55 |     end
56 |     names = {names{:}, sigNames{:}};
57 |   end
58 | elseif model.M > 1
59 |   params = [params noiseTransform(model.sigma2, 'xtoa')];
60 |   if wantNames
61 |     names = {names{:}, 'Sigma2'};
62 |   end
63 | end


--------------------------------------------------------------------------------
/matlab/collabInitS.m:
--------------------------------------------------------------------------------
 1 | function model = collabInitS(model)
 2 |   
 3 | % COLLABINITS Initialize the expectations of S for the collaborative filter.
 4 | % FORMAT
 5 | % DESC initializes the expectations of S for the collaborative filter
 6 | % model. For every non-zero row of model.m the responsibilities are set to
 7 | % the mixing proportions model.pi, slightly perturbed with random noise,
 8 | % and normalised to sum to one over the components.
 9 | % ARG model : the model structure for which expectations are being
10 | % initialized.
11 | % RETURN model : the model structure with the expectations initialized in
12 | % model.expectation.s{model.currentOut}.
13 | %
14 | % SEEALSO : collabExpandParam, collabCreate
15 | %
16 | % COPYRIGHT : Neil D. Lawrence, 2009
17 | 
18 | % COLLAB
19 | 
20 |   
21 |   ind = find(model.m);
22 |   % One non-zero per observed row and component is stored, as in
23 |   % collabComputeS.
24 |   model.expectation.s{model.currentOut} = spalloc(model.N, model.M, length(ind)*model.M);
25 |   lognumer = repmat(log(model.pi), length(ind), 1) ...
26 |       + randn(length(ind), model.M)*0.001;
27 |   % subtract maximum value from log numerator to keep numerically stable.
28 |   numer = exp(lognumer - repmat(max(lognumer, [], 2), 1, model.M));
29 |   
30 |   model.expectation.s{model.currentOut}(ind, :) = numer./repmat(sum(numer, 2), 1, model.M);
31 | end


--------------------------------------------------------------------------------
/matlab/collabLogLikeGradients.m:
--------------------------------------------------------------------------------
  1 | function [g, g_param, g_noise] = collabLogLikeGradients(model)
  2 |   
  3 | % COLLABLOGLIKEGRADIENTS Gradient of the latent points.
  4 | % FORMAT 
  5 | % DESC computes the gradients of the log likelihood with respect to the
  6 | % latent points, the kernel parameters and the noise parameters for the
  7 | % target values currently stored in model.m.
  8 | % ARG model : the model of the data. The non-zero entries of model.m select
  9 | % the latent points that take part in the computation.
 10 | % RETURN g : sparse matrix (same size as model.X) with the gradients of
 11 | % the latent points. When fewer than two outputs are requested, all
 12 | % gradients are returned here concatenated as a single row vector.
 13 | % RETURN g_param : the gradients of the kernel parameters.
 14 | % RETURN g_noise : the gradients of the noise parameters (empty when the
 15 | % model has a single component and no diagonal noise).
 16 | %
 17 | % SEEALSO : collabLogLikelihood
 18 | %
 19 | % COPYRIGHT : Neil D. Lawrence, 2008, 2009
 20 |   
 21 | % COLLAB
 22 | 
 23 |   fullInd = find(model.m);
 24 | 
 25 |   g = spalloc(size(model.X, 1), size(model.X, 2), length(fullInd)*model.q);
 26 |   if model.heteroNoise
 27 |     g_noise = spalloc(size(model.X, 1), 1, length(fullInd));
 28 |   elseif model.M > 1
 29 |     g_noise = 0;
 30 |   else
 31 |     g_noise = [];
 32 |   end
 33 |   g_param = zeros(1, model.kern.nParams);
 34 | 
 35 |   % For large inputs, split them into blocks of maximum 1000. Without any
 36 |   % non-zero target the block size would be 0/0, so no block is created.
 37 |   % NOTE(review): model.invK is applied to the targets of a single block,
 38 |   % which only conforms if there is exactly one block -- confirm against
 39 |   % collabUpdateKernels.
 40 |   if isempty(fullInd)
 41 |     span = 0;
 42 |   else
 43 |     maxBlock = ceil(length(fullInd)/ceil(length(fullInd)/1000));
 44 |     span = 0:maxBlock:length(fullInd);
 45 |     if rem(length(fullInd), maxBlock)
 46 |       span = [span length(fullInd)];
 47 |     end
 48 |   end
 49 |   
 50 |   for block = 2:length(span)
 51 |     ind = fullInd(span(block-1)+1:span(block));
 52 |     m = model.m(ind, 1);
 53 | 
 54 |     X = model.X(ind, :);
 55 |     N = length(ind);
 56 |     if ~isfield(model, 'noise') || isempty(model.noise)
 57 |       if model.M > 1
 58 |         % mixture model: sum the gradients of all components.
 59 |         gK = zeros(N);
 60 |         for i = 1:model.M
 61 |           invKy = model.invK{i}*m;
 62 |           gKm{i} =  0.5*(invKy*invKy'- model.invK{i});
 63 |           gK = gK + gKm{i};
 64 |         end
 65 |       else
 66 |         invKy = model.invK*m;
 67 |         gK = -model.invK + invKy*invKy';
 68 |         gK = gK * 0.5;
 69 |       end
 70 |       %%% Prepare to Compute Gradients with respect to X %%%
 71 |       gKX = kernGradX(model.kern, X, X);
 72 |       gKX = gKX*2;
 73 |       dgKX = kernDiagGradX(model.kern, X);
 74 |       for i = 1:N
 75 |         gKX(i, :, i) = dgKX(i, :);
 76 |       end
 77 |       gX = zeros(N, model.q);
 78 |       for i = 1:N
 79 |         for j = 1:model.q
 80 |           gX(i, j) = gKX(:, j, i)'*gK(:, i);
 81 |         end
 82 |       end
 83 |       g(ind, :) = gX;
 84 |       g_param = g_param + kernGradient(model.kern, X, gK);
 85 | 
 86 |       fhandle = str2func([model.noiseTransform 'Transform']);
 87 |       if model.heteroNoise 
 88 |         % The transform factor does not depend on the component.
 89 |         fact = fhandle(model.diagvar(ind), 'gradfact');
 90 |         if model.M>1
 91 |           % Mixture model.
 92 |           for i = 1:model.M
 93 |             g_noise(ind, :) = g_noise(ind, :) ...
 94 |                       + diag(gKm{i})./model.expectation.s{model.currentOut}(ind, i).*fact;
 95 |           end
 96 |         else
 97 |           g_noise(ind, :) = diag(gK);
 98 |           g_noise(ind, :) = g_noise(ind, :).*fact;
 99 |         end
100 |       elseif model.M > 1 
101 |         % Mixture model.
102 |         fact = fhandle(model.sigma2, 'gradfact');
103 |         for i = 1:model.M
104 |           g_noise = g_noise ...
105 |                     + sum(diag(gKm{i})./model.expectation.s{model.currentOut}(ind, i))*fact;
106 |         end
107 |       end
108 |       
109 |     else
110 |       % NOTE(review): muse is never assigned in this function, so this
111 |       % branch fails as soon as it is reached.
112 |       muse = muse-1; % make muse start from zero.
113 |       % Create an IVM model and update site parameters.
114 |       options = ivmOptions;
115 |       options.kern = model.kern;
116 |       options.noise = model.noise;
117 |       options.selectionCriterion = model.selectionCriterion;
118 |       options.numActive = min(model.numActive, N);
119 |       imodel = ivmCreate(model.q, 1, X, muse, options);
120 |       imodel = ivmOptimiseIVM(imodel, options.display);
121 |       gX = gplvmApproxLogLikeActiveSetGrad(imodel);
122 |       gX = reshape(gX, length(imodel.I), size(imodel.X, 2));
123 |       g(ind(imodel.I), :) = gX;
124 |       g_param = g_param + ivmApproxLogLikeKernGrad(imodel);
125 |     end
126 |   end
127 |   % With a single output everything is returned as one row vector.
128 |   if nargout < 2
129 |     g = [g(:)' g_param g_noise'];
130 |   end
131 | end
132 | 


--------------------------------------------------------------------------------
/matlab/collabLogLikelihood.m:
--------------------------------------------------------------------------------
 1 | function ll = collabLogLikelihood(model)
 2 | 
 3 | % COLLABLOGLIKELIHOOD Compute the log likelihood of a COLLAB.
 4 | % FORMAT
 5 | % DESC computes the log likelihood of a data set given a COLLAB model. The
 6 | % contributions of all users (columns of model.y, or rows when model.y is
 7 | % a cell array) are accumulated; for mixture models the contributions of
 8 | % all components are summed.
 9 | % ARG model : the COLLAB model for which log likelihood is to be
10 | % computed.
11 | % RETURN ll : the log likelihood of the data in the COLLAB model.
12 | %
13 | % SEEALSO : collabCreate, collabLogLikeGradients, modelLogLikelihood
14 | %
15 | % COPYRIGHT : Neil D. Lawrence, 2009
16 | 
17 | % COLLAB
18 | 
19 |   ll = 0;
20 |   
21 |   if iscell(model.y)
22 |     total = size(model.y, 1);
23 |   else
24 |     total = size(model.y, 2);
25 |   end
26 | 
27 |   for i = 1:total
28 |     % collabComputeM reads the user from model.currentOut.
29 |     model.currentOut = i;
30 |     model.m = collabComputeM(model);
31 |     if model.M > 1
32 |       model = collabInitS(model);
33 |     end
34 |     model = collabUpdateKernels(model);
35 |     if model.M > 1
36 |       model = collabEstep(model);
37 |     end
38 |     %/~
39 |     % This code was for splitting large data into blocks.
40 |     %   maxBlock = ceil(length(fullInd)/ceil(length(fullInd)/1000));
41 |     %   span = 0:maxBlock:length(fullInd);
42 |     %   if rem(length(fullInd), maxBlock)
43 |     %     span = [span length(fullInd)];
44 |     %   end
45 |     
46 |     %   for block = 2:length(span)
47 |     %     ind = fullInd(span(block-1)+1:span(block));
48 |     %     if iscell(y)
49 |     %       yuse = double(y{1, 2}(span(block-1)+1:span(block)));
50 |     %     else
51 |     %       yuse = y(ind, 1);
52 |     %     end
53 |     
54 |     %     N = length(ind);
55 |     %~/
56 |     if ~isfield(model, 'noise') || isempty(model.noise)
57 |       
58 |       ind = find(model.m);
59 |       muse = model.m(ind);
60 |       if model.M> 1      
61 |         % Separate index for the components, so the user index i is not
62 |         % reused inside the user loop.
63 |         for k = 1:model.M
64 |           ll = ll - 0.5*model.logDetK(k) - 0.5*muse'*model.invK{k}*muse;
65 |         end
66 |       else
67 |         ll = ll - 0.5*model.logDetK - 0.5*muse'*model.invK*muse;
68 |       end
69 |     end
70 |   end
71 | end
72 | 


--------------------------------------------------------------------------------
/matlab/collabOptimiseOptions.m:
--------------------------------------------------------------------------------
 1 | function options = collabOptimiseOptions
 2 | 
 3 | % COLLABOPTIMISEOPTIONS returns default options for collaborative filter optimisation.
 4 | % FORMAT
 5 | % DESC builds the structure of default settings (momentum and learning
 6 | % rate for the latent points, the kernel parameters and the noise, display
 7 | % and save intervals, number of iterations and save name) for the
 8 | % optimization of the collaborative filter.
 9 | % RETURN options : the default options structure.
10 | %
11 | % SEEALSO : collabOptimise, collabCreate
12 | %
13 | % COPYRIGHT : Neil D. Lawrence, 2008
14 | 
15 | % COLLAB
16 |   
17 |   options = struct( ...
18 |       'momentum', 0.5, ...
19 |       'learnRate', 0.0001, ...
20 |       'paramMomentum', 0.5, ...
21 |       'paramLearnRate', 0.0001, ...
22 |       'noiseMomentum', 0.5, ...
23 |       'noiseLearnRate', 0.0001, ...
24 |       'optimiseParam', true, ...
25 |       'showEvery', 100, ...
26 |       'saveEvery', 10000, ...
27 |       'showLikelihood', false, ...
28 |       'numIters', 50, ...
29 |       'saveName', 'save');
30 |  end
31 | 


--------------------------------------------------------------------------------
/matlab/collabOptions.m:
--------------------------------------------------------------------------------
 1 | function options = collabOptions(approx);
 2 | 
 3 | % COLLABOPTIONS Return default options for COLLAB model.
 4 | % FORMAT
 5 | % DESC builds the structure of default options for a COLLAB model: an
 6 | % 'rbf' + 'bias' + 'white' kernel, no active points, empty beta, no
 7 | % diagonal noise and a single component.
 8 | % ARG approx : not used by this function.
 9 | % RETURN options : structure containing the default options.
10 | %
11 | % SEEALSO : collabCreate
12 | %
13 | % COPYRIGHT : Neil D. Lawrence, 2008
14 | 
15 | % COLLAB
16 | 
17 |   % The kernel cell array is wrapped in a further cell so that struct
18 |   % stores it as a single field value.
19 |   options = struct( ...
20 |       'kern', {{'rbf', 'bias', 'white'}}, ...
21 |       'numActive', 0, ...
22 |       'beta', [], ...
23 |       'heteroNoise', false, ...
24 |       'numComps', 1);
25 |   
26 | end
27 | 


--------------------------------------------------------------------------------
/matlab/collabOptionsTensor.m:
--------------------------------------------------------------------------------
 1 | function options = collabOptionsTensor(approx);
 2 | 
 3 | % COLLABOPTIONSTENSOR Return default options for COLLAB model with a tensor
 4 | % FORMAT
 5 | % DESC builds the structure of default options for a COLLAB model whose
 6 | % kernel is a compound of a tensor of two 'rbf' kernels, a 'bias' and a
 7 | % 'white' kernel, with no active points and empty beta.
 8 | % ARG approx : not used by this function.
 9 | % RETURN options : structure containing the default options.
10 | %
11 | % SEEALSO : collabCreateTensor
12 | %
13 | % COPYRIGHT : Raquel Urtasun, 2008
14 | 
15 | % COLLAB
16 | 
17 | 
18 |   % The kernel cell array is wrapped in a further cell so that struct
19 |   % stores it as a single field value.
20 |   tensorKern = {'tensor', 'rbf', 'rbf'};
21 |   options = struct( ...
22 |       'kern', {{'cmpnd', tensorKern, 'bias', 'white'}}, ...
23 |       'numActive', 0, ...
24 |       'beta', []);
25 | 
26 | end
27 | 


--------------------------------------------------------------------------------
/matlab/collabPosteriorMeanVar.m:
--------------------------------------------------------------------------------
 1 | function [mu, varsig] = collabPosteriorMeanVar(model, y, X);
 2 | 
 3 | % COLLABPOSTERIORMEANVAR Mean and variances of the posterior at points given by X.
 4 | % FORMAT
 5 | % DESC returns the posterior mean and, if requested, the posterior
 6 | % variance at a set of points, separately for every column of y.
 7 | % ARG model : the model for which the posterior will be computed.
 8 | % ARG y : the observed outputs, one column per output. Zero entries are
 9 | % skipped: only the rows returned by find(y(:, i)) are conditioned on.
10 | % ARG X : the input positions for which the posterior will be
11 | % computed.
12 | % RETURN mu : the mean of the posterior distribution, size(X, 1) by
13 | % size(y, 2).
14 | % RETURN varsig : the variances of the posterior distributions, same
15 | % size as mu. Only computed when a second output is requested.
16 | %
17 | % SEEALSO : collabCreate, collabUpdateKernels, collabEstep
18 | %
19 | % COPYRIGHT : Neil D. Lawrence, 2008
20 | 
21 | % COLLAB
22 | 
23 |   numPoints = size(X, 1);
24 |   numOut = size(y, 2);
25 |   wantVar = nargout > 1;
26 | 
27 |   mu = zeros(numPoints, numOut);
28 |   if wantVar
29 |     diagK = kernDiagCompute(model.kern, X);
30 |     varsig = zeros(numPoints, numOut);
31 |   end
32 | 
33 |   for i = 1:numOut
34 |     % Condition on the observed (non-zero) entries of this column only.
35 |     ind = find(y(:, i));
36 |     model.m = y(:, i);
37 |     yind = y(ind, i);
38 |     if model.M > 1
39 |       model = collabInitS(model);
40 |     end  
41 |     % Refresh model.invK for the rows selected by model.m.
42 |     model = collabUpdateKernels(model);
43 |     % Compute kernel between the observed rows and the new points.
44 |     KX_star = kernCompute(model.kern, model.X(ind, :), X);  
45 |     if model.M > 1
46 |       % Mixture model: combine the components with weights model.pi.
47 |       model = collabEstep(model);
48 |       sndMoment = zeros(numPoints, 1);
49 |       for m = 1:model.M
50 |         mum = KX_star'*model.invK{m}*yind;
51 |         mu(:, i) = mu(:, i) + model.pi(m)*mum;
52 |         % The variance terms are skipped when only the mean is requested;
53 |         % diagK does not exist in that case.
54 |         if wantVar
55 |           Kinvk = model.invK{m}*KX_star;
56 |           varsigm = diagK - sum(KX_star.*Kinvk, 1)';
57 |           sndMoment = sndMoment + model.pi(m)*(mum.*mum + varsigm);
58 |         end
59 |       end
60 |       if wantVar
61 |         % Variance of the mixture: second moment minus squared mean.
62 |         varsig(:, i) = sndMoment - mu(:, i).*mu(:, i);
63 |       end
64 |     else
65 |       mu(:, i) = KX_star'*model.invK*yind;
66 |       if wantVar
67 |         % Computed inside the loop because invK and KX_star depend on
68 |         % which rows of this column are observed.
69 |         Kinvk = model.invK*KX_star;
70 |         varsig(:, i) = diagK - sum(KX_star.*Kinvk, 1)';
71 |       end
72 |     end
73 |   end
74 | end
75 | 


--------------------------------------------------------------------------------
/matlab/collabPosteriorMeanVarCell.m:
--------------------------------------------------------------------------------
 1 | function [mu, varsig] = collabPosteriorMeanVarCell(model, ind, y, X)
 2 | 
 3 | % COLLABPOSTERIORMEANVARCELL Mean and variances of the posterior at points given by X.
 4 | % FORMAT
 5 | % DESC returns the posterior mean and variance for a given set of
 6 | % points, conditioning on the training rows listed in ind.
 7 | % ARG model : the model for which the posterior will be computed.
 8 | % ARG ind : the indices (rows of model.X) of the train data for that user.
 9 | % ARG y : the observed values at the rows in ind, one column per
10 | % output; every column shares the same ind.
11 | % ARG X : the input positions for which the posterior will be
12 | % computed.
13 | % RETURN mu : the mean of the posterior distribution, size(X, 1) by
14 | % size(y, 2).
15 | % RETURN varsig : the variances of the posterior distributions, same
16 | % size as mu. Only computed when a second output is requested.
17 | %
18 | % SEEALSO : collabCreate, collabPosteriorMeanVar
19 | %
20 | % COPYRIGHT : Raquel Urtasun, 2009
21 | 
22 | % COLLAB
23 | 
24 |   % The kernels depend only on ind and X, so they are computed once
25 |   % rather than once per column of y.
26 |   KX_star = kernCompute(model.kern, model.X(ind, :), X);  
27 |   K = kernCompute(model.kern, model.X(ind, :));
28 |   invK = pdinv(K);
29 | 
30 |   % One matrix product gives the mean for every column of y.
31 |   mu = KX_star'*invK*y;
32 | 
33 |   % Compute if variances required.
34 |   if nargout > 1
35 |     diagK = kernDiagCompute(model.kern, X);
36 |     Kinvk = invK*KX_star;
37 |     varsig = diagK - sum(KX_star.*Kinvk, 1)';
38 |     % The variance does not depend on y, so it is shared by all columns.
39 |     varsig = repmat(varsig, 1, size(y, 2));
40 |   end
41 | end
42 | 


--------------------------------------------------------------------------------
/matlab/collabTest.m:
--------------------------------------------------------------------------------
 1 | % COLLABTEST Test collaborative filtering model.
 2 | 
 3 | % COLLAB
 4 | 
 5 | % Seed both generators so that the test is repeatable.
 6 | rand('seed', 1e5)
 7 | randn('seed', 1e5)
 8 | 
 9 | numItems = 50;
10 | numUsers = 10;
11 | 
12 | % Random ratings; every entry whose uniform draw exceeds 0.2 is zeroed
13 | % before the matrix is stored as sparse.
14 | y = randn(numItems, numUsers);
15 | y(rand(numItems, numUsers) > 0.2) = 0;
16 | y = sparse(y);
17 | 
18 | options = collabOptions;
19 | 
20 | % Check the gradients for each number of components, with and without
21 | % heteroscedastic noise.
22 | for numComps = [1 2 4 8]
23 |   for heteroNoise = [false true]
24 |     options.heteroNoise = heteroNoise;
25 |     options.numComps = numComps;
26 |     fprintf('Testing model with %d component(s).\n', options.numComps)
27 |     if heteroNoise
28 |       fprintf('Heteroschedastic noise used.\n')
29 |     end
30 |     model = collabCreate(2, numUsers, y(:, 1), options);
31 |     % Replace the initial parameters by random values of the same size.
32 |     params = randn(size(collabExtractParam(model)));
33 |     model = collabExpandParam(model, params);
34 |     if model.M > 1
35 |       model = collabEstep(model);
36 |     end
37 |     modelDisplay(model);
38 |     modelGradientCheck(model);
39 |   end
40 | end
41 | 


--------------------------------------------------------------------------------
/matlab/collabToolboxes.m:
--------------------------------------------------------------------------------
 1 | % COLLABTOOLBOXES Load in the relevant toolboxes for collaborative filtering.
 2 | % Calls importLatest once for each toolbox named below, in the order listed.
 3 | importLatest('netlab');
 4 | importLatest('ndlutil');
 5 | importLatest('mltools');
 6 | importLatest('optimi');
 7 | importLatest('datasets');
 8 | importLatest('kern');
 9 | importLatest('ivm');
10 | importLatest('gplvm');
11 | importLatest('noise');


--------------------------------------------------------------------------------
/matlab/collabUpdateKernels.m:
--------------------------------------------------------------------------------
 1 | function model = collabUpdateKernels(model)
 2 | 
 3 | % COLLABUPDATEKERNELS Update the kernels that are needed.
 4 | % FORMAT
 5 | % DESC recomputes the kernel representations stored in the model
 6 | % structure (K, invK and logDetK) for the rows where model.m is non-zero.
 7 | % ARG model : the model structure for which kernels are being
 8 | % updated.
 9 | % RETURN model : the model structure with the kernels updated.
10 | %
11 | % SEEALSO : collabExpandParam, collabCreate
12 | %
13 | % COPYRIGHT : Neil D. Lawrence, 2009
14 | 
15 | % COLLAB
16 |   
17 |   obsIdx = find(model.m);
18 |   numObs = length(obsIdx);
19 |   model.K = kernCompute(model.kern, model.X(obsIdx, :));
20 |   resp = model.expectation.s{model.currentOut};
21 |   if ~(model.M > 1)
22 |     % Single component: one inverse, with the per-row variances added to
23 |     % the diagonal when the noise is heteroscedastic.
24 |     if model.heteroNoise
25 |       noisyK = model.K + spdiags(model.diagvar(obsIdx, :), 0, numObs, numObs);
26 |     else
27 |       noisyK = model.K;
28 |     end
29 |     [model.invK, cholK] = pdinv(noisyK);
30 |     model.logDetK = logdet(model.K, cholK);
31 |   else
32 |     % mixture model: one inverse per component, with the noise variance
33 |     % divided by that component's column of resp.
34 |     for comp = 1:model.M
35 |       if model.heteroNoise
36 |         noiseVar = model.diagvar(obsIdx);
37 |       else
38 |         noiseVar = model.sigma2;
39 |       end
40 |       noisyK = model.K + diag(noiseVar./resp(obsIdx, comp));
41 |       [model.invK{comp}, cholK] = pdinv(noisyK);
42 |       model.logDetK(comp) = logdet(model.K, cholK);
43 |     end
44 |   end
45 | 
46 | end


--------------------------------------------------------------------------------
/matlab/computeMeanVarianceWeak.m:
--------------------------------------------------------------------------------
 1 | function [L2_error,NMAE_error,NMAE_round_error,pred_L2, pred_r_NMAE,pred_var, users, perUser_var, perUser_L2, perUser_r_NMAE, numUsers] = computeMeanVarianceWeak(model,Y,Ytest)
 2 | % COMPUTEMEANVARIANCEWEAK Compute weak test errors together with predictive variances.
 3 | % FORMAT
 4 | % DESC computes the test error for the weak generalization and collects
 5 | % the errors and predictive variances per prediction and per user.
 6 | % ARG model : the model.
 7 | % ARG Y : the training data, one column per user.
 8 | % ARG Ytest : the test data, one column per user; zero entries are
 9 | % treated as missing.
10 | % RETURN L2_error : the root mean squared error over all test ratings.
11 | % RETURN NMAE_error : the mean absolute error divided by 1.6.
12 | % RETURN NMAE_round_error : as NMAE_error, with the errors rounded.
13 | % RETURN pred_L2 : the sum of squared errors of each user.
14 | % RETURN pred_r_NMAE : the absolute error of every test rating.
15 | % RETURN pred_var : the predictive variance of every predicted rating.
16 | % RETURN users : the user index of each entry of pred_var.
17 | % RETURN perUser_var : the mean predictive variance of each user.
18 | % RETURN perUser_L2 : the sum of squared errors of each user.
19 | % RETURN perUser_r_NMAE : the mean absolute error of each user.
20 | % RETURN numUsers : the number of test ratings of each user.
21 | %
22 | % SEEALSO : computeTestErrorWeak, collabPosteriorMeanVar
23 | 
24 | % COLLAB
25 | 
26 | val_L2 = 0;
27 | tot_L2 = 0;
28 | val_NMAE = 0;
29 | tot_NMAE = 0;
30 | val_round_NMAE = 0;
31 | tot_round_NMAE = 0;
32 | pred_L2 = [];
33 | pred_r_NMAE = [];
34 | pred_var = [];
35 | users = [];
36 | perUser_var = [];
37 | perUser_r_NMAE = [];
38 | perUser_L2 = [];
39 | % Allocated up front so the output exists even when Y has no columns.
40 | numUsers = zeros(1, size(Y, 2));
41 | 
42 | for i = 1:size(Y, 2)       
43 |     ind = find(Ytest(:, i));
44 |     % elim holds positions within ind of the items the model does not cover.
45 |     elim = find(ind>size(model.X, 1));
46 |     tind = ind;
47 |     tind(elim) = [];
48 |     [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
49 |     % normalize the values
50 | 
51 |     %if (length(mu)>0)
52 |     %    mu = mu.*model.sd(tind);
53 |     %    mu = mu+model.mu(tind);
54 |     %end
55 |     a = Ytest(tind, i) - mu; 
56 |     % Items outside the model count with their full rating as error.
57 |     % ind(elim) turns the positions in elim back into row numbers.
58 |     a = [a; Ytest(ind(elim), i)];
59 |     val_L2 = val_L2 + a'*a;
60 |     tot_L2 = tot_L2 + length(a);
61 |     val_NMAE = val_NMAE + sum(abs(a));
62 |     tot_NMAE = tot_NMAE + length(a);
63 |     val_round_NMAE = val_round_NMAE + sum(abs(round(a)));
64 |     tot_round_NMAE = tot_round_NMAE + length(a);
65 |     pred_L2 = [pred_L2; a'*a];
66 |     pred_r_NMAE = [pred_r_NMAE; abs(a)];
67 |     perUser_var =[perUser_var;  mean(varsig)];
68 |     % NOTE(review): a'*a is already a scalar, so this stores the sum of
69 |     % squared errors, not a per-rating mean - confirm that is intended.
70 |     perUser_L2 = [perUser_L2; mean(a'*a)];
71 |     perUser_r_NMAE = [perUser_r_NMAE; mean(abs(a))];
72 |     pred_var = [pred_var; varsig];
73 |     users = [users; i*ones(length(varsig),1)];
74 |     numUsers(i) = length(a);
75 | end
76 | L2_error = sqrt(val_L2/tot_L2);
77 | % NOTE(review): 1.6 is the normalisation constant used by every error
78 | % function in this toolbox; its derivation is not shown here.
79 | NMAE_error = (val_NMAE/tot_NMAE)/1.6;
80 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6;
81 | 


--------------------------------------------------------------------------------
/matlab/computePredictionsErrorWeak.m:
--------------------------------------------------------------------------------
 1 | 
 2 | function [mu_T] = computePredictionsErrorWeak(model,Y,Ytest)
 3 | % COMPUTEPREDICTIONSERRORWEAK Stack the rescaled predictions for all test ratings.
 4 | % FORMAT
 5 | % DESC predicts, for every column of Y, the test items of that column
 6 | % that lie inside the model and rescales them with model.sd and model.mu.
 7 | % ARG model : the model.
 8 | % ARG Y : the training data, one column per user.
 9 | % ARG Ytest : the test data; non-zero entries mark the items to predict.
10 | % RETURN mu_T : the predictions of all columns stacked into one column.
11 | 
12 | mu_T = [];
13 | numModelItems = size(model.X, 1);
14 | 
15 | for userNo = 1:size(Y, 2)
16 |     % Keep only the test items that the model covers.
17 |     testItems = find(Ytest(:, userNo));
18 |     testItems = testItems(~(testItems > numModelItems));
19 |     [predMean, predVar] = collabPosteriorMeanVar(model, Y(:, userNo), model.X(testItems, :));
20 | 
21 |     % Undo the per-item normalization.
22 |     if ~isempty(predMean)
23 |         predMean = predMean.*model.sd(testItems) + model.mu(testItems);
24 |     end
25 |     mu_T = [mu_T; predMean];
26 | end
27 | 


--------------------------------------------------------------------------------
/matlab/computeTestErrorEnsemblesWeak.m:
--------------------------------------------------------------------------------
 1 | function [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest)
 2 | % COMPUTETESTERRORENSEMBLESWEAK Compute the weak test error of an ensemble of models.
 3 | % FORMAT
 4 | % DESC computes the test error for the weak generalization when the
 5 | % prediction is the average of the rescaled predictions of several models.
 6 | % ARG allModels : cell array with the models of the ensemble.
 7 | % ARG Y : the training data, one column per user.
 8 | % ARG Ytest : the test data, one column per user; zero entries are
 9 | % treated as missing.
10 | % RETURN L2_error : the l2 error.
11 | % RETURN NMAE_error : the NMAE error.
12 | % RETURN NMAE_round_error : the NMAE error with rounding on the outputs.
13 | %
14 | % SEEALSO : computeTestErrorWeak, collabPosteriorMeanVar
15 | 
16 | % COLLAB
17 | 
18 | val_L2 = 0;
19 | tot_L2 = 0;
20 | val_NMAE = 0;
21 | tot_NMAE = 0;
22 | val_round_NMAE = 0;
23 | tot_round_NMAE = 0;
24 | 
25 | for i = 1:size(Y, 2)       
26 |   ind = find(Ytest(:, i));
27 |   % elim holds positions within ind of the items the models do not cover.
28 |   elim = find(ind>size(allModels{1}.X, 1));
29 |   tind = ind;
30 |   tind(elim) = [];
31 |   % Average the rescaled predictions of all the models.
32 |   mu_T = 0;
33 |   for j=1:length(allModels)
34 |     [mu, varsig] = collabPosteriorMeanVar(allModels{j}, Y(:, i), allModels{j}.X(tind, :));
35 |     % Undo the normalization stored in each model.
36 |     if (length(mu)>0)
37 |       mu = mu.*allModels{j}.sd(tind);
38 |       mu = mu+allModels{j}.mu(tind);
39 |     end
40 |     mu_T = mu_T + mu;
41 |   end
42 |   mu_T = mu_T/length(allModels);
43 |   a = Ytest(tind, i) - mu_T; 
44 |   % Items outside the model count with their full rating as error.
45 |   % ind(elim) turns the positions in elim back into row numbers.
46 |   a = [a; Ytest(ind(elim), i)];
47 |   val_L2 = val_L2 + a'*a;
48 |   tot_L2 = tot_L2 + length(a);
49 |   val_NMAE = val_NMAE + sum(abs(a));
50 |   tot_NMAE = tot_NMAE + length(a);
51 |   val_round_NMAE = val_round_NMAE + sum(abs(round(a)));
52 |   tot_round_NMAE = tot_round_NMAE + length(a);
53 | end
54 | L2_error = sqrt(val_L2/tot_L2);
55 | NMAE_error = (val_NMAE/tot_NMAE)/1.6;
56 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6;
57 | 


--------------------------------------------------------------------------------
/matlab/computeTestErrorStrong.m:
--------------------------------------------------------------------------------
 1 | function [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorStrong(model,Ytest)
 2 | % COMPUTETESTERRORSTRONG Compute the strong test error.
 3 | % FORMAT
 4 | % DESC computes the test error for the strong generalization. For each
 5 | % user one rating is picked at random and predicted from the user's
 6 | % remaining ratings.
 7 | % ARG model : the model.
 8 | % ARG Ytest : the test data, one column per user; zero entries are
 9 | % treated as missing.
10 | % RETURN error_L2 : the l2 error.
11 | % RETURN error_NMAE : the NMAE error.
12 | % RETURN error_NMAE_round : the NMAE error with rounding on the outputs.
13 | %
14 | % SEEALSO : computeTestErrorWeak
15 | % 
16 | % COPYRIGHT : Raquel Urtasun, 2009
17 | 
18 | % COLLAB
19 | 
20 | % NOTE(review): the original author marked this function with
21 | % "??? this doesn't work" as bare text, which stopped the file from
22 | % parsing. It is kept here as a comment; the results still need checking.
23 | 
24 | val_L2 = 0;
25 | tot_L2 = 0;
26 | val_NMAE = 0;
27 | tot_NMAE = 0;
28 | val_NMAE_round = 0;
29 | tot_NMAE_round = 0;
30 | 
31 | for i = 1:size(Ytest, 2)       
32 |   ind = find(Ytest(:, i));
33 |   % elim holds positions within ind of the items the model does not cover.
34 |   elim = find(ind>size(model.X, 1));
35 |   tind = ind;
36 |   tind(elim) = [];
37 |   
38 |   if isempty(tind)
39 |       continue;
40 |   end
41 |   % in the case of STRONG experiments, the user is new, so we have to
42 |   % compute the prediction using the test data
43 |   % compute random (LOO --> leave one out)
44 |   indexRand = randperm(length(tind));
45 |   heldOut = tind(indexRand(end));
46 |   Y_train_user = Ytest(:,i);
47 |   Y_test_user = Y_train_user(heldOut);
48 |   Y_train_user(heldOut,:) = 0;
49 |   % Ratings of items outside the model cannot be conditioned on: they
50 |   % would index past the end of model.X.
51 |   Y_train_user(ind(elim),:) = 0;
52 |   [mu, varsig] = collabPosteriorMeanVar(model, Y_train_user, model.X(heldOut, :));
53 |   
54 |   %mu = mu*model.sd(tind);
55 |   %mu = mu+model.mu(tind);
56 |   
57 |   a = Y_test_user - mu; 
58 |   % Items outside the model count with their full rating as error.
59 |   % ind(elim) turns the positions in elim back into row numbers.
60 |   a = [a; Ytest(ind(elim), i)];
61 |   val_L2 = val_L2 + a'*a;
62 |   tot_L2 = tot_L2 + length(a);
63 |   val_NMAE = val_NMAE + sum(abs(a));
64 |   tot_NMAE = tot_NMAE + length(a);
65 |   val_NMAE_round = val_NMAE_round + sum(abs(round(a)));
66 |   tot_NMAE_round = tot_NMAE_round + length(a);
67 | end
68 | error_L2 = sqrt(val_L2/tot_L2);
69 | error_NMAE = (val_NMAE/tot_NMAE)/1.6;
70 | error_NMAE_round = (val_NMAE_round/tot_NMAE_round)/1.6;
71 | 


--------------------------------------------------------------------------------
/matlab/computeTestErrorWeak.m:
--------------------------------------------------------------------------------
 1 | function [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest)
 2 | % COMPUTETESTERRORWEAK Compute the weak test error.
 3 | % FORMAT
 4 | % DESC computes the test error for the weak generalization.
 5 | % ARG model : the model.
 6 | % ARG Y : the training data, one column per user.
 7 | % ARG Ytest : the test data, one column per user; zero entries are
 8 | % treated as missing.
 9 | % RETURN L2_error : the l2 error.
10 | % RETURN NMAE_error : the NMAE error.
11 | % RETURN NMAE_round_error : the NMAE error with rounding on the outputs.
12 | %
13 | % SEEALSO : computeTestErrorStrong
14 | % 
15 | % COPYRIGHT : Raquel Urtasun, 2009
16 | 
17 | % COLLAB
18 |   
19 |  
20 | val_L2 = 0;
21 | tot_L2 = 0;
22 | val_NMAE = 0;
23 | tot_NMAE = 0;
24 | val_round_NMAE = 0;
25 | tot_round_NMAE = 0;
26 | 
27 | for i = 1:size(Y, 2)       
28 |   ind = find(Ytest(:, i));
29 |   % elim holds positions within ind of the items the model does not cover.
30 |   elim = find(ind>size(model.X, 1));
31 |   tind = ind;
32 |   tind(elim) = [];
33 |   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
34 |   %/~
35 |   % normalize the values
36 |   
37 |   %if (length(mu)>0)
38 |   %    mu = mu.*model.sd(tind);
39 |   %    mu = mu+model.mu(tind);
40 |   %end
41 |   %~/
42 |   a = Ytest(tind, i) - mu; 
43 |   % Items outside the model count with their full rating as error.
44 |   % ind(elim) turns the positions in elim back into row numbers.
45 |   a = [a; Ytest(ind(elim), i)];
46 |   val_L2 = val_L2 + a'*a;
47 |   tot_L2 = tot_L2 + length(a);
48 |   val_NMAE = val_NMAE + sum(abs(a));
49 |   tot_NMAE = tot_NMAE + length(a);
50 |   val_round_NMAE = val_round_NMAE + sum(abs(round(a)));
51 |   tot_round_NMAE = tot_round_NMAE + length(a);
52 | end
53 | L2_error = sqrt(val_L2/tot_L2);
54 | NMAE_error = (val_NMAE/tot_NMAE)/1.6;
55 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6;
56 | 


--------------------------------------------------------------------------------
/matlab/computeTestErrorWeakCell.m:
--------------------------------------------------------------------------------
 1 | function [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest)
 2 | % 
 3 | % COMPUTETESTERRORWEAKCELL Compute the weak test error for data stored in a cell array.
 4 | % FORMAT
 5 | % DESC computes the test error for the weak generalization.
 6 | % ARG model : the model.
 7 | % ARG Y : the training data; row i holds the item indices of user i in
 8 | % column 1 and the matching values in column 2.
 9 | % ARG Ytest : the test data, in the same layout as Y.
10 | % RETURN L2_error : the l2 error.
11 | % RETURN NMAE_error : the NMAE error.
12 | % RETURN NMAE_round_error : the NMAE error with rounding on the outputs.
13 | %
14 | % SEEALSO : computeTestErrorWeak, collabPosteriorMeanVarCell
15 | % 
16 | % COPYRIGHT : Raquel Urtasun, 2009
17 | 
18 | % COLLAB
19 |  
20 | val_L2 = 0;
21 | tot_L2 = 0;
22 | val_NMAE = 0;
23 | tot_NMAE = 0;
24 | val_round_NMAE = 0;
25 | tot_round_NMAE = 0;
26 | 
27 | for i = 1:size(Y, 1)   
28 |   ind = Ytest{i,1};
29 |   if (length(ind)<1)
30 |     disp(['No test data for ',num2str(i),]);
31 |     continue;
32 |   end
33 |   % elim holds positions within ind of the items the model does not cover.
34 |   elim = find(ind>size(model.X, 1));
35 |   tind = ind;
36 |   tind(elim) = [];
37 |   [mu, varsig] = collabPosteriorMeanVarCell(model, Y{i,1}, double(Y{i,2}), model.X(tind, :));
38 |   
39 |   % Compare only the values of the items that were predicted, so the
40 |   % sizes match when items were eliminated.
41 |   % assumes Ytest{i,2} is a column vector, as a'*a below requires.
42 |   vals = double(Ytest{i,2});
43 |   predicted = vals;
44 |   predicted(elim) = [];
45 |   a = predicted - mu; 
46 |   % As in computeTestErrorWeak, items outside the model count with
47 |   % their full rating as error.
48 |   a = [a; reshape(vals(elim), [], 1)];
49 |   val_L2 = val_L2 + a'*a;
50 |   tot_L2 = tot_L2 + length(a);
51 |   val_NMAE = val_NMAE + sum(abs(a));
52 |   tot_NMAE = tot_NMAE + length(a);
53 |   val_round_NMAE = val_round_NMAE + sum(abs(round(a)));
54 |   tot_round_NMAE = tot_round_NMAE + length(a);
55 | end
56 | L2_error = sqrt(val_L2/tot_L2);
57 | NMAE_error = (val_NMAE/tot_NMAE)/1.6;
58 | NMAE_round_error = (val_round_NMAE/tot_round_NMAE)/1.6;
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/matlab/demAistats1.m:
--------------------------------------------------------------------------------
 1 | % DEMAISTATS1 Try collaborative filtering on the Aistats Reviews
 2 | 
 3 | % COLLAB
 4 | 
 5 | % Seed both generators so that the run is repeatable.
 6 | randn('seed', 1e5);
 7 | rand('seed', 1e5);
 8 | 
 9 | experimentNo = 1;
10 | dataSetName = 'aistats';
11 | [Y, void, Ytest] = collabLoadData(dataSetName);
12 | 
13 | % Rows of Y are counted as papers and columns as reviewers.
14 | [numPapers, numReviewers] = size(Y);
15 | 
16 | % Create the model with q = 2 and the default COLLAB options, then
17 | % override two of the kernel variances.
18 | q = 2;
19 | options = collabOptions;
20 | model = collabCreate(q, numReviewers, Y, options);
21 | model.kern.comp{2}.variance = 0.11;
22 | model.kern.comp{3}.variance =  5; 
23 | 
24 | % No per-paper normalization: zero means and unit standard deviations.
25 | meanPapers = zeros(numPapers,1);
26 | stdPapers = ones(numPapers,1);
27 | model.mu = meanPapers;
28 | model.sd = stdPapers;
29 | 
30 | % Optimiser settings.
31 | options = collabOptimiseOptions;
32 | options.momentum = 0.9;
33 | options.learnRate = 0.0001;
34 | options.paramMomentum = 0.9;
35 | options.paramLearnRate = 0.0001;
36 | options.numIters = 20; % ??? put 10 back
37 | options.showLikelihood = true;
38 | 
39 | % Save name: 'dem', the capitalised data set name, the experiment number.
40 | capName = [upper(dataSetName(1)) dataSetName(2:end)];
41 | options.saveName = ['dem' capName num2str(experimentNo)];
42 | 
43 | % Left without a semicolon so that the optimised model is displayed.
44 | model = collabOptimise(model, Y, options)
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/matlab/demEachMovie1.m:
--------------------------------------------------------------------------------
  1 | % DEMEACHMOVIE1 Try collaborative filtering on the EachMovie data with Marlin's partitions
  2 | % (weak generalization experiment, partition 1).
  3 | 
  4 | % COLLAB
  5 | 
  6 | randn('seed', 1e5);
  7 | rand('seed', 1e5);
  8 | 
  9 | experimentNo = 3;
 10 | substract_mean = 0;
 11 | 
 12 | dataSetName = 'eachmovie_marlin_weak_1';
 13 | [Y, void, Ytest] = collabLoadData(dataSetName);
 14 | 
 15 | numFilms = size(Y,1);
 16 | numUsers = size(Y,2);
 17 | % Defaults (mean 0, standard deviation 1) leave the ratings untouched.
 18 | meanFilms = zeros(numFilms,1);
 19 | stdFilms = ones(numFilms,1);
 20 | if (substract_mean)
 21 |     % do for each film independently
 22 |     for i=1:numFilms
 23 |         % Observed (non-zero) ratings of this film in both partitions.
 24 |         ind = find(Y(i,:));
 25 |         trainVals = full(Y(i,ind));
 26 |         testVals = nonzeros(Ytest(i,:));
 27 |         length_v = length(ind) + length(testVals);
 28 |         if (length_v==0)
 29 |             % No rating at all: keep the defaults instead of storing NaN.
 30 |             continue;
 31 |         end
 32 |         mean_v = (sum(trainVals) + sum(testVals))/length_v;
 33 |         % Count-weighted average of the two standard deviations. Only
 34 |         % observed ratings enter, and an empty partition contributes
 35 |         % nothing (std of an empty vector is NaN).
 36 |         std_v = 0;
 37 |         if ~isempty(trainVals)
 38 |             std_v = std_v + length(trainVals)*std(trainVals);
 39 |         end
 40 |         if ~isempty(testVals)
 41 |             std_v = std_v + length(testVals)*std(testVals);
 42 |         end
 43 |         std_v = std_v/length_v;
 44 |         Y(i,ind) = Y(i,ind) - mean_v;
 45 |         meanFilms(i) = mean_v;
 46 |         % The stored scale is only changed when the ratings really were
 47 |         % divided, so that mu.*sd + mean undoes the normalization.
 48 |         if (std_v>0) 
 49 |             Y(i,ind) = Y(i,ind)/std_v;
 50 |             stdFilms(i) = std_v;
 51 |         end
 52 |     end
 53 | end
 54 | 
 55 | q = 5;
 56 | options = collabOptions;
 57 | model = collabCreate(q, size(Y, 2), Y, options);
 58 | model.kern.comp{2}.variance = 0.11;
 59 | model.kern.comp{3}.variance =  5; 
 60 | options = collabOptimiseOptions;
 61 | 
 62 | % set parameters
 63 | options.momentum = 0.9;
 64 | options.learnRate = 0.0001;
 65 | options.paramMomentum = 0.9;
 66 | options.paramLearnRate = 0.0001;
 67 | options.numIters = 1; % ??? put 10 back
 68 | options.showLikelihood = false;
 69 | 
 70 | capName = dataSetName;
 71 | capName(1) = upper(capName(1));
 72 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
 73 | 
 74 | % Store the normalization so that predictions can be rescaled later.
 75 | model.mu = meanFilms;
 76 | model.sd = stdFilms;
 77 | 
 78 | % Left without a semicolon so that the optimised model is displayed.
 79 | model = collabOptimise(model, Y, options)
 80 | 
 81 | disp('Computing test error');
 82 | 
 83 | % ????? this test is to be done
 84 | 
 85 | % Deliberate stop for interactive inspection before the test error is
 86 | % computed; type "return" to continue.
 87 | keyboard
 88 | 
 89 | % ??? check if the mean is substracted...
 90 | 
 91 | % An inline copy of this computation used to be kept here as commented
 92 | % out code; computeTestErrorWeak is the maintained implementation.
 93 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorWeak(model,Y,Ytest);
 94 | 
 95 | % Save the results.
 96 | capName = dataSetName;
 97 | capName(1) = upper(capName(1));
 98 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round');
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/matlab/demEachMovie7.m:
--------------------------------------------------------------------------------
  1 | % DEMEACHMOVIE7 Try collaborative filtering on the EachMovie data
  2 | % (weak generalization experiment, data set 'eachmovie_weak_1').
  3 | 
  4 | % COLLAB
  5 | 
  6 | randn('seed', 1e5);
  7 | rand('seed', 1e5);
  8 | 
  9 | experimentNo = 3;
 10 | substract_mean = 0;
 11 | 
 12 | dataSetName = 'eachmovie_weak_1';
 13 | [Y, void, Ytest] = collabLoadData(dataSetName);
 14 | 
 15 | numFilms = size(Y,1);
 16 | numUsers = size(Y,2);
 17 | % Defaults (mean 0, standard deviation 1) leave the ratings untouched.
 18 | meanFilms = zeros(numFilms,1);
 19 | stdFilms = ones(numFilms,1);
 20 | if (substract_mean)
 21 |     % do for each film independently
 22 |     for i=1:numFilms
 23 |         % Observed (non-zero) ratings of this film in both partitions.
 24 |         ind = find(Y(i,:));
 25 |         trainVals = full(Y(i,ind));
 26 |         testVals = nonzeros(Ytest(i,:));
 27 |         length_v = length(ind) + length(testVals);
 28 |         if (length_v==0)
 29 |             % No rating at all: keep the defaults instead of storing NaN.
 30 |             continue;
 31 |         end
 32 |         mean_v = (sum(trainVals) + sum(testVals))/length_v;
 33 |         % Count-weighted average of the two standard deviations. Only
 34 |         % observed ratings enter, and an empty partition contributes
 35 |         % nothing (std of an empty vector is NaN).
 36 |         std_v = 0;
 37 |         if ~isempty(trainVals)
 38 |             std_v = std_v + length(trainVals)*std(trainVals);
 39 |         end
 40 |         if ~isempty(testVals)
 41 |             std_v = std_v + length(testVals)*std(testVals);
 42 |         end
 43 |         std_v = std_v/length_v;
 44 |         Y(i,ind) = Y(i,ind) - mean_v;
 45 |         meanFilms(i) = mean_v;
 46 |         % The stored scale is only changed when the ratings really were
 47 |         % divided, so that mu.*sd + mean undoes the normalization.
 48 |         if (std_v>0) 
 49 |             Y(i,ind) = Y(i,ind)/std_v;
 50 |             stdFilms(i) = std_v;
 51 |         end
 52 |     end
 53 | end
 54 | 
 55 | q = 5;
 56 | options = collabOptions;
 57 | model = collabCreate(q, size(Y, 2), Y, options);
 58 | model.kern.comp{2}.variance = 0.11;
 59 | model.kern.comp{3}.variance =  5; 
 60 | options = collabOptimiseOptions;
 61 | 
 62 | % set parameters
 63 | options.momentum = 0.9;
 64 | options.learnRate = 0.0001;
 65 | options.paramMomentum = 0.9;
 66 | options.paramLearnRate = 0.0001;
 67 | options.numIters = 1; % ??? put 10 back
 68 | options.showLikelihood = false;
 69 | 
 70 | capName = dataSetName;
 71 | capName(1) = upper(capName(1));
 72 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
 73 | 
 74 | % Store the normalization so that predictions can be rescaled later.
 75 | model.mu = meanFilms;
 76 | model.sd = stdFilms;
 77 | 
 78 | % Left without a semicolon so that the optimised model is displayed.
 79 | model = collabOptimise(model, Y, options)
 80 | 
 81 | disp('Computing test error');
 82 | 
 83 | % ????? this test is to be done
 84 | 
 85 | % Deliberate stop for interactive inspection before the test error is
 86 | % computed; type "return" to continue.
 87 | keyboard
 88 | 
 89 | % ??? check if the mean is substracted...
 90 | 
 91 | % An inline copy of this computation used to be kept here as commented
 92 | % out code; computeTestErrorWeak is the maintained implementation.
 93 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorWeak(model,Y,Ytest);
 94 | 
 95 | % Save the results.
 96 | capName = dataSetName;
 97 | capName(1) = upper(capName(1));
 98 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round');
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/matlab/demEachMovieMarlinStrongScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demEachMovieMarlinStrongScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
  2 | % DEMEACHMOVIEMARLINSTRONGSCRIPT1 EachMovie strong generalization.
  3 | % FORMAT
  4 | % DESC Try collaborative filtering with the RBF covariance function
  5 | % on the EachMovie data with Marlin's partitions for strong generalization.
  6 | % One result file is saved for every combination of latent
  7 | % dimensionality and partition.
  8 | % ARG substract_mean : bool, if true the per-film mean and standard
  9 | % deviation are computed and stored in the model (the ratings
 10 | % themselves are not modified).
 11 | % ARG partNo_v : vector with the partitions to compute results.
 12 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 13 | % ARG iters : number of iterations.
 14 | % ARG inverted : if true, then learn users as examples and not items.
 15 | %
 16 | % SEEALSO collabCreate, collabOptimise
 17 | %
 18 | % COPYRIGHT : Raquel Urtasun, 2009
 19 | 
 20 | % COLLAB
 21 | 
 22 | randn('seed', 1e5);
 23 | rand('seed', 1e5);
 24 | 
 25 | experimentNo = 3;
 26 | 
 27 | % Manual switch (previously written as "if 0"): when true a single
 28 | % global mean and standard deviation are used instead of per-film values.
 29 | useGlobalMean = false;
 30 | 
 31 | %partNo_v = [1:5];
 32 | %latentDim_v = [5, 2:4, 6];
 33 | 
 34 | 
 35 | for i_latent=1:length(latentDim_v)
 36 |   q = latentDim_v(i_latent);
 37 |   for i_part=1:length(partNo_v)
 38 |     partNo = partNo_v(i_part);
 39 |     
 40 |     dataSetName = ['eachmovie_marlin_strong_',num2str(partNo)];
 41 |     
 42 |     disp(['Reading ... ',dataSetName]);
 43 |     
 44 |     [Y, lbls, Ytest] = collabLoadData(dataSetName);
 45 |     
 46 |     % The second output is passed to computeTestErrorWeak below as the
 47 |     % observed ratings that the test predictions are conditioned on.
 48 |     Ytraintest = lbls;
 49 |     
 50 |     if (inverted)
 51 |       Y = Y';
 52 |       Ytest = Ytest';
 53 |       % Must follow Ytest: both are indexed by the same rows and
 54 |       % columns in computeTestErrorWeak.
 55 |       Ytraintest = Ytraintest';
 56 |     end
 57 |     
 58 |     numFilms = size(Y,1);
 59 |     numUsers = size(Y,2);
 60 |     % Defaults (mean 0, standard deviation 1) mean "no normalization".
 61 |     meanFilms = zeros(numFilms,1);
 62 |     stdFilms = ones(numFilms,1);
 63 |         
 64 |     if (substract_mean)
 65 |       if useGlobalMean
 66 |         % this computes the global mean over all observed ratings
 67 |         ratings = [nonzeros(Y); nonzeros(Ytest)];
 68 |         meanY = mean(ratings);
 69 |         stdY = std(ratings);
 70 |         %Y(index) = Y(index) - meanY;
 71 |         %Y(index) = Y(index) / stdY;
 72 |       else
 73 |         for i=1:numFilms
 74 |           % Observed (non-zero) ratings of this film in both partitions.
 75 |           ind = find(Y(i,:));
 76 |           trainVals = full(Y(i,ind));
 77 |           testVals = nonzeros(Ytest(i,:));
 78 |           length_v = length(ind) + length(testVals);
 79 |           if (length_v==0)
 80 |             % No rating at all: keep the defaults instead of storing NaN.
 81 |             continue;
 82 |           end
 83 |           mean_v = (sum(trainVals) + sum(testVals))/length_v;
 84 |           % Count-weighted average of the two standard deviations. Only
 85 |           % observed ratings enter, and an empty partition contributes
 86 |           % nothing (std of an empty vector is NaN).
 87 |           std_v = 0;
 88 |           if ~isempty(trainVals)
 89 |             std_v = std_v + length(trainVals)*std(trainVals);
 90 |           end
 91 |           if ~isempty(testVals)
 92 |             std_v = std_v + length(testVals)*std(testVals);
 93 |           end
 94 |           std_v = std_v/length_v;
 95 |           %Y(i,ind) = Y(i,ind) - mean_v;
 96 |           %if (std_v>0) 
 97 |           %    Y(i,ind) = Y(i,ind)/std_v;
 98 |           %end
 99 |           meanFilms(i) = mean_v;
100 |           stdFilms(i) = std_v;
101 |         end
102 |       end
103 |     end
104 |     
105 |     options = collabOptions;
106 |     model = collabCreate(q, size(Y, 2), Y, options);
107 |     if (substract_mean)
108 |       if useGlobalMean
109 |         % this does the global mean
110 |         model.mu = repmat(meanY,size(model.mu,1),1);
111 |         model.sd = repmat(stdY,size(model.sd,1),1);
112 |       else
113 |         model.mu = meanFilms;
114 |         model.sd = stdFilms;
115 |       end
116 |       
117 |     end
118 |     model.kern.comp{2}.variance = 0.11;
119 |     model.kern.comp{3}.variance =  5; 
120 |     options = collabOptimiseOptions;
121 |     
122 |     
123 |     % set parameters
124 |     options.momentum = 0.9;
125 |     options.learnRate = 0.0001;
126 |     options.paramMomentum = 0.9;
127 |     options.paramLearnRate = 0.0001;
128 |     options.numIters = iters;
129 |     options.showLikelihood = false;
130 |     
131 |     capName = dataSetName;
132 |     capName(1) = upper(capName(1));
133 |     options.saveName = ['dem' capName num2str(experimentNo) '_'];
134 |     
135 |     % Left without a semicolon so that the optimised model is displayed.
136 |     model = collabOptimise(model, Y, options)
137 |     
138 |     % compute the test error
139 |     disp('Computing test error');
140 |     
141 |     
142 |     % Left without a semicolon so that the errors are displayed.
143 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Ytraintest,Ytest)
144 | 
145 |     
146 |     % Save the results.
147 |     capName = dataSetName;
148 |     capName(1) = upper(capName(1));
149 |     
150 |     saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
151 |     disp(['Saving ... ',saveResults]);
152 |     save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
153 |   end
154 | end
155 | 
156 | 
157 | 
158 | 


--------------------------------------------------------------------------------
/matlab/demEachMovieMarlinWeakEnsemScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demEachMovieMarlinWeakEnsemScript1(substract_mean, partNo_v, latentDim_v,iters, inverted, type)
  2 | % DEMEACHMOVIEMARLINWEAKENSEMSCRIPT1 Ensemble of models on Marlin's weak Eachmovie partitions.
  3 | % FORMAT
  4 | % DESC Try collaborative filtering on the Eachmovie data with ensembles for
  5 | % Marlin's partitions. For each partition the models previously saved for
  6 | % the requested latent dimensionalities are loaded (missing files are
  7 | % skipped), the ensemble test error is computed and the results are saved
  8 | % to one '_ensembles.mat' file per partition.
  9 | % ARG substract_mean : bool if substract the mean.
 10 | % ARG partNo_v : vector with the partitions to compute results.
 11 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 12 | % ARG iters : number of iterations (part of the model file names).
 13 | % ARG inverted : if true, then learn users as examples and not items.
 14 | % ARG type : partition type, 'weak' or 'strong'.
 15 | %
 16 | % SEEALSO collabCreate, collabOptimise
 17 | %
 18 | % COPYRIGHT : Raquel Urtasun, 2009
 19 | 
 20 | % COLLAB
 21 | 
 22 | randn('seed', 1e5);
 23 | rand('seed', 1e5);
 24 | 
 25 | % Saved with the results; every entry stays at one because the only
 26 | % update of this matrix is commented out below.
 27 | modelsActive = ones(length(latentDim_v),length(partNo_v));
 28 | 
 29 | %partNo_v = [1:5];
 30 | %latentDim_v = [5, 2:4, 6];
 31 | 
 32 | % for each partition load the data
 33 | for i_part=1:length(partNo_v)
 34 |   partNo = partNo_v(i_part);
 35 |   numActive = 0;
 36 |   allModels = [];
 37 |   
 38 |   dataSetName = ['eachmovie_marlin_',type,'_',num2str(partNo)];
 39 |   
 40 |   disp(['Reading ... ',dataSetName]);
 41 |   
 42 |   [Y, lbls, Ytest] = collabLoadData(dataSetName);
 43 |   
 44 |   if (inverted)
 45 |     Y = Y';
 46 |     Ytest = Ytest';
 47 |   end
 48 |   
 49 |   numFilms = size(Y,1);
 50 |   meanFilms = zeros(numFilms,1);
 51 |   stdFilms = ones(numFilms,1);
 52 |   
 53 |   % NOTE(review): meanFilms and stdFilms are not used again in this function
 54 |   % and the in-place normalisation of Y is commented out, so this block does
 55 |   % not influence the results -- confirm whether it can be removed.
 56 |   if (substract_mean)
 57 |     if 0
 58 |       % this substract the global mean
 59 |       % create the total vector
 60 |       ratings = [nonzeros(Y); nonzeros(Ytest)];
 61 |       meanY = mean(ratings);
 62 |       stdY = std(ratings);
 63 |       %keyboard;
 64 |       index = find(Y);
 65 |       %Y(index) = Y(index) - meanY;
 66 |       %Y(index) = Y(index) / stdY;
 67 |     else
 68 |       for i=1:numFilms
 69 |         % compute the mean and standard deviation of each film
 70 |         ind = find(Y(i,:));
 71 |         mean_v = sum(Y(i,ind));
 72 |         mean_v = mean_v + sum(nonzeros(Ytest(i,:)));
 73 |         length_v = length(ind) + nnz(Ytest(i,:));
 74 |         mean_v = mean_v/length_v;
 75 |         std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v;
 76 |         %Y(i,ind) = Y(i,ind) - mean_v;
 77 |         %if (std_v>0) 
 78 |         %    Y(i,ind) = Y(i,ind)/std_v;
 79 |         %end
 80 |         meanFilms(i) = mean_v;
 81 |         stdFilms(i) = std_v;
 82 |       end
 83 |     end
 84 |     %keyboard;
 85 |   end
 86 |   
 87 |   for i_latent=1:length(latentDim_v)
 88 |     q = latentDim_v(i_latent);
 89 |     
 90 |     % Build the name of the .mat file that is expected to hold the model
 91 |     % for this latent dimensionality and partition.
 92 |     capName = dataSetName;
 93 |     capName(1) = upper(capName(1));
 94 |     
 95 |     loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
 96 |     disp(['Loading ... ',loadResults]);
 97 |     try
 98 |       load(loadResults);
 99 |     catch
100 |       disp(['Model not found ',loadResults]);
101 |       %keyboard;
102 |       continue;
103 |     end
104 |     numActive = numActive + 1;
105 |     allModels{numActive} = model;
106 |     
107 |     %modelsActive(q) = 1;
108 |   end
109 |   
110 |   %%%%%%%%
111 |   % compute the test error
112 |   disp('Computing test error');
113 |   
114 |   % compute the test error for ensembles of models
115 |   % ('elseif' is written as one word: 'else if' would open a nested
116 |   % conditional that needs its own 'end')
117 |   if strcmp(type,'weak')
118 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest)
119 |   elseif strcmp(type,'strong')
120 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,lbls,Ytest)
121 |   end
122 |   
123 |   %[mu] = computePredictionsErrorWeak(model,Y,Ytest)
124 |   
125 |   % Save the results of this partition (the file name contains partNo).
126 |   capName = dataSetName;
127 |   capName(1) = upper(capName(1));
128 |   
129 |   saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(partNo),'_iters_',num2str(iters),'_ensembles.mat'];
130 |   disp(['Saving ... ',saveResults]);
131 |   save(saveResults, 'allModels', 'L2_error','NMAE_error','NMAE_round_error','modelsActive');
132 | end


--------------------------------------------------------------------------------
/matlab/demEachMovieMarlinWeakScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demEachMovieMarlinWeakScript1(substract_mean, partNo_v, latentDim_v,iters,inverted)
  2 | % DEMEACHMOVIEMARLINWEAKSCRIPT1 RBF covariance on Marlin's weak Eachmovie partitions.
  3 | % FORMAT
  4 | % DESC Try collaborative filtering with the RBF covariance 
  5 | % on the Eachmovie data for Marlin's partitions for weak generalization.
  6 | % For every latent dimensionality and partition a model is optimised, its
  7 | % test error is computed and both are saved to a .mat file.
  8 | % ARG substract_mean : if true, the per-film mean and standard deviation
  9 | % are stored in the model (model.mu and model.sd).
 10 | % ARG partNo_v : vector with the partitions to compute results.
 11 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 12 | % ARG iters : number of iterations.
 13 | % ARG inverted : if true, then learn users as examples and not items.
 14 | %
 15 | % SEEALSO collabCreate, collabOptimise
 16 | %
 17 | % COPYRIGHT : Raquel Urtasun, 2009
 18 | 
 19 | % COLLAB
 20 | 
 21 | randn('seed', 1e5);
 22 | rand('seed', 1e5);
 23 | 
 24 | experimentNo = 3;
 25 | 
 26 | %partNo_v = [1:5];
 27 | %latentDim_v = [5, 2:4, 6];
 28 | 
 29 | for i_latent=1:length(latentDim_v)
 30 |   q = latentDim_v(i_latent);
 31 |   for i_part=1:length(partNo_v)
 32 |     partNo = partNo_v(i_part);
 33 |     
 34 |     dataSetName = ['eachmovie_marlin_weak_',num2str(partNo)];
 35 |     
 36 |     disp(['Reading ... ',dataSetName]);
 37 |     
 38 |     [Y, void, Ytest] = collabLoadData(dataSetName);
 39 |     
 40 |     if (inverted)
 41 |       % Transpose the training and the test ratings alike, so that the
 42 |       % held-out ratings are kept and both matrices share one orientation.
 43 |       Y = Y';
 44 |       Ytest = Ytest';
 45 |     end
 46 | 
 47 |     numFilms = size(Y,1);
 48 |     meanFilms = zeros(numFilms,1);
 49 |     stdFilms = ones(numFilms,1);
 50 |     
 51 |     %if (substract_mean)
 52 |     if 0
 53 |       % this substract the global mean
 54 |       % create the total vector
 55 |       ratings = [nonzeros(Y); nonzeros(Ytest)];
 56 |       meanY = mean(ratings);
 57 |       stdY = std(ratings);
 58 |       %keyboard;
 59 |       index = find(Y);
 60 |       %Y(index) = Y(index) - meanY;
 61 |       %Y(index) = Y(index) / stdY;
 62 |     else
 63 |       % The per-film statistics are computed whatever substract_mean is;
 64 |       % the flag only decides further down whether the model uses them.
 65 |       for i=1:numFilms
 66 |         % compute the mean and standard deviation of each film
 67 |         ind = find(Y(i,:));
 68 |         mean_v = sum(Y(i,ind));
 69 |         mean_v = mean_v + sum(nonzeros(Ytest(i,:)));
 70 |         length_v = length(ind) + nnz(Ytest(i,:));
 71 |         mean_v = mean_v/length_v;
 72 |         std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v;
 73 |         %Y(i,ind) = Y(i,ind) - mean_v;
 74 |         %if (std_v>0) 
 75 |         %    Y(i,ind) = Y(i,ind)/std_v;
 76 |         %end
 77 |         meanFilms(i) = mean_v;
 78 |         stdFilms(i) = std_v;
 79 |       end
 80 |     end
 81 |     %keyboard;
 82 |     %end
 83 |     
 84 |     options = collabOptions;
 85 |     model = collabCreate(q, size(Y, 2), Y, options);
 86 |     % keyboard;
 87 |     if (substract_mean)
 88 |       if 0
 89 |         % this does the global mean
 90 |         model.mu = repmat(meanY,size(model.mu,1),1);
 91 |         model.sd = repmat(stdY,size(model.sd,1),1);
 92 |       else
 93 |         model.mu = meanFilms;
 94 |         model.sd = stdFilms;
 95 |       end
 96 |     end
 97 |     model.kern.comp{2}.variance = 0.11;
 98 |     model.kern.comp{3}.variance =  5; 
 99 |     options = collabOptimiseOptions;
100 |     
101 |     % set parameters
102 |     options.momentum = 0.9;
103 |     options.learnRate = 0.0001;
104 |     options.paramMomentum = 0.9;
105 |     options.paramLearnRate = 0.0001;
106 |     options.numIters = iters;
107 |     options.showLikelihood = false;
108 |     
109 |     capName = dataSetName;
110 |     capName(1) = upper(capName(1));
111 |     options.saveName = ['dem' capName num2str(experimentNo) '_'];
112 |     
113 |     % Replace zero standard deviations in the model by one.
114 |     ind = find(model.sd==0);
115 |     model.sd(ind) = 1;
116 |     
117 |     model = collabOptimise(model, Y, options)
118 |     
119 |     % compute the test error
120 |     disp('Computing test error');
121 |     
122 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest)
123 |     
124 |     % Save the results.
125 |     saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
126 |     disp(['Saving ... ',saveResults]);
127 |     save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
128 |   end
129 | end


--------------------------------------------------------------------------------
/matlab/demMixtoydata1.m:
--------------------------------------------------------------------------------
 1 | % DEMMIXTOYDATA1 Demonstrate model on toy data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | dataSetName = 'mixtoydata';
 6 | [Y, lbls, Ytest, X] = collabLoadData(dataSetName); % lbls, Ytest and X are not used by the active code below
 7 | 
 8 | q = 2; % passed as the first argument of collabCreate
 9 | options = collabOptions;
10 | options.kern = {'rbf', 'bias'} % no terminating semicolon, so this assignment is echoed
11 | options.numComps = 2;
12 | model = collabCreate(q, size(Y, 2), Y, options);
13 | options = collabOptimiseOptions(); % options is reused from here on for the optimiser settings
14 | options.momentum = 0.9;
15 | options.learnRate = 0.0001;
16 | options.paramMomentum = 0.9;
17 | options.paramLearnRate = 0.0001;
18 | options.numIters = 5;
19 | model = collabOptimise(model, Y, options);
20 | %model.X = X;
21 | %model.kern.comp{1}.variance = 1;
22 | %model.kern.comp{2}.variance = 0.4;
23 | %model.sigma2 = 0.4;  
24 | %model = collabEstep(model, 100);
25 | 


--------------------------------------------------------------------------------
/matlab/demMovieLens10MLetterWeakScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demMovieLens10MLetterWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
  2 | 
  3 | % DEMMOVIELENS10MLETTERWEAKSCRIPT1 Try collaborative filtering on the 10M movielens data set.
  4 | % FORMAT
  5 | % DESC Optimise and evaluate a model on the lettered partitions of the 10M
  6 | % movielens data, once per latent dimensionality and partition.
  7 | % ARG substract_mean : if true, the per-film mean and standard deviation
  8 | % are stored in the model.
  9 | % ARG partNo_v : vector of partition numbers, each one an index into 'ab'.
 10 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 11 | % ARG iters : number of iterations.
 12 | % ARG inverted : if true, then learn users as examples and not items.
 13 | %
 14 | % SEEALSO collabCreate, collabOptimise
 15 | %
 16 | % COPYRIGHT : Raquel Urtasun, 2009
 17 | 
 18 | % COLLAB 
 19 | 
 20 | randn('seed', 1e5);
 21 | rand('seed', 1e5);
 22 | 
 23 | experimentNo = 3;
 24 | partLetter_v = 'ab';
 25 | 
 26 | % Switch for the alternative normalisation with one global mean and
 27 | % standard deviation; it is turned off.
 28 | useGlobalMean = false;
 29 | 
 30 | for latentIdx = 1:length(latentDim_v)
 31 |   latentDim = latentDim_v(latentIdx);
 32 |   for partIdx = 1:length(partNo_v)
 33 |     partLetter = partLetter_v(partNo_v(partIdx));
 34 |     dataSetName = ['movielens_10M_',partLetter];
 35 |     capName = [upper(dataSetName(1)), dataSetName(2:end)];
 36 | 
 37 |     disp(['Reading ... ',dataSetName]);
 38 |     [Y, unusedLbls, Ytest] = collabLoadData(dataSetName);
 39 |     if (inverted)
 40 |       Y = Y';
 41 |       Ytest = Ytest';
 42 |     end
 43 | 
 44 |     % Statistics of every row of Y (a film unless inverted is set).
 45 |     numFilms = size(Y,1);
 46 |     meanFilms = zeros(numFilms,1);
 47 |     stdFilms = ones(numFilms,1);
 48 |     if (substract_mean)
 49 |       if useGlobalMean
 50 |         allRatings = [nonzeros(Y); nonzeros(Ytest)];
 51 |         meanY = mean(allRatings);
 52 |         stdY = std(allRatings);
 53 |       else
 54 |         for film = 1:numFilms
 55 |           rated = find(Y(film,:));
 56 |           testRow = Ytest(film,:);
 57 |           numTrain = length(rated);
 58 |           numTest = nnz(testRow);
 59 |           numRatings = numTrain + numTest;
 60 |           meanFilms(film) = (sum(Y(film,rated)) + sum(nonzeros(testRow)))/numRatings;
 61 |           % NOTE(review): std(testRow) covers the whole test row, zeros
 62 |           % included, while the training term uses nonzero entries only.
 63 |           stdFilms(film) = (numTrain*std(Y(film,rated)) + numTest*std(testRow))/numRatings;
 64 |         end
 65 |       end
 66 |     end
 67 | 
 68 |     createOptions = collabOptions;
 69 |     model = collabCreate(latentDim, size(Y, 2), Y, createOptions);
 70 |     if (substract_mean)
 71 |       if useGlobalMean
 72 |         model.mu = repmat(meanY,size(model.mu,1),1);
 73 |         model.sd = repmat(stdY,size(model.sd,1),1);
 74 |       else
 75 |         model.mu = meanFilms;
 76 |         model.sd = stdFilms;
 77 |       end
 78 |     end
 79 |     model.kern.comp{2}.variance = 0.11;
 80 |     model.kern.comp{3}.variance =  5;
 81 | 
 82 |     % optimiser settings
 83 |     options = collabOptimiseOptions;
 84 |     options.momentum = 0.9;
 85 |     options.learnRate = 0.0001;
 86 |     options.paramMomentum = 0.9;
 87 |     options.paramLearnRate = 0.0001;
 88 |     options.numIters = iters;
 89 |     options.showLikelihood = false;
 90 |     options.saveName = ['dem' capName num2str(experimentNo) '_'];
 91 | 
 92 |     model = collabOptimise(model, Y, options)
 93 | 
 94 |     % Store the optimised model first; the same file is written again
 95 |     % below once the test errors are available.
 96 |     saveResults = [capName,'inverted_',num2str(inverted), ...
 97 |                    '_norm_',num2str(substract_mean),'_',num2str(latentDim), ...
 98 |                    '_',partLetter,'_iters_',num2str(iters),'.mat'];
 99 |     disp(['Saving ... ',saveResults]);
100 |     save(saveResults, 'model', 'options');
101 | 
102 |     % compute the test error
103 |     disp('Computing test error');
104 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest)
105 | 
106 |     % Save the results.
107 |     disp(['Saving ... ',saveResults]);
108 |     save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
109 |   end
110 | end


--------------------------------------------------------------------------------
/matlab/demMovieLens10MWeakScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demMovieLens10MWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
  2 | % DEMMOVIELENS10MWEAKSCRIPT1 Try collaborative filtering on the 10M movielens data set.
  3 | % FORMAT
  4 | % DESC Optimise and evaluate a model on the numbered partitions of the 10M
  5 | % movielens data, once per latent dimensionality and partition.
  6 | % ARG substract_mean : if true, the per-film mean and standard deviation
  7 | % are stored in the model.
  8 | % ARG partNo_v : vector with the partitions to compute results.
  9 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 10 | % ARG iters : number of iterations.
 11 | % ARG inverted : if true, then learn users as examples and not items.
 12 | %
 13 | % SEEALSO collabCreate, collabOptimise
 14 | %
 15 | % COPYRIGHT : Raquel Urtasun, 2009
 16 | 
 17 | % COLLAB
 18 | 
 19 | randn('seed', 1e5);
 20 | rand('seed', 1e5);
 21 | 
 22 | experimentNo = 3;
 23 | 
 24 | % Switch for the alternative normalisation with one global mean and
 25 | % standard deviation; it is turned off.
 26 | useGlobalMean = false;
 27 | 
 28 | for latentIdx = 1:length(latentDim_v)
 29 |   latentDim = latentDim_v(latentIdx);
 30 |   for partIdx = 1:length(partNo_v)
 31 |     partNo = partNo_v(partIdx);
 32 |     dataSetName = ['movielens_10M_',num2str(partNo)];
 33 |     capName = [upper(dataSetName(1)), dataSetName(2:end)];
 34 | 
 35 |     disp(['Reading ... ',dataSetName]);
 36 |     [Y, unusedLbls, Ytest] = collabLoadData(dataSetName);
 37 |     if (inverted)
 38 |       Y = Y';
 39 |       Ytest = Ytest';
 40 |     end
 41 | 
 42 |     % Statistics of every row of Y (a film unless inverted is set).
 43 |     numFilms = size(Y,1);
 44 |     meanFilms = zeros(numFilms,1);
 45 |     stdFilms = ones(numFilms,1);
 46 |     if (substract_mean)
 47 |       if useGlobalMean
 48 |         allRatings = [nonzeros(Y); nonzeros(Ytest)];
 49 |         meanY = mean(allRatings);
 50 |         stdY = std(allRatings);
 51 |       else
 52 |         for film = 1:numFilms
 53 |           rated = find(Y(film,:));
 54 |           testRow = Ytest(film,:);
 55 |           numTrain = length(rated);
 56 |           numTest = nnz(testRow);
 57 |           numRatings = numTrain + numTest;
 58 |           meanFilms(film) = (sum(Y(film,rated)) + sum(nonzeros(testRow)))/numRatings;
 59 |           % NOTE(review): std(testRow) covers the whole test row, zeros
 60 |           % included, while the training term uses nonzero entries only.
 61 |           stdFilms(film) = (numTrain*std(Y(film,rated)) + numTest*std(testRow))/numRatings;
 62 |         end
 63 |       end
 64 |     end
 65 | 
 66 |     createOptions = collabOptions;
 67 |     model = collabCreate(latentDim, size(Y, 2), Y, createOptions);
 68 |     if (substract_mean)
 69 |       if useGlobalMean
 70 |         model.mu = repmat(meanY,size(model.mu,1),1);
 71 |         model.sd = repmat(stdY,size(model.sd,1),1);
 72 |       else
 73 |         model.mu = meanFilms;
 74 |         model.sd = stdFilms;
 75 |       end
 76 |     end
 77 |     model.kern.comp{2}.variance = 0.11;
 78 |     model.kern.comp{3}.variance =  5;
 79 | 
 80 |     % optimiser settings
 81 |     options = collabOptimiseOptions;
 82 |     options.momentum = 0.9;
 83 |     options.learnRate = 0.0001;
 84 |     options.paramMomentum = 0.9;
 85 |     options.paramLearnRate = 0.0001;
 86 |     options.numIters = iters;
 87 |     options.showLikelihood = false;
 88 |     options.saveName = ['dem' capName num2str(experimentNo) '_'];
 89 | 
 90 |     model = collabOptimise(model, Y, options)
 91 | 
 92 |     % compute the test error
 93 |     disp('Computing test error');
 94 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeakCell(model,Y,Ytest)
 95 | 
 96 |     % Save the results.
 97 |     saveResults = [capName,'inverted_',num2str(inverted), ...
 98 |                    '_norm_',num2str(substract_mean),'_',num2str(latentDim), ...
 99 |                    '_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
100 |     disp(['Saving ... ',saveResults]);
101 |     save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
102 |   end
103 | end


--------------------------------------------------------------------------------
/matlab/demMovieLensMarlinStrongScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demMovieLensMarlinStrongScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
  2 | % DEMMOVIELENSMARLINSTRONGSCRIPT1 Movielens strong generalization.
  3 | % FORMAT
  4 | % DESC Try collaborative filtering with the RBF covariance function
  5 | % on the Movielens data with Marlin's partitions for strong generalization.
  6 | % The function runs unattended: for every latent dimensionality and
  7 | % partition it optimises a model, computes the test error and saves both.
  8 | % ARG substract_mean : bool if substract the mean.
  9 | % ARG partNo_v : vector with the partitions to compute results.
 10 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 11 | % ARG iters : number of iterations.
 12 | % ARG inverted : if true, then learn users as examples and not items.
 13 | %
 14 | % SEEALSO collabCreate, collabOptimise
 15 | %
 16 | % COPYRIGHT : Raquel Urtasun, 2009
 17 | 
 18 | % COLLAB
 19 | 
 20 | randn('seed', 1e5);
 21 | rand('seed', 1e5);
 22 | 
 23 | experimentNo = 3;
 24 | 
 25 | %partNo_v = [1:5];
 26 | %latentDim_v = [5, 2:4, 6];
 27 | 
 28 | for i_latent=1:length(latentDim_v)
 29 |   q = latentDim_v(i_latent);
 30 |   for i_part=1:length(partNo_v)
 31 |     partNo = partNo_v(i_part);
 32 |     
 33 |     dataSetName = ['movielens_marlin_strong_',num2str(partNo)];
 34 |     
 35 |     disp(['Reading ... ',dataSetName]);
 36 |     
 37 |     [Y, lbls, Ytest] = collabLoadData(dataSetName);
 38 |     
 39 |     % The second output of collabLoadData is handed to the test error
 40 |     % computation below in place of the training matrix Y.
 41 |     Ytraintest = lbls;
 42 |     
 43 |     if (inverted)
 44 |       % NOTE(review): Ytraintest is not transposed here although Ytest
 45 |       % is -- confirm that computeTestErrorWeak expects this.
 46 |       Y = Y';
 47 |       Ytest = Ytest';
 48 |     end
 49 |     
 50 |     numFilms = size(Y,1);
 51 |     meanFilms = zeros(numFilms,1);
 52 |     stdFilms = ones(numFilms,1);
 53 |     
 54 |     if (substract_mean)
 55 |       if 0
 56 |         % this substract the global mean
 57 |         % create the total vector
 58 |         ratings = [nonzeros(Y); nonzeros(Ytest)];
 59 |         meanY = mean(ratings);
 60 |         stdY = std(ratings);
 61 |         %keyboard;
 62 |         index = find(Y);
 63 |         %Y(index) = Y(index) - meanY;
 64 |         %Y(index) = Y(index) / stdY;
 65 |       else
 66 |         for i=1:numFilms
 67 |           % compute the mean and standard deviation of each film
 68 |           ind = find(Y(i,:));
 69 |           mean_v = sum(Y(i,ind));
 70 |           mean_v = mean_v + sum(nonzeros(Ytest(i,:)));
 71 |           length_v = length(ind) + nnz(Ytest(i,:));
 72 |           mean_v = mean_v/length_v;
 73 |           std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v;
 74 |           %Y(i,ind) = Y(i,ind) - mean_v;
 75 |           %if (std_v>0) 
 76 |           %    Y(i,ind) = Y(i,ind)/std_v;
 77 |           %end
 78 |           meanFilms(i) = mean_v;
 79 |           stdFilms(i) = std_v;
 80 |         end
 81 |       end
 82 |       %keyboard;
 83 |     end
 84 |     
 85 |     options = collabOptions;
 86 |     model = collabCreate(q, size(Y, 2), Y, options);
 87 |     % keyboard;
 88 |     if (substract_mean)
 89 |       if 0
 90 |         % this does the global mean
 91 |         model.mu = repmat(meanY,size(model.mu,1),1);
 92 |         model.sd = repmat(stdY,size(model.sd,1),1);
 93 |       else
 94 |         model.mu = meanFilms;
 95 |         model.sd = stdFilms;
 96 |       end
 97 |     end
 98 |     model.kern.comp{2}.variance = 0.11;
 99 |     model.kern.comp{3}.variance =  5; 
100 |     options = collabOptimiseOptions;
101 |     
102 |     % set parameters
103 |     options.momentum = 0.9;
104 |     options.learnRate = 0.0001;
105 |     options.paramMomentum = 0.9;
106 |     options.paramLearnRate = 0.0001;
107 |     options.numIters = iters;
108 |     options.showLikelihood = false;
109 |     
110 |     capName = dataSetName;
111 |     capName(1) = upper(capName(1));
112 |     options.saveName = ['dem' capName num2str(experimentNo) '_'];
113 |     
114 |     model = collabOptimise(model, Y, options)
115 |     
116 |     % compute the test error (no 'keyboard' breakpoint here, so that the
117 |     % remaining runs continue without user interaction)
118 |     disp('Computing test error');
119 |     
120 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Ytraintest,Ytest)
121 |     
122 |     % Save the results.
123 |     saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
124 |     disp(['Saving ... ',saveResults]);
125 |     save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
126 |   end
127 | end


--------------------------------------------------------------------------------
/matlab/demMovieLensMarlinWeakEnsemScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demMovieLensMarlinWeakEnsemScript1(substract_mean, partNo_v, latentDim_v,iters, inverted, type)
  2 | % DEMMOVIELENSMARLINWEAKENSEMSCRIPT1 Ensemble of models on Marlin's weak Movielens partitions.
  3 | % FORMAT
  4 | % DESC Try collaborative filtering on the Movielens data with ensembles for
  5 | % Marlin's partitions. For each partition the models previously saved for
  6 | % the requested latent dimensionalities are loaded (missing files are
  7 | % skipped), the ensemble test error is computed and the results are saved
  8 | % to one '_ensembles.mat' file per partition.
  9 | % ARG substract_mean : bool if substract the mean.
 10 | % ARG partNo_v : vector with the partitions to compute results.
 11 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 12 | % ARG iters : number of iterations (part of the model file names).
 13 | % ARG inverted : if true, then learn users as examples and not items.
 14 | % ARG type : partition type, 'weak' or 'strong'.
 15 | %
 16 | % SEEALSO collabCreate, collabOptimise
 17 | %
 18 | % COPYRIGHT : Raquel Urtasun, 2009
 19 | 
 20 | % COLLAB
 21 | 
 22 | randn('seed', 1e5);
 23 | rand('seed', 1e5);
 24 | 
 25 | % Saved with the results; every entry stays at one because the only
 26 | % update of this matrix is commented out below.
 27 | modelsActive = ones(length(latentDim_v),length(partNo_v));
 28 | 
 29 | %partNo_v = [1:5];
 30 | %latentDim_v = [5, 2:4, 6];
 31 | 
 32 | % for each partition load the data
 33 | for i_part=1:length(partNo_v)
 34 |   partNo = partNo_v(i_part);
 35 |   numActive = 0;
 36 |   allModels = [];
 37 |   
 38 |   dataSetName = ['movielens_marlin_',type,'_',num2str(partNo)];
 39 |   
 40 |   disp(['Reading ... ',dataSetName]);
 41 |   
 42 |   [Y, lbls, Ytest] = collabLoadData(dataSetName);
 43 |   
 44 |   if (inverted)
 45 |     Y = Y';
 46 |     Ytest = Ytest';
 47 |   end
 48 |   
 49 |   numFilms = size(Y,1);
 50 |   meanFilms = zeros(numFilms,1);
 51 |   stdFilms = ones(numFilms,1);
 52 |   
 53 |   if (substract_mean)
 54 |     if 0
 55 |       % this substract the global mean
 56 |       % create the total vector
 57 |       ratings = [nonzeros(Y); nonzeros(Ytest)];
 58 |       meanY = mean(ratings);
 59 |       stdY = std(ratings);
 60 |       %keyboard;
 61 |       index = find(Y);
 62 |       Y(index) = Y(index) - meanY;
 63 |       Y(index) = Y(index) / stdY;
 64 |     else
 65 |       for i=1:numFilms
 66 |         % compute the mean and standard deviation of each film
 67 |         ind = find(Y(i,:));
 68 |         mean_v = sum(Y(i,ind));
 69 |         mean_v = mean_v + sum(nonzeros(Ytest(i,:)));
 70 |         length_v = length(ind) + nnz(Ytest(i,:));
 71 |         mean_v = mean_v/length_v;
 72 |         std_v = (length(ind)*std(Y(i,ind)) + nnz(Ytest(i,:))*std(Ytest(i,:)))/length_v;
 73 |         % normalise the nonzero training ratings of this film in place
 74 |         Y(i,ind) = Y(i,ind) - mean_v;
 75 |         if (std_v>0) 
 76 |           Y(i,ind) = Y(i,ind)/std_v;
 77 |         end
 78 |         meanFilms(i) = mean_v;
 79 |         stdFilms(i) = std_v;
 80 |       end
 81 |     end
 82 |     %keyboard;
 83 |   end
 84 |   
 85 |   for i_latent=1:length(latentDim_v)
 86 |     q = latentDim_v(i_latent);
 87 |     
 88 |     % load the model
 89 |     capName = dataSetName;
 90 |     capName(1) = upper(capName(1));
 91 |     
 92 |     loadResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
 93 |     disp(['Loading ... ',loadResults]);
 94 |     try
 95 |       % Brings 'model' into the workspace, together with any other
 96 |       % variable stored in the file.
 97 |       load(loadResults);
 98 |     catch
 99 |       disp(['Model not found ',loadResults]);
100 |       %keyboard;
101 |       continue;
102 |     end
103 |     numActive = numActive + 1;
104 |     allModels{numActive} = model;
105 |     
106 |     %modelsActive(q) = 1;
107 |   end
108 |   
109 |   %%%%%%%%
110 |   % compute the test error
111 |   disp('Computing test error');
112 |   
113 |   % compute the test error for ensembles of models
114 |   if strcmp(type,'weak')
115 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,Y,Ytest)
116 |   elseif strcmp(type,'strong')
117 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorEnsemblesWeak(allModels,lbls,Ytest)
118 |   end
119 |   
120 |   %[mu] = computePredictionsErrorWeak(model,Y,Ytest)
121 |   
122 |   % Save the results inside the partition loop, so that every partition
123 |   % gets its own file (the file name contains partNo).
124 |   capName = dataSetName;
125 |   capName(1) = upper(capName(1));
126 |   
127 |   saveResults = [capName,'inverted_',num2str(inverted),'_norm_',num2str(substract_mean),'_',num2str(partNo),'_iters_',num2str(iters),'_ensembles.mat'];
128 |   disp(['Saving ... ',saveResults]);
129 |   % NOTE(review): 'options' is not assigned in this function; it is only
130 |   % defined when load() above found it in a model file -- confirm that it
131 |   % should be part of the ensemble results.
132 |   save(saveResults, 'allModels', 'L2_error','options','NMAE_error','NMAE_round_error','modelsActive');
133 | end
134 | end


--------------------------------------------------------------------------------
/matlab/demMovieLensMarlinWeakScript1.m:
--------------------------------------------------------------------------------
  1 | function [] = demMovieLensMarlinWeakScript1(substract_mean, partNo_v, latentDim_v,iters, inverted)
  2 | % DEMMOVIELENSMARLINWEAKSCRIPT1 RBF covariance on Marlin's weak Movielens partitions.
  3 | % FORMAT
  4 | % DESC Optimise and evaluate a model with the RBF covariance on the
  5 | % Movielens data for Marlin's partitions for weak generalization, once per
  6 | % latent dimensionality and partition.
  7 | % ARG substract_mean : if true, the nonzero training ratings of each film
  8 | % are normalised in place and the statistics are stored in the model.
  9 | % ARG partNo_v : vector with the partitions to compute results.
 10 | % ARG latentDim_v : vector with the latent dimensionalities to compute results.
 11 | % ARG iters : number of iterations.
 12 | % ARG inverted : if true, then learn users as examples and not items.
 13 | %
 14 | % SEEALSO collabCreate, collabOptimise
 15 | %
 16 | % COPYRIGHT : Raquel Urtasun, 2009
 17 | 
 18 | % COLLAB
 19 | 
 20 | randn('seed', 1e5);
 21 | rand('seed', 1e5);
 22 | 
 23 | experimentNo = 3;
 24 | 
 25 | % Switch for the alternative normalisation with one global mean and
 26 | % standard deviation; it is turned off.
 27 | useGlobalMean = false;
 28 | 
 29 | for latentIdx = 1:length(latentDim_v)
 30 |   latentDim = latentDim_v(latentIdx);
 31 |   for partIdx = 1:length(partNo_v)
 32 |     partNo = partNo_v(partIdx);
 33 |     dataSetName = ['movielens_marlin_weak_',num2str(partNo)];
 34 |     capName = [upper(dataSetName(1)), dataSetName(2:end)];
 35 | 
 36 |     disp(['Reading ... ',dataSetName]);
 37 |     [Y, unusedLbls, Ytest] = collabLoadData(dataSetName);
 38 |     if (inverted)
 39 |       Y = Y';
 40 |       Ytest = Ytest';
 41 |     end
 42 | 
 43 |     numFilms = size(Y,1);
 44 |     meanFilms = zeros(numFilms,1);
 45 |     stdFilms = ones(numFilms,1);
 46 |     if (substract_mean)
 47 |       if useGlobalMean
 48 |         allRatings = [nonzeros(Y); nonzeros(Ytest)];
 49 |         meanY = mean(allRatings);
 50 |         stdY = std(allRatings);
 51 |         observed = find(Y);
 52 |         Y(observed) = Y(observed) - meanY;
 53 |         Y(observed) = Y(observed) / stdY;
 54 |       else
 55 |         for film = 1:numFilms
 56 |           % statistics of this row over the training and test ratings
 57 |           rated = find(Y(film,:));
 58 |           testRow = Ytest(film,:);
 59 |           numTrain = length(rated);
 60 |           numTest = nnz(testRow);
 61 |           numRatings = numTrain + numTest;
 62 |           filmMean = (sum(Y(film,rated)) + sum(nonzeros(testRow)))/numRatings;
 63 |           % NOTE(review): std(testRow) covers the whole test row, zeros
 64 |           % included, while the training term uses nonzero entries only.
 65 |           filmStd = (numTrain*std(Y(film,rated)) + numTest*std(testRow))/numRatings;
 66 | 
 67 |           % normalise the nonzero training ratings in place
 68 |           centred = Y(film,rated) - filmMean;
 69 |           if (filmStd>0)
 70 |             centred = centred/filmStd;
 71 |           end
 72 |           Y(film,rated) = centred;
 73 | 
 74 |           meanFilms(film) = filmMean;
 75 |           stdFilms(film) = filmStd;
 76 |         end
 77 |       end
 78 |     end
 79 | 
 80 |     createOptions = collabOptions;
 81 |     model = collabCreate(latentDim, size(Y, 2), Y, createOptions);
 82 |     if (substract_mean)
 83 |       if useGlobalMean
 84 |         model.mu = repmat(meanY,size(model.mu,1),1);
 85 |         model.sd = repmat(stdY,size(model.sd,1),1);
 86 |       else
 87 |         model.mu = meanFilms;
 88 |         model.sd = stdFilms;
 89 |       end
 90 |     end
 91 |     model.kern.comp{2}.variance = 0.11;
 92 |     model.kern.comp{3}.variance =  5;
 93 | 
 94 |     % optimiser settings
 95 |     options = collabOptimiseOptions;
 96 |     options.momentum = 0.9;
 97 |     options.learnRate = 0.0001;
 98 |     options.paramMomentum = 0.9;
 99 |     options.paramLearnRate = 0.0001;
100 |     options.numIters = iters;
101 |     options.showLikelihood = false;
102 |     options.saveName = ['dem' capName num2str(experimentNo) '_'];
103 | 
104 |     model = collabOptimise(model, Y, options)
105 | 
106 |     % compute the test error
107 |     disp('Computing test error');
108 |     [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest)
109 | 
110 |     % Save the results.
111 |     saveResults = [capName,'inverted_',num2str(inverted), ...
112 |                    '_norm_',num2str(substract_mean),'_',num2str(latentDim), ...
113 |                    '_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
114 |     disp(['Saving ... ',saveResults]);
115 |     save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
116 |   end
117 | end
131 | 
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/matlab/demMovielens1.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENS1 Try collaborative filtering on the large movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 1;
 9 | 
10 | dataSetName = 'movielens';
11 | [Y, void, Ytest] = collabLoadData(dataSetName);
12 | 
13 | q = 3;
14 | options = collabOptions;
15 | model = collabCreate(q, size(Y, 2), Y, options);
16 | model.kern.comp{2}.variance = 0.11;
17 | model.kern.comp{3}.variance =  5; 
18 | options = collabOptimiseOptions;
19 | options.numIters = 30;
20 | options.showLikelihood = false;
21 | model = collabOptimise(model, Y, options)
22 |   
23 | % Root mean square error over the nonzero entries of Ytest.
24 | val = 0;
25 | tot = 0;
26 | for i = 1:size(Y, 2)       
27 |   ind = find(Ytest(:, i));
28 |   % elim holds positions within ind (not row numbers) of the test rows
29 |   % that have no corresponding row in model.X.
30 |   elim = find(ind>size(model.X, 1));
31 |   tind = ind;
32 |   tind(elim) = [];
33 |   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
34 |   a = Ytest(tind, i) - mu; 
35 |   % Rows without a row in model.X get no prediction, so the rating itself
36 |   % is the residual. Map the positions back through ind to read them.
37 |   a = [a; Ytest(ind(elim), i)];
38 |   val = val + a'*a;
39 |   tot = tot + length(a);
40 | end
41 | error = sqrt(val/tot);
42 | 
43 | % Save the results.
44 | capName = dataSetName;
45 | capName(1) = upper(capName(1));
46 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error');
47 | 


--------------------------------------------------------------------------------
/matlab/demMovielens2.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENS2 Try collaborative filtering on the large movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 2;
 9 | 
10 | dataSetName = 'movielens';
11 | [Y, void, Ytest] = collabLoadData(dataSetName);
12 | 
13 | q = 4;
14 | options = collabOptions;
15 | model = collabCreate(q, size(Y, 2), Y, options);
16 | model.kern.comp{2}.variance = 0.11;
17 | model.kern.comp{3}.variance =  5; 
18 | options = collabOptimiseOptions;
19 | options.numIters = 10;
20 | options.showLikelihood = false;
21 | model = collabOptimise(model, Y, options)
22 |   
23 | % Root mean square error over the nonzero entries of Ytest.
24 | val = 0;
25 | tot = 0;
26 | for i = 1:size(Y, 2)       
27 |   ind = find(Ytest(:, i));
28 |   % elim holds positions within ind (not row numbers) of the test rows
29 |   % that have no corresponding row in model.X.
30 |   elim = find(ind>size(model.X, 1));
31 |   tind = ind;
32 |   tind(elim) = [];
33 |   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
34 |   a = Ytest(tind, i) - mu; 
35 |   % Rows without a row in model.X get no prediction, so the rating itself
36 |   % is the residual. Map the positions back through ind to read them.
37 |   a = [a; Ytest(ind(elim), i)];
38 |   val = val + a'*a;
39 |   tot = tot + length(a);
40 | end
41 | error = sqrt(val/tot);
42 | 
43 | % Save the results.
44 | capName = dataSetName;
45 | capName(1) = upper(capName(1));
46 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error');
47 | 


--------------------------------------------------------------------------------
/matlab/demMovielens3.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENS3 Try collaborative filtering on the large movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 3;
 9 | 
10 | dataSetName = 'movielens';
11 | [Y, void, Ytest] = collabLoadData(dataSetName);
12 | 
13 | q = 5;
14 | options = collabOptions;
15 | model = collabCreate(q, size(Y, 2), Y, options);
16 | model.kern.comp{2}.variance = 0.11;
17 | model.kern.comp{3}.variance =  5; 
18 | options = collabOptimiseOptions;
19 | 
20 | % set parameters
21 | options.momentum = 0.9;
22 | options.learnRate = 0.0001;
23 | options.paramMomentum = 0.9;
24 | options.paramLearnRate = 0.0001;
25 | options.numIters = 10;
26 | options.showLikelihood = false;
27 | 
28 | capName = dataSetName;
29 | capName(1) = upper(capName(1));
30 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
31 | 
32 | model = collabOptimise(model, Y, options)
33 |   
34 | % Accumulate the three error measures in a single pass, so that the
35 | % predictions for each column are computed once rather than three times.
36 | val_L2 = 0;
37 | val_NMAE = 0;
38 | val_NMAE_round = 0;
39 | tot = 0;
40 | for i = 1:size(Y, 2)       
41 |   ind = find(Ytest(:, i));
42 |   % elim holds positions within ind (not row numbers) of the test rows
43 |   % that have no corresponding row in model.X.
44 |   elim = find(ind>size(model.X, 1));
45 |   tind = ind;
46 |   tind(elim) = [];
47 |   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
48 |   a = Ytest(tind, i) - mu; 
49 |   % Rows without a row in model.X get no prediction, so the rating itself
50 |   % is the residual. Map the positions back through ind to read them.
51 |   a = [a; Ytest(ind(elim), i)];
52 |   val_L2 = val_L2 + a'*a;
53 |   val_NMAE = val_NMAE + sum(abs(a));
54 |   val_NMAE_round = val_NMAE_round + sum(abs(round(a)));
55 |   tot = tot + length(a);
56 | end
57 | error_L2 = sqrt(val_L2/tot);
58 | % NOTE(review): 1.6 is the NMAE normaliser used by these demos, presumably
59 | % the expected error of random guessing on a 1-5 scale -- confirm.
60 | error_NMAE = (val_NMAE/tot)/1.6;
61 | error_NMAE_round = (val_NMAE_round/tot)/1.6;
62 | 
63 | % Save the results. The variable list used to name 'error', which this
64 | % script never defines, so save failed; store the computed measures.
65 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round');
66 | 


--------------------------------------------------------------------------------
/matlab/demMovielens4.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENS4 Try collaborative filtering on the large movielens data.
 2 | % try different kernels
 3 | 
 4 | % COLLAB
 5 | 
 6 | randn('seed', 1e5);
 7 | rand('seed', 1e5);
 8 | 
 9 | % Was 3, which made this script write to the same 'demMovielens3' file
10 | % names as demMovielens3 (same dataSetName, same experiment number).
11 | experimentNo = 4;
12 | 
13 | dataSetName = 'movielens';
14 | [Y, lbls, Ytest] = collabLoadData(dataSetName);
15 | 
16 | % get the extra data in the labels
17 | 
18 | % one extra latent dimension, used below as the index of the second
19 | % component of the tensor kernel
20 | q = 5;
21 | q = q+1;
22 | options = collabOptionsTensor;
23 | 
24 | 
25 | %%%%% as in gpReversible dynamics
26 | type = {'cmpnd', {'tensor', 'rbf', 'rbfadditional'}, 'bias', 'white'};
27 | options.kern = kernCreate(q, type);
28 | options.kern.comp{1} = kernSetIndex(options.kern.comp{1}, 1, [1:q-1]);
29 | options.kern.comp{1} = kernSetIndex(options.kern.comp{1}, 2, [q]);
30 | options.kern.comp{1}.comp{2}.additional = lbls;
31 | %options.kern.comp{1}.comp{1}.inverseWidth = 0.2;
32 | %options.kern.comp{1}.comp{1}.variance = 0.001;
33 | %options.kern.comp{1}.comp{2}.variance = 2/pi;
34 | %options.kern.comp{1}.comp{2}.weightVariance = 1000;
35 | %options.kern.comp{1}.comp{2}.biasVariance = eps;
36 | 
37 | 
38 | % as previously
39 | %options.kern = {'cmpnd', {'tensor', 'rbf', 'rbfadditional'}, 'bias', 'white'};
40 | %options.kern.comp{1}.comp{1}.index = 1:q-1;
41 | %options.kern.comp{1}.comp{2}.index = q; 
42 | %options.kern.comp{1}.comp{2}.additional = lbls;
43 | model = collabCreateTensor(q, size(Y, 2), size(Y, 1), options);
44 | % put the last component to be the index
45 | model.kern.comp{2}.variance = 0.11;
46 | model.kern.comp{3}.variance =  5;
47 | 
48 | options = collabOptimiseOptions;
49 | 
50 | % set parameters
51 | options.momentum = 0.9;
52 | options.learnRate = 0.0001;
53 | options.paramMomentum = 0.9;
54 | options.paramLearnRate = 0.0001;
55 | options.numIters = 1;
56 | options.showLikelihood = false;
57 | 
58 | capName = dataSetName;
59 | capName(1) = upper(capName(1));
60 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
61 | 
62 | model = collabOptimise(model, Y, options)
63 |   
64 | % Root mean square error over the nonzero entries of Ytest.
65 | val = 0;
66 | tot = 0;
67 | for i = 1:size(Y, 2)       
68 |   ind = find(Ytest(:, i));
69 |   % elim holds positions within ind (not row numbers) of the test rows
70 |   % that have no corresponding row in model.X.
71 |   elim = find(ind>size(model.X, 1));
72 |   tind = ind;
73 |   tind(elim) = [];
74 |   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
75 |   a = Ytest(tind, i) - mu; 
76 |   % Rows without a row in model.X get no prediction, so the rating itself
77 |   % is the residual. Map the positions back through ind to read them.
78 |   a = [a; Ytest(ind(elim), i)];
79 |   val = val + a'*a;
80 |   tot = tot + length(a);
81 | end
82 | error = sqrt(val/tot);
83 | 
84 | % Save the results.
85 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error');
86 | 


--------------------------------------------------------------------------------
/matlab/demMovielens5.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENS5 Try collaborative filtering on the large movielens data.
 2 | % where now the latent space is in the users, not the films
 3 | 
 4 | randn('seed', 1e5);
 5 | rand('seed', 1e5);
 6 | 
 7 | % Was 3, which made this script write to the same 'demMovielens3' file
 8 | % names as demMovielens3 (same dataSetName, same experiment number).
 9 | experimentNo = 5;
10 | 
11 | % ??? to be done
12 | % (the marker above used to be a bare statement, which is a syntax error
13 | % and prevented the script from running at all)
14 | 
15 | dataSetName = 'movielens';
16 | [Y, void, Ytest] = collabLoadData(dataSetName);
17 | 
18 | % learn latent space of each user
19 | Y = Y';
20 | Ytest = Ytest';
21 | 
22 | q = 5;
23 | options = collabOptions;
24 | model = collabCreate(q, size(Y, 2), Y, options);
25 | model.kern.comp{2}.variance = 0.11;
26 | model.kern.comp{3}.variance =  5; 
27 | options = collabOptimiseOptions;
28 | 
29 | % set parameters
30 | options.momentum = 0.9;
31 | options.learnRate = 0.0001;
32 | options.paramMomentum = 0.9;
33 | options.paramLearnRate = 0.0001;
34 | options.numIters = 10; 
35 | options.showLikelihood = false;
36 | 
37 | capName = dataSetName;
38 | capName(1) = upper(capName(1));
39 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
40 | 
41 | model = collabOptimise(model, Y, options)
42 | 
43 | % we have to divide the test data into two sets, train and test for the
44 | % prediction. All but one are the train
45 | 
46 | val_L2 = 0;
47 | val_NMAE = 0;
48 | val_NMAE_round = 0;
49 | % the three measures are averaged over the same residuals, so one counter
50 | % is enough
51 | tot = 0;
52 | 
53 | disp('Computing test error');
54 | 
55 | for i = 1:size(Y, 2)       
56 |   ind = find(Ytest(:, i));
57 |   % elim holds positions within ind (not row numbers) of the test rows
58 |   % that have no corresponding row in model.X.
59 |   elim = find(ind>size(model.X, 1));
60 |   tind = ind;
61 |   tind(elim) = [];
62 |   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
63 |   a = Ytest(tind, i) - mu; 
64 |   % Rows without a row in model.X get no prediction, so the rating itself
65 |   % is the residual. Map the positions back through ind to read them.
66 |   a = [a; Ytest(ind(elim), i)];
67 |   val_L2 = val_L2 + a'*a;
68 |   val_NMAE = val_NMAE + sum(abs(a));
69 |   val_NMAE_round = val_NMAE_round + sum(abs(round(a)));
70 |   tot = tot + length(a);
71 | end
72 | error_L2 = sqrt(val_L2/tot);
73 | error_NMAE = (val_NMAE/tot)/1.6;
74 | error_NMAE_round = (val_NMAE_round/tot)/1.6;
75 | 
76 | 
77 | % Save the results.
78 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round');
79 | 


--------------------------------------------------------------------------------
/matlab/demMovielens6.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENS6 Try collaborative filtering on the large movielens data.
 2 | % using the strong movielens experiment
 3 | 
 4 | randn('seed', 1e5);
 5 | rand('seed', 1e5);
 6 | 
 7 | experimentNo = 3;
 8 | 
 9 | dataSetName = 'movielens_strong_1';
10 | [Y, void, Ytest] = collabLoadData(dataSetName);
11 | 
12 | q = 5;
13 | options = collabOptions;
14 | model = collabCreate(q, size(Y, 2), Y, options);
15 | model.kern.comp{2}.variance = 0.11;
16 | model.kern.comp{3}.variance =  5; 
17 | options = collabOptimiseOptions;
18 | 
19 | % set parameters
20 | options.momentum = 0.9;
21 | options.learnRate = 0.0001;
22 | options.paramMomentum = 0.9;
23 | options.paramLearnRate = 0.0001;
24 | options.numIters = 1; % ??? put 10 back
25 | options.showLikelihood = false;
26 | 
27 | capName = dataSetName;
28 | capName(1) = upper(capName(1));
29 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
30 | 
31 | model = collabOptimise(model, Y, options)
32 | 
33 | % we have to divide the test data into two sets, train and test for the
34 | % prediction. All but one are the train
35 | 
36 | disp('Computing test error');
37 | 
38 | % ????? this test is to be done
39 | 
40 | % A leftover 'keyboard' debugging stop was removed here: it halted the
41 | % script and waited for interactive input before the evaluation.
42 | 
43 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorStrong(model,Ytest);
44 | 
45 | % Save the results.
46 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round');
47 | 


--------------------------------------------------------------------------------
/matlab/demMovielens6Script.m:
--------------------------------------------------------------------------------
  1 | function[] = demMovielens6Script(substract_mean, partNo_v, latentDim_v,iters)
  2 | % DEMMOVIELENS6SCRIPT Try collaborative filtering on the large movielens data.
  3 | %
  4 | % demMovielens6Script(substract_mean, partNo_v, latentDim_v, iters)
  5 | %
  6 | % For every latent dimensionality and every partition the data set
  7 | % 'movielens_strong_<partNo>' is loaded, a model is optimised and the
  8 | % errors returned by computeTestErrorStrong are saved to a .mat file.
  9 | %
 10 | % substract_mean --> bool if substract the mean
 11 | % partNo_v --> vector with the partitions to compute results
 12 | % latentDim_v --> vector with the latent dimensionalities to compute results
 13 | % iters --> number of iterations
 14 | 
 15 | randn('seed', 1e5);
 16 | rand('seed', 1e5);
 17 | 
 18 | experimentNo = 3;
 19 | 
 20 | 
 21 | %partNo_v = [1:5];
 22 | %latentDim_v = [5, 2:4, 6];
 23 | 
 24 | 
 25 | for i_latent=1:length(latentDim_v)
 26 |     q = latentDim_v(i_latent);
 27 |     for i_part=1:length(partNo_v)
 28 |         partNo = partNo_v(i_part);
 29 | 
 30 |         dataSetName = ['movielens_strong_',num2str(partNo)];
 31 | 
 32 |         disp(['Reading ... ',dataSetName]);
 33 | 
 34 |         [Y, void, Ytest] = collabLoadData(dataSetName);
 35 | 
 36 |         numFilms = size(Y,1);
 37 |         % defaults (mean 0, std 1) leave a film untouched
 38 |         meanFilms = zeros(numFilms,1);
 39 |         stdFilms = ones(numFilms,1);
 40 | 
 41 |         if (substract_mean)
 42 |             if 0
 43 |                 % this substract the global mean
 44 |                 % create the total vector
 45 |                 ratings = [nonzeros(Y); nonzeros(Ytest)];
 46 |                 meanY = mean(ratings);
 47 |                 stdY = std(ratings);
 48 |                 index = find(Y);
 49 |                 Y(index) = Y(index) - meanY;
 50 |                 Y(index) = Y(index) / stdY;
 51 |             else
 52 |                 % NOTE(review): the per film statistics pool the ratings
 53 |                 % of Y and of Ytest.
 54 |                 for i=1:numFilms
 55 |                     % compute the mean and standard deviation of each film
 56 |                     ind = find(Y(i,:));
 57 |                     trainVals = full(Y(i,ind));
 58 |                     % only the rated (nonzero) test entries; taking std
 59 |                     % of the whole row counted the zeros as ratings
 60 |                     testVals = full(nonzeros(Ytest(i,:)));
 61 |                     numTrain = length(trainVals);
 62 |                     numTest = length(testVals);
 63 |                     length_v = numTrain + numTest;
 64 |                     if (length_v == 0)
 65 |                         % no rating at all: keep the defaults instead of
 66 |                         % storing NaN from a division by zero
 67 |                         continue;
 68 |                     end
 69 |                     mean_v = (sum(trainVals) + sum(testVals))/length_v;
 70 |                     % count weighted average of the two standard
 71 |                     % deviations; std of an empty set is NaN, so an
 72 |                     % empty part is left out
 73 |                     std_v = 0;
 74 |                     if (numTrain > 0)
 75 |                         std_v = std_v + numTrain*std(trainVals);
 76 |                     end
 77 |                     if (numTest > 0)
 78 |                         std_v = std_v + numTest*std(testVals);
 79 |                     end
 80 |                     std_v = std_v/length_v;
 81 |                     Y(i,ind) = Y(i,ind) - mean_v;
 82 |                     meanFilms(i) = mean_v;
 83 |                     if (std_v>0) 
 84 |                         Y(i,ind) = Y(i,ind)/std_v;
 85 |                         % record the scale only when it was applied, so
 86 |                         % that model.sd matches what was done to Y
 87 |                         stdFilms(i) = std_v;
 88 |                     end
 89 |                 end
 90 |             end
 91 |         end
 92 | 
 93 |         options = collabOptions;
 94 |         model = collabCreate(q, size(Y, 2), Y, options);
 95 |         if (substract_mean)
 96 |             if 0
 97 |                 % this does the global mean
 98 |                 model.mu = repmat(meanY,size(model.mu,1),1);
 99 |                 model.sd = repmat(stdY,size(model.sd,1),1);
100 |             else
101 |                 model.mu = meanFilms;
102 |                 model.sd = stdFilms;
103 |             end
104 |         end
105 |         model.kern.comp{2}.variance = 0.11;
106 |         model.kern.comp{3}.variance =  5; 
107 |         options = collabOptimiseOptions;
108 | 
109 |         % set parameters
110 |         options.momentum = 0.9;
111 |         options.learnRate = 0.0001;
112 |         options.paramMomentum = 0.9;
113 |         options.paramLearnRate = 0.0001;
114 |         options.numIters = iters;
115 |         options.showLikelihood = false;
116 | 
117 |         capName = dataSetName;
118 |         capName(1) = upper(capName(1));
119 |         options.saveName = ['dem' capName num2str(experimentNo) '_'];
120 | 
121 |         model = collabOptimise(model, Y, options)
122 | 
123 |         % compute the test error
124 |         disp('Computing test error');
125 | 
126 |         [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorStrong(model,Ytest)
127 | 
128 |         % Save the results.
129 |         saveResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
130 |         disp(['Saving ... ',saveResults]);
131 |         save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
132 |     end
133 | end
134 | 


--------------------------------------------------------------------------------
/matlab/demMovielens7.m:
--------------------------------------------------------------------------------
  1 | % DEMMOVIELENS7 Try collaborative filtering on the large movielens data.
  2 | % using the weak movielens experiment
  3 | 
  4 | randn('seed', 1e5);
  5 | rand('seed', 1e5);
  6 | 
  7 | experimentNo = 3;
  8 | substract_mean = 0;
  9 | 
 10 | dataSetName = 'movielens_weak_1';
 11 | [Y, void, Ytest] = collabLoadData(dataSetName);
 12 | 
 13 | numFilms = size(Y,1);
 14 | % defaults (mean 0, std 1) leave a film untouched
 15 | meanFilms = zeros(numFilms,1);
 16 | stdFilms = ones(numFilms,1);
 17 | if (substract_mean)
 18 |     % do for each film independently
 19 |     % NOTE(review): the statistics pool the ratings of Y and of Ytest.
 20 |     for i=1:numFilms
 21 |         % compute the mean and standard deviation of each film
 22 |         ind = find(Y(i,:));
 23 |         trainVals = full(Y(i,ind));
 24 |         % only the rated (nonzero) test entries; taking std of the whole
 25 |         % row counted the zeros as ratings
 26 |         testVals = full(nonzeros(Ytest(i,:)));
 27 |         numTrain = length(trainVals);
 28 |         numTest = length(testVals);
 29 |         length_v = numTrain + numTest;
 30 |         if (length_v == 0)
 31 |             % no rating at all: keep the defaults instead of storing NaN
 32 |             continue;
 33 |         end
 34 |         mean_v = (sum(trainVals) + sum(testVals))/length_v;
 35 |         % count weighted average of the two standard deviations; std of
 36 |         % an empty set is NaN, so an empty part is left out
 37 |         std_v = 0;
 38 |         if (numTrain > 0)
 39 |             std_v = std_v + numTrain*std(trainVals);
 40 |         end
 41 |         if (numTest > 0)
 42 |             std_v = std_v + numTest*std(testVals);
 43 |         end
 44 |         std_v = std_v/length_v;
 45 |         Y(i,ind) = Y(i,ind) - mean_v;
 46 |         meanFilms(i) = mean_v;
 47 |         if (std_v>0) 
 48 |             Y(i,ind) = Y(i,ind)/std_v;
 49 |             % record the scale only when it was applied, so that
 50 |             % model.sd matches what was done to Y
 51 |             stdFilms(i) = std_v;
 52 |         end
 53 |     end
 54 | end
 55 | 
 56 | q = 5;
 57 | options = collabOptions;
 58 | model = collabCreate(q, size(Y, 2), Y, options);
 59 | model.kern.comp{2}.variance = 0.11;
 60 | model.kern.comp{3}.variance =  5; 
 61 | options = collabOptimiseOptions;
 62 | 
 63 | % set parameters
 64 | options.momentum = 0.9;
 65 | options.learnRate = 0.0001;
 66 | options.paramMomentum = 0.9;
 67 | options.paramLearnRate = 0.0001;
 68 | options.numIters = 1; % ??? put 10 back
 69 | options.showLikelihood = false;
 70 | 
 71 | capName = dataSetName;
 72 | capName(1) = upper(capName(1));
 73 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
 74 | 
 75 | %%% ?? add the model.mu and model.sd
 76 | model.mu = meanFilms;
 77 | model.sd = stdFilms;
 78 | 
 79 | model = collabOptimise(model, Y, options)
 80 | 
 81 | % we have to divide the test data into two sets, train and test for the
 82 | % prediction. All but one are the train
 83 | 
 84 | disp('Computing test error');
 85 | 
 86 | % ????? this test is to be done
 87 | 
 88 | % A leftover 'keyboard' debugging stop was removed here: it halted the
 89 | % script and waited for interactive input before the evaluation.
 90 | 
 91 | % ??? check if the mean is substracted...
 92 | 
 93 | [error_L2,error_NMAE,error_NMAE_round] = computeTestErrorWeak(model,Y,Ytest);
 94 | 
 95 | % Save the results.
 96 | save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error_L2', 'error_NMAE', 'error_NMAE_round');
 97 | 


--------------------------------------------------------------------------------
/matlab/demMovielens7Script.m:
--------------------------------------------------------------------------------
  1 | function[] = demMovielens7Script(substract_mean, partNo_v, latentDim_v,iters)
  2 | % DEMMOVIELENS7SCRIPT Try collaborative filtering on the large movielens data.
  3 | %
  4 | % demMovielens7Script(substract_mean, partNo_v, latentDim_v, iters)
  5 | %
  6 | % For every latent dimensionality and every partition the data set
  7 | % 'movielens_weak_<partNo>' is loaded, a model is optimised and the
  8 | % errors returned by computeTestErrorWeak are saved to a .mat file.
  9 | %
 10 | % substract_mean --> bool if substract the mean
 11 | % partNo_v --> vector with the partitions to compute results
 12 | % latentDim_v --> vector with the latent dimensionalities to compute results
 13 | % iters --> number of iterations
 14 | 
 15 | randn('seed', 1e5);
 16 | rand('seed', 1e5);
 17 | 
 18 | experimentNo = 3;
 19 | 
 20 | 
 21 | %partNo_v = [1:5];
 22 | %latentDim_v = [5, 2:4, 6];
 23 | 
 24 | 
 25 | for i_latent=1:length(latentDim_v)
 26 |     q = latentDim_v(i_latent);
 27 |     for i_part=1:length(partNo_v)
 28 |         partNo = partNo_v(i_part);
 29 | 
 30 |         dataSetName = ['movielens_weak_',num2str(partNo)];
 31 | 
 32 |         disp(['Reading ... ',dataSetName]);
 33 | 
 34 |         [Y, void, Ytest] = collabLoadData(dataSetName);
 35 | 
 36 |         numFilms = size(Y,1);
 37 |         % defaults (mean 0, std 1) leave a film untouched
 38 |         meanFilms = zeros(numFilms,1);
 39 |         stdFilms = ones(numFilms,1);
 40 | 
 41 |         if (substract_mean)
 42 |             if 0
 43 |                 % this substract the global mean
 44 |                 % create the total vector
 45 |                 ratings = [nonzeros(Y); nonzeros(Ytest)];
 46 |                 meanY = mean(ratings);
 47 |                 stdY = std(ratings);
 48 |                 index = find(Y);
 49 |                 Y(index) = Y(index) - meanY;
 50 |                 Y(index) = Y(index) / stdY;
 51 |             else
 52 |                 % NOTE(review): the per film statistics pool the ratings
 53 |                 % of Y and of Ytest.
 54 |                 for i=1:numFilms
 55 |                     % compute the mean and standard deviation of each film
 56 |                     ind = find(Y(i,:));
 57 |                     trainVals = full(Y(i,ind));
 58 |                     % only the rated (nonzero) test entries; taking std
 59 |                     % of the whole row counted the zeros as ratings
 60 |                     testVals = full(nonzeros(Ytest(i,:)));
 61 |                     numTrain = length(trainVals);
 62 |                     numTest = length(testVals);
 63 |                     length_v = numTrain + numTest;
 64 |                     if (length_v == 0)
 65 |                         % no rating at all: keep the defaults instead of
 66 |                         % storing NaN from a division by zero
 67 |                         continue;
 68 |                     end
 69 |                     mean_v = (sum(trainVals) + sum(testVals))/length_v;
 70 |                     % count weighted average of the two standard
 71 |                     % deviations; std of an empty set is NaN, so an
 72 |                     % empty part is left out
 73 |                     std_v = 0;
 74 |                     if (numTrain > 0)
 75 |                         std_v = std_v + numTrain*std(trainVals);
 76 |                     end
 77 |                     if (numTest > 0)
 78 |                         std_v = std_v + numTest*std(testVals);
 79 |                     end
 80 |                     std_v = std_v/length_v;
 81 |                     Y(i,ind) = Y(i,ind) - mean_v;
 82 |                     meanFilms(i) = mean_v;
 83 |                     if (std_v>0) 
 84 |                         Y(i,ind) = Y(i,ind)/std_v;
 85 |                         % record the scale only when it was applied, so
 86 |                         % that model.sd matches what was done to Y
 87 |                         stdFilms(i) = std_v;
 88 |                     end
 89 |                 end
 90 |             end
 91 |         end
 92 | 
 93 |         options = collabOptions;
 94 |         model = collabCreate(q, size(Y, 2), Y, options);
 95 |         if (substract_mean)
 96 |             if 0
 97 |                 % this does the global mean
 98 |                 model.mu = repmat(meanY,size(model.mu,1),1);
 99 |                 model.sd = repmat(stdY,size(model.sd,1),1);
100 |             else
101 |                 model.mu = meanFilms;
102 |                 model.sd = stdFilms;
103 |             end
104 |         end
105 |         model.kern.comp{2}.variance = 0.11;
106 |         model.kern.comp{3}.variance =  5; 
107 |         options = collabOptimiseOptions;
108 | 
109 |         % set parameters
110 |         options.momentum = 0.9;
111 |         options.learnRate = 0.0001;
112 |         options.paramMomentum = 0.9;
113 |         options.paramLearnRate = 0.0001;
114 |         options.numIters = iters;
115 |         options.showLikelihood = false;
116 | 
117 |         capName = dataSetName;
118 |         capName(1) = upper(capName(1));
119 |         options.saveName = ['dem' capName num2str(experimentNo) '_'];
120 | 
121 |         model = collabOptimise(model, Y, options)
122 | 
123 |         % compute the test error
124 |         disp('Computing test error');
125 | 
126 |         [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest)
127 | 
128 |         % Save the results.
129 |         saveResults = [capName,'_norm_',num2str(substract_mean),'_',num2str(q),'_',num2str(partNo),'_iters_',num2str(iters),'.mat'];
130 |         disp(['Saving ... ',saveResults]);
131 |         save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error');
132 |     end
133 | end
134 | 


--------------------------------------------------------------------------------
/matlab/demMovielensOrdered1.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENSORDERED1 Try collaborative filtering on the large movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | % Fix both random number generators.
 6 | rand('seed', 1e5);
 7 | randn('seed', 1e5);
 8 | 
 9 | experimentNo = 1;
10 | dataSetName = 'movielens';
11 | q = 3;
12 | 
13 | [Y, void, Ytest] = collabLoadData(dataSetName);
14 | 
15 | % Create the model from the full data, then adjust its settings.
16 | options = collabOptions;
17 | model = collabCreate(q, size(Y, 2), Y, options);
18 | model.noise = 'ordered';
19 | model.numActive = 1000;
20 | model.selectionCriterion = 'random';
21 | model.kern.comp{3}.variance =  1; 
22 | model.kern.comp{2}.variance = 0.11;
23 | 
24 | % Optimiser settings.
25 | options = collabOptimiseOptions;
26 | options.showEvery = 10;
27 | options.showLikelihood = false;
28 | options.numIters = 1;
29 | 
30 | % Optimise on the first 100 columns of Y only.
31 | Y = Y(:, 1:100);
32 | model = collabOptimise(model, Y, options)
33 |   
34 | % The evaluation and saving below are disabled.
35 | % val = 0;
36 | % tot = 0;
37 | % for i = 1:size(Y, 2)       
38 | %   ind = find(Ytest(:, i));
39 | %   elim = find(ind>size(model.X, 1));
40 | %   tind = ind;
41 | %   tind(elim) = [];
42 | %   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
43 | %   a = Ytest(tind, i) - mu; 
44 | %   a = [a; Ytest(elim, i)];
45 | %   val = val + a'*a;
46 | %   tot = tot + length(a);
47 | % end
48 | % error = sqrt(val/tot);
49 | 
50 | % % Save the results.
51 | % capName = dataSetName;
52 | % capName(1) = upper(capName(1));
53 | % save(['dem' capName num2str(experimentNo) '.mat'], 'model', 'error');
54 | 


--------------------------------------------------------------------------------
/matlab/demMovielensSmall1.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENSSMALL1 Try collaborative filtering on the small movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 1;
 9 | 
10 | % One model is trained and evaluated per partition; error(partition)
11 | % collects the root mean square errors.
12 | for partition = 1:5
13 |   dataSetName = ['movielensSmall' num2str(partition)];
14 |   [Y, void, Ytest] = collabLoadData(dataSetName);
15 |   q = 2;
16 |   options = collabOptions;
17 |   %/~
18 |   %options.heteroNoise = true;
19 |   %options.kern = {'rbf', 'bias'};
20 |   %~/
21 |   model = collabCreate(q, size(Y, 2), Y, options);
22 |   %/~
23 |   %model.diagvar = repmat(5.0, size(model.diagvar));
24 |   %~/
25 |   model.kern.comp{2}.variance = 0.11;
26 |   model.kern.comp{3}.variance =  5; 
27 |   options = collabOptimiseOptions;
28 |   
29 |   % set parameters
30 |   options.momentum = 0.9;
31 |   options.learnRate = 0.0001;
32 |   options.paramMomentum = 0.9;
33 |   options.paramLearnRate = 0.0001;
34 |   options.numIters = 10;
35 |   
36 |   capName = dataSetName;
37 |   capName(1) = upper(capName(1));
38 |   options.saveName = ['dem' capName num2str(experimentNo) '_'];
39 |   
40 |   model = collabOptimise(model, Y, options);
41 |   
42 |   val = 0;
43 |   tot = 0;
44 |   for i = 1:size(Y, 2)       
45 |     ind = find(Ytest(:, i));
46 |     % elim holds positions within ind (not row numbers) of the test rows
47 |     % that have no corresponding row in model.X.
48 |     elim = find(ind>size(model.X, 1));
49 |     tind = ind;
50 |     tind(elim) = [];
51 |     [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
52 |     a = Ytest(tind, i) - mu; 
53 |     % Rows without a row in model.X get no prediction, so the rating
54 |     % itself is the residual. Map the positions back through ind.
55 |     a = [a; Ytest(ind(elim), i)];
56 |     val = val + a'*a;
57 |     tot = tot + length(a);
58 |   end
59 |   error(partition) = sqrt(val/tot);
60 | 
61 |   % Save the results.
62 |   save(['dem' capName '_' num2str(experimentNo) '.mat'], 'model', 'error');
63 |   
64 | end
65 | 


--------------------------------------------------------------------------------
/matlab/demMovielensSmallHetero1.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENSSMALLHETERO1 Try collaborative filtering on the small movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);  % fix both generator seeds so that runs are repeatable
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 1;
 9 | % Fit and score one heteroNoise model for each of the five partitions.
10 | for partition = 1:5
11 |   dataSetName = ['movielensSmall' num2str(partition)];
12 |   [Y, void, Ytest] = collabLoadData(dataSetName);
13 |   q = 2;  % first argument of collabCreate; presumably the latent dimension
14 |   options = collabOptions;
15 |   options.heteroNoise = true;
16 |   options.kern = {'rbf', 'bias'};
17 |   model = collabCreate(q, size(Y, 2), Y, options);
18 |   model.diagvar = repmat(1.0, size(model.diagvar));  % reset every entry of diagvar to 1
19 |   model.kern.comp{2}.variance = 0.11;
20 |   options = collabOptimiseOptions;
21 |   
22 |   % Step sizes, momentum terms and iteration count passed to collabOptimise.
23 |   options.momentum = 0.9;
24 |   options.learnRate = 0.0001;
25 |   options.paramMomentum = 0.9;
26 |   options.paramLearnRate = 0.0001;
27 |   options.noiseMomentum = 0.9;
28 |   options.noiseLearnRate = 0.0001;
29 |   options.numIters = 10;
30 |   
31 |   capName = dataSetName;
32 |   capName(1) = upper(capName(1));
33 |   options.saveName = ['dem' capName 'Hetero' num2str(experimentNo) '_'];
34 |   
35 |   model = collabOptimise(model, Y, options);
36 |   
37 |   val = 0;  % running sum of squared test residuals
38 |   tot = 0;  % running count of test entries
39 |   for i = 1:size(Y, 2)       
40 |     ind = find(Ytest(:, i));  % rows holding a non-zero test entry
41 |     elim = find(ind>size(model.X, 1));  % positions in ind beyond the rows of model.X
42 |     tind = ind;
43 |     tind(elim) = [];
44 |     [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
45 |     a = Ytest(tind, i) - mu; 
46 |     a = [a; Ytest(ind(elim), i)];  % elim indexes ind, so map it back to rows of Ytest
47 |     val = val + a'*a;
48 |     tot = tot + length(a);
49 |   end
50 |   error(partition) = sqrt(val/tot);  % RMS test error; note that 'error' shadows the builtin
51 |                                    % Save the results.
52 |   capName = dataSetName;
53 |   capName(1) = upper(capName(1));
54 |   save(['dem' capName 'Hetero' '_' num2str(experimentNo) '.mat'], 'model', 'error');
55 |   
56 | end
57 | 


--------------------------------------------------------------------------------
/matlab/demMovielensSmallMix1.m:
--------------------------------------------------------------------------------
 1 | % DEMMOVIELENSSMALLMIX1 Try collaborative filtering on the small movielens data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);  % fix both generator seeds so that runs are repeatable
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 1;
 9 | error = 0;  % grows to one RMS test error per partition; shadows the builtin
10 | for partition = 1:5
11 |   dataSetName = ['movielensSmall' num2str(partition)];
12 |   [Y, void, Ytest] = collabLoadData(dataSetName);
13 |   q = 2;  % first argument of collabCreate; presumably the latent dimension
14 |   options = collabOptions;
15 |   options.numComps = 2;
16 |   options.kern = {'rbf', 'bias'};
17 |   %/~
18 |   %options.heteroNoise = true;
19 |   %~/
20 |   model = collabCreate(q, size(Y, 2), Y, options);
21 |   %/~
22 |   %model.diagvar = repmat(5.0, size(model.diagvar));
23 |   %~/
24 |   model.kern.comp{2}.variance = 0.11;
25 |   model.sigma2 = 5;
26 |   %model.kern.comp{3}.variance =  5; 
27 |   options = collabOptimiseOptions;
28 |   
29 |   % Step sizes, momentum terms and iteration count passed to collabOptimise.
30 |   options.momentum = 0.9;
31 |   options.learnRate = 0.0001;
32 |   options.paramMomentum = 0.9;
33 |   options.paramLearnRate = 0.0001;
34 |   options.noiseMomentum = 0.9;
35 |   options.noiseLearnRate = 0.0001;
36 |   options.numIters = 10;
37 |   
38 |   capName = dataSetName;
39 |   capName(1) = upper(capName(1));
40 |   options.saveName = ['dem' capName 'Mix' num2str(experimentNo) '_'];
41 |   
42 |   model = collabOptimise(model, Y, options);
43 |   
44 |   val = 0;  % running sum of squared test residuals
45 |   tot = 0;  % running count of test entries
46 |   for i = 1:size(Y, 2)       
47 |     ind = find(Ytest(:, i));  % rows holding a non-zero test entry
48 |     elim = find(ind>size(model.X, 1));  % positions in ind beyond the rows of model.X
49 |     tind = ind;
50 |     tind(elim) = [];
51 |     [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
52 |     a = Ytest(tind, i) - mu; 
53 |     a = [a; Ytest(ind(elim), i)];  % elim indexes ind, so map it back to rows of Ytest
54 |     val = val + a'*a;
55 |     tot = tot + length(a);
56 |   end
57 |   error(partition) = sqrt(val/tot);
58 |                                    % Save the results.
59 |   capName = dataSetName;
60 |   capName(1) = upper(capName(1));
61 |   save(['dem' capName 'Mix_' num2str(experimentNo) '.mat'], 'model', 'error');
62 |   
63 | end
64 | 


--------------------------------------------------------------------------------
/matlab/demMovielensSmallMixFromSingleScript1.m:
--------------------------------------------------------------------------------
  1 | % DEMMOVIELENSSMALLMIXFROMSINGLESCRIPT1 Start a mixture model from a saved single-component model on the small movielens data.
  2 | 
  3 | % COLLAB
  4 | 
  5 | randn('seed', 1e5);
  6 | rand('seed', 1e5);
  7 | % Experiment grid: every latent dimension is run on every partition and component count.
  8 | partNo_v = [1:5];
  9 | latentDim_v = [2 5 7 10];
 10 | numComps_v = [2];
 11 | experimentNo = 1;
 12 | numIters = 0;  % iteration count handed to collabOptimise for the mixture model
 13 | numItersSingle = 10;  % only used to build the names of the files loaded and saved
 14 | itersFinalEstep = 100;  % second argument of the final collabEstep call
 15 | 
 16 | error = 0;
 17 | for i_latent=1:length(latentDim_v)
 18 |     q = latentDim_v(i_latent);
 19 |     for i_part=1:length(partNo_v)
 20 |         partNo = partNo_v(i_part);
 21 | for i_comp = 1:length(numComps_v)
 22 |   
 23 |   dataSetName = ['movielensSmall' num2str(partNo)];
 24 |   [Y, void, Ytest] = collabLoadData(dataSetName);
 25 | 
 26 |   options = collabOptions;
 27 |   options.numComps = numComps_v(i_comp);
 28 | options.kern = {'rbf', 'bias'};%, 'white'};
 29 |   %/~
 30 |   %options.heteroNoise = true;
 31 |   %~/
 32 |   model = collabCreate(q, size(Y, 2), Y, options);
 33 |   %/~
 34 |   %model.diagvar = repmat(5.0, size(model.diagvar));
 35 |   %~/
 36 |   model.kern.comp{2}.variance = 0.11;
 37 |   model.sigma2 = 5;
 38 |   %model.kern.comp{3}.variance =  5; 
 39 |   options = collabOptimiseOptions;
 40 | 
 41 |   capName = dataSetName;
 42 |   capName(1) = upper(capName(1));
 43 |   options.saveName = ['dem' capName 'Mix' num2str(experimentNo) '_'];
 44 | 
 45 |   % Name of the results file written by the matching single-component run.
 46 |   loadResults = [capName,'_',num2str(q),'_1_',num2str(partNo),'_iters_',num2str(numItersSingle),'.mat'];
 47 |   disp(['Loading ... ',loadResults]);
 48 | 
 49 |   % Load the model that was learnt without a mixture.
 50 |   model_single = load(loadResults);
 51 |   % Reuse its X; its kernel parameters are extracted but not applied (see the commented-out line).
 52 | model.X = model_single.model.X;
 53 | params_single = kernExtractParam(model_single.model.kern);
 54 | %model.kern = kernExpandParam(model.kern,params_single);
 55 | model = collabInitS(model);
 56 | model = collabUpdateKernels(model);
 57 | 
 58 |   
 59 |   % Step sizes, momentum terms and iteration count passed to collabOptimise.
 60 |   options.momentum = 0.9;
 61 |   options.learnRate = 0.0001;
 62 |   options.paramMomentum = 0.9;
 63 |   options.paramLearnRate = 0.0001;
 64 |   options.noiseMomentum = 0.9;
 65 |   options.noiseLearnRate = 0.0001;
 66 |   options.numIters = numIters;
 67 |   
 68 | options.numIters
 69 |   
 70 |   disp('Starting optimization');
 71 |   
 72 |  
 73 |   model = collabOptimise(model, Y, options);
 74 | 
 75 |   disp('Ending optimization');
 76 |   % NOTE(review): keyboard hands control to the prompt on every pass of the loops; looks like leftover debugging -- confirm.
 77 | keyboard;
 78 | 
 79 | % do an E-step
 80 |      model = collabUpdateKernels(model);
 81 |     disp(['Doing E-step ',num2str(itersFinalEstep)]);
 82 | model = collabEstep(model,itersFinalEstep);
 83 |   % NOTE(review): second interactive stop, same concern as the keyboard call above.
 84 | keyboard;  
 85 |   
 86 |   disp('Computing error');
 87 | 
 88 |   % No trailing semicolons: the three sets of error figures are echoed to the console.
 89 |   [L2_error,NMAE_error,NMAE_round_error] = computeTestErrorWeak(model,Y,Ytest)
 90 |   [L2_error_all,NMAE_error_all,NMAE_round_error_all] = computeTestErrorWeakAllModes(model,Y,Ytest)
 91 |   [L2_error_best,NMAE_error_best,NMAE_round_error_best] = computeTestErrorWeakBestMode(model,Y,Ytest)
 92 | 
 93 |    
 94 |   capName = dataSetName;
 95 |   capName(1) = upper(capName(1));
 96 |   %save(['dem' capName 'Mix_' num2str(experimentNo) '.mat'], 'model', 'error');
 97 | saveResults = [capName,'_',num2str(q),'_',num2str(numComps_v(i_comp)),'_',num2str(partNo),'_iters_',num2str(numItersSingle),'_mix_',num2str(numComps_v(i_comp)),'_Estepiters_',num2str(itersFinalEstep),'_iters_',num2str(numItersSingle),'.mat'];
 98 |   disp(['Saving ... ',saveResults]);
 99 |   save(saveResults, 'model', 'L2_error','options','NMAE_error','NMAE_round_error', 'L2_error_best','NMAE_error_best','NMAE_round_error_best','L2_error_all','NMAE_error_all','NMAE_round_error_all');
100 |   end
101 |   end
102 | end
103 | 


--------------------------------------------------------------------------------
/matlab/demNetflix1.m:
--------------------------------------------------------------------------------
 1 | % DEMNETFLIX1 Try collaborative filtering on the netflix data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 1;
 9 | dataSetName = 'netflix';
10 | % Y, ratingSum, ratingSquareSum and ratingCount are not created in this script;
11 | load /local/data/netFlixDataProbe.mat
12 | % presumably they all come from the .mat file loaded above -- TODO confirm.
13 | q = 5;  % first argument of collabCreate; presumably the latent dimension
14 | options = collabOptions;
15 | model = collabCreate(q, size(Y, 1), Y, options);
16 | model.mu = ratingSum./ratingCount;  % mean rating: sum divided by count
17 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu);  % sqrt(mean square - squared mean)
18 | % Override the variances of the second and third kernel components.
19 | model.kern.comp{2}.variance = 0.11;
20 | model.kern.comp{3}.variance =  5; 
21 | options = collabOptimiseOptions;
22 | options.numIters = 5;
23 | options.showEvery = 400;
24 | options.saveEvery = 20000;
25 | capName = dataSetName;
26 | capName(1) = upper(capName(1));
27 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
28 | options.showLikelihood = false;
29 | model = collabOptimise(model, Y, options)  % no semicolon, so the fitted model is echoed
30 | % Disabled test-error evaluation, kept for reference.
31 | % val = 0;
32 | % tot = 0;
33 | % for i = 1:size(Y, 2)       
34 | %   ind = find(Ytest(:, i));
35 | %   elim = find(ind>size(model.X, 1));
36 | %   tind = ind;
37 | %   tind(elim) = [];
38 | %   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
39 | %   a = Ytest(tind, i) - mu; 
40 | %   a = [a; Ytest(elim, i)];
41 | %   val = val + a'*a;
42 | %   tot = tot + length(a);
43 | % end
44 | % error = sqrt(val/tot);
45 | 
46 | % Save the fitted model to demNetflix1.mat.
47 | capName = dataSetName;
48 | capName(1) = upper(capName(1));
49 | save(['dem' capName num2str(experimentNo) '.mat'], 'model');
50 | 


--------------------------------------------------------------------------------
/matlab/demNetflix2.m:
--------------------------------------------------------------------------------
 1 | % DEMNETFLIX2 Try collaborative filtering on the netflix data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 2;
 9 | dataSetName = 'netflix';
10 | % NOTE(review): absolute path inside a user's home directory; the script only runs where it exists.
11 | load /home/neill/netFlixDataProbe.mat
12 | % Y, ratingSum, ratingSquareSum and ratingCount presumably come from that file -- TODO confirm.
13 | q = 4;  % first argument of collabCreate; presumably the latent dimension
14 | options = collabOptions;
15 | model = collabCreate(q, size(Y, 1), Y, options);
16 | model.mu = ratingSum./ratingCount;  % mean rating: sum divided by count
17 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu);  % sqrt(mean square - squared mean)
18 | model.kern.comp{2}.variance = 0.11;
19 | model.kern.comp{3}.variance =  5; 
20 | options = collabOptimiseOptions;
21 | options.numIters = 5;
22 | options.showEvery = 400;
23 | options.saveEvery = 20000;
24 | capName = dataSetName;
25 | capName(1) = upper(capName(1));
26 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
27 | options.showLikelihood = false;
28 | model = collabOptimise(model, Y, options)  % no semicolon, so the fitted model is echoed
29 | % Disabled test-error evaluation, kept for reference.
30 | % val = 0;
31 | % tot = 0;
32 | % for i = 1:size(Y, 2)       
33 | %   ind = find(Ytest(:, i));
34 | %   elim = find(ind>size(model.X, 1));
35 | %   tind = ind;
36 | %   tind(elim) = [];
37 | %   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
38 | %   a = Ytest(tind, i) - mu; 
39 | %   a = [a; Ytest(elim, i)];
40 | %   val = val + a'*a;
41 | %   tot = tot + length(a);
42 | % end
43 | % error = sqrt(val/tot);
44 | 
45 | % Save the fitted model to demNetflix2.mat.
46 | save(['dem' capName num2str(experimentNo) '.mat'], 'model');
47 | 


--------------------------------------------------------------------------------
/matlab/demNetflix3.m:
--------------------------------------------------------------------------------
 1 | % DEMNETFLIX3 Try collaborative filtering on the netflix data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 3;
 9 | dataSetName = 'netflix';
10 | load /local/data/netFlixDataProbe.mat
11 | % Y, ratingSum, ratingSquareSum and ratingCount presumably come from the file loaded above -- TODO confirm.
12 | q = 6;  % first argument of collabCreate; presumably the latent dimension
13 | options = collabOptions;
14 | model = collabCreate(q, size(Y, 1), Y, options);
15 | model.mu = ratingSum./ratingCount;  % mean rating: sum divided by count
16 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu);  % sqrt(mean square - squared mean)
17 | model.kern.comp{2}.variance = 0.11;
18 | model.kern.comp{3}.variance =  5; 
19 | options = collabOptimiseOptions;
20 | options.numIters = 5;
21 | options.showEvery = 400;
22 | options.saveEvery = 20000;
23 | capName = dataSetName;
24 | capName(1) = upper(capName(1));
25 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
26 | options.showLikelihood = false;
27 | model = collabOptimise(model, Y, options)  % no semicolon, so the fitted model is echoed
28 | % Disabled test-error evaluation, kept for reference.
29 | % val = 0;
30 | % tot = 0;
31 | % for i = 1:size(Y, 2)       
32 | %   ind = find(Ytest(:, i));
33 | %   elim = find(ind>size(model.X, 1));
34 | %   tind = ind;
35 | %   tind(elim) = [];
36 | %   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
37 | %   a = Ytest(tind, i) - mu; 
38 | %   a = [a; Ytest(elim, i)];
39 | %   val = val + a'*a;
40 | %   tot = tot + length(a);
41 | % end
42 | % error = sqrt(val/tot);
43 | 
44 | % Save the fitted model to demNetflix3.mat.
45 | save(['dem' capName num2str(experimentNo) '.mat'], 'model');
46 | 


--------------------------------------------------------------------------------
/matlab/demNetflix4.m:
--------------------------------------------------------------------------------
 1 | % DEMNETFLIX4 Try collaborative filtering on the netflix data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 4;
 9 | dataSetName = 'netflix';
10 | % Y, ratingSum, ratingSquareSum and ratingCount are not created in this script;
11 | load /local/data/netFlixDataProbe.mat
12 | % presumably they all come from the .mat file loaded above -- TODO confirm.
13 | q = 5;  % first argument of collabCreate; presumably the latent dimension
14 | options = collabOptions;
15 | options.kern = {'rbf', 'lin', 'bias', 'white'};
16 | model = collabCreate(q, size(Y, 1), Y, options);
17 | model.mu = ratingSum./ratingCount;  % mean rating: sum divided by count
18 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu);  % sqrt(mean square - squared mean)
19 | model.kern.comp{3}.variance = 0.11;  % third entry of options.kern above ('bias')
20 | model.kern.comp{4}.variance =  5;  % fourth entry of options.kern above ('white')
21 | options = collabOptimiseOptions;
22 | options.numIters = 5;
23 | options.showEvery = 400;
24 | options.saveEvery = 20000;
25 | capName = dataSetName;
26 | capName(1) = upper(capName(1));
27 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
28 | options.showLikelihood = false;
29 | model = collabOptimise(model, Y, options)  % no semicolon, so the fitted model is echoed
30 | % Disabled test-error evaluation, kept for reference.
31 | % val = 0;
32 | % tot = 0;
33 | % for i = 1:size(Y, 2)       
34 | %   ind = find(Ytest(:, i));
35 | %   elim = find(ind>size(model.X, 1));
36 | %   tind = ind;
37 | %   tind(elim) = [];
38 | %   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
39 | %   a = Ytest(tind, i) - mu; 
40 | %   a = [a; Ytest(elim, i)];
41 | %   val = val + a'*a;
42 | %   tot = tot + length(a);
43 | % end
44 | % error = sqrt(val/tot);
45 | 
46 | % Save the fitted model to demNetflix4.mat.
47 | capName = dataSetName;
48 | capName(1) = upper(capName(1));
49 | save(['dem' capName num2str(experimentNo) '.mat'], 'model');
50 | 


--------------------------------------------------------------------------------
/matlab/demNetflix5.m:
--------------------------------------------------------------------------------
 1 | % DEMNETFLIX5 Try collaborative filtering on the netflix data.
 2 | 
 3 | % COLLAB
 4 | 
 5 | randn('seed', 1e5);
 6 | rand('seed', 1e5);
 7 | 
 8 | experimentNo = 5;
 9 | dataSetName = 'netflix';
10 | % Y, ratingSum, ratingSquareSum and ratingCount are not created in this script;
11 | load /local/data/netFlixDataProbe.mat
12 | % presumably they all come from the .mat file loaded above -- TODO confirm.
13 | q = 5;  % first argument of collabCreate; presumably the latent dimension
14 | options = collabOptions;
15 | model = collabCreate(q, size(Y, 1), Y, options);
16 | model.mu = ratingSum./ratingCount;  % mean rating: sum divided by count
17 | model.sd = sqrt(ratingSquareSum./ratingCount - model.mu.*model.mu);  % sqrt(mean square - squared mean)
18 | % Override the variances of the second and third kernel components.
19 | model.kern.comp{2}.variance = 0.11;
20 | model.kern.comp{3}.variance =  5; 
21 | options = collabOptimiseOptions;
22 | options.momentum = 0.5;  % momentum and learning rates are set explicitly in this experiment
23 | options.learnRate = 0.00001;
24 | options.paramMomentum = 0.5;
25 | options.paramLearnRate = 0.00001;
26 | options.numIters = 5;
27 | options.showEvery = 400;
28 | options.saveEvery = 20000;
29 | capName = dataSetName;
30 | capName(1) = upper(capName(1));
31 | options.saveName = ['dem' capName num2str(experimentNo) '_'];
32 | options.showLikelihood = false;
33 | model = collabOptimise(model, Y, options)  % no semicolon, so the fitted model is echoed
34 | % Disabled test-error evaluation, kept for reference.
35 | % val = 0;
36 | % tot = 0;
37 | % for i = 1:size(Y, 2)       
38 | %   ind = find(Ytest(:, i));
39 | %   elim = find(ind>size(model.X, 1));
40 | %   tind = ind;
41 | %   tind(elim) = [];
42 | %   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
43 | %   a = Ytest(tind, i) - mu; 
44 | %   a = [a; Ytest(elim, i)];
45 | %   val = val + a'*a;
46 | %   tot = tot + length(a);
47 | % end
48 | % error = sqrt(val/tot);
49 | 
50 | % Save the fitted model to demNetflix5.mat.
51 | capName = dataSetName;
52 | capName(1) = upper(capName(1));
53 | save(['dem' capName num2str(experimentNo) '.mat'], 'model');
54 | 


--------------------------------------------------------------------------------
/matlab/em-weak.m:
--------------------------------------------------------------------------------
 1 | % EM-WEAK Sweep the regularisation values on one weak split of em-split.mat.
 2 | %
 3 | % For every entry of regvals, conjgrad is run starting from the previous
 4 | % solution v, and the ZOE and MAE scores on weaktrain{i} and weaktest{i} are
 5 | % printed to the console and appended to the file named by fn.
 6 | 
 7 | % variables: strong{train,test}, weak{train,test}
 8 | load 'em-split.mat'
 9 | clear strongtrain strongtest;
10 | regvals = sqrt(sqrt(10)).^[9 8.5 8 7.5 7 6.5 6 5.5 5 4.5 4];
11 | objgrad = @m3fshc_norm;
12 | tol = 1e-3;
13 | [n,m] = size(weaktrain{1});
14 | p = 500;
15 | l = 5;
16 | i = 3;  % index of the split used throughout the sweep
17 | maxiter = 100;
18 | fprintf('p=%d maxiter=%d i=%d\n',p,maxiter,i);
19 | fn = sprintf('../result/WEAK_r%d_c%d_p%d_x%d_i%d',n,m,p,maxiter,i);
20 | v = randn(n*p+m*p+n*(l-1),1);
21 | for i3=1:length(regvals)
22 |   fprintf(1,'Begin conjgrad: regval=%.1e\n',regvals(i3));
23 |   [v] = conjgrad(v,@cgLineSearch,{'c2',1e-2},objgrad,{weaktrain{i},regvals(i3),l,'verbose',0},'tol',tol,'maxiter',maxiter,'verbose',2);
24 |   % Unpack v into U (n x p), V (m x p) and theta (n x (l-1)).
25 |   U = reshape(v(1:n*p),n,p);
26 |   V = reshape(v(n*p+1:n*p+m*p),m,p);
27 |   theta = reshape(v(n*p+m*p+1:n*p+m*p+n*(l-1)),n,l-1);
28 |   [U,V] = normCols(U,V);  
29 |   X = U*V';
30 |   [y] = m3fSoftmax(X,theta);
31 |   Xrank = rank(X);
32 |   clear U V theta X;
33 |   % Format the report once: the metrics are evaluated a single time and the
34 |   % console and the log file receive exactly the same line.
35 |   report = sprintf('%d %s xi=%d p=%d tol=%.0e rank=%d %.2e ZOE: %.2f %.4f  MAE: %.2f %.4f\n',i,func2str(objgrad),maxiter,p,tol,Xrank,regvals(i3),zoe(y,weaktrain{i}),zoe(y,weaktest{i}),mae(y,weaktrain{i}),mae(y,weaktest{i}));
36 |   fprintf(1,'%s',report);
37 |   fh = fopen(fn,'a');
38 |   if fh < 0
39 |     % fopen returns -1 on failure; keep sweeping rather than abort the run.
40 |     warning('Could not open %s for appending; result shown on console only.', fn);
41 |   else
42 |     fprintf(fh,'%s',report);
43 |     fclose(fh);
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/matlab/generateCteNMAE.m:
--------------------------------------------------------------------------------
 1 | function [mae] = generateCteNMAE(num_ordinals)
 2 | 
 3 | % GENERATECTENMAE Estimate the constant used to normalise the mean absolute error.
 4 | %
 5 | % [mae] = generateCteNMAE(num_ordinals)
 6 | %
 7 | % Draws two independent sets of ratings uniformly from 1..num_ordinals and
 8 | % returns the mean absolute difference between them.
 9 | % num_ordinals is 5 for movielens and 6 for eachmovie
10 | 
11 | numSamples = 100000;
12 | 
13 | % The reference ratings are drawn first and the simulated predictions second,
14 | % so the random stream is consumed in the same order as before.
15 | truth = ceil(num_ordinals*rand(numSamples, 1));
16 | guess = ceil(num_ordinals*rand(numSamples, 1));
17 | 
18 | absDiff = abs(truth - guess);
19 | mae = mean(absDiff);
20 | 


--------------------------------------------------------------------------------
/matlab/kernAdditionalKernCompute.m:
--------------------------------------------------------------------------------
 1 | function [k, n2] = kernAdditionalKernCompute(kern, x, x2)
 2 | 
 3 | % KERNADDITIONALKERNCOMPUTE Compute an RBF kernel over rows of KERN.ADDITIONAL.
 4 | %
 5 | %	Description:
 6 | %
 7 | %	K = KERNADDITIONALKERNCOMPUTE(KERN, X, X2) computes the radial basis
 8 | %	function kernel between the rows of KERN.ADDITIONAL selected by X and
 9 | %	those selected by X2.
10 | %	 Returns:
11 | %	  K - the kernel matrix computed at the given points.
12 | %	  N2 - the matrix returned by DIST2 for the two sets of rows
13 | %	   (presumably squared distances).
14 | %	 Arguments:
15 | %	  KERN - the kernel structure; its fields ADDITIONAL, INVERSEWIDTH and
16 | %	   VARIANCE are read.
17 | %	  X - indices of the rows of KERN.ADDITIONAL for the rows of the kernel.
18 | %	  X2 - indices of the rows of KERN.ADDITIONAL for the columns of the kernel.
19 | %
20 | %	K = KERNADDITIONALKERNCOMPUTE(KERN, X) computes the kernel matrix of the
21 | %	rows selected by X against themselves.
22 | %
23 | %
24 | %	See also
25 | %	RBFADDITIONALKERNCOMPUTE, KERNCOMPUTE, KERNCREATE, RBFADDITIONALKERNDIAGCOMPUTE
26 | 
27 | 
28 | %	Copyright (c) 2009 Raquel Urtasun
29 | % 	rbfKernCompute.m version 1.0
30 | 
31 | 
32 | if nargin < 3
33 |   % Single index set: compare the selected rows with themselves.
34 |   x2 = x;
35 | end
36 | % The features live in the kernel structure; a bare 'additional' is not
37 | % defined in this function, so it has to be read from kern.
38 | n2 = dist2(kern.additional(x,:), kern.additional(x2,:));
39 | wi2 = (.5 .* kern.inverseWidth);
40 | k = kern.variance*exp(-n2*wi2);
41 | 


--------------------------------------------------------------------------------
/matlab/loadAverageVariance.m:
--------------------------------------------------------------------------------
 1 | % LOADAVERAGEVARIANCE Compute the mean and variance of the scores in each Netflix film file.
 2 | %
 3 | % Reads mv_0000001.txt, mv_0000002.txt, ... from the current directory, takes
 4 | % the field between the first and second comma of every line after the first
 5 | % as the score, and fills meanVals(i) and varVals(i) for film i.
 6 | 
 7 | numFilms = 17770;  % same film count as loadNetflix.m
 8 | meanVals = zeros(numFilms, 1);
 9 | varVals = zeros(numFilms, 1);
10 | for i = 1:numFilms
11 |   fileNameBase = num2str(i);
12 |   fileName = ['mv_' repmat('0', 1, 7-length(fileNameBase)) fileNameBase ...
13 |       '.txt'];
14 |   fid = fopen(fileName);
15 |   assert(fid >= 0, 'Unable to open %s.', fileName);
16 |   void = fgetl(fid);  % skip the first line of the file
17 |   vals = zeros(64, 1);  % buffer, doubled whenever it fills up
18 |   count = 0;            % number of scores read from this file
19 |   while 1
20 |     nextLine = fgetl(fid);
21 |     if ~ischar(nextLine), break, end  % fgetl returns -1 at end of file
22 |     commas = find(nextLine==44);  % 44 is the character code of ','
23 |     count = count + 1;
24 |     if count > length(vals)
25 |       vals = [vals; zeros(length(vals), 1)];
26 |     end
27 |     vals(count) = str2double(nextLine(commas(1)+1:commas(2)-1));
28 |   end
29 |   fclose(fid);
30 |   vals = vals(1:count);  % drop the unused tail of the buffer
31 |   meanVals(i) = mean(vals);
32 |   varVals(i) = var(vals);
33 | end
34 | 


--------------------------------------------------------------------------------
/matlab/loadNetflix.m:
--------------------------------------------------------------------------------
 1 | % LOADNETFLIX Read the Netflix per-film rating files into per-user cell arrays.
 2 | %
 3 | % Reads mv_0000001.txt to mv_0017770.txt from the current directory and saves
 4 | % Y together with the per-film and per-user rating statistics to
 5 | % netFlixData.mat. Row uid of Y holds {film numbers, scores, rating count}.
 6 | 
 7 | Y = cell(2649429, 3);
 8 | ratingSum = zeros(17770, 1);
 9 | ratingSquareSum = zeros(17770, 1);
10 | ratingCount = zeros(17770, 1);
11 | oldTotalRating = 0;
12 | totalRating = 0;
13 | tic
14 | for filmNumDouble = 1:17770
15 |   filmNum = uint16(filmNumDouble);
16 |   fileNameBase = num2str(filmNum);
17 |   fileName = ['mv_' repmat('0', 1, 7-length(fileNameBase)) fileNameBase '.txt'];
18 |   fid = fopen(fileName);
19 |   assert(fid >= 0, 'Unable to open %s.', fileName);
20 |   void = fgetl(fid);  % skip the first line of the file
21 |   while 1
22 |     nextLine = fgetl(fid);
23 |     if ~ischar(nextLine), break, end  % fgetl returns -1 at end of file
24 |     % Count a rating only once its line has been read; counting before the
25 |     % end-of-file test added one phantom rating to every film.
26 |     totalRating = totalRating +1;
27 |     ratingCount(filmNumDouble) = ratingCount(filmNumDouble)  + 1; 
28 |     commas = find(nextLine==44);  % 44 is the character code of ','
29 |     uid = str2num(nextLine(1:commas(1)-1));
30 |     score = uint8(str2num(nextLine(commas(1)+1:commas(2)-1)));
31 |     ratingSum(filmNumDouble) = ratingSum(filmNumDouble)+double(score);
32 |     ratingSquareSum(filmNumDouble) = ratingSquareSum(filmNumDouble)+double(score)*double(score);
33 |     if isempty(Y{uid, 1})
34 |       % First rating seen for this user: start with room for 40 entries.
35 |       Y{uid, 1} = uint16(zeros(40, 1));
36 |       Y{uid, 2} = uint8(zeros(40, 1));
37 |       Y{uid, 3} = 0;
38 |     end
39 |     if Y{uid, 1}(end) ~= 0
40 |       % The last slot is in use, so the buffers are full: grow them by 20.
41 |       %fprintf('Allocating memory for %d, user %d\n', filmNum, uid)
42 |       Y{uid, 1} = [Y{uid, 1}; uint16(zeros(20, 1))];
43 |       Y{uid, 2} = [Y{uid, 2}; uint8(zeros(20, 1))];
44 |     end
45 |     Y{uid, 3} = Y{uid, 3} + 1;
46 |     Y{uid, 1}(Y{uid, 3}) = filmNum;
47 |     Y{uid, 2}(Y{uid, 3}) = score;
48 |   end
49 |   fclose(fid);
50 |   n = ratingCount(filmNumDouble);
51 |   diffRating = totalRating - oldTotalRating;
52 |   oldTotalRating = totalRating;
53 |   rps = diffRating/toc;  % ratings read per second for this film
54 |   remain = (100000000 - totalRating)/rps;  % 1e8 is presumably the approximate total number of ratings
55 |   remain = remain/(3600);
56 |   tic
57 |   if ~rem(filmNumDouble, 1)  % always true for the divisor 1: progress is printed for every film
58 |     fprintf('Film %d done,\t ratings %d,\t mean %2.4f,\t std %2.4f,\t rps %2.4f,\t remain %2.4f hrs,\t total %d.\n', filmNumDouble, n,...
59 |             ratingSum(filmNumDouble)/n, ...
60 |             sqrt(ratingSquareSum(filmNumDouble)/n- ...
61 |                  ratingSum(filmNumDouble)*ratingSum(filmNumDouble)/(n*n)), ...
62 |             rps, remain, totalRating);
63 |   end
64 | end
65 | userCount = spalloc(2649429, 1, 480189);
66 | userSquareSum  = spalloc(2649429, 1, 480189);
67 | userSum  = spalloc(2649429, 1, 480189);
68 | 
69 | % Per-user statistics; the unused zero tail of each buffer adds nothing to the sums.
70 | for i = 1:size(Y, 1)
71 |   if ~isempty(Y{i, 1})
72 |     userCount(i) = Y{i, 3};
73 |     userSum(i) = sum(Y{i, 2});
74 |     userSquareSum(i) = sum(Y{i, 2}.*Y{i, 2});
75 |   end
76 | end
77 | 
78 | % Trim every buffer to the number of ratings actually stored.
79 | for i = 1:size(Y, 1)
80 |   if ~isempty(Y{i, 1})
81 |     Y{i, 1} = Y{i, 1}(1:Y{i,3});
82 |     Y{i, 2} = Y{i, 2}(1:Y{i,3});
83 |   end
84 | end
85 | 
86 | save netFlixData.mat Y ratingSum ratingSquareSum ratingCount userCount userSquareSum userSum
87 | 
88 | 


--------------------------------------------------------------------------------
/matlab/loadNetflix2.m:
--------------------------------------------------------------------------------
 1 | % LOADNETFLIX2 Accumulate per-film and per-user rating statistics from the Netflix files.
 2 | %
 3 | % Only running sums are kept: the cell array Y is allocated but never filled
 4 | % here, and nothing is saved, so the results stay in the workspace.
 5 | 
 6 | Y = cell(2649429, 3);
 7 | userCount = spalloc(2649429, 1, 480189);
 8 | userSquareSum  = spalloc(2649429, 1, 480189);
 9 | userSum  = spalloc(2649429, 1, 480189);
10 | numFilms = 17770;  % same film count as loadNetflix.m (this script previously stopped at 17700)
11 | ratingSum = zeros(numFilms, 1);
12 | ratingSquareSum = zeros(numFilms, 1);
13 | ratingCount = zeros(numFilms, 1);
14 | oldTotalRating = 0;
15 | totalRating = 0;
16 | tic
17 | for filmNumDouble = 1:numFilms
18 |   filmNum = uint16(filmNumDouble);
19 |   fileNameBase = num2str(filmNum);
20 |   fileName = ['mv_' repmat('0', 1, 7-length(fileNameBase)) fileNameBase '.txt'];
21 |   fid = fopen(fileName);
22 |   assert(fid >= 0, 'Unable to open %s.', fileName);
23 |   void = fgetl(fid);  % skip the first line of the file
24 |   while 1
25 |     nextLine = fgetl(fid);
26 |     if ~ischar(nextLine), break, end  % fgetl returns -1 at end of file
27 |     % Count a rating only once its line has been read; counting before the
28 |     % end-of-file test added one phantom rating to every film.
29 |     totalRating = totalRating +1;
30 |     ratingCount(filmNumDouble) = ratingCount(filmNumDouble)  + 1; 
31 |     commas = find(nextLine==44);  % 44 is the character code of ','
32 |     uid = str2num(nextLine(1:commas(1)-1));
33 |     score = uint8(str2num(nextLine(commas(1)+1:commas(2)-1)));
34 |     ratingSum(filmNumDouble) = ratingSum(filmNumDouble)+double(score);
35 |     ratingSquareSum(filmNumDouble) = ratingSquareSum(filmNumDouble)+double(score)*double(score);
36 |     % The per-user accumulators are sparse doubles, so convert the uint8
37 |     % score before combining it with them (as is done for the film sums).
38 |     userCount(uid) = userCount(uid)+1;
39 |     userSquareSum(uid) = userSquareSum(uid)+double(score)*double(score);
40 |     userSum(uid) = userSum(uid) + double(score);
41 |   end
42 |   fclose(fid);
43 |   n = ratingCount(filmNumDouble);
44 |   diffRating = totalRating - oldTotalRating;
45 |   oldTotalRating = totalRating;
46 |   rps = diffRating/toc;  % ratings read per second for this film
47 |   remain = (100000000 - totalRating)/rps;  % 1e8 is presumably the approximate total number of ratings
48 |   remain = remain/(3600);
49 |   tic
50 |   if ~rem(filmNumDouble, 1)  % always true for the divisor 1: progress is printed for every film
51 |     fprintf('Film %d done,\t ratings %d,\t mean %2.4f,\t std %2.4f,\t rps %2.4f,\t remain %2.4f hrs,\t total %d.\n', filmNumDouble, n,...
52 |             ratingSum(filmNumDouble)/n, ...
53 |             sqrt(ratingSquareSum(filmNumDouble)/n- ...
54 |                  ratingSum(filmNumDouble)*ratingSum(filmNumDouble)/(n*n)), ...
55 |             rps, remain, totalRating);
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/matlab/ml-weak.m:
--------------------------------------------------------------------------------
 1 | % ML-WEAK Sweep the regularisation values on one weak split of ml-split.mat.
 2 | %
 3 | % For every entry of regvals, conjgrad is run starting from the previous
 4 | % solution v, and the ZOE and MAE scores on weaktrain{i} and weaktest{i} are
 5 | % printed to the console and appended to the file named by fn.
 6 | 
 7 | % variables: strong{train,test}, weak{train,test}
 8 | load 'ml-split.mat'
 9 | clear strongtrain strongtest;
10 | regvals = sqrt(sqrt(10)).^[8 7.5 7 6.5 6 5.5 5 4.5 4 3.5 3];
11 | objgrad = @m3fshc_norm;
12 | tol = 1e-3;
13 | [n,m] = size(weaktrain{1});
14 | p = 500;
15 | l = 5;
16 | i = 3;  % index of the split used throughout the sweep
17 | maxiter = 100;
18 | fprintf('p=%d maxiter=%d i=%d\n',p,maxiter,i);
19 | fn = sprintf('../result/WEAK_r%d_c%d_p%d_x%d_i%d',n,m,p,maxiter,i);
20 | v = randn(n*p+m*p+n*(l-1),1);
21 | for i3=1:length(regvals)
22 |   fprintf(1,'Begin conjgrad: regval=%.1e\n',regvals(i3));
23 |   [v] = conjgrad(v,@cgLineSearch,{'c2',1e-2},objgrad,{weaktrain{i},regvals(i3),l,'verbose',0},'tol',tol,'maxiter',maxiter,'verbose',2);
24 |   % Unpack v into U (n x p), V (m x p) and theta (n x (l-1)).
25 |   U = reshape(v(1:n*p),n,p);
26 |   V = reshape(v(n*p+1:n*p+m*p),m,p);
27 |   theta = reshape(v(n*p+m*p+1:n*p+m*p+n*(l-1)),n,l-1);
28 |   [U,V] = normCols(U,V);  
29 |   X = U*V';
30 |   [y] = m3fSoftmax(X,theta);
31 |   Xrank = rank(X);
32 |   clear U V theta X;
33 |   % Format the report once: the metrics are evaluated a single time and the
34 |   % console and the log file receive exactly the same line.
35 |   report = sprintf('%d %s xi=%d p=%d tol=%.0e rank=%d %.2e ZOE: %.2f %.4f  MAE: %.2f %.4f\n',i,func2str(objgrad),maxiter,p,tol,Xrank,regvals(i3),zoe(y,weaktrain{i}),zoe(y,weaktest{i}),mae(y,weaktrain{i}),mae(y,weaktest{i}));
36 |   fprintf(1,'%s',report);
37 |   fh = fopen(fn,'a');
38 |   if fh < 0
39 |     % fopen returns -1 on failure; keep sweeping rather than abort the run.
40 |     warning('Could not open %s for appending; result shown on console only.', fn);
41 |   else
42 |     fprintf(fh,'%s',report);
43 |     fclose(fh);
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/matlab/netflixTest.m:
--------------------------------------------------------------------------------
 1 | % Load in the probe set and test netflix performance.
 2 | % NOTE(review): model is used below but never created here; presumably it comes from the .mat file or the workspace.
 3 | load /local/data/netFlixDataProbe.mat
 4 | counter = 0;  % number of non-empty rows of Y visited so far
 5 | totalCount = 0;
 6 | totalse = 0;
 7 | totalse2 = 0;
 8 | totalMeanSe = 0;
 9 | rmseOne = spalloc(length(Y), 1, 480000);  % NOTE(review): never written; the loop stores into rmse1 instead
10 | rmseTwo = spalloc(length(Y), 1, 480000);  % NOTE(review): never written; the loop stores into rmse2 instead
11 | Ypred = cell(size(Yprobe, 1), 3);
12 | for i = 1:length(Y)
13 |   if ~isempty(Y{i, 1})
14 |     counter = counter + 1;
15 |     if counter > 1000  % stop after the first 1000 non-empty rows of Y
16 |       break
17 |     else
18 |       if length(Yprobe{i, 1})>0
19 |         ind = Y{i, 1};
20 |         if length(ind)<3000  % rows with 3000 or more entries are skipped
21 |           yprime = (double(Y{i, 2}) - model.mu(ind))./model.sd(ind);  % standardise with the stored mu and sd
22 |           K = kernCompute(model.kern, model.X(ind, :));
23 |           invK = pdinv(K);
24 |           % Predict at the probe indices of this row.
25 |           testInd = Yprobe{i, 1};
26 |           diagK = kernDiagCompute(model.kern, model.X(testInd, :));
27 |           Kx = kernCompute(model.kern,model.X(ind, :), model.X(testInd, :));
28 |           KinvK = invK*Kx;
29 |           sd = model.sd(testInd);
30 |           Ypred{i, 1} = (KinvK'*yprime).*sd + model.mu(testInd);  % prediction mapped back with sd and mu
31 |           % Matching variance, scaled by sd squared.
32 |           Ypred{i, 2} = (diagK - sum(Kx.*KinvK, 1)').*sd.*sd;
33 |           thisMu = Ypred{i, 1};
34 |           thisSd = sqrt(Ypred{i, 2});
35 |           a = (1-thisMu)./thisSd;  % 1 and 5 are presumably the rating limits -- TODO confirm
36 |           b = (5-thisMu)./thisSd;
37 |           Ypred{i, 3} = thisMu ...
38 |               + (gaussOverDiffCumGaussian(b, a, 2) ...
39 |               - gaussOverDiffCumGaussian(b, a, 1)).*thisSd;
40 |           % Residuals of the plain prediction, the corrected prediction and the model.mu baseline.
41 |           vals = double(Yprobe{i, 2}) - Ypred{i, 1};
42 |           vals2 = double(Yprobe{i, 2}) - Ypred{i, 3};
43 |           dum = double(Yprobe{i, 2}) - model.mu(testInd);
44 | 
45 |           dumValsSq = dum'*dum;
46 |           vals2Sq = vals2'*vals2;
47 |           valsSq = vals'*vals;
48 |           % Per-row RMSE for each of the three predictors.
49 |           rmse1(i) = sqrt(valsSq/length(vals));
50 |           rmse2(i) = sqrt(vals2Sq/length(vals));
51 |           rmseDum(i) = sqrt(dumValsSq/length(vals));
52 |           totalMeanSe = totalMeanSe + dumValsSq; 
53 |           totalCount = totalCount + length(vals);
54 |           totalse = totalse+valsSq;
55 |           totalse2 = totalse2+vals2Sq;
56 |         end
57 |       end
58 |     end
59 |   end
60 | end
61 | rmseCorrected= sqrt(totalse2/totalCount);  % pooled RMSE of Ypred{:, 3}
62 | rmse = sqrt(totalse/totalCount);  % pooled RMSE of Ypred{:, 1}
63 | mrmse = sqrt(totalMeanSe/totalCount);  % pooled RMSE of the model.mu baseline


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernCompute.m:
--------------------------------------------------------------------------------
 1 | function [k, sk, n2] = rbfadditionalKernCompute(kern, x, x2)
 2 | 
 3 | % RBFADDITIONALKERNCOMPUTE Compute the RBF kernel given the parameters and X.
 4 | % FORMAT
 5 | % DESC computes the radial basis function kernel between the rows of
 6 | % kern.additional selected by two sets of indices.
 7 | % RETURN k : the kernel matrix, i.e. sk scaled by kern.variance.
 8 | % RETURN sk : the unscaled kernel matrix exp(-n2*kern.inverseWidth/2).
 9 | % RETURN n2 : the output of dist2 for the selected rows.
10 | % ARG kern : the kernel structure for which the matrix is computed.
11 | % ARG x : indices of the rows of kern.additional associated with the rows of the kernel.
12 | % ARG x2 : indices of the rows of kern.additional associated with the columns of the kernel.
13 | %
14 | % DESC computes the kernel matrix when the same indices are used for the
15 | % rows and the columns.
16 | % RETURN k : the kernel matrix computed at the given points.
17 | % ARG kern : the kernel structure for which the matrix is computed.
18 | % ARG x : indices of the rows of kern.additional to use.
19 | %
20 | % SEEALSO : rbfadditionalKernParamInit, kernCompute, kernCreate,
21 | % rbfadditionalKernDiagCompute
22 | %
23 | % COPYRIGHT : Raquel Urtasun, 2009
24 | %
25 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
26 | 
27 | % COLLAB
28 | 
29 | rowInfo = kern.additional(x, :);
30 | if nargin < 3
31 |   % Single index set: rows and columns share the same side information.
32 |   colInfo = rowInfo;
33 | else
34 |   colInfo = kern.additional(x2, :);
35 | end
36 | n2 = dist2(rowInfo, colInfo);
37 | halfInverseWidth = .5 .* kern.inverseWidth;
38 | sk = exp(-n2*halfInverseWidth);
39 | k = sk*kern.variance;


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernDiagCompute.m:
--------------------------------------------------------------------------------
 1 | function k = rbfadditionalKernDiagCompute(kern, x)
 2 | 
 3 | % RBFADDITIONALKERNDIAGCOMPUTE Compute diagonal of RBF side information kernel.
 4 | % FORMAT
 5 | % DESC computes the diagonal of the kernel matrix for the radial basis
 6 | % function kernel with side information. Every entry of the diagonal is
 7 | % kern.variance.
 8 | % RETURN k : kern.variance replicated once for each row of x, stacked
 9 | % vertically.
10 | % ARG kern : the kernel structure for which the matrix is computed.
11 | % ARG x : the inputs; only their number of rows is used.
12 | %
13 | % SEEALSO : rbfadditionalKernParamInit, kernDiagCompute, kernCreate,
14 | % rbfadditionalKernCompute
15 | %
16 | % COPYRIGHT : Raquel Urtasun 2009
17 | %
18 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
19 | 
20 | % COLLAB
21 | 
22 | numPoints = size(x, 1);
23 | k = repmat(kern.variance, numPoints, 1);


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernDiagGradX.m:
--------------------------------------------------------------------------------
 1 | function gX = rbfadditionalKernDiagGradX(kern, X)
 2 | 
 3 | % RBFADDITIONALKERNDIAGGRADX Gradient of RBF with side information kernel's
 4 | % diagonal with respect to X.
 5 | % FORMAT
 6 | % DESC returns the gradient of the diagonal of the radial basis function
 7 | % side information kernel matrix with respect to the elements of the design
 8 | % matrix given in X. The result is always zero.
 9 | % ARG kern : the kernel structure for which gradients are being computed
10 | % (not read by this function).
11 | % ARG X : the input data in the form of a design matrix.
12 | % RETURN gX : an array of zeros with the same dimensions as X.
13 | %
14 | % SEEALSO : rbfadditionalKernParamInit, kernDiagGradX, rbfadditionalKernGradX
15 | %
16 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
17 | %
18 | % COPYRIGHT : Raquel Urtasun, 2009
19 | 
20 | % COLLAB
21 | 
22 | inputDims = size(X);
23 | gX = zeros(inputDims);


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernExpandParam.m:
--------------------------------------------------------------------------------
 1 | function kern = rbfadditionalKernExpandParam(kern, params)
 2 | 
 3 | % RBFADDITIONALKERNEXPANDPARAM Create kernel structure from RBF kernel's parameters.
 4 | % FORMAT
 5 | % DESC places the parameters in the given vector into a radial basis
 6 | % function kernel structure for use with side information. This is used
 7 | % as a helper function to enable parameters to be optimised in, for example,
 8 | % the NETLAB optimisation functions.
 9 | % ARG kern : the kernel structure in which the parameters are to be
10 | % placed.
11 | % ARG params : vector of parameters; the first element is stored as the
12 | % inverse width and the second as the variance.
13 | % RETURN kern : kernel structure with the given parameters in the
14 | % relevant locations.
15 | %
16 | % SEEALSO : rbfKernParamInit, rbfKernExtractParam, kernExpandParam
17 | %
18 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
19 | %
20 | % COPYRIGHT : Raquel Urtasun
21 | 
22 | % COLLAB
23 | 
24 | newInverseWidth = params(1);
25 | newVariance = params(2);
26 | kern.inverseWidth = newInverseWidth;
27 | kern.variance = newVariance;


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernExtractParam.m:
--------------------------------------------------------------------------------
 1 | function [params, names] = rbfadditionalKernExtractParam(kern)
 2 | 
 3 | % RBFADDITIONALKERNEXTRACTPARAM Extract parameters from the RBF with side
 4 | % information kernel structure.
 5 | % FORMAT
 6 | % DESC Extract parameters from the radial basis function with side
 7 | % information kernel structure into a vector of parameters for optimisation.
 8 | % ARG kern : the kernel structure containing the parameters to be
 9 | % extracted.
10 | % RETURN param : row vector holding kern.inverseWidth followed by
11 | % kern.variance. No transformation is applied inside this function.
12 | %
13 | % FORMAT
14 | % DESC Extract parameters and parameter names from the radial basis
15 | % function with side information kernel structure.
16 | % ARG kern : the kernel structure containing the parameters to be
17 | % extracted.
18 | % RETURN param : row vector holding kern.inverseWidth followed by
19 | % kern.variance.
20 | % RETURN names : cell array of strings giving names to the parameters, in
21 | % the same order as param.
22 | %
23 | % SEEALSO rbfadditionalKernParamInit, rbfadditionalKernExpandParam, kernExtractParam, scg, conjgrad
24 | %
25 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
26 | %
27 | % COPYRIGHT : Raquel Urtasun, 2009
28 | 
29 | % COLLAB
30 | 
31 | params = [kern.inverseWidth kern.variance];
32 | % The names are only built when the caller asks for them.
33 | if nargout >= 2
34 |   names = cell(1, 2);
35 |   names{1} = 'inverse width';
36 |   names{2} = 'variance';
37 | end


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernGradX.m:
--------------------------------------------------------------------------------
 1 | function gX = rbfadditionalKernGradX(kern, X, X2)
 2 | 
 3 | % RBFADDITIONALKERNGRADX Gradient of RBF kernel with respect to input locations.
 4 | % FORMAT
 5 | % DESC returns the gradient of the radial basis function kernel with side
 6 | % information with respect to the input positions, where both the row
 7 | % positions and column positions are provided separately. The result is
 8 | % always zero.
 9 | % ARG kern : kernel structure for which gradients are being
10 | % computed (not read by this function).
11 | % ARG X : row locations against which gradients are being computed.
12 | % ARG X2 : column locations against which gradients are being computed.
13 | % RETURN gX : the returned gradients, an array of zeros which is
14 | % numData2 x numInputs x numData1. Where numData1 is the number of rows
15 | % of X, numData2 is the number of rows of X2 and numInputs is the number
16 | % of columns of X2.
17 | %
18 | % SEEALSO : rbfKernParamInit, kernGradX, rbfKernDiagGradX
19 | %
20 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
21 | %
22 | % COPYRIGHT : Raquel Urtasun, 2009
23 | 
24 | % COLLAB
25 | 
26 | numData1 = size(X, 1);
27 | numData2 = size(X2, 1);
28 | numInputs = size(X2, 2);
29 | gX = zeros(numData2, numInputs, numData1);


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernGradient.m:
--------------------------------------------------------------------------------
 1 | function g = rbfadditionalKernGradient(kern, x, varargin)
 2 | 
 3 | % RBFADDITIONALKERNGRADIENT Gradient of RBF with side information kernel's parameters.
 4 | % FORMAT
 5 | % DESC computes the gradient of functions with respect to the
 6 | % radial basis function with side information
 7 | % kernel's parameters. As well as the kernel structure and the
 8 | % input indices, the user provides a matrix PARTIAL which gives
 9 | % the partial derivatives of the function with respect to the
10 | % relevant elements of the kernel matrix.
11 | % ARG kern : the kernel structure for which the gradients are being
12 | % computed.
13 | % ARG x : the input indices for which the gradients are being
14 | % computed.
15 | % ARG partial : matrix of partial derivatives of the function of
16 | % interest with respect to the kernel matrix. It is multiplied
17 | % elementwise with the kernel matrix computed for x.
18 | % RETURN g : gradients of the function of interest with respect to
19 | % the kernel parameters: first the inverse width, then the variance.
20 | %
21 | % FORMAT
22 | % DESC computes the derivatives as above, but input indices are
23 | % now provided in two vectors associated with rows and columns of
24 | % the kernel matrix.
25 | % ARG kern : the kernel structure for which the gradients are being
26 | % computed.
27 | % ARG x : the input indices associated with the rows of the
28 | % kernel matrix.
29 | % ARG x2 : the input indices associated with the columns of the
30 | % kernel matrix.
31 | % ARG partial : matrix of partial derivatives of the function of
32 | % interest with respect to the kernel matrix. It is multiplied
33 | % elementwise with the kernel matrix computed for x and x2.
34 | % RETURN g : gradients of the function of interest with respect to
35 | % the kernel parameters.
36 | %
37 | % SEEALSO rbfadditionalKernParamInit, kernGradient, rbfadditionalKernDiagGradient, kernGradX
38 | %
39 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006, 2009
40 | %
41 | % COPYRIGHT : Raquel Urtasun, 2009
42 | 
43 | % COLLAB
44 | 
45 | % With four or more arguments the first optional one holds the column indices.
46 | if nargin >= 4
47 |   [k, sk, dist2xx] = rbfadditionalKernCompute(kern, x, varargin{1});
48 | else
49 |   [k, sk, dist2xx] = rbfadditionalKernCompute(kern, x);
50 | end
51 | 
52 | % The last argument is covGrad
53 | covGrad = varargin{end};
54 | gradInverseWidth = - .5*sum(sum(covGrad.*k.*dist2xx));
55 | gradVariance = sum(sum(covGrad.*sk));
56 | g = [gradInverseWidth, gradVariance];


--------------------------------------------------------------------------------
/matlab/rbfadditionalKernParamInit.m:
--------------------------------------------------------------------------------
 1 | function kern = rbfadditionalKernParamInit(kern)
 2 | 
 3 | % RBFADDITIONALKERNPARAMINIT RBF kernel with side information.
 4 | % FORMAT
 5 | % The radial basis function kernel (RBF) is sometimes also known as the
 6 | % squared exponential kernel. It is a very smooth non-linear kernel and is a
 7 | % popular choice for generic use.
 8 | %
 9 | %	k(x_i, x_j) = sigma2 * exp(-gamma/2 *(additional(x_i) - additional(x_j))'*(additional(x_i) - additional(x_j)))
10 | %
11 | % The parameters are sigma2, the process variance (kern.variance) and gamma,
12 | % the inverse width (kern.inverseWidth). The inverse width controls how wide
13 | % the basis functions are, the larger gamma, the smaller the basis functions
14 | % are.
15 | %
16 | % DESC fills in the default parameters of the RBF kernel with side
17 | % information for collaborative filtering.
18 | % RETURN kern : the kernel structure with the default parameters placed in.
19 | % ARG kern : the kernel structure which requires initialisation.
20 | %
21 | % SEEALSO : rbfkernParamInit, kernCreate, kernParamInit
22 | %
23 | % COPYRIGHT : Raquel Urtasun, 2009
24 | %
25 | % COPYRIGHT : Neil D. Lawrence, 2004, 2005, 2006
26 | 
27 | % COLLAB
28 | 
29 | % Both parameters start at one.
30 | defaultInverseWidth = 1;
31 | defaultVariance = 1;
32 | kern.inverseWidth = defaultInverseWidth;
33 | kern.variance = defaultVariance;
34 | kern.nParams = 2;
35 | 
36 | % Constrains parameters positive for optimisation.
37 | constrainedParams = [1 2];
38 | kern.transforms.index = constrainedParams;
39 | kern.transforms.type = optimiDefaultConstraint('positive');
40 | kern.isStationary = true;
41 | 
42 | % The side information itself (kern.additional) is not set here.


--------------------------------------------------------------------------------
/matlab/readEachMovieMarlinStrong.m:
--------------------------------------------------------------------------------
 1 | function [Y, lbls, Ytest] = readEachMovieMarlinStrong(partNo)
 2 | 
 3 | % READEACHMOVIEMARLINSTRONG Reads the EachMovie strong partitions from Marlin.
 4 | % FORMAT
 5 | % DESC reads one of the EachMovie Marlin strong partitions from marlin.mat.
 6 | % ARG partNo : index of the partition to read in.
 7 | % RETURN Y : transpose of weaktrain{partNo}.
 8 | % RETURN lbls : transpose of strongtrain{partNo}.
 9 | % RETURN Ytest : transpose of strongtest{partNo}.
10 | %
11 | % SEEALSO : collabLoadData, readEachMovieMarlinWeak
12 | %
13 | % COPYRIGHT : Raquel Urtasun, 2009
14 | 
15 | % COLLAB
16 | 
17 | % Location of the partitions below the datasets directory.
18 | sep = filesep;
19 | fileName = [datasetsDirectory sep 'collab' sep 'project' sep ...
20 |             'em-mmmf' sep 'data' sep 'marlin.mat'];
21 | 
22 | disp(['Reading ... ',fileName]);
23 | 
24 | % load is expected to bring weaktrain, strongtrain and strongtest into scope.
25 | load(fileName);
26 | 
27 | % Each partition is transposed on the way out.
28 | Y = weaktrain{partNo}';
29 | lbls = strongtrain{partNo}';
30 | Ytest = strongtest{partNo}';
31 | 
32 | %/~
33 | % find movies with too big rates
34 | %max_film = max(Y');
35 | %max_film_test = max(Ytest');
36 | %max_film_train_test = max(lbls');
37 | %ind = find(max_film>6);
38 | %ind_test = find(max_film_test>6);
39 | %ind_train_test = find(max_film_train_test>6);
40 | 
41 | %ind = [ind, ind_test, ind_train_test];
42 | %ind = unique(ind);
43 | 
44 | 
45 | % remove the corrupted data
46 | %Y(ind,:) = [];
47 | %Ytest(ind,:) = [];
48 | %lbls(ind,:) = [];
49 | 
50 | %toRemove = [];
51 | 
52 | % find movies that are not rated
53 | %for i=1:size(Y,1)
54 | % check empy rating movies
55 | %  ind = find(Y(i,:));
56 | %if (length(ind)<1)
57 | %  toRemove = [toRemove, i];
58 | %end
59 | %end
60 |         
61 | %Y(toRemove,:) = [];
62 | %Ytest(toRemove,:) = [];
63 | %lbls(toRemove,:) = [];
64 |         
65 | %~/


--------------------------------------------------------------------------------
/matlab/readEachMovieMarlinWeak.m:
--------------------------------------------------------------------------------
 1 | function [Y, Ytest] = readEachMovieMarlinWeak(partNo)
 2 | 
 3 | % READEACHMOVIEMARLINWEAK Read in Marlin's weak partitions for EachMovie.
 4 | % FORMAT
 5 | % DESC reads one of the EachMovie Marlin weak partitions from marlin.mat.
 6 | % ARG partNo : index of the partition to read in.
 7 | % RETURN Y : transpose of weaktrain{partNo}.
 8 | % RETURN Ytest : transpose of weaktest{partNo}.
 9 | %
10 | % SEEALSO : collabLoadData, readEachMovieMarlinStrong
11 | %
12 | % COPYRIGHT : Raquel Urtasun, 2009
13 | 
14 | % COLLAB
15 | 
16 |   % Location of the partitions below the datasets directory.
17 |   sep = filesep;
18 |   fileName = [datasetsDirectory sep 'collab' sep 'project' sep ...
19 |               'em-mmmf' sep 'data' sep 'marlin.mat'];
20 | 
21 |   disp(['Reading ... ',fileName]);
22 | 
23 |   % load is expected to bring weaktrain and weaktest into scope.
24 |   load(fileName);
25 | 
26 |   % Each partition is transposed on the way out.
27 |   Y = weaktrain{partNo}';
28 |   Ytest = weaktest{partNo}';
29 | 
30 | end
31 | 
32 | %/~
33 | % find movies with too big rates
34 | %max_film = max(Y');
35 | %max_film_test = max(Ytest');
36 | %ind = find(max_film>6);
37 | %ind_test = find(max_film_test>6);
38 | 
39 | %ind = [ind, ind_test];
40 | %ind = unique(ind);
41 | 
42 | 
43 | % remove the corrupted data
44 | %Y(ind,:) = [];
45 | %Ytest(ind,:) = [];
46 | 
47 | %toRemove = [];
48 | 
49 | % find movies that are not rated
50 | %for i=1:size(Y,1)
51 | % check empy rating movies
52 | %  ind = find(Y(i,:));
53 | %if (length(ind)<1)
54 | %  toRemove = [toRemove, i];
55 | %end
56 | %end
57 |         
58 | %Y(toRemove,:) = [];
59 | %Ytest(toRemove,:) = [];
60 | %~/


--------------------------------------------------------------------------------
/matlab/readEachMovieWeak.m:
--------------------------------------------------------------------------------
  1 | function [Y, Ytest] = readEachMovieWeak(partNo)
  2 | 
  3 | % READEACHMOVIEWEAK Read in EachMovie users with over 20 ratings.
  4 | % FORMAT
  5 | % DESC reads in the EachMovie users with at least 20 ratings, caching them
  6 | % in a mat file for later use, and builds a train/test split in which the
  7 | % last stored rating of each selected user is held out for testing.
  8 | % ARG partNo : the partition number. The users are permuted partNo times
  9 | % and the last permutation is used, so partNo must be at least 1.
 10 | % RETURN Y : the training data, a sparse films by users matrix.
 11 | % RETURN Ytest : the test data, a sparse films by users matrix.
 12 | %
 13 | % SEEALSO : readEachMovieMarlinWeak, readEachMovieMarlinStrong
 14 | %
 15 | % COPYRIGHT : Raquel Urtasun, 2009
 16 | 
 17 | % COLLAB
 18 | 
 19 | baseDir = datasetsDirectory;
 20 | dirSep = filesep;
 21 | 
 22 | % load the ratings
 23 | 
 24 | try
 25 |   fileName = [baseDir dirSep 'eachmovie' dirSep 'Vote_more_20.mat'];
 26 |   load(fileName);
 27 | catch
 28 |   % The cache could not be loaded: rebuild it from the raw votes.
 29 |   fileName = [baseDir dirSep 'eachmovie' dirSep 'Vote.txt'];
 30 |   
 31 |   disp(['Reading ... ',fileName]);
 32 |   
 33 |   [users, films, ratings, weights, dates, hours, minutes, seconds] = textread(fileName, '%n\t%n\t%n\t%n\t%s %n:%n:%n');
 34 |   % Apply the same permutation to every column so that rows stay aligned
 35 |   % when the low-activity users are deleted below.
 36 |   ind = randperm(size(users, 1));
 37 |   users = users(ind, :);
 38 |   films = films(ind, :);
 39 |   ratings = ratings(ind, :);
 40 |   weights = weights(ind, :);
 41 |   dates = dates(ind, :);
 42 |   hours = hours(ind, :);
 43 |   minutes = minutes(ind, :);
 44 |   seconds = seconds(ind, :);
 45 |   numUsers = max(users);
 46 |   
 47 |   % erase the users with less than 20 films
 48 |   disp('Removing users with less than 20 ratings');
 49 |   % Count the ratings of every user in one pass instead of one find per user.
 50 |   ratingsPerUser = accumarray(users, 1, [numUsers 1]);
 51 |   isActive = ratingsPerUser >= 20;
 52 |   % Active users are renumbered 1..numActiveUsers in increasing id order;
 53 |   % removed users keep the marker -1.
 54 |   mapUsers = -ones(numUsers, 1);
 55 |   mapUsers(isActive) = (1:sum(isActive))';
 56 |   indTotal = find(~isActive(users));
 57 |   users(indTotal) = [];
 58 |   films(indTotal) = [];
 59 |   ratings(indTotal) = [];
 60 |   weights(indTotal) = [];
 61 |   dates(indTotal) = [];
 62 |   hours(indTotal) = [];
 63 |   minutes(indTotal) = [];
 64 |   seconds(indTotal) = [];
 65 |   users = mapUsers(users);
 66 |   fileName = [baseDir dirSep 'eachmovie' dirSep 'Vote_more_20.mat'];
 67 |   save(fileName,'users','films','ratings','weights','dates','hours','minutes','seconds');
 68 | end
 69 | 
 70 | numUsers = max(users);
 71 | numFilms = max(films);
 72 | 
 73 | % Never ask for more training users than are available.
 74 | numUsersTrain = min(30000, numUsers);
 75 | for i=1:partNo
 76 |   % partition the users at random
 77 |   randIndexUsers = randperm(numUsers);
 78 | end
 79 | 
 80 | % get the films for those users, holding out one rating per user
 81 | indexTest = zeros(numUsersTrain, 1);
 82 | trainParts = cell(numUsersTrain, 1);
 83 | for i=1:numUsersTrain
 84 |   indexUsers = find(users==randIndexUsers(i));
 85 |   
 86 |   % use one for testing and the rest for training
 87 |   indexTest(i) = indexUsers(end);
 88 |   trainParts{i} = indexUsers(1:end-1);
 89 | end
 90 | % Concatenate once rather than growing the index vector inside the loop.
 91 | indexTrain = vertcat(trainParts{:});
 92 | numTrainRatings = length(indexTrain);
 93 | numTestRatings = numUsersTrain;
 94 | Y = spalloc(numFilms, numUsers, numTrainRatings);
 95 | Ytest = spalloc(numFilms, numUsers, numTestRatings);
 96 | 
 97 | indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain));
 98 | indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest));
 99 | 
100 | Y(indTrain) = ratings(indexTrain);
101 | Ytest(indTest) = ratings(indexTest);


--------------------------------------------------------------------------------
/matlab/readMovieLens.m:
--------------------------------------------------------------------------------
  1 | function [Y, lbls, Ytest] = readMovieLens(perc_train,partNo,if_random)
  2 | 
  3 | % READMOVIELENS Read in a given percentage of the movielens data.
  4 | % FORMAT
  5 | % DESC reads the MovieLens 1M Marlin data.
  6 | % ARG perc_train : the percentage to use as training.
  7 | % ARG partNo : the partition number.
  8 | % RETURN Y : the data.
  9 | % RETURN lbls : the lables of the training data. 
 10 | % RETURN Ytest : the test data.
 11 | %
 12 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong
 13 | %
 14 | % COPYRIGHT : Raquel Urtasun, 2009
 15 | 
 16 | % COLLAB
 17 | 
 18 |   lbls = [];
 19 | 
 20 | 
 21 |   baseDir = datasetsDirectory;
 22 |   dirSep = filesep;
 23 |   
 24 |   % load the ratings
 25 |   
 26 |   fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'ratings.dat'];
 27 |   [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n');
 28 |   ind = randperm(size(users, 1));
 29 |   users = users(ind, :);
 30 |   films = films(ind, :);
 31 |   ratings = ratings(ind, :);
 32 |   numUsers = max(users);
 33 |   numFilms = max(films);
 34 |   
 35 |   numRatings = size(users, 1);
 36 |   numTrainRatings = ceil(perc_train*numRatings);
 37 |   Y = spalloc(numFilms, numUsers, numTrainRatings);
 38 |   Ytest = spalloc(numFilms, numUsers, numRatings-numTrainRatings);
 39 |   
 40 |   
 41 |   
 42 |   % this depends on the partition number, and it is ordered
 43 |   numTestRatings = numRatings - numTrainRatings;
 44 |   if (if_random)
 45 |     
 46 |     randn('seed', 1e5);
 47 |     rand('seed', 1e5);
 48 |     for i=1:partNo
 49 |       index = randperm(numRatings);
 50 |     end
 51 |     indexTrain = index(1:numTrainRatings);
 52 |     indexTest = index(1+numTrainRatings:end);
 53 |     
 54 |     
 55 |   else
 56 |     index_rand = 1:numRatings;
 57 |     maxPartNo = 1./(1-perc_train);
 58 |     indexTrain = [];
 59 |     
 60 |     indexTrain = [1:(partNo-1)*numTestRatings]; 
 61 |     indexTrain = [indexTrain, 1+(partNo)*numTestRatings:numRatings];
 62 |     indexTest = [1+(partNo-1)*numTestRatings:partNo*numTestRatings];
 63 |   end
 64 |   indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain));
 65 |   indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest));
 66 |   
 67 |   Y(indTrain) = ratings(indexTrain);
 68 |   Ytest(indTest) = ratings(indexTest);
 69 |   
 70 |   
 71 |   % save the additional information
 72 |   
 73 |   fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat'];
 74 |   %[id, films, Type] = textread(fileName, '%n::%s::%s');
 75 |   
 76 |   % create the structure
 77 |   lbls = zeros(size(Y,1),18);
 78 |   
 79 |   fid = fopen(fileName);
 80 |   readLine = 0;
 81 |   counter = 0;
 82 |   data = [];
 83 |   all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ...
 84 |                 {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ...
 85 |                 {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}];
 86 |   
 87 |   
 88 |   readLine = fgets(fid);
 89 |   while readLine ~= -1
 90 |     
 91 |     parts = stringSplit(readLine,':');
 92 |     id = str2num(parts{1});
 93 |     title = parts(3);
 94 |     genre = parts{5};
 95 |     % createMovieLensExtra(genre);
 96 |     
 97 |     for i=1:length(all_genres)
 98 |       if (strfind(genre,all_genres{i}))
 99 |         lbls(id,i) = 1;
100 |       end
101 |     end
102 |     
103 |     readLine = fgets(fid);
104 |     
105 |   end
106 | end
107 |   
108 |   
109 | 


--------------------------------------------------------------------------------
/matlab/readMovieLens10M.m:
--------------------------------------------------------------------------------
  1 | function [Y, lbls, Ytest] = readMovieLens10M(partNo)
  2 | 
  3 | % READMOVIELENS10M Read in a partition of the movielens 10M data.
  4 | % FORMAT
  5 | % DESC reads the MovieLens 10M Marlin data.
  6 | % ARG partNo : the partition number.
  7 | % RETURN Y : the data.
  8 | % RETURN lbls : the lables of the training data. 
  9 | % RETURN Ytest : the test data.
 10 | %
 11 | % SEEALSO : collabLoadData, readMovieLens
 12 | %
 13 | % COPYRIGHT : Raquel Urtasun, 2009
 14 | 
 15 | % COLLAB
 16 | 
 17 |   lbl = [];
 18 | 
 19 |   baseDir = datasetsDirectory;
 20 |   dirSep = filesep;
 21 |   
 22 |   % load the ratings
 23 |   
 24 |   fileName = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partNo),'.train'];
 25 |   [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n');
 26 |   
 27 |   numUsers = max(users);
 28 |   numFilms = max(films);
 29 |   
 30 |   %keyboard;
 31 |   
 32 |   numTrainRatings = size(users,1);
 33 |   Y = spalloc(numFilms, numUsers, numTrainRatings);
 34 |   
 35 |   
 36 |   indTrain = sub2ind(size(Y), films, users);
 37 |   
 38 |   %keyboard;
 39 |   Y(indTrain) = ratings;
 40 |   
 41 |   %keyboard;
 42 |   
 43 |   
 44 |   users = [];
 45 |   films = [];
 46 |   ratings = [];
 47 |   timeStamp = [];
 48 |   
 49 |         
 50 |   fileNameTest = [baseDir dirSep 'movielens' dirSep '10M' dirSep 'r',num2str(partNo),'.test'];
 51 |   [users_test, films_test, ratings_test, timeStamp_test] = textread(fileNameTest, '%n::%n::%n::%n');
 52 |   
 53 |   numTestRatings = size(users_test,1);
 54 |   numRatings = numTrainRatings + numTestRatings;
 55 |   Ytest = spalloc(numFilms, numUsers, numTestRatings);
 56 |   
 57 |   
 58 |   
 59 |   % this depends on the partition number, and it is ord;
 60 |   
 61 |   
 62 |   indTest = sub2ind(size(Ytest), films_test, users_test);
 63 |   Ytest(indTest) = ratings_test;
 64 |   
 65 |   
 66 |   % save the additional information
 67 |   
 68 |   %keyboard
 69 |   
 70 |   fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat'];
 71 |   
 72 |   % create the structure
 73 |   lbls = zeros(size(Y,1),18);
 74 |   
 75 |   fid = fopen(fileName);
 76 |   readLine = 0;
 77 |   counter = 0;
 78 |   data = [];
 79 |   all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ...
 80 |                 {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ...
 81 |                 {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}];
 82 |   
 83 |   
 84 |   readLine = fgets(fid);
 85 |   while readLine ~= -1
 86 |     
 87 |     parts = stringSplit(readLine,':');
 88 |     id = str2num(parts{1});
 89 |     title = parts(3);
 90 |     genre = parts{5};
 91 |     % createMovieLensExtra(genre);
 92 |     
 93 |     for i=1:length(all_genres)
 94 |       if (strfind(genre,all_genres{i}))
 95 |         lbls(id,i) = 1;
 96 |       end
 97 |     end
 98 |     
 99 |     readLine = fgets(fid);
100 |     
101 |   end
102 | end
103 |   
104 |   
105 |   


--------------------------------------------------------------------------------
/matlab/readMovieLens10MCell.m:
--------------------------------------------------------------------------------
 1 | function [Y, Ytest] = readMovieLens10MCell(partNo)
 2 | 
 3 | % READMOVIELENS10MCELL Read the 10M Movielens into a cell array.
 4 | % FORMAT
 5 | % DESC reads one partition of the 10M MovieLens data and converts the
 6 | % training and test ratings with loadSparse10M. It is too big to do the
 7 | % regular way.
 8 | % ARG partNo : the part of the 10M MovieLens data to read in, used to
 9 | % form the file names r<partNo>.train and r<partNo>.test.
10 | % RETURN Y : the training data as returned by loadSparse10M.
11 | % RETURN Ytest : the test data as returned by loadSparse10M.
12 | %
13 | % SEEALSO : collabLoadData
14 | %
15 | % COPYRIGHT : Raquel Urtasun, 2009
16 | 
17 | % COLLAB
18 | 
19 |   % Both files share everything up to the extension.
20 |   sep = filesep;
21 |   partPrefix = [datasetsDirectory sep 'movielens' sep '10M' sep 'r',num2str(partNo)];
22 |   ratingFormat = '%n::%n::%n::%n';
23 | 
24 |   % training ratings
25 |   fileName = [partPrefix,'.train'];
26 |   [users, films, ratings, timeStamp] = textread(fileName, ratingFormat);
27 |   Y = loadSparse10M(users,films,ratings);
28 | 
29 |   % test ratings
30 |   fileName = [partPrefix,'.test'];
31 |   [users_test, films_test, ratings_test, timeStamp] = textread(fileName, ratingFormat);
32 |   Ytest = loadSparse10M(users_test,films_test,ratings_test);


--------------------------------------------------------------------------------
/matlab/readMovieLens10MCellLetter.m:
--------------------------------------------------------------------------------
 1 | function [Y, Ytest] = readMovieLens10MCellLetter(partLetter)
 2 | 
 3 | % READMOVIELENS10MCELLLETTER Read the 10M Movielens into a cell array.
 4 | % FORMAT
 5 | % DESC reads one partition of the 10M MovieLens data and converts the
 6 | % training and test ratings with loadSparse10M. It is too big to do the
 7 | % regular way.
 8 | % ARG partLetter : the part of the 10M MovieLens data to read in, passed
 9 | % through num2str to form the file names r<partLetter>.train and
10 | % r<partLetter>.test.
11 | % RETURN Y : the training data as returned by loadSparse10M.
12 | % RETURN Ytest : the test data as returned by loadSparse10M.
13 | %
14 | % SEEALSO : collabLoadData
15 | %
16 | % COPYRIGHT : Raquel Urtasun, 2009
17 | 
18 | % COLLAB
19 | 
20 |   % Both files share everything up to the extension.
21 |   sep = filesep;
22 |   partPrefix = [datasetsDirectory sep 'movielens' sep '10M' sep 'r',num2str(partLetter)];
23 |   ratingFormat = '%n::%n::%n::%n';
24 | 
25 |   % training ratings
26 |   fileName = [partPrefix,'.train'];
27 |   [users, films, ratings, timeStamp] = textread(fileName, ratingFormat);
28 |   Y = loadSparse10M(users,films,ratings);
29 | 
30 |   % test ratings
31 |   fileName = [partPrefix,'.test'];
32 |   [users_test, films_test, ratings_test, timeStamp] = textread(fileName, ratingFormat);
33 |   Ytest = loadSparse10M(users_test,films_test,ratings_test);
34 | end


--------------------------------------------------------------------------------
/matlab/readMovieLensMarlinStrong.m:
--------------------------------------------------------------------------------
 1 | function [Y, lbls, Ytest] = readMovieLensMarlinStrong(partNo)
 2 | 
 3 | % READMOVIELENSMARLINSTRONG Read in Marlin's strong partitions for movielens 1M.
 4 | % FORMAT
 5 | % DESC reads one of the Movielens 1M Marlin strong partitions from
 6 | % marlin.mat.
 7 | % ARG partNo : index of the partition to read in.
 8 | % RETURN Y : transpose of weaktrain{partNo}.
 9 | % RETURN lbls : transpose of strongtrain{partNo}.
10 | % RETURN Ytest : transpose of strongtest{partNo}.
11 | %
12 | % SEEALSO : collabLoadData, readMovieLensMarlinWeak
13 | %
14 | % COPYRIGHT : Raquel Urtasun, 2009
15 | 
16 | % COLLAB
17 | 
18 | % Location of the partitions below the datasets directory.
19 | sep = filesep;
20 | fileName = [datasetsDirectory sep 'collab' sep 'project' sep ...
21 |             '1mml-mmmf' sep 'data' sep 'marlin.mat'];
22 | 
23 | disp(['Reading ... ',fileName]);
24 | 
25 | % load is expected to bring weaktrain, strongtrain and strongtest into scope.
26 | load(fileName);
27 | 
28 | % Each partition is transposed on the way out.
29 | Y = weaktrain{partNo}';
30 | lbls = strongtrain{partNo}';
31 | Ytest = strongtest{partNo}';


--------------------------------------------------------------------------------
/matlab/readMovieLensMarlinWeak.m:
--------------------------------------------------------------------------------
 1 | function [Y, Ytest] = readMovieLensMarlinWeak(partNo)
 2 | 
 3 | % READMOVIELENSMARLINWEAK Read in Marlin's weak partitions for movielens 1M.
 4 | % FORMAT
 5 | % DESC reads one of the Movielens 1M Marlin weak partitions from
 6 | % marlin.mat.
 7 | % ARG partNo : index of the partition to read in.
 8 | % RETURN Y : transpose of weaktrain{partNo}.
 9 | % RETURN Ytest : transpose of weaktest{partNo}.
10 | %
11 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong
12 | %
13 | % COPYRIGHT : Raquel Urtasun, 2009
14 | 
15 | % COLLAB
16 | 
17 | % Location of the partitions below the datasets directory.
18 | sep = filesep;
19 | fileName = [datasetsDirectory sep 'collab' sep 'project' sep ...
20 |             '1mml-mmmf' sep 'data' sep 'marlin.mat'];
21 | 
22 | disp(['Reading ... ',fileName]);
23 | 
24 | % load is expected to bring weaktrain and weaktest into scope.
25 | load(fileName);
26 | 
27 | % Each partition is transposed on the way out.
28 | Y = weaktrain{partNo}';
29 | Ytest = weaktest{partNo}';


--------------------------------------------------------------------------------
/matlab/readMovieLensStrong.m:
--------------------------------------------------------------------------------
  1 | function [Y,lbls,Ytest] = readMovieLensStrong(partNo)
  2 | 
  3 | % READMOVIELENSSTRONG Read in the strong partitions for the Movielens.
  4 | % FORMAT
  5 | % DESC reads the MovieLens 1M Marlin weak partitions.
  6 | % ARG partNo : the part of the 1M MovieLens data to read in. 
  7 | % RETURN Y : the data.
  8 | % RETURN lbls : addiitonal information.
  9 | % RETURN Ytest : the test data.
 10 | %
 11 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong
 12 | %
 13 | % COPYRIGHT : Raquel Urtasun, 2009
 14 | 
 15 | % COLLAB
 16 | 
 17 | 
 18 | 
 19 |   
 20 |   baseDir = datasetsDirectory;
 21 |   dirSep = filesep;
 22 |   
 23 |   % load the ratings
 24 |   
 25 |   fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'ratings.dat'];
 26 |   [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n');
 27 |   ind = randperm(size(users, 1));
 28 |   users = users(ind, :);
 29 |   films = films(ind, :);
 30 |   ratings = ratings(ind, :);
 31 |   numUsers = max(users);
 32 |   numFilms = max(films);
 33 |   
 34 |   numRatings = size(users, 1);
 35 |   numUsersTrain = 5000;
 36 |   numUsers = max(users);
 37 |   for i=1:partNo
 38 |     % partition the users at random
 39 |     randIndexUsers = randperm(numUsers);
 40 |     
 41 |   end
 42 |   % get the films for those users
 43 |   numTrainRatings = 0;
 44 |   indexTrain = [];
 45 |   for i=1:numUsersTrain
 46 |     indexUsers = find(users==randIndexUsers(i));
 47 |     numTrainRatings = numTrainRatings + length(indexUsers);
 48 |     indexTrain = [indexTrain; indexUsers]; % ?? this takes too much time
 49 |   end
 50 |   Y = spalloc(numFilms, numUsers, numTrainRatings);
 51 |   Ytest = spalloc(numFilms, numUsers, numRatings-numTrainRatings);
 52 |   
 53 |   indexTest = 1:length(users);
 54 |   indexTest(indexTrain) = [];
 55 |   
 56 |   % this depends on the partition number, and it is ordered
 57 |   numTestRatings = numRatings - numTrainRatings;
 58 |   
 59 |   indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain));
 60 |   indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest));
 61 |   
 62 |   Y(indTrain) = ratings(indexTrain);
 63 |   Ytest(indTest) = ratings(indexTest);
 64 |   
 65 |   
 66 |   % save the additional information
 67 |   
 68 |   fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat'];
 69 |   %[id, films, Type] = textread(fileName, '%n::%s::%s');
 70 |   
 71 |   % create the structure
 72 |   lbls = zeros(size(Y,1),18);
 73 |   
 74 |   fid = fopen(fileName);
 75 |   readLine = 0;
 76 |   counter = 0;
 77 |   data = [];
 78 |   all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ...
 79 |                 {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ...
 80 |                 {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}];
 81 |   
 82 |   
 83 |   readLine = fgets(fid);
 84 |   while readLine ~= -1
 85 |     
 86 |     parts = stringSplit(readLine,':');
 87 |     id = str2num(parts{1});
 88 |     title = parts(3);
 89 |     genre = parts{5};
 90 |     % createMovieLensExtra(genre);
 91 |     
 92 |     for i=1:length(all_genres)
 93 |       if (strfind(genre,all_genres{i}))
 94 |         lbls(id,i) = 1;
 95 |       end
 96 |     end
 97 |           
 98 |     readLine = fgets(fid);
 99 |     
100 |   end
101 | end
102 | 
103 |         
104 | 


--------------------------------------------------------------------------------
/matlab/readMovieLensWeak.m:
--------------------------------------------------------------------------------
  1 | function [Y, lbls, Ytest] = readMovieLensWeak(partNo)
  2 | 
  3 | % READMOVIELENSWEAK Read in the weak partitions for the Movielens.
  4 | % FORMAT
  5 | % DESC reads the MovieLens 1M Marlin weak partitions.
  6 | % ARG partNo : the part of the 1M MovieLens data to read in. 
  7 | % RETURN Y : the data.
  8 | % RETURN lbls : addiitonal information.
  9 | % RETURN Ytest : the test data.
 10 | %
 11 | % SEEALSO : collabLoadData, readMovieLensMarlinStrong
 12 | %
 13 | % COPYRIGHT : Raquel Urtasun, 2009
 14 | 
 15 | % COLLAB
 16 | 
 17 | 
 18 | 
 19 | baseDir = datasetsDirectory;
 20 | dirSep = filesep;
 21 | 
 22 | % load the ratings
 23 | 
 24 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'ratings.dat'];
 25 | [users, films, ratings, timeStamp] = textread(fileName, '%n::%n::%n::%n');
 26 | ind = randperm(size(users, 1));
 27 | users = users(ind, :);
 28 | films = films(ind, :);
 29 | ratings = ratings(ind, :);
 30 | numUsers = max(users);
 31 | numFilms = max(films);
 32 | 
 33 | numRatings = size(users, 1);
 34 | numUsersTrain = 5000;
 35 | numUsers = max(users);
 36 | for i=1:partNo
 37 |     % partition the users at random
 38 |     randIndexUsers = randperm(numUsers);
 39 | 
 40 | end
 41 | % get the films for those users
 42 | numTrainRatings = 0;
 43 | indexTrain = [];
 44 | indexTest = [];
 45 | for i=1:numUsersTrain
 46 |     indexUsers = find(users==randIndexUsers(i));
 47 |     
 48 |     indexTest = [indexTest; indexUsers(end)];
 49 | 
 50 |     % use one for testing and one for training
 51 |     indexUsers(end) = [];
 52 | 
 53 |     numTrainRatings = numTrainRatings + length(indexUsers);
 54 |     indexTrain = [indexTrain; indexUsers]; % ?? this takes too much time
 55 | end
 56 | numTestRatings = numUsersTrain;
 57 | Y = spalloc(numFilms, numUsers, numTrainRatings);
 58 | Ytest = spalloc(numFilms, numUsers, numTestRatings);
 59 | numRatings = numTrainRatings + numTestRatings;
 60 | 
 61 | %indexTest = 1:length(users);
 62 | %indexTest(indexTrain) = [];
 63 | 
 64 | indTrain = sub2ind(size(Y), films(indexTrain), users(indexTrain));
 65 | indTest = sub2ind(size(Ytest), films(indexTest), users(indexTest));
 66 | 
 67 | Y(indTrain) = ratings(indexTrain);
 68 | Ytest(indTest) = ratings(indexTest);
 69 | 
 70 | 
 71 | % save the additional information
 72 | 
 73 | fileName = [baseDir dirSep 'movielens' dirSep 'large' dirSep 'movies.dat'];
 74 | %[id, films, Type] = textread(fileName, '%n::%s::%s');
 75 | 
 76 | % create the structure
 77 | lbls = zeros(size(Y,1),18);
 78 | 
 79 | fid = fopen(fileName);
 80 | readLine = 0;
 81 | counter = 0;
 82 | data = [];
 83 | all_genres = [{'Action'},{'Adventure'},{'Animation'},{'Children''s'}, ...
 84 |     {'Comedy'},{'Crime'},{'Documentary'},{'Drama'},{'Fantasy'},{'Film-Noir'}, ...
 85 |     {'Horror'},{'Musical'},{'Mystery'},{'Romance'},{'Sci-Fi'},{'Thriller'},{'War'},{'Western'}];
 86 | 
 87 | 
 88 | readLine = fgets(fid);
 89 | while readLine ~= -1
 90 | 
 91 |   parts = stringSplit(readLine,':');
 92 |   id = str2num(parts{1});
 93 |   title = parts(3);
 94 |   genre = parts{5};
 95 |   % createMovieLensExtra(genre);
 96 | 
 97 |   for i=1:length(all_genres)
 98 |     if (strfind(genre,all_genres{i}))
 99 |         lbls(id,i) = 1;
100 |     end
101 |   end
102 | 
103 |   readLine = fgets(fid);
104 | 
105 | end
106 | 
107 | 
108 |         
109 | 


--------------------------------------------------------------------------------
/matlab/restartNetflix1.m:
--------------------------------------------------------------------------------
% RESTARTNETFLIX1 Restart collaborative filtering on the netflix data.
%
% Script. Loads the netflix data and a stored state for experiment 1, sets
% up the optimisation options, continues the optimisation with
% collabOptimise and saves the resulting model to demNetflix1.mat.

% COLLAB

% Seed both random number generators with the same fixed value.
randn('seed', 1e5);
rand('seed', 1e5);

experimentNo = 1;
dataSetName = 'netflix';

% The variables 'model' and 'Y' used below are not created in this script;
% they are expected to be provided by the two files loaded here.
% NOTE(review): anything stored in these files with the same name as a
% variable defined above would overwrite it - confirm the file contents.
load /local/data/netFlixDataProbe.mat
% Presumably a snapshot written by an earlier run: its name matches the
% saveName prefix set below and startUser is the number in its name plus
% one - TODO confirm.
load demNetflix1_1875881

% Start from the default optimisation options and override the fields
% needed for the restart. The meaning of each field is defined by
% collabOptimise, which is not visible from this script.
options = collabOptimiseOptions;
options.numIters = 5;
options.showEvery = 400;
options.saveEvery = 20000;
options.currIters = 17*400;
options.randState = 1e5;
options.startIter = 1;
options.runIter = 1875882;
options.startUser = 1875882;
% Capitalise the first letter of the data set name ('netflix' -> 'Netflix').
capName = dataSetName;
capName(1) = upper(capName(1));
% Resulting prefix is 'demNetflix1_'.
options.saveName = ['dem' capName num2str(experimentNo) '_'];
options.showLikelihood = false;
% No terminating semicolon: the returned model is displayed.
model = collabOptimise(model, Y, options)
  
% Disabled code: root mean square error of the posterior mean predictions
% on the non-zero entries of Ytest.
% val = 0;
% tot = 0;
% for i = 1:size(Y, 2)       
%   ind = find(Ytest(:, i));
%   elim = find(ind>size(model.X, 1));
%   tind = ind;
%   tind(elim) = [];
%   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
%   a = Ytest(tind, i) - mu; 
%   a = [a; Ytest(elim, i)];
%   val = val + a'*a;
%   tot = tot + length(a);
% end
% error = sqrt(val/tot);

% Save the results (only the variable 'model') to demNetflix1.mat.
capName = dataSetName;
capName(1) = upper(capName(1));
save(['dem' capName num2str(experimentNo) '.mat'], 'model');
48 | 


--------------------------------------------------------------------------------
/matlab/restartNetflix5.m:
--------------------------------------------------------------------------------
% RESTARTNETFLIX5 Restart collaborative filtering on the netflix data.
%
% Script. Loads the netflix data and a stored state for experiment 5, sets
% up the optimisation options from the stored state, continues the
% optimisation with collabOptimise and saves the resulting model to
% demNetflix5.mat.

% COLLAB

% Seed both random number generators with the same fixed value.
randn('seed', 1e5);
rand('seed', 1e5);

experimentNo = 5;
dataSetName = 'netflix';

% The variables 'model', 'Y', 'randState', 'randnState', 'iters', 'runIter'
% and 'user' used below are not created in this script; they are expected
% to be provided by the two files loaded here.
% NOTE(review): anything stored in these files with the same name as a
% variable defined above would overwrite it - confirm the file contents.
load /local/data/netFlixDataProbe.mat
% Presumably a snapshot written by an earlier run: its name matches the
% saveName prefix set below - TODO confirm.
load demNetflix5_2426657

disp(['Restarting ' dataSetName ' experiment ' num2str(experimentNo)]);
% Start from the default optimisation options and override the fields
% needed for the restart. The meaning of each field is defined by
% collabOptimise, which is not visible from this script.
options = collabOptimiseOptions;
options.numIters = 5;
options.showEvery = 400;
options.saveEvery = 20000;
% Restore the stored random states and iteration counters.
options.randState = randState;
options.randnState = randnState;
options.startIter = iters;
options.runIter = runIter;
% Continue with the user after the stored one.
options.startUser = user+1;
% Capitalise the first letter of the data set name ('netflix' -> 'Netflix').
capName = dataSetName;
capName(1) = upper(capName(1));
% Resulting prefix is 'demNetflix5_'.
options.saveName = ['dem' capName num2str(experimentNo) '_'];
options.showLikelihood = false;
% No terminating semicolon: the returned model is displayed.
model = collabOptimise(model, Y, options)
  
% Disabled code: root mean square error of the posterior mean predictions
% on the non-zero entries of Ytest.
% val = 0;
% tot = 0;
% for i = 1:size(Y, 2)       
%   ind = find(Ytest(:, i));
%   elim = find(ind>size(model.X, 1));
%   tind = ind;
%   tind(elim) = [];
%   [mu, varsig] = collabPosteriorMeanVar(model, Y(:, i), model.X(tind, :));
%   a = Ytest(tind, i) - mu; 
%   a = [a; Ytest(elim, i)];
%   val = val + a'*a;
%   tot = tot + length(a);
% end
% error = sqrt(val/tot);

% Save the results (only the variable 'model') to demNetflix5.mat.
capName = dataSetName;
capName(1) = upper(capName(1));
save(['dem' capName num2str(experimentNo) '.mat'], 'model');
49 | 


--------------------------------------------------------------------------------
/matlab/splitProbeData.m:
--------------------------------------------------------------------------------
 1 | fid = fopen('probe.txt');
 2 | count = 0;
 3 | probeFilms = cell(17770, 1);
 4 | while 1
 5 |   count = count + 1;
 6 |   nextLine = fgetl(fid);
 7 |   if ~ischar(nextLine), break, end
 8 |   if nextLine(end)==':'
 9 |     film = str2num(nextLine(1:end-1));
10 |   else
11 |     probeFilms{film} = [probeFilms{film}; str2num(nextLine)];
12 |   end
13 |   if(~rem(count, 10000))
14 |     fprintf('Loaded in %d rating locations.\n', count)
15 |   end
16 | end
17 | fclose(fid);
18 | Yprobe = cell(2649429, 3);
19 | 
20 | for i = 1:length(probeFilms)
21 |   for j = 1:length(probeFilms{i})
22 |     userId = probeFilms{i}(j);
23 |     ind = find(Y{userId, 1}==i);
24 |     
25 |     if isempty(Yprobe{userId, 1})
26 |       Yprobe{userId, 1} = zeros(20, 1);
27 |       Yprobe{userId, 2} = zeros(20, 1);
28 |       Yprobe{userId, 3} = 0;
29 |     end
30 |     if Yprobe{userId, 1}(end) ~= 0
31 |       Yprobe{userId, 1} = [Yprobe{userId, 1}; zeros(20, 1)];
32 |       Yprobe{userId, 2} = [Yprobe{userId, 2}; zeros(20, 1)];
33 |     end
34 |     Yprobe{userId, 3} = Yprobe{userId, 3} + 1;
35 |     Yprobe{userId, 1}(Yprobe{userId, 3}) =  i;
36 |     Yprobe{userId, 2}(Yprobe{userId, 3}) =  Y{userId,2}(ind);
37 |     Y{userId, 1}(ind) = [];
38 |     Y{userId, 2}(ind) = [];
39 |     Y{userId, 3} = Y{userId, 3} - 1;
40 |   end
41 |   
42 |   if(~rem(i, 10))
43 |     fprintf('Done %d films.\n', i)
44 |   end
45 | end


--------------------------------------------------------------------------------
/python/demNetflix10.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | #Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=10,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=10, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/demNetflix2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=2,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=2, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | b
21 | 


--------------------------------------------------------------------------------
/python/demNetflix3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | 
12 | try:
13 |     collab.run(latentDim=3,  \
14 |                dataSetName='netflix', \
15 |                experimentNo=3, \
16 |                options=opt) 
17 | except:
18 |     import pdb, sys
19 |     e, m, tb = sys.exc_info()
20 |     pdb.post_mortem(tb)
21 | 


--------------------------------------------------------------------------------
/python/demNetflix4.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=4,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=4, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/demNetflix5.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=5,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=5, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/demNetflix6.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=6,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=6, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/demNetflix7.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=7,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=7, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/demNetflix8.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=8,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=8, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/demNetflix9.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | #Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | 
11 | try:
12 |     collab.run(latentDim=9,  \
13 |                dataSetName='netflix', \
14 |                experimentNo=9, \
15 |                options=opt) 
16 | except:
17 |     import pdb, sys
18 |     e, m, tb = sys.exc_info()
19 |     pdb.post_mortem(tb)
20 | 


--------------------------------------------------------------------------------
/python/restartNetflix10.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 0, 
12 |                    startCount = 200000, 
13 |                    loadUser = 1786429, 
14 |                    latentDim = 10, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 10, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try:
11 |     collab.restart(loadIter = 9, 
12 |                    startCount = 4600000, 
13 |                    loadUser = 1288699, 
14 |                    latentDim = 2, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 2, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 5, 
12 |                    startCount = 2660000, 
13 |                    loadUser = 1499180, 
14 |                    latentDim = 3, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 3, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix4.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 5, 
12 |                    startCount = 2520000, 
13 |                    loadUser = 1776294, 
14 |                    latentDim = 4, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 4, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix5.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 9, 
12 |                    startCount = 4640000, 
13 |                    loadUser = 1361446, 
14 |                    latentDim = 5, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 5, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix6.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 6, 
12 |                    startCount = 3320000, 
13 |                    loadUser = 2600176,
14 |                    latentDim = 6, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 6, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix7.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 5, 
12 |                    startCount = 2620000, 
13 |                    loadUser = 2190625, 
14 |                    latentDim = 7, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 7, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix8.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 3, 
12 |                    startCount = 1520000, 
13 |                    loadUser = 560608, 
14 |                    latentDim = 8, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 8, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------
/python/restartNetflix9.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | 
 4 | 
 5 | # Try collaborative filtering on the netflix data.
 6 | import collab
 7 | import ndlml as nl
 8 | opt = collab.options()
 9 | opt.resultsBaseDir = "/local/data/results/netflix/"
10 | try: 
11 |     collab.restart(loadIter = 2, 
12 |                    startCount = 1440000, 
13 |                    loadUser = 2331578, 
14 |                    latentDim = 9, 
15 |                    dataSetName = 'netflix', 
16 |                    experimentNo = 9, 
17 |                    options=opt) 
18 | except:
19 |     import pdb, sys
20 |     e, m, tb = sys.exc_info()
21 |     pdb.post_mortem(tb)
22 | 
23 | 


--------------------------------------------------------------------------------