├── .gitignore ├── README.md ├── code ├── data │ ├── movies.mat │ ├── movie_ids.txt │ └── movieParams.mat ├── normalizeRatings.m ├── loadMovieList.m ├── cofiCostFunc.m ├── movie_recommender.m └── fmincg.m ├── Math_Project_PPT_1.pptx └── Recommender_Systems_Using_Collaborative_Filtering_2.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Recommender_Systems_Using_Collaborative_Filtering -------------------------------------------------------------------------------- /code/data/movies.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dhanvanth0905/Recommender_Systems_Using_Collaborative_Filtering/HEAD/code/data/movies.mat -------------------------------------------------------------------------------- /Math_Project_PPT_1.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dhanvanth0905/Recommender_Systems_Using_Collaborative_Filtering/HEAD/Math_Project_PPT_1.pptx -------------------------------------------------------------------------------- /code/data/movie_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dhanvanth0905/Recommender_Systems_Using_Collaborative_Filtering/HEAD/code/data/movie_ids.txt -------------------------------------------------------------------------------- /code/data/movieParams.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dhanvanth0905/Recommender_Systems_Using_Collaborative_Filtering/HEAD/code/data/movieParams.mat -------------------------------------------------------------------------------- 
/Recommender_Systems_Using_Collaborative_Filtering_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dhanvanth0905/Recommender_Systems_Using_Collaborative_Filtering/HEAD/Recommender_Systems_Using_Collaborative_Filtering_2.pdf
--------------------------------------------------------------------------------
/code/normalizeRatings.m:
--------------------------------------------------------------------------------
function [Ynorm, Ymean] = normalizeRatings(Y, R)
%NORMALIZERATINGS Subtract each row's mean observed rating from that row.
%   [Ynorm, Ymean] = NORMALIZERATINGS(Y, R)
%
%   Y      ratings matrix; each row is normalized independently.
%   R      indicator matrix of the same size; R(i, j) == 1 marks Y(i, j)
%          as an observed rating.
%
%   Ymean  column vector with one entry per row of Y: the mean of that
%          row's observed ratings, or 0 when the row has none.
%   Ynorm  same size as Y; observed entries hold Y - Ymean, every other
%          entry is 0.

numRows = size(Y, 1);
Ymean = zeros(numRows, 1);
Ynorm = zeros(size(Y));

for i = 1:numRows
    idx = find(R(i, :) == 1);
    if isempty(idx)
        % mean([]) is NaN; leave Ymean(i) at 0 so a row without any
        % observed rating cannot turn downstream predictions into NaN.
        continue;
    end
    Ymean(i) = mean(Y(i, idx));
    Ynorm(i, idx) = Y(i, idx) - Ymean(i);
end

end
--------------------------------------------------------------------------------
/code/loadMovieList.m:
--------------------------------------------------------------------------------
function movieList = loadMovieList()
%LOADMOVIELIST Read the movie titles from data/movie_ids.txt.
%   movieList = LOADMOVIELIST() returns a 1682x1 cell array of titles.
%   Each line of the file is split at its first space; the leading token
%   is discarded and the trimmed remainder is stored as the title.
%
%   Raises loadMovieList:cannotOpen when the file cannot be opened and
%   loadMovieList:shortFile when it has fewer lines than expected.

fid = fopen('data/movie_ids.txt');
if fid == -1
    error('loadMovieList:cannotOpen', ...
          'Could not open data/movie_ids.txt (run from the code directory).');
end
% Close the handle on every exit path, including errors raised below.
closeFile = onCleanup(@() fclose(fid));

n = 1682;  % number of titles expected in the file

movieList = cell(n, 1);
for i = 1:n
    line = fgets(fid);
    if ~ischar(line)
        % fgets returns -1 at end of file; strtok would fail on it.
        error('loadMovieList:shortFile', ...
              'data/movie_ids.txt ended after %d of %d entries.', i - 1, n);
    end
    [~, movieName] = strtok(line, ' ');
    movieList{i} = strtrim(movieName);
end

end
--------------------------------------------------------------------------------
/code/cofiCostFunc.m:
--------------------------------------------------------------------------------
function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ...
                                  num_features, lambda)
%COFICOSTFUNC Regularized collaborative-filtering cost and gradient.
%   [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
%                            num_features, lambda)
%
%   params  column vector [X(:); Theta(:)], where X is
%           num_movies x num_features and Theta is
%           num_users x num_features.
%   Y       num_movies x num_users ratings matrix.
%   R       same size as Y; R(i, j) == 1 marks an observed rating.
%   lambda  regularization weight applied to both X and Theta.
%
%   J       scalar cost: half the R-weighted squared error plus
%           lambda/2 times the sum of squares of X and Theta.
%   grad    column vector [X_grad(:); Theta_grad(:)].

X = reshape(params(1:num_movies*num_features), num_movies, num_features);
Theta = reshape(params(num_movies*num_features+1:end), ...
                num_users, num_features);

% Residual for every (movie, user) pair, computed once and shared by the
% cost and both gradients.
err = X*Theta' - Y;

J = 1/2 * sum(sum(err.^2.*R)) + ...
    lambda/2 * (sum(sum(Theta.^2)) + sum(sum(X.^2)));

% Only entries with R == 1 contribute to the gradient. Zero the others by
% assignment (not multiplication) so NaN placeholders in unobserved cells
% of Y cannot leak into the gradient.
maskedErr = err;
maskedErr(R ~= 1) = 0;

X_grad = maskedErr*Theta + lambda*X;
Theta_grad = maskedErr'*X + lambda*Theta;

grad = [X_grad(:); Theta_grad(:)];

end
--------------------------------------------------------------------------------
/code/movie_recommender.m:
--------------------------------------------------------------------------------
% MOVIE_RECOMMENDER  Train collaborative filtering on the bundled ratings
% plus one new user's ratings, then print that user's top 10 predictions.
% Run from the code directory so the data/ relative paths resolve.

movieList = loadMovieList();

% Ratings of the new user, indexed like movieList; 0 means "not rated".
my_ratings = zeros(numel(movieList), 1);

my_ratings(1) = 4;
my_ratings(98) = 2;
my_ratings(7) = 3;
my_ratings(12) = 5;
my_ratings(54) = 4;
my_ratings(64) = 5;
my_ratings(66) = 3;
my_ratings(82) = 5;
my_ratings(183) = 4;
my_ratings(226) = 5;
my_ratings(355) = 5;

fprintf('\n\nNew user ratings:\n');
for i = 1:length(my_ratings)
    if my_ratings(i) > 0
        fprintf('Rated %d for %s\n', my_ratings(i), ...
                movieList{i});
    end
end


fprintf('\nTraining collaborative filtering...\n');

% Load into a struct so the file cannot silently overwrite workspace
% variables; only Y and R are used.
data = load('data/movies.mat');

% Prepend the new user as column 1 of both matrices.
Y = [my_ratings data.Y];
R = [(my_ratings ~= 0) data.R];

[Ynorm, Ymean] = normalizeRatings(Y, R);

num_users = size(Y, 2);
num_movies = size(Y, 1);
num_features = 10;

% Random initialization of movie features and user parameters.
X = randn(num_movies, num_features);
Theta = randn(num_users, num_features);

initial_parameters = [X(:); Theta(:)];

options = optimset('GradObj', 'on', 'MaxIter', 100);

lambda = 10;
theta = fmincg (@(t)(cofiCostFunc(t, Ynorm, R, num_users, num_movies, ...
                                  num_features, lambda)), ...
                initial_parameters, options);

% Unpack the optimized parameter vector.
X = reshape(theta(1:num_movies*num_features), num_movies, num_features);
Theta = reshape(theta(num_movies*num_features+1:end), ...
                num_users, num_features);

fprintf('Recommender system learning completed.\n');

% Column 1 of the prediction matrix is the new user; add the per-movie
% mean back because training used mean-normalized ratings.
p = X * Theta';
my_predictions = p(:,1) + Ymean;

% movieList from the top of the script is reused here; the file is not
% read a second time.
[r, ix] = sort(my_predictions, 'descend');
fprintf('\nTop recommendations for you:\n');
for i=1:10
    j = ix(i);
    fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ...
            movieList{j});
end

fprintf('\n\nOriginal ratings provided:\n');
for i = 1:length(my_ratings)
    if my_ratings(i) > 0
        fprintf('Rated %d for %s\n', my_ratings(i), ...
                movieList{i});
    end
end
--------------------------------------------------------------------------------
/code/fmincg.m:
--------------------------------------------------------------------------------
function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
%FMINCG Minimize a differentiable function with conjugate gradients.
%   [X, fX, i] = FMINCG(f, X, options, P1, P2, P3, P4, P5)
%
%   f        function handle or name; called as feval(f, X, P1, ...) and
%            expected to return [value, gradient].
%   X        starting point (column vector).
%   options  optional struct; options.MaxIter sets the run length
%            (default 100). A positive length counts line searches, a
%            negative length counts function evaluations. A two-element
%            length supplies a second value used to scale the initial
%            step.
%   P1..P5   optional extra arguments forwarded to f.
%
%   X        the point found.
%   fX       column vector of function values, one per successful line
%            search.
%   i        number of line searches or function evaluations used,
%            depending on the sign of the length.

% NOTE: the local variable "length" shadows the built-in length() inside
% this function.
if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter')
    length = options.MaxIter;
else
    length = 100;
end


RHO = 0.01;    % sufficient-decrease factor: reject if f2 > f1 + z1*RHO*d1
SIG = 0.5;     % slope factor: reject if d2 > -SIG*d1, accept if d2 > SIG*d1
INT = 0.1;     % keeps interpolated steps away from the bracket ends
EXT = 3.0;     % extrapolate to at most EXT times the current step
MAX = 20;      % maximum function evaluations per line search
RATIO = 100;   % maximum growth factor applied to the initial step size

% Build the call string 'feval(f, X,P1,...,Pk)' for the extra arguments
% that were actually supplied.
argstr = ['feval(f, X'];
for i = 1:(nargin - 3)
  argstr = [argstr, ',P', int2str(i)];
end
argstr = [argstr, ')'];

% A two-element length carries the initial step reduction in element 2.
if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end
S=['Iteration '];

i = 0;                                 % run-length counter
ls_failed = 0;                         % no previous line search has failed
fX = [];
[f1 df1] = eval(argstr);               % value and gradient at the start
i = i + (length<0);                    % count evaluations when length < 0
s = -df1;                              % first direction: steepest descent
d1 = -s'*s;                            % slope along s
z1 = red/(1-d1);                       % initial step size

while i < abs(length)
  i = i + (length>0);                  % count line searches when length > 0

  X0 = X; f0 = f1; df0 = df1;          % saved so a failed search can be undone
  X = X + z1*s;                        % take the trial step
  [f2 df2] = eval(argstr);
  i = i + (length<0);
  d2 = df2'*s;
  f3 = f1; d3 = d1; z3 = -z1;          % point 3 starts out equal to point 1
  if length>0, M = MAX; else M = min(MAX, -length-i); end
  success = 0; limit = -1;             % limit < 0 means no upper bound yet
  while 1
    % Interpolate while the step is too long (value or slope test fails)
    % and evaluations remain.
    while ((f2 > f1+z1*RHO*d1) || (d2 > -SIG*d1)) && (M > 0)
      limit = z1;                      % current step becomes the upper bound
      if f2 > f1
        z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3);   % quadratic fit
      else
        A = 6*(f2-f3)/z3+3*(d2+d3);               % cubic fit
        B = 3*(f3-f2)-z3*(d3+2*d2);
        z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A;
      end
      if isnan(z2) || isinf(z2)
        z2 = z3/2;                     % numerical trouble: bisect instead
      end
      z2 = max(min(z2, INT*z3),(1-INT)*z3);   % stay inside the bracket
      z1 = z1 + z2;
      X = X + z2*s;
      [f2 df2] = eval(argstr);
      M = M - 1; i = i + (length<0);
      d2 = df2'*s;
      z3 = z3-z2;
    end
    if f2 > f1+z1*RHO*d1 || d2 > -SIG*d1
      break;                           % still unacceptable: search failed
    elseif d2 > SIG*d1
      success = 1; break;              % slope small enough: accept the point
    elseif M == 0
      break;                           % evaluation budget exhausted
    end
    % Otherwise extrapolate using a cubic fit through points 2 and 3.
    A = 6*(f2-f3)/z3+3*(d2+d3);
    B = 3*(f3-f2)-z3*(d3+2*d2);
    z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3));
    if ~isreal(z2) || isnan(z2) || isinf(z2) || z2 < 0
      if limit < -0.5                  % no upper bound known
        z2 = z1 * (EXT-1);             % extrapolate by the maximum amount
      else
        z2 = (limit-z1)/2;             % otherwise bisect toward the bound
      end
    elseif (limit > -0.5) && (z2+z1 > limit)
      z2 = (limit-z1)/2;               % would pass the bound: bisect
    elseif (limit < -0.5) && (z2+z1 > z1*EXT)
      z2 = z1*(EXT-1.0);               % cap the extrapolation
    elseif z2 < -z3*INT
      z2 = -z3*INT;                    % step too small: enforce a minimum
    elseif (limit > -0.5) && (z2 < (limit-z1)*(1.0-INT))
      z2 = (limit-z1)*(1.0-INT);       % too close to the bound
    end
    f3 = f2; d3 = d2; z3 = -z2;        % point 3 takes over from point 2
    z1 = z1 + z2; X = X + z2*s;
    [f2 df2] = eval(argstr);
    M = M - 1; i = i + (length<0);
    d2 = df2'*s;
  end

  if success
    f1 = f2; fX = [fX' f1]';
    fprintf('%s %4i | Cost: %4.6e\r', S, i, f1);
    s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2;   % new conjugate direction
    tmp = df1; df1 = df2; df2 = tmp;              % swap the gradients
    d2 = df1'*s;
    if d2 > 0                          % new direction is not downhill
      s = -df1;                        % fall back to steepest descent
      d2 = -s'*s;
    end
    z1 = z1 * min(RATIO, d1/(d2-realmin));   % scale step by slope ratio
    d1 = d2;
    ls_failed = 0;
  else
    X = X0; f1 = f0; df1 = df0;        % restore the point before the search
    if ls_failed || i > abs(length)
      break;                           % two failures in a row, or out of budget
    end
    tmp = df1; df1 = df2; df2 = tmp;
    s = -df1;                          % restart from steepest descent
    d1 = -s'*s;
    z1 = 1/(1-d1);
    ls_failed = 1;
  end
  if exist('OCTAVE_VERSION')
    fflush(stdout);                    % Octave only: flush the progress line
  end
end
fprintf('\n');
--------------------------------------------------------------------------------