├── .gitignore
├── linear regression
│   ├── tests
│   │   ├── normaleq_1.m
│   │   ├── normaleq_2.m
│   │   ├── jcost_1.m
│   │   ├── jcost_2.m
│   │   ├── hydist_1.m
│   │   ├── jcost_3.m
│   │   ├── run.m
│   │   ├── scale_vector.m
│   │   ├── gdescent_1.m
│   │   ├── gdescent_2.m
│   │   ├── scale_matrix.m
│   │   ├── gdescent_3.m
│   │   ├── scale_equals.m
│   │   └── scale_square_matrix.m
│   ├── normaleq.m
│   ├── jcost.m
│   ├── hydist.m
│   ├── scale.m
│   └── gdescent.m
├── README.md
└── LICENSE
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.DS_Store
*.m~
*.mex*
--------------------------------------------------------------------------------
/linear regression/tests/normaleq_1.m:
--------------------------------------------------------------------------------
function normaleq_1()
  % Test: normaleq on an invertible design matrix.
  fprintf('> normal equation\n')
  X = [1 0; 0 2];
  y = [1; 1];
  exp_theta = [1; 0.5];
  % tolerance was eps^-2 (~2.0e31), which made the assertion vacuous;
  % 1e-4 matches the precision of the hard-coded expected values
  assert(normaleq(X, y), exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/normaleq_2.m:
--------------------------------------------------------------------------------
function normaleq_2()
  % Test: normaleq on a rank-deficient (non-invertible X'X) design matrix;
  % pinv must yield the minimum-norm least-squares solution.
  fprintf('> normal equation [non invertibility]\n')
  X = [1 2; 2 4; 4 8];
  y = [1; 1; 2];
  exp_theta = [0.10476; 0.20952];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(normaleq(X, y), exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/jcost_1.m:
--------------------------------------------------------------------------------
function jcost_1()
  % Test: jcost on a 4x3 design matrix.
  fprintf('> linear regression cost function [1]\n')
  X = [1 2 3; 1 3 4; 1 4 5; 1 5 6];
  y = [7; 6; 5; 4];
  theta = [0.1; 0.2; 0.3];
  j = jcost(X, y, theta);
  exp_j = 7.0175;
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(j, exp_j, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/jcost_2.m:
--------------------------------------------------------------------------------
function jcost_2()
  % Test: jcost on a 4x2 design matrix with a bias column.
  fprintf('> linear regression cost function [2]\n')
  X = [1 2; 1 3; 1 4; 1 5];
  y = [7; 6; 5; 4];
  theta = [0.1; 0.2];
  j = jcost(X, y, theta);
  exp_j = 11.9450;
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(j, exp_j, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/hydist_1.m:
--------------------------------------------------------------------------------
function hydist_1()
  % Test: residuals h(X) - y, one per training example.
  fprintf('> hypothesis vs class distance [1]\n')
  X = [1 2 3; 1 3 4; 1 4 5; 1 5 6];
  y = [7; 6; 5; 4];
  theta = [0.1; 0.2; 0.3];
  exp_dist = [-5.6; -4.1; -2.6; -1.1];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(hydist(X, y, theta), exp_dist, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/jcost_3.m:
--------------------------------------------------------------------------------
function jcost_3()
  % Test: jcost on a design matrix without a bias column.
  fprintf('> linear regression cost function [3]\n')
  X = [2 1 3; 7 1 9; 1 8 1; 3 7 4];
  y = [2; 5; 5; 6];
  theta = [0.3816; 0.7655; 0.7952];
  j = jcost(X, y, theta);
  exp_j = 6.7273;
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(j, exp_j, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/run.m:
--------------------------------------------------------------------------------
%!test scale_vector();
%!test scale_matrix();
%!test scale_square_matrix();
%!test scale_equals();
%!test hydist_1();
%!test jcost_1();
%!test jcost_2();
%!test jcost_3();
%!test gdescent_1();
%!test gdescent_2();
%!test gdescent_3();
%!test normaleq_1();
%!test normaleq_2();
--------------------------------------------------------------------------------
/linear regression/tests/scale_vector.m:
--------------------------------------------------------------------------------
function scale_vector()
  % Test: scale on a single column vector.
  fprintf('> feature scaling [vector]\n')
  X = [1; 2; 3];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [-1; 0; 1];
  exp_mu = 2;
  exp_sigma = 1;
  assert(XN, EXP_XN, eps);
  assert(mu, exp_mu, eps);
  assert(sigma, exp_sigma, eps);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/gdescent_1.m:
--------------------------------------------------------------------------------
function gdescent_1()
  % Test: gradient descent convergence on a 4x2 design matrix.
  fprintf('> gradient descent [1]\n')
  X = [1 5; 1 2; 1 4; 1 5];
  y = [1; 6; 4; 2];
  theta = [0; 0];
  alpha = 0.01;
  iterations = 1000;
  [theta, jhist] = gdescent(X, y, theta, alpha, iterations);
  exp_theta = [5.21475; -0.57335];
  % tolerance was eps^-2 (~2.0e31), which made the assertion vacuous;
  % 1e-4 matches the precision of the hard-coded expected values
  assert(theta, exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/gdescent_2.m:
--------------------------------------------------------------------------------
function gdescent_2()
  % NOTE(review): this test is an exact duplicate of gdescent_1 (same data,
  % same expectations) — presumably a copy-paste leftover; it should get its
  % own fixture. Kept as-is rather than inventing unverified expected values.
  fprintf('> gradient descent [2]\n')
  X = [1 5; 1 2; 1 4; 1 5];
  y = [1; 6; 4; 2];
  theta = [0; 0];
  alpha = 0.01;
  iterations = 1000;
  [theta, jhist] = gdescent(X, y, theta, alpha, iterations);
  exp_theta = [5.21475; -0.57335];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(theta, exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/scale_matrix.m:
--------------------------------------------------------------------------------
function scale_matrix()
  % Test: scale normalizes each column of a rectangular matrix independently.
  fprintf('> feature scaling [matrix]\n')
  X = [1 6; 2 4; 3 2];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [-1 1; 0 0; 1 -1];
  exp_mu = [2 4];
  exp_sigma = [1 2];
  assert(XN, EXP_XN, eps);
  assert(mu, exp_mu, eps);
  assert(sigma, exp_sigma, eps);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/gdescent_3.m:
--------------------------------------------------------------------------------
function gdescent_3()
  % Test: gradient descent on a 3x3 system with a non-zero initial theta.
  fprintf('> gradient descent [3]\n')
  X = [3 5 6; 1 2 3; 9 4 2];
  y = [1; 6; 4];
  theta = [0; 0; 1];
  alpha = 0.01;
  iterations = 1000;
  [theta, jhist] = gdescent(X, y, theta, alpha, iterations);
  exp_theta = [1.3169; -3.2949; 2.5524];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(theta, exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/scale_equals.m:
--------------------------------------------------------------------------------
function scale_equals()
  % Test: a constant column has sigma = 0, so scaling produces NaN (0/0).
  fprintf('> feature scaling [equals values]\n')
  X = [1 2; 1 3; 1 4];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [NaN -1; NaN 0; NaN 1];
  exp_mu = [unique(X(:, 1)) 3];
  exp_sigma = [0 1];
  assert(XN, EXP_XN);
  assert(mu, exp_mu);
  assert(sigma, exp_sigma);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/scale_square_matrix.m:
--------------------------------------------------------------------------------
function scale_square_matrix()
  % Test: scale on a 3x3 magic-square matrix (sigma = sqrt(7) for two columns).
  fprintf('> feature scaling [square matrix]\n');
  X = [8 1 6; 3 5 7; 4 9 2];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [1.13389 -1 0.37796; -0.75593 0 0.75593; -0.37796 1 -1.13389];
  exp_mu = [5 5 5];
  exp_sigma = [2.6458 4 2.6458];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous; expected values are
  % given to 5 significant digits, so 1e-4 absolute is appropriate
  assert(XN, EXP_XN, 1e-4);
  assert(mu, exp_mu, 1e-4);
  assert(sigma, exp_sigma, 1e-4);
endfunction
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
What is this?
[![Analytics](https://ga-beacon.appspot.com/UA-49657176-1/ml-octa-collection)](https://github.com/igrigorik/ga-beacon)
=============

This repository contains some GNU/Octave functions for machine learning.

## Instructions

If you want to use the linear regression code you can simply execute:

```matlab
addpath(genpath('path/to/linear regression'));
```

Done. You can now use the linear regression functions but also run test cases in order to check that everything works just fine:

```matlab
test('run')
```

This command will execute the [run.m](linear%20regression/tests/run.m) file in the [linear regression/tests](linear%20regression/tests/) directory.

Also, you can run demos ...

_TODO_

## Copyright and license

The code is released under the terms of the [BSD-2 license](LICENSE).

Copyright (c) 2014 - [Leonardo Di Donato](https://github.com/leodido).
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2014, Leonardo Di Donato
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/linear regression/normaleq.m:
--------------------------------------------------------------------------------
function [theta] = normaleq(X, y)
% -----------------------------------------------------------------------------
% Usage: theta = normaleq (X, y)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% -----------------------------------------------------------------------------
% Return values:
% theta : double, size(n, 1)
% -----------------------------------------------------------------------------
% Compute the closed-form solution to linear regression
% using normal equations.
% Perform the pseudo-inverse function to bypass the inversion of
% non-invertible matrices.
% -----------------------------------------------------------------------------
% Keywords: normal equations, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 0; 0 2];
% y = [1; 1];
% theta = normaleq(X, y)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
% normal equation: theta = (X'X)^+ X' y; pinv keeps this defined even
% when X'X is singular (see the non-invertibility test case)
theta = pinv(X' * X) * X' * y;
end
--------------------------------------------------------------------------------
/linear regression/jcost.m:
--------------------------------------------------------------------------------
function j = jcost (X, y, theta)
% -----------------------------------------------------------------------------
% Usage: j = jcost (X, y, theta)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% theta : double, size(n, 1)
% -----------------------------------------------------------------------------
% Return values:
% j : double, size(1, 1)
% -----------------------------------------------------------------------------
% Compute cost for linear regression.
% Return the cost of using theta as the parameter for linear regression to fit
% the data points in X and y.
% -----------------------------------------------------------------------------
% Keywords: cost function, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 2; 1 3; 1 4; 1 5];
% y = [7; 6; 5; 4];
% theta = [0.1; 0.2];
% j = jcost (X, y, theta)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
dist = hydist(X, y, theta);
% halved mean squared error: J = (1 / (2m)) * sum(dist .^ 2),
% written as an inner product to avoid an explicit sum
j = (dist' * dist) / (2 * size(X, 1));
end
--------------------------------------------------------------------------------
/linear regression/hydist.m:
--------------------------------------------------------------------------------
function dist = hydist(X, y, theta)
% -----------------------------------------------------------------------------
% Usage: dist = hydist (X, y, theta)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% theta : double, size(n, 1)
% -----------------------------------------------------------------------------
% Return values:
% dist : double, size(m, 1)
%        [was documented as size(n, 1); X * theta - y has one row per example]
% -----------------------------------------------------------------------------
% Distance between hypothesis and class for linear regression.
% Return the distance between the hypothesis output value and the class
% using theta as the parameter for linear regression to fit
% the data point in X and y.
% -----------------------------------------------------------------------------
% Keywords: distance, hypothesis, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 2; 1 3; 1 4; 1 5];
% y = [7; 6; 5; 4];
% theta = [0.1; 0.2];
% dist = hydist (X, y, theta)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
% residuals h(X) - y; plain '-' replaces the '.-' spelling, which was
% deprecated and removed in Octave 7 ('-' is identical for matrices)
dist = (X * theta) - y;
end
--------------------------------------------------------------------------------
/linear regression/scale.m:
--------------------------------------------------------------------------------
function [XN, mu, sigma] = scale (X)
% -----------------------------------------------------------------------------
% Usage: [XN, mu, sigma] = scale (X)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% -----------------------------------------------------------------------------
% Return values:
% XN : double, size(m, n)
% mu : double, size(1, n)
% sigma : double, size(1, n)
% -----------------------------------------------------------------------------
% Normalize the matrix X.
% Return a scaled version of X where the mean value of each column
% is 0 and the standard deviation is 1.
% Note: a constant column has sigma = 0, so its scaled entries are NaN.
% -----------------------------------------------------------------------------
% Keywords: feature scaling, mean normalization
% -----------------------------------------------------------------------------
% Examples:
% X = [1 6; 2 7; 3 8; 4 9; 5 10];
% [XN, mu, sigma] = scale (X)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
mu = mean(X);    % per-column mean, size(1, n)
sigma = std(X);  % per-column standard deviation, size(1, n)
% automatic broadcasting replaces the old repmat(...) expansion and the
% deprecated '.-' operator (removed in Octave 7); the result is identical
XN = (X - mu) ./ sigma;
end
--------------------------------------------------------------------------------
/linear regression/gdescent.m:
--------------------------------------------------------------------------------
function [theta, jhist] = gdescent(X, y, theta, alpha, it)
% -----------------------------------------------------------------------------
% Usage: [theta, jhist] = gdescent(X, y, theta, alpha, it)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% theta : double, size(n, 1)
% alpha : double, size(1, 1)
% it : double, size(1, 1)
% -----------------------------------------------------------------------------
% Return values:
% theta : double, size(n, 1)
% jhist : double, size(it, 1)
% -----------------------------------------------------------------------------
% Perform gradient descent algorithm to learn theta parameters.
% Update theta by taking it gradient steps with learning rate alpha.
% -----------------------------------------------------------------------------
% Keywords: gradient descent, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 5; 1 2; 1 4; 1 5];
% y = [1; 6; 4; 2];
% theta = [0; 0];
% learnrate = 0.01;
% numiterations = 1000;
% [mintheta, jhist] = gdescent(X, y, theta, learnrate, numiterations)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
m = size(X, 1);       % number of training examples (hoisted: loop invariant)
jhist = zeros(it, 1); % cost after each step, useful for convergence checks
for iter = 1:it
  % simultaneous update: theta <- theta - (alpha / m) * X' * (h(X) - y)
  % ('-' replaces the deprecated '.-' operator, removed in Octave 7)
  theta = theta - (alpha / m) * (X' * hydist(X, y, theta));
  jhist(iter, 1) = jcost(X, y, theta);
end
end
--------------------------------------------------------------------------------