├── .gitignore
├── linear regression
│   ├── tests
│   │   ├── normaleq_1.m
│   │   ├── normaleq_2.m
│   │   ├── jcost_1.m
│   │   ├── jcost_2.m
│   │   ├── hydist_1.m
│   │   ├── jcost_3.m
│   │   ├── run.m
│   │   ├── scale_vector.m
│   │   ├── gdescent_1.m
│   │   ├── gdescent_2.m
│   │   ├── scale_matrix.m
│   │   ├── gdescent_3.m
│   │   ├── scale_equals.m
│   │   └── scale_square_matrix.m
│   ├── normaleq.m
│   ├── jcost.m
│   ├── hydist.m
│   ├── scale.m
│   └── gdescent.m
├── README.md
└── LICENSE
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.DS_Store
*.m~
*.mex*
--------------------------------------------------------------------------------
/linear regression/tests/normaleq_1.m:
--------------------------------------------------------------------------------
function normaleq_1()
  % Test: normaleq on an invertible design matrix.
  fprintf('> normal equation\n')
  X = [1 0; 0 2];
  y = [1; 1];
  exp_theta = [1; 0.5];
  % tolerance was eps^-2 (~2.0e31), which made the assertion vacuous;
  % 1e-4 matches the precision of the hard-coded expected values
  assert(normaleq(X, y), exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/normaleq_2.m:
--------------------------------------------------------------------------------
function normaleq_2()
  % Test: normaleq on a rank-deficient (non-invertible X'X) design matrix;
  % pinv must yield the minimum-norm least-squares solution.
  fprintf('> normal equation [non invertibility]\n')
  X = [1 2; 2 4; 4 8];
  y = [1; 1; 2];
  exp_theta = [0.10476; 0.20952];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(normaleq(X, y), exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/jcost_1.m:
--------------------------------------------------------------------------------
function jcost_1()
  % Test: jcost on a 4x3 design matrix.
  fprintf('> linear regression cost function [1]\n')
  X = [1 2 3; 1 3 4; 1 4 5; 1 5 6];
  y = [7; 6; 5; 4];
  theta = [0.1; 0.2; 0.3];
  j = jcost(X, y, theta);
  exp_j = 7.0175;
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(j, exp_j, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/jcost_2.m:
--------------------------------------------------------------------------------
function jcost_2()
  % Test: jcost on a 4x2 design matrix with a bias column.
  fprintf('> linear regression cost function [2]\n')
  X = [1 2; 1 3; 1 4; 1 5];
  y = [7; 6; 5; 4];
  theta = [0.1; 0.2];
  j = jcost(X, y, theta);
  exp_j = 11.9450;
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(j, exp_j, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/hydist_1.m:
--------------------------------------------------------------------------------
function hydist_1()
  % Test: residuals h(X) - y, one per training example.
  fprintf('> hypothesis vs class distance [1]\n')
  X = [1 2 3; 1 3 4; 1 4 5; 1 5 6];
  y = [7; 6; 5; 4];
  theta = [0.1; 0.2; 0.3];
  exp_dist = [-5.6; -4.1; -2.6; -1.1];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(hydist(X, y, theta), exp_dist, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/jcost_3.m:
--------------------------------------------------------------------------------
function jcost_3()
  % Test: jcost on a design matrix without a bias column.
  fprintf('> linear regression cost function [3]\n')
  X = [2 1 3; 7 1 9; 1 8 1; 3 7 4];
  y = [2; 5; 5; 6];
  theta = [0.3816; 0.7655; 0.7952];
  j = jcost(X, y, theta);
  exp_j = 6.7273;
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(j, exp_j, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/run.m:
--------------------------------------------------------------------------------
%!test scale_vector();
%!test scale_matrix();
%!test scale_square_matrix();
%!test scale_equals();
%!test hydist_1();
%!test jcost_1();
%!test jcost_2();
%!test jcost_3();
%!test gdescent_1();
%!test gdescent_2();
%!test gdescent_3();
%!test normaleq_1();
%!test normaleq_2();
--------------------------------------------------------------------------------
/linear regression/tests/scale_vector.m:
--------------------------------------------------------------------------------
function scale_vector()
  % Test: scale on a single column vector.
  fprintf('> feature scaling [vector]\n')
  X = [1; 2; 3];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [-1; 0; 1];
  exp_mu = 2;
  exp_sigma = 1;
  assert(XN, EXP_XN, eps);
  assert(mu, exp_mu, eps);
  assert(sigma, exp_sigma, eps);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/gdescent_1.m:
--------------------------------------------------------------------------------
function gdescent_1()
  % Test: gradient descent convergence on a 4x2 design matrix.
  fprintf('> gradient descent [1]\n')
  X = [1 5; 1 2; 1 4; 1 5];
  y = [1; 6; 4; 2];
  theta = [0; 0];
  alpha = 0.01;
  iterations = 1000;
  [theta, jhist] = gdescent(X, y, theta, alpha, iterations);
  exp_theta = [5.21475; -0.57335];
  % tolerance was eps^-2 (~2.0e31), which made the assertion vacuous;
  % 1e-4 matches the precision of the hard-coded expected values
  assert(theta, exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/gdescent_2.m:
--------------------------------------------------------------------------------
function gdescent_2()
  % NOTE(review): this test is an exact duplicate of gdescent_1 (same data,
  % same expectations) — presumably a copy-paste leftover; it should get its
  % own fixture. Kept as-is rather than inventing unverified expected values.
  fprintf('> gradient descent [2]\n')
  X = [1 5; 1 2; 1 4; 1 5];
  y = [1; 6; 4; 2];
  theta = [0; 0];
  alpha = 0.01;
  iterations = 1000;
  [theta, jhist] = gdescent(X, y, theta, alpha, iterations);
  exp_theta = [5.21475; -0.57335];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(theta, exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/scale_matrix.m:
--------------------------------------------------------------------------------
function scale_matrix()
  % Test: scale normalizes each column of a rectangular matrix independently.
  fprintf('> feature scaling [matrix]\n')
  X = [1 6; 2 4; 3 2];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [-1 1; 0 0; 1 -1];
  exp_mu = [2 4];
  exp_sigma = [1 2];
  assert(XN, EXP_XN, eps);
  assert(mu, exp_mu, eps);
  assert(sigma, exp_sigma, eps);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/gdescent_3.m:
--------------------------------------------------------------------------------
function gdescent_3()
  % Test: gradient descent on a 3x3 system with a non-zero initial theta.
  fprintf('> gradient descent [3]\n')
  X = [3 5 6; 1 2 3; 9 4 2];
  y = [1; 6; 4];
  theta = [0; 0; 1];
  alpha = 0.01;
  iterations = 1000;
  [theta, jhist] = gdescent(X, y, theta, alpha, iterations);
  exp_theta = [1.3169; -3.2949; 2.5524];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous
  assert(theta, exp_theta, 1e-4);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/scale_equals.m:
--------------------------------------------------------------------------------
function scale_equals()
  % Test: a constant column has sigma = 0, so scaling produces NaN (0/0).
  fprintf('> feature scaling [equals values]\n')
  X = [1 2; 1 3; 1 4];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [NaN -1; NaN 0; NaN 1];
  exp_mu = [unique(X(:, 1)) 3];
  exp_sigma = [0 1];
  assert(XN, EXP_XN);
  assert(mu, exp_mu);
  assert(sigma, exp_sigma);
endfunction
--------------------------------------------------------------------------------
/linear regression/tests/scale_square_matrix.m:
--------------------------------------------------------------------------------
function scale_square_matrix()
  % Test: scale on a 3x3 magic-square matrix (sigma = sqrt(7) for two columns).
  fprintf('> feature scaling [square matrix]\n');
  X = [8 1 6; 3 5 7; 4 9 2];
  [XN, mu, sigma] = scale(X);
  EXP_XN = [1.13389 -1 0.37796; -0.75593 0 0.75593; -0.37796 1 -1.13389];
  exp_mu = [5 5 5];
  exp_sigma = [2.6458 4 2.6458];
  % tolerance was eps^-2 (~2.0e31), i.e. vacuous; expected values are
  % given to 5 significant digits, so 1e-4 absolute is appropriate
  assert(XN, EXP_XN, 1e-4);
  assert(mu, exp_mu, 1e-4);
  assert(sigma, exp_sigma, 1e-4);
endfunction
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
What is this?
[![Analytics](https://ga-beacon.appspot.com/UA-49657176-1/ml-octa-collection)](https://github.com/igrigorik/ga-beacon)
=============

This repository contains some GNU/Octave functions for machine learning.

## Instructions

If you want to use the linear regression code you can simply execute:

```matlab
addpath(genpath('path/to/linear regression'));
```

Done. You can now use the linear regression functions but also run test cases in order to check that everything works just fine:

```matlab
test('run')
```

This command will execute the [run.m](linear%20regression/tests/run.m) file in the [linear regression/tests](linear%20regression/tests/) directory.

Also, you can run demos ...

_TODO_

## Copyright and license

The code is released under the terms of the [BSD-2 license](LICENSE).

Copyright (c) 2014 - [Leonardo Di Donato](https://github.com/leodido).
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2014, Leonardo Di Donato
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/linear regression/normaleq.m:
--------------------------------------------------------------------------------
function [theta] = normaleq(X, y)
% -----------------------------------------------------------------------------
% Usage: theta = normaleq (X, y)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% -----------------------------------------------------------------------------
% Return values:
% theta : double, size(n, 1)
% -----------------------------------------------------------------------------
% Compute the closed-form solution to linear regression
% using normal equations.
% Perform the pseudo-inverse function to bypass the inversion of
% non-invertible matrices.
% -----------------------------------------------------------------------------
% Keywords: normal equations, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 0; 0 2];
% y = [1; 1];
% theta = normaleq(X, y)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
% normal equation: theta = (X'X)^+ X' y; pinv keeps this defined even
% when X'X is singular (see the non-invertibility test case)
theta = pinv(X' * X) * X' * y;
end
--------------------------------------------------------------------------------
/linear regression/jcost.m:
--------------------------------------------------------------------------------
function j = jcost (X, y, theta)
% -----------------------------------------------------------------------------
% Usage: j = jcost (X, y, theta)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% theta : double, size(n, 1)
% -----------------------------------------------------------------------------
% Return values:
% j : double, size(1, 1)
% -----------------------------------------------------------------------------
% Compute cost for linear regression.
% Return the cost of using theta as the parameter for linear regression to fit
% the data points in X and y.
% -----------------------------------------------------------------------------
% Keywords: cost function, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 2; 1 3; 1 4; 1 5];
% y = [7; 6; 5; 4];
% theta = [0.1; 0.2];
% j = jcost (X, y, theta)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
dist = hydist(X, y, theta);
% halved mean squared error: J = (1 / (2m)) * sum(dist .^ 2),
% written as an inner product to avoid an explicit sum
j = (dist' * dist) / (2 * size(X, 1));
end
--------------------------------------------------------------------------------
/linear regression/hydist.m:
--------------------------------------------------------------------------------
function dist = hydist(X, y, theta)
% -----------------------------------------------------------------------------
% Usage: dist = hydist (X, y, theta)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% theta : double, size(n, 1)
% -----------------------------------------------------------------------------
% Return values:
% dist : double, size(m, 1)
%        [was documented as size(n, 1); X * theta - y has one row per example]
% -----------------------------------------------------------------------------
% Distance between hypothesis and class for linear regression.
% Return the distance between the hypothesis output value and the class
% using theta as the parameter for linear regression to fit
% the data point in X and y.
% -----------------------------------------------------------------------------
% Keywords: distance, hypothesis, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 2; 1 3; 1 4; 1 5];
% y = [7; 6; 5; 4];
% theta = [0.1; 0.2];
% dist = hydist (X, y, theta)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
% residuals h(X) - y; plain '-' replaces the '.-' spelling, which was
% deprecated and removed in Octave 7 ('-' is identical for matrices)
dist = (X * theta) - y;
end
--------------------------------------------------------------------------------
/linear regression/scale.m:
--------------------------------------------------------------------------------
function [XN, mu, sigma] = scale (X)
% -----------------------------------------------------------------------------
% Usage: [XN, mu, sigma] = scale (X)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% -----------------------------------------------------------------------------
% Return values:
% XN : double, size(m, n)
% mu : double, size(1, n)
% sigma : double, size(1, n)
% -----------------------------------------------------------------------------
% Normalize the matrix X.
% Return a scaled version of X where the mean value of each column
% is 0 and the standard deviation is 1.
% Note: a constant column has sigma = 0, so its scaled entries are NaN.
% -----------------------------------------------------------------------------
% Keywords: feature scaling, mean normalization
% -----------------------------------------------------------------------------
% Examples:
% X = [1 6; 2 7; 3 8; 4 9; 5 10];
% [XN, mu, sigma] = scale (X)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
mu = mean(X);    % per-column mean, size(1, n)
sigma = std(X);  % per-column standard deviation, size(1, n)
% automatic broadcasting replaces the old repmat(...) expansion and the
% deprecated '.-' operator (removed in Octave 7); the result is identical
XN = (X - mu) ./ sigma;
end
--------------------------------------------------------------------------------
/linear regression/gdescent.m:
--------------------------------------------------------------------------------
function [theta, jhist] = gdescent(X, y, theta, alpha, it)
% -----------------------------------------------------------------------------
% Usage: [theta, jhist] = gdescent(X, y, theta, alpha, it)
% -----------------------------------------------------------------------------
% Parameters:
% X : double, size(m, n)
% y : double, size(m, 1)
% theta : double, size(n, 1)
% alpha : double, size(1, 1)
% it : double, size(1, 1)
% -----------------------------------------------------------------------------
% Return values:
% theta : double, size(n, 1)
% jhist : double, size(it, 1)
% -----------------------------------------------------------------------------
% Perform gradient descent algorithm to learn theta parameters.
% Update theta by taking it gradient steps with learning rate alpha.
% -----------------------------------------------------------------------------
% Keywords: gradient descent, linear regression
% -----------------------------------------------------------------------------
% Examples:
% X = [1 5; 1 2; 1 4; 1 5];
% y = [1; 6; 4; 2];
% theta = [0; 0];
% learnrate = 0.01;
% numiterations = 1000;
% [mintheta, jhist] = gdescent(X, y, theta, learnrate, numiterations)
% -----------------------------------------------------------------------------
% Author: leodido
% -----------------------------------------------------------------------------
% Maintainer: leodido
% -----------------------------------------------------------------------------
m = size(X, 1);       % number of training examples (hoisted: loop invariant)
jhist = zeros(it, 1); % cost after each step, useful for convergence checks
for iter = 1:it
  % simultaneous update: theta <- theta - (alpha / m) * X' * (h(X) - y)
  % ('-' replaces the deprecated '.-' operator, removed in Octave 7)
  theta = theta - (alpha / m) * (X' * hydist(X, y, theta));
  jhist(iter, 1) = jcost(X, y, theta);
end
end
--------------------------------------------------------------------------------