├── .gitignore ├── LICENSE ├── README.md ├── demo ├── data │ ├── sklearn_data_2clusters.m │ ├── sklearn_data_3clusters.m │ └── sklearn_data_noisyplane.m ├── demo_k_neighbors_classifier.m ├── demo_kernel_ridge_regression.m ├── demo_kmeans.m ├── demo_label_binarizer.m ├── demo_logistic_regression.m ├── demo_nystroem_ridge_regression.m ├── demo_pca.m ├── demo_ridge_regression.m ├── demo_svc.m └── run_all_demos.m ├── install.m └── lib ├── base ├── BaseEstimator.m ├── ClassifierMixin.m ├── RegressorMixin.m └── TransformerMixin.m ├── cluster └── KMeans_.m ├── cross_validation └── train_test_split.m ├── decomposition └── PCA_.m ├── ensemble └── GradientBoostingRegressor.m ├── kernel_approximation └── Nystroem.m ├── kernel_ridge └── KernelRidge.m ├── linear_model ├── Lasso_.m ├── LinearRegression.m ├── LogisticRegression.m ├── Ridge.m └── log_cost_function_reg.m ├── metrics ├── accuracy_score.m ├── mean_squared_error.m ├── pairwise │ ├── euclidean_distances.m │ ├── linear_kernel.m │ ├── pairwise_kernels.m │ ├── polynomial_kernel.m │ └── rbf_kernel.m └── r2_score.m ├── neighbors └── KNeighborsClassifier.m ├── pipeline ├── Pipeline.m └── make_pipeline.m ├── preprocessing ├── FunctionTransformer.m ├── LabelBinarizer.m ├── MinMaxScaler.m └── StandardScaler.m └── svm └── SVC.m /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows default autosave extension 2 | *.asv 3 | 4 | # OSX / *nix default autosave extension 5 | *.m~ 6 | 7 | # Compiled MEX binaries (all platforms) 8 | *.mex* 9 | 10 | # Simulink Code Generation 11 | slprj/ 12 | 13 | # Session info 14 | octave-workspace 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | License for sklearn-matlab: 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2016-2018 Steven Van Vaerenbergh 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | 25 | 26 | License for scikit-learn: 27 | 28 | New BSD License 29 | 30 | Copyright (c) 2007–2018 The scikit-learn developers. 31 | All rights reserved. 32 | 33 | 34 | Redistribution and use in source and binary forms, with or without 35 | modification, are permitted provided that the following conditions are met: 36 | 37 | a. Redistributions of source code must retain the above copyright notice, 38 | this list of conditions and the following disclaimer. 39 | b. 
Redistributions in binary form must reproduce the above copyright 40 | notice, this list of conditions and the following disclaimer in the 41 | documentation and/or other materials provided with the distribution. 42 | c. Neither the name of the Scikit-learn Developers nor the names of 43 | its contributors may be used to endorse or promote products 44 | derived from this software without specific prior written 45 | permission. 46 | 47 | 48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 49 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 52 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 54 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 55 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 58 | DAMAGE. 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | sklearn-matlab 2 | === 3 | 4 | Machine learning in Matlab using scikit-learn syntax. 5 | -------------------------------------------------------------------------------- /demo/data/sklearn_data_2clusters.m: -------------------------------------------------------------------------------- 1 | function [X_train, X_test, y_train, y_test] = sklearn_data_2clusters() 2 | % Generate a data set containing two clusters. 3 | 4 | %% PARAMETERS 5 | 6 | n = 300; 7 | test_size = 1/3; 8 | 9 | %% GENERATE 10 | 11 | y = randn(n,1)>0; 12 | X = randn(n,2); 13 | 14 | % two clusters 15 | X(y,:) = bsxfun(@plus,X(y,:),[1 1]); 16 | X(~y,:) = bsxfun(@plus,X(~y,:),[-1 -1]); 17 | 18 | [X_train, X_test, y_train, y_test] = train_test_split(X,y,test_size); 19 | 20 | % %% OUTPUT 21 | % 22 | % figure; hold all 23 | % plot(X(y==0,1),X(y==0,2),'o') 24 | % plot(X(y==1,1),X(y==1,2),'o') 25 | -------------------------------------------------------------------------------- /demo/data/sklearn_data_3clusters.m: -------------------------------------------------------------------------------- 1 | function [X_train, X_test, y_train, y_test] = sklearn_data_3clusters() 2 | % Generate a data set containing three clusters. 
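% The labels below are drawn uniformly from {1,2,3} and each cluster of
% points is shifted to a different mean before train_test_split() is applied.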
3 | 4 | %% PARAMETERS 5 | 6 | n = 300; 7 | test_size = 1/3; 8 | 9 | %% GENERATE 10 | 11 | X = 0.3*randn(n,2); 12 | y = ceil(3*rand(n,1)); 13 | y(y==0) = 1; 14 | 15 | % three clusters 16 | X(y==1,:) = bsxfun(@plus,X(y==1,:),[.8 1]); 17 | X(y==2,:) = bsxfun(@plus,X(y==2,:),[-.6 -1]); 18 | X(y==3,:) = bsxfun(@plus,X(y==3,:),[-1 .7]); 19 | 20 | [X_train, X_test, y_train, y_test] = train_test_split(X,y,test_size); 21 | 22 | %% OUTPUT 23 | 24 | % figure; hold all 25 | % plot(X(y==1,1),X(y==1,2),'o') 26 | % plot(X(y==2,1),X(y==2,2),'o') 27 | % plot(X(y==3,1),X(y==3,2),'o') 28 | -------------------------------------------------------------------------------- /demo/data/sklearn_data_noisyplane.m: -------------------------------------------------------------------------------- 1 | function [X_train, X_test, y_train, y_test] = sklearn_data_noisyplane() 2 | % Generate a data set containing noisy points on a 2D plane in 3 3 | % dimensions. 4 | 5 | %% PARAMETERS 6 | 7 | n = 500; 8 | test_size = 1/3; 9 | noisepower = 1E-1; 10 | 11 | %% GENERATE 12 | 13 | X = randn(n,2); 14 | noise = sqrt(noisepower)*randn(n,1); 15 | 16 | projection = randn(2,1); 17 | projection = projection/norm(projection); 18 | y = X*projection + noise; 19 | 20 | [X_train, X_test, y_train, y_test] = train_test_split(X,y,test_size); 21 | 22 | % %% OUTPUT 23 | % 24 | % figure; 25 | % plot3(X(:,1),X(:,2),y,'.') 26 | % view([45 45 0]) 27 | % axis equal 28 | % grid on 29 | -------------------------------------------------------------------------------- /demo/demo_k_neighbors_classifier.m: -------------------------------------------------------------------------------- 1 | % K-nearest neighbors classifier demo. 2 | 3 | close all 4 | clear 5 | 6 | %% PARAMETERS 7 | 8 | test_size = 0.3; 9 | n_neighbors = 5; 10 | 11 | %% PROGRAM 12 | 13 | load fisheriris 14 | X = meas; 15 | y = species; 16 | 17 | [X_train, X_test, y_train, y_test] = train_test_split(X,y,test_size); 18 | 19 | clf = KNeighborsClassifier(struct('n_neighbors',n_neighbors)); 20 | 21 | clf.fit(X_train,y_train); 22 | 23 | y_pred = clf.predict(X_test); 24 | 25 | %% OUTPUT 26 | 27 | score = accuracy_score(y_test, y_pred); 28 | 29 | fprintf('Accuracy: %.4f%%\n',100*score); 30 | -------------------------------------------------------------------------------- /demo/demo_kernel_ridge_regression.m: -------------------------------------------------------------------------------- 1 | % Kernel ridge regression demo. 2 | % 3 | % This program implements the example shown in Figure 2.1 of "Kernel 4 | % Methods for Nonlinear Identification, Equalization and Separation of 5 | % Signals". 6 | % 7 | % Author: Steven Van Vaerenbergh, 2018.
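%
% Note on the method (as implemented in lib/kernel_ridge/KernelRidge.m):
% fitting solves the closed-form dual problem
%   dual_coef_ = (K + (1/alpha)*I) \ y_train,
% where K is the kernel matrix of the training inputs, and predictions
% are obtained as y_pred = K(X_test, X_train) * dual_coef_.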
8 | 9 | close all 10 | clear 11 | rng('default'); rng(1); % Reproducibility 12 | 13 | %% PARAMETERS 14 | 15 | % data 16 | n_train = 50; % number of training data points 17 | n_test = 100; % number of test data points 18 | noise_var = 0.05; % noise variance 19 | 20 | % noisy sinc function 21 | my_fun = @(x,noise_var) (sin(3*x)./x + noise_var*randn(size(x))); 22 | 23 | % kernel 24 | alpha = 1E4; % regularization constant 25 | kernel = 'rbf'; % kernel type 26 | gamma = 1; % Gaussian kernel width 27 | 28 | %% PROGRAM 29 | tic 30 | 31 | % generate train data 32 | X_train = 6*(rand(n_train,1)-0.5); % sampled data 33 | y_train = my_fun(X_train,noise_var); 34 | 35 | % generate test data 36 | X_test = linspace(-3,3,n_test)'; % input data on a grid 37 | y_test = my_fun(X_test,0); 38 | 39 | % train and test 40 | clf = KernelRidge(struct('alpha',alpha,'kernel',kernel,'gamma',gamma)); 41 | clf.fit(X_train,y_train); 42 | y_pred = clf.predict(X_test); 43 | 44 | toc 45 | %% OUTPUT 46 | 47 | figure; hold on 48 | plot(X_train,y_train,'o'); 49 | plot(X_test,y_pred,'r'); 50 | plot(X_test,y_test,'--','Color',[.5 .5 .5]) 51 | legend('noisy data','regression','true sinc function') 52 | title('Kernel ridge regression demo') 53 | -------------------------------------------------------------------------------- /demo/demo_kmeans.m: -------------------------------------------------------------------------------- 1 | % KMeans demo on dummy 2D data. 2 | 3 | close all 4 | clear 5 | 6 | [X_train, X_test, y_train, y_test] = sklearn_data_3clusters(); 7 | 8 | clf = KMeans_(struct('n_clusters',3)); 9 | clf.fit(X_train); 10 | 11 | % test clustering on test data 12 | y_est = clf.predict(X_test); 13 | 14 | f1 = figure; hold all 15 | plot(X_test(y_est==1,1),X_test(y_est==1,2),'o') 16 | plot(X_test(y_est==2,1),X_test(y_est==2,2),'o') 17 | plot(X_test(y_est==3,1),X_test(y_est==3,2),'o') 18 | -------------------------------------------------------------------------------- /demo/demo_label_binarizer.m: -------------------------------------------------------------------------------- 1 | % Label Binarizer demo. 
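%
% Fitting on the labels [1, 2, 6, 4, 2] below yields classes_ = [1 2 4 6];
% transform() then maps each label to a one-vs-all row, e.g. label 6
% becomes [0 0 0 1].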
2 | 3 | close all 4 | clear 5 | 6 | y_train = [1, 2, 6, 4, 2]; 7 | fprintf('Train labels:\n') 8 | disp(y_train) 9 | 10 | lb = LabelBinarizer(); 11 | lb.fit(y_train); 12 | fprintf('Binarized classes:\n') 13 | disp(lb.classes_) 14 | 15 | y_test = [1, 1, 2, 4, 6, 6, 1, 2, 4]; 16 | fprintf('test labels:\n') 17 | disp(y_test) 18 | 19 | y_new = lb.transform(y_test); 20 | fprintf('Transformed test labels:\n') 21 | disp(y_new) 22 | 23 | y_test_orig = lb.inverse_transform(y_new); 24 | fprintf('Recovered test labels:\n') 25 | disp(y_test_orig) 26 | -------------------------------------------------------------------------------- /demo/demo_logistic_regression.m: -------------------------------------------------------------------------------- 1 | % Logistic regression demo on dummy 2D data 2 | 3 | close all 4 | clear 5 | 6 | [X_train, X_test, y_train, y_test] = sklearn_data_2clusters(); 7 | 8 | f1 = figure; hold all 9 | plot(X_train(y_train==0,1),X_train(y_train==0,2),'o') 10 | plot(X_train(y_train==1,1),X_train(y_train==1,2),'o') 11 | title('Training data') 12 | 13 | clf = LogisticRegression; 14 | 15 | clf.fit(X_train,y_train); 16 | 17 | proba = clf.predict_proba(X_test); 18 | 19 | [perfx,perfy,T,AUC] = perfcurve(y_test,proba,true); 20 | 21 | fprintf('AUC = %.4f\n',AUC) 22 | 23 | f2 = figure; plot(perfx,perfy) 24 | xlabel('False positive rate') 25 | ylabel('True positive rate') 26 | title('ROC for Classification by Logistic Regression') 27 | -------------------------------------------------------------------------------- /demo/demo_nystroem_ridge_regression.m: -------------------------------------------------------------------------------- 1 | % Nystroem ridge regression demo. 2 | % 3 | % This program implements the example shown in Figure 2.1 of "Kernel 4 | % Methods for Nonlinear Identification, Equalization and Separation of 5 | % Signals". 6 | % 7 | % Author: Steven Van Vaerenbergh, 2019. 8 | 9 | close all 10 | clear 11 | rng('default'); rng(1); % Reproducibility 12 | 13 | %% PARAMETERS 14 | 15 | % data 16 | n_train = 5000; % number of training data points 17 | n_test = 1000; % number of test data points 18 | noise_var = 0.05; % noise variance 19 | 20 | % noisy sinc function 21 | my_fun = @(x,noise_var) (sin(3*x)./x + noise_var*randn(size(x))); 22 | 23 | % kernel 24 | alpha = 1E-2; % regularization constant 25 | kernel = 'rbf'; % kernel type 26 | gamma = 1; % Gaussian kernel width 27 | n_components = 100; % number of bases for Nystroem approximation 28 | 29 | %% PROGRAM 30 | tic 31 | 32 | % generate train data 33 | X_train = 6*(rand(n_train,1)-0.5); % sampled data 34 | y_train = my_fun(X_train,noise_var); 35 | 36 | % generate test data 37 | X_test = linspace(-3,3,n_test)'; % input data on a grid 38 | y_test = my_fun(X_test,0); 39 | 40 | % train and test 41 | feature_map = Nystroem(struct('kernel',kernel,'gamma',gamma,... 
42 | 'n_components',n_components)); 43 | ridge = Ridge(struct('alpha',alpha)); 44 | clf = make_pipeline(feature_map,ridge); 45 | clf.fit(X_train,y_train); 46 | y_pred = clf.predict(X_test); 47 | 48 | toc 49 | %% OUTPUT 50 | 51 | figure; hold on 52 | plot(X_train,y_train,'.'); 53 | plot(X_test,y_pred,'r'); 54 | plot(X_test,y_test,'--','Color',[.5 .5 .5]) 55 | legend('noisy data','regression','true sinc function') 56 | title('Nystroem ridge regression demo') 57 | -------------------------------------------------------------------------------- /demo/demo_pca.m: -------------------------------------------------------------------------------- 1 | % PCA demo on dummy 2D data 2 | 3 | close all 4 | clear 5 | 6 | [X_train, X_test, y_train, y_test] = sklearn_data_2clusters(); 7 | 8 | clf = PCA_(struct('n_components',1)); 9 | clf.fit(X_train); 10 | v = clf.components; 11 | 12 | f1 = figure; hold all 13 | plot(X_train(:,1),X_train(:,2),'o') 14 | plot(5*[-v(1) v(1)],5*[-v(2) v(2)],'r') 15 | legend('Data','First principal component') 16 | -------------------------------------------------------------------------------- /demo/demo_ridge_regression.m: -------------------------------------------------------------------------------- 1 | % Ridge regression demo on dummy data. 2 | 3 | close all 4 | clear 5 | rng('default'); rng(1); % Reproducibility 6 | 7 | [X_train, X_test, y_train, y_test] = sklearn_data_noisyplane(); 8 | 9 | f1 = figure; 10 | plot3(X_train(:,1),X_train(:,2),y_train,'.') 11 | view([45 45 0]) 12 | axis equal 13 | grid on 14 | title('Training data (3D)') 15 | 16 | clf = Ridge; 17 | 18 | clf.fit(X_train,y_train); 19 | 20 | score = clf.score(X_test,y_test); 21 | 22 | fprintf('Regression score is %.2f\n',score); 23 | 24 | y_pred = clf.predict(X_test); 25 | 26 | f2 = figure; hold all 27 | plot3(X_test(:,1),X_test(:,2),y_test,'.') 28 | plot3(X_test(:,1),X_test(:,2),y_pred,'.') 29 | view([80 50 30]) 30 | axis equal 31 | grid on 32 | legend('true','predicted') 33 | -------------------------------------------------------------------------------- /demo/demo_svc.m: -------------------------------------------------------------------------------- 1 | % Support Vector Machine classifier demo. 2 | 3 | close all 4 | clear 5 | 6 | %% PARAMETERS 7 | 8 | test_size = 0.3; 9 | 10 | %% PROGRAM 11 | 12 | load fisheriris 13 | ind = ~strcmp(species,'setosa'); 14 | X = meas(ind,:); 15 | y = species(ind); 16 | 17 | [X_train, X_test, y_train, y_test] = train_test_split(X,y,test_size); 18 | 19 | clf = SVC(struct('kernel','RBF','gamma',1)); 20 | 21 | clf.fit(X_train,y_train); 22 | 23 | y_pred = clf.predict(X_test); 24 | 25 | %% OUTPUT 26 | 27 | score = accuracy_score(y_test, y_pred); 28 | 29 | fprintf('Accuracy: %.4f%%\n',100*score); 30 | 31 | sv = clf.model.SupportVectors; 32 | 33 | figure; hold all 34 | ind1 = strcmp(y_train,'versicolor'); 35 | ind2 = strcmp(y_train,'virginica'); 36 | plot(X_train(ind1,3),X_train(ind1,4),'.','MarkerSize',16) 37 | plot(X_train(ind2,3),X_train(ind2,4),'.','MarkerSize',16) 38 | plot(sv(:,3),sv(:,4),'ko','MarkerSize',10) 39 | legend({'versicolor','virginica','Support Vector'},'Location','best') 40 | -------------------------------------------------------------------------------- /demo/run_all_demos.m: -------------------------------------------------------------------------------- 1 | % Script to run all demos consecutively.
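%
% Each demo calls "clear", so the loop state (counter, file list and
% timer) is saved to a temporary MAT-file before every run and reloaded
% afterwards.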
2 | 3 | close all 4 | clear 5 | 6 | % get list of test functions 7 | fdir = fileparts(which('run_all_demos.m')); 8 | files = dir(fullfile(fdir,'demo_*.m')); 9 | [~,allfiles] = cellfun(@fileparts, {files.name}, 'UniformOutput',0); 10 | 11 | t1 = tic; 12 | fprintf('\n') 13 | for i=1:length(allfiles) 14 | close all 15 | clear eval 16 | save(fullfile(tempdir,'temp.mat'),'i','allfiles','t1'); % memory map 17 | 18 | % run script 19 | fname_demo = allfiles{i}; 20 | fprintf('\nRunning %s\n',fname_demo); 21 | eval(fname_demo); 22 | 23 | load(fullfile(tempdir,'temp.mat')); 24 | end 25 | fprintf('\n') 26 | delete(fullfile(tempdir,'temp.mat')); 27 | toc(t1) 28 | 29 | close all 30 | 31 | fprintf('All demos completed.\n') 32 | -------------------------------------------------------------------------------- /install.m: -------------------------------------------------------------------------------- 1 | % Installation file. Adds local folders to path. 2 | 3 | fprintf('Adding sklearn-matlab folders to Matlab path... ') 4 | 5 | % add lib/ with subfolders 6 | addpath(genpath(fullfile(pwd,'lib'))); 7 | 8 | % add data/ folder 9 | addpath(fullfile(pwd,'demo/data/')); 10 | 11 | fprintf('done.\n') 12 | disp('Type "savepath" if you wish to store the changes.') 13 | % savepath; 14 | -------------------------------------------------------------------------------- /lib/base/BaseEstimator.m: -------------------------------------------------------------------------------- 1 | classdef BaseEstimator < handle 2 | % Base class for all estimators in scikit-learn. 3 | 4 | properties (GetAccess = 'public', SetAccess = 'public') 5 | % parameters 6 | end 7 | 8 | properties (GetAccess = 'public', SetAccess = 'private') 9 | % attributes 10 | end 11 | 12 | methods 13 | % get parameter names for the estimator 14 | function names = get_param_names(obj) 15 | names = fieldnames(obj); 16 | end 17 | 18 | % get parameters 19 | function params = get_params(obj) 20 | params = struct; 21 | for fn = fieldnames(obj)' 22 | params.(fn{1}) = obj.(fn{1}); 23 | end 24 | end 25 | 26 | % set parameters 27 | function set_params(obj,params) 28 | if (nargin > 0) % copy valid parameters 29 | for fn = fieldnames(params)' 30 | if ismember(fn{1},fieldnames(obj)) 31 | values = params.(fn{1}); 32 | obj.(fn{1}) = values; 33 | else 34 | warning('Unknown parameter: %s.',fn{1}); 35 | end 36 | end 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /lib/base/ClassifierMixin.m: -------------------------------------------------------------------------------- 1 | classdef ClassifierMixin < handle 2 | % Mixin class for all classifier estimators in sklearn-matlab. 3 | 4 | properties 5 | estimator_type = 'classifier'; 6 | end 7 | 8 | methods 9 | 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/base/RegressorMixin.m: -------------------------------------------------------------------------------- 1 | classdef RegressorMixin < handle 2 | % Mixin class for all regression estimators in sklearn-matlab. 3 | 4 | properties 5 | estimator_type = 'regressor'; 6 | end 7 | 8 | methods 9 | 10 | function R2 = score(obj, X, y, sample_weight) 11 | % Returns the coefficient of determination R^2 of the 12 | % prediction. The coefficient R^2 is defined as (1 - u/v), 13 | % where u is the residual sum of squares sum((y_true - 14 | % y_pred).^2) and v is the total sum of squares sum((y_true - 15 | % mean(y_true)).^2). 
The best possible score is 1.0 and it can 16 | % be negative (because the model can be arbitrarily worse). A 17 | % constant model that always predicts the expected value of y, 18 | % disregarding the input features, would get an R^2 score of 19 | % 0.0. 20 | % 21 | % Inputs: 22 | % - X: Test samples. Shape: n_samples * n_features. 23 | % - y: True values for X. Shape: n_samples * 1. 24 | % - sample_weight: Weights. Shape: n_samples * 1. Optional. 25 | % 26 | % Output: 27 | % - score: R^2 of obj.predict(X) wrt. y. 28 | 29 | if nargin<4 30 | sample_weight = []; 31 | end 32 | 33 | y_pred = obj.predict(X); 34 | R2 = r2_score(y, y_pred, sample_weight); 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/base/TransformerMixin.m: -------------------------------------------------------------------------------- 1 | classdef TransformerMixin < handle 2 | % Mixin class for all transformers in scikit-learn. 3 | 4 | methods 5 | % Fit to data, then transform it. 6 | function X_new = fit_transform(obj,X,y) 7 | if nargin>2 8 | obj.fit(X,y); 9 | else 10 | % fit method of arity 1 (unsupervised transformation) 11 | obj.fit(X); 12 | end 13 | X_new = obj.transform(X); 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /lib/cluster/KMeans_.m: -------------------------------------------------------------------------------- 1 | classdef KMeans_ < BaseEstimator & TransformerMixin 2 | % The KMeans algorithm clusters data by trying to separate samples in n 3 | % groups of equal variance, minimizing a criterion known as the inertia 4 | % or within-cluster sum-of-squares. This algorithm requires the number 5 | % of clusters to be specified. 6 | 7 | properties (GetAccess = 'public', SetAccess = 'public') 8 | % parameters 9 | n_clusters = 8; % The number of clusters and centroids. 10 | max_iter = 300; % Maximum number of iterations for a single run. 11 | n_init = 10; % Number of times the k-means algorithm will be run 12 | % with different centroid seeds. The final results will be the best 13 | % output of n_init consecutive runs in terms of inertia. 14 | n_jobs = 1; % The number of jobs to use for the computation. 15 | end 16 | 17 | properties (GetAccess = 'public', SetAccess = 'private') 18 | % attributes 19 | cluster_centers_; % Coordinates of cluster centers 20 | labels_; % Labels of each point 21 | end 22 | 23 | methods 24 | % constructor 25 | function obj = KMeans_(params) 26 | if nargin>0 27 | obj.set_params(params) 28 | end 29 | end 30 | 31 | % Compute k-means clustering. 32 | function fit(obj,X, ~) 33 | stream = RandStream('mlfg6331_64'); % Random number stream 34 | opts = statset('UseParallel',true,... 35 | 'UseSubstreams',true,... 36 | 'Streams',stream); 37 | 38 | [idx, C] = kmeans(X, obj.n_clusters,... 39 | 'Distance','cityblock',... 40 | 'Replicates',obj.n_init,... 41 | 'MaxIter',obj.max_iter,... 42 | 'Display','final',... 43 | 'Options',opts); 44 | 45 | obj.cluster_centers_ = C; 46 | obj.labels_ = idx; 47 | end 48 | 49 | % Compute cluster centers and predict cluster index for each 50 | % sample. 51 | function labels = fit_predict(obj,X, ~) 52 | obj.fit(X); 53 | labels = obj.predict(X); 54 | end 55 | 56 | % Predict the closest cluster each sample in X belongs to. 57 | function labels = predict(obj,X,~) 58 | X_new = obj.transform(X); 59 | [~,labels] = min(X_new,[],2); 60 | end 61 | 62 | % Transform X to a cluster-distance space.
In the new space, each 63 | % dimension is the distance to the cluster centers. 64 | function X_new = transform(obj,X,~) 65 | X_new = euclidean_distances(X,obj.cluster_centers_,true); 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /lib/cross_validation/train_test_split.m: -------------------------------------------------------------------------------- 1 | function [X_train, X_test, y_train, y_test] = train_test_split(X,y,... 2 | test_size,random_state) 3 | 4 | if nargin<3 5 | test_size = 1/3; 6 | end 7 | 8 | if nargin>3 9 | rng('default'); 10 | rng(random_state); 11 | end 12 | 13 | n = size(X,1); 14 | n_test = floor(test_size*n); 15 | n_train = n - n_test; 16 | 17 | indp = randperm(n); 18 | ind_train = indp(1:n_train); 19 | ind_test = indp(n_train+1:n); 20 | 21 | X_train = X(ind_train,:); 22 | X_test = X(ind_test,:); 23 | y_train = y(ind_train,:); 24 | y_test = y(ind_test,:); 25 | -------------------------------------------------------------------------------- /lib/decomposition/PCA_.m: -------------------------------------------------------------------------------- 1 | classdef PCA_ < BaseEstimator & TransformerMixin 2 | % Principal component analysis (PCA). 3 | 4 | properties (GetAccess = 'public', SetAccess = 'public') 5 | % parameters 6 | n_components = 2; % Number of components to keep. 7 | end 8 | 9 | properties (GetAccess = 'public', SetAccess = 'private') 10 | % attributes 11 | components; % Principal axes in feature space. 12 | end 13 | 14 | methods 15 | % constructor 16 | function obj = PCA_(params) 17 | if nargin>0 18 | obj.set_params(params) 19 | end 20 | end 21 | 22 | % Fit the model with X. 23 | function fit(obj,X,~) 24 | obj.components = pca(X,'NumComponents',obj.n_components); 25 | end 26 | 27 | % Apply the dimensionality reduction on X. 28 | function X_new = transform(obj,X) 29 | X_new = X*obj.components; 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/ensemble/GradientBoostingRegressor.m: -------------------------------------------------------------------------------- 1 | classdef GradientBoostingRegressor < BaseEstimator & RegressorMixin 2 | % Gradient Boosting for regression. 3 | % 4 | % GB builds an additive model in a forward stage-wise fashion; it 5 | % allows for the optimization of arbitrary differentiable loss 6 | % functions. In each stage a regression tree is fit on the negative 7 | % gradient of the given loss function. 8 | 9 | properties (GetAccess = 'public', SetAccess = 'public') 10 | % parameters 11 | 12 | % Loss function to be optimized. 'ls' refers to least squares 13 | % regression. 14 | loss = 'ls'; 15 | 16 | n_estimators = 100; % The number of boosting stages to perform. 17 | end 18 | 19 | properties (GetAccess = 'public', SetAccess = 'private') 20 | % attributes 21 | regressor; % Matlab ensemble object 22 | end 23 | 24 | methods 25 | % constructor 26 | function obj = GradientBoostingRegressor(params) 27 | if nargin>0 28 | obj.set_params(params) 29 | end 30 | end 31 | 32 | % fit the gradient boosting model 33 | function fit(obj,X,y) 34 | 35 | loss_ = 'LSBoost'; 36 | switch obj.loss 37 | case 'ls' 38 | loss_ = 'LSBoost'; 39 | case 'bag' 40 | loss_ = 'Bag'; 41 | end 42 | 43 | % train 44 | regressor_ = fitensemble(X, y, loss_,... 
45 | obj.n_estimators, 'Tree', 'Type', 'regression'); 46 | 47 | % store 48 | obj.regressor = compact(regressor_); 49 | end 50 | 51 | % predict regression target for X 52 | function C = predict(obj,X) 53 | C = obj.regressor.predict(X); 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /lib/kernel_approximation/Nystroem.m: -------------------------------------------------------------------------------- 1 | classdef Nystroem < BaseEstimator & TransformerMixin 2 | % Nystroem method: Approximate a kernel map using a subset of the 3 | % training data. 4 | 5 | properties (GetAccess = 'public', SetAccess = 'public') 6 | % parameters 7 | kernel = 'rbf'; % Kernel mapping. String or callable. 8 | gamma; % Gamma parameter for the RBF and similar kernels. 9 | coef0; % Zero coefficient for polynomial and sigmoid kernels. 10 | degree; % Degree of the polynomial kernel. 11 | kernel_params; % Parameters for callable kernel function. Optional. 12 | n_components = 100; % Number of components to keep. 13 | random_state; % Seed used by the random number generator. Integer. 14 | end 15 | 16 | properties (GetAccess = 'public', SetAccess = 'private') 17 | % attributes 18 | components_; % Subset of training points to construct feature map. 19 | component_indices_; % Indices of components_ in the training set. 20 | normalization_; % Normalization matrix needed for embedding. Square 21 | % root of the kernel matrix on components_. 22 | end 23 | 24 | methods 25 | % constructor 26 | function obj = Nystroem(params) 27 | if nargin>0 28 | obj.set_params(params) 29 | end 30 | end 31 | 32 | % Fit estimator to data. 33 | function fit(obj,X,~) 34 | if ~isempty(obj.random_state) 35 | rng('default') 36 | rng(obj.random_state); 37 | end 38 | n_samples = size(X,1); 39 | 40 | % get basis vectors 41 | if obj.n_components > n_samples 42 | n_components_ = n_samples; 43 | else 44 | n_components_ = obj.n_components; 45 | end 46 | 47 | inds = randperm(n_samples); 48 | basis_inds = inds(1:n_components_); 49 | basis = X(basis_inds,:); 50 | 51 | basis_kernel = pairwise_kernels(basis, basis, obj.kernel,... 52 | obj.get_kernel_params()); 53 | 54 | % sqrt of kernel matrix on basis vectors 55 | [U,S,V] = svd(basis_kernel); 56 | S = diag(max(diag(S), 1e-12)); 57 | 58 | obj.normalization_ = transpose(U/sqrt(S))*V; 59 | obj.components_ = basis; 60 | obj.component_indices_ = inds; 61 | end 62 | 63 | % Apply feature map to X. 64 | function X_new = transform(obj,X) 65 | 66 | embedded = pairwise_kernels(X, obj.components_,... 67 | obj.kernel,obj.get_kernel_params()); 68 | X_new = embedded*transpose(obj.normalization_); 69 | end 70 | 71 | function params = get_kernel_params(obj) 72 | if isa(obj.kernel,'function_handle') 73 | params = obj.kernel_params; 74 | else 75 | params = struct(... 76 | 'gamma',obj.gamma,... 77 | 'degree',obj.degree,... 78 | 'coef0',obj.coef0); 79 | end 80 | end 81 | end 82 | end 83 | -------------------------------------------------------------------------------- /lib/kernel_ridge/KernelRidge.m: -------------------------------------------------------------------------------- 1 | classdef KernelRidge < BaseEstimator & RegressorMixin 2 | % Kernel ridge regression. 3 | % 4 | % Kernel ridge regression (KRR) combines ridge regression (linear least 5 | % squares with L2-norm regularization) with the kernel trick. It thus 6 | % learns a linear function in the space induced by the respective 7 | % kernel and the data. 
For non-linear kernels, this corresponds to a 8 | % non-linear function in the original space. 9 | % 10 | % The form of the model learned by KRR is identical to support vector 11 | % regression (SVR). However, different loss functions are used: KRR 12 | % uses squared error loss while support vector regression uses 13 | % epsilon-insensitive loss, both combined with L2 regularization. In 14 | % contrast to SVR, fitting a KRR model can be done in closed-form and 15 | % is typically faster for medium-sized datasets. On the other hand, 16 | % the learned model is non-sparse and thus slower than SVR, which 17 | % learns a sparse model for epsilon > 0, at prediction-time. 18 | 19 | properties (GetAccess = 'public', SetAccess = 'public') 20 | % parameters 21 | alpha = 1; % Regularization. Corresponds to C^-1. 22 | kernel = 'linear'; % Kernel mapping. String or callable. 23 | gamma = []; % Gamma parameter for the RBF and similar kernels. 24 | degree = 3; % Degree of the polynomial kernel. 25 | coef0 = 1; % Zero coefficient for polynomial and sigmoid kernels. 26 | kernel_params; % Parameters for callable kernel function. Optional. 27 | end 28 | 29 | properties (GetAccess = 'public', SetAccess = 'private') 30 | % attributes 31 | dual_coef_; % Representation of weight vector(s) in kernel space. 32 | X_fit_; % Stored training data. Shape: n_samples * n_features. 33 | end 34 | 35 | methods 36 | % constructor 37 | function obj = KernelRidge(params) 38 | if nargin>0 39 | obj.set_params(params) 40 | end 41 | end 42 | 43 | % Fit the kernel ridge regression model 44 | function fit(obj,X,y) 45 | % TODO: replace by Cholesky 46 | 47 | I = eye(size(X,1)); 48 | K = obj.get_kernel(X, X); 49 | 50 | obj.dual_coef_ = (K+I/obj.alpha)\y; 51 | obj.X_fit_ = X; 52 | end 53 | 54 | % Predict using the kernel ridge model 55 | function C = predict(obj,X) 56 | K = obj.get_kernel(X, obj.X_fit_); 57 | C = K*obj.dual_coef_; 58 | end 59 | 60 | function K = get_kernel(obj, X, Y) 61 | if isa(obj.kernel,'function_handle') 62 | params = obj.kernel_params; 63 | else 64 | params = struct(... 65 | 'gamma',obj.gamma,... 66 | 'degree',obj.degree,... 67 | 'coef0',obj.coef0); 68 | end 69 | K = pairwise_kernels(X, Y, obj.kernel, params); 70 | end 71 | end 72 | end -------------------------------------------------------------------------------- /lib/linear_model/Lasso_.m: -------------------------------------------------------------------------------- 1 | classdef Lasso_ < BaseEstimator 2 | % Linear Model trained with L1 prior as regularizer. 3 | % 4 | % Requires the statistics toolbox 5 | 6 | properties (GetAccess = 'public', SetAccess = 'public') 7 | % parameters 8 | alpha = 1E-2; % regularization 9 | end 10 | 11 | properties (GetAccess = 'public', SetAccess = 'private') 12 | % attributes 13 | coef_; % coefficients 14 | intercept_; % independent term in decision function. 15 | end 16 | 17 | methods 18 | % constructor 19 | function obj = Lasso_(params) 20 | if nargin>0 21 | obj.set_params(params) 22 | end 23 | end 24 | 25 | function fit(obj,X,y,~) 26 | [coef, info] = lasso(X, y, 'Standardize', false,... 
27 | 'Lambda', obj.alpha); 28 | obj.coef_ = coef; 29 | obj.intercept_ = info.Intercept; 30 | end 31 | 32 | function proba = predict_proba(obj,X,~) 33 | proba = X*obj.coef_ + obj.intercept_; 34 | end 35 | 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/linear_model/LinearRegression.m: -------------------------------------------------------------------------------- 1 | classdef LinearRegression < BaseEstimator & RegressorMixin 2 | % Ordinary least squares Linear Regression. 3 | 4 | properties (GetAccess = 'public', SetAccess = 'public') 5 | % parameters 6 | end 7 | 8 | properties (GetAccess = 'public', SetAccess = 'private') 9 | % attributes 10 | coef_; % coefficients for the linear regression problem 11 | intercept_; % independent term in the linear model 12 | end 13 | 14 | methods 15 | % constructor 16 | function obj = LinearRegression(params) 17 | if nargin>0 18 | obj.set_params(params) 19 | end 20 | end 21 | 22 | % Fit linear model. 23 | function fit(obj,X,y,~) 24 | ab = [X ones(size(X,1),1)]\y; 25 | obj.coef_ = ab(1:end-1); 26 | obj.intercept_ = ab(end); 27 | end 28 | 29 | % Predict using the linear model. 30 | function C = predict(obj,X) 31 | C = X*obj.coef_ + obj.intercept_; 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/linear_model/LogisticRegression.m: -------------------------------------------------------------------------------- 1 | classdef LogisticRegression < BaseEstimator 2 | 3 | properties 4 | reg = 1E-2; % regularization 5 | coef_; % coefficients 6 | sample_weights; % weights for individual samples 7 | end 8 | 9 | methods 10 | % constructor 11 | function obj = LogisticRegression(params) 12 | if nargin>0 13 | obj.set_params(params) 14 | end 15 | end 16 | 17 | function fit(obj,X,Y,~) 18 | % initialize as regression solution 19 | lambda = obj.reg; 20 | w = (X'*X+lambda*eye(size(X,2)))\X'*Y; 21 | 22 | if isempty(obj.sample_weights) 23 | obj.sample_weights = ones(size(X,1),1); 24 | end 25 | sw = obj.sample_weights; 26 | 27 | % logistic regression 28 | options = optimset('GradObj', 'on', 'MaxIter', 400,... 29 | 'Display', 'off'); 30 | [w, J, exit_flag] = ... 31 | fminunc(@(t)(log_cost_function_reg(t,X,Y,lambda,sw)),... 32 | w, options); %#ok 33 | 34 | obj.coef_ = w(:); 35 | end 36 | 37 | function proba = predict_proba(obj,X,~) 38 | proba = 1 ./ (1 + exp(-X*obj.coef_)); % sigmoid 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /lib/linear_model/Ridge.m: -------------------------------------------------------------------------------- 1 | classdef Ridge < BaseEstimator & RegressorMixin 2 | % Linear least squares with L2 regularization. 3 | 4 | properties (GetAccess = 'public', SetAccess = 'public') 5 | % parameters 6 | alpha = 1E2; % Regularization parameter; fit() uses 1/alpha as the L2 penalty weight.
7 | end 8 | 9 | properties (GetAccess = 'public', SetAccess = 'private') 10 | % attributes 11 | coef_; % Weight vectors 12 | end 13 | 14 | methods 15 | % constructor 16 | function obj = Ridge(params) 17 | if nargin>0 18 | obj.set_params(params) 19 | end 20 | end 21 | 22 | % Fit Ridge regression model 23 | function fit(obj,X,y) 24 | lambda = 1/obj.alpha; 25 | w = (X'*X+lambda*eye(size(X,2)))\X'*y; 26 | 27 | obj.coef_ = w(:); 28 | end 29 | 30 | function C = predict(obj,X) 31 | C = X*obj.coef_; 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/linear_model/log_cost_function_reg.m: -------------------------------------------------------------------------------- 1 | function [J, grad] = log_cost_function_reg(theta, X, y, lambda, weights) 2 | % LOG_COST_FUNCTION_REG Compute cost and gradient for logistic regression 3 | % with regularization. 4 | % J = LOG_COST_FUNCTION_REG(theta, X, y, lambda) computes the cost of 5 | % using theta as the parameter for regularized logistic regression and 6 | % the gradient of the cost w.r.t. to the parameters. 7 | % w is optional sample weight. 8 | 9 | % Initialize some useful values 10 | m = length(y); % number of training examples 11 | 12 | if nargin<5 13 | weights = ones(m,1); 14 | end 15 | 16 | % Compute the cost of a particular choice of theta. 17 | 18 | h = 1 ./ (1 + exp(-X*theta)); % sigmoid of X*theta 19 | 20 | theta_reg = theta; 21 | theta_reg(1) = 0; % exclude theta(1) from regularization 22 | 23 | % cost 24 | J = 1/m*(-y'*(log(h).*weights) - (1-y')*((log(1-h)).*weights)) + ... 25 | lambda/2/m*(theta_reg'*theta_reg); 26 | 27 | % gradient 28 | grad = 1/m*X'*((h-y).*weights) + lambda/m*theta_reg; 29 | end 30 | -------------------------------------------------------------------------------- /lib/metrics/accuracy_score.m: -------------------------------------------------------------------------------- 1 | function score = accuracy_score(y_true, y_pred, normalize, sample_weight) 2 | % Accuracy classification score. 3 | % 4 | % In multilabel classification, this function computes subset accuracy: the 5 | % set of labels predicted for a sample must *exactly* match the 6 | % corresponding set of labels in y_true. 7 | % 8 | % Inputs: 9 | % - y_true: Ground truth (correct) labels. 10 | % - y_pred: Predicted labels, as returned by a classifier. 11 | % - normalize: If False, return the number of correctly classified 12 | % samples. Otherwise, return the fraction of correctly classified 13 | % samples. default=True 14 | % - sample_weight: Sample weights. Shape: n_samples * 1. Optional. 15 | % 16 | % Outputs: 17 | % - score: If normalize == True, return the correctly classified samples 18 | % (float), else it returns the number of correctly classified samples 19 | % (int). The best performance is 1 with ``normalize == True`` and the 20 | % number of samples with ``normalize == False``. 
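%
% Illustrative example (values assumed, not taken from a demo):
%   accuracy_score([1;0;1], [1;1;1])          % returns 2/3
%   accuracy_score([1;0;1], [1;1;1], false)   % returns 2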
21 | 22 | if nargin<3 23 | normalize = true; 24 | end 25 | if nargin<4 26 | sample_weight = ones(size(y_true)); 27 | end 28 | 29 | if isa(y_true,'cell') 30 | sample_score = cellfun(@isequal,y_true,y_pred); 31 | else 32 | sample_score = y_true == y_pred; 33 | end 34 | 35 | score = weighted_sum_(sample_score, sample_weight, normalize); 36 | 37 | function weighted_sum = weighted_sum_(sample_score, sample_weight, normalize) 38 | if normalize 39 | weighted_sum = mean(sample_score.*sample_weight); 40 | elseif ~isempty(sample_weight) 41 | weighted_sum = sample_score'*sample_weight; 42 | else 43 | weighted_sum = sum(sample_score); 44 | end 45 | -------------------------------------------------------------------------------- /lib/metrics/mean_squared_error.m: -------------------------------------------------------------------------------- 1 | function output_errors = mean_squared_error(y_true, y_pred, sample_weight) 2 | % Mean squared error regression loss. 3 | % 4 | % Inputs: 5 | % - y_true: Ground truth target values. Shape: n_samples * n_outputs. 6 | % - y_true: Estimated target values. Shape: n_samples * n_outputs. 7 | % - sample_weight: Sample weights. Shape: n_samples * 1. Optional. 8 | % 9 | % Output: 10 | % - output_errors: Array of MSE values. Shape: n_samples * 1. 11 | 12 | err = y_true - y_pred; 13 | 14 | if nargin < 3 15 | output_errors = mean(err(:).^2); 16 | else 17 | err = bsxfun(@times,err,sample_weight); 18 | output_errors = mean(err(:).^2); 19 | end 20 | -------------------------------------------------------------------------------- /lib/metrics/pairwise/euclidean_distances.m: -------------------------------------------------------------------------------- 1 | function distances = euclidean_distances(X, Y, squared) 2 | % Considering the rows of X (and Y) as vectors, compute the distance matrix 3 | % between each pair of vectors. 4 | 5 | if nargin<3 6 | squared = false; 7 | end 8 | 9 | n_samples_X = size(X,1); 10 | n_samples_Y = size(Y,1); 11 | 12 | norms_X = sum(X.^2,2); 13 | norms_Y = sum(Y.^2,2); 14 | 15 | dot_XX = repmat(norms_X,1,n_samples_Y); 16 | dot_YY = repmat(norms_Y.',n_samples_X,1); 17 | 18 | distances_squared = dot_XX + dot_YY - 2*X*Y.'; 19 | 20 | if squared 21 | distances = distances_squared; 22 | else 23 | distances = sqrt(distances_squared); 24 | end 25 | -------------------------------------------------------------------------------- /lib/metrics/pairwise/linear_kernel.m: -------------------------------------------------------------------------------- 1 | function K = linear_kernel(X, Y, ~) 2 | % Compute the linear kernel between X and Y. 3 | % 4 | % Inputs: 5 | % - X: Shape: n_samples_X * n_features. 6 | % - Y: Shape: n_samples_Y * n_features. 7 | % 8 | % Output: 9 | % - K: Kernel matrix. Shape: n_samples_X * n_samples_Y. 10 | 11 | K = X*Y'; 12 | -------------------------------------------------------------------------------- /lib/metrics/pairwise/pairwise_kernels.m: -------------------------------------------------------------------------------- 1 | function K = pairwise_kernels(X, Y, metric, kwds) 2 | % Compute the kernel between arrays X and optional array Y. 3 | % 4 | % This method takes either a vector array or a kernel matrix, and returns a 5 | % kernel matrix. If the input is a vector array, the kernels are computed. 6 | % If the input is a kernel matrix, it is returned instead. 7 | % 8 | % This method provides a safe way to take a kernel matrix as input, while 9 | % preserving compatibility with many other algorithms that take a vector 10 | % array. 
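%
% Illustrative call (parameter values assumed):
%   K = pairwise_kernels(X, Y, 'rbf', struct('gamma',0.5));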
11 | % 12 | % If Y is given (default is empty), then the returned matrix is the 13 | % pairwise kernel between the arrays from both X and Y. 14 | % 15 | % Valid values for metric are: 16 | % ['rbf', 'sigmoid', 'polynomial', 'poly', 'linear', 'cosine'] 17 | % 18 | % Inputs: 19 | % - X: Array of pairwise kernels between samples, or a feature array. 20 | % Shape: n_samples * n_features, or n_samples * n_samples if precomputed. 21 | % - Y: A second feature array only if X has shape n_samples_a * n_features. 22 | % - metric: The metric to use when calculating kernel between instances in 23 | % a feature array. If metric is a string, it must be one of the metrics 24 | % in pairwise_kernel_functions. If metric is "precomputed", X is assumed 25 | % to be a kernel matrix. Alternatively, if metric is a callable 26 | % function, it is called on each pair of instances (rows) and the 27 | % resulting value recorded. The callable should take two arrays from X as 28 | % input and return a value indicating the distance between them. 29 | % - kwds: Optional keyword parameters. 30 | % 31 | % Output: 32 | % - K: A kernel matrix K such that K[i, j] is the kernel between the i-th 33 | % and j-th vectors of the given matrix X, if Y is empty. If Y is not 34 | % empty, then K[i, j] is the kernel between the i-th array from X and the 35 | % j-th 36 | % array from Y. 37 | 38 | pairwise_kernel_functions = struct(... 39 | ...'additive_chi2', @additive_chi2_kernel,... 40 | ...'chi2', @chi2_kernel,... 41 | 'linear', @linear_kernel,... 42 | 'rbf', @rbf_kernel,... 43 | ...'laplacian', @laplacian_kernel,... 44 | ...'sigmoid', @sigmoid_kernel,... 45 | ...'cosine', @cosine_similarity 46 | 'polynomial', @polynomial_kernel,... 47 | 'poly', @polynomial_kernel); 48 | 49 | % If metric is 'precomputed', Y is ignored and X is returned. 50 | if strcmp(metric,'precomputed') 51 | K = X; 52 | return 53 | elseif isa(metric,'function_handle') 54 | my_fun = metric; 55 | elseif ismember(metric,fieldnames(pairwise_kernel_functions)) 56 | my_fun = pairwise_kernel_functions.(metric); 57 | else 58 | error('Unknown kernel') 59 | end 60 | 61 | K = my_fun(X,Y,kwds); 62 | -------------------------------------------------------------------------------- /lib/metrics/pairwise/polynomial_kernel.m: -------------------------------------------------------------------------------- 1 | function K = polynomial_kernel(X, Y, degree, gamma, coef0) 2 | % Compute the polynomial kernel between X and Y: 3 | % 4 | % K(x, y) = (gamma*X*Y' + coef0).^degree 5 | % 6 | % for each pair of rows x in X and y in Y. 7 | % 8 | % Inputs: 9 | % - X: Shape: n_samples_X * n_features. 10 | % - Y: Shape: n_samples_Y * n_features. 11 | % - degree: Polynomial degree. If degree is a struct it is a set of 12 | % parameters that overwrite degree, gamma and coef. 13 | % - gamma: Kernel parameter. Defaults to 1/n_features. 14 | % - coef0: Additive constant. 15 | % 16 | % Output: 17 | % - K: kernel matrix. Shape: n_samples_X * n_samples_Y. 
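%
% Illustrative call (parameter values assumed):
%   K = polynomial_kernel(X, Y, struct('degree',2,'gamma',1,'coef0',0));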
18 | 19 | if nargin<3 20 | degree = 3; 21 | end 22 | if nargin<4 23 | gamma = []; 24 | end 25 | if isempty(gamma) 26 | n_features = size(X,2); 27 | gamma = 1/n_features; 28 | end 29 | if nargin<5 30 | coef0 = 1; 31 | end 32 | 33 | if isa(degree,'struct') 34 | if ismember('gamma',fieldnames(degree)) 35 | gamma = degree.gamma; 36 | end 37 | if ismember('coef0',fieldnames(degree)) 38 | coef0 = degree.coef0; 39 | end 40 | if ismember('degree',fieldnames(degree)) 41 | degree = degree.degree; 42 | else 43 | degree = 3; 44 | end 45 | end 46 | 47 | K = (gamma*X*Y' + coef0).^degree; 48 | -------------------------------------------------------------------------------- /lib/metrics/pairwise/rbf_kernel.m: -------------------------------------------------------------------------------- 1 | function K = rbf_kernel(X, Y, gamma) 2 | % Compute the rbf (Gaussian) kernel between X and Y: 3 | % 4 | % K(x, y) = exp(-gamma * (x-y).^2) 5 | % 6 | % for each pair of rows x in X and y in Y. 7 | % 8 | % Inputs: 9 | % - X: Shape: n_samples_X * n_features. 10 | % - Y: Shape: n_samples_Y * n_features. 11 | % - gamma: Kernel parameter. Defaults to 1/n_features. Either a string or a 12 | % struct that contains the field gamma. 13 | % 14 | % Output: 15 | % - K: Kernel matrix. Shape: n_samples_X * n_samples_Y. 16 | 17 | n_features = size(X,2); 18 | 19 | % retrieve gamma 20 | if nargin<3 21 | gamma = 1/n_features; 22 | elseif isa(gamma,'struct') 23 | if ismember('gamma',fieldnames(gamma)) 24 | gamma = gamma.gamma; 25 | else 26 | gamma = 1/n_features; 27 | end 28 | end 29 | 30 | K = exp(-gamma*euclidean_distances(X,Y,true)); 31 | -------------------------------------------------------------------------------- /lib/metrics/r2_score.m: -------------------------------------------------------------------------------- 1 | function z = r2_score(y_true,y_pred,sample_weight) 2 | % R^2 (coefficient of determination) regression score function. Best 3 | % possible score is 1.0 and it can be negative (because the model can be 4 | % arbitrarily worse). A constant model that always predicts the expected 5 | % value of y, disregarding the input features, would get a R^2 score of 6 | % 0.0. 7 | % 8 | % Inputs: 9 | % - y_true: Ground truth (correct) target values. Shape: n_samples * 10 | % n_outputs. 11 | % - y_pred: Estimated target values. Shape: n_samples * n_outputs. 12 | % - sample_weight: Sample weights. Shape: n_samples * 1. Optional. 13 | % 14 | % Output: 15 | % - output_scores: The R^2 score. 16 | 17 | if nargin<3 18 | sample_weight = []; 19 | end 20 | if isempty(sample_weight) 21 | sample_weight = ones(size(y_true,1),1); 22 | end 23 | 24 | numerator = sum(bsxfun(@times,(y_true-y_pred).^2,sample_weight),1); 25 | denominator = sum(bsxfun(@times,(y_true-mean(y_true)).^2,sample_weight),1); 26 | 27 | nonzero_denominator = denominator ~= 0; 28 | nonzero_numerator = numerator ~= 0; 29 | valid_score = nonzero_denominator & nonzero_numerator; 30 | 31 | output_scores = ones(size(y_true,2),1); 32 | 33 | output_scores(valid_score) = 1 - ... 34 | numerator(valid_score)./denominator(valid_score); 35 | 36 | z = mean(output_scores); 37 | -------------------------------------------------------------------------------- /lib/neighbors/KNeighborsClassifier.m: -------------------------------------------------------------------------------- 1 | classdef KNeighborsClassifier < BaseEstimator & ClassifierMixin 2 | % Classifier implementing the k-nearest neighbors vote. 
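%
% Illustrative usage (parameter value assumed):
%   clf = KNeighborsClassifier(struct('n_neighbors',3));
%   clf.fit(X_train, y_train);
%   y_pred = clf.predict(X_test);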
3 | 4 | properties (GetAccess = 'public', SetAccess = 'public') 5 | % parameters 6 | n_neighbors = 5; % Number of neighbors to use. 7 | end 8 | 9 | properties (GetAccess = 'public', SetAccess = 'private') 10 | % attributes 11 | model; 12 | end 13 | 14 | methods 15 | % constructor 16 | function obj = KNeighborsClassifier(params) 17 | if nargin>0 18 | obj.set_params(params) 19 | end 20 | end 21 | 22 | % Fit the model. 23 | function fit(obj,X,y,~) 24 | obj.model = fitcknn(X,y,'NumNeighbors',obj.n_neighbors); 25 | end 26 | 27 | % Predict using the model. 28 | function C = predict(obj,X) 29 | C = predict(obj.model,X); 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/pipeline/Pipeline.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/steven2358/sklearn-matlab/1687ef5802032e4e0982cf7f796036ecb165647c/lib/pipeline/Pipeline.m -------------------------------------------------------------------------------- /lib/pipeline/make_pipeline.m: -------------------------------------------------------------------------------- 1 | function p = make_pipeline(varargin) 2 | % Construct a Pipeline from the given estimators. 3 | % 4 | % This is a shorthand for the Pipeline constructor; it does not require, 5 | % and does not permit, naming the estimators. Instead, they will be given 6 | % names automatically based on their types. 7 | 8 | estimators = struct; 9 | num_est = length(varargin); 10 | for i=1:num_est 11 | fn = sprintf('%s%d',class(varargin{i}),i); 12 | estimators.(fn) = varargin{i}; 13 | end 14 | 15 | p = Pipeline(estimators); 16 | -------------------------------------------------------------------------------- /lib/preprocessing/FunctionTransformer.m: -------------------------------------------------------------------------------- 1 | classdef FunctionTransformer < BaseEstimator & TransformerMixin 2 | % Constructs a transformer from an arbitrary callable. 3 | % 4 | % A FunctionTransformer forwards its X (and optionally y) arguments to 5 | % a user-defined function or function object and returns the result of 6 | % this function. This is useful for stateless transformations such as 7 | % taking the log of frequencies, doing custom scaling, etc. 8 | 9 | properties (GetAccess = 'public', SetAccess = 'public') 10 | % parameters 11 | func = @(x) x; 12 | end 13 | 14 | methods 15 | % constructor 16 | function obj = FunctionTransformer(params) 17 | if nargin>0 18 | obj.set_params(params) 19 | end 20 | end 21 | 22 | function fit(obj,X,~) %#ok 23 | end 24 | 25 | function X_new = transform(obj,X) 26 | X_new = obj.func(X); 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/preprocessing/LabelBinarizer.m: -------------------------------------------------------------------------------- 1 | classdef LabelBinarizer < BaseEstimator & TransformerMixin 2 | % Binarize labels in a one-vs-all fashion. 3 | % 4 | % https://github.com/steven2358/sklearn-matlab 5 | 6 | properties (GetAccess = 'public', SetAccess = 'public') 7 | % parameters 8 | neg_label = 0; 9 | pos_label = 1; 10 | end 11 | 12 | properties (GetAccess = 'public', SetAccess = 'private') 13 | % attributes 14 | classes_; % Holds the label for each class. 15 | end 16 | 17 | methods 18 | % constructor 19 | function obj = LabelBinarizer(params) 20 | if nargin>0 21 | obj.set_params(params) 22 | end 23 | end 24 | 25 | % Fit label binarizer. 
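% Stores the sorted unique labels of X in classes_; e.g. fitting on
% [1 2 6 4 2] sets classes_ to [1 2 4 6].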
26 | function fit(obj,X,~) 27 | C = unique(X); 28 | obj.classes_ = C; 29 | end 30 | 31 | % Transform multi-class labels to binary labels. Returns an array 32 | % of shape [n_samples, n_features_new]. 33 | function X_new = transform(obj,X) 34 | n_samples = length(X); 35 | n_features = length(obj.classes_); 36 | X_new = obj.neg_label*ones(n_samples,n_features); 37 | for i=1:n_samples 38 | X_new(i,ismember(obj.classes_,X(i))) = obj.pos_label; 39 | end 40 | end 41 | 42 | % Transform binary labels back to multi-class labels 43 | function X_orig = inverse_transform(obj,X) 44 | n_samples = size(X,1); 45 | X_orig = zeros(n_samples,1); 46 | for i=1:n_samples 47 | X_orig(i) = obj.classes_(ismember(X(i,:),obj.pos_label)); 48 | end 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /lib/preprocessing/MinMaxScaler.m: -------------------------------------------------------------------------------- 1 | classdef MinMaxScaler < BaseEstimator & TransformerMixin 2 | % Transforms features by scaling each feature to a given range. 3 | % 4 | % This estimator scales and translates each feature individually such 5 | % that it is in the given range on the training set. 6 | 7 | properties (GetAccess = 'public', SetAccess = 'public') 8 | % parameters 9 | feature_range = [0 1]; 10 | end 11 | 12 | properties (GetAccess = 'public', SetAccess = 'private') 13 | % attributes 14 | scale_; % Per feature relative scaling of the data. 15 | data_min_; % Per feature minimum seen in the data. 16 | data_max_; % Per feature maximum seen in the data. 17 | data_range_; % Per feature range seen in the data. 18 | end 19 | 20 | methods 21 | % constructor 22 | function obj = MinMaxScaler(params) 23 | if nargin>0 24 | obj.set_params(params) 25 | end 26 | end 27 | 28 | % Compute the minimum and maximum to be used for later scaling. 29 | function fit(obj,X,~) 30 | obj.data_min_ = min(X,[],1); 31 | obj.data_max_ = max(X,[],1); 32 | obj.data_range_ = obj.data_max_ - obj.data_min_; 33 | 34 | % handle data range for zero variance data 35 | obj.data_range_(obj.data_range_==0) = 1; 36 | 37 | obj.scale_ = bsxfun(@rdivide,diff(obj.feature_range),... 38 | obj.data_range_); 39 | end 40 | 41 | % Scaling features of X according to feature_range. 42 | function X_new = transform(obj,X) 43 | % scale between 0 and 1 44 | X_new = bsxfun(@minus,X,obj.data_min_); 45 | X_new = bsxfun(@rdivide,X_new,obj.data_range_); 46 | 47 | % scale between min and max 48 | X_new = bsxfun(@times,X_new,diff(obj.feature_range)); 49 | X_new = bsxfun(@plus,X_new,obj.feature_range(1)); 50 | end 51 | 52 | % Undo the scaling of X according to feature_range. 53 | function X_orig = inverse_transform(obj,X) 54 | % scale between 0 and 1 55 | X_orig = (X - obj.feature_range(1))/diff(obj.feature_range); 56 | 57 | % scale between data_min_ and data_max_ 58 | X_orig = bsxfun(@times,X_orig,obj.data_max_-obj.data_min_); 59 | X_orig = bsxfun(@plus,X_orig,obj.data_min_); 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/preprocessing/StandardScaler.m: -------------------------------------------------------------------------------- 1 | classdef StandardScaler < BaseEstimator & TransformerMixin 2 | % Standardize features by removing the mean and scaling to unit 3 | % variance. 
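%
% For each feature, transform() computes z = (x - mean_) .* scale_ with
% scale_ = 1./sqrt(var_); features with zero variance keep a scale of 1,
% as handled in fit() below.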
4 | 5 | properties (GetAccess = 'public', SetAccess = 'public') 6 | % parameters 7 | with_mean = true; 8 | with_std = true; 9 | end 10 | 11 | properties (GetAccess = 'public', SetAccess = 'private') 12 | % attributes 13 | scale_; % Per feature relative scaling of the data. 14 | mean_; % The mean value for each feature in the training set. 15 | var_; % The variance for each feature in the training set. 16 | end 17 | 18 | methods 19 | % constructor 20 | function obj = StandardScaler(params) 21 | if nargin>0 22 | obj.set_params(params) 23 | end 24 | end 25 | 26 | % Compute the mean and std to be used for later scaling. 27 | function fit(obj,X,~) 28 | if obj.with_mean 29 | obj.mean_ = mean(X,1); 30 | end 31 | if obj.with_std 32 | obj.var_ = var(X); 33 | obj.scale_ = 1./sqrt(obj.var_); 34 | 35 | % handle scale for zero variance 36 | obj.scale_(obj.var_==0) = 1; 37 | end 38 | end 39 | 40 | % Perform standardization by centering and scaling 41 | function X_new = transform(obj,X) 42 | X_new = X; 43 | if obj.with_mean 44 | X_new = bsxfun(@minus,X_new,obj.mean_); 45 | end 46 | if obj.with_std 47 | X_new = bsxfun(@times,X_new,obj.scale_); 48 | end 49 | end 50 | 51 | % Scale back the data to the original representation 52 | function X_orig = inverse_transform(obj,X) 53 | X_orig = X; 54 | if obj.with_mean 55 | X_orig = bsxfun(@plus,X_orig,obj.mean_); 56 | end 57 | if obj.with_std 58 | X_orig = bsxfun(@rdivide,X_orig,obj.scale_); 59 | end 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/svm/SVC.m: -------------------------------------------------------------------------------- 1 | classdef SVC < BaseEstimator & ClassifierMixin 2 | % Support Vector Classification. 3 | 4 | properties (GetAccess = 'public', SetAccess = 'public') 5 | % parameters 6 | C = 1; % Penalty parameter C of the error term. 7 | kernel = 'rbf'; % Kernel mapping. String or callable. 8 | gamma = 1; % Gamma parameter for the RBF. Scalar or 'auto'. 9 | end 10 | 11 | properties (GetAccess = 'public', SetAccess = 'private') 12 | % attributes 13 | model; 14 | end 15 | 16 | methods 17 | % constructor 18 | function obj = SVC(params) 19 | if nargin>0 20 | obj.set_params(params) 21 | end 22 | end 23 | 24 | % Fit the model 25 | function fit(obj,X,y) 26 | obj.model = fitcsvm(X,y,... 27 | 'KernelFunction',upper(obj.kernel),... 28 | 'Cost',[0 obj.C; obj.C 0],... 29 | 'KernelScale',1/sqrt(obj.gamma)); 30 | end 31 | 32 | % Predict using the model 33 | function C = predict(obj,X) 34 | C = predict(obj.model,X); 35 | end 36 | 37 | function proba = pred_proba(obj,X) 38 | [~,score] = predict(obj.model,X); 39 | proba = score(:,2); 40 | end 41 | end 42 | end 43 | --------------------------------------------------------------------------------