├── .gitignore ├── LICENSE ├── README.md ├── mk_ls_svm_lib ├── __init__.py ├── crossvalidation.py ├── kernel.py └── mk_ls_svm.py └── report.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 FormMe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MultipleKernel-LeastSquares-SuportVectorMachine 2 | 3 | **The research thesis report in Russian presented in report.pdf** 4 | 5 | **Firstly import package** 6 | ```python 7 | from mk_ls_svm_lib as mk 8 | ``` 9 | **Create instance of classificator with list of kernels** 10 | ```python 11 | kernel_set = [mk.kernel.RBF(10), mk.kernel.Poly(1,2)] 12 | clf = mk.mk_ls_svm.MKLSSVM(kernel_set) 13 | ``` 14 | **Fit classificator** 15 | ```python 16 | import numpy as np 17 | X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) 18 | y = np.array([1, 1, 2, 2]) 19 | clf = clf.fit(X,y) 20 | ``` 21 | **Predict** 22 | ```python 23 | predicted_y = clf.predict(X) 24 | ``` 25 | **You can save your classificator into file** 26 | ```python 27 | clf.to_pkl('my_clf.pkl') 28 | ``` 29 | **And load it** 30 | ```python 31 | clf = mk.mk_ls_svm.load_clf_from_pkl('my_clf.pkl') 32 | ``` 33 | **Also you can use built-in k-fold crossvalidation** 34 | ```python 35 | score = mk.crossvalidation.cross_val_score(clf, X, y) 36 | ``` 37 |   38 | -------------------------------------------------------------------------------- /mk_ls_svm_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FormMe/MultipleKernel-LeastSquares-SupportVectorMachine/179dea95d8012332ac598c3051d71cfb45aed32e/mk_ls_svm_lib/__init__.py -------------------------------------------------------------------------------- /mk_ls_svm_lib/crossvalidation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import accuracy_score 3 | 4 | def cross_val_score(clf, X, y, n_splits=10): 5 | '''K-fold Cross-Validation 6 | 7 | :param clf: object 8 | The object to use to fit the data. 9 | :param X: array-like, shape = [n_samples, n_features] 10 | The data to fit. Can be, for example a list, or an array at least 2d. 11 | :param y: array-like, shape = [n_samples] 12 | The target variable to try to predict in the case of supervised learning. 13 | :param n_splits: int, optional (default=10) 14 | Number of folds. Must be at least 2. 15 | :return: float 16 | Score of the estimator for each run of the cross validation. 17 | ''' 18 | data = list(zip(X, y)) 19 | np.random.shuffle(data) 20 | data = np.array(data) 21 | test_size = int(len(X) / n_splits) 22 | scores = [] 23 | for slice in range(n_splits): 24 | test_start = slice*test_size 25 | test_data = data[test_start:test_start+test_size] 26 | 27 | a = data[:test_start] 28 | b = data[test_start+test_size:] 29 | train_data = np.concatenate((a, b), axis=0) 30 | 31 | test_X = test_data[:, 0] 32 | test_y = test_data[:, 1] 33 | train_X = train_data[:, 0] 34 | train_y = train_data[:, 1] 35 | 36 | clf.fit(np.array(train_X),np.array(train_y)) 37 | score = accuracy_score(list(test_y), list(clf.predict(test_X))) 38 | scores.append(score) 39 | return np.mean(scores), np.std(scores) 40 | -------------------------------------------------------------------------------- /mk_ls_svm_lib/kernel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RBF: 5 | def __init__(self, sigma): 6 | ''' 7 | RBF-kernel 8 | :param sigma: float 9 | ''' 10 | self._sigma = sigma 11 | 12 | def compute(self, x, y): 13 | ''' 14 | :param x: array-like 15 | First vector. 16 | :param y: array-like 17 | Second vector. 18 | :return: float 19 | ''' 20 | sqr_dist = sum([(it1 - it2)**2 for it1, it2 in zip(x,y)]) 21 | return np.e**(-1.0 * (sqr_dist ** 2) / (2 * (self._sigma ** 2))) 22 | 23 | def display(self): 24 | return 'RBF('+str(round(self._sigma, 4))+')' 25 | 26 | class Poly: 27 | def __init__(self, c, d): 28 | ''' Polynomial kernel 29 | 30 | :param c: float 31 | Parameter trading off the influence of higher-order versus lower-order terms in the polynomial. 32 | :param d: float 33 | Degree. 34 | ''' 35 | self.__c = c 36 | self.__d = d 37 | 38 | def compute(self, x, y): 39 | ''' 40 | :param x: array-like 41 | First vector. 42 | :param y: array-like 43 | Second vector. 44 | :return: float 45 | ''' 46 | return (np.dot(x,y) + self.__c)**self.__d 47 | 48 | def display(self): 49 | return 'Poly(' + str(self.__c) + ', ' + str(self.__d) + ')' 50 | -------------------------------------------------------------------------------- /mk_ls_svm_lib/mk_ls_svm.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy 3 | import scipy 4 | from scipy.optimize import (minimize) 5 | from functools import reduce 6 | 7 | class MKLSSVM: 8 | def __init__(self, kernel_set, C=1.0, tol=1e-4, max_iter=50): 9 | '''MultipleKernel Least Squares Suport Vector Machine for binary classification. 10 | 11 | :param kernel_set: list of instances from kernel 12 | Set of kernels. 13 | :param C: float, optional (default=1.0) 14 | Penalty parameter C of the error term. 15 | :param tol: float, optional (default=1e-4) 16 | Tolerance for stopping criterion. 17 | :param max_iter: int, optional (default=50) 18 | Hard limit on iterations within solver > 0. 19 | ''' 20 | self.C = C 21 | self.tol = tol 22 | self.max_iter = max_iter 23 | self.kernel_set = kernel_set 24 | self.beta = numpy.array([1.0 / len(kernel_set) for _ in kernel_set]) 25 | self.fited = False 26 | 27 | def fit(self, data, target): 28 | '''Fit the SVM model according to the given training data. 29 | 30 | :param data: array-like, shape = [n_samples, n_features] 31 | ОTraining vectors, where n_samples is the number of samples and n_features is the number of features. 32 | :param target: array-like, shape = [n_samples] 33 | Target values (class labels). 34 | :return self: object 35 | Returns self. 36 | ''' 37 | def kernel_matrix(): 38 | trainSeqLen = len(target) 39 | H_vec = [] 40 | for K in self.kernel_set: 41 | H = numpy.matrix(numpy.zeros(shape=(trainSeqLen, trainSeqLen))) 42 | for i in range(trainSeqLen): 43 | for j in range(i, trainSeqLen): 44 | val = K.compute(data[i], data[j]) 45 | H[i, j] = val 46 | H[j, i] = val 47 | H_vec.append(H) 48 | return H_vec 49 | 50 | def kernel_matrix_y(): 51 | Ky_vec = [] 52 | for H in self.__Hvec: 53 | Ky = [] 54 | for i, _ in enumerate(H): 55 | Ky.append(numpy.asarray([y * H[i, j] for j, y in enumerate(target)], dtype=float)) 56 | Ky_vec.append(Ky) 57 | return Ky_vec 58 | 59 | def lagrange_coefficient_estimation(): 60 | trainSeqLen = len(target) 61 | weighted_H = map(lambda h, beta: h * beta, self.__Hvec, self.beta) 62 | H = reduce(lambda p_h, h: p_h + h, weighted_H) 63 | for i in range(trainSeqLen): 64 | for j in range(i, trainSeqLen): 65 | H[i, j] *= target[i] * target[j] 66 | H[j, i] *= target[j] * target[i] 67 | if i == j: 68 | H[i, j] += 1.0 / self.C 69 | 70 | d = numpy.ones(trainSeqLen) 71 | eta = scipy.sparse.linalg.cg(H, target, maxiter=1000)[0] 72 | nu = scipy.sparse.linalg.cg(H, d, maxiter=1000)[0] 73 | s = numpy.dot(target.T, eta) 74 | b = numpy.dot(eta.T, d) / s 75 | alpha = nu - eta * b 76 | return b, alpha 77 | 78 | def kernel_coefficient_estimation(): 79 | def score_func(beta_vec): 80 | def K_sum(i): 81 | weighted_kernels = [b_c * K[i] for b_c, K in zip(beta_vec, self.__Kyvec)] 82 | return numpy.array(reduce(lambda l, m: l + m, weighted_kernels)) 83 | 84 | loss_func_vec = [] 85 | for i, y in enumerate(target): 86 | weighted_kernels_sum = K_sum(i) 87 | loss_func_vec.append(1.0 - y * self.b - y * numpy.dot(weighted_kernels_sum, self.alpha)) 88 | 89 | loss_func = reduce(lambda e1, e2: e1 + e2 ** 2, loss_func_vec) 90 | return loss_func 91 | 92 | cons = ({'type': 'eq', 'fun': lambda x: sum(x) - 1.0}) 93 | bnds = [(0.0, 1.0) for _ in self.beta] 94 | betaopt = minimize(score_func, self.beta, 95 | bounds=bnds, constraints=cons, 96 | method='SLSQP', 97 | options={'maxiter': 1000, 'disp': False}) 98 | 99 | return betaopt.x, betaopt.fun 100 | 101 | classes = numpy.unique(target) 102 | 103 | if len(classes) == 1 or len(classes) != 2: 104 | raise Exception('Number of class should be equal two.') 105 | self.class_dict = { 106 | '1.0': classes[0], 107 | '-1.0': classes[1]} 108 | target = numpy.array(list(map(lambda y: 1.0 if y == classes[0] else -1.0, target))) 109 | 110 | self.__Xfit = data 111 | self.__Yfit = target 112 | 113 | self.__Hvec = kernel_matrix() 114 | self.__Kyvec = kernel_matrix_y() 115 | 116 | prev_score_value = 0 117 | prev_beta_norm = numpy.linalg.norm(self.beta) 118 | cur_iter = 0 119 | while True: 120 | self.b, self.alpha = lagrange_coefficient_estimation() 121 | if len(self.kernel_set) == 1: 122 | break 123 | self.beta, score_value = kernel_coefficient_estimation() 124 | # выход по количеству итераций 125 | if cur_iter >= self.max_iter: 126 | break 127 | # выход по невязке функции 128 | if abs(prev_score_value - score_value) < self.tol: 129 | break 130 | # выход по невязке нормы коэфициентов 131 | beta_norm = numpy.linalg.norm(self.beta) 132 | if abs(prev_beta_norm - beta_norm) < self.tol: 133 | break 134 | prev_score_value = score_value 135 | prev_beta_norm = beta_norm 136 | cur_iter += 1 137 | 138 | self.fited = True 139 | return self 140 | 141 | def predict(self, data): 142 | '''Perform classification on samples in data. 143 | 144 | :param data: array-like, shape = [n_samples, n_features] 145 | :return target: array-like, shape = [n_samples] 146 | Class labels for samples in data. 147 | ''' 148 | def y_prediction(z): 149 | support_vectors_sum = sum([alpha * y * 150 | sum([beta * K.compute(z, x) for beta, K in zip(self.beta, self.kernel_set)]) 151 | for alpha, x, y in zip(self.alpha, self.__Xfit, self.__Yfit)]) 152 | 153 | p = support_vectors_sum + self.b 154 | if p == 0.0: 155 | p = 1.0; 156 | return self.class_dict[str(numpy.sign(p))] 157 | 158 | if not self.fited: 159 | raise Exception("Fit classificator before.") 160 | 161 | return [y_prediction(test_x) for test_x in data] 162 | 163 | def to_pkl(self, filename): 164 | '''Save classificator to *.pkl file. 165 | 166 | :param filename: 167 | File with extention *.pkl to save. 168 | :return: 169 | ''' 170 | 171 | if not self.fited: 172 | raise Exception("Fit classificator before.") 173 | with open(filename, 'wb') as output: 174 | pickle.dump(self, output, pickle.HIGHEST_PROTOCOL) 175 | 176 | 177 | def load_clf_from_pkl(filename): 178 | '''Load classificator from *.pkl file. 179 | 180 | :param filename: 181 | File with extention *.pkl to load. 182 | :return: MKLSSVM 183 | Classificator 184 | ''' 185 | with open(filename, 'rb') as input: 186 | return pickle.load(input) -------------------------------------------------------------------------------- /report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FormMe/MultipleKernel-LeastSquares-SupportVectorMachine/179dea95d8012332ac598c3051d71cfb45aed32e/report.pdf --------------------------------------------------------------------------------