├── .gitignore
├── LICENSE
├── README.md
├── mk_ls_svm_lib
    ├── __init__.py
    ├── crossvalidation.py
    ├── kernel.py
    └── mk_ls_svm.py
└── report.pdf


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 FormMe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MultipleKernel-LeastSquares-SupportVectorMachine
 2 | 
 3 | **The research thesis report (in Russian) is available in report.pdf**
 4 | 
 5 | **First, import the package**
 6 | ```python
 7 | import mk_ls_svm_lib as mk
 8 | ```
 9 | **Create a classifier instance with a list of kernels**
10 | ```python
11 | kernel_set = [mk.kernel.RBF(10), mk.kernel.Poly(1,2)]
12 | clf = mk.mk_ls_svm.MKLSSVM(kernel_set)
13 | ```
14 | **Fit the classifier**
15 | ```python
16 | import numpy as np
17 | X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
18 | y = np.array([1, 1, 2, 2]) 
19 | clf = clf.fit(X,y)
20 | ```
21 | **Predict**
22 | ```python
23 | predicted_y = clf.predict(X)
24 | ```
25 | **You can save your classifier to a file**
26 | ```python
27 | clf.to_pkl('my_clf.pkl')
28 | ```
29 | **And load it**
30 | ```python
31 | clf = mk.mk_ls_svm.load_clf_from_pkl('my_clf.pkl') 
32 | ```
33 | **You can also use the built-in k-fold cross-validation (returns the mean and standard deviation of the fold accuracies)**
34 | ```python
35 | mean_score, std_score = mk.crossvalidation.cross_val_score(clf, X, y)
36 | ```
37 |  
38 | 


--------------------------------------------------------------------------------
/mk_ls_svm_lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FormMe/MultipleKernel-LeastSquares-SupportVectorMachine/179dea95d8012332ac598c3051d71cfb45aed32e/mk_ls_svm_lib/__init__.py


--------------------------------------------------------------------------------
/mk_ls_svm_lib/crossvalidation.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.metrics import accuracy_score
 3 | 
 4 | def cross_val_score(clf, X, y, n_splits=10):
 5 |     '''K-fold Cross-Validation
 6 | 
 7 |     :param clf: object
 8 |         The object to use to fit the data.
 9 |     :param X: array-like, shape = [n_samples, n_features]
10 |         The data to fit. Can be, for example a list, or an array at least 2d.
11 |     :param y: array-like, shape = [n_samples]
12 |         The target variable to try to predict in the case of supervised learning.
13 |     :param n_splits: int, optional (default=10)
14 |         Number of folds. Must be at least 2.
15 |     :return: float
16 |         Score of the estimator for each run of the cross validation.
17 |     '''
18 |     data = list(zip(X, y))
19 |     np.random.shuffle(data)
20 |     data = np.array(data)
21 |     test_size = int(len(X) / n_splits)
22 |     scores = []
23 |     for slice in range(n_splits):
24 |         test_start = slice*test_size
25 |         test_data = data[test_start:test_start+test_size]
26 | 
27 |         a = data[:test_start]
28 |         b = data[test_start+test_size:]
29 |         train_data = np.concatenate((a, b), axis=0)
30 | 
31 |         test_X = test_data[:, 0]
32 |         test_y = test_data[:, 1]
33 |         train_X = train_data[:, 0]
34 |         train_y = train_data[:, 1]
35 | 
36 |         clf.fit(np.array(train_X),np.array(train_y))
37 |         score = accuracy_score(list(test_y), list(clf.predict(test_X)))
38 |         scores.append(score)
39 |     return np.mean(scores), np.std(scores)
40 | 


--------------------------------------------------------------------------------
/mk_ls_svm_lib/kernel.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class RBF:
 5 |     def __init__(self, sigma):
 6 |         '''
 7 |         RBF-kernel
 8 |         :param sigma: float
 9 |         '''
10 |         self._sigma = sigma
11 | 
12 |     def compute(self, x, y):
13 |         '''
14 |         :param x: array-like
15 |             First vector.
16 |         :param y: array-like
17 |             Second vector.
18 |         :return: float
19 |         '''
20 |         sqr_dist = sum([(it1 - it2)**2 for it1, it2 in zip(x,y)])
21 |         return np.e**(-1.0 * (sqr_dist ** 2) / (2 * (self._sigma ** 2)))
22 | 
23 |     def display(self):
24 |         return 'RBF('+str(round(self._sigma, 4))+')'
25 | 
class Poly:
    '''Polynomial kernel: K(x, y) = (<x, y> + c) ** d.'''

    def __init__(self, c, d):
        '''Store the kernel parameters.

        :param c: float
            Offset added to the dot product; trades off the influence of
            higher-order versus lower-order terms in the polynomial.
        :param d: float
            Degree of the polynomial.
        '''
        self.__c = c
        self.__d = d

    def compute(self, x, y):
        '''Evaluate the kernel for a pair of vectors.

        :param x: array-like
            First vector.
        :param y: array-like
            Second vector.
        :return: float
        '''
        shifted_product = np.dot(x, y) + self.__c
        return shifted_product ** self.__d

    def display(self):
        '''Return a short text description of the form ``Poly(c, d)``.'''
        params = ', '.join([str(self.__c), str(self.__d)])
        return 'Poly(' + params + ')'
50 | 


--------------------------------------------------------------------------------
/mk_ls_svm_lib/mk_ls_svm.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import numpy
  3 | import scipy
  4 | from scipy.optimize import (minimize)
  5 | from functools import reduce
  6 | 
  7 | class MKLSSVM:
  8 |     def __init__(self, kernel_set, C=1.0, tol=1e-4, max_iter=50):
  9 |         '''MultipleKernel Least Squares Suport Vector Machine for binary classification.
 10 | 
 11 |         :param kernel_set: list of instances from kernel
 12 |             Set of kernels.
 13 |         :param C: float, optional (default=1.0)
 14 |             Penalty parameter C of the error term.
 15 |         :param tol: float, optional (default=1e-4)
 16 |             Tolerance for stopping criterion.
 17 |         :param max_iter: int, optional (default=50)
 18 |            Hard limit on iterations within solver > 0.
 19 |         '''
 20 |         self.C = C
 21 |         self.tol = tol
 22 |         self.max_iter = max_iter
 23 |         self.kernel_set = kernel_set
 24 |         self.beta = numpy.array([1.0 / len(kernel_set) for _ in kernel_set])
 25 |         self.fited = False
 26 | 
 27 |     def fit(self, data, target):
 28 |         '''Fit the SVM model according to the given training data.
 29 | 
 30 |         :param data: array-like, shape = [n_samples, n_features]
 31 |             ОTraining vectors, where n_samples is the number of samples and n_features is the number of features. 
 32 |         :param target: array-like, shape = [n_samples]
 33 |             Target values (class labels).
 34 |         :return self: object
 35 |             Returns self.
 36 |         '''
 37 |         def kernel_matrix():
 38 |             trainSeqLen = len(target)
 39 |             H_vec = []
 40 |             for K in self.kernel_set:
 41 |                 H = numpy.matrix(numpy.zeros(shape=(trainSeqLen, trainSeqLen)))
 42 |                 for i in range(trainSeqLen):
 43 |                     for j in range(i, trainSeqLen):
 44 |                         val = K.compute(data[i], data[j])
 45 |                         H[i, j] = val
 46 |                         H[j, i] = val
 47 |                 H_vec.append(H)
 48 |             return H_vec
 49 | 
 50 |         def kernel_matrix_y():
 51 |             Ky_vec = []
 52 |             for H in self.__Hvec:
 53 |                 Ky = []
 54 |                 for i, _ in enumerate(H):
 55 |                     Ky.append(numpy.asarray([y * H[i, j] for j, y in enumerate(target)], dtype=float))
 56 |                 Ky_vec.append(Ky)
 57 |             return Ky_vec
 58 | 
 59 |         def lagrange_coefficient_estimation():
 60 |             trainSeqLen = len(target)
 61 |             weighted_H = map(lambda h, beta: h * beta, self.__Hvec, self.beta)
 62 |             H = reduce(lambda p_h, h: p_h + h, weighted_H)
 63 |             for i in range(trainSeqLen):
 64 |                 for j in range(i, trainSeqLen):
 65 |                     H[i, j] *= target[i] * target[j]
 66 |                     H[j, i] *= target[j] * target[i]
 67 |                     if i == j:
 68 |                         H[i, j] += 1.0 / self.C
 69 | 
 70 |             d = numpy.ones(trainSeqLen)
 71 |             eta = scipy.sparse.linalg.cg(H, target, maxiter=1000)[0]
 72 |             nu = scipy.sparse.linalg.cg(H, d, maxiter=1000)[0]
 73 |             s = numpy.dot(target.T, eta)
 74 |             b = numpy.dot(eta.T, d) / s
 75 |             alpha = nu - eta * b
 76 |             return b, alpha
 77 | 
 78 |         def kernel_coefficient_estimation():
 79 |             def score_func(beta_vec):
 80 |                 def K_sum(i):
 81 |                     weighted_kernels = [b_c * K[i] for b_c, K in zip(beta_vec, self.__Kyvec)]
 82 |                     return numpy.array(reduce(lambda l, m: l + m, weighted_kernels))
 83 | 
 84 |                 loss_func_vec = []
 85 |                 for i, y in enumerate(target):
 86 |                     weighted_kernels_sum = K_sum(i)
 87 |                     loss_func_vec.append(1.0 - y * self.b - y * numpy.dot(weighted_kernels_sum, self.alpha))
 88 | 
 89 |                 loss_func = reduce(lambda e1, e2: e1 + e2 ** 2, loss_func_vec)
 90 |                 return loss_func
 91 | 
 92 |             cons = ({'type': 'eq', 'fun': lambda x: sum(x) - 1.0})
 93 |             bnds = [(0.0, 1.0) for _ in self.beta]
 94 |             betaopt = minimize(score_func, self.beta,
 95 |                                bounds=bnds, constraints=cons,
 96 |                                method='SLSQP',
 97 |                                options={'maxiter': 1000, 'disp': False})
 98 | 
 99 |             return betaopt.x, betaopt.fun
100 | 
101 |         classes = numpy.unique(target)
102 | 
103 |         if len(classes) == 1 or len(classes) != 2:
104 |             raise Exception('Number of class should be equal two.')
105 |         self.class_dict = {
106 |             '1.0': classes[0],
107 |             '-1.0': classes[1]}
108 |         target = numpy.array(list(map(lambda y: 1.0 if y == classes[0] else -1.0, target)))
109 | 
110 |         self.__Xfit = data
111 |         self.__Yfit = target
112 | 
113 |         self.__Hvec = kernel_matrix()
114 |         self.__Kyvec = kernel_matrix_y()
115 | 
116 |         prev_score_value = 0
117 |         prev_beta_norm = numpy.linalg.norm(self.beta)
118 |         cur_iter = 0
119 |         while True:
120 |             self.b, self.alpha = lagrange_coefficient_estimation()
121 |             if len(self.kernel_set) == 1:
122 |                 break
123 |             self.beta, score_value = kernel_coefficient_estimation()
124 |             # выход по количеству итераций
125 |             if cur_iter >= self.max_iter:
126 |                 break
127 |             # выход по невязке функции
128 |             if abs(prev_score_value - score_value) < self.tol:
129 |                 break
130 |             # выход по невязке нормы коэфициентов
131 |             beta_norm = numpy.linalg.norm(self.beta)
132 |             if abs(prev_beta_norm - beta_norm) < self.tol:
133 |                 break
134 |             prev_score_value = score_value
135 |             prev_beta_norm = beta_norm
136 |             cur_iter += 1
137 | 
138 |         self.fited = True
139 |         return self
140 | 
141 |     def predict(self, data):
142 |         '''Perform classification on samples in data.
143 | 
144 |         :param data: array-like, shape = [n_samples, n_features]
145 |         :return target: array-like, shape = [n_samples]
146 |             Class labels for samples in data.
147 |         '''
148 |         def y_prediction(z):
149 |             support_vectors_sum = sum([alpha * y *
150 |                                        sum([beta * K.compute(z, x) for beta, K in zip(self.beta, self.kernel_set)])
151 |                                        for alpha, x, y in zip(self.alpha, self.__Xfit, self.__Yfit)])
152 | 
153 |             p = support_vectors_sum + self.b
154 |             if p == 0.0:
155 |                 p = 1.0;
156 |             return self.class_dict[str(numpy.sign(p))]
157 | 
158 |         if not self.fited:
159 |             raise Exception("Fit classificator before.")
160 | 
161 |         return [y_prediction(test_x) for test_x in data]
162 | 
163 |     def to_pkl(self, filename):
164 |         '''Save classificator to *.pkl file.
165 | 
166 |         :param filename:
167 |             File with extention *.pkl to save.
168 |         :return:
169 |         '''
170 |         
171 |         if not self.fited:
172 |             raise Exception("Fit classificator before.")
173 |         with open(filename, 'wb') as output:
174 |             pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
175 | 
176 | 
def load_clf_from_pkl(filename):
    '''Restore a classifier previously written to a *.pkl file.

    NOTE: the file is read with ``pickle.load``, which can execute arbitrary
    code while unpickling. Only load files that come from a trusted source.

    :param filename:
        Path of the *.pkl file to read.
    :return: MKLSSVM
        The unpickled classifier.
    '''
    with open(filename, 'rb') as pkl_file:
        classifier = pickle.load(pkl_file)
    return classifier


--------------------------------------------------------------------------------
/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FormMe/MultipleKernel-LeastSquares-SupportVectorMachine/179dea95d8012332ac598c3051d71cfb45aed32e/report.pdf


--------------------------------------------------------------------------------