├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── mlfromscratch ├── __init__.py ├── data │ └── TempLinkoping2016.txt ├── deep_learning │ ├── __init__.py │ ├── activation_functions.py │ ├── layers.py │ ├── loss_functions.py │ ├── neural_network.py │ └── optimizers.py ├── examples │ ├── adaboost.py │ ├── apriori.py │ ├── bayesian_regression.py │ ├── convolutional_neural_network.py │ ├── dbscan.py │ ├── decision_tree_classifier.py │ ├── decision_tree_regressor.py │ ├── deep_q_network.py │ ├── demo.py │ ├── elastic_net.py │ ├── fp_growth.py │ ├── gaussian_mixture_model.py │ ├── genetic_algorithm.py │ ├── gradient_boosting_classifier.py │ ├── gradient_boosting_regressor.py │ ├── k_means.py │ ├── k_nearest_neighbors.py │ ├── lasso_regression.py │ ├── linear_discriminant_analysis.py │ ├── linear_regression.py │ ├── logistic_regression.py │ ├── multi_class_lda.py │ ├── multilayer_perceptron.py │ ├── naive_bayes.py │ ├── neuroevolution.py │ ├── particle_swarm_optimization.py │ ├── partitioning_around_medoids.py │ ├── perceptron.py │ ├── polynomial_regression.py │ ├── principal_component_analysis.py │ ├── random_forest.py │ ├── recurrent_neural_network.py │ ├── restricted_boltzmann_machine.py │ ├── ridge_regression.py │ ├── support_vector_machine.py │ └── xgboost.py ├── reinforcement_learning │ ├── __init__.py │ └── deep_q_network.py ├── supervised_learning │ ├── __init__.py │ ├── adaboost.py │ ├── bayesian_regression.py │ ├── decision_tree.py │ ├── gradient_boosting.py │ ├── k_nearest_neighbors.py │ ├── linear_discriminant_analysis.py │ ├── logistic_regression.py │ ├── multi_class_lda.py │ ├── multilayer_perceptron.py │ ├── naive_bayes.py │ ├── neuroevolution.py │ ├── particle_swarm_optimization.py │ ├── perceptron.py │ ├── random_forest.py │ ├── regression.py │ ├── support_vector_machine.py │ └── xgboost.py ├── unsupervised_learning │ ├── __init__.py │ ├── apriori.py │ ├── autoencoder.py │ ├── dbscan.py │ ├── dcgan.py │ ├── fp_growth.py │ ├── gaussian_mixture_model.py │ ├── generative_adversarial_network.py │ ├── genetic_algorithm.py │ ├── k_means.py │ ├── partitioning_around_medoids.py │ ├── principal_component_analysis.py │ └── restricted_boltzmann_machine.py └── utils │ ├── __init__.py │ ├── data_manipulation.py │ ├── data_operation.py │ ├── kernels.py │ └── misc.py ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | \.DS_STORE 3 | build/ 4 | dist/ 5 | *egg-info* 6 | *__pycache__/ 7 | *.py[cod] 8 | *eggs* 9 | *\.png 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Erik Linder-Norén 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include mlfs.data * -------------------------------------------------------------------------------- /mlfromscratch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriklindernoren/ML-From-Scratch/a2806c6732eee8d27762edd6d864e0c179d8e9e8/mlfromscratch/__init__.py -------------------------------------------------------------------------------- /mlfromscratch/data/TempLinkoping2016.txt: -------------------------------------------------------------------------------- 1 | time temp 2 | 0.00273224 0.1 3 | 0.005464481 -4.5 4 | 0.008196721 -6.3 5 | 0.010928962 -9.6 6 | 0.013661202 -9.9 7 | 0.016393443 -17.1 8 | 0.019125683 -11.6 9 | 0.021857923 -6.2 10 | 0.024590164 -6.4 11 | 0.027322404 -0.5 12 | 0.030054645 0.5 13 | 0.032786885 -2.4 14 | 0.035519126 -7.5 15 | 0.038251366 -16.8 16 | 0.040983607 -16.6 17 | 0.043715847 -14.6 18 | 0.046448087 -9.6 19 | 0.049180328 -5.8 20 | 0.051912568 -8.6 21 | 0.054644809 -9.0 22 | 0.057377049 -9.7 23 | 0.06010929 -6.9 24 | 0.06284153 -3.9 25 | 0.06557377 1.4 26 | 0.068306011 1.9 27 | 0.071038251 4.3 28 | 0.073770492 6.9 29 | 0.076502732 4.3 30 | 0.079234973 5.9 31 | 0.081967213 3.8 32 | 0.084699454 1.5 33 | 0.087431694 0.1 34 | 0.090163934 4.6 35 | 0.092896175 0.8 36 | 0.095628415 -0.5 37 | 0.098360656 -1.0 38 | 0.101092896 4.2 39 | 0.103825137 6.6 40 | 0.106557377 4.8 41 | 0.109289617 4.7 42 | 0.112021858 1.3 43 | 0.114754098 0.9 44 | 0.117486339 -2.8 45 | 0.120218579 -3.3 46 | 0.12295082 -5.3 47 | 0.12568306 -6.8 48 | 0.128415301 -5.1 49 | 0.131147541 -2.6 50 | 0.133879781 -0.5 51 | 0.136612022 -0.5 52 | 0.139344262 0.1 53 | 0.142076503 1.7 54 | 0.144808743 2.4 55 | 0.147540984 -0.9 56 | 0.150273224 -1.3 57 | 0.153005464 -1.4 58 | 0.155737705 -0.1 59 | 0.158469945 -0.7 60 | 0.161202186 -2.6 61 | 0.163934426 -4.1 62 | 0.166666667 -2.7 63 | 0.169398907 0.7 64 | 0.172131148 2.0 65 | 0.174863388 1.7 66 | 0.177595628 0.9 67 | 0.180327869 0.3 68 | 0.183060109 0.9 69 | 0.18579235 1.1 70 | 0.18852459 0.1 71 | 0.191256831 -0.9 72 | 0.193989071 0.2 73 | 0.196721311 0.1 74 | 0.199453552 1.0 75 | 0.202185792 3.4 76 | 0.204918033 5.2 77 | 0.207650273 4.9 78 | 0.210382514 4.9 79 | 0.213114754 2.2 80 | 0.215846995 2.9 81 | 0.218579235 5.3 82 | 0.221311475 3.7 83 | 0.224043716 3.4 84 | 0.226775956 2.1 85 | 0.229508197 1.8 86 | 0.232240437 4.3 87 | 0.234972678 7.0 88 | 0.237704918 7.7 89 | 0.240437158 6.2 90 | 0.243169399 7.5 91 | 0.245901639 4.9 92 | 0.24863388 4.4 93 | 0.25136612 3.8 94 | 0.254098361 6.4 95 | 0.256830601 8.0 96 | 0.259562842 7.9 97 | 0.262295082 8.9 98 | 0.265027322 6.6 99 | 0.267759563 6.5 100 | 0.270491803 5.8 101 | 0.273224044 5.6 102 | 0.275956284 4.7 103 | 0.278688525 5.5 104 | 0.281420765 5.5 105 | 0.284153005 5.8 106 | 0.286885246 5.3 107 | 0.289617486 6.9 108 | 0.292349727 5.9 109 | 0.295081967 
6.1 110 | 0.297814208 6.6 111 | 0.300546448 6.7 112 | 0.303278689 6.5 113 | 0.306010929 7.0 114 | 0.308743169 5.8 115 | 0.31147541 3.0 116 | 0.31420765 2.5 117 | 0.316939891 2.4 118 | 0.319672131 4.3 119 | 0.322404372 2.8 120 | 0.325136612 3.6 121 | 0.327868852 6.8 122 | 0.330601093 9.1 123 | 0.333333333 8.4 124 | 0.336065574 9.3 125 | 0.338797814 13.3 126 | 0.341530055 10.6 127 | 0.344262295 10.5 128 | 0.346994536 11.8 129 | 0.349726776 14.7 130 | 0.352459016 16.2 131 | 0.355191257 16.4 132 | 0.357923497 16.9 133 | 0.360655738 12.3 134 | 0.363387978 10.2 135 | 0.366120219 11.2 136 | 0.368852459 6.1 137 | 0.371584699 6.4 138 | 0.37431694 6.1 139 | 0.37704918 10.4 140 | 0.379781421 10.3 141 | 0.382513661 11.9 142 | 0.385245902 12.9 143 | 0.387978142 12.5 144 | 0.390710383 17.5 145 | 0.393442623 19.9 146 | 0.396174863 19.3 147 | 0.398907104 11.4 148 | 0.401639344 9.7 149 | 0.404371585 10.7 150 | 0.407103825 13.0 151 | 0.409836066 12.4 152 | 0.412568306 16.3 153 | 0.415300546 19.2 154 | 0.418032787 19.2 155 | 0.420765027 19.8 156 | 0.423497268 19.5 157 | 0.426229508 16.6 158 | 0.428961749 13.0 159 | 0.431693989 12.6 160 | 0.43442623 17.6 161 | 0.43715847 13.7 162 | 0.43989071 11.3 163 | 0.442622951 10.2 164 | 0.445355191 10.2 165 | 0.448087432 11.6 166 | 0.450819672 14.2 167 | 0.453551913 14.4 168 | 0.456284153 17.4 169 | 0.459016393 13.1 170 | 0.461748634 17.4 171 | 0.464480874 15.9 172 | 0.467213115 15.9 173 | 0.469945355 15.5 174 | 0.472677596 16.4 175 | 0.475409836 16.7 176 | 0.478142077 18.2 177 | 0.480874317 20.9 178 | 0.483606557 22.2 179 | 0.486338798 19.1 180 | 0.489071038 16.3 181 | 0.491803279 16.6 182 | 0.494535519 15.1 183 | 0.49726776 14.5 184 | 0.5 17.4 185 | 0.50273224 16.5 186 | 0.505464481 13.7 187 | 0.508196721 14.0 188 | 0.510928962 14.2 189 | 0.513661202 15.6 190 | 0.516393443 15.7 191 | 0.519125683 15.6 192 | 0.521857923 16.2 193 | 0.524590164 16.3 194 | 0.527322404 18.3 195 | 0.530054645 16.6 196 | 0.532786885 16.1 197 | 0.535519126 15.9 198 | 0.538251366 16.0 199 | 0.540983607 15.9 200 | 0.543715847 16.0 201 | 0.546448087 15.7 202 | 0.549180328 17.2 203 | 0.551912568 19.9 204 | 0.554644809 21.0 205 | 0.557377049 19.4 206 | 0.56010929 20.4 207 | 0.56284153 23.1 208 | 0.56557377 23.0 209 | 0.568306011 19.9 210 | 0.571038251 17.6 211 | 0.573770492 18.8 212 | 0.576502732 17.8 213 | 0.579234973 18.6 214 | 0.581967213 16.4 215 | 0.584699454 15.2 216 | 0.587431694 15.3 217 | 0.590163934 16.0 218 | 0.592896175 18.0 219 | 0.595628415 17.7 220 | 0.598360656 16.0 221 | 0.601092896 16.4 222 | 0.603825137 16.7 223 | 0.606557377 14.3 224 | 0.609289617 12.2 225 | 0.612021858 10.0 226 | 0.614754098 12.0 227 | 0.617486339 16.2 228 | 0.620218579 15.9 229 | 0.62295082 14.5 230 | 0.62568306 15.3 231 | 0.628415301 13.3 232 | 0.631147541 14.5 233 | 0.633879781 15.5 234 | 0.636612022 15.3 235 | 0.639344262 17.3 236 | 0.642076503 15.3 237 | 0.644808743 16.4 238 | 0.647540984 17.0 239 | 0.650273224 20.2 240 | 0.653005464 22.4 241 | 0.655737705 18.1 242 | 0.658469945 11.6 243 | 0.661202186 14.6 244 | 0.663934426 13.5 245 | 0.666666667 17.9 246 | 0.669398907 16.4 247 | 0.672131148 15.5 248 | 0.674863388 15.9 249 | 0.677595628 14.1 250 | 0.680327869 13.2 251 | 0.683060109 14.5 252 | 0.68579235 19.0 253 | 0.68852459 18.3 254 | 0.691256831 18.8 255 | 0.693989071 16.8 256 | 0.696721311 16.8 257 | 0.699453552 14.3 258 | 0.702185792 18.4 259 | 0.704918033 18.3 260 | 0.707650273 18.4 261 | 0.710382514 14.9 262 | 0.713114754 11.4 263 | 0.715846995 12.6 264 | 0.718579235 14.0 265 | 0.721311475 14.8 266 | 
0.724043716 9.9 267 | 0.726775956 11.4 268 | 0.729508197 12.9 269 | 0.732240437 12.1 270 | 0.734972678 12.8 271 | 0.737704918 13.5 272 | 0.740437158 12.9 273 | 0.743169399 14.0 274 | 0.745901639 14.6 275 | 0.74863388 12.0 276 | 0.75136612 10.5 277 | 0.754098361 9.5 278 | 0.756830601 7.6 279 | 0.759562842 6.4 280 | 0.762295082 7.0 281 | 0.765027322 8.1 282 | 0.767759563 8.1 283 | 0.770491803 7.6 284 | 0.773224044 7.4 285 | 0.775956284 7.2 286 | 0.778688525 7.0 287 | 0.781420765 6.4 288 | 0.784153005 5.8 289 | 0.786885246 5.5 290 | 0.789617486 6.4 291 | 0.792349727 7.3 292 | 0.795081967 7.4 293 | 0.797814208 7.8 294 | 0.800546448 7.9 295 | 0.803278689 6.9 296 | 0.806010929 6.1 297 | 0.808743169 3.7 298 | 0.81147541 5.3 299 | 0.81420765 6.1 300 | 0.816939891 4.3 301 | 0.819672131 3.3 302 | 0.822404372 8.8 303 | 0.825136612 9.8 304 | 0.827868852 6.4 305 | 0.830601093 4.6 306 | 0.833333333 5.2 307 | 0.836065574 5.5 308 | 0.838797814 1.4 309 | 0.841530055 0.5 310 | 0.844262295 -2.6 311 | 0.846994536 2.4 312 | 0.849726776 -0.8 313 | 0.852459016 -3.3 314 | 0.855191257 -2.8 315 | 0.857923497 -3.5 316 | 0.860655738 -2.8 317 | 0.863387978 -2.2 318 | 0.866120219 -0.3 319 | 0.868852459 0.0 320 | 0.871584699 2.3 321 | 0.87431694 4.9 322 | 0.87704918 3.1 323 | 0.879781421 3.6 324 | 0.882513661 5.2 325 | 0.885245902 3.8 326 | 0.887978142 3.2 327 | 0.890710383 7.7 328 | 0.893442623 7.8 329 | 0.896174863 6.9 330 | 0.898907104 2.7 331 | 0.901639344 2.8 332 | 0.904371585 6.6 333 | 0.907103825 1.9 334 | 0.909836066 -1.4 335 | 0.912568306 2.2 336 | 0.915300546 1.9 337 | 0.918032787 -1.3 338 | 0.920765027 -1.6 339 | 0.923497268 -3.2 340 | 0.926229508 -2.7 341 | 0.928961749 3.7 342 | 0.931693989 -3.2 343 | 0.93442623 -0.2 344 | 0.93715847 9.3 345 | 0.93989071 7.1 346 | 0.942622951 3.2 347 | 0.945355191 1.1 348 | 0.948087432 -6.0 349 | 0.950819672 1.7 350 | 0.953551913 -1.3 351 | 0.956284153 -2.2 352 | 0.959016393 -1.2 353 | 0.961748634 1.0 354 | 0.964480874 1.7 355 | 0.967213115 3.7 356 | 0.969945355 4.7 357 | 0.972677596 -0.3 358 | 0.975409836 3.5 359 | 0.978142077 3.4 360 | 0.980874317 3.9 361 | 0.983606557 4.5 362 | 0.986338798 5.3 363 | 0.989071038 2.7 364 | 0.991803279 -0.4 365 | 0.994535519 4.3 366 | 0.99726776 7.0 367 | 1 9.3 -------------------------------------------------------------------------------- /mlfromscratch/deep_learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .neural_network import NeuralNetwork 2 | -------------------------------------------------------------------------------- /mlfromscratch/deep_learning/activation_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Collection of activation functions 4 | # Reference: https://en.wikipedia.org/wiki/Activation_function 5 | 6 | class Sigmoid(): 7 | def __call__(self, x): 8 | return 1 / (1 + np.exp(-x)) 9 | 10 | def gradient(self, x): 11 | return self.__call__(x) * (1 - self.__call__(x)) 12 | 13 | class Softmax(): 14 | def __call__(self, x): 15 | e_x = np.exp(x - np.max(x, axis=-1, keepdims=True)) 16 | return e_x / np.sum(e_x, axis=-1, keepdims=True) 17 | 18 | def gradient(self, x): 19 | p = self.__call__(x) 20 | return p * (1 - p) 21 | 22 | class TanH(): 23 | def __call__(self, x): 24 | return 2 / (1 + np.exp(-2*x)) - 1 25 | 26 | def gradient(self, x): 27 | return 1 - np.power(self.__call__(x), 2) 28 | 29 | class ReLU(): 30 | def __call__(self, x): 31 | return np.where(x >= 0, x, 0) 32 | 33 | def 
gradient(self, x): 34 | return np.where(x >= 0, 1, 0) 35 | 36 | class LeakyReLU(): 37 | def __init__(self, alpha=0.2): 38 | self.alpha = alpha 39 | 40 | def __call__(self, x): 41 | return np.where(x >= 0, x, self.alpha * x) 42 | 43 | def gradient(self, x): 44 | return np.where(x >= 0, 1, self.alpha) 45 | 46 | class ELU(): 47 | def __init__(self, alpha=0.1): 48 | self.alpha = alpha 49 | 50 | def __call__(self, x): 51 | return np.where(x >= 0.0, x, self.alpha * (np.exp(x) - 1)) 52 | 53 | def gradient(self, x): 54 | return np.where(x >= 0.0, 1, self.__call__(x) + self.alpha) 55 | 56 | class SELU(): 57 | # Reference : https://arxiv.org/abs/1706.02515, 58 | # https://github.com/bioinf-jku/SNNs/blob/master/SelfNormalizingNetworks_MLP_MNIST.ipynb 59 | def __init__(self): 60 | self.alpha = 1.6732632423543772848170429916717 61 | self.scale = 1.0507009873554804934193349852946 62 | 63 | def __call__(self, x): 64 | return self.scale * np.where(x >= 0.0, x, self.alpha*(np.exp(x)-1)) 65 | 66 | def gradient(self, x): 67 | return self.scale * np.where(x >= 0.0, 1, self.alpha * np.exp(x)) 68 | 69 | class SoftPlus(): 70 | def __call__(self, x): 71 | return np.log(1 + np.exp(x)) 72 | 73 | def gradient(self, x): 74 | return 1 / (1 + np.exp(-x)) 75 | 76 | -------------------------------------------------------------------------------- /mlfromscratch/deep_learning/loss_functions.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from mlfromscratch.utils import accuracy_score 4 | from mlfromscratch.deep_learning.activation_functions import Sigmoid 5 | 6 | class Loss(object): 7 | def loss(self, y_true, y_pred): 8 | return NotImplementedError() 9 | 10 | def gradient(self, y, y_pred): 11 | raise NotImplementedError() 12 | 13 | def acc(self, y, y_pred): 14 | return 0 15 | 16 | class SquareLoss(Loss): 17 | def __init__(self): pass 18 | 19 | def loss(self, y, y_pred): 20 | return 0.5 * np.power((y - y_pred), 2) 21 | 22 | def gradient(self, y, y_pred): 23 | return -(y - y_pred) 24 | 25 | class CrossEntropy(Loss): 26 | def __init__(self): pass 27 | 28 | def loss(self, y, p): 29 | # Avoid division by zero 30 | p = np.clip(p, 1e-15, 1 - 1e-15) 31 | return - y * np.log(p) - (1 - y) * np.log(1 - p) 32 | 33 | def acc(self, y, p): 34 | return accuracy_score(np.argmax(y, axis=1), np.argmax(p, axis=1)) 35 | 36 | def gradient(self, y, p): 37 | # Avoid division by zero 38 | p = np.clip(p, 1e-15, 1 - 1e-15) 39 | return - (y / p) + (1 - y) / (1 - p) 40 | 41 | 42 | -------------------------------------------------------------------------------- /mlfromscratch/deep_learning/neural_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from terminaltables import AsciiTable 3 | import numpy as np 4 | import progressbar 5 | from mlfromscratch.utils import batch_iterator 6 | from mlfromscratch.utils.misc import bar_widgets 7 | 8 | 9 | class NeuralNetwork(): 10 | """Neural Network. Deep Learning base model. 11 | 12 | Parameters: 13 | ----------- 14 | optimizer: class 15 | The weight optimizer that will be used to tune the weights in order of minimizing 16 | the loss. 17 | loss: class 18 | Loss function used to measure the model's performance. SquareLoss or CrossEntropy. 
19 | validation: tuple 20 | A tuple containing validation data and labels (X, y) 21 | """ 22 | def __init__(self, optimizer, loss, validation_data=None): 23 | self.optimizer = optimizer 24 | self.layers = [] 25 | self.errors = {"training": [], "validation": []} 26 | self.loss_function = loss() 27 | self.progressbar = progressbar.ProgressBar(widgets=bar_widgets) 28 | 29 | self.val_set = None 30 | if validation_data: 31 | X, y = validation_data 32 | self.val_set = {"X": X, "y": y} 33 | 34 | def set_trainable(self, trainable): 35 | """ Method which enables freezing of the weights of the network's layers. """ 36 | for layer in self.layers: 37 | layer.trainable = trainable 38 | 39 | def add(self, layer): 40 | """ Method which adds a layer to the neural network """ 41 | # If this is not the first layer added then set the input shape 42 | # to the output shape of the last added layer 43 | if self.layers: 44 | layer.set_input_shape(shape=self.layers[-1].output_shape()) 45 | 46 | # If the layer has weights that needs to be initialized 47 | if hasattr(layer, 'initialize'): 48 | layer.initialize(optimizer=self.optimizer) 49 | 50 | # Add layer to the network 51 | self.layers.append(layer) 52 | 53 | def test_on_batch(self, X, y): 54 | """ Evaluates the model over a single batch of samples """ 55 | y_pred = self._forward_pass(X, training=False) 56 | loss = np.mean(self.loss_function.loss(y, y_pred)) 57 | acc = self.loss_function.acc(y, y_pred) 58 | 59 | return loss, acc 60 | 61 | def train_on_batch(self, X, y): 62 | """ Single gradient update over one batch of samples """ 63 | y_pred = self._forward_pass(X) 64 | loss = np.mean(self.loss_function.loss(y, y_pred)) 65 | acc = self.loss_function.acc(y, y_pred) 66 | # Calculate the gradient of the loss function wrt y_pred 67 | loss_grad = self.loss_function.gradient(y, y_pred) 68 | # Backpropagate. 
Update weights 69 | self._backward_pass(loss_grad=loss_grad) 70 | 71 | return loss, acc 72 | 73 | def fit(self, X, y, n_epochs, batch_size): 74 | """ Trains the model for a fixed number of epochs """ 75 | for _ in self.progressbar(range(n_epochs)): 76 | 77 | batch_error = [] 78 | for X_batch, y_batch in batch_iterator(X, y, batch_size=batch_size): 79 | loss, _ = self.train_on_batch(X_batch, y_batch) 80 | batch_error.append(loss) 81 | 82 | self.errors["training"].append(np.mean(batch_error)) 83 | 84 | if self.val_set is not None: 85 | val_loss, _ = self.test_on_batch(self.val_set["X"], self.val_set["y"]) 86 | self.errors["validation"].append(val_loss) 87 | 88 | return self.errors["training"], self.errors["validation"] 89 | 90 | def _forward_pass(self, X, training=True): 91 | """ Calculate the output of the NN """ 92 | layer_output = X 93 | for layer in self.layers: 94 | layer_output = layer.forward_pass(layer_output, training) 95 | 96 | return layer_output 97 | 98 | def _backward_pass(self, loss_grad): 99 | """ Propagate the gradient 'backwards' and update the weights in each layer """ 100 | for layer in reversed(self.layers): 101 | loss_grad = layer.backward_pass(loss_grad) 102 | 103 | def summary(self, name="Model Summary"): 104 | # Print model name 105 | print (AsciiTable([[name]]).table) 106 | # Network input shape (first layer's input shape) 107 | print ("Input Shape: %s" % str(self.layers[0].input_shape)) 108 | # Iterate through network and get each layer's configuration 109 | table_data = [["Layer Type", "Parameters", "Output Shape"]] 110 | tot_params = 0 111 | for layer in self.layers: 112 | layer_name = layer.layer_name() 113 | params = layer.parameters() 114 | out_shape = layer.output_shape() 115 | table_data.append([layer_name, str(params), str(out_shape)]) 116 | tot_params += params 117 | # Print network configuration table 118 | print (AsciiTable(table_data).table) 119 | print ("Total Parameters: %d\n" % tot_params) 120 | 121 | def predict(self, X): 122 | """ Use the trained model to predict labels of X """ 123 | return self._forward_pass(X, training=False) 124 | -------------------------------------------------------------------------------- /mlfromscratch/deep_learning/optimizers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mlfromscratch.utils import make_diagonal, normalize 3 | 4 | # Optimizers for models that use gradient based methods for finding the 5 | # weights that minimizes the loss. 
6 | # A great resource for understanding these methods: 7 | # http://sebastianruder.com/optimizing-gradient-descent/index.html 8 | 9 | class StochasticGradientDescent(): 10 | def __init__(self, learning_rate=0.01, momentum=0): 11 | self.learning_rate = learning_rate 12 | self.momentum = momentum 13 | self.w_updt = None 14 | 15 | def update(self, w, grad_wrt_w): 16 | # If not initialized 17 | if self.w_updt is None: 18 | self.w_updt = np.zeros(np.shape(w)) 19 | # Use momentum if set 20 | self.w_updt = self.momentum * self.w_updt + (1 - self.momentum) * grad_wrt_w 21 | # Move against the gradient to minimize loss 22 | return w - self.learning_rate * self.w_updt 23 | 24 | class NesterovAcceleratedGradient(): 25 | def __init__(self, learning_rate=0.001, momentum=0.4): 26 | self.learning_rate = learning_rate 27 | self.momentum = momentum 28 | self.w_updt = np.array([]) 29 | 30 | def update(self, w, grad_func): 31 | # Calculate the gradient of the loss a bit further down the slope from w 32 | approx_future_grad = np.clip(grad_func(w - self.momentum * self.w_updt), -1, 1) 33 | # Initialize on first update 34 | if not self.w_updt.any(): 35 | self.w_updt = np.zeros(np.shape(w)) 36 | 37 | self.w_updt = self.momentum * self.w_updt + self.learning_rate * approx_future_grad 38 | # Move against the gradient to minimize loss 39 | return w - self.w_updt 40 | 41 | class Adagrad(): 42 | def __init__(self, learning_rate=0.01): 43 | self.learning_rate = learning_rate 44 | self.G = None # Sum of squares of the gradients 45 | self.eps = 1e-8 46 | 47 | def update(self, w, grad_wrt_w): 48 | # If not initialized 49 | if self.G is None: 50 | self.G = np.zeros(np.shape(w)) 51 | # Add the square of the gradient of the loss function at w 52 | self.G += np.power(grad_wrt_w, 2) 53 | # Adaptive gradient with higher learning rate for sparse data 54 | return w - self.learning_rate * grad_wrt_w / np.sqrt(self.G + self.eps) 55 | 56 | class Adadelta(): 57 | def __init__(self, rho=0.95, eps=1e-6): 58 | self.E_w_updt = None # Running average of squared parameter updates 59 | self.E_grad = None # Running average of the squared gradient of w 60 | self.w_updt = None # Parameter update 61 | self.eps = eps 62 | self.rho = rho 63 | 64 | def update(self, w, grad_wrt_w): 65 | # If not initialized 66 | if self.w_updt is None: 67 | self.w_updt = np.zeros(np.shape(w)) 68 | self.E_w_updt = np.zeros(np.shape(w)) 69 | self.E_grad = np.zeros(np.shape(grad_wrt_w)) 70 | 71 | # Update average of gradients at w 72 | self.E_grad = self.rho * self.E_grad + (1 - self.rho) * np.power(grad_wrt_w, 2) 73 | 74 | RMS_delta_w = np.sqrt(self.E_w_updt + self.eps) 75 | RMS_grad = np.sqrt(self.E_grad + self.eps) 76 | 77 | # Adaptive learning rate 78 | adaptive_lr = RMS_delta_w / RMS_grad 79 | 80 | # Calculate the update 81 | self.w_updt = adaptive_lr * grad_wrt_w 82 | 83 | # Update the running average of w updates 84 | self.E_w_updt = self.rho * self.E_w_updt + (1 - self.rho) * np.power(self.w_updt, 2) 85 | 86 | return w - self.w_updt 87 | 88 | class RMSprop(): 89 | def __init__(self, learning_rate=0.01, rho=0.9): 90 | self.learning_rate = learning_rate 91 | self.Eg = None # Running average of the square gradients at w 92 | self.eps = 1e-8 93 | self.rho = rho 94 | 95 | def update(self, w, grad_wrt_w): 96 | # If not initialized 97 | if self.Eg is None: 98 | self.Eg = np.zeros(np.shape(grad_wrt_w)) 99 | 100 | self.Eg = self.rho * self.Eg + (1 - self.rho) * np.power(grad_wrt_w, 2) 101 | 102 | # Divide the learning rate for a weight by a running average of the 
magnitudes of recent 103 | # gradients for that weight 104 | return w - self.learning_rate * grad_wrt_w / np.sqrt(self.Eg + self.eps) 105 | 106 | class Adam(): 107 | def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999): 108 | self.learning_rate = learning_rate 109 | self.eps = 1e-8 110 | self.m = None 111 | self.v = None 112 | # Decay rates 113 | self.b1 = b1 114 | self.b2 = b2 115 | 116 | def update(self, w, grad_wrt_w): 117 | # If not initialized 118 | if self.m is None: 119 | self.m = np.zeros(np.shape(grad_wrt_w)) 120 | self.v = np.zeros(np.shape(grad_wrt_w)) 121 | 122 | self.m = self.b1 * self.m + (1 - self.b1) * grad_wrt_w 123 | self.v = self.b2 * self.v + (1 - self.b2) * np.power(grad_wrt_w, 2) 124 | 125 | m_hat = self.m / (1 - self.b1) 126 | v_hat = self.v / (1 - self.b2) 127 | 128 | self.w_updt = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps) 129 | 130 | return w - self.w_updt 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /mlfromscratch/examples/adaboost.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from sklearn import datasets 4 | 5 | # Import helper functions 6 | from mlfromscratch.supervised_learning import Adaboost 7 | from mlfromscratch.utils.data_manipulation import train_test_split 8 | from mlfromscratch.utils.data_operation import accuracy_score 9 | from mlfromscratch.utils import Plot 10 | 11 | def main(): 12 | data = datasets.load_digits() 13 | X = data.data 14 | y = data.target 15 | 16 | digit1 = 1 17 | digit2 = 8 18 | idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0]) 19 | y = data.target[idx] 20 | # Change labels to {-1, 1} 21 | y[y == digit1] = -1 22 | y[y == digit2] = 1 23 | X = data.data[idx] 24 | 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) 26 | 27 | # Adaboost classification with 5 weak classifiers 28 | clf = Adaboost(n_clf=5) 29 | clf.fit(X_train, y_train) 30 | y_pred = clf.predict(X_test) 31 | 32 | accuracy = accuracy_score(y_test, y_pred) 33 | print ("Accuracy:", accuracy) 34 | 35 | # Reduce dimensions to 2d using pca and plot the results 36 | Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/apriori.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | 4 | from mlfromscratch.unsupervised_learning import Apriori 5 | 6 | def main(): 7 | # Demo transaction set 8 | # Example 2: https://en.wikipedia.org/wiki/Apriori_algorithm 9 | transactions = np.array([[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4], [2, 3], [3, 4], [2, 4]]) 10 | print ("+-------------+") 11 | print ("| Apriori |") 12 | print ("+-------------+") 13 | min_sup = 0.25 14 | min_conf = 0.8 15 | print ("Minimum Support: %.2f" % (min_sup)) 16 | print ("Minimum Confidence: %s" % (min_conf)) 17 | print ("Transactions:") 18 | for transaction in transactions: 19 | print ("\t%s" % transaction) 20 | 21 | apriori = Apriori(min_sup=min_sup, min_conf=min_conf) 22 | 23 | # Get and print the frequent itemsets 24 | frequent_itemsets = apriori.find_frequent_itemsets(transactions) 25 | print ("Frequent Itemsets:\n\t%s" % frequent_itemsets) 26 | 27 | # Get and print the rules 28 | rules = 
apriori.generate_rules(transactions) 29 | print ("Rules:") 30 | for rule in rules: 31 | print ("\t%s -> %s (support: %.2f, confidence: %s)" % (rule.antecedent, rule.concequent, rule.support, rule.confidence,)) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/bayesian_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | # Import helper functions 6 | from mlfromscratch.utils.data_operation import mean_squared_error 7 | from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features 8 | from mlfromscratch.supervised_learning import BayesianRegression 9 | 10 | def main(): 11 | 12 | # Load temperature data 13 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 14 | 15 | time = np.atleast_2d(data["time"].values).T 16 | temp = np.atleast_2d(data["temp"].values).T 17 | 18 | X = time # fraction of the year [0, 1] 19 | y = temp 20 | 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 22 | 23 | n_samples, n_features = np.shape(X) 24 | 25 | # Prior parameters 26 | # - Weights are assumed distr. according to a Normal distribution 27 | # - The variance of the weights are assumed distributed according to 28 | # a scaled inverse chi-squared distribution. 29 | # High prior uncertainty! 30 | # Normal 31 | mu0 = np.array([0] * n_features) 32 | omega0 = np.diag([.0001] * n_features) 33 | # Scaled inverse chi-squared 34 | nu0 = 1 35 | sigma_sq0 = 100 36 | 37 | # The credible interval 38 | cred_int = 10 39 | 40 | clf = BayesianRegression(n_draws=2000, 41 | poly_degree=4, 42 | mu0=mu0, 43 | omega0=omega0, 44 | nu0=nu0, 45 | sigma_sq0=sigma_sq0, 46 | cred_int=cred_int) 47 | clf.fit(X_train, y_train) 48 | y_pred = clf.predict(X_test) 49 | 50 | mse = mean_squared_error(y_test, y_pred) 51 | 52 | # Get prediction line 53 | y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True) 54 | 55 | # Print the mean squared error 56 | print ("Mean Squared Error:", mse) 57 | 58 | # Color map 59 | cmap = plt.get_cmap('viridis') 60 | 61 | # Plot the results 62 | m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) 63 | m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) 64 | p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction") 65 | p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(cred_int)) 66 | p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2) 67 | plt.axis((0, 366, -20, 25)) 68 | plt.suptitle("Bayesian Regression") 69 | plt.title("MSE: %.2f" % mse, fontsize=10) 70 | plt.xlabel('Day') 71 | plt.ylabel('Temperature in Celcius') 72 | plt.legend(loc='lower right') 73 | # plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') 74 | plt.legend(loc='lower right') 75 | 76 | plt.show() 77 | 78 | if __name__ == "__main__": 79 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/convolutional_neural_network.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | import math 6 | import numpy as np 7 | 8 | # Import helper functions 9 | from mlfromscratch.deep_learning import NeuralNetwork 10 | from 
mlfromscratch.utils import train_test_split, to_categorical, normalize 11 | from mlfromscratch.utils import get_random_subsets, shuffle_data, Plot 12 | from mlfromscratch.utils.data_operation import accuracy_score 13 | from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta 14 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 15 | from mlfromscratch.utils.misc import bar_widgets 16 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Conv2D, Flatten, Activation, MaxPooling2D 17 | from mlfromscratch.deep_learning.layers import AveragePooling2D, ZeroPadding2D, BatchNormalization, RNN 18 | 19 | 20 | 21 | def main(): 22 | 23 | #---------- 24 | # Conv Net 25 | #---------- 26 | 27 | optimizer = Adam() 28 | 29 | data = datasets.load_digits() 30 | X = data.data 31 | y = data.target 32 | 33 | # Convert to one-hot encoding 34 | y = to_categorical(y.astype("int")) 35 | 36 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) 37 | 38 | # Reshape X to (n_samples, channels, height, width) 39 | X_train = X_train.reshape((-1,1,8,8)) 40 | X_test = X_test.reshape((-1,1,8,8)) 41 | 42 | clf = NeuralNetwork(optimizer=optimizer, 43 | loss=CrossEntropy, 44 | validation_data=(X_test, y_test)) 45 | 46 | clf.add(Conv2D(n_filters=16, filter_shape=(3,3), stride=1, input_shape=(1,8,8), padding='same')) 47 | clf.add(Activation('relu')) 48 | clf.add(Dropout(0.25)) 49 | clf.add(BatchNormalization()) 50 | clf.add(Conv2D(n_filters=32, filter_shape=(3,3), stride=1, padding='same')) 51 | clf.add(Activation('relu')) 52 | clf.add(Dropout(0.25)) 53 | clf.add(BatchNormalization()) 54 | clf.add(Flatten()) 55 | clf.add(Dense(256)) 56 | clf.add(Activation('relu')) 57 | clf.add(Dropout(0.4)) 58 | clf.add(BatchNormalization()) 59 | clf.add(Dense(10)) 60 | clf.add(Activation('softmax')) 61 | 62 | print () 63 | clf.summary(name="ConvNet") 64 | 65 | train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256) 66 | 67 | # Training and validation error plot 68 | n = len(train_err) 69 | training, = plt.plot(range(n), train_err, label="Training Error") 70 | validation, = plt.plot(range(n), val_err, label="Validation Error") 71 | plt.legend(handles=[training, validation]) 72 | plt.title("Error Plot") 73 | plt.ylabel('Error') 74 | plt.xlabel('Iterations') 75 | plt.show() 76 | 77 | _, accuracy = clf.test_on_batch(X_test, y_test) 78 | print ("Accuracy:", accuracy) 79 | 80 | 81 | y_pred = np.argmax(clf.predict(X_test), axis=1) 82 | X_test = X_test.reshape(-1, 8*8) 83 | # Reduce dimension to 2D using PCA and plot the results 84 | Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network", accuracy=accuracy, legend_labels=range(10)) 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /mlfromscratch/examples/dbscan.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import math 4 | import random 5 | from sklearn import datasets 6 | import numpy as np 7 | 8 | # Import helper functions 9 | from mlfromscratch.utils import Plot 10 | from mlfromscratch.unsupervised_learning import DBSCAN 11 | 12 | def main(): 13 | # Load the dataset 14 | X, y = datasets.make_moons(n_samples=300, noise=0.08, shuffle=False) 15 | 16 | # Cluster the data using DBSCAN 17 | clf = DBSCAN(eps=0.17, min_samples=5) 18 | y_pred = clf.predict(X) 19 | 20 | # Project the data onto the 2 primary 
principal components 21 | p = Plot() 22 | p.plot_in_2d(X, y_pred, title="DBSCAN") 23 | p.plot_in_2d(X, y, title="Actual Clustering") 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /mlfromscratch/examples/decision_tree_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | import sys 6 | import os 7 | 8 | # Import helper functions 9 | from mlfromscratch.utils import train_test_split, standardize, accuracy_score 10 | from mlfromscratch.utils import mean_squared_error, calculate_variance, Plot 11 | from mlfromscratch.supervised_learning import ClassificationTree 12 | 13 | def main(): 14 | 15 | print ("-- Classification Tree --") 16 | 17 | data = datasets.load_iris() 18 | X = data.data 19 | y = data.target 20 | 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 22 | 23 | clf = ClassificationTree() 24 | clf.fit(X_train, y_train) 25 | y_pred = clf.predict(X_test) 26 | 27 | accuracy = accuracy_score(y_test, y_pred) 28 | 29 | print ("Accuracy:", accuracy) 30 | 31 | Plot().plot_in_2d(X_test, y_pred, 32 | title="Decision Tree", 33 | accuracy=accuracy, 34 | legend_labels=data.target_names) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/decision_tree_regressor.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | 6 | from mlfromscratch.utils import train_test_split, standardize, accuracy_score 7 | from mlfromscratch.utils import mean_squared_error, calculate_variance, Plot 8 | from mlfromscratch.supervised_learning import RegressionTree 9 | 10 | def main(): 11 | 12 | print ("-- Regression Tree --") 13 | 14 | # Load temperature data 15 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 16 | 17 | time = np.atleast_2d(data["time"].values).T 18 | temp = np.atleast_2d(data["temp"].values).T 19 | 20 | X = standardize(time) # Time. Fraction of the year [0, 1] 21 | y = temp[:, 0] # Temperature. 
Reduce to one-dim 22 | 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 24 | 25 | model = RegressionTree() 26 | model.fit(X_train, y_train) 27 | y_pred = model.predict(X_test) 28 | 29 | y_pred_line = model.predict(X) 30 | 31 | # Color map 32 | cmap = plt.get_cmap('viridis') 33 | 34 | mse = mean_squared_error(y_test, y_pred) 35 | 36 | print ("Mean Squared Error:", mse) 37 | 38 | # Plot the results 39 | # Plot the results 40 | m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) 41 | m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) 42 | m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10) 43 | plt.suptitle("Regression Tree") 44 | plt.title("MSE: %.2f" % mse, fontsize=10) 45 | plt.xlabel('Day') 46 | plt.ylabel('Temperature in Celcius') 47 | plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') 48 | plt.show() 49 | 50 | 51 | if __name__ == "__main__": 52 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/deep_q_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from mlfromscratch.utils import to_categorical 4 | from mlfromscratch.deep_learning.optimizers import Adam 5 | from mlfromscratch.deep_learning.loss_functions import SquareLoss 6 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization 7 | from mlfromscratch.deep_learning import NeuralNetwork 8 | from mlfromscratch.reinforcement_learning import DeepQNetwork 9 | 10 | 11 | def main(): 12 | dqn = DeepQNetwork(env_name='CartPole-v1', 13 | epsilon=0.9, 14 | gamma=0.8, 15 | decay_rate=0.005, 16 | min_epsilon=0.1) 17 | 18 | # Model builder 19 | def model(n_inputs, n_outputs): 20 | clf = NeuralNetwork(optimizer=Adam(), loss=SquareLoss) 21 | clf.add(Dense(64, input_shape=(n_inputs,))) 22 | clf.add(Activation('relu')) 23 | clf.add(Dense(n_outputs)) 24 | return clf 25 | 26 | dqn.set_model(model) 27 | 28 | print () 29 | dqn.model.summary(name="Deep Q-Network") 30 | 31 | dqn.train(n_epochs=500) 32 | dqn.play(n_epochs=100) 33 | 34 | if __name__ == "__main__": 35 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn import datasets 3 | import numpy as np 4 | import math 5 | import matplotlib.pyplot as plt 6 | 7 | from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score 8 | from mlfromscratch.deep_learning.optimizers import Adam 9 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 10 | from mlfromscratch.deep_learning.activation_functions import Softmax 11 | from mlfromscratch.utils.kernels import * 12 | from mlfromscratch.supervised_learning import * 13 | from mlfromscratch.deep_learning import * 14 | from mlfromscratch.unsupervised_learning import PCA 15 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Conv2D, Flatten, Activation 16 | 17 | 18 | print ("+-------------------------------------------+") 19 | print ("| |") 20 | print ("| Machine Learning From Scratch |") 21 | print ("| |") 22 | print ("+-------------------------------------------+") 23 | 24 | 25 | # ........... 26 | # LOAD DATA 27 | # ........... 
28 | data = datasets.load_digits() 29 | digit1 = 1 30 | digit2 = 8 31 | idx = np.append(np.where(data.target == digit1)[0], np.where(data.target == digit2)[0]) 32 | y = data.target[idx] 33 | # Change labels to {0, 1} 34 | y[y == digit1] = 0 35 | y[y == digit2] = 1 36 | X = data.data[idx] 37 | X = normalize(X) 38 | 39 | print ("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2)) 40 | 41 | # .......................... 42 | # DIMENSIONALITY REDUCTION 43 | # .......................... 44 | pca = PCA() 45 | X = pca.transform(X, n_components=5) # Reduce to 5 dimensions 46 | 47 | n_samples, n_features = np.shape(X) 48 | 49 | # .......................... 50 | # TRAIN / TEST SPLIT 51 | # .......................... 52 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) 53 | # Rescaled labels {-1, 1} 54 | rescaled_y_train = 2*y_train - np.ones(np.shape(y_train)) 55 | rescaled_y_test = 2*y_test - np.ones(np.shape(y_test)) 56 | 57 | # ....... 58 | # SETUP 59 | # ....... 60 | adaboost = Adaboost(n_clf = 8) 61 | naive_bayes = NaiveBayes() 62 | knn = KNN(k=4) 63 | logistic_regression = LogisticRegression() 64 | mlp = NeuralNetwork(optimizer=Adam(), 65 | loss=CrossEntropy) 66 | mlp.add(Dense(input_shape=(n_features,), n_units=64)) 67 | mlp.add(Activation('relu')) 68 | mlp.add(Dense(n_units=64)) 69 | mlp.add(Activation('relu')) 70 | mlp.add(Dense(n_units=2)) 71 | mlp.add(Activation('softmax')) 72 | perceptron = Perceptron() 73 | decision_tree = ClassificationTree() 74 | random_forest = RandomForest(n_estimators=50) 75 | support_vector_machine = SupportVectorMachine() 76 | lda = LDA() 77 | gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=.9, max_depth=2) 78 | xgboost = XGBoost(n_estimators=50, learning_rate=0.5) 79 | 80 | # ........ 81 | # TRAIN 82 | # ........ 83 | print ("Training:") 84 | print ("- Adaboost") 85 | adaboost.fit(X_train, rescaled_y_train) 86 | print ("- Decision Tree") 87 | decision_tree.fit(X_train, y_train) 88 | print ("- Gradient Boosting") 89 | gbc.fit(X_train, y_train) 90 | print ("- LDA") 91 | lda.fit(X_train, y_train) 92 | print ("- Logistic Regression") 93 | logistic_regression.fit(X_train, y_train) 94 | print ("- Multilayer Perceptron") 95 | mlp.fit(X_train, to_categorical(y_train), n_epochs=300, batch_size=50) 96 | print ("- Naive Bayes") 97 | naive_bayes.fit(X_train, y_train) 98 | print ("- Perceptron") 99 | perceptron.fit(X_train, to_categorical(y_train)) 100 | print ("- Random Forest") 101 | random_forest.fit(X_train, y_train) 102 | print ("- Support Vector Machine") 103 | support_vector_machine.fit(X_train, rescaled_y_train) 104 | print ("- XGBoost") 105 | xgboost.fit(X_train, y_train) 106 | 107 | 108 | 109 | # ......... 110 | # PREDICT 111 | # ......... 
112 | y_pred = {} 113 | y_pred["Adaboost"] = adaboost.predict(X_test) 114 | y_pred["Gradient Boosting"] = gbc.predict(X_test) 115 | y_pred["Naive Bayes"] = naive_bayes.predict(X_test) 116 | y_pred["K Nearest Neighbors"] = knn.predict(X_test, X_train, y_train) 117 | y_pred["Logistic Regression"] = logistic_regression.predict(X_test) 118 | y_pred["LDA"] = lda.predict(X_test) 119 | y_pred["Multilayer Perceptron"] = np.argmax(mlp.predict(X_test), axis=1) 120 | y_pred["Perceptron"] = np.argmax(perceptron.predict(X_test), axis=1) 121 | y_pred["Decision Tree"] = decision_tree.predict(X_test) 122 | y_pred["Random Forest"] = random_forest.predict(X_test) 123 | y_pred["Support Vector Machine"] = support_vector_machine.predict(X_test) 124 | y_pred["XGBoost"] = xgboost.predict(X_test) 125 | 126 | # .......... 127 | # ACCURACY 128 | # .......... 129 | print ("Accuracy:") 130 | for clf in y_pred: 131 | # Rescaled {-1 1} 132 | if clf == "Adaboost" or clf == "Support Vector Machine": 133 | print ("\t%-23s: %.5f" %(clf, accuracy_score(rescaled_y_test, y_pred[clf]))) 134 | # Categorical 135 | else: 136 | print ("\t%-23s: %.5f" %(clf, accuracy_score(y_test, y_pred[clf]))) 137 | 138 | # ....... 139 | # PLOT 140 | # ....... 141 | plt.scatter(X_test[:,0], X_test[:,1], c=y_test) 142 | plt.ylabel("Principal Component 2") 143 | plt.xlabel("Principal Component 1") 144 | plt.title("The Digit Dataset (digits %s and %s)" % (digit1, digit2)) 145 | plt.show() 146 | 147 | 148 | -------------------------------------------------------------------------------- /mlfromscratch/examples/elastic_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | # Import helper functions 6 | from mlfromscratch.supervised_learning import ElasticNet 7 | from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error 8 | from mlfromscratch.utils import train_test_split, polynomial_features, Plot 9 | 10 | 11 | def main(): 12 | 13 | # Load temperature data 14 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 15 | 16 | time = np.atleast_2d(data["time"].values).T 17 | temp = data["temp"].values 18 | 19 | X = time # fraction of the year [0, 1] 20 | y = temp 21 | 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 23 | 24 | poly_degree = 13 25 | 26 | model = ElasticNet(degree=15, 27 | reg_factor=0.01, 28 | l1_ratio=0.7, 29 | learning_rate=0.001, 30 | n_iterations=4000) 31 | model.fit(X_train, y_train) 32 | 33 | # Training error plot 34 | n = len(model.training_errors) 35 | training, = plt.plot(range(n), model.training_errors, label="Training Error") 36 | plt.legend(handles=[training]) 37 | plt.title("Error Plot") 38 | plt.ylabel('Mean Squared Error') 39 | plt.xlabel('Iterations') 40 | plt.show() 41 | 42 | y_pred = model.predict(X_test) 43 | mse = mean_squared_error(y_test, y_pred) 44 | print ("Mean squared error: %s (given by reg. 
factor: %s)" % (mse, 0.05)) 45 | 46 | y_pred_line = model.predict(X) 47 | 48 | # Color map 49 | cmap = plt.get_cmap('viridis') 50 | 51 | # Plot the results 52 | m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) 53 | m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) 54 | plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") 55 | plt.suptitle("Elastic Net") 56 | plt.title("MSE: %.2f" % mse, fontsize=10) 57 | plt.xlabel('Day') 58 | plt.ylabel('Temperature in Celcius') 59 | plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') 60 | plt.show() 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /mlfromscratch/examples/fp_growth.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from mlfromscratch.unsupervised_learning import FPGrowth 4 | 5 | def main(): 6 | # Demo transaction set 7 | # Example: 8 | # https://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Frequent_Pattern_Mining/The_FP-Growth_Algorithm 9 | 10 | transactions = np.array([ 11 | ["A", "B", "D", "E"], 12 | ["B", "C", "E"], 13 | ["A", "B", "D", "E"], 14 | ["A", "B", "C", "E"], 15 | ["A", "B", "C", "D", "E"], 16 | ["B", "C", "D"] 17 | ]) 18 | 19 | print ("") 20 | print ("+---------------+") 21 | print ("| FP-Growth |") 22 | print ("+---------------+") 23 | min_sup = 3 24 | print ("Minimum Support: %s" % min_sup) 25 | print ("") 26 | print ("Transactions:") 27 | for transaction in transactions: 28 | print ("\t%s" % transaction) 29 | 30 | fp_growth = FPGrowth(min_sup=min_sup) 31 | 32 | print ("") 33 | # Get and print the frequent itemsets 34 | frequent_itemsets = fp_growth.find_frequent_itemsets( 35 | transactions, show_tree=True) 36 | 37 | print ("") 38 | print ("Frequent itemsets:") 39 | for itemset in frequent_itemsets: 40 | print ("\t%s" % itemset) 41 | print ("") 42 | 43 | if __name__ == "__main__": 44 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/gaussian_mixture_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import sys 3 | import os 4 | import math 5 | import random 6 | from sklearn import datasets 7 | import numpy as np 8 | 9 | from mlfromscratch.unsupervised_learning import GaussianMixtureModel 10 | from mlfromscratch.utils import Plot 11 | 12 | 13 | def main(): 14 | # Load the dataset 15 | X, y = datasets.make_blobs() 16 | 17 | # Cluster the data 18 | clf = GaussianMixtureModel(k=3) 19 | y_pred = clf.predict(X) 20 | 21 | p = Plot() 22 | p.plot_in_2d(X, y_pred, title="GMM Clustering") 23 | p.plot_in_2d(X, y, title="Actual Clustering") 24 | 25 | 26 | if __name__ == "__main__": 27 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/genetic_algorithm.py: -------------------------------------------------------------------------------- 1 | 2 | from mlfromscratch.unsupervised_learning import GeneticAlgorithm 3 | 4 | def main(): 5 | target_string = "Genetic Algorithm" 6 | population_size = 100 7 | mutation_rate = 0.05 8 | genetic_algorithm = GeneticAlgorithm(target_string, 9 | population_size, 10 | mutation_rate) 11 | 12 | print ("") 13 | print ("+--------+") 14 | print ("| GA |") 15 | print ("+--------+") 16 | print ("Description: Implementation of a Genetic Algorithm which aims to produce") 
17 | print ("the user specified target string. This implementation calculates each") 18 | print ("candidate's fitness based on the alphabetical distance between the candidate") 19 | print ("and the target. A candidate is selected as a parent with probabilities proportional") 20 | print ("to the candidate's fitness. Reproduction is implemented as a single-point") 21 | print ("crossover between pairs of parents. Mutation is done by randomly assigning") 22 | print ("new characters with uniform probability.") 23 | print ("") 24 | print ("Parameters") 25 | print ("----------") 26 | print ("Target String: '%s'" % target_string) 27 | print ("Population Size: %d" % population_size) 28 | print ("Mutation Rate: %s" % mutation_rate) 29 | print ("") 30 | 31 | genetic_algorithm.run(iterations=1000) 32 | 33 | if __name__ == "__main__": 34 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/gradient_boosting_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | 6 | # Import helper functions 7 | from mlfromscratch.utils import train_test_split, accuracy_score 8 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 9 | from mlfromscratch.utils import Plot 10 | from mlfromscratch.supervised_learning import GradientBoostingClassifier 11 | 12 | def main(): 13 | 14 | print ("-- Gradient Boosting Classification --") 15 | 16 | data = datasets.load_iris() 17 | X = data.data 18 | y = data.target 19 | 20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 21 | 22 | clf = GradientBoostingClassifier() 23 | clf.fit(X_train, y_train) 24 | y_pred = clf.predict(X_test) 25 | 26 | accuracy = accuracy_score(y_test, y_pred) 27 | 28 | print ("Accuracy:", accuracy) 29 | 30 | 31 | Plot().plot_in_2d(X_test, y_pred, 32 | title="Gradient Boosting", 33 | accuracy=accuracy, 34 | legend_labels=data.target_names) 35 | 36 | 37 | 38 | if __name__ == "__main__": 39 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/gradient_boosting_regressor.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import progressbar 6 | 7 | from mlfromscratch.utils import train_test_split, standardize, to_categorical 8 | from mlfromscratch.utils import mean_squared_error, accuracy_score, Plot 9 | from mlfromscratch.utils.loss_functions import SquareLoss 10 | from mlfromscratch.utils.misc import bar_widgets 11 | from mlfromscratch.supervised_learning import GradientBoostingRegressor 12 | 13 | 14 | def main(): 15 | print ("-- Gradient Boosting Regression --") 16 | 17 | # Load temperature data 18 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 19 | 20 | time = np.atleast_2d(data["time"].values).T 21 | temp = np.atleast_2d(data["temp"].values).T 22 | 23 | X = time.reshape((-1, 1)) # Time. Fraction of the year [0, 1] 24 | X = np.insert(X, 0, values=1, axis=1) # Insert bias term 25 | y = temp[:, 0] # Temperature. 
Reduce to one-dim 26 | 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) 28 | 29 | model = GradientBoostingRegressor() 30 | model.fit(X_train, y_train) 31 | y_pred = model.predict(X_test) 32 | 33 | y_pred_line = model.predict(X) 34 | 35 | # Color map 36 | cmap = plt.get_cmap('viridis') 37 | 38 | mse = mean_squared_error(y_test, y_pred) 39 | 40 | print ("Mean Squared Error:", mse) 41 | 42 | # Plot the results 43 | m1 = plt.scatter(366 * X_train[:, 1], y_train, color=cmap(0.9), s=10) 44 | m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10) 45 | m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10) 46 | plt.suptitle("Regression Tree") 47 | plt.title("MSE: %.2f" % mse, fontsize=10) 48 | plt.xlabel('Day') 49 | plt.ylabel('Temperature in Celcius') 50 | plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') 51 | plt.show() 52 | 53 | 54 | if __name__ == "__main__": 55 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/k_means.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | from sklearn import datasets 3 | import numpy as np 4 | 5 | from mlfromscratch.unsupervised_learning import KMeans 6 | from mlfromscratch.utils import Plot 7 | 8 | 9 | def main(): 10 | # Load the dataset 11 | X, y = datasets.make_blobs() 12 | 13 | # Cluster the data using K-Means 14 | clf = KMeans(k=3) 15 | y_pred = clf.predict(X) 16 | 17 | # Project the data onto the 2 primary principal components 18 | p = Plot() 19 | p.plot_in_2d(X, y_pred, title="K-Means Clustering") 20 | p.plot_in_2d(X, y, title="Actual Clustering") 21 | 22 | 23 | 24 | if __name__ == "__main__": 25 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/k_nearest_neighbors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | 6 | from mlfromscratch.utils import train_test_split, normalize, accuracy_score 7 | from mlfromscratch.utils import euclidean_distance, Plot 8 | from mlfromscratch.supervised_learning import KNN 9 | 10 | def main(): 11 | data = datasets.load_iris() 12 | X = normalize(data.data) 13 | y = data.target 14 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) 15 | 16 | clf = KNN(k=5) 17 | y_pred = clf.predict(X_test, X_train, y_train) 18 | 19 | accuracy = accuracy_score(y_test, y_pred) 20 | 21 | print ("Accuracy:", accuracy) 22 | 23 | # Reduce dimensions to 2d using pca and plot the results 24 | Plot().plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names) 25 | 26 | 27 | if __name__ == "__main__": 28 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/lasso_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | # Import helper functions 6 | from mlfromscratch.supervised_learning import LassoRegression 7 | from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error 8 | from mlfromscratch.utils import train_test_split, polynomial_features, Plot 9 
| 10 | 11 | def main(): 12 | 13 | # Load temperature data 14 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 15 | 16 | time = np.atleast_2d(data["time"].values).T 17 | temp = data["temp"].values 18 | 19 | X = time # fraction of the year [0, 1] 20 | y = temp 21 | 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 23 | 24 | poly_degree = 13 25 | 26 | model = LassoRegression(degree=poly_degree, 27 | reg_factor=0.05, 28 | learning_rate=0.001, 29 | n_iterations=4000) 30 | model.fit(X_train, y_train) 31 | 32 | # Training error plot 33 | n = len(model.training_errors) 34 | training, = plt.plot(range(n), model.training_errors, label="Training Error") 35 | plt.legend(handles=[training]) 36 | plt.title("Error Plot") 37 | plt.ylabel('Mean Squared Error') 38 | plt.xlabel('Iterations') 39 | plt.show() 40 | 41 | y_pred = model.predict(X_test) 42 | mse = mean_squared_error(y_test, y_pred) 43 | print ("Mean squared error: %s (given by reg. factor: %s)" % (mse, 0.05)) 44 | 45 | y_pred_line = model.predict(X) 46 | 47 | # Color map 48 | cmap = plt.get_cmap('viridis') 49 | 50 | # Plot the results 51 | m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) 52 | m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) 53 | plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") 54 | plt.suptitle("Lasso Regression") 55 | plt.title("MSE: %.2f" % mse, fontsize=10) 56 | plt.xlabel('Day') 57 | plt.ylabel('Temperature in Celsius') 58 | plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') 59 | plt.show() 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /mlfromscratch/examples/linear_discriminant_analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn import datasets 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from mlfromscratch.supervised_learning import LDA 7 | from mlfromscratch.utils import calculate_covariance_matrix, accuracy_score 8 | from mlfromscratch.utils import normalize, standardize, train_test_split, Plot 9 | from mlfromscratch.unsupervised_learning import PCA 10 | 11 | def main(): 12 | # Load the dataset 13 | data = datasets.load_iris() 14 | X = data.data 15 | y = data.target 16 | 17 | # Three -> two classes 18 | X = X[y != 2] 19 | y = y[y != 2] 20 | 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) 22 | 23 | # Fit and predict using LDA 24 | lda = LDA() 25 | lda.fit(X_train, y_train) 26 | y_pred = lda.predict(X_test) 27 | 28 | accuracy = accuracy_score(y_test, y_pred) 29 | 30 | print ("Accuracy:", accuracy) 31 | 32 | Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy) 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /mlfromscratch/examples/linear_regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | from sklearn.datasets import make_regression 5 | 6 | from mlfromscratch.utils import train_test_split, polynomial_features 7 | from mlfromscratch.utils import mean_squared_error, Plot 8 | from mlfromscratch.supervised_learning import LinearRegression 9 | 10 | def main(): 11 | 12 | X, y = make_regression(n_samples=100, n_features=1, noise=20) 13 | 14 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 15 | 16 | n_samples, n_features = np.shape(X) 17 | 18 | model = LinearRegression(n_iterations=100) 19 | 20 | model.fit(X_train, y_train) 21 | 22 | # Training error plot 23 | n = len(model.training_errors) 24 | training, = plt.plot(range(n), model.training_errors, label="Training Error") 25 | plt.legend(handles=[training]) 26 | plt.title("Error Plot") 27 | plt.ylabel('Mean Squared Error') 28 | plt.xlabel('Iterations') 29 | plt.show() 30 | 31 | y_pred = model.predict(X_test) 32 | mse = mean_squared_error(y_test, y_pred) 33 | print ("Mean squared error: %s" % (mse)) 34 | 35 | y_pred_line = model.predict(X) 36 | 37 | # Color map 38 | cmap = plt.get_cmap('viridis') 39 | 40 | # Plot the results (the data comes from make_regression, so the axes show the raw feature and target values) 41 | m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10) 42 | m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10) 43 | plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction") 44 | plt.suptitle("Linear Regression") 45 | plt.title("MSE: %.2f" % mse, fontsize=10) 46 | plt.xlabel('Feature') 47 | plt.ylabel('Target') 48 | plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') 49 | plt.show() 50 | 51 | if __name__ == "__main__": 52 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/logistic_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn import datasets 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | # Import helper functions 7 | from mlfromscratch.utils import make_diagonal, normalize, train_test_split, accuracy_score 8 | from mlfromscratch.deep_learning.activation_functions import Sigmoid 9 | from mlfromscratch.utils import Plot 10 | from mlfromscratch.supervised_learning import LogisticRegression 11 | 12 | def main(): 13 | # Load dataset 14 | data = datasets.load_iris() 15 | X = normalize(data.data[data.target != 0]) 16 | y = data.target[data.target != 0] 17 | y[y == 1] = 0 18 | y[y == 2] = 1 19 | 20 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1) 21 | 22 | clf = LogisticRegression(gradient_descent=True) 23 | clf.fit(X_train, y_train) 24 | y_pred = clf.predict(X_test) 25 | 26 | accuracy = accuracy_score(y_test, y_pred) 27 | print ("Accuracy:", accuracy) 28 | 29 | # Reduce dimension to two using PCA and plot the results 30 | Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy) 31 | 32 | if __name__ == "__main__": 33 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/multi_class_lda.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn import datasets 3 | import numpy as np 4 | 5 | from mlfromscratch.supervised_learning import MultiClassLDA 6 | from mlfromscratch.utils import normalize 7 | 8 | def main(): 9 | # Load the dataset 10 | data = datasets.load_iris() 11 | X = normalize(data.data) 12 | y = data.target 13 | 14 | # Project the data onto the 2 primary components 15 | multi_class_lda = MultiClassLDA() 16 | multi_class_lda.plot_in_2d(X, y, title="LDA") 17 | 18 | if __name__ == "__main__": 19 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/multilayer_perceptron.py:
-------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | # Import helper functions 8 | from mlfromscratch.deep_learning import NeuralNetwork 9 | from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot 10 | from mlfromscratch.utils import get_random_subsets, shuffle_data, accuracy_score 11 | from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta 12 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 13 | from mlfromscratch.utils.misc import bar_widgets 14 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Activation 15 | 16 | 17 | def main(): 18 | 19 | optimizer = Adam() 20 | 21 | #----- 22 | # MLP 23 | #----- 24 | 25 | data = datasets.load_digits() 26 | X = data.data 27 | y = data.target 28 | 29 | # Convert to one-hot encoding 30 | y = to_categorical(y.astype("int")) 31 | 32 | n_samples, n_features = X.shape 33 | n_hidden = 512 34 | 35 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) 36 | 37 | clf = NeuralNetwork(optimizer=optimizer, 38 | loss=CrossEntropy, 39 | validation_data=(X_test, y_test)) 40 | 41 | clf.add(Dense(n_hidden, input_shape=(n_features,))) 42 | clf.add(Activation('leaky_relu')) 43 | clf.add(Dense(n_hidden)) 44 | clf.add(Activation('leaky_relu')) 45 | clf.add(Dropout(0.25)) 46 | clf.add(Dense(n_hidden)) 47 | clf.add(Activation('leaky_relu')) 48 | clf.add(Dropout(0.25)) 49 | clf.add(Dense(n_hidden)) 50 | clf.add(Activation('leaky_relu')) 51 | clf.add(Dropout(0.25)) 52 | clf.add(Dense(10)) 53 | clf.add(Activation('softmax')) 54 | 55 | print () 56 | clf.summary(name="MLP") 57 | 58 | train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256) 59 | 60 | # Training and validation error plot 61 | n = len(train_err) 62 | training, = plt.plot(range(n), train_err, label="Training Error") 63 | validation, = plt.plot(range(n), val_err, label="Validation Error") 64 | plt.legend(handles=[training, validation]) 65 | plt.title("Error Plot") 66 | plt.ylabel('Error') 67 | plt.xlabel('Iterations') 68 | plt.show() 69 | 70 | _, accuracy = clf.test_on_batch(X_test, y_test) 71 | print ("Accuracy:", accuracy) 72 | 73 | # Reduce dimension to 2D using PCA and plot the results 74 | y_pred = np.argmax(clf.predict(X_test), axis=1) 75 | Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=range(10)) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/naive_bayes.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | from sklearn import datasets 3 | import numpy as np 4 | from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot 5 | from mlfromscratch.supervised_learning import NaiveBayes 6 | 7 | def main(): 8 | data = datasets.load_digits() 9 | X = normalize(data.data) 10 | y = data.target 11 | 12 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 13 | 14 | clf = NaiveBayes() 15 | clf.fit(X_train, y_train) 16 | y_pred = clf.predict(X_test) 17 | 18 | accuracy = accuracy_score(y_test, y_pred) 19 | 20 | print ("Accuracy:", accuracy) 21 | 22 | # Reduce dimension to two using PCA and plot the results 
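# (No explicit PCA call is made here: the utils Plot helper presumably performs the 2-D projection internally, since the raw 64-dimensional digit features are passed straight to plot_in_2d.)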
23 | Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names) 24 | 25 | if __name__ == "__main__": 26 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/neuroevolution.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | from mlfromscratch.supervised_learning import Neuroevolution 8 | from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot 9 | from mlfromscratch.deep_learning import NeuralNetwork 10 | from mlfromscratch.deep_learning.layers import Activation, Dense 11 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 12 | from mlfromscratch.deep_learning.optimizers import Adam 13 | 14 | def main(): 15 | 16 | X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=4, n_clusters_per_class=1, n_informative=2) 17 | 18 | data = datasets.load_digits() 19 | X = normalize(data.data) 20 | y = data.target 21 | y = to_categorical(y.astype("int")) 22 | 23 | # Model builder 24 | def model_builder(n_inputs, n_outputs): 25 | model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy) 26 | model.add(Dense(16, input_shape=(n_inputs,))) 27 | model.add(Activation('relu')) 28 | model.add(Dense(n_outputs)) 29 | model.add(Activation('softmax')) 30 | 31 | return model 32 | 33 | # Print the model summary of a individual in the population 34 | print ("") 35 | model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary() 36 | 37 | population_size = 100 38 | n_generations = 3000 39 | mutation_rate = 0.01 40 | 41 | print ("Population Size: %d" % population_size) 42 | print ("Generations: %d" % n_generations) 43 | print ("Mutation Rate: %.2f" % mutation_rate) 44 | print ("") 45 | 46 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) 47 | 48 | model = Neuroevolution(population_size=population_size, 49 | mutation_rate=mutation_rate, 50 | model_builder=model_builder) 51 | 52 | model = model.evolve(X_train, y_train, n_generations=n_generations) 53 | 54 | loss, accuracy = model.test_on_batch(X_test, y_test) 55 | 56 | # Reduce dimension to 2D using PCA and plot the results 57 | y_pred = np.argmax(model.predict(X_test), axis=1) 58 | Plot().plot_in_2d(X_test, y_pred, title="Evolutionary Evolved Neural Network", accuracy=accuracy, legend_labels=range(y.shape[1])) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/particle_swarm_optimization.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | from mlfromscratch.supervised_learning import ParticleSwarmOptimizedNN 8 | from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot 9 | from mlfromscratch.deep_learning import NeuralNetwork 10 | from mlfromscratch.deep_learning.layers import Activation, Dense 11 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 12 | from mlfromscratch.deep_learning.optimizers import Adam 13 | 14 | def main(): 15 | 16 | X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=4, n_clusters_per_class=1, n_informative=2) 17 | 
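# NOTE: the synthetic dataset from make_classification above is not used further -- X and y are immediately overwritten by the Iris data loaded below (the Neuroevolution example above follows the same pattern with the digits dataset).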
18 | data = datasets.load_iris() 19 | X = normalize(data.data) 20 | y = data.target 21 | y = to_categorical(y.astype("int")) 22 | 23 | # Model builder 24 | def model_builder(n_inputs, n_outputs): 25 | model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy) 26 | model.add(Dense(16, input_shape=(n_inputs,))) 27 | model.add(Activation('relu')) 28 | model.add(Dense(n_outputs)) 29 | model.add(Activation('softmax')) 30 | 31 | return model 32 | 33 | # Print the model summary of a individual in the population 34 | print ("") 35 | model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary() 36 | 37 | population_size = 100 38 | n_generations = 10 39 | 40 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) 41 | 42 | inertia_weight = 0.8 43 | cognitive_weight = 0.8 44 | social_weight = 0.8 45 | 46 | print ("Population Size: %d" % population_size) 47 | print ("Generations: %d" % n_generations) 48 | print ("") 49 | print ("Inertia Weight: %.2f" % inertia_weight) 50 | print ("Cognitive Weight: %.2f" % cognitive_weight) 51 | print ("Social Weight: %.2f" % social_weight) 52 | print ("") 53 | 54 | model = ParticleSwarmOptimizedNN(population_size=population_size, 55 | inertia_weight=inertia_weight, 56 | cognitive_weight=cognitive_weight, 57 | social_weight=social_weight, 58 | max_velocity=5, 59 | model_builder=model_builder) 60 | 61 | model = model.evolve(X_train, y_train, n_generations=n_generations) 62 | 63 | loss, accuracy = model.test_on_batch(X_test, y_test) 64 | 65 | print ("Accuracy: %.1f%%" % float(100*accuracy)) 66 | 67 | # Reduce dimension to 2D using PCA and plot the results 68 | y_pred = np.argmax(model.predict(X_test), axis=1) 69 | Plot().plot_in_2d(X_test, y_pred, title="Particle Swarm Optimized Neural Network", accuracy=accuracy, legend_labels=range(y.shape[1])) 70 | 71 | 72 | if __name__ == "__main__": 73 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/partitioning_around_medoids.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | import numpy as np 3 | 4 | # Import helper functions 5 | from mlfromscratch.utils import Plot 6 | from mlfromscratch.unsupervised_learning import PAM 7 | 8 | def main(): 9 | # Load the dataset 10 | X, y = datasets.make_blobs() 11 | 12 | # Cluster the data using K-Medoids 13 | clf = PAM(k=3) 14 | y_pred = clf.predict(X) 15 | 16 | # Project the data onto the 2 primary principal components 17 | p = Plot() 18 | p.plot_in_2d(X, y_pred, title="PAM Clustering") 19 | p.plot_in_2d(X, y, title="Actual Clustering") 20 | 21 | 22 | if __name__ == "__main__": 23 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/perceptron.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn import datasets 3 | import numpy as np 4 | 5 | # Import helper functions 6 | from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score 7 | from mlfromscratch.deep_learning.activation_functions import Sigmoid 8 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 9 | from mlfromscratch.utils import Plot 10 | from mlfromscratch.supervised_learning import Perceptron 11 | 12 | 13 | def main(): 14 | data = datasets.load_digits() 15 | X = normalize(data.data) 16 | y = data.target 17 | 18 | # One-hot encoding of nominal y-values 19 | y = 
to_categorical(y) 20 | 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1) 22 | 23 | # Perceptron 24 | clf = Perceptron(n_iterations=5000, 25 | learning_rate=0.001, 26 | loss=CrossEntropy, 27 | activation_function=Sigmoid) 28 | clf.fit(X_train, y_train) 29 | 30 | y_pred = np.argmax(clf.predict(X_test), axis=1) 31 | y_test = np.argmax(y_test, axis=1) 32 | 33 | accuracy = accuracy_score(y_test, y_pred) 34 | 35 | print ("Accuracy:", accuracy) 36 | 37 | # Reduce dimension to two using PCA and plot the results 38 | Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y)) 39 | 40 | 41 | if __name__ == "__main__": 42 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/polynomial_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | # Import helper functions 6 | from mlfromscratch.supervised_learning import PolynomialRidgeRegression 7 | from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error 8 | from mlfromscratch.utils import train_test_split, polynomial_features, Plot 9 | 10 | 11 | def main(): 12 | 13 | # Load temperature data 14 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 15 | 16 | time = np.atleast_2d(data["time"].values).T 17 | temp = data["temp"].values 18 | 19 | X = time # fraction of the year [0, 1] 20 | y = temp 21 | 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 23 | 24 | poly_degree = 15 25 | 26 | # Finding regularization constant using cross validation 27 | lowest_error = float("inf") 28 | best_reg_factor = None 29 | print ("Finding regularization constant using cross validation:") 30 | k = 10 31 | for reg_factor in np.arange(0, 0.1, 0.01): 32 | cross_validation_sets = k_fold_cross_validation_sets( 33 | X_train, y_train, k=k) 34 | mse = 0 35 | for _X_train, _X_test, _y_train, _y_test in cross_validation_sets: 36 | model = PolynomialRidgeRegression(degree=poly_degree, 37 | reg_factor=reg_factor, 38 | learning_rate=0.001, 39 | n_iterations=10000) 40 | model.fit(_X_train, _y_train) 41 | y_pred = model.predict(_X_test) 42 | _mse = mean_squared_error(_y_test, y_pred) 43 | mse += _mse 44 | mse /= k 45 | 46 | # Print the mean squared error 47 | print ("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor)) 48 | 49 | # Save reg. constant that gave lowest error 50 | if mse < lowest_error: 51 | best_reg_factor = reg_factor 52 | lowest_error = mse 53 | 54 | # Make final prediction 55 | model = PolynomialRidgeRegression(degree=poly_degree, 56 | reg_factor=best_reg_factor, 57 | learning_rate=0.001, 58 | n_iterations=10000) 59 | model.fit(X_train, y_train) 60 | y_pred = model.predict(X_test) 61 | mse = mean_squared_error(y_test, y_pred) 62 | print ("Mean squared error: %s (given by reg. 
factor: %s)" % (lowest_error, best_reg_factor)) 63 | 64 | y_pred_line = model.predict(X) 65 | 66 | # Color map 67 | cmap = plt.get_cmap('viridis') 68 | 69 | # Plot the results 70 | m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) 71 | m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) 72 | plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") 73 | plt.suptitle("Polynomial Ridge Regression") 74 | plt.title("MSE: %.2f" % mse, fontsize=10) 75 | plt.xlabel('Day') 76 | plt.ylabel('Temperature in Celcius') 77 | plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') 78 | plt.show() 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /mlfromscratch/examples/principal_component_analysis.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | import matplotlib.pyplot as plt 3 | import matplotlib.cm as cmx 4 | import matplotlib.colors as colors 5 | import numpy as np 6 | from mlfromscratch.unsupervised_learning import PCA 7 | 8 | def main(): 9 | 10 | # Demo of how to reduce the dimensionality of the data to two dimension 11 | # and plot the results. 12 | 13 | # Load the dataset 14 | data = datasets.load_digits() 15 | X = data.data 16 | y = data.target 17 | 18 | # Project the data onto the 2 primary principal components 19 | X_trans = PCA().transform(X, 2) 20 | 21 | x1 = X_trans[:, 0] 22 | x2 = X_trans[:, 1] 23 | 24 | cmap = plt.get_cmap('viridis') 25 | colors = [cmap(i) for i in np.linspace(0, 1, len(np.unique(y)))] 26 | 27 | class_distr = [] 28 | # Plot the different class distributions 29 | for i, l in enumerate(np.unique(y)): 30 | _x1 = x1[y == l] 31 | _x2 = x2[y == l] 32 | _y = y[y == l] 33 | class_distr.append(plt.scatter(_x1, _x2, color=colors[i])) 34 | 35 | # Add a legend 36 | plt.legend(class_distr, y, loc=1) 37 | 38 | # Axis labels 39 | plt.suptitle("PCA Dimensionality Reduction") 40 | plt.title("Digit Dataset") 41 | plt.xlabel('Principal Component 1') 42 | plt.ylabel('Principal Component 2') 43 | plt.show() 44 | 45 | 46 | if __name__ == "__main__": 47 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/random_forest.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from sklearn import datasets 4 | from mlfromscratch.utils import train_test_split, accuracy_score, Plot 5 | from mlfromscratch.supervised_learning import RandomForest 6 | 7 | def main(): 8 | data = datasets.load_digits() 9 | X = data.data 10 | y = data.target 11 | 12 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) 13 | 14 | clf = RandomForest(n_estimators=100) 15 | clf.fit(X_train, y_train) 16 | y_pred = clf.predict(X_test) 17 | 18 | accuracy = accuracy_score(y_test, y_pred) 19 | 20 | print ("Accuracy:", accuracy) 21 | 22 | Plot().plot_in_2d(X_test, y_pred, title="Random Forest", accuracy=accuracy, legend_labels=data.target_names) 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /mlfromscratch/examples/recurrent_neural_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 
from mlfromscratch.deep_learning import NeuralNetwork 6 | from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot 7 | from mlfromscratch.utils import get_random_subsets, shuffle_data, accuracy_score 8 | from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta 9 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 10 | from mlfromscratch.utils.misc import bar_widgets 11 | from mlfromscratch.deep_learning.layers import RNN, Activation 12 | 13 | 14 | def main(): 15 | 16 | optimizer = Adam() 17 | 18 | def gen_mult_ser(nums): 19 | """ Method which generates multiplication series """ 20 | X = np.zeros([nums, 10, 61], dtype=float) 21 | y = np.zeros([nums, 10, 61], dtype=float) 22 | for i in range(nums): 23 | start = np.random.randint(2, 7) 24 | mult_ser = np.linspace(start, start*10, num=10, dtype=int) 25 | X[i] = to_categorical(mult_ser, n_col=61) 26 | y[i] = np.roll(X[i], -1, axis=0) 27 | y[:, -1, 1] = 1 # Mark endpoint as 1 28 | return X, y 29 | 30 | 31 | def gen_num_seq(nums): 32 | """ Method which generates sequence of numbers """ 33 | X = np.zeros([nums, 10, 20], dtype=float) 34 | y = np.zeros([nums, 10, 20], dtype=float) 35 | for i in range(nums): 36 | start = np.random.randint(0, 10) 37 | num_seq = np.arange(start, start+10) 38 | X[i] = to_categorical(num_seq, n_col=20) 39 | y[i] = np.roll(X[i], -1, axis=0) 40 | y[:, -1, 1] = 1 # Mark endpoint as 1 41 | return X, y 42 | 43 | X, y = gen_mult_ser(3000) 44 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 45 | 46 | # Model definition 47 | clf = NeuralNetwork(optimizer=optimizer, 48 | loss=CrossEntropy) 49 | clf.add(RNN(10, activation="tanh", bptt_trunc=5, input_shape=(10, 61))) 50 | clf.add(Activation('softmax')) 51 | clf.summary("RNN") 52 | 53 | # Print a problem instance and the correct solution 54 | tmp_X = np.argmax(X_train[0], axis=1) 55 | tmp_y = np.argmax(y_train[0], axis=1) 56 | print ("Number Series Problem:") 57 | print ("X = [" + " ".join(tmp_X.astype("str")) + "]") 58 | print ("y = [" + " ".join(tmp_y.astype("str")) + "]") 59 | print () 60 | 61 | train_err, _ = clf.fit(X_train, y_train, n_epochs=500, batch_size=512) 62 | 63 | # Predict labels of the test data 64 | y_pred = np.argmax(clf.predict(X_test), axis=2) 65 | y_test = np.argmax(y_test, axis=2) 66 | 67 | print () 68 | print ("Results:") 69 | for i in range(5): 70 | # Print a problem instance and the correct solution 71 | tmp_X = np.argmax(X_test[i], axis=1) 72 | tmp_y1 = y_test[i] 73 | tmp_y2 = y_pred[i] 74 | print ("X = [" + " ".join(tmp_X.astype("str")) + "]") 75 | print ("y_true = [" + " ".join(tmp_y1.astype("str")) + "]") 76 | print ("y_pred = [" + " ".join(tmp_y2.astype("str")) + "]") 77 | print () 78 | 79 | accuracy = np.mean(accuracy_score(y_test, y_pred)) 80 | print ("Accuracy:", accuracy) 81 | 82 | training = plt.plot(range(500), train_err, label="Training Error") 83 | plt.title("Error Plot") 84 | plt.ylabel('Training Error') 85 | plt.xlabel('Iterations') 86 | plt.show() 87 | 88 | if __name__ == "__main__": 89 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/restricted_boltzmann_machine.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from sklearn import datasets 5 | from sklearn.datasets import fetch_mldata 6 | import matplotlib.pyplot as plt 7 | 8 | from mlfromscratch.unsupervised_learning 
import RBM 9 | 10 | logging.basicConfig(level=logging.DEBUG) 11 | 12 | def main(): 13 | 14 | mnist = fetch_mldata('MNIST original') 15 | 16 | X = mnist.data / 255.0 17 | y = mnist.target 18 | 19 | # Select the samples of the digit 2 20 | X = X[y == 2] 21 | 22 | # Limit dataset to 500 samples 23 | idx = np.random.choice(range(X.shape[0]), size=500, replace=False) 24 | X = X[idx] 25 | 26 | rbm = RBM(n_hidden=50, n_iterations=200, batch_size=25, learning_rate=0.001) 27 | rbm.fit(X) 28 | 29 | # Training error plot 30 | training, = plt.plot(range(len(rbm.training_errors)), rbm.training_errors, label="Training Error") 31 | plt.legend(handles=[training]) 32 | plt.title("Error Plot") 33 | plt.ylabel('Error') 34 | plt.xlabel('Iterations') 35 | plt.show() 36 | 37 | # Get the images that were reconstructed during training 38 | gen_imgs = rbm.training_reconstructions 39 | 40 | # Plot the reconstructed images during the first iteration 41 | fig, axs = plt.subplots(5, 5) 42 | plt.suptitle("Restricted Boltzmann Machine - First Iteration") 43 | cnt = 0 44 | for i in range(5): 45 | for j in range(5): 46 | axs[i,j].imshow(gen_imgs[0][cnt].reshape((28, 28)), cmap='gray') 47 | axs[i,j].axis('off') 48 | cnt += 1 49 | fig.savefig("rbm_first.png") 50 | plt.close() 51 | 52 | # Plot the images during the last iteration 53 | fig, axs = plt.subplots(5, 5) 54 | plt.suptitle("Restricted Boltzmann Machine - Last Iteration") 55 | cnt = 0 56 | for i in range(5): 57 | for j in range(5): 58 | axs[i,j].imshow(gen_imgs[-1][cnt].reshape((28, 28)), cmap='gray') 59 | axs[i,j].axis('off') 60 | cnt += 1 61 | fig.savefig("rbm_last.png") 62 | plt.close() 63 | 64 | 65 | if __name__ == "__main__": 66 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/ridge_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | # Import helper functions 6 | from mlfromscratch.supervised_learning import PolynomialRidgeRegression 7 | from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, Plot 8 | from mlfromscratch.utils import train_test_split, polynomial_features, mean_squared_error 9 | 10 | 11 | def main(): 12 | 13 | # Load temperature data 14 | data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") 15 | 16 | time = np.atleast_2d(data["time"].values).T 17 | temp = data["temp"].values 18 | 19 | X = time # fraction of the year [0, 1] 20 | y = temp 21 | 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) 23 | 24 | poly_degree = 15 25 | 26 | # Finding regularization constant using cross validation 27 | lowest_error = float("inf") 28 | best_reg_factor = None 29 | print ("Finding regularization constant using cross validation:") 30 | k = 10 31 | for reg_factor in np.arange(0, 0.1, 0.01): 32 | cross_validation_sets = k_fold_cross_validation_sets( 33 | X_train, y_train, k=k) 34 | mse = 0 35 | for _X_train, _X_test, _y_train, _y_test in cross_validation_sets: 36 | model = PolynomialRidgeRegression(degree=poly_degree, 37 | reg_factor=reg_factor, 38 | learning_rate=0.001, 39 | n_iterations=10000) 40 | model.fit(_X_train, _y_train) 41 | y_pred = model.predict(_X_test) 42 | _mse = mean_squared_error(_y_test, y_pred) 43 | mse += _mse 44 | mse /= k 45 | 46 | # Print the mean squared error 47 | print ("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor)) 48 | 49 
| # Save reg. constant that gave lowest error 50 | if mse < lowest_error: 51 | best_reg_factor = reg_factor 52 | lowest_error = mse 53 | 54 | # Make final prediction 55 | model = PolynomialRidgeRegression(degree=poly_degree, 56 | reg_factor=best_reg_factor, 57 | learning_rate=0.001, 58 | n_iterations=10000) 59 | model.fit(X_train, y_train) 60 | 61 | y_pred = model.predict(X_test) 62 | mse = mean_squared_error(y_test, y_pred) 63 | print ("Mean squared error: %s (given by reg. factor: %s)" % (mse, best_reg_factor)) 64 | 65 | y_pred_line = model.predict(X) 66 | 67 | # Color map 68 | cmap = plt.get_cmap('viridis') 69 | 70 | # Plot the results 71 | m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) 72 | m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) 73 | plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") 74 | plt.suptitle("Polynomial Ridge Regression") 75 | plt.title("MSE: %.2f" % mse, fontsize=10) 76 | plt.xlabel('Day') 77 | plt.ylabel('Temperature in Celsius') 78 | plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') 79 | plt.show() 80 | 81 | if __name__ == "__main__": 82 | main() 83 | -------------------------------------------------------------------------------- /mlfromscratch/examples/support_vector_machine.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from sklearn import datasets 4 | 5 | # Import helper functions 6 | from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot 7 | from mlfromscratch.utils.kernels import * 8 | from mlfromscratch.supervised_learning import SupportVectorMachine 9 | 10 | def main(): 11 | data = datasets.load_iris() 12 | X = normalize(data.data[data.target != 0]) 13 | y = data.target[data.target != 0] 14 | y[y == 1] = -1 15 | y[y == 2] = 1 16 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) 17 | 18 | clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1) 19 | clf.fit(X_train, y_train) 20 | y_pred = clf.predict(X_test) 21 | 22 | accuracy = accuracy_score(y_test, y_pred) 23 | 24 | print ("Accuracy:", accuracy) 25 | 26 | # Reduce dimension to two using PCA and plot the results 27 | Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy) 28 | 29 | 30 | if __name__ == "__main__": 31 | main() -------------------------------------------------------------------------------- /mlfromscratch/examples/xgboost.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | from sklearn import datasets 4 | import matplotlib.pyplot as plt 5 | import progressbar 6 | from mlfromscratch.utils import train_test_split, standardize, to_categorical, normalize 7 | from mlfromscratch.utils import mean_squared_error, accuracy_score, Plot 8 | from mlfromscratch.supervised_learning import XGBoost 9 | 10 | def main(): 11 | 12 | print ("-- XGBoost --") 13 | 14 | data = datasets.load_iris() 15 | X = data.data 16 | y = data.target 17 | 18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) 19 | 20 | clf = XGBoost() 21 | clf.fit(X_train, y_train) 22 | y_pred = clf.predict(X_test) 23 | 24 | accuracy = accuracy_score(y_test, y_pred) 25 | 26 | print ("Accuracy:", accuracy) 27 | 28 | Plot().plot_in_2d(X_test, y_pred, 29 | title="XGBoost", 30 | accuracy=accuracy, 31 |
legend_labels=data.target_names) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /mlfromscratch/reinforcement_learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_q_network import DeepQNetwork -------------------------------------------------------------------------------- /mlfromscratch/reinforcement_learning/deep_q_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import random 3 | import numpy as np 4 | import gym 5 | from collections import deque 6 | 7 | 8 | class DeepQNetwork(): 9 | """Q-Learning with deep neural network to learn the control policy. 10 | Uses a deep neural network model to predict the expected utility (Q-value) of executing an action in a given state. 11 | 12 | Reference: https://arxiv.org/abs/1312.5602 13 | Parameters: 14 | ----------- 15 | env_name: string 16 | The environment that the agent will explore. 17 | Check: https://gym.openai.com/envs 18 | epsilon: float 19 | The epsilon-greedy value. The probability that the agent should select a random action instead of 20 | the action that will maximize the expected utility. 21 | gamma: float 22 | Determines how much the agent should consider future rewards. 23 | decay_rate: float 24 | The rate of decay for the epsilon value after each epoch. 25 | min_epsilon: float 26 | The value which epsilon will approach as the training progresses. 27 | """ 28 | def __init__(self, env_name='CartPole-v1', epsilon=1, gamma=0.9, decay_rate=0.005, min_epsilon=0.1): 29 | self.epsilon = epsilon 30 | self.gamma = gamma 31 | self.decay_rate = decay_rate 32 | self.min_epsilon = min_epsilon 33 | self.memory_size = 300 34 | self.memory = [] 35 | 36 | # Initialize the environment 37 | self.env = gym.make(env_name) 38 | self.n_states = self.env.observation_space.shape[0] 39 | self.n_actions = self.env.action_space.n 40 | 41 | def set_model(self, model): 42 | self.model = model(n_inputs=self.n_states, n_outputs=self.n_actions) 43 | 44 | def _select_action(self, state): 45 | if np.random.rand() < self.epsilon: 46 | # Choose action randomly 47 | action = np.random.randint(self.n_actions) 48 | else: 49 | # Take action with highest predicted utility given state 50 | action = np.argmax(self.model.predict(state), axis=1)[0] 51 | 52 | return action 53 | 54 | def _memorize(self, state, action, reward, new_state, done): 55 | self.memory.append((state, action, reward, new_state, done)) 56 | # Make sure we restrict memory size to specified limit 57 | if len(self.memory) > self.memory_size: 58 | self.memory.pop(0) 59 | 60 | def _construct_training_set(self, replay): 61 | # Select states and new states from replay 62 | states = np.array([a[0] for a in replay]) 63 | new_states = np.array([a[3] for a in replay]) 64 | 65 | # Predict the expected utility of current state and new state 66 | Q = self.model.predict(states) 67 | Q_new = self.model.predict(new_states) 68 | 69 | replay_size = len(replay) 70 | X = np.empty((replay_size, self.n_states)) 71 | y = np.empty((replay_size, self.n_actions)) 72 | 73 | # Construct training set 74 | for i in range(replay_size): 75 | state_r, action_r, reward_r, new_state_r, done_r = replay[i] 76 | 77 | target = Q[i] 78 | target[action_r] = reward_r 79 | # If we're done the utility is simply the reward of executing action a in 80 | # state s, otherwise we add the expected maximum 
future reward as well 81 | if not done_r: 82 | target[action_r] += self.gamma * np.amax(Q_new[i]) 83 | 84 | X[i] = state_r 85 | y[i] = target 86 | 87 | return X, y 88 | 89 | def train(self, n_epochs=500, batch_size=32): 90 | max_reward = 0 91 | 92 | for epoch in range(n_epochs): 93 | state = self.env.reset() 94 | total_reward = 0 95 | 96 | epoch_loss = [] 97 | while True: 98 | 99 | action = self._select_action(state) 100 | # Take a step 101 | new_state, reward, done, _ = self.env.step(action) 102 | 103 | self._memorize(state, action, reward, new_state, done) 104 | 105 | # Sample replay batch from memory 106 | _batch_size = min(len(self.memory), batch_size) 107 | replay = random.sample(self.memory, _batch_size) 108 | 109 | # Construct training set from replay 110 | X, y = self._construct_training_set(replay) 111 | 112 | # Learn control policy 113 | loss = self.model.train_on_batch(X, y) 114 | epoch_loss.append(loss) 115 | 116 | total_reward += reward 117 | state = new_state 118 | 119 | if done: break 120 | 121 | epoch_loss = np.mean(epoch_loss) 122 | 123 | # Reduce the epsilon parameter 124 | self.epsilon = self.min_epsilon + (1.0 - self.min_epsilon) * np.exp(-self.decay_rate * epoch) 125 | 126 | max_reward = max(max_reward, total_reward) 127 | 128 | print ("%d [Loss: %.4f, Reward: %s, Epsilon: %.4f, Max Reward: %s]" % (epoch, epoch_loss, total_reward, self.epsilon, max_reward)) 129 | 130 | print ("Training Finished") 131 | 132 | def play(self, n_epochs): 133 | # self.env = gym.wrappers.Monitor(self.env, '/tmp/cartpole-experiment-1', force=True) 134 | for epoch in range(n_epochs): 135 | state = self.env.reset() 136 | total_reward = 0 137 | while True: 138 | self.env.render() 139 | action = np.argmax(self.model.predict(state), axis=1)[0] 140 | state, reward, done, _ = self.env.step(action) 141 | total_reward += reward 142 | if done: break 143 | print ("%d Reward: %s" % (epoch, total_reward)) 144 | self.env.close() 145 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .adaboost import Adaboost 2 | from .bayesian_regression import BayesianRegression 3 | from .decision_tree import RegressionTree, ClassificationTree, XGBoostRegressionTree 4 | from .gradient_boosting import GradientBoostingClassifier, GradientBoostingRegressor 5 | from .k_nearest_neighbors import KNN 6 | from .linear_discriminant_analysis import LDA 7 | from .regression import LinearRegression, PolynomialRegression, LassoRegression 8 | from .regression import RidgeRegression, PolynomialRidgeRegression, ElasticNet 9 | from .logistic_regression import LogisticRegression 10 | from .multi_class_lda import MultiClassLDA 11 | from .naive_bayes import NaiveBayes 12 | from .perceptron import Perceptron 13 | from .random_forest import RandomForest 14 | from .support_vector_machine import SupportVectorMachine 15 | from .xgboost import XGBoost 16 | from .neuroevolution import Neuroevolution 17 | from .particle_swarm_optimization import ParticleSwarmOptimizedNN -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/adaboost.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import math 4 | from sklearn import datasets 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | 8 | # Import helper 
functions 9 | from mlfromscratch.utils import train_test_split, accuracy_score, Plot 10 | 11 | # Decision stump used as weak classifier in this impl. of Adaboost 12 | class DecisionStump(): 13 | def __init__(self): 14 | # Determines if sample shall be classified as -1 or 1 given threshold 15 | self.polarity = 1 16 | # The index of the feature used to make classification 17 | self.feature_index = None 18 | # The threshold value that the feature should be measured against 19 | self.threshold = None 20 | # Value indicative of the classifier's accuracy 21 | self.alpha = None 22 | 23 | class Adaboost(): 24 | """Boosting method that uses a number of weak classifiers in 25 | ensemble to make a strong classifier. This implementation uses decision 26 | stumps, which are one-level Decision Trees. 27 | 28 | Parameters: 29 | ----------- 30 | n_clf: int 31 | The number of weak classifiers that will be used. 32 | """ 33 | def __init__(self, n_clf=5): 34 | self.n_clf = n_clf 35 | 36 | def fit(self, X, y): 37 | n_samples, n_features = np.shape(X) 38 | 39 | # Initialize weights to 1/N 40 | w = np.full(n_samples, (1 / n_samples)) 41 | 42 | self.clfs = [] 43 | # Iterate through classifiers 44 | for _ in range(self.n_clf): 45 | clf = DecisionStump() 46 | # Minimum error given for using a certain feature value threshold 47 | # for predicting sample label 48 | min_error = float('inf') 49 | # Iterate through every unique feature value and see what value 50 | # makes the best threshold for predicting y 51 | for feature_i in range(n_features): 52 | feature_values = np.expand_dims(X[:, feature_i], axis=1) 53 | unique_values = np.unique(feature_values) 54 | # Try every unique feature value as threshold 55 | for threshold in unique_values: 56 | p = 1 57 | # Set all predictions to '1' initially 58 | prediction = np.ones(np.shape(y)) 59 | # Label the samples whose values are below threshold as '-1' 60 | prediction[X[:, feature_i] < threshold] = -1 61 | # Error = sum of weights of misclassified samples 62 | error = sum(w[y != prediction]) 63 | 64 | # If the error is over 50% we flip the polarity so that samples that 65 | # were classified as -1 are classified as 1, and vice versa 66 | # E.g. error = 0.8 => (1 - error) = 0.2 67 | if error > 0.5: 68 | error = 1 - error 69 | p = -1 70 | 71 | # If this threshold resulted in the smallest error we save the 72 | # configuration 73 | if error < min_error: 74 | clf.polarity = p 75 | clf.threshold = threshold 76 | clf.feature_index = feature_i 77 | min_error = error 78 | # Calculate the alpha which is used to update the sample weights, 79 | # Alpha is also an approximation of this classifier's proficiency 80 | clf.alpha = 0.5 * math.log((1.0 - min_error) / (min_error + 1e-10)) 81 | # Set all predictions to '1' initially 82 | predictions = np.ones(np.shape(y)) 83 | # The indexes where the sample values are below threshold 84 | negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold) 85 | # Label those as '-1' 86 | predictions[negative_idx] = -1 87 | # Calculate new weights 88 | # Misclassified samples get larger weights and correctly classified samples smaller ones 89 | w *= np.exp(-clf.alpha * y * predictions) 90 | # Normalize to one 91 | w /= np.sum(w) 92 | 93 | # Save classifier 94 | self.clfs.append(clf) 95 | 96 | def predict(self, X): 97 | n_samples = np.shape(X)[0] 98 | y_pred = np.zeros((n_samples, 1)) 99 | # For each classifier => label the samples 100 | for clf in self.clfs: 101 | # Set all predictions to '1' initially 102 | predictions =
np.ones(np.shape(y_pred)) 103 | # The indexes where the sample values are below threshold 104 | negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold) 105 | # Label those as '-1' 106 | predictions[negative_idx] = -1 107 | # Add predictions weighted by the classifiers alpha 108 | # (alpha indicative of classifier's proficiency) 109 | y_pred += clf.alpha * predictions 110 | 111 | # Return sign of prediction sum 112 | y_pred = np.sign(y_pred).flatten() 113 | 114 | return y_pred 115 | 116 | 117 | def main(): 118 | data = datasets.load_digits() 119 | X = data.data 120 | y = data.target 121 | 122 | digit1 = 1 123 | digit2 = 8 124 | idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0]) 125 | y = data.target[idx] 126 | # Change labels to {-1, 1} 127 | y[y == digit1] = -1 128 | y[y == digit2] = 1 129 | X = data.data[idx] 130 | 131 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) 132 | 133 | # Adaboost classification with 5 weak classifiers 134 | clf = Adaboost(n_clf=5) 135 | clf.fit(X_train, y_train) 136 | y_pred = clf.predict(X_test) 137 | 138 | accuracy = accuracy_score(y_test, y_pred) 139 | print ("Accuracy:", accuracy) 140 | 141 | # Reduce dimensions to 2d using pca and plot the results 142 | Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy) 143 | 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/bayesian_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from scipy.stats import chi2, multivariate_normal 4 | from mlfromscratch.utils import mean_squared_error, train_test_split, polynomial_features 5 | 6 | 7 | 8 | class BayesianRegression(object): 9 | """Bayesian regression model. If poly_degree is specified the features will 10 | be transformed to with a polynomial basis function, which allows for polynomial 11 | regression. Assumes Normal prior and likelihood for the weights and scaled inverse 12 | chi-squared prior and likelihood for the variance of the weights. 13 | 14 | Parameters: 15 | ----------- 16 | n_draws: float 17 | The number of simulated draws from the posterior of the parameters. 18 | mu0: array 19 | The mean values of the prior Normal distribution of the parameters. 20 | omega0: array 21 | The precision matrix of the prior Normal distribution of the parameters. 22 | nu0: float 23 | The degrees of freedom of the prior scaled inverse chi squared distribution. 24 | sigma_sq0: float 25 | The scale parameter of the prior scaled inverse chi squared distribution. 26 | poly_degree: int 27 | The polynomial degree that the features should be transformed to. Allows 28 | for polynomial regression. 29 | cred_int: float 30 | The credible interval (ETI in this impl.). 95 => 95% credible interval of the posterior 31 | of the parameters. 
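(Here ETI stands for equal-tail interval: fit() takes the (50 - cred_int/2)th and (50 + cred_int/2)th percentiles of the simulated posterior draws as the lower and upper bounds.)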
32 | 33 | Reference: 34 | https://github.com/mattiasvillani/BayesLearnCourse/raw/master/Slides/BayesLearnL5.pdf 35 | """ 36 | def __init__(self, n_draws, mu0, omega0, nu0, sigma_sq0, poly_degree=0, cred_int=95): 37 | self.w = None 38 | self.n_draws = n_draws 39 | self.poly_degree = poly_degree 40 | self.cred_int = cred_int 41 | 42 | # Prior parameters 43 | self.mu0 = mu0 44 | self.omega0 = omega0 45 | self.nu0 = nu0 46 | self.sigma_sq0 = sigma_sq0 47 | 48 | # Allows for simulation from the scaled inverse chi squared 49 | # distribution. Assumes the variance is distributed according to 50 | # this distribution. 51 | # Reference: 52 | # https://en.wikipedia.org/wiki/Scaled_inverse_chi-squared_distribution 53 | def _draw_scaled_inv_chi_sq(self, n, df, scale): 54 | X = chi2.rvs(size=n, df=df) 55 | sigma_sq = df * scale / X 56 | return sigma_sq 57 | 58 | def fit(self, X, y): 59 | 60 | # If polynomial transformation 61 | if self.poly_degree: 62 | X = polynomial_features(X, degree=self.poly_degree) 63 | 64 | n_samples, n_features = np.shape(X) 65 | 66 | X_X = X.T.dot(X) 67 | 68 | # Least squares approximate of beta 69 | beta_hat = np.linalg.pinv(X_X).dot(X.T).dot(y) 70 | 71 | # The posterior parameters can be determined analytically since we assume 72 | # conjugate priors for the likelihoods. 73 | 74 | # Normal prior / likelihood => Normal posterior 75 | mu_n = np.linalg.pinv(X_X + self.omega0).dot(X_X.dot(beta_hat)+self.omega0.dot(self.mu0)) 76 | omega_n = X_X + self.omega0 77 | # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior 78 | nu_n = self.nu0 + n_samples 79 | sigma_sq_n = (1.0/nu_n)*(self.nu0*self.sigma_sq0 + \ 80 | (y.T.dot(y) + self.mu0.T.dot(self.omega0).dot(self.mu0) - mu_n.T.dot(omega_n.dot(mu_n)))) 81 | 82 | # Simulate parameter values for n_draws 83 | beta_draws = np.empty((self.n_draws, n_features)) 84 | for i in range(self.n_draws): 85 | sigma_sq = self._draw_scaled_inv_chi_sq(n=1, df=nu_n, scale=sigma_sq_n) 86 | beta = multivariate_normal.rvs(size=1, mean=mu_n[:,0], cov=sigma_sq*np.linalg.pinv(omega_n)) 87 | # Save parameter draws 88 | beta_draws[i, :] = beta 89 | 90 | # Select the mean of the simulated variables as the ones used to make predictions 91 | self.w = np.mean(beta_draws, axis=0) 92 | 93 | # Lower and upper boundary of the credible interval 94 | l_eti = 50 - self.cred_int/2 95 | u_eti = 50 + self.cred_int/2 96 | self.eti = np.array([[np.percentile(beta_draws[:,i], q=l_eti), np.percentile(beta_draws[:,i], q=u_eti)] \ 97 | for i in range(n_features)]) 98 | 99 | def predict(self, X, eti=False): 100 | 101 | # If polynomial transformation 102 | if self.poly_degree: 103 | X = polynomial_features(X, degree=self.poly_degree) 104 | 105 | y_pred = X.dot(self.w) 106 | # If the lower and upper boundaries for the 95% 107 | # equal tail interval should be returned 108 | if eti: 109 | lower_w = self.eti[:, 0] 110 | upper_w = self.eti[:, 1] 111 | y_lower_pred = X.dot(lower_w) 112 | y_upper_pred = X.dot(upper_w) 113 | return y_pred, y_lower_pred, y_upper_pred 114 | 115 | return y_pred 116 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/gradient_boosting.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import progressbar 4 | 5 | # Import helper functions 6 | from mlfromscratch.utils import train_test_split, standardize, to_categorical 7 | from mlfromscratch.utils 
import mean_squared_error, accuracy_score 8 | from mlfromscratch.deep_learning.loss_functions import SquareLoss, CrossEntropy 9 | from mlfromscratch.supervised_learning.decision_tree import RegressionTree 10 | from mlfromscratch.utils.misc import bar_widgets 11 | 12 | 13 | class GradientBoosting(object): 14 | """Super class of GradientBoostingClassifier and GradientBoostinRegressor. 15 | Uses a collection of regression trees that trains on predicting the gradient 16 | of the loss function. 17 | 18 | Parameters: 19 | ----------- 20 | n_estimators: int 21 | The number of classification trees that are used. 22 | learning_rate: float 23 | The step length that will be taken when following the negative gradient during 24 | training. 25 | min_samples_split: int 26 | The minimum number of samples needed to make a split when building a tree. 27 | min_impurity: float 28 | The minimum impurity required to split the tree further. 29 | max_depth: int 30 | The maximum depth of a tree. 31 | regression: boolean 32 | True or false depending on if we're doing regression or classification. 33 | """ 34 | def __init__(self, n_estimators, learning_rate, min_samples_split, 35 | min_impurity, max_depth, regression): 36 | self.n_estimators = n_estimators 37 | self.learning_rate = learning_rate 38 | self.min_samples_split = min_samples_split 39 | self.min_impurity = min_impurity 40 | self.max_depth = max_depth 41 | self.regression = regression 42 | self.bar = progressbar.ProgressBar(widgets=bar_widgets) 43 | 44 | # Square loss for regression 45 | # Log loss for classification 46 | self.loss = SquareLoss() 47 | if not self.regression: 48 | self.loss = CrossEntropy() 49 | 50 | # Initialize regression trees 51 | self.trees = [] 52 | for _ in range(n_estimators): 53 | tree = RegressionTree( 54 | min_samples_split=self.min_samples_split, 55 | min_impurity=min_impurity, 56 | max_depth=self.max_depth) 57 | self.trees.append(tree) 58 | 59 | 60 | def fit(self, X, y): 61 | y_pred = np.full(np.shape(y), np.mean(y, axis=0)) 62 | for i in self.bar(range(self.n_estimators)): 63 | gradient = self.loss.gradient(y, y_pred) 64 | self.trees[i].fit(X, gradient) 65 | update = self.trees[i].predict(X) 66 | # Update y prediction 67 | y_pred -= np.multiply(self.learning_rate, update) 68 | 69 | 70 | def predict(self, X): 71 | y_pred = np.array([]) 72 | # Make predictions 73 | for tree in self.trees: 74 | update = tree.predict(X) 75 | update = np.multiply(self.learning_rate, update) 76 | y_pred = -update if not y_pred.any() else y_pred - update 77 | 78 | if not self.regression: 79 | # Turn into probability distribution 80 | y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1) 81 | # Set label to the value that maximizes probability 82 | y_pred = np.argmax(y_pred, axis=1) 83 | return y_pred 84 | 85 | 86 | class GradientBoostingRegressor(GradientBoosting): 87 | def __init__(self, n_estimators=200, learning_rate=0.5, min_samples_split=2, 88 | min_var_red=1e-7, max_depth=4, debug=False): 89 | super(GradientBoostingRegressor, self).__init__(n_estimators=n_estimators, 90 | learning_rate=learning_rate, 91 | min_samples_split=min_samples_split, 92 | min_impurity=min_var_red, 93 | max_depth=max_depth, 94 | regression=True) 95 | 96 | class GradientBoostingClassifier(GradientBoosting): 97 | def __init__(self, n_estimators=200, learning_rate=.5, min_samples_split=2, 98 | min_info_gain=1e-7, max_depth=2, debug=False): 99 | super(GradientBoostingClassifier, self).__init__(n_estimators=n_estimators, 100 | 
learning_rate=learning_rate, 101 | min_samples_split=min_samples_split, 102 | min_impurity=min_info_gain, 103 | max_depth=max_depth, 104 | regression=False) 105 | 106 | def fit(self, X, y): 107 | y = to_categorical(y) 108 | super(GradientBoostingClassifier, self).fit(X, y) 109 | 110 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/k_nearest_neighbors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from mlfromscratch.utils import euclidean_distance 4 | 5 | class KNN(): 6 | """ K Nearest Neighbors classifier. 7 | 8 | Parameters: 9 | ----------- 10 | k: int 11 | The number of closest neighbors that will determine the class of the 12 | sample that we wish to predict. 13 | """ 14 | def __init__(self, k=5): 15 | self.k = k 16 | 17 | def _vote(self, neighbor_labels): 18 | """ Return the most common class among the neighbor samples """ 19 | counts = np.bincount(neighbor_labels.astype('int')) 20 | return counts.argmax() 21 | 22 | def predict(self, X_test, X_train, y_train): 23 | y_pred = np.empty(X_test.shape[0]) 24 | # Determine the class of each sample 25 | for i, test_sample in enumerate(X_test): 26 | # Sort the training samples by their distance to the test sample and get the K nearest 27 | idx = np.argsort([euclidean_distance(test_sample, x) for x in X_train])[:self.k] 28 | # Extract the labels of the K nearest neighboring training samples 29 | k_nearest_neighbors = np.array([y_train[i] for i in idx]) 30 | # Label sample as the most common class label 31 | y_pred[i] = self._vote(k_nearest_neighbors) 32 | 33 | return y_pred 34 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/linear_discriminant_analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize 4 | 5 | class LDA(): 6 | """The Linear Discriminant Analysis classifier, also known as Fisher's linear discriminant. 7 | Besides classification, it can also be used to reduce the dimensionality of the dataset. 8 | """ 9 | def __init__(self): 10 | self.w = None 11 | 12 | def transform(self, X, y): 13 | self.fit(X, y) 14 | # Project data onto vector 15 | X_transform = X.dot(self.w) 16 | return X_transform 17 | 18 | def fit(self, X, y): 19 | # Separate data by class 20 | X1 = X[y == 0] 21 | X2 = X[y == 1] 22 | 23 | # Calculate the covariance matrices of the two datasets 24 | cov1 = calculate_covariance_matrix(X1) 25 | cov2 = calculate_covariance_matrix(X2) 26 | cov_tot = cov1 + cov2 27 | 28 | # Calculate the mean of the two datasets 29 | mean1 = X1.mean(0) 30 | mean2 = X2.mean(0) 31 | mean_diff = np.atleast_1d(mean1 - mean2) 32 | 33 | # Determine the vector which when X is projected onto it best separates the 34 | # data by class.
w = (mean1 - mean2) / (cov1 + cov2) 35 | self.w = np.linalg.pinv(cov_tot).dot(mean_diff) 36 | 37 | def predict(self, X): 38 | y_pred = [] 39 | for sample in X: 40 | h = sample.dot(self.w) 41 | y = 1 * (h < 0) 42 | y_pred.append(y) 43 | return y_pred 44 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/logistic_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import math 4 | from mlfromscratch.utils import make_diagonal, Plot 5 | from mlfromscratch.deep_learning.activation_functions import Sigmoid 6 | 7 | 8 | class LogisticRegression(): 9 | """ Logistic Regression classifier. 10 | Parameters: 11 | ----------- 12 | learning_rate: float 13 | The step length that will be taken when following the negative gradient during 14 | training. 15 | gradient_descent: boolean 16 | True or false depending if gradient descent should be used when training. If 17 | false then we use batch optimization by least squares. 18 | """ 19 | def __init__(self, learning_rate=.1, gradient_descent=True): 20 | self.param = None 21 | self.learning_rate = learning_rate 22 | self.gradient_descent = gradient_descent 23 | self.sigmoid = Sigmoid() 24 | 25 | def _initialize_parameters(self, X): 26 | n_features = np.shape(X)[1] 27 | # Initialize parameters between [-1/sqrt(N), 1/sqrt(N)] 28 | limit = 1 / math.sqrt(n_features) 29 | self.param = np.random.uniform(-limit, limit, (n_features,)) 30 | 31 | def fit(self, X, y, n_iterations=4000): 32 | self._initialize_parameters(X) 33 | # Tune parameters for n iterations 34 | for i in range(n_iterations): 35 | # Make a new prediction 36 | y_pred = self.sigmoid(X.dot(self.param)) 37 | if self.gradient_descent: 38 | # Move against the gradient of the loss function with 39 | # respect to the parameters to minimize the loss 40 | self.param -= self.learning_rate * -(y - y_pred).dot(X) 41 | else: 42 | # Make a diagonal matrix of the sigmoid gradient column vector 43 | diag_gradient = make_diagonal(self.sigmoid.gradient(X.dot(self.param))) 44 | # Batch opt: 45 | self.param = np.linalg.pinv(X.T.dot(diag_gradient).dot(X)).dot(X.T).dot(diag_gradient.dot(X).dot(self.param) + y - y_pred) 46 | 47 | def predict(self, X): 48 | y_pred = np.round(self.sigmoid(X.dot(self.param))).astype(int) 49 | return y_pred 50 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/multi_class_lda.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize 5 | 6 | 7 | class MultiClassLDA(): 8 | """Enables dimensionality reduction for multiple 9 | class distributions. It transforms the features space into a space where 10 | the between class scatter is maximized and the within class scatter is 11 | minimized. 12 | 13 | Parameters: 14 | ----------- 15 | solver: str 16 | If 'svd' we use the pseudo-inverse to calculate the inverse of matrices 17 | when doing the transformation. 
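# Editor's note (added): a tiny standalone illustration of the pseudo-inverse mentioned
# just above. When a scatter/covariance matrix is singular (e.g. more features than
# samples), np.linalg.inv raises LinAlgError while np.linalg.pinv still returns a
# usable least-squares inverse. The matrix below is made up for illustration.
import numpy as np

SW = np.array([[2.0, 0.0],
               [0.0, 0.0]])          # singular: the second feature has zero scatter
# np.linalg.inv(SW)                  # would raise numpy.linalg.LinAlgError
print(np.linalg.pinv(SW))            # [[0.5, 0.], [0., 0.]] -- safe to use instead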
18 | """ 19 | def __init__(self, solver="svd"): 20 | self.solver = solver 21 | 22 | def _calculate_scatter_matrices(self, X, y): 23 | n_features = np.shape(X)[1] 24 | labels = np.unique(y) 25 | 26 | # Within class scatter matrix: 27 | # SW = sum{ (X_for_class - mean_of_X_for_class)^2 } 28 | # <=> (n_samples_X_for_class - 1) * covar(X_for_class) 29 | SW = np.empty((n_features, n_features)) 30 | for label in labels: 31 | _X = X[y == label] 32 | SW += (len(_X) - 1) * calculate_covariance_matrix(_X) 33 | 34 | # Between class scatter: 35 | # SB = sum{ n_samples_for_class * (mean_for_class - total_mean)^2 } 36 | total_mean = np.mean(X, axis=0) 37 | SB = np.empty((n_features, n_features)) 38 | for label in labels: 39 | _X = X[y == label] 40 | _mean = np.mean(_X, axis=0) 41 | SB += len(_X) * (_mean - total_mean).dot((_mean - total_mean).T) 42 | 43 | return SW, SB 44 | 45 | def transform(self, X, y, n_components): 46 | SW, SB = self._calculate_scatter_matrices(X, y) 47 | 48 | # Determine SW^-1 * SB by calculating inverse of SW 49 | A = np.linalg.inv(SW).dot(SB) 50 | 51 | # Get eigenvalues and eigenvectors of SW^-1 * SB 52 | eigenvalues, eigenvectors = np.linalg.eigh(A) 53 | 54 | # Sort the eigenvalues and corresponding eigenvectors from largest 55 | # to smallest eigenvalue and select the first n_components 56 | idx = eigenvalues.argsort()[::-1] 57 | eigenvalues = eigenvalues[idx][:n_components] 58 | eigenvectors = eigenvectors[:, idx][:, :n_components] 59 | 60 | # Project the data onto eigenvectors 61 | X_transformed = X.dot(eigenvectors) 62 | 63 | return X_transformed 64 | 65 | 66 | def plot_in_2d(self, X, y, title=None): 67 | """ Plot the dataset X and the corresponding labels y in 2D using the LDA 68 | transformation.""" 69 | X_transformed = self.transform(X, y, n_components=2) 70 | x1 = X_transformed[:, 0] 71 | x2 = X_transformed[:, 1] 72 | plt.scatter(x1, x2, c=y) 73 | if title: plt.title(title) 74 | plt.show() 75 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/multilayer_perceptron.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import math 4 | from sklearn import datasets 5 | 6 | from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score, Plot 7 | from mlfromscratch.deep_learning.activation_functions import Sigmoid, Softmax 8 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 9 | 10 | class MultilayerPerceptron(): 11 | """Multilayer Perceptron classifier. A fully-connected neural network with one hidden layer. 12 | Unrolled to display the whole forward and backward pass. 13 | 14 | Parameters: 15 | ----------- 16 | n_hidden: int: 17 | The number of processing nodes (neurons) in the hidden layer. 18 | n_iterations: float 19 | The number of training iterations the algorithm will tune the weights for. 20 | learning_rate: float 21 | The step length that will be used when updating the weights. 
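# Editor's note (added): a standalone shape-level sketch of the forward pass this class
# unrolls below -- sigmoid hidden layer, softmax output. The sizes and random weights
# here are arbitrary stand-ins, not taken from the implementation.
import numpy as np

X = np.random.randn(4, 2)                           # 4 samples, 2 features
W, w0 = np.random.randn(2, 3), np.zeros((1, 3))     # hidden layer (3 neurons)
V, v0 = np.random.randn(3, 2), np.zeros((1, 2))     # output layer (2 classes)

hidden_output = 1 / (1 + np.exp(-(X.dot(W) + w0)))  # sigmoid activation
scores = hidden_output.dot(V) + v0
y_pred = np.exp(scores) / np.sum(np.exp(scores), axis=1, keepdims=True)  # softmax
print(y_pred.shape, y_pred.sum(axis=1))             # (4, 2), every row sums to 1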
22 | """ 23 | def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01): 24 | self.n_hidden = n_hidden 25 | self.n_iterations = n_iterations 26 | self.learning_rate = learning_rate 27 | self.hidden_activation = Sigmoid() 28 | self.output_activation = Softmax() 29 | self.loss = CrossEntropy() 30 | 31 | def _initialize_weights(self, X, y): 32 | n_samples, n_features = X.shape 33 | _, n_outputs = y.shape 34 | # Hidden layer 35 | limit = 1 / math.sqrt(n_features) 36 | self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden)) 37 | self.w0 = np.zeros((1, self.n_hidden)) 38 | # Output layer 39 | limit = 1 / math.sqrt(self.n_hidden) 40 | self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs)) 41 | self.v0 = np.zeros((1, n_outputs)) 42 | 43 | def fit(self, X, y): 44 | 45 | self._initialize_weights(X, y) 46 | 47 | for i in range(self.n_iterations): 48 | 49 | # .............. 50 | # Forward Pass 51 | # .............. 52 | 53 | # HIDDEN LAYER 54 | hidden_input = X.dot(self.W) + self.w0 55 | hidden_output = self.hidden_activation(hidden_input) 56 | # OUTPUT LAYER 57 | output_layer_input = hidden_output.dot(self.V) + self.v0 58 | y_pred = self.output_activation(output_layer_input) 59 | 60 | # ............... 61 | # Backward Pass 62 | # ............... 63 | 64 | # OUTPUT LAYER 65 | # Grad. w.r.t input of output layer 66 | grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input) 67 | grad_v = hidden_output.T.dot(grad_wrt_out_l_input) 68 | grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True) 69 | # HIDDEN LAYER 70 | # Grad. w.r.t input of hidden layer 71 | grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input) 72 | grad_w = X.T.dot(grad_wrt_hidden_l_input) 73 | grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True) 74 | 75 | # Update weights (by gradient descent) 76 | # Move against the gradient to minimize loss 77 | self.V -= self.learning_rate * grad_v 78 | self.v0 -= self.learning_rate * grad_v0 79 | self.W -= self.learning_rate * grad_w 80 | self.w0 -= self.learning_rate * grad_w0 81 | 82 | # Use the trained model to predict labels of X 83 | def predict(self, X): 84 | # Forward pass: 85 | hidden_input = X.dot(self.W) + self.w0 86 | hidden_output = self.hidden_activation(hidden_input) 87 | output_layer_input = hidden_output.dot(self.V) + self.v0 88 | y_pred = self.output_activation(output_layer_input) 89 | return y_pred 90 | 91 | 92 | def main(): 93 | data = datasets.load_digits() 94 | X = normalize(data.data) 95 | y = data.target 96 | 97 | # Convert the nominal y values to binary 98 | y = to_categorical(y) 99 | 100 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) 101 | 102 | # MLP 103 | clf = MultilayerPerceptron(n_hidden=16, 104 | n_iterations=1000, 105 | learning_rate=0.01) 106 | 107 | clf.fit(X_train, y_train) 108 | y_pred = np.argmax(clf.predict(X_test), axis=1) 109 | y_test = np.argmax(y_test, axis=1) 110 | 111 | accuracy = accuracy_score(y_test, y_pred) 112 | print ("Accuracy:", accuracy) 113 | 114 | # Reduce dimension to two using PCA and plot the results 115 | Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y)) 116 | 117 | if __name__ == "__main__": 118 | main() -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/naive_bayes.py: 
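# Editor's note (added): before the source below, a quick standalone check of the
# Gaussian likelihood the classifier relies on (mirroring _calculate_likelihood,
# including the eps guard against zero variance); the numbers are illustrative.
import math

def gaussian_likelihood(mean, var, x, eps=1e-4):
    coeff = 1.0 / math.sqrt(2.0 * math.pi * var + eps)
    return coeff * math.exp(-((x - mean) ** 2) / (2 * var + eps))

print(gaussian_likelihood(0.0, 1.0, 0.0))   # ~0.399: value at the mean
print(gaussian_likelihood(0.0, 1.0, 2.0))   # ~0.054: two standard deviations away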
-------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import math 4 | from mlfromscratch.utils import train_test_split, normalize 5 | from mlfromscratch.utils import Plot, accuracy_score 6 | 7 | class NaiveBayes(): 8 | """The Gaussian Naive Bayes classifier. """ 9 | def fit(self, X, y): 10 | self.X, self.y = X, y 11 | self.classes = np.unique(y) 12 | self.parameters = [] 13 | # Calculate the mean and variance of each feature for each class 14 | for i, c in enumerate(self.classes): 15 | # Only select the rows where the label equals the given class 16 | X_where_c = X[np.where(y == c)] 17 | self.parameters.append([]) 18 | # Add the mean and variance for each feature (column) 19 | for col in X_where_c.T: 20 | parameters = {"mean": col.mean(), "var": col.var()} 21 | self.parameters[i].append(parameters) 22 | 23 | def _calculate_likelihood(self, mean, var, x): 24 | """ Gaussian likelihood of the data x given mean and var """ 25 | eps = 1e-4 # Added in denominator to prevent division by zero 26 | coeff = 1.0 / math.sqrt(2.0 * math.pi * var + eps) 27 | exponent = math.exp(-(math.pow(x - mean, 2) / (2 * var + eps))) 28 | return coeff * exponent 29 | 30 | def _calculate_prior(self, c): 31 | """ Calculate the prior of class c 32 | (samples where class == c / total number of samples)""" 33 | frequency = np.mean(self.y == c) 34 | return frequency 35 | 36 | def _classify(self, sample): 37 | """ Classification using Bayes Rule P(Y|X) = P(X|Y)*P(Y)/P(X), 38 | or Posterior = Likelihood * Prior / Scaling Factor 39 | 40 | P(Y|X) - The posterior is the probability that sample x is of class y given the 41 | feature values of x being distributed according to distribution of y and the prior. 42 | P(X|Y) - Likelihood of data X given class distribution Y. 43 | Gaussian distribution (given by _calculate_likelihood) 44 | P(Y) - Prior (given by _calculate_prior) 45 | P(X) - Scales the posterior to make it a proper probability distribution. 46 | This term is ignored in this implementation since it doesn't affect 47 | which class distribution the sample is most likely to belong to. 
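# Editor's note (added): hypothetical two-class, two-feature numbers spelling out the
# decision rule described in this docstring: multiply the class prior by the
# per-feature likelihoods and pick the class with the largest product (the shared
# evidence term P(X) can be dropped).
import numpy as np

priors = np.array([0.6, 0.4])                        # P(Y=0), P(Y=1)
likelihoods = np.array([[0.20, 0.05],                # class 0: P(x1|Y=0), P(x2|Y=0)
                        [0.10, 0.30]])               # class 1: P(x1|Y=1), P(x2|Y=1)
posteriors = priors * likelihoods.prod(axis=1)       # unnormalized posteriors
print(posteriors)                                    # [0.006 0.012]
print(posteriors.argmax())                           # -> 1, despite the smaller prior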
48 | 49 | Classifies the sample as the class that results in the largest P(Y|X) (posterior) 50 | """ 51 | posteriors = [] 52 | # Go through list of classes 53 | for i, c in enumerate(self.classes): 54 | # Initialize posterior as prior 55 | posterior = self._calculate_prior(c) 56 | # Naive assumption (independence): 57 | # P(x1,x2,x3|Y) = P(x1|Y)*P(x2|Y)*P(x3|Y) 58 | # Posterior is product of prior and likelihoods (ignoring scaling factor) 59 | for feature_value, params in zip(sample, self.parameters[i]): 60 | # Likelihood of feature value given distribution of feature values given y 61 | likelihood = self._calculate_likelihood(params["mean"], params["var"], feature_value) 62 | posterior *= likelihood 63 | posteriors.append(posterior) 64 | # Return the class with the largest posterior probability 65 | return self.classes[np.argmax(posteriors)] 66 | 67 | def predict(self, X): 68 | """ Predict the class labels of the samples in X """ 69 | y_pred = [self._classify(sample) for sample in X] 70 | return y_pred 71 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/neuroevolution.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import copy 4 | 5 | class Neuroevolution(): 6 | """ Evolutionary optimization of Neural Networks. 7 | 8 | Parameters: 9 | ----------- 10 | n_individuals: int 11 | The number of neural networks that are allowed in the population at a time. 12 | mutation_rate: float 13 | The probability that a weight will be mutated. 14 | model_builder: method 15 | A method which returns a user specified NeuralNetwork instance. 16 | """ 17 | def __init__(self, population_size, mutation_rate, model_builder): 18 | self.population_size = population_size 19 | self.mutation_rate = mutation_rate 20 | self.model_builder = model_builder 21 | 22 | def _build_model(self, id): 23 | """ Returns a new individual """ 24 | model = self.model_builder(n_inputs=self.X.shape[1], n_outputs=self.y.shape[1]) 25 | model.id = id 26 | model.fitness = 0 27 | model.accuracy = 0 28 | 29 | return model 30 | 31 | def _initialize_population(self): 32 | """ Initialization of the neural networks forming the population""" 33 | self.population = [] 34 | for _ in range(self.population_size): 35 | model = self._build_model(id=np.random.randint(1000)) 36 | self.population.append(model) 37 | 38 | def _mutate(self, individual, var=1): 39 | """ Add zero mean gaussian noise to the layer weights with probability mutation_rate """ 40 | for layer in individual.layers: 41 | if hasattr(layer, 'W'): 42 | # Mutation of weight with probability self.mutation_rate 43 | mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=layer.W.shape) 44 | layer.W += np.random.normal(loc=0, scale=var, size=layer.W.shape) * mutation_mask 45 | mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=layer.w0.shape) 46 | layer.w0 += np.random.normal(loc=0, scale=var, size=layer.w0.shape) * mutation_mask 47 | 48 | return individual 49 | 50 | def _inherit_weights(self, child, parent): 51 | """ Copies the weights from parent to child """ 52 | for i in range(len(child.layers)): 53 | if hasattr(child.layers[i], 'W'): 54 | # The child inherits both weights W and bias weights w0 55 | child.layers[i].W = parent.layers[i].W.copy() 56 | child.layers[i].w0 = parent.layers[i].w0.copy() 57 | 58 | def _crossover(self, parent1, parent2): 59 | """ Performs crossover between the 
neurons in parent1 and parent2 to form offspring """ 60 | child1 = self._build_model(id=parent1.id+1) 61 | self._inherit_weights(child1, parent1) 62 | child2 = self._build_model(id=parent2.id+1) 63 | self._inherit_weights(child2, parent2) 64 | 65 | # Perform crossover 66 | for i in range(len(child1.layers)): 67 | if hasattr(child1.layers[i], 'W'): 68 | n_neurons = child1.layers[i].W.shape[1] 69 | # Perform crossover between the individuals' neuron weights 70 | cutoff = np.random.randint(0, n_neurons) 71 | child1.layers[i].W[:, cutoff:] = parent2.layers[i].W[:, cutoff:].copy() 72 | child1.layers[i].w0[:, cutoff:] = parent2.layers[i].w0[:, cutoff:].copy() 73 | child2.layers[i].W[:, cutoff:] = parent1.layers[i].W[:, cutoff:].copy() 74 | child2.layers[i].w0[:, cutoff:] = parent1.layers[i].w0[:, cutoff:].copy() 75 | 76 | return child1, child2 77 | 78 | def _calculate_fitness(self): 79 | """ Evaluate the NNs on the test set to get fitness scores """ 80 | for individual in self.population: 81 | loss, acc = individual.test_on_batch(self.X, self.y) 82 | individual.fitness = 1 / (loss + 1e-8) 83 | individual.accuracy = acc 84 | 85 | def evolve(self, X, y, n_generations): 86 | """ Will evolve the population for n_generations based on dataset X and labels y""" 87 | self.X, self.y = X, y 88 | 89 | self._initialize_population() 90 | 91 | # The 40% highest fittest individuals will be selected for the next generation 92 | n_winners = int(self.population_size * 0.4) 93 | # The fittest 60% of the population will be selected as parents to form offspring 94 | n_parents = self.population_size - n_winners 95 | 96 | for epoch in range(n_generations): 97 | # Determine the fitness of the individuals in the population 98 | self._calculate_fitness() 99 | 100 | # Sort population by fitness 101 | sorted_i = np.argsort([model.fitness for model in self.population])[::-1] 102 | self.population = [self.population[i] for i in sorted_i] 103 | 104 | # Get the individual with the highest fitness 105 | fittest_individual = self.population[0] 106 | print ("[%d Best Individual - Fitness: %.5f, Accuracy: %.1f%%]" % (epoch, 107 | fittest_individual.fitness, 108 | float(100*fittest_individual.accuracy))) 109 | # The 'winners' are selected for the next generation 110 | next_population = [self.population[i] for i in range(n_winners)] 111 | 112 | total_fitness = np.sum([model.fitness for model in self.population]) 113 | # The probability that a individual will be selected as a parent is proportionate to its fitness 114 | parent_probabilities = [model.fitness / total_fitness for model in self.population] 115 | # Select parents according to probabilities (without replacement to preserve diversity) 116 | parents = np.random.choice(self.population, size=n_parents, p=parent_probabilities, replace=False) 117 | for i in np.arange(0, len(parents), 2): 118 | # Perform crossover to produce offspring 119 | child1, child2 = self._crossover(parents[i], parents[i+1]) 120 | # Save mutated offspring for next population 121 | next_population += [self._mutate(child1), self._mutate(child2)] 122 | 123 | self.population = next_population 124 | 125 | return fittest_individual 126 | 127 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/particle_swarm_optimization.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | import copy 4 | 5 | class ParticleSwarmOptimizedNN(): 6 | """ 
Particle Swarm Optimization of Neural Network. 7 | 8 | Parameters: 9 | ----------- 10 | n_individuals: int 11 | The number of neural networks that are allowed in the population at a time. 12 | model_builder: method 13 | A method which returns a user specified NeuralNetwork instance. 14 | inertia_weight: float [0,1) 15 | cognitive_weight: float [0,1) 16 | social_weight: float [0,1) 17 | max_velocity: float 18 | The maximum allowed value for the velocity. 19 | 20 | Reference: 21 | Neural Network Training Using Particle Swarm Optimization 22 | https://visualstudiomagazine.com/articles/2013/12/01/neural-network-training-using-particle-swarm-optimization.aspx 23 | """ 24 | def __init__(self, population_size, 25 | model_builder, 26 | inertia_weight=0.8, 27 | cognitive_weight=2, 28 | social_weight=2, 29 | max_velocity=20): 30 | self.population_size = population_size 31 | self.model_builder = model_builder 32 | self.best_individual = None 33 | # Parameters used to update velocity 34 | self.cognitive_w = cognitive_weight 35 | self.inertia_w = inertia_weight 36 | self.social_w = social_weight 37 | self.min_v = -max_velocity 38 | self.max_v = max_velocity 39 | 40 | def _build_model(self, id): 41 | """ Returns a new individual """ 42 | model = self.model_builder(n_inputs=self.X.shape[1], n_outputs=self.y.shape[1]) 43 | model.id = id 44 | model.fitness = 0 45 | model.highest_fitness = 0 46 | model.accuracy = 0 47 | # Set intial best as the current initialization 48 | model.best_layers = copy.copy(model.layers) 49 | 50 | # Set initial velocity to zero 51 | model.velocity = [] 52 | for layer in model.layers: 53 | velocity = {"W": 0, "w0": 0} 54 | if hasattr(layer, 'W'): 55 | velocity = {"W": np.zeros_like(layer.W), "w0": np.zeros_like(layer.w0)} 56 | model.velocity.append(velocity) 57 | 58 | return model 59 | 60 | def _initialize_population(self): 61 | """ Initialization of the neural networks forming the population""" 62 | self.population = [] 63 | for i in range(self.population_size): 64 | model = self._build_model(id=i) 65 | self.population.append(model) 66 | 67 | def _update_weights(self, individual): 68 | """ Calculate the new velocity and update weights for each layer """ 69 | # Two random parameters used to update the velocity 70 | r1 = np.random.uniform() 71 | r2 = np.random.uniform() 72 | for i, layer in enumerate(individual.layers): 73 | if hasattr(layer, 'W'): 74 | # Layer weights velocity 75 | first_term_W = self.inertia_w * individual.velocity[i]["W"] 76 | second_term_W = self.cognitive_w * r1 * (individual.best_layers[i].W - layer.W) 77 | third_term_W = self.social_w * r2 * (self.best_individual.layers[i].W - layer.W) 78 | new_velocity = first_term_W + second_term_W + third_term_W 79 | individual.velocity[i]["W"] = np.clip(new_velocity, self.min_v, self.max_v) 80 | 81 | # Bias weight velocity 82 | first_term_w0 = self.inertia_w * individual.velocity[i]["w0"] 83 | second_term_w0 = self.cognitive_w * r1 * (individual.best_layers[i].w0 - layer.w0) 84 | third_term_w0 = self.social_w * r2 * (self.best_individual.layers[i].w0 - layer.w0) 85 | new_velocity = first_term_w0 + second_term_w0 + third_term_w0 86 | individual.velocity[i]["w0"] = np.clip(new_velocity, self.min_v, self.max_v) 87 | 88 | # Update layer weights with velocity 89 | individual.layers[i].W += individual.velocity[i]["W"] 90 | individual.layers[i].w0 += individual.velocity[i]["w0"] 91 | 92 | def _calculate_fitness(self, individual): 93 | """ Evaluate the individual on the test set to get fitness scores """ 94 | loss, acc = 
individual.test_on_batch(self.X, self.y) 95 | individual.fitness = 1 / (loss + 1e-8) 96 | individual.accuracy = acc 97 | 98 | def evolve(self, X, y, n_generations): 99 | """ Will evolve the population for n_generations based on dataset X and labels y""" 100 | self.X, self.y = X, y 101 | 102 | self._initialize_population() 103 | 104 | # The best individual of the population is initialized as population's first ind. 105 | self.best_individual = copy.copy(self.population[0]) 106 | 107 | for epoch in range(n_generations): 108 | for individual in self.population: 109 | # Calculate new velocity and update the NN weights 110 | self._update_weights(individual) 111 | # Calculate the fitness of the updated individual 112 | self._calculate_fitness(individual) 113 | 114 | # If the current fitness is higher than the individual's previous highest 115 | # => update the individual's best layer setup 116 | if individual.fitness > individual.highest_fitness: 117 | individual.best_layers = copy.copy(individual.layers) 118 | individual.highest_fitness = individual.fitness 119 | # If the individual's fitness is higher than the highest recorded fitness for the 120 | # whole population => update the best individual 121 | if individual.fitness > self.best_individual.fitness: 122 | self.best_individual = copy.copy(individual) 123 | 124 | print ("[%d Best Individual - ID: %d Fitness: %.5f, Accuracy: %.1f%%]" % (epoch, 125 | self.best_individual.id, 126 | self.best_individual.fitness, 127 | 100*float(self.best_individual.accuracy))) 128 | return self.best_individual 129 | 130 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/perceptron.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import math 3 | import numpy as np 4 | 5 | # Import helper functions 6 | from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score 7 | from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU, TanH, ELU 8 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy, SquareLoss 9 | from mlfromscratch.utils import Plot 10 | from mlfromscratch.utils.misc import bar_widgets 11 | import progressbar 12 | 13 | class Perceptron(): 14 | """The Perceptron. One layer neural network classifier. 15 | 16 | Parameters: 17 | ----------- 18 | n_iterations: float 19 | The number of training iterations the algorithm will tune the weights for. 20 | activation_function: class 21 | The activation that shall be used for each neuron. 22 | Possible choices: Sigmoid, ExpLU, ReLU, LeakyReLU, SoftPlus, TanH 23 | loss: class 24 | The loss function used to assess the model's performance. 25 | Possible choices: SquareLoss, CrossEntropy 26 | learning_rate: float 27 | The step length that will be used when updating the weights. 
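# Editor's note (added): a hypothetical usage sketch of this Perceptron on the digits
# dataset, in the spirit of the scripts under mlfromscratch/examples. It assumes the
# package is installed; the hyperparameter values are illustrative only.
import numpy as np
from sklearn import datasets
from mlfromscratch.supervised_learning.perceptron import Perceptron
from mlfromscratch.deep_learning.loss_functions import CrossEntropy
from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score

data = datasets.load_digits()
X, y = normalize(data.data), to_categorical(data.target)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

clf = Perceptron(n_iterations=5000, learning_rate=0.001, loss=CrossEntropy)
clf.fit(X_train, y_train)
y_pred = np.argmax(clf.predict(X_test), axis=1)
print("Accuracy:", accuracy_score(np.argmax(y_test, axis=1), y_pred))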
28 | """ 29 | def __init__(self, n_iterations=20000, activation_function=Sigmoid, loss=SquareLoss, learning_rate=0.01): 30 | self.n_iterations = n_iterations 31 | self.learning_rate = learning_rate 32 | self.loss = loss() 33 | self.activation_func = activation_function() 34 | self.progressbar = progressbar.ProgressBar(widgets=bar_widgets) 35 | 36 | def fit(self, X, y): 37 | n_samples, n_features = np.shape(X) 38 | _, n_outputs = np.shape(y) 39 | 40 | # Initialize weights between [-1/sqrt(N), 1/sqrt(N)] 41 | limit = 1 / math.sqrt(n_features) 42 | self.W = np.random.uniform(-limit, limit, (n_features, n_outputs)) 43 | self.w0 = np.zeros((1, n_outputs)) 44 | 45 | for i in self.progressbar(range(self.n_iterations)): 46 | # Calculate outputs 47 | linear_output = X.dot(self.W) + self.w0 48 | y_pred = self.activation_func(linear_output) 49 | # Calculate the loss gradient w.r.t the input of the activation function 50 | error_gradient = self.loss.gradient(y, y_pred) * self.activation_func.gradient(linear_output) 51 | # Calculate the gradient of the loss with respect to each weight 52 | grad_wrt_w = X.T.dot(error_gradient) 53 | grad_wrt_w0 = np.sum(error_gradient, axis=0, keepdims=True) 54 | # Update weights 55 | self.W -= self.learning_rate * grad_wrt_w 56 | self.w0 -= self.learning_rate * grad_wrt_w0 57 | 58 | # Use the trained model to predict labels of X 59 | def predict(self, X): 60 | y_pred = self.activation_func(X.dot(self.W) + self.w0) 61 | return y_pred 62 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/random_forest.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import math 4 | import progressbar 5 | 6 | # Import helper functions 7 | from mlfromscratch.utils import divide_on_feature, train_test_split, get_random_subsets, normalize 8 | from mlfromscratch.utils import accuracy_score, calculate_entropy 9 | from mlfromscratch.unsupervised_learning import PCA 10 | from mlfromscratch.supervised_learning import ClassificationTree 11 | from mlfromscratch.utils.misc import bar_widgets 12 | from mlfromscratch.utils import Plot 13 | 14 | 15 | class RandomForest(): 16 | """Random Forest classifier. Uses a collection of classification trees that 17 | trains on random subsets of the data using a random subsets of the features. 18 | 19 | Parameters: 20 | ----------- 21 | n_estimators: int 22 | The number of classification trees that are used. 23 | max_features: int 24 | The maximum number of features that the classification trees are allowed to 25 | use. 26 | min_samples_split: int 27 | The minimum number of samples needed to make a split when building a tree. 28 | min_gain: float 29 | The minimum impurity required to split the tree further. 30 | max_depth: int 31 | The maximum depth of a tree. 32 | """ 33 | def __init__(self, n_estimators=100, max_features=None, min_samples_split=2, 34 | min_gain=0, max_depth=float("inf")): 35 | self.n_estimators = n_estimators # Number of trees 36 | self.max_features = max_features # Maxmimum number of features per tree 37 | self.min_samples_split = min_samples_split 38 | self.min_gain = min_gain # Minimum information gain req. 
to continue 39 | self.max_depth = max_depth # Maximum depth for tree 40 | self.progressbar = progressbar.ProgressBar(widgets=bar_widgets) 41 | 42 | # Initialize decision trees 43 | self.trees = [] 44 | for _ in range(n_estimators): 45 | self.trees.append( 46 | ClassificationTree( 47 | min_samples_split=self.min_samples_split, 48 | min_impurity=min_gain, 49 | max_depth=self.max_depth)) 50 | 51 | def fit(self, X, y): 52 | n_features = np.shape(X)[1] 53 | # If max_features have not been defined => select it as 54 | # sqrt(n_features) 55 | if not self.max_features: 56 | self.max_features = int(math.sqrt(n_features)) 57 | 58 | # Choose one random subset of the data for each tree 59 | subsets = get_random_subsets(X, y, self.n_estimators) 60 | 61 | for i in self.progressbar(range(self.n_estimators)): 62 | X_subset, y_subset = subsets[i] 63 | # Feature bagging (select random subsets of the features) 64 | idx = np.random.choice(range(n_features), size=self.max_features, replace=True) 65 | # Save the indices of the features for prediction 66 | self.trees[i].feature_indices = idx 67 | # Choose the features corresponding to the indices 68 | X_subset = X_subset[:, idx] 69 | # Fit the tree to the data 70 | self.trees[i].fit(X_subset, y_subset) 71 | 72 | def predict(self, X): 73 | y_preds = np.empty((X.shape[0], len(self.trees))) 74 | # Let each tree make a prediction on the data 75 | for i, tree in enumerate(self.trees): 76 | # Indices of the features that the tree has trained on 77 | idx = tree.feature_indices 78 | # Make a prediction based on those features 79 | prediction = tree.predict(X[:, idx]) 80 | y_preds[:, i] = prediction 81 | 82 | y_pred = [] 83 | # For each sample 84 | for sample_predictions in y_preds: 85 | # Select the most common class prediction 86 | y_pred.append(np.bincount(sample_predictions.astype('int')).argmax()) 87 | return y_pred 88 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/support_vector_machine.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division, print_function 3 | import numpy as np 4 | import cvxopt 5 | from mlfromscratch.utils import train_test_split, normalize, accuracy_score 6 | from mlfromscratch.utils.kernels import * 7 | from mlfromscratch.utils import Plot 8 | 9 | # Hide cvxopt output 10 | cvxopt.solvers.options['show_progress'] = False 11 | 12 | class SupportVectorMachine(object): 13 | """The Support Vector Machine classifier. 14 | Uses cvxopt to solve the quadratic optimization problem. 15 | 16 | Parameters: 17 | ----------- 18 | C: float 19 | Penalty term. 20 | kernel: function 21 | Kernel function. Can be either polynomial, rbf or linear. 22 | power: int 23 | The degree of the polynomial kernel. Will be ignored by the other 24 | kernel functions. 25 | gamma: float 26 | Used in the rbf kernel function. 27 | coef: float 28 | Bias term used in the polynomial kernel function. 
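# Editor's note (added): the rbf kernel used by default above is, in its usual
# formulation, K(x1, x2) = exp(-gamma * ||x1 - x2||^2); a standalone two-point check
# with illustrative values (not tied to this class):
import numpy as np

x1, x2 = np.array([0.0, 0.0]), np.array([1.0, 1.0])
gamma = 0.5
print(np.exp(-gamma * np.linalg.norm(x1 - x2) ** 2))   # ~0.368; equals 1.0 when x1 == x2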
29 | """ 30 | def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4): 31 | self.C = C 32 | self.kernel = kernel 33 | self.power = power 34 | self.gamma = gamma 35 | self.coef = coef 36 | self.lagr_multipliers = None 37 | self.support_vectors = None 38 | self.support_vector_labels = None 39 | self.intercept = None 40 | 41 | def fit(self, X, y): 42 | 43 | n_samples, n_features = np.shape(X) 44 | 45 | # Set gamma to 1/n_features by default 46 | if not self.gamma: 47 | self.gamma = 1 / n_features 48 | 49 | # Initialize kernel method with parameters 50 | self.kernel = self.kernel( 51 | power=self.power, 52 | gamma=self.gamma, 53 | coef=self.coef) 54 | 55 | # Calculate kernel matrix 56 | kernel_matrix = np.zeros((n_samples, n_samples)) 57 | for i in range(n_samples): 58 | for j in range(n_samples): 59 | kernel_matrix[i, j] = self.kernel(X[i], X[j]) 60 | 61 | # Define the quadratic optimization problem 62 | P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d') 63 | q = cvxopt.matrix(np.ones(n_samples) * -1) 64 | A = cvxopt.matrix(y, (1, n_samples), tc='d') 65 | b = cvxopt.matrix(0, tc='d') 66 | 67 | if not self.C: 68 | G = cvxopt.matrix(np.identity(n_samples) * -1) 69 | h = cvxopt.matrix(np.zeros(n_samples)) 70 | else: 71 | G_max = np.identity(n_samples) * -1 72 | G_min = np.identity(n_samples) 73 | G = cvxopt.matrix(np.vstack((G_max, G_min))) 74 | h_max = cvxopt.matrix(np.zeros(n_samples)) 75 | h_min = cvxopt.matrix(np.ones(n_samples) * self.C) 76 | h = cvxopt.matrix(np.vstack((h_max, h_min))) 77 | 78 | # Solve the quadratic optimization problem using cvxopt 79 | minimization = cvxopt.solvers.qp(P, q, G, h, A, b) 80 | 81 | # Lagrange multipliers 82 | lagr_mult = np.ravel(minimization['x']) 83 | 84 | # Extract support vectors 85 | # Get indexes of non-zero lagr. multipiers 86 | idx = lagr_mult > 1e-7 87 | # Get the corresponding lagr. 
multipliers 88 | self.lagr_multipliers = lagr_mult[idx] 89 | # Get the samples that will act as support vectors 90 | self.support_vectors = X[idx] 91 | # Get the corresponding labels 92 | self.support_vector_labels = y[idx] 93 | 94 | # Calculate intercept with first support vector 95 | self.intercept = self.support_vector_labels[0] 96 | for i in range(len(self.lagr_multipliers)): 97 | self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[ 98 | i] * self.kernel(self.support_vectors[i], self.support_vectors[0]) 99 | 100 | def predict(self, X): 101 | y_pred = [] 102 | # Iterate through list of samples and make predictions 103 | for sample in X: 104 | prediction = 0 105 | # Determine the label of the sample by the support vectors 106 | for i in range(len(self.lagr_multipliers)): 107 | prediction += self.lagr_multipliers[i] * self.support_vector_labels[ 108 | i] * self.kernel(self.support_vectors[i], sample) 109 | prediction += self.intercept 110 | y_pred.append(np.sign(prediction)) 111 | return np.array(y_pred) 112 | -------------------------------------------------------------------------------- /mlfromscratch/supervised_learning/xgboost.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import numpy as np 3 | import progressbar 4 | 5 | from mlfromscratch.utils import train_test_split, standardize, to_categorical, normalize 6 | from mlfromscratch.utils import mean_squared_error, accuracy_score 7 | from mlfromscratch.supervised_learning import XGBoostRegressionTree 8 | from mlfromscratch.deep_learning.activation_functions import Sigmoid 9 | from mlfromscratch.utils.misc import bar_widgets 10 | from mlfromscratch.utils import Plot 11 | 12 | 13 | class LogisticLoss(): 14 | def __init__(self): 15 | sigmoid = Sigmoid() 16 | self.log_func = sigmoid 17 | self.log_grad = sigmoid.gradient 18 | 19 | def loss(self, y, y_pred): 20 | y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15) 21 | p = self.log_func(y_pred) 22 | return y * np.log(p) + (1 - y) * np.log(1 - p) 23 | 24 | # gradient w.r.t y_pred 25 | def gradient(self, y, y_pred): 26 | p = self.log_func(y_pred) 27 | return -(y - p) 28 | 29 | # w.r.t y_pred 30 | def hess(self, y, y_pred): 31 | p = self.log_func(y_pred) 32 | return p * (1 - p) 33 | 34 | 35 | class XGBoost(object): 36 | """The XGBoost classifier. 37 | 38 | Reference: http://xgboost.readthedocs.io/en/latest/model.html 39 | 40 | Parameters: 41 | ----------- 42 | n_estimators: int 43 | The number of classification trees that are used. 44 | learning_rate: float 45 | The step length that will be taken when following the negative gradient during 46 | training. 47 | min_samples_split: int 48 | The minimum number of samples needed to make a split when building a tree. 49 | min_impurity: float 50 | The minimum impurity required to split the tree further. 51 | max_depth: int 52 | The maximum depth of a tree. 
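# Editor's note (added): a standalone numerical check of the first- and second-order
# quantities the LogisticLoss above provides to the regression trees (p = sigmoid(f));
# the raw scores f are made up for illustration.
import numpy as np

y = np.array([1.0, 0.0])
f = np.array([0.0, 2.0])                 # raw (pre-sigmoid) predictions
p = 1 / (1 + np.exp(-f))
print(-(y - p))                          # gradient w.r.t. f: [-0.5,  0.881]
print(p * (1 - p))                       # hessian  w.r.t. f: [ 0.25, 0.105]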
53 | """ 54 | def __init__(self, n_estimators=200, learning_rate=0.001, min_samples_split=2, 55 | min_impurity=1e-7, max_depth=2): 56 | self.n_estimators = n_estimators # Number of trees 57 | self.learning_rate = learning_rate # Step size for weight update 58 | self.min_samples_split = min_samples_split # The minimum n of sampels to justify split 59 | self.min_impurity = min_impurity # Minimum variance reduction to continue 60 | self.max_depth = max_depth # Maximum depth for tree 61 | 62 | self.bar = progressbar.ProgressBar(widgets=bar_widgets) 63 | 64 | # Log loss for classification 65 | self.loss = LogisticLoss() 66 | 67 | # Initialize regression trees 68 | self.trees = [] 69 | for _ in range(n_estimators): 70 | tree = XGBoostRegressionTree( 71 | min_samples_split=self.min_samples_split, 72 | min_impurity=min_impurity, 73 | max_depth=self.max_depth, 74 | loss=self.loss) 75 | 76 | self.trees.append(tree) 77 | 78 | def fit(self, X, y): 79 | y = to_categorical(y) 80 | 81 | y_pred = np.zeros(np.shape(y)) 82 | for i in self.bar(range(self.n_estimators)): 83 | tree = self.trees[i] 84 | y_and_pred = np.concatenate((y, y_pred), axis=1) 85 | tree.fit(X, y_and_pred) 86 | update_pred = tree.predict(X) 87 | 88 | y_pred -= np.multiply(self.learning_rate, update_pred) 89 | 90 | def predict(self, X): 91 | y_pred = None 92 | # Make predictions 93 | for tree in self.trees: 94 | # Estimate gradient and update prediction 95 | update_pred = tree.predict(X) 96 | if y_pred is None: 97 | y_pred = np.zeros_like(update_pred) 98 | y_pred -= np.multiply(self.learning_rate, update_pred) 99 | 100 | # Turn into probability distribution (Softmax) 101 | y_pred = np.exp(y_pred) / np.sum(np.exp(y_pred), axis=1, keepdims=True) 102 | # Set label to the value that maximizes probability 103 | y_pred = np.argmax(y_pred, axis=1) 104 | return y_pred 105 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .principal_component_analysis import PCA 2 | from .apriori import Apriori 3 | from .dbscan import DBSCAN 4 | from .fp_growth import FPGrowth 5 | from .gaussian_mixture_model import GaussianMixtureModel 6 | from .genetic_algorithm import GeneticAlgorithm 7 | from .k_means import KMeans 8 | from .partitioning_around_medoids import PAM 9 | from .restricted_boltzmann_machine import RBM 10 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/autoencoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from sklearn import datasets 3 | import math 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import progressbar 7 | 8 | from sklearn.datasets import fetch_mldata 9 | 10 | from mlfromscratch.deep_learning.optimizers import Adam 11 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy, SquareLoss 12 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization 13 | from mlfromscratch.deep_learning import NeuralNetwork 14 | 15 | 16 | class Autoencoder(): 17 | """An Autoencoder with deep fully-connected neural nets. 
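# Editor's note (added): a standalone sketch of the shapes involved in this model:
# 784-dimensional flattened MNIST images are squeezed through a 128-dimensional
# latent code and expanded back. Random matrices stand in for trained weights.
import numpy as np

batch = np.random.rand(16, 28 * 28)              # 16 flattened 28x28 images
W_enc = np.random.randn(28 * 28, 128) * 0.01     # encoder stand-in
W_dec = np.random.randn(128, 28 * 28) * 0.01     # decoder stand-in
latent = batch.dot(W_enc)
reconstruction = latent.dot(W_dec)
print(latent.shape, reconstruction.shape)        # (16, 128) (16, 784)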
18 | 19 | Training Data: MNIST Handwritten Digits (28x28 images) 20 | """ 21 | def __init__(self): 22 | self.img_rows = 28 23 | self.img_cols = 28 24 | self.img_dim = self.img_rows * self.img_cols 25 | self.latent_dim = 128 # The dimension of the data embedding 26 | 27 | optimizer = Adam(learning_rate=0.0002, b1=0.5) 28 | loss_function = SquareLoss 29 | 30 | self.encoder = self.build_encoder(optimizer, loss_function) 31 | self.decoder = self.build_decoder(optimizer, loss_function) 32 | 33 | self.autoencoder = NeuralNetwork(optimizer=optimizer, loss=loss_function) 34 | self.autoencoder.layers.extend(self.encoder.layers) 35 | self.autoencoder.layers.extend(self.decoder.layers) 36 | 37 | print () 38 | self.autoencoder.summary(name="Variational Autoencoder") 39 | 40 | def build_encoder(self, optimizer, loss_function): 41 | 42 | encoder = NeuralNetwork(optimizer=optimizer, loss=loss_function) 43 | encoder.add(Dense(512, input_shape=(self.img_dim,))) 44 | encoder.add(Activation('leaky_relu')) 45 | encoder.add(BatchNormalization(momentum=0.8)) 46 | encoder.add(Dense(256)) 47 | encoder.add(Activation('leaky_relu')) 48 | encoder.add(BatchNormalization(momentum=0.8)) 49 | encoder.add(Dense(self.latent_dim)) 50 | 51 | return encoder 52 | 53 | def build_decoder(self, optimizer, loss_function): 54 | 55 | decoder = NeuralNetwork(optimizer=optimizer, loss=loss_function) 56 | decoder.add(Dense(256, input_shape=(self.latent_dim,))) 57 | decoder.add(Activation('leaky_relu')) 58 | decoder.add(BatchNormalization(momentum=0.8)) 59 | decoder.add(Dense(512)) 60 | decoder.add(Activation('leaky_relu')) 61 | decoder.add(BatchNormalization(momentum=0.8)) 62 | decoder.add(Dense(self.img_dim)) 63 | decoder.add(Activation('tanh')) 64 | 65 | return decoder 66 | 67 | def train(self, n_epochs, batch_size=128, save_interval=50): 68 | 69 | mnist = fetch_mldata('MNIST original') 70 | 71 | X = mnist.data 72 | y = mnist.target 73 | 74 | # Rescale [-1, 1] 75 | X = (X.astype(np.float32) - 127.5) / 127.5 76 | 77 | for epoch in range(n_epochs): 78 | 79 | # Select a random half batch of images 80 | idx = np.random.randint(0, X.shape[0], batch_size) 81 | imgs = X[idx] 82 | 83 | # Train the Autoencoder 84 | loss, _ = self.autoencoder.train_on_batch(imgs, imgs) 85 | 86 | # Display the progress 87 | print ("%d [D loss: %f]" % (epoch, loss)) 88 | 89 | # If at save interval => save generated image samples 90 | if epoch % save_interval == 0: 91 | self.save_imgs(epoch, X) 92 | 93 | def save_imgs(self, epoch, X): 94 | r, c = 5, 5 # Grid size 95 | # Select a random half batch of images 96 | idx = np.random.randint(0, X.shape[0], r*c) 97 | imgs = X[idx] 98 | # Generate images and reshape to image shape 99 | gen_imgs = self.autoencoder.predict(imgs).reshape((-1, self.img_rows, self.img_cols)) 100 | 101 | # Rescale images 0 - 1 102 | gen_imgs = 0.5 * gen_imgs + 0.5 103 | 104 | fig, axs = plt.subplots(r, c) 105 | plt.suptitle("Autoencoder") 106 | cnt = 0 107 | for i in range(r): 108 | for j in range(c): 109 | axs[i,j].imshow(gen_imgs[cnt,:,:], cmap='gray') 110 | axs[i,j].axis('off') 111 | cnt += 1 112 | fig.savefig("ae_%d.png" % epoch) 113 | plt.close() 114 | 115 | 116 | if __name__ == '__main__': 117 | ae = Autoencoder() 118 | ae.train(n_epochs=200000, batch_size=64, save_interval=400) 119 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/dbscan.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, 
division 2 | import numpy as np 3 | from mlfromscratch.utils import Plot, euclidean_distance, normalize 4 | 5 | 6 | class DBSCAN(): 7 | """A density based clustering method that expands clusters from 8 | samples that have more neighbors within a radius specified by eps 9 | than the value min_samples. 10 | 11 | Parameters: 12 | ----------- 13 | eps: float 14 | The radius within which samples are considered neighbors 15 | min_samples: int 16 | The number of neighbors required for the sample to be a core point. 17 | """ 18 | def __init__(self, eps=1, min_samples=5): 19 | self.eps = eps 20 | self.min_samples = min_samples 21 | 22 | def _get_neighbors(self, sample_i): 23 | """ Return a list of indexes of neighboring samples 24 | A sample_2 is considered a neighbor of sample_1 if the distance between 25 | them is smaller than epsilon """ 26 | neighbors = [] 27 | idxs = np.arange(len(self.X)) 28 | for i, _sample in enumerate(self.X[idxs != sample_i]): 29 | distance = euclidean_distance(self.X[sample_i], _sample) 30 | if distance < self.eps: 31 | neighbors.append(i) 32 | return np.array(neighbors) 33 | 34 | def _expand_cluster(self, sample_i, neighbors): 35 | """ Recursive method which expands the cluster until we have reached the border 36 | of the dense area (density determined by eps and min_samples) """ 37 | cluster = [sample_i] 38 | # Iterate through neighbors 39 | for neighbor_i in neighbors: 40 | if not neighbor_i in self.visited_samples: 41 | self.visited_samples.append(neighbor_i) 42 | # Fetch the sample's distant neighbors (neighbors of neighbor) 43 | self.neighbors[neighbor_i] = self._get_neighbors(neighbor_i) 44 | # Make sure the neighbor's neighbors are more than min_samples 45 | # (If this is true the neighbor is a core point) 46 | if len(self.neighbors[neighbor_i]) >= self.min_samples: 47 | # Expand the cluster from the neighbor 48 | expanded_cluster = self._expand_cluster( 49 | neighbor_i, self.neighbors[neighbor_i]) 50 | # Add expanded cluster to this cluster 51 | cluster = cluster + expanded_cluster 52 | else: 53 | # If the neighbor is not a core point we only add the neighbor point 54 | cluster.append(neighbor_i) 55 | return cluster 56 | 57 | def _get_cluster_labels(self): 58 | """ Return the samples labels as the index of the cluster in which they are 59 | contained """ 60 | # Set default value to number of clusters 61 | # Will make sure all outliers have same cluster label 62 | labels = np.full(shape=self.X.shape[0], fill_value=len(self.clusters)) 63 | for cluster_i, cluster in enumerate(self.clusters): 64 | for sample_i in cluster: 65 | labels[sample_i] = cluster_i 66 | return labels 67 | 68 | # DBSCAN 69 | def predict(self, X): 70 | self.X = X 71 | self.clusters = [] 72 | self.visited_samples = [] 73 | self.neighbors = {} 74 | n_samples = np.shape(self.X)[0] 75 | # Iterate through samples and expand clusters from them 76 | # if they have more neighbors than self.min_samples 77 | for sample_i in range(n_samples): 78 | if sample_i in self.visited_samples: 79 | continue 80 | self.neighbors[sample_i] = self._get_neighbors(sample_i) 81 | if len(self.neighbors[sample_i]) >= self.min_samples: 82 | # If core point => mark as visited 83 | self.visited_samples.append(sample_i) 84 | # Sample has more neighbors than self.min_samples => expand 85 | # cluster from sample 86 | new_cluster = self._expand_cluster( 87 | sample_i, self.neighbors[sample_i]) 88 | # Add cluster to list of clusters 89 | self.clusters.append(new_cluster) 90 | 91 | # Get the resulting cluster labels 92 | 
cluster_labels = self._get_cluster_labels() 93 | return cluster_labels 94 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/dcgan.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import progressbar 5 | from sklearn.datasets import fetch_mldata 6 | 7 | from mlfromscratch.deep_learning.optimizers import Adam 8 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 9 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization, ZeroPadding2D, Conv2D, UpSampling2D 10 | from mlfromscratch.deep_learning import NeuralNetwork 11 | 12 | 13 | class DCGAN(): 14 | def __init__(self): 15 | self.img_rows = 28 16 | self.img_cols = 28 17 | self.channels = 1 18 | self.img_shape = (self.channels, self.img_rows, self.img_cols) 19 | self.latent_dim = 100 20 | 21 | optimizer = Adam(learning_rate=0.0002, b1=0.5) 22 | loss_function = CrossEntropy 23 | 24 | # Build the discriminator 25 | self.discriminator = self.build_discriminator(optimizer, loss_function) 26 | 27 | # Build the generator 28 | self.generator = self.build_generator(optimizer, loss_function) 29 | 30 | # Build the combined model 31 | self.combined = NeuralNetwork(optimizer=optimizer, loss=loss_function) 32 | self.combined.layers.extend(self.generator.layers) 33 | self.combined.layers.extend(self.discriminator.layers) 34 | 35 | print () 36 | self.generator.summary(name="Generator") 37 | self.discriminator.summary(name="Discriminator") 38 | 39 | def build_generator(self, optimizer, loss_function): 40 | 41 | model = NeuralNetwork(optimizer=optimizer, loss=loss_function) 42 | 43 | model.add(Dense(128 * 7 * 7, input_shape=(100,))) 44 | model.add(Activation('leaky_relu')) 45 | model.add(Reshape((128, 7, 7))) 46 | model.add(BatchNormalization(momentum=0.8)) 47 | model.add(UpSampling2D()) 48 | model.add(Conv2D(128, filter_shape=(3,3), padding='same')) 49 | model.add(Activation("leaky_relu")) 50 | model.add(BatchNormalization(momentum=0.8)) 51 | model.add(UpSampling2D()) 52 | model.add(Conv2D(64, filter_shape=(3,3), padding='same')) 53 | model.add(Activation("leaky_relu")) 54 | model.add(BatchNormalization(momentum=0.8)) 55 | model.add(Conv2D(1, filter_shape=(3,3), padding='same')) 56 | model.add(Activation("tanh")) 57 | 58 | return model 59 | 60 | def build_discriminator(self, optimizer, loss_function): 61 | 62 | model = NeuralNetwork(optimizer=optimizer, loss=loss_function) 63 | 64 | model.add(Conv2D(32, filter_shape=(3,3), stride=2, input_shape=self.img_shape, padding='same')) 65 | model.add(Activation('leaky_relu')) 66 | model.add(Dropout(0.25)) 67 | model.add(Conv2D(64, filter_shape=(3,3), stride=2, padding='same')) 68 | model.add(ZeroPadding2D(padding=((0,1),(0,1)))) 69 | model.add(Activation('leaky_relu')) 70 | model.add(Dropout(0.25)) 71 | model.add(BatchNormalization(momentum=0.8)) 72 | model.add(Conv2D(128, filter_shape=(3,3), stride=2, padding='same')) 73 | model.add(Activation('leaky_relu')) 74 | model.add(Dropout(0.25)) 75 | model.add(BatchNormalization(momentum=0.8)) 76 | model.add(Conv2D(256, filter_shape=(3,3), stride=1, padding='same')) 77 | model.add(Activation('leaky_relu')) 78 | model.add(Dropout(0.25)) 79 | model.add(Flatten()) 80 | model.add(Dense(2)) 81 | model.add(Activation('softmax')) 82 | 83 | return model 84 | 85 | 86 | def train(self, epochs, 
batch_size=128, save_interval=50): 87 | 88 | mnist = fetch_mldata('MNIST original') 89 | 90 | X = mnist.data.reshape((-1,) + self.img_shape) 91 | y = mnist.target 92 | 93 | # Rescale -1 to 1 94 | X = (X.astype(np.float32) - 127.5) / 127.5 95 | 96 | half_batch = int(batch_size / 2) 97 | 98 | for epoch in range(epochs): 99 | 100 | # --------------------- 101 | # Train Discriminator 102 | # --------------------- 103 | 104 | self.discriminator.set_trainable(True) 105 | 106 | # Select a random half batch of images 107 | idx = np.random.randint(0, X.shape[0], half_batch) 108 | imgs = X[idx] 109 | 110 | # Sample noise to use as generator input 111 | noise = np.random.normal(0, 1, (half_batch, 100)) 112 | 113 | # Generate a half batch of images 114 | gen_imgs = self.generator.predict(noise) 115 | 116 | valid = np.concatenate((np.ones((half_batch, 1)), np.zeros((half_batch, 1))), axis=1) 117 | fake = np.concatenate((np.zeros((half_batch, 1)), np.ones((half_batch, 1))), axis=1) 118 | 119 | # Train the discriminator 120 | d_loss_real, d_acc_real = self.discriminator.train_on_batch(imgs, valid) 121 | d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake) 122 | d_loss = 0.5 * (d_loss_real + d_loss_fake) 123 | d_acc = 0.5 * (d_acc_real + d_acc_fake) 124 | 125 | 126 | # --------------------- 127 | # Train Generator 128 | # --------------------- 129 | 130 | # We only want to train the generator for the combined model 131 | self.discriminator.set_trainable(False) 132 | 133 | # Sample noise and use as generator input 134 | noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) 135 | 136 | # The generator wants the discriminator to label the generated samples as valid 137 | valid = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))), axis=1) 138 | 139 | # Train the generator 140 | g_loss, g_acc = self.combined.train_on_batch(noise, valid) 141 | 142 | # Display the progress 143 | print ("%d [D loss: %f, acc: %.2f%%] [G loss: %f, acc: %.2f%%]" % (epoch, d_loss, 100*d_acc, g_loss, 100*g_acc)) 144 | 145 | # If at save interval => save generated image samples 146 | if epoch % save_interval == 0: 147 | self.save_imgs(epoch) 148 | 149 | def save_imgs(self, epoch): 150 | r, c = 5, 5 151 | noise = np.random.normal(0, 1, (r * c, 100)) 152 | gen_imgs = self.generator.predict(noise) 153 | 154 | # Rescale images 0 - 1 (from -1 to 1) 155 | gen_imgs = 0.5 * (gen_imgs + 1) 156 | 157 | fig, axs = plt.subplots(r, c) 158 | plt.suptitle("Deep Convolutional Generative Adversarial Network") 159 | cnt = 0 160 | for i in range(r): 161 | for j in range(c): 162 | axs[i,j].imshow(gen_imgs[cnt,0,:,:], cmap='gray') 163 | axs[i,j].axis('off') 164 | cnt += 1 165 | fig.savefig("mnist_%d.png" % epoch) 166 | plt.close() 167 | 168 | 169 | if __name__ == '__main__': 170 | dcgan = DCGAN() 171 | dcgan.train(epochs=200000, batch_size=64, save_interval=50) 172 | 173 | 174 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/gaussian_mixture_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | import math 3 | from sklearn import datasets 4 | import numpy as np 5 | 6 | from mlfromscratch.utils import normalize, euclidean_distance, calculate_covariance_matrix 7 | from mlfromscratch.utils import Plot 8 | 9 | 10 | class GaussianMixtureModel(): 11 | """A probabilistic clustering method for determining groupings among data samples. 
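# Editor's note (added): a standalone toy of the expectation step implemented below:
# responsibilities are prior-weighted likelihoods normalized per sample, and hard
# assignments follow from their argmax. Numbers are made up for illustration.
import numpy as np

priors = np.array([0.5, 0.5])
likelihoods = np.array([[0.9, 0.1],              # sample 0 under cluster 0 / cluster 1
                        [0.2, 0.6]])             # sample 1
weighted = likelihoods * priors
responsibility = weighted / weighted.sum(axis=1, keepdims=True)
print(responsibility)                            # rows sum to 1
print(responsibility.argmax(axis=1))             # hard assignments -> [0 1]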
12 | 13 | Parameters: 14 | ----------- 15 | k: int 16 | The number of clusters the algorithm will form. 17 | max_iterations: int 18 | The number of iterations the algorithm will run for if it does 19 | not converge before that. 20 | tolerance: float 21 | If the difference of the results from one iteration to the next is 22 | smaller than this value we will say that the algorithm has converged. 23 | """ 24 | def __init__(self, k=2, max_iterations=2000, tolerance=1e-8): 25 | self.k = k 26 | self.parameters = [] 27 | self.max_iterations = max_iterations 28 | self.tolerance = tolerance 29 | self.responsibilities = [] 30 | self.sample_assignments = None 31 | self.responsibility = None 32 | 33 | def _init_random_gaussians(self, X): 34 | """ Initialize gaussian randomly """ 35 | n_samples = np.shape(X)[0] 36 | self.priors = (1 / self.k) * np.ones(self.k) 37 | for i in range(self.k): 38 | params = {} 39 | params["mean"] = X[np.random.choice(range(n_samples))] 40 | params["cov"] = calculate_covariance_matrix(X) 41 | self.parameters.append(params) 42 | 43 | def multivariate_gaussian(self, X, params): 44 | """ Likelihood """ 45 | n_features = np.shape(X)[1] 46 | mean = params["mean"] 47 | covar = params["cov"] 48 | determinant = np.linalg.det(covar) 49 | likelihoods = np.zeros(np.shape(X)[0]) 50 | for i, sample in enumerate(X): 51 | d = n_features # dimension 52 | coeff = (1.0 / (math.pow((2.0 * math.pi), d / 2) 53 | * math.sqrt(determinant))) 54 | exponent = math.exp(-0.5 * (sample - mean).T.dot(np.linalg.pinv(covar)).dot((sample - mean))) 55 | likelihoods[i] = coeff * exponent 56 | 57 | return likelihoods 58 | 59 | def _get_likelihoods(self, X): 60 | """ Calculate the likelihood over all samples """ 61 | n_samples = np.shape(X)[0] 62 | likelihoods = np.zeros((n_samples, self.k)) 63 | for i in range(self.k): 64 | likelihoods[ 65 | :, i] = self.multivariate_gaussian( 66 | X, self.parameters[i]) 67 | return likelihoods 68 | 69 | def _expectation(self, X): 70 | """ Calculate the responsibility """ 71 | # Calculate probabilities of X belonging to the different clusters 72 | weighted_likelihoods = self._get_likelihoods(X) * self.priors 73 | sum_likelihoods = np.expand_dims( 74 | np.sum(weighted_likelihoods, axis=1), axis=1) 75 | # Determine responsibility as P(X|y)*P(y)/P(X) 76 | self.responsibility = weighted_likelihoods / sum_likelihoods 77 | # Assign samples to cluster that has largest probability 78 | self.sample_assignments = self.responsibility.argmax(axis=1) 79 | # Save value for convergence check 80 | self.responsibilities.append(np.max(self.responsibility, axis=1)) 81 | 82 | def _maximization(self, X): 83 | """ Update the parameters and priors """ 84 | # Iterate through clusters and recalculate mean and covariance 85 | for i in range(self.k): 86 | resp = np.expand_dims(self.responsibility[:, i], axis=1) 87 | mean = (resp * X).sum(axis=0) / resp.sum() 88 | covariance = (X - mean).T.dot((X - mean) * resp) / resp.sum() 89 | self.parameters[i]["mean"], self.parameters[ 90 | i]["cov"] = mean, covariance 91 | 92 | # Update weights 93 | n_samples = np.shape(X)[0] 94 | self.priors = self.responsibility.sum(axis=0) / n_samples 95 | 96 | def _converged(self, X): 97 | """ Covergence if || likehood - last_likelihood || < tolerance """ 98 | if len(self.responsibilities) < 2: 99 | return False 100 | diff = np.linalg.norm( 101 | self.responsibilities[-1] - self.responsibilities[-2]) 102 | # print ("Likelihood update: %s (tol: %s)" % (diff, self.tolerance)) 103 | return diff <= self.tolerance 104 | 105 | def 
predict(self, X): 106 | """ Run GMM and return the cluster indices """ 107 | # Initialize the gaussians randomly 108 | self._init_random_gaussians(X) 109 | 110 | # Run EM until convergence or for max iterations 111 | for _ in range(self.max_iterations): 112 | self._expectation(X) # E-step 113 | self._maximization(X) # M-step 114 | 115 | # Check convergence 116 | if self._converged(X): 117 | break 118 | 119 | # Make new assignments and return them 120 | self._expectation(X) 121 | return self.sample_assignments 122 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/generative_adversarial_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from sklearn import datasets 3 | import math 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import progressbar 7 | 8 | from sklearn.datasets import fetch_mldata 9 | 10 | from mlfromscratch.deep_learning.optimizers import Adam 11 | from mlfromscratch.deep_learning.loss_functions import CrossEntropy 12 | from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization 13 | from mlfromscratch.deep_learning import NeuralNetwork 14 | 15 | 16 | class GAN(): 17 | """A Generative Adversarial Network with deep fully-connected neural nets as 18 | Generator and Discriminator. 19 | 20 | Training Data: MNIST Handwritten Digits (28x28 images) 21 | """ 22 | def __init__(self): 23 | self.img_rows = 28 24 | self.img_cols = 28 25 | self.img_dim = self.img_rows * self.img_cols 26 | self.latent_dim = 100 27 | 28 | optimizer = Adam(learning_rate=0.0002, b1=0.5) 29 | loss_function = CrossEntropy 30 | 31 | # Build the discriminator 32 | self.discriminator = self.build_discriminator(optimizer, loss_function) 33 | 34 | # Build the generator 35 | self.generator = self.build_generator(optimizer, loss_function) 36 | 37 | # Build the combined model 38 | self.combined = NeuralNetwork(optimizer=optimizer, loss=loss_function) 39 | self.combined.layers.extend(self.generator.layers) 40 | self.combined.layers.extend(self.discriminator.layers) 41 | 42 | print () 43 | self.generator.summary(name="Generator") 44 | self.discriminator.summary(name="Discriminator") 45 | 46 | def build_generator(self, optimizer, loss_function): 47 | 48 | model = NeuralNetwork(optimizer=optimizer, loss=loss_function) 49 | 50 | model.add(Dense(256, input_shape=(self.latent_dim,))) 51 | model.add(Activation('leaky_relu')) 52 | model.add(BatchNormalization(momentum=0.8)) 53 | model.add(Dense(512)) 54 | model.add(Activation('leaky_relu')) 55 | model.add(BatchNormalization(momentum=0.8)) 56 | model.add(Dense(1024)) 57 | model.add(Activation('leaky_relu')) 58 | model.add(BatchNormalization(momentum=0.8)) 59 | model.add(Dense(self.img_dim)) 60 | model.add(Activation('tanh')) 61 | 62 | return model 63 | 64 | def build_discriminator(self, optimizer, loss_function): 65 | 66 | model = NeuralNetwork(optimizer=optimizer, loss=loss_function) 67 | 68 | model.add(Dense(512, input_shape=(self.img_dim,))) 69 | model.add(Activation('leaky_relu')) 70 | model.add(Dropout(0.5)) 71 | model.add(Dense(256)) 72 | model.add(Activation('leaky_relu')) 73 | model.add(Dropout(0.5)) 74 | model.add(Dense(2)) 75 | model.add(Activation('softmax')) 76 | 77 | return model 78 | 79 | def train(self, n_epochs, batch_size=128, save_interval=50): 80 | 81 | mnist = fetch_mldata('MNIST original') 82 | 83 | X = mnist.data 84 | y = 
mnist.target 85 | 86 | # Rescale [-1, 1] 87 | X = (X.astype(np.float32) - 127.5) / 127.5 88 | 89 | half_batch = int(batch_size / 2) 90 | 91 | for epoch in range(n_epochs): 92 | 93 | # --------------------- 94 | # Train Discriminator 95 | # --------------------- 96 | 97 | self.discriminator.set_trainable(True) 98 | 99 | # Select a random half batch of images 100 | idx = np.random.randint(0, X.shape[0], half_batch) 101 | imgs = X[idx] 102 | 103 | # Sample noise to use as generator input 104 | noise = np.random.normal(0, 1, (half_batch, self.latent_dim)) 105 | 106 | # Generate a half batch of images 107 | gen_imgs = self.generator.predict(noise) 108 | 109 | # Valid = [1, 0], Fake = [0, 1] 110 | valid = np.concatenate((np.ones((half_batch, 1)), np.zeros((half_batch, 1))), axis=1) 111 | fake = np.concatenate((np.zeros((half_batch, 1)), np.ones((half_batch, 1))), axis=1) 112 | 113 | # Train the discriminator 114 | d_loss_real, d_acc_real = self.discriminator.train_on_batch(imgs, valid) 115 | d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake) 116 | d_loss = 0.5 * (d_loss_real + d_loss_fake) 117 | d_acc = 0.5 * (d_acc_real + d_acc_fake) 118 | 119 | 120 | # --------------------- 121 | # Train Generator 122 | # --------------------- 123 | 124 | # We only want to train the generator for the combined model 125 | self.discriminator.set_trainable(False) 126 | 127 | # Sample noise and use as generator input 128 | noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) 129 | 130 | # The generator wants the discriminator to label the generated samples as valid 131 | valid = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))), axis=1) 132 | 133 | # Train the generator 134 | g_loss, g_acc = self.combined.train_on_batch(noise, valid) 135 | 136 | # Display the progress 137 | print ("%d [D loss: %f, acc: %.2f%%] [G loss: %f, acc: %.2f%%]" % (epoch, d_loss, 100*d_acc, g_loss, 100*g_acc)) 138 | 139 | # If at save interval => save generated image samples 140 | if epoch % save_interval == 0: 141 | self.save_imgs(epoch) 142 | 143 | def save_imgs(self, epoch): 144 | r, c = 5, 5 # Grid size 145 | noise = np.random.normal(0, 1, (r * c, self.latent_dim)) 146 | # Generate images and reshape to image shape 147 | gen_imgs = self.generator.predict(noise).reshape((-1, self.img_rows, self.img_cols)) 148 | 149 | # Rescale images 0 - 1 150 | gen_imgs = 0.5 * gen_imgs + 0.5 151 | 152 | fig, axs = plt.subplots(r, c) 153 | plt.suptitle("Generative Adversarial Network") 154 | cnt = 0 155 | for i in range(r): 156 | for j in range(c): 157 | axs[i,j].imshow(gen_imgs[cnt,:,:], cmap='gray') 158 | axs[i,j].axis('off') 159 | cnt += 1 160 | fig.savefig("mnist_%d.png" % epoch) 161 | plt.close() 162 | 163 | 164 | if __name__ == '__main__': 165 | gan = GAN() 166 | gan.train(n_epochs=200000, batch_size=64, save_interval=400) 167 | 168 | 169 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/genetic_algorithm.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import string 3 | import numpy as np 4 | 5 | class GeneticAlgorithm(): 6 | """An implementation of a Genetic Algorithm which will try to produce the user 7 | specified target string. 8 | 9 | Parameters: 10 | ----------- 11 | target_string: string 12 | The string which the GA should try to produce. 
13 | population_size: int 14 | The number of individuals (possible solutions) in the population. 15 | mutation_rate: float 16 | The rate (or probability) of which the alleles (chars in this case) should be 17 | randomly changed. 18 | """ 19 | def __init__(self, target_string, population_size, mutation_rate): 20 | self.target = target_string 21 | self.population_size = population_size 22 | self.mutation_rate = mutation_rate 23 | self.letters = [" "] + list(string.ascii_letters) 24 | 25 | def _initialize(self): 26 | """ Initialize population with random strings """ 27 | self.population = [] 28 | for _ in range(self.population_size): 29 | # Select random letters as new individual 30 | individual = "".join(np.random.choice(self.letters, size=len(self.target))) 31 | self.population.append(individual) 32 | 33 | def _calculate_fitness(self): 34 | """ Calculates the fitness of each individual in the population """ 35 | population_fitness = [] 36 | for individual in self.population: 37 | # Calculate loss as the alphabetical distance between 38 | # the characters in the individual and the target string 39 | loss = 0 40 | for i in range(len(individual)): 41 | letter_i1 = self.letters.index(individual[i]) 42 | letter_i2 = self.letters.index(self.target[i]) 43 | loss += abs(letter_i1 - letter_i2) 44 | fitness = 1 / (loss + 1e-6) 45 | population_fitness.append(fitness) 46 | return population_fitness 47 | 48 | def _mutate(self, individual): 49 | """ Randomly change the individual's characters with probability 50 | self.mutation_rate """ 51 | individual = list(individual) 52 | for j in range(len(individual)): 53 | # Make change with probability mutation_rate 54 | if np.random.random() < self.mutation_rate: 55 | individual[j] = np.random.choice(self.letters) 56 | # Return mutated individual as string 57 | return "".join(individual) 58 | 59 | def _crossover(self, parent1, parent2): 60 | """ Create children from parents by crossover """ 61 | # Select random crossover point 62 | cross_i = np.random.randint(0, len(parent1)) 63 | child1 = parent1[:cross_i] + parent2[cross_i:] 64 | child2 = parent2[:cross_i] + parent1[cross_i:] 65 | return child1, child2 66 | 67 | def run(self, iterations): 68 | # Initialize new population 69 | self._initialize() 70 | 71 | for epoch in range(iterations): 72 | population_fitness = self._calculate_fitness() 73 | 74 | fittest_individual = self.population[np.argmax(population_fitness)] 75 | highest_fitness = max(population_fitness) 76 | 77 | # If we have found individual which matches the target => Done 78 | if fittest_individual == self.target: 79 | break 80 | 81 | # Set the probability that the individual should be selected as a parent 82 | # proportionate to the individual's fitness. 
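# This is roulette-wheel (fitness proportionate) selection: the normalized
# fitness values form a probability distribution from which np.random.choice
# draws the two parents below.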
83 | parent_probabilities = [fitness / sum(population_fitness) for fitness in population_fitness] 84 | 85 | # Determine the next generation 86 | new_population = [] 87 | for i in np.arange(0, self.population_size, 2): 88 | # Select two parents randomly according to probabilities 89 | parent1, parent2 = np.random.choice(self.population, size=2, p=parent_probabilities, replace=False) 90 | # Perform crossover to produce offspring 91 | child1, child2 = self._crossover(parent1, parent2) 92 | # Save mutated offspring for next generation 93 | new_population += [self._mutate(child1), self._mutate(child2)] 94 | 95 | print ("[%d Closest Candidate: '%s', Fitness: %.2f]" % (epoch, fittest_individual, highest_fitness)) 96 | self.population = new_population 97 | 98 | print ("[%d Answer: '%s']" % (epoch, fittest_individual)) 99 | 100 | 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/k_means.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from mlfromscratch.utils import normalize, euclidean_distance, Plot 4 | from mlfromscratch.unsupervised_learning import * 5 | 6 | class KMeans(): 7 | """A simple clustering method that forms k clusters by iteratively reassigning 8 | samples to the closest centroids and after that moves the centroids to the center 9 | of the new formed clusters. 10 | 11 | 12 | Parameters: 13 | ----------- 14 | k: int 15 | The number of clusters the algorithm will form. 16 | max_iterations: int 17 | The number of iterations the algorithm will run for if it does 18 | not converge before that. 19 | """ 20 | def __init__(self, k=2, max_iterations=500): 21 | self.k = k 22 | self.max_iterations = max_iterations 23 | 24 | def _init_random_centroids(self, X): 25 | """ Initialize the centroids as k random samples of X""" 26 | n_samples, n_features = np.shape(X) 27 | centroids = np.zeros((self.k, n_features)) 28 | for i in range(self.k): 29 | centroid = X[np.random.choice(range(n_samples))] 30 | centroids[i] = centroid 31 | return centroids 32 | 33 | def _closest_centroid(self, sample, centroids): 34 | """ Return the index of the closest centroid to the sample """ 35 | closest_i = 0 36 | closest_dist = float('inf') 37 | for i, centroid in enumerate(centroids): 38 | distance = euclidean_distance(sample, centroid) 39 | if distance < closest_dist: 40 | closest_i = i 41 | closest_dist = distance 42 | return closest_i 43 | 44 | def _create_clusters(self, centroids, X): 45 | """ Assign the samples to the closest centroids to create clusters """ 46 | n_samples = np.shape(X)[0] 47 | clusters = [[] for _ in range(self.k)] 48 | for sample_i, sample in enumerate(X): 49 | centroid_i = self._closest_centroid(sample, centroids) 50 | clusters[centroid_i].append(sample_i) 51 | return clusters 52 | 53 | def _calculate_centroids(self, clusters, X): 54 | """ Calculate new centroids as the means of the samples in each cluster """ 55 | n_features = np.shape(X)[1] 56 | centroids = np.zeros((self.k, n_features)) 57 | for i, cluster in enumerate(clusters): 58 | centroid = np.mean(X[cluster], axis=0) 59 | centroids[i] = centroid 60 | return centroids 61 | 62 | def _get_cluster_labels(self, clusters, X): 63 | """ Classify samples as the index of their clusters """ 64 | # One prediction for each sample 65 | y_pred = np.zeros(np.shape(X)[0]) 66 | for cluster_i, cluster in enumerate(clusters): 67 | for sample_i in 
cluster: 68 | y_pred[sample_i] = cluster_i 69 | return y_pred 70 | 71 | def predict(self, X): 72 | """ Do K-Means clustering and return cluster indices """ 73 | 74 | # Initialize centroids as k random samples from X 75 | centroids = self._init_random_centroids(X) 76 | 77 | # Iterate until convergence or for max iterations 78 | for _ in range(self.max_iterations): 79 | # Assign samples to closest centroids (create clusters) 80 | clusters = self._create_clusters(centroids, X) 81 | # Save current centroids for convergence check 82 | prev_centroids = centroids 83 | # Calculate new centroids from the clusters 84 | centroids = self._calculate_centroids(clusters, X) 85 | # If no centroids have changed => convergence 86 | diff = centroids - prev_centroids 87 | if not diff.any(): 88 | break 89 | 90 | return self._get_cluster_labels(clusters, X) 91 | 92 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/partitioning_around_medoids.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from mlfromscratch.utils import normalize, euclidean_distance, Plot 4 | from mlfromscratch.unsupervised_learning import PCA 5 | 6 | 7 | class PAM(): 8 | """A simple clustering method that forms k clusters by first assigning 9 | samples to the closest medoids, and then swapping medoids with non-medoid 10 | samples if the total distance (cost) between the cluster members and their medoid 11 | is smaller than prevoisly. 12 | 13 | 14 | Parameters: 15 | ----------- 16 | k: int 17 | The number of clusters the algorithm will form. 18 | """ 19 | def __init__(self, k=2): 20 | self.k = k 21 | 22 | def _init_random_medoids(self, X): 23 | """ Initialize the medoids as random samples """ 24 | n_samples, n_features = np.shape(X) 25 | medoids = np.zeros((self.k, n_features)) 26 | for i in range(self.k): 27 | medoid = X[np.random.choice(range(n_samples))] 28 | medoids[i] = medoid 29 | return medoids 30 | 31 | def _closest_medoid(self, sample, medoids): 32 | """ Return the index of the closest medoid to the sample """ 33 | closest_i = None 34 | closest_distance = float("inf") 35 | for i, medoid in enumerate(medoids): 36 | distance = euclidean_distance(sample, medoid) 37 | if distance < closest_distance: 38 | closest_i = i 39 | closest_distance = distance 40 | return closest_i 41 | 42 | def _create_clusters(self, X, medoids): 43 | """ Assign the samples to the closest medoids to create clusters """ 44 | clusters = [[] for _ in range(self.k)] 45 | for sample_i, sample in enumerate(X): 46 | medoid_i = self._closest_medoid(sample, medoids) 47 | clusters[medoid_i].append(sample_i) 48 | return clusters 49 | 50 | def _calculate_cost(self, X, clusters, medoids): 51 | """ Calculate the cost (total distance between samples and their medoids) """ 52 | cost = 0 53 | # For each cluster 54 | for i, cluster in enumerate(clusters): 55 | medoid = medoids[i] 56 | for sample_i in cluster: 57 | # Add distance between sample and medoid as cost 58 | cost += euclidean_distance(X[sample_i], medoid) 59 | return cost 60 | 61 | def _get_non_medoids(self, X, medoids): 62 | """ Returns a list of all samples that are not currently medoids """ 63 | non_medoids = [] 64 | for sample in X: 65 | if not sample in medoids: 66 | non_medoids.append(sample) 67 | return non_medoids 68 | 69 | def _get_cluster_labels(self, clusters, X): 70 | """ Classify samples as the index of their clusters """ 71 | # 
One prediction for each sample 72 | y_pred = np.zeros(np.shape(X)[0]) 73 | for cluster_i in range(len(clusters)): 74 | cluster = clusters[cluster_i] 75 | for sample_i in cluster: 76 | y_pred[sample_i] = cluster_i 77 | return y_pred 78 | 79 | def predict(self, X): 80 | """ Do Partitioning Around Medoids and return the cluster labels """ 81 | # Initialize medoids randomly 82 | medoids = self._init_random_medoids(X) 83 | # Assign samples to closest medoids 84 | clusters = self._create_clusters(X, medoids) 85 | 86 | # Calculate the initial cost (total distance between samples and 87 | # corresponding medoids) 88 | cost = self._calculate_cost(X, clusters, medoids) 89 | 90 | # Iterate until we no longer have a cheaper cost 91 | while True: 92 | best_medoids = medoids 93 | lowest_cost = cost 94 | for medoid in medoids: 95 | # Get all non-medoid samples 96 | non_medoids = self._get_non_medoids(X, medoids) 97 | # Calculate the cost when swapping medoid and samples 98 | for sample in non_medoids: 99 | # Swap sample with the medoid 100 | new_medoids = medoids.copy() 101 | new_medoids[medoids == medoid] = sample 102 | # Assign samples to new medoids 103 | new_clusters = self._create_clusters(X, new_medoids) 104 | # Calculate the cost with the new set of medoids 105 | new_cost = self._calculate_cost( 106 | X, new_clusters, new_medoids) 107 | # If the swap gives us a lower cost we save the medoids and cost 108 | if new_cost < lowest_cost: 109 | lowest_cost = new_cost 110 | best_medoids = new_medoids 111 | # If there was a swap that resultet in a lower cost we save the 112 | # resulting medoids from the best swap and the new cost 113 | if lowest_cost < cost: 114 | cost = lowest_cost 115 | medoids = best_medoids 116 | # Else finished 117 | else: 118 | break 119 | 120 | final_clusters = self._create_clusters(X, medoids) 121 | # Return the samples cluster indices as labels 122 | return self._get_cluster_labels(final_clusters, X) 123 | 124 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/principal_component_analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import numpy as np 3 | from mlfromscratch.utils import calculate_covariance_matrix 4 | 5 | 6 | class PCA(): 7 | """A method for doing dimensionality reduction by transforming the feature 8 | space to a lower dimensionality, removing correlation between features and 9 | maximizing the variance along each feature axis. This class is also used throughout 10 | the project to plot data. 
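The principal components are the eigenvectors of the covariance matrix with the largest eigenvalues, and transform() projects the data onto the n_components leading eigenvectors.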
11 | """ 12 | def transform(self, X, n_components): 13 | """ Fit the dataset to the number of principal components specified in the 14 | constructor and return the transformed dataset """ 15 | covariance_matrix = calculate_covariance_matrix(X) 16 | 17 | # Where (eigenvector[:,0] corresponds to eigenvalue[0]) 18 | eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix) 19 | 20 | # Sort the eigenvalues and corresponding eigenvectors from largest 21 | # to smallest eigenvalue and select the first n_components 22 | idx = eigenvalues.argsort()[::-1] 23 | eigenvalues = eigenvalues[idx][:n_components] 24 | eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components] 25 | 26 | # Project the data onto principal components 27 | X_transformed = X.dot(eigenvectors) 28 | 29 | return X_transformed 30 | -------------------------------------------------------------------------------- /mlfromscratch/unsupervised_learning/restricted_boltzmann_machine.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | import progressbar 4 | 5 | from mlfromscratch.utils.misc import bar_widgets 6 | from mlfromscratch.utils import batch_iterator 7 | from mlfromscratch.deep_learning.activation_functions import Sigmoid 8 | 9 | sigmoid = Sigmoid() 10 | 11 | class RBM(): 12 | """Bernoulli Restricted Boltzmann Machine (RBM) 13 | 14 | Parameters: 15 | ----------- 16 | n_hidden: int: 17 | The number of processing nodes (neurons) in the hidden layer. 18 | learning_rate: float 19 | The step length that will be used when updating the weights. 20 | batch_size: int 21 | The size of the mini-batch used to calculate each weight update. 22 | n_iterations: float 23 | The number of training iterations the algorithm will tune the weights for. 
24 | 25 | Reference: 26 | A Practical Guide to Training Restricted Boltzmann Machines 27 | URL: https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf 28 | """ 29 | def __init__(self, n_hidden=128, learning_rate=0.1, batch_size=10, n_iterations=100): 30 | self.n_iterations = n_iterations 31 | self.batch_size = batch_size 32 | self.lr = learning_rate 33 | self.n_hidden = n_hidden 34 | self.progressbar = progressbar.ProgressBar(widgets=bar_widgets) 35 | 36 | def _initialize_weights(self, X): 37 | n_visible = X.shape[1] 38 | self.W = np.random.normal(scale=0.1, size=(n_visible, self.n_hidden)) 39 | self.v0 = np.zeros(n_visible) # Bias visible 40 | self.h0 = np.zeros(self.n_hidden) # Bias hidden 41 | 42 | def fit(self, X, y=None): 43 | '''Contrastive Divergence training procedure''' 44 | 45 | self._initialize_weights(X) 46 | 47 | self.training_errors = [] 48 | self.training_reconstructions = [] 49 | for _ in self.progressbar(range(self.n_iterations)): 50 | batch_errors = [] 51 | for batch in batch_iterator(X, batch_size=self.batch_size): 52 | # Positive phase 53 | positive_hidden = sigmoid(batch.dot(self.W) + self.h0) 54 | hidden_states = self._sample(positive_hidden) 55 | positive_associations = batch.T.dot(positive_hidden) 56 | 57 | # Negative phase 58 | negative_visible = sigmoid(hidden_states.dot(self.W.T) + self.v0) 59 | negative_visible = self._sample(negative_visible) 60 | negative_hidden = sigmoid(negative_visible.dot(self.W) + self.h0) 61 | negative_associations = negative_visible.T.dot(negative_hidden) 62 | 63 | self.W += self.lr * (positive_associations - negative_associations) 64 | self.h0 += self.lr * (positive_hidden.sum(axis=0) - negative_hidden.sum(axis=0)) 65 | self.v0 += self.lr * (batch.sum(axis=0) - negative_visible.sum(axis=0)) 66 | 67 | batch_errors.append(np.mean((batch - negative_visible) ** 2)) 68 | 69 | self.training_errors.append(np.mean(batch_errors)) 70 | # Reconstruct a batch of images from the training set 71 | idx = np.random.choice(range(X.shape[0]), self.batch_size) 72 | self.training_reconstructions.append(self.reconstruct(X[idx])) 73 | 74 | def _sample(self, X): 75 | return X > np.random.random_sample(size=X.shape) 76 | 77 | def reconstruct(self, X): 78 | positive_hidden = sigmoid(X.dot(self.W) + self.h0) 79 | hidden_states = self._sample(positive_hidden) 80 | negative_visible = sigmoid(hidden_states.dot(self.W.T) + self.v0) 81 | return negative_visible 82 | 83 | -------------------------------------------------------------------------------- /mlfromscratch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import Plot 2 | from .data_manipulation import * 3 | from .data_operation import * -------------------------------------------------------------------------------- /mlfromscratch/utils/data_manipulation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from itertools import combinations_with_replacement 3 | import numpy as np 4 | import math 5 | import sys 6 | 7 | 8 | def shuffle_data(X, y, seed=None): 9 | """ Random shuffle of the samples in X and y """ 10 | if seed: 11 | np.random.seed(seed) 12 | idx = np.arange(X.shape[0]) 13 | np.random.shuffle(idx) 14 | return X[idx], y[idx] 15 | 16 | 17 | def batch_iterator(X, y=None, batch_size=64): 18 | """ Simple batch generator """ 19 | n_samples = X.shape[0] 20 | for i in np.arange(0, n_samples, batch_size): 21 | begin, end = i, min(i+batch_size, n_samples) 22 | if y is not 
None: 23 | yield X[begin:end], y[begin:end] 24 | else: 25 | yield X[begin:end] 26 | 27 | 28 | def divide_on_feature(X, feature_i, threshold): 29 | """ Divide dataset based on if sample value on feature index is larger than 30 | the given threshold """ 31 | split_func = None 32 | if isinstance(threshold, int) or isinstance(threshold, float): 33 | split_func = lambda sample: sample[feature_i] >= threshold 34 | else: 35 | split_func = lambda sample: sample[feature_i] == threshold 36 | 37 | X_1 = np.array([sample for sample in X if split_func(sample)]) 38 | X_2 = np.array([sample for sample in X if not split_func(sample)]) 39 | 40 | return np.array([X_1, X_2]) 41 | 42 | 43 | def polynomial_features(X, degree): 44 | n_samples, n_features = np.shape(X) 45 | 46 | def index_combinations(): 47 | combs = [combinations_with_replacement(range(n_features), i) for i in range(0, degree + 1)] 48 | flat_combs = [item for sublist in combs for item in sublist] 49 | return flat_combs 50 | 51 | combinations = index_combinations() 52 | n_output_features = len(combinations) 53 | X_new = np.empty((n_samples, n_output_features)) 54 | 55 | for i, index_combs in enumerate(combinations): 56 | X_new[:, i] = np.prod(X[:, index_combs], axis=1) 57 | 58 | return X_new 59 | 60 | 61 | def get_random_subsets(X, y, n_subsets, replacements=True): 62 | """ Return random subsets (with replacements) of the data """ 63 | n_samples = np.shape(X)[0] 64 | # Concatenate x and y and do a random shuffle 65 | X_y = np.concatenate((X, y.reshape((1, len(y))).T), axis=1) 66 | np.random.shuffle(X_y) 67 | subsets = [] 68 | 69 | # Uses 50% of training samples without replacements 70 | subsample_size = int(n_samples // 2) 71 | if replacements: 72 | subsample_size = n_samples # 100% with replacements 73 | 74 | for _ in range(n_subsets): 75 | idx = np.random.choice( 76 | range(n_samples), 77 | size=np.shape(range(subsample_size)), 78 | replace=replacements) 79 | X = X_y[idx][:, :-1] 80 | y = X_y[idx][:, -1] 81 | subsets.append([X, y]) 82 | return subsets 83 | 84 | 85 | def normalize(X, axis=-1, order=2): 86 | """ Normalize the dataset X """ 87 | l2 = np.atleast_1d(np.linalg.norm(X, order, axis)) 88 | l2[l2 == 0] = 1 89 | return X / np.expand_dims(l2, axis) 90 | 91 | 92 | def standardize(X): 93 | """ Standardize the dataset X """ 94 | X_std = X 95 | mean = X.mean(axis=0) 96 | std = X.std(axis=0) 97 | for col in range(np.shape(X)[1]): 98 | if std[col]: 99 | X_std[:, col] = (X_std[:, col] - mean[col]) / std[col] 100 | # X_std = (X - X.mean(axis=0)) / X.std(axis=0) 101 | return X_std 102 | 103 | 104 | def train_test_split(X, y, test_size=0.5, shuffle=True, seed=None): 105 | """ Split the data into train and test sets """ 106 | if shuffle: 107 | X, y = shuffle_data(X, y, seed) 108 | # Split the training data from test data in the ratio specified in 109 | # test_size 110 | split_i = len(y) - int(len(y) // (1 / test_size)) 111 | X_train, X_test = X[:split_i], X[split_i:] 112 | y_train, y_test = y[:split_i], y[split_i:] 113 | 114 | return X_train, X_test, y_train, y_test 115 | 116 | 117 | def k_fold_cross_validation_sets(X, y, k, shuffle=True): 118 | """ Split the data into k sets of training / test data """ 119 | if shuffle: 120 | X, y = shuffle_data(X, y) 121 | 122 | n_samples = len(y) 123 | left_overs = {} 124 | n_left_overs = (n_samples % k) 125 | if n_left_overs != 0: 126 | left_overs["X"] = X[-n_left_overs:] 127 | left_overs["y"] = y[-n_left_overs:] 128 | X = X[:-n_left_overs] 129 | y = y[:-n_left_overs] 130 | 131 | X_split = np.split(X, k) 132 | 
y_split = np.split(y, k) 133 | sets = [] 134 | for i in range(k): 135 | X_test, y_test = X_split[i], y_split[i] 136 | X_train = np.concatenate(X_split[:i] + X_split[i + 1:], axis=0) 137 | y_train = np.concatenate(y_split[:i] + y_split[i + 1:], axis=0) 138 | sets.append([X_train, X_test, y_train, y_test]) 139 | 140 | # Add left over samples to last set as training samples 141 | if n_left_overs != 0: 142 | np.append(sets[-1][0], left_overs["X"], axis=0) 143 | np.append(sets[-1][2], left_overs["y"], axis=0) 144 | 145 | return np.array(sets) 146 | 147 | 148 | def to_categorical(x, n_col=None): 149 | """ One-hot encoding of nominal values """ 150 | if not n_col: 151 | n_col = np.amax(x) + 1 152 | one_hot = np.zeros((x.shape[0], n_col)) 153 | one_hot[np.arange(x.shape[0]), x] = 1 154 | return one_hot 155 | 156 | 157 | def to_nominal(x): 158 | """ Conversion from one-hot encoding to nominal """ 159 | return np.argmax(x, axis=1) 160 | 161 | 162 | def make_diagonal(x): 163 | """ Converts a vector into an diagonal matrix """ 164 | m = np.zeros((len(x), len(x))) 165 | for i in range(len(m[0])): 166 | m[i, i] = x[i] 167 | return m 168 | -------------------------------------------------------------------------------- /mlfromscratch/utils/data_operation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import math 4 | import sys 5 | 6 | 7 | def calculate_entropy(y): 8 | """ Calculate the entropy of label array y """ 9 | log2 = lambda x: math.log(x) / math.log(2) 10 | unique_labels = np.unique(y) 11 | entropy = 0 12 | for label in unique_labels: 13 | count = len(y[y == label]) 14 | p = count / len(y) 15 | entropy += -p * log2(p) 16 | return entropy 17 | 18 | 19 | def mean_squared_error(y_true, y_pred): 20 | """ Returns the mean squared error between y_true and y_pred """ 21 | mse = np.mean(np.power(y_true - y_pred, 2)) 22 | return mse 23 | 24 | 25 | def calculate_variance(X): 26 | """ Return the variance of the features in dataset X """ 27 | mean = np.ones(np.shape(X)) * X.mean(0) 28 | n_samples = np.shape(X)[0] 29 | variance = (1 / n_samples) * np.diag((X - mean).T.dot(X - mean)) 30 | 31 | return variance 32 | 33 | 34 | def calculate_std_dev(X): 35 | """ Calculate the standard deviations of the features in dataset X """ 36 | std_dev = np.sqrt(calculate_variance(X)) 37 | return std_dev 38 | 39 | 40 | def euclidean_distance(x1, x2): 41 | """ Calculates the l2 distance between two vectors """ 42 | distance = 0 43 | # Squared distance between each coordinate 44 | for i in range(len(x1)): 45 | distance += pow((x1[i] - x2[i]), 2) 46 | return math.sqrt(distance) 47 | 48 | 49 | def accuracy_score(y_true, y_pred): 50 | """ Compare y_true to y_pred and return the accuracy """ 51 | accuracy = np.sum(y_true == y_pred, axis=0) / len(y_true) 52 | return accuracy 53 | 54 | 55 | def calculate_covariance_matrix(X, Y=None): 56 | """ Calculate the covariance matrix for the dataset X """ 57 | if Y is None: 58 | Y = X 59 | n_samples = np.shape(X)[0] 60 | covariance_matrix = (1 / (n_samples-1)) * (X - X.mean(axis=0)).T.dot(Y - Y.mean(axis=0)) 61 | 62 | return np.array(covariance_matrix, dtype=float) 63 | 64 | 65 | def calculate_correlation_matrix(X, Y=None): 66 | """ Calculate the correlation matrix for the dataset X """ 67 | if Y is None: 68 | Y = X 69 | n_samples = np.shape(X)[0] 70 | covariance = (1 / n_samples) * (X - X.mean(0)).T.dot(Y - Y.mean(0)) 71 | std_dev_X = np.expand_dims(calculate_std_dev(X), 1) 72 | std_dev_y = 
np.expand_dims(calculate_std_dev(Y), 1) 73 | correlation_matrix = np.divide(covariance, std_dev_X.dot(std_dev_y.T)) 74 | 75 | return np.array(correlation_matrix, dtype=float) 76 | -------------------------------------------------------------------------------- /mlfromscratch/utils/kernels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def linear_kernel(**kwargs): 5 | def f(x1, x2): 6 | return np.inner(x1, x2) 7 | return f 8 | 9 | 10 | def polynomial_kernel(power, coef, **kwargs): 11 | def f(x1, x2): 12 | return (np.inner(x1, x2) + coef)**power 13 | return f 14 | 15 | 16 | def rbf_kernel(gamma, **kwargs): 17 | def f(x1, x2): 18 | distance = np.linalg.norm(x1 - x2) ** 2 19 | return np.exp(-gamma * distance) 20 | return f 21 | -------------------------------------------------------------------------------- /mlfromscratch/utils/misc.py: -------------------------------------------------------------------------------- 1 | import progressbar 2 | from mpl_toolkits.mplot3d import Axes3D 3 | import matplotlib.pyplot as plt 4 | import matplotlib.cm as cmx 5 | import matplotlib.colors as colors 6 | import numpy as np 7 | 8 | from mlfromscratch.utils.data_operation import calculate_covariance_matrix 9 | from mlfromscratch.utils.data_operation import calculate_correlation_matrix 10 | from mlfromscratch.utils.data_manipulation import standardize 11 | 12 | bar_widgets = [ 13 | 'Training: ', progressbar.Percentage(), ' ', progressbar.Bar(marker="-", left="[", right="]"), 14 | ' ', progressbar.ETA() 15 | ] 16 | 17 | class Plot(): 18 | def __init__(self): 19 | self.cmap = plt.get_cmap('viridis') 20 | 21 | def _transform(self, X, dim): 22 | covariance = calculate_covariance_matrix(X) 23 | eigenvalues, eigenvectors = np.linalg.eig(covariance) 24 | # Sort eigenvalues and eigenvector by largest eigenvalues 25 | idx = eigenvalues.argsort()[::-1] 26 | eigenvalues = eigenvalues[idx][:dim] 27 | eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :dim] 28 | # Project the data onto principal components 29 | X_transformed = X.dot(eigenvectors) 30 | 31 | return X_transformed 32 | 33 | 34 | def plot_regression(self, lines, title, axis_labels=None, mse=None, scatter=None, legend={"type": "lines", "loc": "lower right"}): 35 | 36 | if scatter: 37 | scatter_plots = scatter_labels = [] 38 | for s in scatter: 39 | scatter_plots += [plt.scatter(s["x"], s["y"], color=s["color"], s=s["size"])] 40 | scatter_labels += [s["label"]] 41 | scatter_plots = tuple(scatter_plots) 42 | scatter_labels = tuple(scatter_labels) 43 | 44 | for l in lines: 45 | li = plt.plot(l["x"], l["y"], color=s["color"], linewidth=l["width"], label=l["label"]) 46 | 47 | if mse: 48 | plt.suptitle(title) 49 | plt.title("MSE: %.2f" % mse, fontsize=10) 50 | else: 51 | plt.title(title) 52 | 53 | if axis_labels: 54 | plt.xlabel(axis_labels["x"]) 55 | plt.ylabel(axis_labels["y"]) 56 | 57 | if legend["type"] == "lines": 58 | plt.legend(loc="lower_left") 59 | elif legend["type"] == "scatter" and scatter: 60 | plt.legend(scatter_plots, scatter_labels, loc=legend["loc"]) 61 | 62 | plt.show() 63 | 64 | 65 | 66 | # Plot the dataset X and the corresponding labels y in 2D using PCA. 
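# Note: y is expected to hold class labels that can be cast to int; each class is drawn in a separate color taken from the viridis colormap.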
67 | def plot_in_2d(self, X, y=None, title=None, accuracy=None, legend_labels=None): 68 | X_transformed = self._transform(X, dim=2) 69 | x1 = X_transformed[:, 0] 70 | x2 = X_transformed[:, 1] 71 | class_distr = [] 72 | 73 | y = np.array(y).astype(int) 74 | 75 | colors = [self.cmap(i) for i in np.linspace(0, 1, len(np.unique(y)))] 76 | 77 | # Plot the different class distributions 78 | for i, l in enumerate(np.unique(y)): 79 | _x1 = x1[y == l] 80 | _x2 = x2[y == l] 81 | _y = y[y == l] 82 | class_distr.append(plt.scatter(_x1, _x2, color=colors[i])) 83 | 84 | # Plot legend 85 | if not legend_labels is None: 86 | plt.legend(class_distr, legend_labels, loc=1) 87 | 88 | # Plot title 89 | if title: 90 | if accuracy: 91 | perc = 100 * accuracy 92 | plt.suptitle(title) 93 | plt.title("Accuracy: %.1f%%" % perc, fontsize=10) 94 | else: 95 | plt.title(title) 96 | 97 | # Axis labels 98 | plt.xlabel('Principal Component 1') 99 | plt.ylabel('Principal Component 2') 100 | 101 | plt.show() 102 | 103 | # Plot the dataset X and the corresponding labels y in 3D using PCA. 104 | def plot_in_3d(self, X, y=None): 105 | X_transformed = self._transform(X, dim=3) 106 | x1 = X_transformed[:, 0] 107 | x2 = X_transformed[:, 1] 108 | x3 = X_transformed[:, 2] 109 | fig = plt.figure() 110 | ax = fig.add_subplot(111, projection='3d') 111 | ax.scatter(x1, x2, x3, c=y) 112 | plt.show() 113 | 114 | 115 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | sklearn 4 | pandas 5 | cvxopt 6 | scipy 7 | progressbar33 8 | terminaltables 9 | gym 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [easy_install] 5 | 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open 3 | from os import path 4 | 5 | __version__ = '0.0.4' 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # get the dependencies and installs 10 | with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f: 11 | all_reqs = f.read().split('\n') 12 | 13 | install_requires = [x.strip() for x in all_reqs if 'git+' not in x] 14 | dependency_links = [x.strip().replace('git+', '') for x in all_reqs if x.startswith('git+')] 15 | 16 | setup( 17 | name='mlfromscratch', 18 | version=__version__, 19 | description='Python implementations of some of the fundamental Machine Learning models and algorithms from scratch.', 20 | url='https://github.com/eriklindernoren/ML-From-Scratch', 21 | download_url='https://github.com/eriklindernoren/ML-From-Scratch/tarball/master', 22 | license='MIT', 23 | packages=find_packages(), 24 | include_package_data=True, 25 | author='Erik Linder-Noren', 26 | install_requires=install_requires, 27 | setup_requires=['numpy>=1.10', 'scipy>=0.17'], 28 | dependency_links=dependency_links, 29 | author_email='eriklindernoren@gmail.com' 30 | ) --------------------------------------------------------------------------------
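A minimal usage sketch of the clustering utilities above (not a file in the repository): it assumes the package is installed as described in setup.py, and uses scikit-learn's make_blobs purely as an illustrative data source.

from sklearn import datasets

from mlfromscratch.unsupervised_learning.k_means import KMeans
from mlfromscratch.unsupervised_learning.gaussian_mixture_model import GaussianMixtureModel
from mlfromscratch.utils import Plot

# Toy dataset with three well-separated clusters
X, y = datasets.make_blobs(n_samples=300, centers=3, random_state=1)

# K-Means: hard assignment of each sample to its closest centroid
kmeans_labels = KMeans(k=3, max_iterations=500).predict(X)

# Gaussian Mixture Model: assignment by the largest responsibility
gmm_labels = GaussianMixtureModel(k=3, max_iterations=2000, tolerance=1e-8).predict(X)

# Project to two principal components and plot the clusterings
Plot().plot_in_2d(X, kmeans_labels, title="K-Means")
Plot().plot_in_2d(X, gmm_labels, title="Gaussian Mixture Model")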