├── .github └── workflows │ └── python-app.yml ├── .gitignore ├── AUTHORS ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples ├── __init__.py ├── gaussian_mixture.py ├── gbm.py ├── kmeans.py ├── linear_models.py ├── naive_bayes.py ├── nearest_neighbors.py ├── nnet_convnet_mnist.py ├── nnet_mlp.py ├── nnet_rnn_binary_add.py ├── nnet_rnn_text_generation.py ├── pca.py ├── random_forest.py ├── rbm.py ├── rl_deep_q_learning.py ├── svm.py └── t-sne.py ├── mla ├── __init__.py ├── base │ ├── __init__.py │ └── base.py ├── datasets │ ├── __init__.py │ ├── base.py │ └── data │ │ ├── mnist │ │ ├── t10k-images-idx3-ubyte │ │ ├── t10k-labels-idx1-ubyte │ │ ├── train-images-idx3-ubyte │ │ └── train-labels-idx1-ubyte │ │ └── nietzsche.txt ├── ensemble │ ├── __init__.py │ ├── base.py │ ├── gbm.py │ ├── random_forest.py │ └── tree.py ├── fm.py ├── gaussian_mixture.py ├── kmeans.py ├── knn.py ├── linear_models.py ├── metrics │ ├── __init__.py │ ├── base.py │ ├── distance.py │ ├── metrics.py │ └── tests │ │ ├── __init__.py │ │ └── test_metrics.py ├── naive_bayes.py ├── neuralnet │ ├── __init__.py │ ├── activations.py │ ├── constraints.py │ ├── initializations.py │ ├── layers │ │ ├── __init__.py │ │ ├── basic.py │ │ ├── convnet.py │ │ ├── normalization.py │ │ └── recurrent │ │ │ ├── __init__.py │ │ │ ├── lstm.py │ │ │ └── rnn.py │ ├── loss.py │ ├── nnet.py │ ├── optimizers.py │ ├── parameters.py │ ├── regularizers.py │ └── tests │ │ ├── test_activations.py │ │ └── test_optimizers.py ├── pca.py ├── rbm.py ├── rl │ ├── __init__.py │ └── dqn.py ├── svm │ ├── __init__.py │ ├── kernerls.py │ └── svm.py ├── tests │ ├── __init__.py │ ├── test_classification_accuracy.py │ ├── test_reduction.py │ └── test_regression_accuracy.py ├── tsne.py └── utils │ ├── __init__.py │ └── main.py ├── requirements.txt ├── setup.cfg └── setup.py /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | timeout-minutes: 5 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | pip install -r requirements.txt 24 | - name: Test with pytest 25 | run: | 26 | pytest 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build/ 3 | dist/ 4 | mla.egg-info/ 5 | .cache 6 | *.swp 7 | .idea -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Artem Golubin 2 | Anebi Agbo 3 | Convex Path 4 | James Chevalier 5 | Jiancheng 6 | KaiMin Lai 7 | Nguyễn Tuấn 8 | Nicolas Hug 9 | Xiaochun Ma 10 | Yiran Sheng 11 | brady salz 12 | junwang007 13 | keineahnung2345 14 | lucaskolstad 15 | vincent tang 16 | xq5he 17 | LanderTome 18 | therickli 19 | Andrew Melnik 20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN mkdir -p /var/app 4 | WORKDIR /var/app 5 | COPY . 
/var/app 6 | 7 | # install scipy & numpy 8 | # install required packages 9 | RUN pip install scipy numpy && \ 10 | pip install . 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2020 Artem Golubin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include mla/datasets/data * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine learning algorithms 2 | A collection of minimal and clean implementations of machine learning algorithms. 3 | 4 | ### Why? 5 | This project is targeting people who want to learn internals of ml algorithms or implement them from scratch. 6 | The code is much easier to follow than the optimized libraries and easier to play with. 7 | All algorithms are implemented in Python, using numpy, scipy and autograd. 
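All of the estimators share the same minimal `fit`/`predict` interface, so a typical session looks like the sketch below (adapted from `examples/linear_models.py`; it assumes the package has been installed as described in the Installation section):

```python
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from mla.linear_models import LinearRegression
from mla.metrics.metrics import mean_squared_error

# Toy regression problem split into train/test parts
X, y = make_regression(n_samples=10000, n_features=100, noise=0.05, random_state=1111)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111)

# L2-regularized linear regression trained with gradient descent
model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.03)
model.fit(X_train, y_train)
print("mse:", mean_squared_error(y_test, model.predict(X_test)))
```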
8 | 9 | ### Implemented: 10 | * [Deep learning (MLP, CNN, RNN, LSTM)](mla/neuralnet) 11 | * [Linear regression, logistic regression](mla/linear_models.py) 12 | * [Random Forests](mla/ensemble/random_forest.py) 13 | * [Support vector machine (SVM) with kernels (Linear, Poly, RBF)](mla/svm) 14 | * [K-Means](mla/kmeans.py) 15 | * [Gaussian Mixture Model](mla/gaussian_mixture.py) 16 | * [K-nearest neighbors](mla/knn.py) 17 | * [Naive bayes](mla/naive_bayes.py) 18 | * [Principal component analysis (PCA)](mla/pca.py) 19 | * [Factorization machines](mla/fm.py) 20 | * [Restricted Boltzmann machine (RBM)](mla/rbm.py) 21 | * [t-Distributed Stochastic Neighbor Embedding (t-SNE)](mla/tsne.py) 22 | * [Gradient Boosting trees (also known as GBDT, GBRT, GBM, XGBoost)](mla/ensemble/gbm.py) 23 | * [Reinforcement learning (Deep Q learning)](mla/rl) 24 | 25 | 26 | ### Installation 27 | ```sh 28 | git clone https://github.com/rushter/MLAlgorithms 29 | cd MLAlgorithms 30 | pip install scipy numpy 31 | python setup.py develop 32 | ``` 33 | ### How to run examples without installation 34 | ```sh 35 | cd MLAlgorithms 36 | python -m examples.linear_models 37 | ``` 38 | ### How to run examples within Docker 39 | ```sh 40 | cd MLAlgorithms 41 | docker build -t mlalgorithms . 42 | docker run --rm -it mlalgorithms bash 43 | python -m examples.linear_models 44 | ``` 45 | ### Contributing 46 | 47 | Your contributions are always welcome! 48 | Feel free to improve existing code, documentation or implement new algorithm. 49 | Please open an issue to propose your changes if they are big enough. 50 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | -------------------------------------------------------------------------------- /examples/gaussian_mixture.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | from mla.kmeans import KMeans 6 | from mla.gaussian_mixture import GaussianMixture 7 | 8 | random.seed(1) 9 | np.random.seed(6) 10 | 11 | 12 | def make_clusters(skew=True, *arg, **kwargs): 13 | X, y = datasets.make_blobs(*arg, **kwargs) 14 | if skew: 15 | nrow = X.shape[1] 16 | for i in np.unique(y): 17 | X[y == i] = X[y == i].dot(np.random.random((nrow, nrow)) - 0.5) 18 | return X, y 19 | 20 | 21 | def KMeans_and_GMM(K): 22 | COLOR = "bgrcmyk" 23 | 24 | X, y = make_clusters(skew=True, n_samples=1500, centers=K) 25 | _, axes = plt.subplots(1, 3) 26 | 27 | # Ground Truth 28 | axes[0].scatter(X[:, 0], X[:, 1], c=[COLOR[int(assignment)] for assignment in y]) 29 | axes[0].set_title("Ground Truth") 30 | 31 | # KMeans 32 | kmeans = KMeans(K=K, init="++") 33 | kmeans.fit(X) 34 | kmeans.predict() 35 | axes[1].set_title("KMeans") 36 | kmeans.plot(ax=axes[1], holdon=True) 37 | 38 | # Gaussian Mixture 39 | gmm = GaussianMixture(K=K, init="kmeans") 40 | gmm.fit(X) 41 | axes[2].set_title("Gaussian Mixture") 42 | gmm.plot(ax=axes[2]) 43 | 44 | 45 | if __name__ == "__main__": 46 | KMeans_and_GMM(4) 47 | -------------------------------------------------------------------------------- /examples/gbm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from sklearn.datasets import make_classification 4 | from sklearn.datasets import make_regression 5 | from sklearn.metrics import 
roc_auc_score 6 | 7 | try: 8 | from sklearn.model_selection import train_test_split 9 | except ImportError: 10 | from sklearn.cross_validation import train_test_split 11 | 12 | from mla.ensemble.gbm import GradientBoostingClassifier, GradientBoostingRegressor 13 | from mla.metrics.metrics import mean_squared_error 14 | 15 | logging.basicConfig(level=logging.DEBUG) 16 | 17 | 18 | def classification(): 19 | # Generate a random binary classification problem. 20 | X, y = make_classification( 21 | n_samples=350, n_features=15, n_informative=10, random_state=1111, n_classes=2, class_sep=1.0, n_redundant=0 22 | ) 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 24 | 25 | model = GradientBoostingClassifier(n_estimators=50, max_depth=4, max_features=8, learning_rate=0.1) 26 | model.fit(X_train, y_train) 27 | predictions = model.predict(X_test) 28 | print(predictions) 29 | print(predictions.min()) 30 | print(predictions.max()) 31 | print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions)) 32 | 33 | 34 | def regression(): 35 | # Generate a random regression problem 36 | X, y = make_regression( 37 | n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 38 | ) 39 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 40 | 41 | model = GradientBoostingRegressor(n_estimators=25, max_depth=5, max_features=3) 42 | model.fit(X_train, y_train) 43 | predictions = model.predict(X_test) 44 | print("regression, mse: %s" % mean_squared_error(y_test.flatten(), predictions.flatten())) 45 | 46 | 47 | if __name__ == "__main__": 48 | classification() 49 | # regression() 50 | -------------------------------------------------------------------------------- /examples/kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_blobs 3 | 4 | from mla.kmeans import KMeans 5 | 6 | 7 | def kmeans_example(plot=False): 8 | X, y = make_blobs(centers=4, n_samples=500, n_features=2, shuffle=True, random_state=42) 9 | clusters = len(np.unique(y)) 10 | k = KMeans(K=clusters, max_iters=150, init="++") 11 | k.fit(X) 12 | k.predict() 13 | 14 | if plot: 15 | k.plot() 16 | 17 | 18 | if __name__ == "__main__": 19 | kmeans_example(plot=True) 20 | -------------------------------------------------------------------------------- /examples/linear_models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: 4 | from sklearn.model_selection import train_test_split 5 | except ImportError: 6 | from sklearn.cross_validation import train_test_split 7 | from sklearn.datasets import make_classification 8 | from sklearn.datasets import make_regression 9 | 10 | from mla.linear_models import LinearRegression, LogisticRegression 11 | from mla.metrics.metrics import mean_squared_error, accuracy 12 | 13 | # Change to DEBUG to see convergence 14 | logging.basicConfig(level=logging.ERROR) 15 | 16 | 17 | def regression(): 18 | # Generate a random regression problem 19 | X, y = make_regression( 20 | n_samples=10000, n_features=100, n_informative=75, n_targets=1, noise=0.05, random_state=1111, bias=0.5 21 | ) 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 23 | 24 | model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.03) 25 | model.fit(X_train, y_train) 26 | predictions = 
model.predict(X_test) 27 | print("regression mse", mean_squared_error(y_test, predictions)) 28 | 29 | 30 | def classification(): 31 | # Generate a random binary classification problem. 32 | X, y = make_classification( 33 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 34 | ) 35 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 36 | 37 | model = LogisticRegression(lr=0.01, max_iters=500, penalty="l1", C=0.01) 38 | model.fit(X_train, y_train) 39 | predictions = model.predict(X_test) 40 | print("classification accuracy", accuracy(y_test, predictions)) 41 | 42 | 43 | if __name__ == "__main__": 44 | regression() 45 | classification() 46 | -------------------------------------------------------------------------------- /examples/naive_bayes.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_classification 2 | from sklearn.metrics import roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | 5 | from mla.naive_bayes import NaiveBayesClassifier 6 | 7 | 8 | def classification(): 9 | # Generate a random binary classification problem. 10 | X, y = make_classification( 11 | n_samples=1000, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 12 | ) 13 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 14 | 15 | model = NaiveBayesClassifier() 16 | model.fit(X_train, y_train) 17 | predictions = model.predict(X_test)[:, 1] 18 | 19 | print("classification accuracy", roc_auc_score(y_test, predictions)) 20 | 21 | 22 | if __name__ == "__main__": 23 | classification() 24 | -------------------------------------------------------------------------------- /examples/nearest_neighbors.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.model_selection import train_test_split 3 | except ImportError: 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.datasets import make_classification 6 | from sklearn.datasets import make_regression 7 | from scipy.spatial import distance 8 | 9 | from mla import knn 10 | from mla.metrics.metrics import mean_squared_error, accuracy 11 | 12 | 13 | def regression(): 14 | # Generate a random regression problem 15 | X, y = make_regression( 16 | n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 17 | ) 18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 19 | 20 | model = knn.KNNRegressor(k=5, distance_func=distance.euclidean) 21 | model.fit(X_train, y_train) 22 | predictions = model.predict(X_test) 23 | print("regression mse", mean_squared_error(y_test, predictions)) 24 | 25 | 26 | def classification(): 27 | X, y = make_classification( 28 | n_samples=500, 29 | n_features=5, 30 | n_informative=5, 31 | n_redundant=0, 32 | n_repeated=0, 33 | n_classes=3, 34 | random_state=1111, 35 | class_sep=1.5, 36 | ) 37 | 38 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 39 | 40 | clf = knn.KNNClassifier(k=5, distance_func=distance.euclidean) 41 | 42 | clf.fit(X_train, y_train) 43 | predictions = clf.predict(X_test) 44 | print("classification accuracy", accuracy(y_test, predictions)) 45 | 46 | 47 | if __name__ == "__main__": 48 | regression() 49 | classification() 50 | 
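# Note: distance_func is assumed here to be any callable that, like the
# scipy.spatial.distance functions used above, takes two 1-D vectors and
# returns a scalar; e.g. distance.cityblock or distance.chebyshev could be
# passed as drop-in replacements for distance.euclidean.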
-------------------------------------------------------------------------------- /examples/nnet_convnet_mnist.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mla.datasets import load_mnist 4 | from mla.metrics import accuracy 5 | from mla.neuralnet import NeuralNet 6 | from mla.neuralnet.layers import Activation, Convolution, MaxPooling, Flatten, Dropout, Parameters 7 | from mla.neuralnet.layers import Dense 8 | from mla.neuralnet.optimizers import Adadelta 9 | from mla.utils import one_hot 10 | 11 | logging.basicConfig(level=logging.DEBUG) 12 | 13 | 14 | # Load MNIST dataset 15 | X_train, X_test, y_train, y_test = load_mnist() 16 | 17 | # Normalize data 18 | X_train /= 255.0 19 | X_test /= 255.0 20 | 21 | y_train = one_hot(y_train.flatten()) 22 | y_test = one_hot(y_test.flatten()) 23 | print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) 24 | 25 | # Approx. 15-20 min. per epoch 26 | model = NeuralNet( 27 | layers=[ 28 | Convolution(n_filters=32, filter_shape=(3, 3), padding=(1, 1), stride=(1, 1)), 29 | Activation("relu"), 30 | Convolution(n_filters=32, filter_shape=(3, 3), padding=(1, 1), stride=(1, 1)), 31 | Activation("relu"), 32 | MaxPooling(pool_shape=(2, 2), stride=(2, 2)), 33 | Dropout(0.5), 34 | Flatten(), 35 | Dense(128), 36 | Activation("relu"), 37 | Dropout(0.5), 38 | Dense(10), 39 | Activation("softmax"), 40 | ], 41 | loss="categorical_crossentropy", 42 | optimizer=Adadelta(), 43 | metric="accuracy", 44 | batch_size=128, 45 | max_epochs=3, 46 | ) 47 | 48 | model.fit(X_train, y_train) 49 | predictions = model.predict(X_test) 50 | print(accuracy(y_test, predictions)) 51 | -------------------------------------------------------------------------------- /examples/nnet_mlp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: 4 | from sklearn.model_selection import train_test_split 5 | except ImportError: 6 | from sklearn.cross_validation import train_test_split 7 | from sklearn.datasets import make_classification 8 | from sklearn.datasets import make_regression 9 | from sklearn.metrics import roc_auc_score 10 | 11 | from mla.metrics.metrics import mean_squared_error 12 | from mla.neuralnet import NeuralNet 13 | from mla.neuralnet.constraints import MaxNorm 14 | from mla.neuralnet.layers import Activation, Dense, Dropout 15 | from mla.neuralnet.optimizers import Adadelta, Adam 16 | from mla.neuralnet.parameters import Parameters 17 | from mla.neuralnet.regularizers import L2 18 | from mla.utils import one_hot 19 | 20 | logging.basicConfig(level=logging.DEBUG) 21 | 22 | 23 | def classification(): 24 | # Generate a random binary classification problem. 
25 | X, y = make_classification( 26 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 27 | ) 28 | y = one_hot(y) 29 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 30 | 31 | model = NeuralNet( 32 | layers=[ 33 | Dense(256, Parameters(init="uniform", regularizers={"W": L2(0.05)})), 34 | Activation("relu"), 35 | Dropout(0.5), 36 | Dense(128, Parameters(init="normal", constraints={"W": MaxNorm()})), 37 | Activation("relu"), 38 | Dense(2), 39 | Activation("softmax"), 40 | ], 41 | loss="categorical_crossentropy", 42 | optimizer=Adadelta(), 43 | metric="accuracy", 44 | batch_size=64, 45 | max_epochs=25, 46 | ) 47 | model.fit(X_train, y_train) 48 | predictions = model.predict(X_test) 49 | print("classification accuracy", roc_auc_score(y_test[:, 0], predictions[:, 0])) 50 | 51 | 52 | def regression(): 53 | # Generate a random regression problem 54 | X, y = make_regression(n_samples=5000, n_features=25, n_informative=25, n_targets=1, random_state=100, noise=0.05) 55 | y *= 0.01 56 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 57 | 58 | model = NeuralNet( 59 | layers=[ 60 | Dense(64, Parameters(init="normal")), 61 | Activation("linear"), 62 | Dense(32, Parameters(init="normal")), 63 | Activation("linear"), 64 | Dense(1), 65 | ], 66 | loss="mse", 67 | optimizer=Adam(), 68 | metric="mse", 69 | batch_size=256, 70 | max_epochs=15, 71 | ) 72 | model.fit(X_train, y_train) 73 | predictions = model.predict(X_test) 74 | print("regression mse", mean_squared_error(y_test, predictions.flatten())) 75 | 76 | 77 | if __name__ == "__main__": 78 | classification() 79 | regression() 80 | -------------------------------------------------------------------------------- /examples/nnet_rnn_binary_add.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from itertools import combinations, islice 3 | 4 | import numpy as np 5 | 6 | try: 7 | from sklearn.model_selection import train_test_split 8 | except ImportError: 9 | from sklearn.cross_validation import train_test_split 10 | 11 | from mla.metrics import accuracy 12 | from mla.neuralnet import NeuralNet 13 | from mla.neuralnet.layers import Activation, TimeDistributedDense 14 | from mla.neuralnet.layers.recurrent import LSTM 15 | from mla.neuralnet.optimizers import Adam 16 | 17 | logging.basicConfig(level=logging.DEBUG) 18 | 19 | 20 | def addition_dataset(dim=10, n_samples=10000, batch_size=64): 21 | """Generate binary addition dataset. 
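Each sample encodes two dim-bit numbers as bit vectors (least-significant
bit first) stacked along the last axis of X, and y holds the bit vector of
their sum.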
22 | http://devankuleindiren.com/Projects/rnn_arithmetic.php 23 | """ 24 | binary_format = "{:0" + str(dim) + "b}" 25 | 26 | # Generate all possible number combinations 27 | combs = list(islice(combinations(range(2 ** (dim - 1)), 2), n_samples)) 28 | 29 | # Initialize empty arrays 30 | X = np.zeros((len(combs), dim, 2), dtype=np.uint8) 31 | y = np.zeros((len(combs), dim, 1), dtype=np.uint8) 32 | 33 | for i, (a, b) in enumerate(combs): 34 | # Convert numbers to binary format 35 | X[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a)])) 36 | X[i, :, 1] = list(reversed([int(x) for x in binary_format.format(b)])) 37 | 38 | # Generate target variable (a+b) 39 | y[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a + b)])) 40 | 41 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111) 42 | 43 | # Round number of examples for batch processing 44 | train_b = (X_train.shape[0] // batch_size) * batch_size 45 | test_b = (X_test.shape[0] // batch_size) * batch_size 46 | X_train = X_train[0:train_b] 47 | y_train = y_train[0:train_b] 48 | 49 | X_test = X_test[0:test_b] 50 | y_test = y_test[0:test_b] 51 | return X_train, X_test, y_train, y_test 52 | 53 | 54 | def addition_problem(ReccurentLayer): 55 | X_train, X_test, y_train, y_test = addition_dataset(8, 5000) 56 | 57 | print(X_train.shape, X_test.shape) 58 | model = NeuralNet( 59 | layers=[ReccurentLayer, TimeDistributedDense(1), Activation("sigmoid")], 60 | loss="mse", 61 | optimizer=Adam(), 62 | metric="mse", 63 | batch_size=64, 64 | max_epochs=15, 65 | ) 66 | model.fit(X_train, y_train) 67 | predictions = np.round(model.predict(X_test)) 68 | predictions = np.packbits(predictions.astype(np.uint8)) 69 | y_test = np.packbits(y_test.astype(np.int)) 70 | print(accuracy(y_test, predictions)) 71 | 72 | 73 | # RNN 74 | # addition_problem(RNN(16, parameters=Parameters(constraints={'W': SmallNorm(), 'U': SmallNorm()}))) 75 | # LSTM 76 | addition_problem(LSTM(16)) 77 | -------------------------------------------------------------------------------- /examples/nnet_rnn_text_generation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import logging 4 | import random 5 | 6 | import numpy as np 7 | import sys 8 | 9 | from mla.datasets import load_nietzsche 10 | from mla.neuralnet import NeuralNet 11 | from mla.neuralnet.constraints import SmallNorm 12 | from mla.neuralnet.layers import Activation, Dense 13 | from mla.neuralnet.layers.recurrent import LSTM, RNN 14 | from mla.neuralnet.optimizers import RMSprop 15 | 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | 19 | # Example taken from: https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py 20 | 21 | 22 | def sample(preds, temperature=1.0): 23 | # helper function to sample an index from a probability array 24 | preds = np.asarray(preds).astype("float64") 25 | preds = np.log(preds) / temperature 26 | exp_preds = np.exp(preds) 27 | preds = exp_preds / np.sum(exp_preds) 28 | probas = np.random.multinomial(1, preds, 1) 29 | return np.argmax(probas) 30 | 31 | 32 | X, y, text, chars, char_indices, indices_char = load_nietzsche() 33 | # Round the number of sequences for batch processing 34 | items_count = X.shape[0] - (X.shape[0] % 64) 35 | maxlen = X.shape[1] 36 | X = X[0:items_count] 37 | y = y[0:items_count] 38 | 39 | print(X.shape, y.shape) 40 | # LSTM OR RNN 41 | # rnn_layer = RNN(128, return_sequences=False) 42 | rnn_layer = LSTM(128, 
return_sequences=False) 43 | 44 | model = NeuralNet( 45 | layers=[ 46 | rnn_layer, 47 | # Flatten(), 48 | # TimeStepSlicer(-1), 49 | Dense(X.shape[2]), 50 | Activation("softmax"), 51 | ], 52 | loss="categorical_crossentropy", 53 | optimizer=RMSprop(learning_rate=0.01), 54 | metric="accuracy", 55 | batch_size=64, 56 | max_epochs=1, 57 | shuffle=False, 58 | ) 59 | 60 | for _ in range(25): 61 | model.fit(X, y) 62 | start_index = random.randint(0, len(text) - maxlen - 1) 63 | 64 | generated = "" 65 | sentence = text[start_index : start_index + maxlen] 66 | generated += sentence 67 | print('----- Generating with seed: "' + sentence + '"') 68 | sys.stdout.write(generated) 69 | for i in range(100): 70 | x = np.zeros((64, maxlen, len(chars))) 71 | for t, char in enumerate(sentence): 72 | x[0, t, char_indices[char]] = 1.0 73 | preds = model.predict(x)[0] 74 | next_index = sample(preds, 0.5) 75 | next_char = indices_char[next_index] 76 | 77 | generated += next_char 78 | sentence = sentence[1:] + next_char 79 | 80 | sys.stdout.write(next_char) 81 | sys.stdout.flush() 82 | print() 83 | -------------------------------------------------------------------------------- /examples/pca.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.model_selection import train_test_split 3 | except ImportError: 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.datasets import make_classification 6 | 7 | from mla.linear_models import LogisticRegression 8 | from mla.metrics import accuracy 9 | from mla.pca import PCA 10 | 11 | # logging.basicConfig(level=logging.DEBUG) 12 | 13 | # Generate a random binary classification problem. 14 | X, y = make_classification( 15 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 16 | ) 17 | 18 | 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 20 | 21 | for s in ["svd", "eigen"]: 22 | p = PCA(15, solver=s) 23 | 24 | # fit PCA with training data, not entire dataset 25 | p.fit(X_train) 26 | X_train_reduced = p.transform(X_train) 27 | X_test_reduced = p.transform(X_test) 28 | 29 | model = LogisticRegression(lr=0.001, max_iters=2500) 30 | model.fit(X_train_reduced, y_train) 31 | predictions = model.predict(X_test_reduced) 32 | print("Classification accuracy for %s PCA: %s" % (s, accuracy(y_test, predictions))) 33 | -------------------------------------------------------------------------------- /examples/random_forest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from sklearn.datasets import make_classification 5 | from sklearn.datasets import make_regression 6 | from sklearn.metrics import roc_auc_score, accuracy_score 7 | 8 | try: 9 | from sklearn.model_selection import train_test_split 10 | except ImportError: 11 | from sklearn.cross_validation import train_test_split 12 | 13 | from mla.ensemble.random_forest import RandomForestClassifier, RandomForestRegressor 14 | from mla.metrics.metrics import mean_squared_error 15 | 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | 19 | def classification(): 20 | # Generate a random binary classification problem. 
21 | X, y = make_classification( 22 | n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 23 | ) 24 | 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 26 | 27 | model = RandomForestClassifier(n_estimators=10, max_depth=4) 28 | model.fit(X_train, y_train) 29 | 30 | predictions_prob = model.predict(X_test)[:, 1] 31 | predictions = np.argmax(model.predict(X_test), axis=1) 32 | #print(predictions.shape) 33 | print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions_prob)) 34 | print("classification, accuracy score: %s" % accuracy_score(y_test, predictions)) 35 | 36 | 37 | def regression(): 38 | # Generate a random regression problem 39 | X, y = make_regression( 40 | n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 41 | ) 42 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 43 | 44 | model = RandomForestRegressor(n_estimators=50, max_depth=10, max_features=3) 45 | model.fit(X_train, y_train) 46 | predictions = model.predict(X_test) 47 | print("regression, mse: %s" % mean_squared_error(y_test.flatten(), predictions.flatten())) 48 | 49 | 50 | if __name__ == "__main__": 51 | classification() 52 | # regression() 53 | -------------------------------------------------------------------------------- /examples/rbm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | from mla.rbm import RBM 6 | 7 | logging.basicConfig(level=logging.DEBUG) 8 | 9 | 10 | def print_curve(rbm): 11 | from matplotlib import pyplot as plt 12 | 13 | def moving_average(a, n=25): 14 | ret = np.cumsum(a, dtype=float) 15 | ret[n:] = ret[n:] - ret[:-n] 16 | return ret[n - 1:] / n 17 | 18 | plt.plot(moving_average(rbm.errors)) 19 | plt.show() 20 | 21 | 22 | X = np.random.uniform(0, 1, (1500, 10)) 23 | rbm = RBM(n_hidden=10, max_epochs=200, batch_size=10, learning_rate=0.1) 24 | rbm.fit(X) 25 | print_curve(rbm) 26 | -------------------------------------------------------------------------------- /examples/rl_deep_q_learning.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mla.neuralnet import NeuralNet 4 | from mla.neuralnet.layers import Activation, Dense 5 | from mla.neuralnet.optimizers import Adam 6 | from mla.rl.dqn import DQN 7 | 8 | logging.basicConfig(level=logging.CRITICAL) 9 | 10 | 11 | def mlp_model(n_actions, batch_size=64): 12 | model = NeuralNet( 13 | layers=[Dense(32), Activation("relu"), Dense(n_actions)], 14 | loss="mse", 15 | optimizer=Adam(), 16 | metric="mse", 17 | batch_size=batch_size, 18 | max_epochs=1, 19 | verbose=False, 20 | ) 21 | return model 22 | 23 | 24 | model = DQN(n_episodes=2500, batch_size=64) 25 | model.init_environment("CartPole-v0") 26 | model.init_model(mlp_model) 27 | 28 | try: 29 | # Train the model 30 | # It can take from 300 to 2500 episodes to solve CartPole-v0 problem due to randomness of environment. 
31 | # You can stop training process using Ctrl+C signal 32 | # Read more about this problem: https://gym.openai.com/envs/CartPole-v0 33 | model.train(render=False) 34 | except KeyboardInterrupt: 35 | pass 36 | # Render trained model 37 | model.play(episodes=100) 38 | -------------------------------------------------------------------------------- /examples/svm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: 4 | from sklearn.model_selection import train_test_split 5 | except ImportError: 6 | from sklearn.cross_validation import train_test_split 7 | from sklearn.datasets import make_classification 8 | 9 | from mla.metrics.metrics import accuracy 10 | from mla.svm.kernerls import Linear, RBF 11 | from mla.svm.svm import SVM 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | 15 | 16 | def classification(): 17 | # Generate a random binary classification problem. 18 | X, y = make_classification( 19 | n_samples=1200, n_features=10, n_informative=5, random_state=1111, n_classes=2, class_sep=1.75 20 | ) 21 | # Convert y to {-1, 1} 22 | y = (y * 2) - 1 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111) 24 | 25 | for kernel in [RBF(gamma=0.1), Linear()]: 26 | model = SVM(max_iter=500, kernel=kernel, C=0.6) 27 | model.fit(X_train, y_train) 28 | predictions = model.predict(X_test) 29 | print("Classification accuracy (%s): %s" % (kernel, accuracy(y_test, predictions))) 30 | 31 | 32 | if __name__ == "__main__": 33 | classification() 34 | -------------------------------------------------------------------------------- /examples/t-sne.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import matplotlib.pyplot as plt 4 | from sklearn.datasets import make_classification 5 | 6 | from mla.tsne import TSNE 7 | 8 | logging.basicConfig(level=logging.DEBUG) 9 | 10 | X, y = make_classification( 11 | n_samples=500, n_features=10, n_informative=5, n_redundant=0, random_state=1111, n_classes=2, class_sep=2.5 12 | ) 13 | 14 | p = TSNE(2, max_iter=500) 15 | X = p.fit_transform(X) 16 | 17 | colors = ["red", "green"] 18 | for t in range(2): 19 | t_mask = (y == t).astype(bool) 20 | plt.scatter(X[t_mask, 0], X[t_mask, 1], color=colors[t]) 21 | 22 | plt.show() 23 | -------------------------------------------------------------------------------- /mla/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | # copyright: (c) 2016 by Artem Golubin 3 | # license: MIT, see LICENSE for more details. 4 | -------------------------------------------------------------------------------- /mla/base/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .base import * 3 | -------------------------------------------------------------------------------- /mla/base/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 5 | class BaseEstimator: 6 | y_required = True 7 | fit_required = True 8 | 9 | def _setup_input(self, X, y=None): 10 | """Ensure inputs to an estimator are in the expected format. 11 | 12 | Ensures X and y are stored as numpy ndarrays by converting from an 13 | array-like object if necessary. Enables estimators to define whether 14 | they require a set of y target values or not with y_required, e.g. 
15 | kmeans clustering requires no target labels and is fit against only X. 16 | 17 | Parameters 18 | ---------- 19 | X : array-like 20 | Feature dataset. 21 | y : array-like 22 | Target values. By default is required, but if y_required = false 23 | then may be omitted. 24 | """ 25 | if not isinstance(X, np.ndarray): 26 | X = np.array(X) 27 | 28 | if X.size == 0: 29 | raise ValueError("Got an empty matrix.") 30 | 31 | if X.ndim == 1: 32 | self.n_samples, self.n_features = 1, X.shape 33 | else: 34 | self.n_samples, self.n_features = X.shape[0], np.prod(X.shape[1:]) 35 | 36 | self.X = X 37 | 38 | if self.y_required: 39 | if y is None: 40 | raise ValueError("Missed required argument y") 41 | 42 | if not isinstance(y, np.ndarray): 43 | y = np.array(y) 44 | 45 | if y.size == 0: 46 | raise ValueError("The targets array must be no-empty.") 47 | 48 | self.y = y 49 | 50 | def fit(self, X, y=None): 51 | self._setup_input(X, y) 52 | 53 | def predict(self, X=None): 54 | if not isinstance(X, np.ndarray): 55 | X = np.array(X) 56 | 57 | if self.X is not None or not self.fit_required: 58 | return self._predict(X) 59 | else: 60 | raise ValueError("You must call `fit` before `predict`") 61 | 62 | def _predict(self, X=None): 63 | raise NotImplementedError() 64 | -------------------------------------------------------------------------------- /mla/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from mla.datasets.base import * 3 | -------------------------------------------------------------------------------- /mla/datasets/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import os 3 | 4 | import numpy as np 5 | 6 | 7 | def get_filename(name): 8 | return os.path.join(os.path.dirname(__file__), name) 9 | 10 | 11 | def load_mnist(): 12 | def load(dataset="training", digits=np.arange(10)): 13 | import struct 14 | from array import array as pyarray 15 | from numpy import array, int8, uint8, zeros 16 | 17 | if dataset == "train": 18 | fname_img = get_filename("data/mnist/train-images-idx3-ubyte") 19 | fname_lbl = get_filename("data/mnist/train-labels-idx1-ubyte") 20 | elif dataset == "test": 21 | fname_img = get_filename("data/mnist/t10k-images-idx3-ubyte") 22 | fname_lbl = get_filename("data/mnist/t10k-labels-idx1-ubyte") 23 | else: 24 | raise ValueError("Unexpected dataset name: %r" % dataset) 25 | 26 | flbl = open(fname_lbl, "rb") 27 | magic_nr, size = struct.unpack(">II", flbl.read(8)) 28 | lbl = pyarray("b", flbl.read()) 29 | flbl.close() 30 | 31 | fimg = open(fname_img, "rb") 32 | magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16)) 33 | img = pyarray("B", fimg.read()) 34 | fimg.close() 35 | 36 | ind = [k for k in range(size) if lbl[k] in digits] 37 | N = len(ind) 38 | 39 | images = zeros((N, rows, cols), dtype=uint8) 40 | labels = zeros((N, 1), dtype=int8) 41 | for i in range(len(ind)): 42 | images[i] = array(img[ind[i] * rows * cols: (ind[i] + 1) * rows * cols]).reshape((rows, cols)) 43 | labels[i] = lbl[ind[i]] 44 | 45 | return images, labels 46 | 47 | X_train, y_train = load("train") 48 | X_test, y_test = load("test") 49 | 50 | X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype(np.float32) 51 | X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype(np.float32) 52 | 53 | return X_train, X_test, y_train, y_test 54 | 55 | 56 | def load_nietzsche(): 57 | text = open(get_filename("data/nietzsche.txt"), "rt").read().lower() 58 | chars = 
set(list(text)) 59 | char_indices = {ch: i for i, ch in enumerate(chars)} 60 | indices_char = {i: ch for i, ch in enumerate(chars)} 61 | 62 | maxlen = 40 63 | step = 3 64 | sentences = [] 65 | next_chars = [] 66 | for i in range(0, len(text) - maxlen, step): 67 | sentences.append(text[i: i + maxlen]) 68 | next_chars.append(text[i + maxlen]) 69 | 70 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 71 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 72 | for i, sentence in enumerate(sentences): 73 | for t, char in enumerate(sentence): 74 | X[i, t, char_indices[char]] = 1 75 | y[i, char_indices[next_chars[i]]] = 1 76 | return X, y, text, chars, char_indices, indices_char 77 | -------------------------------------------------------------------------------- /mla/datasets/data/mnist/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/datasets/data/mnist/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /mla/datasets/data/mnist/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             -------------------------------------------------------------------------------- /mla/datasets/data/mnist/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/datasets/data/mnist/train-images-idx3-ubyte -------------------------------------------------------------------------------- /mla/datasets/data/mnist/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/datasets/data/mnist/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /mla/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .random_forest import RandomForestClassifier, RandomForestRegressor 3 | -------------------------------------------------------------------------------- /mla/ensemble/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from scipy import stats 4 | 5 | 6 | def f_entropy(p): 7 | # Convert values to probability 8 | p = np.bincount(p) / 
float(p.shape[0]) 9 | 10 | ep = stats.entropy(p) 11 | if ep == -float("inf"): 12 | return 0.0 13 | return ep 14 | 15 | 16 | def information_gain(y, splits): 17 | splits_entropy = sum([f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits]) 18 | return f_entropy(y) - splits_entropy 19 | 20 | 21 | def mse_criterion(y, splits): 22 | y_mean = np.mean(y) 23 | return -sum([np.sum((split - y_mean) ** 2) * (float(split.shape[0]) / y.shape[0]) for split in splits]) 24 | 25 | 26 | def xgb_criterion(y, left, right, loss): 27 | left = loss.gain(left["actual"], left["y_pred"]) 28 | right = loss.gain(right["actual"], right["y_pred"]) 29 | initial = loss.gain(y["actual"], y["y_pred"]) 30 | gain = left + right - initial 31 | return gain 32 | 33 | 34 | def get_split_mask(X, column, value): 35 | left_mask = X[:, column] < value 36 | right_mask = X[:, column] >= value 37 | return left_mask, right_mask 38 | 39 | 40 | def split(X, y, value): 41 | left_mask = X < value 42 | right_mask = X >= value 43 | return y[left_mask], y[right_mask] 44 | 45 | 46 | def split_dataset(X, target, column, value, return_X=True): 47 | left_mask, right_mask = get_split_mask(X, column, value) 48 | 49 | left, right = {}, {} 50 | for key in target.keys(): 51 | left[key] = target[key][left_mask] 52 | right[key] = target[key][right_mask] 53 | 54 | if return_X: 55 | left_X, right_X = X[left_mask], X[right_mask] 56 | return left_X, right_X, left, right 57 | else: 58 | return left, right 59 | -------------------------------------------------------------------------------- /mla/ensemble/gbm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | # logistic function 4 | from scipy.special import expit 5 | 6 | from mla.base import BaseEstimator 7 | from mla.ensemble.base import mse_criterion 8 | from mla.ensemble.tree import Tree 9 | 10 | """ 11 | References: 12 | https://arxiv.org/pdf/1603.02754v3.pdf 13 | http://www.saedsayad.com/docs/xgboost.pdf 14 | https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf 15 | http://stats.stackexchange.com/questions/202858/loss-function-approximation-with-taylor-expansion 16 | """ 17 | 18 | 19 | class Loss: 20 | """Base class for loss functions.""" 21 | 22 | def __init__(self, regularization=1.0): 23 | self.regularization = regularization 24 | 25 | def grad(self, actual, predicted): 26 | """First order gradient.""" 27 | raise NotImplementedError() 28 | 29 | def hess(self, actual, predicted): 30 | """Second order gradient.""" 31 | raise NotImplementedError() 32 | 33 | def approximate(self, actual, predicted): 34 | """Approximate leaf value.""" 35 | return self.grad(actual, predicted).sum() / (self.hess(actual, predicted).sum() + self.regularization) 36 | 37 | def transform(self, pred): 38 | """Transform predictions values.""" 39 | return pred 40 | 41 | def gain(self, actual, predicted): 42 | """Calculate gain for split search.""" 43 | nominator = self.grad(actual, predicted).sum() ** 2 44 | denominator = self.hess(actual, predicted).sum() + self.regularization 45 | return 0.5 * (nominator / denominator) 46 | 47 | 48 | class LeastSquaresLoss(Loss): 49 | """Least squares loss""" 50 | 51 | def grad(self, actual, predicted): 52 | return actual - predicted 53 | 54 | def hess(self, actual, predicted): 55 | return np.ones_like(actual) 56 | 57 | 58 | class LogisticLoss(Loss): 59 | """Logistic loss.""" 60 | 61 | def grad(self, actual, predicted): 62 | return actual * expit(-actual * predicted) 63 | 64 | def 
hess(self, actual, predicted): 65 | expits = expit(predicted) 66 | return expits * (1 - expits) 67 | 68 | def transform(self, output): 69 | # Apply logistic (sigmoid) function to the output 70 | return expit(output) 71 | 72 | 73 | class GradientBoosting(BaseEstimator): 74 | """Gradient boosting trees with Taylor's expansion approximation (as in xgboost).""" 75 | 76 | def __init__(self, n_estimators, learning_rate=0.1, max_features=10, max_depth=2, min_samples_split=10): 77 | self.min_samples_split = min_samples_split 78 | self.learning_rate = learning_rate 79 | self.max_depth = max_depth 80 | self.max_features = max_features 81 | self.n_estimators = n_estimators 82 | self.trees = [] 83 | self.loss = None 84 | 85 | def fit(self, X, y=None): 86 | self._setup_input(X, y) 87 | self.y_mean = np.mean(y) 88 | self._train() 89 | 90 | def _train(self): 91 | # Initialize model with zeros 92 | y_pred = np.zeros(self.n_samples, np.float32) 93 | # Or mean 94 | # y_pred = np.full(self.n_samples, self.y_mean) 95 | 96 | for n in range(self.n_estimators): 97 | residuals = self.loss.grad(self.y, y_pred) 98 | tree = Tree(regression=True, criterion=mse_criterion) 99 | # Pass multiple target values to the tree learner 100 | targets = { 101 | # Residual values 102 | "y": residuals, 103 | # Actual target values 104 | "actual": self.y, 105 | # Predictions from previous step 106 | "y_pred": y_pred, 107 | } 108 | tree.train( 109 | self.X, 110 | targets, 111 | max_features=self.max_features, 112 | min_samples_split=self.min_samples_split, 113 | max_depth=self.max_depth, 114 | loss=self.loss, 115 | ) 116 | predictions = tree.predict(self.X) 117 | y_pred += self.learning_rate * predictions 118 | self.trees.append(tree) 119 | 120 | def _predict(self, X=None): 121 | y_pred = np.zeros(X.shape[0], np.float32) 122 | 123 | for i, tree in enumerate(self.trees): 124 | y_pred += self.learning_rate * tree.predict(X) 125 | return y_pred 126 | 127 | def predict(self, X=None): 128 | return self.loss.transform(self._predict(X)) 129 | 130 | 131 | class GradientBoostingRegressor(GradientBoosting): 132 | def fit(self, X, y=None): 133 | self.loss = LeastSquaresLoss() 134 | super(GradientBoostingRegressor, self).fit(X, y) 135 | 136 | 137 | class GradientBoostingClassifier(GradientBoosting): 138 | def fit(self, X, y=None): 139 | # Convert labels from {0, 1} to {-1, 1} 140 | y = (y * 2) - 1 141 | self.loss = LogisticLoss() 142 | super(GradientBoostingClassifier, self).fit(X, y) 143 | -------------------------------------------------------------------------------- /mla/ensemble/random_forest.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | from mla.base import BaseEstimator 5 | from mla.ensemble.base import information_gain, mse_criterion 6 | from mla.ensemble.tree import Tree 7 | 8 | 9 | class RandomForest(BaseEstimator): 10 | def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion=None): 11 | """Base class for RandomForest. 12 | 13 | Parameters 14 | ---------- 15 | n_estimators : int 16 | The number of decision tree. 17 | max_features : int 18 | The number of features to consider when looking for the best split. 19 | min_samples_split : int 20 | The minimum number of samples required to split an internal node. 21 | max_depth : int 22 | Maximum depth of the tree. 23 | criterion : str 24 | The function to measure the quality of a split. 
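Subclasses pass "entropy" (RandomForestClassifier) or "mse"
(RandomForestRegressor); any other value raises a ValueError.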
25 | """ 26 | self.max_depth = max_depth 27 | self.min_samples_split = min_samples_split 28 | self.max_features = max_features 29 | self.n_estimators = n_estimators 30 | self.trees = [] 31 | 32 | def fit(self, X, y): 33 | self._setup_input(X, y) 34 | if self.max_features is None: 35 | self.max_features = int(np.sqrt(X.shape[1])) 36 | else: 37 | assert X.shape[1] > self.max_features 38 | self._train() 39 | 40 | def _train(self): 41 | for tree in self.trees: 42 | tree.train( 43 | self.X, 44 | self.y, 45 | max_features=self.max_features, 46 | min_samples_split=self.min_samples_split, 47 | max_depth=self.max_depth 48 | ) 49 | 50 | def _predict(self, X=None): 51 | raise NotImplementedError() 52 | 53 | 54 | class RandomForestClassifier(RandomForest): 55 | def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="entropy"): 56 | super(RandomForestClassifier, self).__init__( 57 | n_estimators=n_estimators, 58 | max_features=max_features, 59 | min_samples_split=min_samples_split, 60 | max_depth=max_depth, 61 | criterion=criterion, 62 | ) 63 | 64 | if criterion == "entropy": 65 | self.criterion = information_gain 66 | else: 67 | raise ValueError() 68 | 69 | # Initialize empty trees 70 | for _ in range(self.n_estimators): 71 | self.trees.append(Tree(criterion=self.criterion)) 72 | 73 | def _predict(self, X=None): 74 | y_shape = np.unique(self.y).shape[0] 75 | predictions = np.zeros((X.shape[0], y_shape)) 76 | 77 | for i in range(X.shape[0]): 78 | row_pred = np.zeros(y_shape) 79 | for tree in self.trees: 80 | row_pred += tree.predict_row(X[i, :]) 81 | 82 | row_pred /= self.n_estimators 83 | predictions[i, :] = row_pred 84 | return predictions 85 | 86 | 87 | class RandomForestRegressor(RandomForest): 88 | def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="mse"): 89 | super(RandomForestRegressor, self).__init__( 90 | n_estimators=n_estimators, 91 | max_features=max_features, 92 | min_samples_split=min_samples_split, 93 | max_depth=max_depth, 94 | ) 95 | 96 | if criterion == "mse": 97 | self.criterion = mse_criterion 98 | else: 99 | raise ValueError() 100 | 101 | # Initialize empty regression trees 102 | for _ in range(self.n_estimators): 103 | self.trees.append(Tree(regression=True, criterion=self.criterion)) 104 | 105 | def _predict(self, X=None): 106 | predictions = np.zeros((X.shape[0], self.n_estimators)) 107 | for i, tree in enumerate(self.trees): 108 | predictions[:, i] = tree.predict(X) 109 | return predictions.mean(axis=1) 110 | -------------------------------------------------------------------------------- /mla/ensemble/tree.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import random 3 | 4 | import numpy as np 5 | from scipy import stats 6 | 7 | from mla.ensemble.base import split, split_dataset, xgb_criterion 8 | 9 | random.seed(111) 10 | 11 | 12 | class Tree(object): 13 | """Recursive implementation of decision tree.""" 14 | 15 | def __init__(self, regression=False, criterion=None, n_classes=None): 16 | self.regression = regression 17 | self.impurity = None 18 | self.threshold = None 19 | self.column_index = None 20 | self.outcome = None 21 | self.criterion = criterion 22 | self.loss = None 23 | self.n_classes = n_classes # Only for classification 24 | 25 | self.left_child = None 26 | self.right_child = None 27 | 28 | @property 29 | def is_terminal(self): 30 | return not bool(self.left_child and self.right_child) 31 | 32 | def 
_find_splits(self, X): 33 | """Find all possible split values.""" 34 | split_values = set() 35 | 36 | # Get unique values in a sorted order 37 | x_unique = list(np.unique(X)) 38 | for i in range(1, len(x_unique)): 39 | # Find a point between two values 40 | average = (x_unique[i - 1] + x_unique[i]) / 2.0 41 | split_values.add(average) 42 | 43 | return list(split_values) 44 | 45 | def _find_best_split(self, X, target, n_features): 46 | """Find best feature and value for a split. Greedy algorithm.""" 47 | 48 | # Sample random subset of features 49 | subset = random.sample(list(range(0, X.shape[1])), n_features) 50 | max_gain, max_col, max_val = None, None, None 51 | 52 | for column in subset: 53 | split_values = self._find_splits(X[:, column]) 54 | for value in split_values: 55 | if self.loss is None: 56 | # Random forest 57 | splits = split(X[:, column], target["y"], value) 58 | gain = self.criterion(target["y"], splits) 59 | else: 60 | # Gradient boosting 61 | left, right = split_dataset(X, target, column, value, return_X=False) 62 | gain = xgb_criterion(target, left, right, self.loss) 63 | 64 | if (max_gain is None) or (gain > max_gain): 65 | max_col, max_val, max_gain = column, value, gain 66 | return max_col, max_val, max_gain 67 | 68 | def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01): 69 | try: 70 | # Exit from recursion using assert syntax 71 | assert X.shape[0] > min_samples_split 72 | assert max_depth > 0 73 | 74 | if max_features is None: 75 | max_features = X.shape[1] 76 | 77 | column, value, gain = self._find_best_split(X, target, max_features) 78 | assert gain is not None 79 | if self.regression: 80 | assert gain != 0 81 | else: 82 | assert gain > minimum_gain 83 | 84 | self.column_index = column 85 | self.threshold = value 86 | self.impurity = gain 87 | 88 | # Split dataset 89 | left_X, right_X, left_target, right_target = split_dataset(X, target, column, value) 90 | 91 | # Grow left and right child 92 | self.left_child = Tree(self.regression, self.criterion, self.n_classes) 93 | self.left_child._train( 94 | left_X, left_target, max_features, min_samples_split, max_depth - 1, minimum_gain 95 | ) 96 | 97 | self.right_child = Tree(self.regression, self.criterion, self.n_classes) 98 | self.right_child._train( 99 | right_X, right_target, max_features, min_samples_split, max_depth - 1, minimum_gain 100 | ) 101 | except AssertionError: 102 | self._calculate_leaf_value(target) 103 | 104 | def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01, loss=None): 105 | """Build a decision tree from training set. 106 | 107 | Parameters 108 | ---------- 109 | 110 | X : array-like 111 | Feature dataset. 112 | target : dictionary or array-like 113 | Target values. 114 | max_features : int or None 115 | The number of features to consider when looking for the best split. 116 | min_samples_split : int 117 | The minimum number of samples required to split an internal node. 118 | max_depth : int 119 | Maximum depth of the tree. 120 | minimum_gain : float, default 0.01 121 | Minimum gain required for splitting. 122 | loss : function, default None 123 | Loss function for gradient boosting. 
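When a loss is supplied (gradient boosting), target is expected to be a
dictionary with "y", "actual" and "y_pred" arrays; a plain array of target
values is wrapped into {"y": target} automatically.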
124 | """ 125 | 126 | if not isinstance(target, dict): 127 | target = {"y": target} 128 | 129 | # Loss for gradient boosting 130 | if loss is not None: 131 | self.loss = loss 132 | 133 | if not self.regression: 134 | self.n_classes = len(np.unique(target['y'])) 135 | 136 | self._train(X, target, max_features=max_features, min_samples_split=min_samples_split, 137 | max_depth=max_depth, minimum_gain=minimum_gain) 138 | 139 | 140 | def _calculate_leaf_value(self, targets): 141 | """Find optimal value for leaf.""" 142 | if self.loss is not None: 143 | # Gradient boosting 144 | self.outcome = self.loss.approximate(targets["actual"], targets["y_pred"]) 145 | else: 146 | # Random Forest 147 | if self.regression: 148 | # Mean value for regression task 149 | self.outcome = np.mean(targets["y"]) 150 | else: 151 | # Probability for classification task 152 | self.outcome = np.bincount(targets["y"], minlength=self.n_classes) / targets["y"].shape[0] 153 | 154 | def predict_row(self, row): 155 | """Predict single row.""" 156 | if not self.is_terminal: 157 | if row[self.column_index] < self.threshold: 158 | return self.left_child.predict_row(row) 159 | else: 160 | return self.right_child.predict_row(row) 161 | return self.outcome 162 | 163 | def predict(self, X): 164 | result = np.zeros(X.shape[0]) 165 | for i in range(X.shape[0]): 166 | result[i] = self.predict_row(X[i, :]) 167 | return result 168 | -------------------------------------------------------------------------------- /mla/fm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import autograd.numpy as np 4 | from autograd import elementwise_grad 5 | 6 | from mla.base import BaseEstimator 7 | from mla.metrics import mean_squared_error, binary_crossentropy 8 | 9 | np.random.seed(9999) 10 | 11 | """ 12 | References: 13 | Factorization Machines http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf 14 | """ 15 | 16 | 17 | class BaseFM(BaseEstimator): 18 | def __init__( 19 | self, n_components=10, max_iter=100, init_stdev=0.1, learning_rate=0.01, reg_v=0.1, reg_w=0.5, reg_w0=0.0 20 | ): 21 | """Simplified factorization machines implementation using SGD optimizer.""" 22 | self.reg_w0 = reg_w0 23 | self.reg_w = reg_w 24 | self.reg_v = reg_v 25 | self.n_components = n_components 26 | self.lr = learning_rate 27 | self.init_stdev = init_stdev 28 | self.max_iter = max_iter 29 | self.loss = None 30 | self.loss_grad = None 31 | 32 | def fit(self, X, y=None): 33 | self._setup_input(X, y) 34 | # bias 35 | self.wo = 0.0 36 | # Feature weights 37 | self.w = np.zeros(self.n_features) 38 | # Factor weights 39 | self.v = np.random.normal(scale=self.init_stdev, size=(self.n_features, self.n_components)) 40 | self._train() 41 | 42 | def _train(self): 43 | for epoch in range(self.max_iter): 44 | y_pred = self._predict(self.X) 45 | loss = self.loss_grad(self.y, y_pred) 46 | w_grad = np.dot(loss, self.X) / float(self.n_samples) 47 | self.wo -= self.lr * (loss.mean() + 2 * self.reg_w0 * self.wo) 48 | self.w -= self.lr * w_grad + (2 * self.reg_w * self.w) 49 | self._factor_step(loss) 50 | 51 | def _factor_step(self, loss): 52 | for ix, x in enumerate(self.X): 53 | for i in range(self.n_features): 54 | v_grad = loss[ix] * (x.dot(self.v).dot(x[i])[0] - self.v[i] * x[i] ** 2) 55 | self.v[i] -= self.lr * v_grad + (2 * self.reg_v * self.v[i]) 56 | 57 | def _predict(self, X=None): 58 | linear_output = np.dot(X, self.w) 59 | factors_output = np.sum(np.dot(X, self.v) ** 2 - np.dot(X ** 2, self.v ** 2), axis=1) / 2.0 
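# The line above relies on the identity from the FM paper referenced at the top:
# sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [(sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2],
# which evaluates all pairwise interactions in O(n_features * n_components).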
60 | return self.wo + linear_output + factors_output 61 | 62 | 63 | class FMRegressor(BaseFM): 64 | def fit(self, X, y=None): 65 | super(FMRegressor, self).fit(X, y) 66 | self.loss = mean_squared_error 67 | self.loss_grad = elementwise_grad(mean_squared_error) 68 | 69 | 70 | class FMClassifier(BaseFM): 71 | def fit(self, X, y=None): 72 | super(FMClassifier, self).fit(X, y) 73 | self.loss = binary_crossentropy 74 | self.loss_grad = elementwise_grad(binary_crossentropy) 75 | 76 | def predict(self, X=None): 77 | predictions = self._predict(X) 78 | return np.sign(predictions) 79 | -------------------------------------------------------------------------------- /mla/gaussian_mixture.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import random 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from scipy.stats import multivariate_normal 8 | 9 | from mla.base import BaseEstimator 10 | from mla.kmeans import KMeans 11 | 12 | 13 | class GaussianMixture(BaseEstimator): 14 | """Gaussian Mixture Model: clusters with Gaussian prior. 15 | 16 | Finds clusters by repeatedly performing Expectation–Maximization (EM) algorithm 17 | on the dataset. GMM assumes the datasets is distributed in multivariate Gaussian, 18 | and tries to find the underlying structure of the Gaussian, i.e. mean and covariance. 19 | E-step computes the "responsibility" of the data to each cluster, given the mean 20 | and covariance; M-step computes the mean, covariance and weights (prior of each 21 | cluster), given the responsibilities. It iterates until the total likelihood 22 | changes less than the tolerance. 23 | 24 | 25 | Parameters 26 | ---------- 27 | 28 | K : int 29 | The number of clusters into which the dataset is partitioned. 30 | max_iters: int 31 | The maximum iterations of assigning points to the perform EM. 32 | Short-circuited by the assignments converging on their own. 33 | init: str, default 'random' 34 | The name of the method used to initialize the first clustering. 35 | 36 | 'random' - Randomly select values from the dataset as the K centroids. 37 | 'kmeans' - Initialize the centroids, covariances, weights with KMeams's clusters. 38 | tolerance: float, default 1e-3 39 | The tolerance of difference of the two latest likelihood for convergence. 40 | """ 41 | 42 | y_required = False 43 | 44 | def __init__(self, K=4, init="random", max_iters=500, tolerance=1e-3): 45 | self.K = K 46 | self.max_iters = max_iters 47 | self.init = init 48 | self.assignments = None 49 | self.likelihood = [] 50 | self.tolerance = tolerance 51 | 52 | def fit(self, X, y=None): 53 | """Perform Expectation–Maximization (EM) until converged.""" 54 | self._setup_input(X, y) 55 | self._initialize() 56 | for _ in range(self.max_iters): 57 | self._E_step() 58 | self._M_step() 59 | if self._is_converged(): 60 | break 61 | 62 | def _initialize(self): 63 | """Set the initial weights, means and covs (with full covariance matrix). 
64 | 65 | weights: the prior of the clusters (what percentage of data does a cluster have) 66 | means: the mean points of the clusters 67 | covs: the covariance matrix of the clusters 68 | """ 69 | self.weights = np.ones(self.K) 70 | if self.init == "random": 71 | self.means = [self.X[x] for x in random.sample(range(self.n_samples), self.K)] 72 | self.covs = [np.cov(self.X.T) for _ in range(self.K)] 73 | 74 | elif self.init == "kmeans": 75 | kmeans = KMeans(K=self.K, max_iters=self.max_iters // 3, init="++") 76 | kmeans.fit(self.X) 77 | self.assignments = kmeans.predict() 78 | self.means = kmeans.centroids 79 | self.covs = [] 80 | for i in np.unique(self.assignments): 81 | self.weights[int(i)] = (self.assignments == i).sum() 82 | self.covs.append(np.cov(self.X[self.assignments == i].T)) 83 | else: 84 | raise ValueError("Unknown type of init parameter") 85 | self.weights /= self.weights.sum() 86 | 87 | def _E_step(self): 88 | """Expectation(E-step) for Gaussian Mixture.""" 89 | likelihoods = self._get_likelihood(self.X) 90 | self.likelihood.append(likelihoods.sum()) 91 | weighted_likelihoods = self._get_weighted_likelihood(likelihoods) 92 | self.assignments = weighted_likelihoods.argmax(axis=1) 93 | weighted_likelihoods /= weighted_likelihoods.sum(axis=1)[:, np.newaxis] 94 | self.responsibilities = weighted_likelihoods 95 | 96 | def _M_step(self): 97 | """Maximization (M-step) for Gaussian Mixture.""" 98 | weights = self.responsibilities.sum(axis=0) 99 | for assignment in range(self.K): 100 | resp = self.responsibilities[:, assignment][:, np.newaxis] 101 | self.means[assignment] = (resp * self.X).sum(axis=0) / resp.sum() 102 | self.covs[assignment] = (self.X - self.means[assignment]).T.dot( 103 | (self.X - self.means[assignment]) * resp 104 | ) / weights[assignment] 105 | self.weights = weights / weights.sum() 106 | 107 | def _is_converged(self): 108 | """Check if the difference of the latest two likelihood is less than the tolerance.""" 109 | if (len(self.likelihood) > 1) and (self.likelihood[-1] - self.likelihood[-2] <= self.tolerance): 110 | return True 111 | return False 112 | 113 | def _predict(self, X): 114 | """Get the assignments for X with GMM clusters.""" 115 | if not X.shape: 116 | return self.assignments 117 | likelihoods = self._get_likelihood(X) 118 | weighted_likelihoods = self._get_weighted_likelihood(likelihoods) 119 | assignments = weighted_likelihoods.argmax(axis=1) 120 | return assignments 121 | 122 | def _get_likelihood(self, data): 123 | n_data = data.shape[0] 124 | likelihoods = np.zeros([n_data, self.K]) 125 | for c in range(self.K): 126 | likelihoods[:, c] = multivariate_normal.pdf(data, self.means[c], self.covs[c]) 127 | return likelihoods 128 | 129 | def _get_weighted_likelihood(self, likelihood): 130 | return self.weights * likelihood 131 | 132 | def plot(self, data=None, ax=None, holdon=False): 133 | """Plot contour for 2D data.""" 134 | if not (len(self.X.shape) == 2 and self.X.shape[1] == 2): 135 | raise AttributeError("Only support for visualizing 2D data.") 136 | 137 | if ax is None: 138 | _, ax = plt.subplots() 139 | 140 | if data is None: 141 | data = self.X 142 | assignments = self.assignments 143 | else: 144 | assignments = self.predict(data) 145 | 146 | COLOR = "bgrcmyk" 147 | cmap = lambda assignment: COLOR[int(assignment) % len(COLOR)] 148 | 149 | # generate grid 150 | delta = 0.025 151 | margin = 0.2 152 | xmax, ymax = self.X.max(axis=0) + margin 153 | xmin, ymin = self.X.min(axis=0) - margin 154 | axis_X, axis_Y = np.meshgrid(np.arange(xmin, xmax, 
delta), np.arange(ymin, ymax, delta)) 155 | 156 | def grid_gaussian_pdf(mean, cov): 157 | grid_array = np.array(list(zip(axis_X.flatten(), axis_Y.flatten()))) 158 | return multivariate_normal.pdf(grid_array, mean, cov).reshape(axis_X.shape) 159 | 160 | # plot scatters 161 | if assignments is None: 162 | c = None 163 | else: 164 | c = [cmap(assignment) for assignment in assignments] 165 | ax.scatter(data[:, 0], data[:, 1], c=c) 166 | 167 | # plot contours 168 | for assignment in range(self.K): 169 | ax.contour( 170 | axis_X, 171 | axis_Y, 172 | grid_gaussian_pdf(self.means[assignment], self.covs[assignment]), 173 | colors=cmap(assignment), 174 | ) 175 | 176 | if not holdon: 177 | plt.show() 178 | -------------------------------------------------------------------------------- /mla/kmeans.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import random 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn as sns 8 | 9 | from mla.base import BaseEstimator 10 | from mla.metrics.distance import euclidean_distance 11 | 12 | random.seed(1111) 13 | 14 | 15 | class KMeans(BaseEstimator): 16 | """Partition a dataset into K clusters. 17 | 18 | Finds clusters by repeatedly assigning each data point to the cluster with 19 | the nearest centroid and iterating until the assignments converge (meaning 20 | they don't change during an iteration) or the maximum number of iterations 21 | is reached. 22 | 23 | Parameters 24 | ---------- 25 | 26 | K : int 27 | The number of clusters into which the dataset is partitioned. 28 | max_iters: int 29 | The maximum iterations of assigning points to the nearest cluster. 30 | Short-circuited by the assignments converging on their own. 31 | init: str, default 'random' 32 | The name of the method used to initialize the first clustering. 33 | 34 | 'random' - Randomly select values from the dataset as the K centroids. 35 | '++' - Select a random first centroid from the dataset, then select 36 | K - 1 more centroids by choosing values from the dataset with a 37 | probability distribution proportional to the squared distance 38 | from each point's closest existing cluster. Attempts to create 39 | larger distances between initial clusters to improve convergence 40 | rates and avoid degenerate cases. 
41 | """ 42 | 43 | y_required = False 44 | 45 | def __init__(self, K=5, max_iters=100, init="random"): 46 | self.K = K 47 | self.max_iters = max_iters 48 | self.clusters = [[] for _ in range(self.K)] 49 | self.centroids = [] 50 | self.init = init 51 | 52 | def _initialize_centroids(self, init): 53 | """Set the initial centroids.""" 54 | 55 | if init == "random": 56 | self.centroids = [self.X[x] for x in random.sample(range(self.n_samples), self.K)] 57 | elif init == "++": 58 | self.centroids = [random.choice(self.X)] 59 | while len(self.centroids) < self.K: 60 | self.centroids.append(self._choose_next_center()) 61 | else: 62 | raise ValueError("Unknown type of init parameter") 63 | 64 | def _predict(self, X=None): 65 | """Perform clustering on the dataset.""" 66 | self._initialize_centroids(self.init) 67 | centroids = self.centroids 68 | 69 | # Optimize clusters 70 | for _ in range(self.max_iters): 71 | self._assign(centroids) 72 | centroids_old = centroids 73 | centroids = [self._get_centroid(cluster) for cluster in self.clusters] 74 | 75 | if self._is_converged(centroids_old, centroids): 76 | break 77 | 78 | self.centroids = centroids 79 | 80 | return self._get_predictions() 81 | 82 | def _get_predictions(self): 83 | predictions = np.empty(self.n_samples) 84 | 85 | for i, cluster in enumerate(self.clusters): 86 | for index in cluster: 87 | predictions[index] = i 88 | return predictions 89 | 90 | def _assign(self, centroids): 91 | 92 | for row in range(self.n_samples): 93 | for i, cluster in enumerate(self.clusters): 94 | if row in cluster: 95 | self.clusters[i].remove(row) 96 | break 97 | 98 | closest = self._closest(row, centroids) 99 | self.clusters[closest].append(row) 100 | 101 | def _closest(self, fpoint, centroids): 102 | """Find the closest centroid for a point.""" 103 | closest_index = None 104 | closest_distance = None 105 | for i, point in enumerate(centroids): 106 | dist = euclidean_distance(self.X[fpoint], point) 107 | if closest_index is None or dist < closest_distance: 108 | closest_index = i 109 | closest_distance = dist 110 | return closest_index 111 | 112 | def _get_centroid(self, cluster): 113 | """Get values by indices and take the mean.""" 114 | return [np.mean(np.take(self.X[:, i], cluster)) for i in range(self.n_features)] 115 | 116 | def _dist_from_centers(self): 117 | """Calculate distance from centers.""" 118 | return np.array([min([euclidean_distance(x, c) for c in self.centroids]) for x in self.X]) 119 | 120 | def _choose_next_center(self): 121 | distances = self._dist_from_centers() 122 | squared_distances = distances ** 2 123 | probs = squared_distances / squared_distances.sum() 124 | ind = np.random.choice(self.X.shape[0], 1, p=probs)[0] 125 | return self.X[ind] 126 | 127 | def _is_converged(self, centroids_old, centroids): 128 | """Check if the distance between old and new centroids is zero.""" 129 | distance = 0 130 | for i in range(self.K): 131 | distance += euclidean_distance(centroids_old[i], centroids[i]) 132 | return distance == 0 133 | 134 | def plot(self, ax=None, holdon=False): 135 | sns.set(style="white") 136 | palette = sns.color_palette("hls", self.K + 1) 137 | data = self.X 138 | 139 | if ax is None: 140 | _, ax = plt.subplots() 141 | 142 | for i, index in enumerate(self.clusters): 143 | point = np.array(data[index]).T 144 | ax.scatter(*point, c=[palette[i], ]) 145 | 146 | for point in self.centroids: 147 | ax.scatter(*point, marker="x", linewidths=10) 148 | 149 | if not holdon: 150 | plt.show() 151 | 
-------------------------------------------------------------------------------- /mla/knn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from collections import Counter 4 | 5 | import numpy as np 6 | from scipy.spatial.distance import euclidean 7 | 8 | from mla.base import BaseEstimator 9 | 10 | 11 | class KNNBase(BaseEstimator): 12 | def __init__(self, k=5, distance_func=euclidean): 13 | """Base class for Nearest neighbors classifier and regressor. 14 | 15 | Parameters 16 | ---------- 17 | k : int, default 5 18 | The number of neighbors to take into account. If 0, all the 19 | training examples are used. 20 | distance_func : function, default euclidean distance 21 | A distance function taking two arguments. Any function from 22 | scipy.spatial.distance will do. 23 | """ 24 | 25 | self.k = None if k == 0 else k # l[:None] returns the whole list 26 | self.distance_func = distance_func 27 | 28 | def aggregate(self, neighbors_targets): 29 | raise NotImplementedError() 30 | 31 | def _predict(self, X=None): 32 | predictions = [self._predict_x(x) for x in X] 33 | 34 | return np.array(predictions) 35 | 36 | def _predict_x(self, x): 37 | """Predict the label of a single instance x.""" 38 | 39 | # compute distances between x and all examples in the training set. 40 | distances = (self.distance_func(x, example) for example in self.X) 41 | 42 | # Sort all examples by their distance to x and keep their target value. 43 | neighbors = sorted(((dist, target) for (dist, target) in zip(distances, self.y)), key=lambda x: x[0]) 44 | 45 | # Get targets of the k-nn and aggregate them (most common one or 46 | # average). 47 | neighbors_targets = [target for (_, target) in neighbors[: self.k]] 48 | 49 | return self.aggregate(neighbors_targets) 50 | 51 | 52 | class KNNClassifier(KNNBase): 53 | """Nearest neighbors classifier. 54 | 55 | Note: if there is a tie for the most common label among the neighbors, then 56 | the predicted label is arbitrary.""" 57 | 58 | def aggregate(self, neighbors_targets): 59 | """Return the most common target label.""" 60 | 61 | most_common_label = Counter(neighbors_targets).most_common(1)[0][0] 62 | return most_common_label 63 | 64 | 65 | class KNNRegressor(KNNBase): 66 | """Nearest neighbors regressor.""" 67 | 68 | def aggregate(self, neighbors_targets): 69 | """Return the mean of all targets.""" 70 | 71 | return np.mean(neighbors_targets) 72 | -------------------------------------------------------------------------------- /mla/linear_models.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import logging 4 | 5 | import autograd.numpy as np 6 | from autograd import grad 7 | 8 | from mla.base import BaseEstimator 9 | from mla.metrics.metrics import mean_squared_error, binary_crossentropy 10 | 11 | np.random.seed(1000) 12 | 13 | 14 | class BasicRegression(BaseEstimator): 15 | def __init__(self, lr=0.001, penalty="None", C=0.01, tolerance=0.0001, max_iters=1000): 16 | """Basic class for implementing continuous regression estimators which 17 | are trained with gradient descent optimization on their particular loss 18 | function. 19 | 20 | Parameters 21 | ---------- 22 | lr : float, default 0.001 23 | Learning rate. 24 | penalty : str, {'l1', 'l2', None'}, default None 25 | Regularization function name. 26 | C : float, default 0.01 27 | The regularization coefficient. 
28 | tolerance : float, default 0.0001 29 | If the gradient descent updates are smaller than `tolerance`, then 30 | stop optimization process. 31 | max_iters : int, default 10000 32 | The maximum number of iterations. 33 | """ 34 | self.C = C 35 | self.penalty = penalty 36 | self.tolerance = tolerance 37 | self.lr = lr 38 | self.max_iters = max_iters 39 | self.errors = [] 40 | self.theta = [] 41 | self.n_samples, self.n_features = None, None 42 | self.cost_func = None 43 | 44 | def _loss(self, w): 45 | raise NotImplementedError() 46 | 47 | def init_cost(self): 48 | raise NotImplementedError() 49 | 50 | def _add_penalty(self, loss, w): 51 | """Apply regularization to the loss.""" 52 | if self.penalty == "l1": 53 | loss += self.C * np.abs(w[1:]).sum() 54 | elif self.penalty == "l2": 55 | loss += (0.5 * self.C) * (w[1:] ** 2).sum() 56 | return loss 57 | 58 | def _cost(self, X, y, theta): 59 | prediction = X.dot(theta) 60 | error = self.cost_func(y, prediction) 61 | return error 62 | 63 | def fit(self, X, y=None): 64 | self._setup_input(X, y) 65 | self.init_cost() 66 | self.n_samples, self.n_features = X.shape 67 | 68 | # Initialize weights + bias term 69 | self.theta = np.random.normal(size=(self.n_features + 1), scale=0.5) 70 | 71 | # Add an intercept column 72 | self.X = self._add_intercept(self.X) 73 | 74 | self._train() 75 | 76 | @staticmethod 77 | def _add_intercept(X): 78 | b = np.ones([X.shape[0], 1]) 79 | return np.concatenate([b, X], axis=1) 80 | 81 | def _train(self): 82 | self.theta, self.errors = self._gradient_descent() 83 | logging.info(" Theta: %s" % self.theta.flatten()) 84 | 85 | def _predict(self, X=None): 86 | X = self._add_intercept(X) 87 | return X.dot(self.theta) 88 | 89 | def _gradient_descent(self): 90 | theta = self.theta 91 | errors = [self._cost(self.X, self.y, theta)] 92 | # Get derivative of the loss function 93 | cost_d = grad(self._loss) 94 | for i in range(1, self.max_iters + 1): 95 | # Calculate gradient and update theta 96 | delta = cost_d(theta) 97 | theta -= self.lr * delta 98 | 99 | errors.append(self._cost(self.X, self.y, theta)) 100 | logging.info("Iteration %s, error %s" % (i, errors[i])) 101 | 102 | error_diff = np.linalg.norm(errors[i - 1] - errors[i]) 103 | if error_diff < self.tolerance: 104 | logging.info("Convergence has reached.") 105 | break 106 | return theta, errors 107 | 108 | 109 | class LinearRegression(BasicRegression): 110 | """Linear regression with gradient descent optimizer.""" 111 | 112 | def _loss(self, w): 113 | loss = self.cost_func(self.y, np.dot(self.X, w)) 114 | return self._add_penalty(loss, w) 115 | 116 | def init_cost(self): 117 | self.cost_func = mean_squared_error 118 | 119 | 120 | class LogisticRegression(BasicRegression): 121 | """Binary logistic regression with gradient descent optimizer.""" 122 | 123 | def init_cost(self): 124 | self.cost_func = binary_crossentropy 125 | 126 | def _loss(self, w): 127 | loss = self.cost_func(self.y, self.sigmoid(np.dot(self.X, w))) 128 | return self._add_penalty(loss, w) 129 | 130 | @staticmethod 131 | def sigmoid(x): 132 | return 0.5 * (np.tanh(0.5 * x) + 1) 133 | 134 | def _predict(self, X=None): 135 | X = self._add_intercept(X) 136 | return self.sigmoid(X.dot(self.theta)) 137 | -------------------------------------------------------------------------------- /mla/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .metrics import * 3 | 
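
A short usage sketch for LinearRegression and LogisticRegression above (see also examples/linear_models.py). The synthetic data and hyperparameter values are illustrative, and it assumes BaseEstimator exposes a public predict() that forwards to _predict().

import numpy as np

from mla.linear_models import LinearRegression, LogisticRegression

# Illustrative synthetic regression problem: y = 2*x0 - 3*x1 + 1 + noise
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
y = 2 * X[:, 0] - 3 * X[:, 1] + 1 + 0.1 * rng.randn(200)

model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.001)
model.fit(X, y)
y_pred = model.predict(X)           # adds the intercept column and applies the learned theta

# Same interface for binary classification; predictions are sigmoid probabilities
y_bin = (y > y.mean()).astype(int)
clf = LogisticRegression(lr=0.01, max_iters=2000)
clf.fit(X, y_bin)
probs = clf.predict(X)
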
-------------------------------------------------------------------------------- /mla/metrics/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 5 | def check_data(a, b): 6 | if not isinstance(a, np.ndarray): 7 | a = np.array(a) 8 | 9 | if not isinstance(b, np.ndarray): 10 | b = np.array(b) 11 | 12 | if type(a) != type(b): 13 | raise ValueError("Type mismatch: %s and %s" % (type(a), type(b))) 14 | 15 | if a.size != b.size: 16 | raise ValueError("Arrays must be equal in length.") 17 | return a, b 18 | 19 | 20 | def validate_input(function): 21 | def wrapper(a, b): 22 | a, b = check_data(a, b) 23 | return function(a, b) 24 | 25 | return wrapper 26 | -------------------------------------------------------------------------------- /mla/metrics/distance.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import math 3 | 4 | import numpy as np 5 | 6 | 7 | def euclidean_distance(a, b): 8 | if isinstance(a, list) and isinstance(b, list): 9 | a = np.array(a) 10 | b = np.array(b) 11 | 12 | return math.sqrt(sum((a - b) ** 2)) 13 | 14 | 15 | def l2_distance(X): 16 | sum_X = np.sum(X * X, axis=1) 17 | return (-2 * np.dot(X, X.T) + sum_X).T + sum_X 18 | -------------------------------------------------------------------------------- /mla/metrics/metrics.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | 4 | EPS = 1e-15 5 | 6 | 7 | def unhot(function): 8 | """Convert one-hot representation into one column.""" 9 | 10 | def wrapper(actual, predicted): 11 | if len(actual.shape) > 1 and actual.shape[1] > 1: 12 | actual = actual.argmax(axis=1) 13 | if len(predicted.shape) > 1 and predicted.shape[1] > 1: 14 | predicted = predicted.argmax(axis=1) 15 | return function(actual, predicted) 16 | 17 | return wrapper 18 | 19 | 20 | def absolute_error(actual, predicted): 21 | return np.abs(actual - predicted) 22 | 23 | 24 | @unhot 25 | def classification_error(actual, predicted): 26 | return (actual != predicted).sum() / float(actual.shape[0]) 27 | 28 | 29 | @unhot 30 | def accuracy(actual, predicted): 31 | return 1.0 - classification_error(actual, predicted) 32 | 33 | 34 | def mean_absolute_error(actual, predicted): 35 | return np.mean(absolute_error(actual, predicted)) 36 | 37 | 38 | def squared_error(actual, predicted): 39 | return (actual - predicted) ** 2 40 | 41 | 42 | def squared_log_error(actual, predicted): 43 | return (np.log(np.array(actual) + 1) - np.log(np.array(predicted) + 1)) ** 2 44 | 45 | 46 | def mean_squared_log_error(actual, predicted): 47 | return np.mean(squared_log_error(actual, predicted)) 48 | 49 | 50 | def mean_squared_error(actual, predicted): 51 | return np.mean(squared_error(actual, predicted)) 52 | 53 | 54 | def root_mean_squared_error(actual, predicted): 55 | return np.sqrt(mean_squared_error(actual, predicted)) 56 | 57 | 58 | def root_mean_squared_log_error(actual, predicted): 59 | return np.sqrt(mean_squared_log_error(actual, predicted)) 60 | 61 | 62 | def logloss(actual, predicted): 63 | predicted = np.clip(predicted, EPS, 1 - EPS) 64 | loss = -np.sum(actual * np.log(predicted)) 65 | return loss / float(actual.shape[0]) 66 | 67 | 68 | def hinge(actual, predicted): 69 | return np.mean(np.max(1.0 - actual * predicted, 0.0)) 70 | 71 | 72 | def binary_crossentropy(actual, predicted): 73 | predicted = np.clip(predicted, EPS, 1 - EPS) 74 | return 
np.mean(-np.sum(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))) 75 | 76 | 77 | # aliases 78 | mse = mean_squared_error 79 | rmse = root_mean_squared_error 80 | mae = mean_absolute_error 81 | 82 | 83 | def get_metric(name): 84 | """Return metric function by name""" 85 | try: 86 | return globals()[name] 87 | except Exception: 88 | raise ValueError("Invalid metric function.") 89 | -------------------------------------------------------------------------------- /mla/metrics/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | -------------------------------------------------------------------------------- /mla/metrics/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import pytest 5 | from numpy.testing import assert_almost_equal 6 | 7 | from mla.metrics.base import check_data, validate_input 8 | from mla.metrics.metrics import get_metric 9 | 10 | 11 | def test_data_validation(): 12 | with pytest.raises(ValueError): 13 | check_data([], 1) 14 | 15 | with pytest.raises(ValueError): 16 | check_data([1, 2, 3], [3, 2]) 17 | 18 | a, b = check_data([1, 2, 3], [3, 2, 1]) 19 | 20 | assert np.all(a == np.array([1, 2, 3])) 21 | assert np.all(b == np.array([3, 2, 1])) 22 | 23 | 24 | def metric(name): 25 | return validate_input(get_metric(name)) 26 | 27 | 28 | def test_classification_error(): 29 | f = metric("classification_error") 30 | assert f([1, 2, 3, 4], [1, 2, 3, 4]) == 0 31 | assert f([1, 2, 3, 4], [1, 2, 3, 5]) == 0.25 32 | assert f([1, 1, 1, 0, 0, 0], [1, 1, 1, 1, 0, 0]) == (1.0 / 6) 33 | 34 | 35 | def test_absolute_error(): 36 | f = metric("absolute_error") 37 | assert f([3], [5]) == [2] 38 | assert f([-1], [-4]) == [3] 39 | 40 | 41 | def test_mean_absolute_error(): 42 | f = metric("mean_absolute_error") 43 | assert f([1, 2, 3], [1, 2, 3]) == 0 44 | assert f([1, 2, 3], [3, 2, 1]) == 4 / 3 45 | 46 | 47 | def test_squared_error(): 48 | f = metric("squared_error") 49 | assert f([1], [1]) == [0] 50 | assert f([3], [1]) == [4] 51 | 52 | 53 | def test_squared_log_error(): 54 | f = metric("squared_log_error") 55 | assert f([1], [1]) == [0] 56 | assert f([3], [1]) == [np.log(2) ** 2] 57 | assert f([np.exp(2) - 1], [np.exp(1) - 1]) == [1.0] 58 | 59 | 60 | def test_mean_squared_log_error(): 61 | f = metric("mean_squared_log_error") 62 | assert f([1, 2, 3], [1, 2, 3]) == 0 63 | assert f([1, 2, 3, np.exp(1) - 1], [1, 2, 3, np.exp(2) - 1]) == 0.25 64 | 65 | 66 | def test_root_mean_squared_log_error(): 67 | f = metric("root_mean_squared_log_error") 68 | assert f([1, 2, 3], [1, 2, 3]) == 0 69 | assert f([1, 2, 3, np.exp(1) - 1], [1, 2, 3, np.exp(2) - 1]) == 0.5 70 | 71 | 72 | def test_mean_squared_error(): 73 | f = metric("mean_squared_error") 74 | assert f([1, 2, 3], [1, 2, 3]) == 0 75 | assert f(range(1, 5), [1, 2, 3, 6]) == 1 76 | 77 | 78 | def test_root_mean_squared_error(): 79 | f = metric("root_mean_squared_error") 80 | assert f([1, 2, 3], [1, 2, 3]) == 0 81 | assert f(range(1, 5), [1, 2, 3, 5]) == 0.5 82 | 83 | 84 | def test_multiclass_logloss(): 85 | f = metric("logloss") 86 | assert_almost_equal(f([1], [1]), 0) 87 | assert_almost_equal(f([1, 1], [1, 1]), 0) 88 | assert_almost_equal(f([1], [0.5]), -np.log(0.5)) 89 | -------------------------------------------------------------------------------- /mla/naive_bayes.py: -------------------------------------------------------------------------------- 
1 | # coding:utf-8 2 | 3 | import numpy as np 4 | 5 | from mla.base import BaseEstimator 6 | from mla.neuralnet.activations import softmax 7 | 8 | 9 | class NaiveBayesClassifier(BaseEstimator): 10 | """Gaussian Naive Bayes.""" 11 | 12 | # Binary problem. 13 | n_classes = 2 14 | 15 | def fit(self, X, y=None): 16 | self._setup_input(X, y) 17 | # Check target labels 18 | assert list(np.unique(y)) == [0, 1] 19 | 20 | # Mean and variance for each class and feature combination 21 | self._mean = np.zeros((self.n_classes, self.n_features), dtype=np.float64) 22 | self._var = np.zeros((self.n_classes, self.n_features), dtype=np.float64) 23 | 24 | self._priors = np.zeros(self.n_classes, dtype=np.float64) 25 | 26 | for c in range(self.n_classes): 27 | # Filter features by class 28 | X_c = X[y == c] 29 | 30 | # Calculate mean, variance, prior for each class 31 | self._mean[c, :] = X_c.mean(axis=0) 32 | self._var[c, :] = X_c.var(axis=0) 33 | self._priors[c] = X_c.shape[0] / float(X.shape[0]) 34 | 35 | def _predict(self, X=None): 36 | # Apply _predict_proba for each row 37 | predictions = np.apply_along_axis(self._predict_row, 1, X) 38 | 39 | # Normalize probabilities so that each row will sum up to 1.0 40 | return softmax(predictions) 41 | 42 | def _predict_row(self, x): 43 | """Predict log likelihood for given row.""" 44 | output = [] 45 | for y in range(self.n_classes): 46 | prior = np.log(self._priors[y]) 47 | posterior = np.log(self._pdf(y, x)).sum() 48 | prediction = prior + posterior 49 | 50 | output.append(prediction) 51 | return output 52 | 53 | def _pdf(self, n_class, x): 54 | """Calculate Gaussian PDF for each feature.""" 55 | 56 | mean = self._mean[n_class] 57 | var = self._var[n_class] 58 | 59 | numerator = np.exp(-(x - mean) ** 2 / (2 * var)) 60 | denominator = np.sqrt(2 * np.pi * var) 61 | return numerator / denominator 62 | -------------------------------------------------------------------------------- /mla/neuralnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .nnet import NeuralNet 2 | -------------------------------------------------------------------------------- /mla/neuralnet/activations.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | """ 4 | References: 5 | https://en.wikipedia.org/wiki/Activation_function 6 | """ 7 | 8 | 9 | def sigmoid(z): 10 | return 1.0 / (1.0 + np.exp(-z)) 11 | 12 | 13 | def softmax(z): 14 | # Avoid numerical overflow by removing max 15 | e = np.exp(z - np.amax(z, axis=1, keepdims=True)) 16 | return e / np.sum(e, axis=1, keepdims=True) 17 | 18 | 19 | def linear(z): 20 | return z 21 | 22 | 23 | def softplus(z): 24 | """Smooth relu.""" 25 | # Avoid numerical overflow, see: 26 | # https://docs.scipy.org/doc/numpy/reference/generated/numpy.logaddexp.html 27 | return np.logaddexp(0.0, z) 28 | 29 | 30 | def softsign(z): 31 | return z / (1 + np.abs(z)) 32 | 33 | 34 | def tanh(z): 35 | return np.tanh(z) 36 | 37 | 38 | def relu(z): 39 | return np.maximum(0, z) 40 | 41 | 42 | def leakyrelu(z, a=0.01): 43 | return np.maximum(z * a, z) 44 | 45 | 46 | def get_activation(name): 47 | """Return activation function by name""" 48 | try: 49 | return globals()[name] 50 | except Exception: 51 | raise ValueError("Invalid activation function.") 52 | -------------------------------------------------------------------------------- /mla/neuralnet/constraints.py: -------------------------------------------------------------------------------- 1 | # 
coding:utf-8 2 | import numpy as np 3 | 4 | EPSILON = 10e-8 5 | 6 | 7 | class Constraint(object): 8 | def clip(self, p): 9 | return p 10 | 11 | 12 | class MaxNorm(object): 13 | def __init__(self, m=2, axis=0): 14 | self.axis = axis 15 | self.m = m 16 | 17 | def clip(self, p): 18 | norms = np.sqrt(np.sum(p ** 2, axis=self.axis)) 19 | desired = np.clip(norms, 0, self.m) 20 | p = p * (desired / (EPSILON + norms)) 21 | return p 22 | 23 | 24 | class NonNeg(object): 25 | def clip(self, p): 26 | p[p < 0.0] = 0.0 27 | return p 28 | 29 | 30 | class SmallNorm(object): 31 | def clip(self, p): 32 | return np.clip(p, -5, 5) 33 | 34 | 35 | class UnitNorm(Constraint): 36 | def __init__(self, axis=0): 37 | self.axis = axis 38 | 39 | def clip(self, p): 40 | return p / (EPSILON + np.sqrt(np.sum(p ** 2, axis=self.axis))) 41 | -------------------------------------------------------------------------------- /mla/neuralnet/initializations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | References: 5 | http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf 6 | 7 | """ 8 | 9 | 10 | def normal(shape, scale=0.5): 11 | return np.random.normal(size=shape, scale=scale) 12 | 13 | 14 | def uniform(shape, scale=0.5): 15 | return np.random.uniform(size=shape, low=-scale, high=scale) 16 | 17 | 18 | def zero(shape, **kwargs): 19 | return np.zeros(shape) 20 | 21 | 22 | def one(shape, **kwargs): 23 | return np.ones(shape) 24 | 25 | 26 | def orthogonal(shape, scale=0.5): 27 | flat_shape = (shape[0], np.prod(shape[1:])) 28 | array = np.random.normal(size=flat_shape) 29 | u, _, v = np.linalg.svd(array, full_matrices=False) 30 | array = u if u.shape == flat_shape else v 31 | return np.reshape(array * scale, shape) 32 | 33 | 34 | def _glorot_fan(shape): 35 | assert len(shape) >= 2 36 | 37 | if len(shape) == 4: 38 | receptive_field_size = np.prod(shape[2:]) 39 | fan_in = shape[1] * receptive_field_size 40 | fan_out = shape[0] * receptive_field_size 41 | else: 42 | fan_in, fan_out = shape[:2] 43 | return float(fan_in), float(fan_out) 44 | 45 | 46 | def glorot_normal(shape, **kwargs): 47 | fan_in, fan_out = _glorot_fan(shape) 48 | s = np.sqrt(2.0 / (fan_in + fan_out)) 49 | return normal(shape, s) 50 | 51 | 52 | def glorot_uniform(shape, **kwargs): 53 | fan_in, fan_out = _glorot_fan(shape) 54 | s = np.sqrt(6.0 / (fan_in + fan_out)) 55 | return uniform(shape, s) 56 | 57 | 58 | def he_normal(shape, **kwargs): 59 | fan_in, fan_out = _glorot_fan(shape) 60 | s = np.sqrt(2.0 / fan_in) 61 | return normal(shape, s) 62 | 63 | 64 | def he_uniform(shape, **kwargs): 65 | fan_in, fan_out = _glorot_fan(shape) 66 | s = np.sqrt(6.0 / fan_in) 67 | return uniform(shape, s) 68 | 69 | 70 | def get_initializer(name): 71 | """Returns initialization function by the name.""" 72 | try: 73 | return globals()[name] 74 | except Exception: 75 | raise ValueError("Invalid initialization function.") 76 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .basic import * 3 | from .convnet import * 4 | from .normalization import * 5 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/basic.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | from autograd import elementwise_grad 
4 | 5 | from mla.neuralnet.activations import get_activation 6 | from mla.neuralnet.parameters import Parameters 7 | 8 | np.random.seed(9999) 9 | 10 | 11 | class Layer(object): 12 | def setup(self, X_shape): 13 | """Allocates initial weights.""" 14 | pass 15 | 16 | def forward_pass(self, x): 17 | raise NotImplementedError() 18 | 19 | def backward_pass(self, delta): 20 | raise NotImplementedError() 21 | 22 | def shape(self, x_shape): 23 | """Returns shape of the current layer.""" 24 | raise NotImplementedError() 25 | 26 | 27 | class ParamMixin(object): 28 | @property 29 | def parameters(self): 30 | return self._params 31 | 32 | 33 | class PhaseMixin(object): 34 | _train = False 35 | 36 | @property 37 | def is_training(self): 38 | return self._train 39 | 40 | @is_training.setter 41 | def is_training(self, is_train=True): 42 | self._train = is_train 43 | 44 | @property 45 | def is_testing(self): 46 | return not self._train 47 | 48 | @is_testing.setter 49 | def is_testing(self, is_test=True): 50 | self._train = not is_test 51 | 52 | 53 | class Dense(Layer, ParamMixin): 54 | def __init__(self, output_dim, parameters=None): 55 | """A fully connected layer. 56 | 57 | Parameters 58 | ---------- 59 | output_dim : int 60 | """ 61 | self._params = parameters 62 | self.output_dim = output_dim 63 | self.last_input = None 64 | 65 | if parameters is None: 66 | self._params = Parameters() 67 | 68 | def setup(self, x_shape): 69 | self._params.setup_weights((x_shape[1], self.output_dim)) 70 | 71 | def forward_pass(self, X): 72 | self.last_input = X 73 | return self.weight(X) 74 | 75 | def weight(self, X): 76 | W = np.dot(X, self._params["W"]) 77 | return W + self._params["b"] 78 | 79 | def backward_pass(self, delta): 80 | dW = np.dot(self.last_input.T, delta) 81 | db = np.sum(delta, axis=0) 82 | 83 | # Update gradient values 84 | self._params.update_grad("W", dW) 85 | self._params.update_grad("b", db) 86 | return np.dot(delta, self._params["W"].T) 87 | 88 | def shape(self, x_shape): 89 | return x_shape[0], self.output_dim 90 | 91 | 92 | class Activation(Layer): 93 | def __init__(self, name): 94 | self.last_input = None 95 | self.activation = get_activation(name) 96 | # Derivative of activation function 97 | self.activation_d = elementwise_grad(self.activation) 98 | 99 | def forward_pass(self, X): 100 | self.last_input = X 101 | return self.activation(X) 102 | 103 | def backward_pass(self, delta): 104 | return self.activation_d(self.last_input) * delta 105 | 106 | def shape(self, x_shape): 107 | return x_shape 108 | 109 | 110 | class Dropout(Layer, PhaseMixin): 111 | """Randomly set a fraction of `p` inputs to 0 at each training update.""" 112 | 113 | def __init__(self, p=0.1): 114 | self.p = p 115 | self._mask = None 116 | 117 | def forward_pass(self, X): 118 | assert self.p > 0 119 | if self.is_training: 120 | self._mask = np.random.uniform(size=X.shape) > self.p 121 | y = X * self._mask 122 | else: 123 | y = X * (1.0 - self.p) 124 | 125 | return y 126 | 127 | def backward_pass(self, delta): 128 | return delta * self._mask 129 | 130 | def shape(self, x_shape): 131 | return x_shape 132 | 133 | 134 | class TimeStepSlicer(Layer): 135 | """Take a specific time step from 3D tensor.""" 136 | 137 | def __init__(self, step=-1): 138 | self.step = step 139 | 140 | def forward_pass(self, x): 141 | return x[:, self.step, :] 142 | 143 | def backward_pass(self, delta): 144 | return np.repeat(delta[:, np.newaxis, :], 2, 1) 145 | 146 | def shape(self, x_shape): 147 | return x_shape[0], x_shape[2] 148 | 149 | 150 | class 
TimeDistributedDense(Layer): 151 | """Apply regular Dense layer to every timestep.""" 152 | 153 | def __init__(self, output_dim): 154 | self.output_dim = output_dim 155 | self.n_timesteps = None 156 | self.dense = None 157 | self.input_dim = None 158 | 159 | def setup(self, X_shape): 160 | self.dense = Dense(self.output_dim) 161 | self.dense.setup((X_shape[0], X_shape[2])) 162 | self.input_dim = X_shape[2] 163 | 164 | def forward_pass(self, X): 165 | n_timesteps = X.shape[1] 166 | X = X.reshape(-1, X.shape[-1]) 167 | y = self.dense.forward_pass(X) 168 | y = y.reshape((-1, n_timesteps, self.output_dim)) 169 | return y 170 | 171 | def backward_pass(self, delta): 172 | n_timesteps = delta.shape[1] 173 | X = delta.reshape(-1, delta.shape[-1]) 174 | y = self.dense.backward_pass(X) 175 | y = y.reshape((-1, n_timesteps, self.input_dim)) 176 | return y 177 | 178 | @property 179 | def parameters(self): 180 | return self.dense._params 181 | 182 | def shape(self, x_shape): 183 | return x_shape[0], x_shape[1], self.output_dim 184 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/convnet.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | 4 | from mla.neuralnet.layers import Layer, ParamMixin 5 | from mla.neuralnet.parameters import Parameters 6 | 7 | 8 | class Convolution(Layer, ParamMixin): 9 | def __init__(self, n_filters=8, filter_shape=(3, 3), padding=(0, 0), stride=(1, 1), parameters=None): 10 | """A 2D convolutional layer. 11 | Input shape: (n_images, n_channels, height, width) 12 | 13 | Parameters 14 | ---------- 15 | n_filters : int, default 8 16 | The number of filters (kernels). 17 | filter_shape : tuple(int, int), default (3, 3) 18 | The shape of the filters. (height, width) 19 | parameters : Parameters instance, default None 20 | stride : tuple(int, int), default (1, 1) 21 | The step of the convolution. (height, width). 22 | padding : tuple(int, int), default (0, 0) 23 | The number of pixel to add to each side of the input. 
(height, weight) 24 | 25 | """ 26 | self.padding = padding 27 | self._params = parameters 28 | self.stride = stride 29 | self.filter_shape = filter_shape 30 | self.n_filters = n_filters 31 | if self._params is None: 32 | self._params = Parameters() 33 | 34 | def setup(self, X_shape): 35 | n_channels, self.height, self.width = X_shape[1:] 36 | 37 | W_shape = (self.n_filters, n_channels) + self.filter_shape 38 | b_shape = self.n_filters 39 | self._params.setup_weights(W_shape, b_shape) 40 | 41 | def forward_pass(self, X): 42 | n_images, n_channels, height, width = self.shape(X.shape) 43 | self.last_input = X 44 | self.col = image_to_column(X, self.filter_shape, self.stride, self.padding) 45 | self.col_W = self._params["W"].reshape(self.n_filters, -1).T 46 | 47 | out = np.dot(self.col, self.col_W) + self._params["b"] 48 | out = out.reshape(n_images, height, width, -1).transpose(0, 3, 1, 2) 49 | return out 50 | 51 | def backward_pass(self, delta): 52 | delta = delta.transpose(0, 2, 3, 1).reshape(-1, self.n_filters) 53 | 54 | d_W = np.dot(self.col.T, delta).transpose(1, 0).reshape(self._params["W"].shape) 55 | d_b = np.sum(delta, axis=0) 56 | self._params.update_grad("b", d_b) 57 | self._params.update_grad("W", d_W) 58 | 59 | d_c = np.dot(delta, self.col_W.T) 60 | return column_to_image(d_c, self.last_input.shape, self.filter_shape, self.stride, self.padding) 61 | 62 | def shape(self, x_shape): 63 | height, width = convoltuion_shape(self.height, self.width, self.filter_shape, self.stride, self.padding) 64 | return x_shape[0], self.n_filters, height, width 65 | 66 | 67 | class MaxPooling(Layer): 68 | def __init__(self, pool_shape=(2, 2), stride=(1, 1), padding=(0, 0)): 69 | """Max pooling layer. 70 | Input shape: (n_images, n_channels, height, width) 71 | 72 | Parameters 73 | ---------- 74 | pool_shape : tuple(int, int), default (2, 2) 75 | stride : tuple(int, int), default (1,1) 76 | padding : tuple(int, int), default (0,0) 77 | """ 78 | self.pool_shape = pool_shape 79 | self.stride = stride 80 | self.padding = padding 81 | 82 | def forward_pass(self, X): 83 | self.last_input = X 84 | 85 | out_height, out_width = pooling_shape(self.pool_shape, X.shape, self.stride) 86 | n_images, n_channels, _, _ = X.shape 87 | 88 | col = image_to_column(X, self.pool_shape, self.stride, self.padding) 89 | col = col.reshape(-1, self.pool_shape[0] * self.pool_shape[1]) 90 | 91 | arg_max = np.argmax(col, axis=1) 92 | out = np.max(col, axis=1) 93 | self.arg_max = arg_max 94 | return out.reshape(n_images, out_height, out_width, n_channels).transpose(0, 3, 1, 2) 95 | 96 | def backward_pass(self, delta): 97 | delta = delta.transpose(0, 2, 3, 1) 98 | 99 | pool_size = self.pool_shape[0] * self.pool_shape[1] 100 | y_max = np.zeros((delta.size, pool_size)) 101 | y_max[np.arange(self.arg_max.size), self.arg_max.flatten()] = delta.flatten() 102 | y_max = y_max.reshape(delta.shape + (pool_size,)) 103 | 104 | dcol = y_max.reshape(y_max.shape[0] * y_max.shape[1] * y_max.shape[2], -1) 105 | return column_to_image(dcol, self.last_input.shape, self.pool_shape, self.stride, self.padding) 106 | 107 | def shape(self, x_shape): 108 | h, w = convoltuion_shape(x_shape[2], x_shape[3], self.pool_shape, self.stride, self.padding) 109 | return x_shape[0], x_shape[1], h, w 110 | 111 | 112 | class Flatten(Layer): 113 | """Flattens multidimensional input into 2D matrix.""" 114 | 115 | def forward_pass(self, X): 116 | self.last_input_shape = X.shape 117 | return X.reshape((X.shape[0], -1)) 118 | 119 | def backward_pass(self, delta): 120 | 
return delta.reshape(self.last_input_shape) 121 | 122 | def shape(self, x_shape): 123 | return x_shape[0], np.prod(x_shape[1:]) 124 | 125 | 126 | def image_to_column(images, filter_shape, stride, padding): 127 | """Rearrange image blocks into columns. 128 | 129 | Parameters 130 | ---------- 131 | 132 | filter_shape : tuple(height, width) 133 | images : np.array, shape (n_images, n_channels, height, width) 134 | padding: tuple(height, width) 135 | stride : tuple (height, width) 136 | 137 | """ 138 | n_images, n_channels, height, width = images.shape 139 | f_height, f_width = filter_shape 140 | out_height, out_width = convoltuion_shape(height, width, (f_height, f_width), stride, padding) 141 | images = np.pad(images, ((0, 0), (0, 0), padding, padding), mode="constant") 142 | 143 | col = np.zeros((n_images, n_channels, f_height, f_width, out_height, out_width)) 144 | for y in range(f_height): 145 | y_bound = y + stride[0] * out_height 146 | for x in range(f_width): 147 | x_bound = x + stride[1] * out_width 148 | col[:, :, y, x, :, :] = images[:, :, y: y_bound: stride[0], x: x_bound: stride[1]] 149 | 150 | col = col.transpose(0, 4, 5, 1, 2, 3).reshape(n_images * out_height * out_width, -1) 151 | return col 152 | 153 | 154 | def column_to_image(columns, images_shape, filter_shape, stride, padding): 155 | """Rearrange columns into image blocks. 156 | 157 | Parameters 158 | ---------- 159 | columns 160 | images_shape : tuple(n_images, n_channels, height, width) 161 | filter_shape : tuple(height, _width) 162 | stride : tuple(height, width) 163 | padding : tuple(height, width) 164 | """ 165 | n_images, n_channels, height, width = images_shape 166 | f_height, f_width = filter_shape 167 | 168 | out_height, out_width = convoltuion_shape(height, width, (f_height, f_width), stride, padding) 169 | columns = columns.reshape(n_images, out_height, out_width, n_channels, f_height, f_width).transpose( 170 | 0, 3, 4, 5, 1, 2 171 | ) 172 | 173 | img_h = height + 2 * padding[0] + stride[0] - 1 174 | img_w = width + 2 * padding[1] + stride[1] - 1 175 | img = np.zeros((n_images, n_channels, img_h, img_w)) 176 | for y in range(f_height): 177 | y_bound = y + stride[0] * out_height 178 | for x in range(f_width): 179 | x_bound = x + stride[1] * out_width 180 | img[:, :, y: y_bound: stride[0], x: x_bound: stride[1]] += columns[:, :, y, x, :, :] 181 | 182 | return img[:, :, padding[0]: height + padding[0], padding[1]: width + padding[1]] 183 | 184 | 185 | def convoltuion_shape(img_height, img_width, filter_shape, stride, padding): 186 | """Calculate output shape for convolution layer.""" 187 | height = (img_height + 2 * padding[0] - filter_shape[0]) / float(stride[0]) + 1 188 | width = (img_width + 2 * padding[1] - filter_shape[1]) / float(stride[1]) + 1 189 | 190 | assert height % 1 == 0 191 | assert width % 1 == 0 192 | 193 | return int(height), int(width) 194 | 195 | 196 | def pooling_shape(pool_shape, image_shape, stride): 197 | """Calculate output shape for pooling layer.""" 198 | n_images, n_channels, height, width = image_shape 199 | 200 | height = (height - pool_shape[0]) / float(stride[0]) + 1 201 | width = (width - pool_shape[1]) / float(stride[1]) + 1 202 | 203 | assert height % 1 == 0 204 | assert width % 1 == 0 205 | 206 | return int(height), int(width) 207 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 
from mla.neuralnet.layers import Layer, PhaseMixin, ParamMixin 5 | from mla.neuralnet.parameters import Parameters 6 | 7 | """ 8 | References: 9 | https://kratzert.github.io/2016/02/12/understanding-the-gradient-flow-through-the-batch-normalization-layer.html 10 | """ 11 | 12 | 13 | class BatchNormalization(Layer, ParamMixin, PhaseMixin): 14 | def __init__(self, momentum=0.9, eps=1e-5, parameters=None): 15 | super().__init__() 16 | self._params = parameters 17 | if self._params is None: 18 | self._params = Parameters() 19 | self.momentum = momentum 20 | self.eps = eps 21 | self.ema_mean = None 22 | self.ema_var = None 23 | 24 | def setup(self, x_shape): 25 | self._params.setup_weights((1, x_shape[1])) 26 | 27 | def _forward_pass(self, X): 28 | gamma = self._params["W"] 29 | beta = self._params["b"] 30 | 31 | if self.is_testing: 32 | mu = self.ema_mean 33 | xmu = X - mu 34 | var = self.ema_var 35 | sqrtvar = np.sqrt(var + self.eps) 36 | ivar = 1.0 / sqrtvar 37 | xhat = xmu * ivar 38 | gammax = gamma * xhat 39 | return gammax + beta 40 | 41 | N, D = X.shape 42 | 43 | # step1: calculate mean 44 | mu = 1.0 / N * np.sum(X, axis=0) 45 | 46 | # step2: subtract mean vector of every trainings example 47 | xmu = X - mu 48 | 49 | # step3: following the lower branch - calculation denominator 50 | sq = xmu ** 2 51 | 52 | # step4: calculate variance 53 | var = 1.0 / N * np.sum(sq, axis=0) 54 | 55 | # step5: add eps for numerical stability, then sqrt 56 | sqrtvar = np.sqrt(var + self.eps) 57 | 58 | # step6: invert sqrtwar 59 | ivar = 1.0 / sqrtvar 60 | 61 | # step7: execute normalization 62 | xhat = xmu * ivar 63 | 64 | # step8: Nor the two transformation steps 65 | gammax = gamma * xhat 66 | 67 | # step9 68 | out = gammax + beta 69 | 70 | # store running averages of mean and variance during training for use during testing 71 | if self.ema_mean is None or self.ema_var is None: 72 | self.ema_mean = mu 73 | self.ema_var = var 74 | else: 75 | self.ema_mean = self.momentum * self.ema_mean + (1 - self.momentum) * mu 76 | self.ema_var = self.momentum * self.ema_var + (1 - self.momentum) * var 77 | # store intermediate 78 | self.cache = (xhat, gamma, xmu, ivar, sqrtvar, var) 79 | 80 | return out 81 | 82 | def forward_pass(self, X): 83 | if len(X.shape) == 2: 84 | # input is a regular layer 85 | return self._forward_pass(X) 86 | elif len(X.shape) == 4: 87 | # input is a convolution layer 88 | N, C, H, W = X.shape 89 | x_flat = X.transpose(0, 2, 3, 1).reshape(-1, C) 90 | out_flat = self._forward_pass(x_flat) 91 | return out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) 92 | else: 93 | raise NotImplementedError("Unknown model with dimensions = {}".format(len(X.shape))) 94 | 95 | def _backward_pass(self, delta): 96 | # unfold the variables stored in cache 97 | xhat, gamma, xmu, ivar, sqrtvar, var = self.cache 98 | 99 | # get the dimensions of the input/output 100 | N, D = delta.shape 101 | 102 | # step9 103 | dbeta = np.sum(delta, axis=0) 104 | dgammax = delta # not necessary, but more understandable 105 | 106 | # step8 107 | dgamma = np.sum(dgammax * xhat, axis=0) 108 | dxhat = dgammax * gamma 109 | 110 | # step7 111 | divar = np.sum(dxhat * xmu, axis=0) 112 | dxmu1 = dxhat * ivar 113 | 114 | # step6 115 | dsqrtvar = -1.0 / (sqrtvar ** 2) * divar 116 | 117 | # step5 118 | dvar = 0.5 * 1.0 / np.sqrt(var + self.eps) * dsqrtvar 119 | 120 | # step4 121 | dsq = 1.0 / N * np.ones((N, D)) * dvar 122 | 123 | # step3 124 | dxmu2 = 2 * xmu * dsq 125 | 126 | # step2 127 | dx1 = dxmu1 + dxmu2 128 | dmu = -1 * np.sum(dxmu1 + 
dxmu2, axis=0) 129 | 130 | # step1 131 | dx2 = 1.0 / N * np.ones((N, D)) * dmu 132 | 133 | # step0 134 | dx = dx1 + dx2 135 | 136 | # Update gradient values 137 | self._params.update_grad("W", dgamma) 138 | self._params.update_grad("b", dbeta) 139 | 140 | return dx 141 | 142 | def backward_pass(self, X): 143 | if len(X.shape) == 2: 144 | # input is a regular layer 145 | return self._backward_pass(X) 146 | elif len(X.shape) == 4: 147 | # input is a convolution layer 148 | N, C, H, W = X.shape 149 | x_flat = X.transpose(0, 2, 3, 1).reshape(-1, C) 150 | out_flat = self._backward_pass(x_flat) 151 | return out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) 152 | else: 153 | raise NotImplementedError("Unknown model shape: {}".format(X.shape)) 154 | 155 | def shape(self, x_shape): 156 | return x_shape 157 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/recurrent/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .lstm import * 3 | from .rnn import * 4 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/recurrent/lstm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | from autograd import elementwise_grad 4 | 5 | from mla.neuralnet.activations import sigmoid 6 | from mla.neuralnet.initializations import get_initializer 7 | from mla.neuralnet.layers import Layer, get_activation, ParamMixin 8 | from mla.neuralnet.parameters import Parameters 9 | 10 | """ 11 | References: 12 | Understanding LSTM Networks http://colah.github.io/posts/2015-08-Understanding-LSTMs/ 13 | A Critical Review of Recurrent Neural Networks for Sequence Learning http://arxiv.org/pdf/1506.00019v4.pdf 14 | """ 15 | 16 | 17 | class LSTM(Layer, ParamMixin): 18 | def __init__(self, hidden_dim, activation="tanh", inner_init="orthogonal", parameters=None, return_sequences=True): 19 | self.return_sequences = return_sequences 20 | self.hidden_dim = hidden_dim 21 | self.inner_init = get_initializer(inner_init) 22 | self.activation = get_activation(activation) 23 | self.activation_d = elementwise_grad(self.activation) 24 | self.sigmoid_d = elementwise_grad(sigmoid) 25 | 26 | if parameters is None: 27 | self._params = Parameters() 28 | else: 29 | self._params = parameters 30 | 31 | self.last_input = None 32 | self.states = None 33 | self.outputs = None 34 | self.gates = None 35 | self.hprev = None 36 | self.input_dim = None 37 | self.W = None 38 | self.U = None 39 | 40 | def setup(self, x_shape): 41 | """ 42 | Naming convention: 43 | i : input gate 44 | f : forget gate 45 | c : cell 46 | o : output gate 47 | 48 | Parameters 49 | ---------- 50 | x_shape : np.array(batch size, time steps, input shape) 51 | """ 52 | self.input_dim = x_shape[2] 53 | # Input -> Hidden 54 | W_params = ["W_i", "W_f", "W_o", "W_c"] 55 | # Hidden -> Hidden 56 | U_params = ["U_i", "U_f", "U_o", "U_c"] 57 | # Bias terms 58 | b_params = ["b_i", "b_f", "b_o", "b_c"] 59 | 60 | # Initialize params 61 | for param in W_params: 62 | self._params[param] = self._params.init((self.input_dim, self.hidden_dim)) 63 | 64 | for param in U_params: 65 | self._params[param] = self.inner_init((self.hidden_dim, self.hidden_dim)) 66 | 67 | for param in b_params: 68 | self._params[param] = np.full((self.hidden_dim,), self._params.initial_bias) 69 | 70 | # Combine weights for simplicity 71 | self.W = [self._params[param] for 
param in W_params] 72 | self.U = [self._params[param] for param in U_params] 73 | 74 | # Init gradient arrays for all weights 75 | self._params.init_grad() 76 | 77 | self.hprev = np.zeros((x_shape[0], self.hidden_dim)) 78 | self.oprev = np.zeros((x_shape[0], self.hidden_dim)) 79 | 80 | def forward_pass(self, X): 81 | n_samples, n_timesteps, input_shape = X.shape 82 | p = self._params 83 | self.last_input = X 84 | 85 | self.states = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) 86 | self.outputs = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) 87 | self.gates = {k: np.zeros((n_samples, n_timesteps, self.hidden_dim)) for k in ["i", "f", "o", "c"]} 88 | 89 | self.states[:, -1, :] = self.hprev 90 | self.outputs[:, -1, :] = self.oprev 91 | 92 | for i in range(n_timesteps): 93 | t_gates = np.dot(X[:, i, :], self.W) + np.dot(self.outputs[:, i - 1, :], self.U) 94 | 95 | # Input 96 | self.gates["i"][:, i, :] = sigmoid(t_gates[:, 0, :] + p["b_i"]) 97 | # Forget 98 | self.gates["f"][:, i, :] = sigmoid(t_gates[:, 1, :] + p["b_f"]) 99 | # Output 100 | self.gates["o"][:, i, :] = sigmoid(t_gates[:, 2, :] + p["b_o"]) 101 | # Cell 102 | self.gates["c"][:, i, :] = self.activation(t_gates[:, 3, :] + p["b_c"]) 103 | 104 | # (previous state * forget) + input + cell 105 | self.states[:, i, :] = ( 106 | self.states[:, i - 1, :] * self.gates["f"][:, i, :] 107 | + self.gates["i"][:, i, :] * self.gates["c"][:, i, :] 108 | ) 109 | self.outputs[:, i, :] = self.gates["o"][:, i, :] * self.activation(self.states[:, i, :]) 110 | 111 | self.hprev = self.states[:, n_timesteps - 1, :].copy() 112 | self.oprev = self.outputs[:, n_timesteps - 1, :].copy() 113 | 114 | if self.return_sequences: 115 | return self.outputs[:, 0:-1, :] 116 | else: 117 | return self.outputs[:, -2, :] 118 | 119 | def backward_pass(self, delta): 120 | if len(delta.shape) == 2: 121 | delta = delta[:, np.newaxis, :] 122 | 123 | n_samples, n_timesteps, input_shape = delta.shape 124 | 125 | # Temporal gradient arrays 126 | grad = {k: np.zeros_like(self._params[k]) for k in self._params.keys()} 127 | 128 | dh_next = np.zeros((n_samples, input_shape)) 129 | output = np.zeros((n_samples, n_timesteps, self.input_dim)) 130 | 131 | # Backpropagation through time 132 | for i in reversed(range(n_timesteps)): 133 | dhi = delta[:, i, :] * self.gates["o"][:, i, :] * self.activation_d(self.states[:, i, :]) + dh_next 134 | 135 | og = delta[:, i, :] * self.activation(self.states[:, i, :]) 136 | de_o = og * self.sigmoid_d(self.gates["o"][:, i, :]) 137 | 138 | grad["W_o"] += np.dot(self.last_input[:, i, :].T, de_o) 139 | grad["U_o"] += np.dot(self.outputs[:, i - 1, :].T, de_o) 140 | grad["b_o"] += de_o.sum(axis=0) 141 | 142 | de_f = (dhi * self.states[:, i - 1, :]) * self.sigmoid_d(self.gates["f"][:, i, :]) 143 | grad["W_f"] += np.dot(self.last_input[:, i, :].T, de_f) 144 | grad["U_f"] += np.dot(self.outputs[:, i - 1, :].T, de_f) 145 | grad["b_f"] += de_f.sum(axis=0) 146 | 147 | de_i = (dhi * self.gates["c"][:, i, :]) * self.sigmoid_d(self.gates["i"][:, i, :]) 148 | grad["W_i"] += np.dot(self.last_input[:, i, :].T, de_i) 149 | grad["U_i"] += np.dot(self.outputs[:, i - 1, :].T, de_i) 150 | grad["b_i"] += de_i.sum(axis=0) 151 | 152 | de_c = (dhi * self.gates["i"][:, i, :]) * self.activation_d(self.gates["c"][:, i, :]) 153 | grad["W_c"] += np.dot(self.last_input[:, i, :].T, de_c) 154 | grad["U_c"] += np.dot(self.outputs[:, i - 1, :].T, de_c) 155 | grad["b_c"] += de_c.sum(axis=0) 156 | 157 | dh_next = dhi * self.gates["f"][:, i, :] 158 | 159 | # TODO: 
propagate error to the next layer 160 | 161 | # Change actual gradient arrays 162 | for k in grad.keys(): 163 | self._params.update_grad(k, grad[k]) 164 | return output 165 | 166 | def shape(self, x_shape): 167 | if self.return_sequences: 168 | return x_shape[0], x_shape[1], self.hidden_dim 169 | else: 170 | return x_shape[0], self.hidden_dim 171 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/recurrent/rnn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | from autograd import elementwise_grad 4 | 5 | from mla.neuralnet.initializations import get_initializer 6 | from mla.neuralnet.layers import Layer, get_activation, ParamMixin 7 | from mla.neuralnet.parameters import Parameters 8 | 9 | 10 | class RNN(Layer, ParamMixin): 11 | """Vanilla RNN.""" 12 | 13 | def __init__(self, hidden_dim, activation="tanh", inner_init="orthogonal", parameters=None, return_sequences=True): 14 | self.return_sequences = return_sequences 15 | self.hidden_dim = hidden_dim 16 | self.inner_init = get_initializer(inner_init) 17 | self.activation = get_activation(activation) 18 | self.activation_d = elementwise_grad(self.activation) 19 | if parameters is None: 20 | self._params = Parameters() 21 | else: 22 | self._params = parameters 23 | self.last_input = None 24 | self.states = None 25 | self.hprev = None 26 | self.input_dim = None 27 | 28 | def setup(self, x_shape): 29 | """ 30 | Parameters 31 | ---------- 32 | x_shape : np.array(batch size, time steps, input shape) 33 | """ 34 | self.input_dim = x_shape[2] 35 | 36 | # Input -> Hidden 37 | self._params["W"] = self._params.init((self.input_dim, self.hidden_dim)) 38 | # Bias 39 | self._params["b"] = np.full((self.hidden_dim,), self._params.initial_bias) 40 | # Hidden -> Hidden layer 41 | self._params["U"] = self.inner_init((self.hidden_dim, self.hidden_dim)) 42 | 43 | # Init gradient arrays 44 | self._params.init_grad() 45 | 46 | self.hprev = np.zeros((x_shape[0], self.hidden_dim)) 47 | 48 | def forward_pass(self, X): 49 | self.last_input = X 50 | n_samples, n_timesteps, input_shape = X.shape 51 | states = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) 52 | states[:, -1, :] = self.hprev.copy() 53 | p = self._params 54 | 55 | for i in range(n_timesteps): 56 | states[:, i, :] = np.tanh(np.dot(X[:, i, :], p["W"]) + np.dot(states[:, i - 1, :], p["U"]) + p["b"]) 57 | 58 | self.states = states 59 | self.hprev = states[:, n_timesteps - 1, :].copy() 60 | if self.return_sequences: 61 | return states[:, 0:-1, :] 62 | else: 63 | return states[:, -2, :] 64 | 65 | def backward_pass(self, delta): 66 | if len(delta.shape) == 2: 67 | delta = delta[:, np.newaxis, :] 68 | n_samples, n_timesteps, input_shape = delta.shape 69 | p = self._params 70 | 71 | # Temporal gradient arrays 72 | grad = {k: np.zeros_like(p[k]) for k in p.keys()} 73 | 74 | dh_next = np.zeros((n_samples, input_shape)) 75 | output = np.zeros((n_samples, n_timesteps, self.input_dim)) 76 | 77 | # Backpropagation through time 78 | for i in reversed(range(n_timesteps)): 79 | dhi = self.activation_d(self.states[:, i, :]) * (delta[:, i, :] + dh_next) 80 | 81 | grad["W"] += np.dot(self.last_input[:, i, :].T, dhi) 82 | grad["b"] += delta[:, i, :].sum(axis=0) 83 | grad["U"] += np.dot(self.states[:, i - 1, :].T, dhi) 84 | 85 | dh_next = np.dot(dhi, p["U"].T) 86 | 87 | d = np.dot(delta[:, i, :], p["U"].T) 88 | output[:, i, :] = np.dot(d, p["W"].T) 89 | 90 | # Change actual 
gradient arrays 91 | for k in grad.keys(): 92 | self._params.update_grad(k, grad[k]) 93 | return output 94 | 95 | def shape(self, x_shape): 96 | if self.return_sequences: 97 | return x_shape[0], x_shape[1], self.hidden_dim 98 | else: 99 | return x_shape[0], self.hidden_dim 100 | -------------------------------------------------------------------------------- /mla/neuralnet/loss.py: -------------------------------------------------------------------------------- 1 | from ..metrics import mse, logloss, mae, hinge, binary_crossentropy 2 | categorical_crossentropy = logloss 3 | 4 | 5 | def get_loss(name): 6 | """Returns loss function by the name.""" 7 | try: 8 | return globals()[name] 9 | except KeyError: 10 | raise ValueError("Invalid metric function.") 11 | -------------------------------------------------------------------------------- /mla/neuralnet/nnet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from autograd import elementwise_grad 5 | 6 | from mla.base import BaseEstimator 7 | from mla.metrics.metrics import get_metric 8 | from mla.neuralnet.layers import PhaseMixin 9 | from mla.neuralnet.loss import get_loss 10 | from mla.utils import batch_iterator 11 | 12 | np.random.seed(9999) 13 | 14 | """ 15 | Architecture inspired from: 16 | 17 | https://github.com/fchollet/keras 18 | https://github.com/andersbll/deeppy 19 | """ 20 | 21 | 22 | class NeuralNet(BaseEstimator): 23 | fit_required = False 24 | 25 | def __init__( 26 | self, layers, optimizer, loss, max_epochs=10, batch_size=64, metric="mse", shuffle=False, verbose=True 27 | ): 28 | self.verbose = verbose 29 | self.shuffle = shuffle 30 | self.optimizer = optimizer 31 | 32 | self.loss = get_loss(loss) 33 | 34 | # TODO: fix 35 | if loss == "categorical_crossentropy": 36 | self.loss_grad = lambda actual, predicted: -(actual - predicted) 37 | else: 38 | self.loss_grad = elementwise_grad(self.loss, 1) 39 | self.metric = get_metric(metric) 40 | self.layers = layers 41 | self.batch_size = batch_size 42 | self.max_epochs = max_epochs 43 | self._n_layers = 0 44 | self.log_metric = True if loss != metric else False 45 | self.metric_name = metric 46 | self.bprop_entry = self._find_bprop_entry() 47 | self.training = False 48 | self._initialized = False 49 | 50 | def _setup_layers(self, x_shape): 51 | """Initialize model's layers.""" 52 | x_shape = list(x_shape) 53 | x_shape[0] = self.batch_size 54 | 55 | for layer in self.layers: 56 | layer.setup(x_shape) 57 | x_shape = layer.shape(x_shape) 58 | 59 | self._n_layers = len(self.layers) 60 | # Setup optimizer 61 | self.optimizer.setup(self) 62 | self._initialized = True 63 | logging.info("Total parameters: %s" % self.n_params) 64 | 65 | def _find_bprop_entry(self): 66 | """Find entry layer for back propagation.""" 67 | 68 | if len(self.layers) > 0 and not hasattr(self.layers[-1], "parameters"): 69 | return -1 70 | return len(self.layers) 71 | 72 | def fit(self, X, y=None): 73 | if not self._initialized: 74 | self._setup_layers(X.shape) 75 | 76 | if y.ndim == 1: 77 | # Reshape vector to matrix 78 | y = y[:, np.newaxis] 79 | self._setup_input(X, y) 80 | 81 | self.is_training = True 82 | # Pass neural network instance to an optimizer 83 | self.optimizer.optimize(self) 84 | self.is_training = False 85 | 86 | def update(self, X, y): 87 | # Forward pass 88 | y_pred = self.fprop(X) 89 | 90 | # Backward pass 91 | grad = self.loss_grad(y, y_pred) 92 | for layer in reversed(self.layers[: self.bprop_entry]): 93 | grad = 
layer.backward_pass(grad) 94 | return self.loss(y, y_pred) 95 | 96 | def fprop(self, X): 97 | """Forward propagation.""" 98 | for layer in self.layers: 99 | X = layer.forward_pass(X) 100 | return X 101 | 102 | def _predict(self, X=None): 103 | if not self._initialized: 104 | self._setup_layers(X.shape) 105 | 106 | y = [] 107 | X_batch = batch_iterator(X, self.batch_size) 108 | for Xb in X_batch: 109 | y.append(self.fprop(Xb)) 110 | return np.concatenate(y) 111 | 112 | @property 113 | def parametric_layers(self): 114 | for layer in self.layers: 115 | if hasattr(layer, "parameters"): 116 | yield layer 117 | 118 | @property 119 | def parameters(self): 120 | """Returns a list of all parameters.""" 121 | params = [] 122 | for layer in self.parametric_layers: 123 | params.append(layer.parameters) 124 | return params 125 | 126 | def error(self, X=None, y=None): 127 | """Calculate an error for given examples.""" 128 | training_phase = self.is_training 129 | if training_phase: 130 | # Temporally disable training. 131 | # Some layers work differently while training (e.g. Dropout). 132 | self.is_training = False 133 | if X is None and y is None: 134 | y_pred = self._predict(self.X) 135 | score = self.metric(self.y, y_pred) 136 | else: 137 | y_pred = self._predict(X) 138 | score = self.metric(y, y_pred) 139 | if training_phase: 140 | self.is_training = True 141 | return score 142 | 143 | @property 144 | def is_training(self): 145 | return self.training 146 | 147 | @is_training.setter 148 | def is_training(self, train): 149 | self.training = train 150 | for layer in self.layers: 151 | if isinstance(layer, PhaseMixin): 152 | layer.is_training = train 153 | 154 | def shuffle_dataset(self): 155 | """Shuffle rows in the dataset.""" 156 | n_samples = self.X.shape[0] 157 | indices = np.arange(n_samples) 158 | np.random.shuffle(indices) 159 | self.X = self.X.take(indices, axis=0) 160 | self.y = self.y.take(indices, axis=0) 161 | 162 | @property 163 | def n_layers(self): 164 | """Returns the number of layers.""" 165 | return self._n_layers 166 | 167 | @property 168 | def n_params(self): 169 | """Return the number of trainable parameters.""" 170 | return sum([layer.parameters.n_params for layer in self.parametric_layers]) 171 | 172 | def reset(self): 173 | self._initialized = False 174 | -------------------------------------------------------------------------------- /mla/neuralnet/optimizers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from collections import defaultdict 4 | 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from mla.utils import batch_iterator 9 | 10 | """ 11 | References: 12 | 13 | Gradient descent optimization algorithms https://ruder.io/optimizing-gradient-descent/ 14 | """ 15 | 16 | 17 | class Optimizer(object): 18 | def optimize(self, network): 19 | loss_history = [] 20 | for i in range(network.max_epochs): 21 | if network.shuffle: 22 | network.shuffle_dataset() 23 | 24 | start_time = time.time() 25 | loss = self.train_epoch(network) 26 | loss_history.append(loss) 27 | if network.verbose: 28 | msg = "Epoch:%s, train loss: %s" % (i, loss) 29 | if network.log_metric: 30 | msg += ", train %s: %s" % (network.metric_name, network.error()) 31 | msg += ", elapsed: %s sec." 
% (time.time() - start_time) 32 | logging.info(msg) 33 | return loss_history 34 | 35 | def update(self, network): 36 | """Performs an update of parameters.""" 37 | raise NotImplementedError 38 | 39 | def train_epoch(self, network): 40 | losses = [] 41 | 42 | # Create batch iterator 43 | X_batch = batch_iterator(network.X, network.batch_size) 44 | y_batch = batch_iterator(network.y, network.batch_size) 45 | 46 | batch = zip(X_batch, y_batch) 47 | if network.verbose: 48 | batch = tqdm(batch, total=int(np.ceil(network.n_samples / network.batch_size))) 49 | 50 | for X, y in batch: 51 | loss = np.mean(network.update(X, y)) 52 | self.update(network) 53 | losses.append(loss) 54 | 55 | epoch_loss = np.mean(losses) 56 | return epoch_loss 57 | 58 | def train_batch(self, network, X, y): 59 | loss = np.mean(network.update(X, y)) 60 | self.update(network) 61 | return loss 62 | 63 | def setup(self, network): 64 | """Creates additional variables. 65 | Note: Must be called before optimization process.""" 66 | raise NotImplementedError 67 | 68 | 69 | class SGD(Optimizer): 70 | def __init__(self, learning_rate=0.01, momentum=0.9, decay=0.0, nesterov=False): 71 | self.nesterov = nesterov 72 | self.decay = decay 73 | self.momentum = momentum 74 | self.lr = learning_rate 75 | self.iteration = 0 76 | self.velocity = None 77 | 78 | def update(self, network): 79 | lr = self.lr * (1.0 / (1.0 + self.decay * self.iteration)) 80 | 81 | for i, layer in enumerate(network.parametric_layers): 82 | for n in layer.parameters.keys(): 83 | # Get gradient values 84 | grad = layer.parameters.grad[n] 85 | update = self.momentum * self.velocity[i][n] - lr * grad 86 | self.velocity[i][n] = update 87 | if self.nesterov: 88 | # Adjust using updated velocity 89 | update = self.momentum * self.velocity[i][n] - lr * grad 90 | layer.parameters.step(n, update) 91 | self.iteration += 1 92 | 93 | def setup(self, network): 94 | self.velocity = defaultdict(dict) 95 | for i, layer in enumerate(network.parametric_layers): 96 | for n in layer.parameters.keys(): 97 | self.velocity[i][n] = np.zeros_like(layer.parameters[n]) 98 | 99 | 100 | class Adagrad(Optimizer): 101 | def __init__(self, learning_rate=0.01, epsilon=1e-8): 102 | self.eps = epsilon 103 | self.lr = learning_rate 104 | 105 | def update(self, network): 106 | for i, layer in enumerate(network.parametric_layers): 107 | for n in layer.parameters.keys(): 108 | grad = layer.parameters.grad[n] 109 | self.accu[i][n] += grad ** 2 110 | step = self.lr * grad / (np.sqrt(self.accu[i][n]) + self.eps) 111 | layer.parameters.step(n, -step) 112 | 113 | def setup(self, network): 114 | # Accumulators 115 | self.accu = defaultdict(dict) 116 | for i, layer in enumerate(network.parametric_layers): 117 | for n in layer.parameters.keys(): 118 | self.accu[i][n] = np.zeros_like(layer.parameters[n]) 119 | 120 | 121 | class Adadelta(Optimizer): 122 | def __init__(self, learning_rate=1.0, rho=0.95, epsilon=1e-8): 123 | self.rho = rho 124 | self.eps = epsilon 125 | self.lr = learning_rate 126 | 127 | def update(self, network): 128 | for i, layer in enumerate(network.parametric_layers): 129 | for n in layer.parameters.keys(): 130 | grad = layer.parameters.grad[n] 131 | self.accu[i][n] = self.rho * self.accu[i][n] + (1.0 - self.rho) * grad ** 2 132 | step = grad * np.sqrt(self.d_accu[i][n] + self.eps) / np.sqrt(self.accu[i][n] + self.eps) 133 | 134 | layer.parameters.step(n, -step * self.lr) 135 | # Update delta accumulator 136 | self.d_accu[i][n] = self.rho * self.d_accu[i][n] + (1.0 - self.rho) * step ** 2 
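# Note on the two running averages updated just above (Adadelta):
#   accu   <- rho * accu   + (1 - rho) * grad ** 2   (squared gradients)
#   d_accu <- rho * d_accu + (1 - rho) * step ** 2   (squared updates)
# Each parameter step is grad * sqrt(d_accu + eps) / sqrt(accu + eps), scaled by
# learning_rate, which is why the default learning_rate of 1.0 rarely needs tuning.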
137 | 138 | def setup(self, network): 139 | # Accumulators 140 | self.accu = defaultdict(dict) 141 | self.d_accu = defaultdict(dict) 142 | for i, layer in enumerate(network.parametric_layers): 143 | for n in layer.parameters.keys(): 144 | self.accu[i][n] = np.zeros_like(layer.parameters[n]) 145 | self.d_accu[i][n] = np.zeros_like(layer.parameters[n]) 146 | 147 | 148 | class RMSprop(Optimizer): 149 | def __init__(self, learning_rate=0.001, rho=0.9, epsilon=1e-8): 150 | self.eps = epsilon 151 | self.rho = rho 152 | self.lr = learning_rate 153 | 154 | def update(self, network): 155 | for i, layer in enumerate(network.parametric_layers): 156 | for n in layer.parameters.keys(): 157 | grad = layer.parameters.grad[n] 158 | self.accu[i][n] = (self.rho * self.accu[i][n]) + (1.0 - self.rho) * (grad ** 2) 159 | step = self.lr * grad / (np.sqrt(self.accu[i][n]) + self.eps) 160 | layer.parameters.step(n, -step) 161 | 162 | def setup(self, network): 163 | # Accumulators 164 | self.accu = defaultdict(dict) 165 | for i, layer in enumerate(network.parametric_layers): 166 | for n in layer.parameters.keys(): 167 | self.accu[i][n] = np.zeros_like(layer.parameters[n]) 168 | 169 | 170 | class Adam(Optimizer): 171 | def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8): 172 | 173 | self.epsilon = epsilon 174 | self.beta_2 = beta_2 175 | self.beta_1 = beta_1 176 | self.lr = learning_rate 177 | self.iterations = 0 178 | self.t = 1 179 | 180 | def update(self, network): 181 | for i, layer in enumerate(network.parametric_layers): 182 | for n in layer.parameters.keys(): 183 | grad = layer.parameters.grad[n] 184 | self.ms[i][n] = (self.beta_1 * self.ms[i][n]) + (1.0 - self.beta_1) * grad 185 | self.vs[i][n] = (self.beta_2 * self.vs[i][n]) + (1.0 - self.beta_2) * grad ** 2 186 | lr = self.lr * np.sqrt(1.0 - self.beta_2 ** self.t) / (1.0 - self.beta_1 ** self.t) 187 | 188 | step = lr * self.ms[i][n] / (np.sqrt(self.vs[i][n]) + self.epsilon) 189 | layer.parameters.step(n, -step) 190 | self.t += 1 191 | 192 | def setup(self, network): 193 | # Accumulators 194 | self.ms = defaultdict(dict) 195 | self.vs = defaultdict(dict) 196 | for i, layer in enumerate(network.parametric_layers): 197 | for n in layer.parameters.keys(): 198 | self.ms[i][n] = np.zeros_like(layer.parameters[n]) 199 | self.vs[i][n] = np.zeros_like(layer.parameters[n]) 200 | 201 | 202 | class Adamax(Optimizer): 203 | def __init__(self, learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8): 204 | 205 | self.epsilon = epsilon 206 | self.beta_2 = beta_2 207 | self.beta_1 = beta_1 208 | self.lr = learning_rate 209 | self.t = 1 210 | 211 | def update(self, network): 212 | for i, layer in enumerate(network.parametric_layers): 213 | for n in layer.parameters.keys(): 214 | grad = layer.parameters.grad[n] 215 | self.ms[i][n] = self.beta_1 * self.ms[i][n] + (1.0 - self.beta_1) * grad 216 | self.us[i][n] = np.maximum(self.beta_2 * self.us[i][n], np.abs(grad)) 217 | 218 | step = self.lr / (1 - self.beta_1 ** self.t) * self.ms[i][n] / (self.us[i][n] + self.epsilon) 219 | layer.parameters.step(n, -step) 220 | self.t += 1 221 | 222 | def setup(self, network): 223 | self.ms = defaultdict(dict) 224 | self.us = defaultdict(dict) 225 | for i, layer in enumerate(network.parametric_layers): 226 | for n in layer.parameters.keys(): 227 | self.ms[i][n] = np.zeros_like(layer.parameters[n]) 228 | self.us[i][n] = np.zeros_like(layer.parameters[n]) 229 | -------------------------------------------------------------------------------- 
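All optimizers above share the same contract: `setup(network)` allocates per-parameter state, and `update(network)` applies one step using each parametric layer's accumulated gradients; `optimize(network)` drives the epoch loop. A minimal usage sketch follows, mirroring mla/neuralnet/tests/test_optimizers.py; the toy dataset, layer sizes and epoch count are illustrative assumptions, not part of the library.

import numpy as np
from sklearn.datasets import make_classification

from mla.neuralnet import NeuralNet
from mla.neuralnet.layers import Activation, Dense
from mla.neuralnet.optimizers import Adam
from mla.utils import one_hot

# Illustrative toy problem; sizes and the seed are arbitrary.
X, y = make_classification(n_samples=500, n_features=20, n_classes=2, random_state=1111)
X = (X - X.mean(axis=0)) / X.std(axis=0)   # standardize features, as the tests do
y = one_hot(y)                             # (n_samples, 2) targets for softmax output

model = NeuralNet(
    layers=[Dense(32), Activation("relu"), Dense(2), Activation("softmax")],
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.001),   # any optimizer class defined above works here
    metric="accuracy",
    batch_size=64,
    max_epochs=5,
)
model.fit(X, y)                            # optimizer.setup/optimize are invoked internally
probabilities = model.predict(X)           # shape (n_samples, 2)

Swapping Adam() for, e.g., SGD(learning_rate=0.01, momentum=0.9, nesterov=True) or RMSprop() requires no other changes, since the network only interacts with the optimizer through setup and optimize.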
/mla/neuralnet/parameters.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | from mla.neuralnet.initializations import get_initializer 5 | 6 | 7 | class Parameters(object): 8 | def __init__(self, init="glorot_uniform", scale=0.5, bias=1.0, regularizers=None, constraints=None): 9 | """A container for layer's parameters. 10 | 11 | Parameters 12 | ---------- 13 | init : str, default 'glorot_uniform'. 14 | The name of the weight initialization function. 15 | scale : float, default 0.5 16 | bias : float, default 1.0 17 | Initial values for bias. 18 | regularizers : dict 19 | Weight regularizers. 20 | >>> {'W' : L2()} 21 | constraints : dict 22 | Weight constraints. 23 | >>> {'b' : MaxNorm()} 24 | """ 25 | if constraints is None: 26 | self.constraints = {} 27 | else: 28 | self.constraints = constraints 29 | 30 | if regularizers is None: 31 | self.regularizers = {} 32 | else: 33 | self.regularizers = regularizers 34 | 35 | self.initial_bias = bias 36 | self.scale = scale 37 | self.init = get_initializer(init) 38 | 39 | self._params = {} 40 | self._grads = {} 41 | 42 | def setup_weights(self, W_shape, b_shape=None): 43 | if "W" not in self._params: 44 | self._params["W"] = self.init(shape=W_shape, scale=self.scale) 45 | if b_shape is None: 46 | self._params["b"] = np.full(W_shape[1], self.initial_bias) 47 | else: 48 | self._params["b"] = np.full(b_shape, self.initial_bias) 49 | self.init_grad() 50 | 51 | def init_grad(self): 52 | """Init gradient arrays corresponding to each weight array.""" 53 | for key in self._params.keys(): 54 | if key not in self._grads: 55 | self._grads[key] = np.zeros_like(self._params[key]) 56 | 57 | def step(self, name, step): 58 | """Increase specific weight by amount of the step parameter.""" 59 | self._params[name] += step 60 | 61 | if name in self.constraints: 62 | self._params[name] = self.constraints[name].clip(self._params[name]) 63 | 64 | def update_grad(self, name, value): 65 | """Update gradient values.""" 66 | self._grads[name] = value 67 | 68 | if name in self.regularizers: 69 | self._grads[name] += self.regularizers[name](self._params[name]) 70 | 71 | @property 72 | def n_params(self): 73 | """Count the number of parameters in this layer.""" 74 | return sum([np.prod(self._params[x].shape) for x in self._params.keys()]) 75 | 76 | def keys(self): 77 | return self._params.keys() 78 | 79 | @property 80 | def grad(self): 81 | return self._grads 82 | 83 | # Allow access to the fields using dict syntax, e.g. 
parameters['W'] 84 | def __getitem__(self, item): 85 | if item in self._params: 86 | return self._params[item] 87 | else: 88 | raise ValueError 89 | 90 | def __setitem__(self, key, value): 91 | self._params[key] = value 92 | -------------------------------------------------------------------------------- /mla/neuralnet/regularizers.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from autograd import elementwise_grad 4 | 5 | 6 | class Regularizer(object): 7 | def __init__(self, C=0.01): 8 | self.C = C 9 | self._grad = elementwise_grad(self._penalty) 10 | 11 | def _penalty(self, weights): 12 | raise NotImplementedError() 13 | 14 | def grad(self, weights): 15 | return self._grad(weights) 16 | 17 | def __call__(self, weights): 18 | return self.grad(weights) 19 | 20 | 21 | class L1(Regularizer): 22 | def _penalty(self, weights): 23 | return self.C * np.abs(weights) 24 | 25 | 26 | class L2(Regularizer): 27 | def _penalty(self, weights): 28 | return self.C * weights ** 2 29 | 30 | 31 | class ElasticNet(Regularizer): 32 | """Linear combination of L1 and L2 penalties.""" 33 | 34 | def _penalty(self, weights): 35 | return 0.5 * self.C * weights ** 2 + (1.0 - self.C) * np.abs(weights) 36 | -------------------------------------------------------------------------------- /mla/neuralnet/tests/test_activations.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | 5 | from mla.neuralnet.activations import * 6 | 7 | 8 | def test_softplus(): 9 | # np.exp(z_max) will overflow 10 | z_max = np.log(sys.float_info.max) + 1.0e10 11 | # 1.0 / np.exp(z_min) will overflow 12 | z_min = np.log(sys.float_info.min) - 1.0e10 13 | inputs = np.array([0.0, 1.0, -1.0, z_min, z_max]) 14 | # naive implementation of np.log(1 + np.exp(z_max)) will overflow 15 | # naive implementation of z + np.log(1 + 1 / np.exp(z_min)) will 16 | # throw ZeroDivisionError 17 | outputs = np.array([np.log(2.0), np.log1p(np.exp(1.0)), np.log1p(np.exp(-1.0)), 0.0, z_max]) 18 | 19 | assert np.allclose(outputs, softplus(inputs)) 20 | -------------------------------------------------------------------------------- /mla/neuralnet/tests/test_optimizers.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_classification 2 | from sklearn.metrics import roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | 5 | from mla.neuralnet import NeuralNet 6 | from mla.neuralnet.layers import Dense, Activation, Dropout, Parameters 7 | from mla.neuralnet.optimizers import * 8 | from mla.utils import one_hot 9 | 10 | 11 | def clasifier(optimizer): 12 | X, y = make_classification( 13 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 14 | ) 15 | y = one_hot(y) 16 | 17 | X -= np.mean(X, axis=0) 18 | X /= np.std(X, axis=0) 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 20 | 21 | model = NeuralNet( 22 | layers=[ 23 | Dense(128, Parameters(init="uniform")), 24 | Activation("relu"), 25 | Dropout(0.5), 26 | Dense(64, Parameters(init="normal")), 27 | Activation("relu"), 28 | Dense(2), 29 | Activation("softmax"), 30 | ], 31 | loss="categorical_crossentropy", 32 | optimizer=optimizer, 33 | metric="accuracy", 34 | batch_size=64, 35 | max_epochs=10, 36 | ) 37 | model.fit(X_train, y_train) 38 | predictions = model.predict(X_test) 39 | 
return roc_auc_score(y_test[:, 0], predictions[:, 0]) 40 | 41 | 42 | def test_adadelta(): 43 | assert clasifier(Adadelta()) > 0.9 44 | 45 | 46 | def test_adam(): 47 | assert clasifier(Adam()) > 0.9 48 | 49 | 50 | def test_adamax(): 51 | assert clasifier(Adamax()) > 0.9 52 | 53 | 54 | def test_rmsprop(): 55 | assert clasifier(RMSprop()) > 0.9 56 | 57 | 58 | def test_adagrad(): 59 | assert clasifier(Adagrad()) > 0.9 60 | 61 | 62 | def test_sgd(): 63 | assert clasifier(SGD(learning_rate=0.0001)) > 0.9 64 | assert clasifier(SGD(learning_rate=0.0001, nesterov=True, momentum=0.9)) > 0.9 65 | assert clasifier(SGD(learning_rate=0.0001, nesterov=False, momentum=0.0)) > 0.9 66 | -------------------------------------------------------------------------------- /mla/pca.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | from scipy.linalg import svd 6 | 7 | from mla.base import BaseEstimator 8 | 9 | np.random.seed(1000) 10 | 11 | 12 | class PCA(BaseEstimator): 13 | y_required = False 14 | 15 | def __init__(self, n_components, solver="svd"): 16 | """Principal component analysis (PCA) implementation. 17 | 18 | Transforms a dataset of possibly correlated values into n linearly 19 | uncorrelated components. The components are ordered such that the first 20 | has the largest possible variance and each following component as the 21 | largest possible variance given the previous components. This causes 22 | the early components to contain most of the variability in the dataset. 23 | 24 | Parameters 25 | ---------- 26 | n_components : int 27 | solver : str, default 'svd' 28 | {'svd', 'eigen'} 29 | """ 30 | self.solver = solver 31 | self.n_components = n_components 32 | self.components = None 33 | self.mean = None 34 | 35 | def fit(self, X, y=None): 36 | self.mean = np.mean(X, axis=0) 37 | self._decompose(X) 38 | 39 | def _decompose(self, X): 40 | # Mean centering 41 | X = X.copy() 42 | X -= self.mean 43 | 44 | if self.solver == "svd": 45 | _, s, Vh = svd(X, full_matrices=True) 46 | elif self.solver == "eigen": 47 | s, Vh = np.linalg.eig(np.cov(X.T)) 48 | Vh = Vh.T 49 | 50 | s_squared = s ** 2 51 | variance_ratio = s_squared / s_squared.sum() 52 | logging.info("Explained variance ratio: %s" % (variance_ratio[0: self.n_components])) 53 | self.components = Vh[0: self.n_components] 54 | 55 | def transform(self, X): 56 | X = X.copy() 57 | X -= self.mean 58 | return np.dot(X, self.components.T) 59 | 60 | def _predict(self, X=None): 61 | return self.transform(X) 62 | -------------------------------------------------------------------------------- /mla/rbm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | from scipy.special import expit 6 | 7 | from mla.base import BaseEstimator 8 | from mla.utils import batch_iterator 9 | 10 | np.random.seed(9999) 11 | sigmoid = expit 12 | 13 | """ 14 | References: 15 | A Practical Guide to Training Restricted Boltzmann Machines https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf 16 | """ 17 | 18 | 19 | class RBM(BaseEstimator): 20 | y_required = False 21 | 22 | def __init__(self, n_hidden=128, learning_rate=0.1, batch_size=10, max_epochs=100): 23 | """Bernoulli Restricted Boltzmann Machine (RBM) 24 | 25 | Parameters 26 | ---------- 27 | 28 | n_hidden : int, default 128 29 | The number of hidden units. 
30 | learning_rate : float, default 0.1 31 | batch_size : int, default 10 32 | max_epochs : int, default 100 33 | """ 34 | self.max_epochs = max_epochs 35 | self.batch_size = batch_size 36 | self.lr = learning_rate 37 | self.n_hidden = n_hidden 38 | 39 | def fit(self, X, y=None): 40 | self.n_visible = X.shape[1] 41 | self._init_weights() 42 | self._setup_input(X, y) 43 | self._train() 44 | 45 | def _init_weights(self): 46 | 47 | self.W = np.random.randn(self.n_visible, self.n_hidden) * 0.1 48 | 49 | # Bias for visible and hidden units 50 | self.bias_v = np.zeros(self.n_visible, dtype=np.float32) 51 | self.bias_h = np.zeros(self.n_hidden, dtype=np.float32) 52 | 53 | self.errors = [] 54 | 55 | def _train(self): 56 | """Use CD-1 training procedure, basically an exact inference for `positive_associations`, 57 | followed by a "non burn-in" block Gibbs Sampling for the `negative_associations`.""" 58 | 59 | for i in range(self.max_epochs): 60 | error = 0 61 | for batch in batch_iterator(self.X, batch_size=self.batch_size): 62 | positive_hidden = sigmoid(np.dot(batch, self.W) + self.bias_h) 63 | hidden_states = self._sample(positive_hidden) # sample hidden state h1 64 | positive_associations = np.dot(batch.T, positive_hidden) 65 | 66 | negative_visible = sigmoid(np.dot(hidden_states, self.W.T) + self.bias_v) 67 | negative_visible = self._sample(negative_visible) # use the sampled hidden state h1 to sample v1 68 | negative_hidden = sigmoid(np.dot(negative_visible, self.W) + self.bias_h) 69 | negative_associations = np.dot(negative_visible.T, negative_hidden) 70 | 71 | lr = self.lr / float(batch.shape[0]) 72 | self.W += lr * ((positive_associations - negative_associations) / float(self.batch_size)) 73 | self.bias_h += lr * (negative_hidden.sum(axis=0) - negative_associations.sum(axis=0)) 74 | self.bias_v += lr * (np.asarray(batch.sum(axis=0)).squeeze() - negative_visible.sum(axis=0)) 75 | 76 | error += np.sum((batch - negative_visible) ** 2) 77 | 78 | self.errors.append(error) 79 | logging.info("Iteration %s, error %s" % (i, error)) 80 | logging.debug("Weights: %s" % self.W) 81 | logging.debug("Hidden bias: %s" % self.bias_h) 82 | logging.debug("Visible bias: %s" % self.bias_v) 83 | 84 | def _sample(self, X): 85 | return X > np.random.random_sample(size=X.shape) 86 | 87 | def _predict(self, X=None): 88 | return sigmoid(np.dot(X, self.W) + self.bias_h) 89 | -------------------------------------------------------------------------------- /mla/rl/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | -------------------------------------------------------------------------------- /mla/rl/dqn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | import random 4 | 5 | import gym 6 | import numpy as np 7 | from gym import wrappers 8 | 9 | np.random.seed(9999) 10 | 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | 14 | """ 15 | References: 16 | Sutton, Barto (2017). Reinforcement Learning: An Introduction. MIT Press, Cambridge, MA. 17 | """ 18 | 19 | 20 | class DQN(object): 21 | def __init__( 22 | self, n_episodes=500, gamma=0.99, batch_size=32, epsilon=1.0, decay=0.005, min_epsilon=0.1, memory_limit=500 23 | ): 24 | """Deep Q learning implementation. 25 | 26 | Parameters 27 | ---------- 28 | 29 | min_epsilon : float 30 | Minimal value for epsilon. 31 | epsilon : float 32 | ε-greedy value. 33 | decay : float 34 | Epsilon decay rate. 
35 | memory_limit : int 36 | Limit of experience replay memory. 37 | 38 | """ 39 | 40 | self.memory_limit = memory_limit 41 | self.min_epsilon = min_epsilon 42 | self.gamma = gamma 43 | self.epsilon = epsilon 44 | self.n_episodes = n_episodes 45 | self.batch_size = batch_size 46 | self.decay = decay 47 | 48 | def init_environment(self, name="CartPole-v0", monitor=False): 49 | self.env = gym.make(name) 50 | if monitor: 51 | self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False) 52 | 53 | self.n_states = self.env.observation_space.shape[0] 54 | self.n_actions = self.env.action_space.n 55 | 56 | # Experience replay 57 | self.replay = [] 58 | 59 | def init_model(self, model): 60 | self.model = model(self.n_actions, self.batch_size) 61 | 62 | def train(self, render=False): 63 | max_reward = 0 64 | 65 | for ep in range(self.n_episodes): 66 | state = self.env.reset() 67 | 68 | total_reward = 0 69 | 70 | while True: 71 | if render: 72 | self.env.render() 73 | 74 | if np.random.rand() <= self.epsilon: 75 | # Exploration 76 | action = np.random.randint(self.n_actions) 77 | else: 78 | # Exploitation 79 | action = np.argmax(self.model.predict(state[np.newaxis, :])[0]) 80 | 81 | # Run one timestep of the environment 82 | new_state, reward, done, _ = self.env.step(action) 83 | self.replay.append([state, action, reward, new_state, done]) 84 | 85 | # Sample batch from experience replay 86 | batch_size = min(len(self.replay), self.batch_size) 87 | batch = random.sample(self.replay, batch_size) 88 | 89 | X = np.zeros((batch_size, self.n_states)) 90 | y = np.zeros((batch_size, self.n_actions)) 91 | 92 | states = np.array([b[0] for b in batch]) 93 | new_states = np.array([b[3] for b in batch]) 94 | 95 | Q = self.model.predict(states) 96 | new_Q = self.model.predict(new_states) 97 | 98 | # Construct training data 99 | for i in range(batch_size): 100 | state_r, action_r, reward_r, new_state_r, done_r = batch[i] 101 | target = Q[i] 102 | 103 | if done_r: 104 | target[action_r] = reward_r 105 | else: 106 | target[action_r] = reward_r + self.gamma * np.amax(new_Q[i]) 107 | 108 | X[i, :] = state_r 109 | y[i, :] = target 110 | 111 | # Train deep learning model 112 | self.model.fit(X, y) 113 | 114 | total_reward += reward 115 | state = new_state 116 | 117 | if done: 118 | # Exit from current episode 119 | break 120 | 121 | # Remove old entries from replay memory 122 | while len(self.replay) > self.memory_limit: 123 | self.replay.pop(0) 124 | 125 | self.epsilon = self.min_epsilon + (1.0 - self.min_epsilon) * np.exp(-self.decay * ep) 126 | 127 | max_reward = max(max_reward, total_reward) 128 | logger.info( 129 | "Episode: %s, reward %s, epsilon %s, max reward %s" % (ep, total_reward, self.epsilon, max_reward) 130 | ) 131 | logging.info("Training finished.") 132 | 133 | def play(self, episodes): 134 | for i in range(episodes): 135 | state = self.env.reset() 136 | total_reward = 0 137 | 138 | while True: 139 | self.env.render() 140 | action = np.argmax(self.model.predict(state[np.newaxis, :])[0]) 141 | state, reward, done, _ = self.env.step(action) 142 | total_reward += reward 143 | if done: 144 | break 145 | logger.info("Episode: %s, reward %s" % (i, total_reward)) 146 | self.env.close() 147 | -------------------------------------------------------------------------------- /mla/svm/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | -------------------------------------------------------------------------------- /mla/svm/kernerls.py: 
-------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | import scipy.spatial.distance as dist 4 | 5 | 6 | class Linear(object): 7 | def __call__(self, x, y): 8 | return np.dot(x, y.T) 9 | 10 | def __repr__(self): 11 | return "Linear kernel" 12 | 13 | 14 | class Poly(object): 15 | def __init__(self, degree=2): 16 | self.degree = degree 17 | 18 | def __call__(self, x, y): 19 | return np.dot(x, y.T) ** self.degree 20 | 21 | def __repr__(self): 22 | return "Poly kernel" 23 | 24 | 25 | class RBF(object): 26 | def __init__(self, gamma=0.1): 27 | self.gamma = gamma 28 | 29 | def __call__(self, x, y): 30 | x = np.atleast_2d(x) 31 | y = np.atleast_2d(y) 32 | return np.exp(-self.gamma * dist.cdist(x, y) ** 2).flatten() 33 | 34 | def __repr__(self): 35 | return "RBF kernel" 36 | -------------------------------------------------------------------------------- /mla/svm/svm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | 6 | from mla.base import BaseEstimator 7 | from mla.svm.kernerls import Linear 8 | 9 | np.random.seed(9999) 10 | 11 | """ 12 | References: 13 | The Simplified SMO Algorithm http://cs229.stanford.edu/materials/smo.pdf 14 | """ 15 | 16 | 17 | class SVM(BaseEstimator): 18 | def __init__(self, C=1.0, kernel=None, tol=1e-3, max_iter=100): 19 | """Support vector machines implementation using simplified SMO optimization. 20 | 21 | Parameters 22 | ---------- 23 | C : float, default 1.0 24 | kernel : Kernel object 25 | tol : float , default 1e-3 26 | max_iter : int, default 100 27 | """ 28 | self.C = C 29 | self.tol = tol 30 | self.max_iter = max_iter 31 | if kernel is None: 32 | self.kernel = Linear() 33 | else: 34 | self.kernel = kernel 35 | 36 | self.b = 0 37 | self.alpha = None 38 | self.K = None 39 | 40 | def fit(self, X, y=None): 41 | self._setup_input(X, y) 42 | self.K = np.zeros((self.n_samples, self.n_samples)) 43 | for i in range(self.n_samples): 44 | self.K[:, i] = self.kernel(self.X, self.X[i, :]) 45 | self.alpha = np.zeros(self.n_samples) 46 | self.sv_idx = np.arange(0, self.n_samples) 47 | return self._train() 48 | 49 | def _train(self): 50 | iters = 0 51 | while iters < self.max_iter: 52 | iters += 1 53 | alpha_prev = np.copy(self.alpha) 54 | 55 | for j in range(self.n_samples): 56 | # Pick random i 57 | i = self.random_index(j) 58 | 59 | eta = 2.0 * self.K[i, j] - self.K[i, i] - self.K[j, j] 60 | if eta >= 0: 61 | continue 62 | L, H = self._find_bounds(i, j) 63 | 64 | # Error for current examples 65 | e_i, e_j = self._error(i), self._error(j) 66 | 67 | # Save old alphas 68 | alpha_io, alpha_jo = self.alpha[i], self.alpha[j] 69 | 70 | # Update alpha 71 | self.alpha[j] -= (self.y[j] * (e_i - e_j)) / eta 72 | self.alpha[j] = self.clip(self.alpha[j], H, L) 73 | 74 | self.alpha[i] = self.alpha[i] + self.y[i] * self.y[j] * (alpha_jo - self.alpha[j]) 75 | 76 | # Find intercept 77 | b1 = ( 78 | self.b - e_i - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, i] 79 | - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[i, j] 80 | ) 81 | b2 = ( 82 | self.b - e_j - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[j, j] 83 | - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, j] 84 | ) 85 | if 0 < self.alpha[i] < self.C: 86 | self.b = b1 87 | elif 0 < self.alpha[j] < self.C: 88 | self.b = b2 89 | else: 90 | self.b = 0.5 * (b1 + b2) 91 | 92 | # Check convergence 93 | diff = np.linalg.norm(self.alpha - alpha_prev) 94 | if 
diff < self.tol: 95 | break 96 | logging.info("Convergence has reached after %s." % iters) 97 | 98 | # Save support vectors index 99 | self.sv_idx = np.where(self.alpha > 0)[0] 100 | 101 | def _predict(self, X=None): 102 | n = X.shape[0] 103 | result = np.zeros(n) 104 | for i in range(n): 105 | result[i] = np.sign(self._predict_row(X[i, :])) 106 | return result 107 | 108 | def _predict_row(self, X): 109 | k_v = self.kernel(self.X[self.sv_idx], X) 110 | return np.dot((self.alpha[self.sv_idx] * self.y[self.sv_idx]).T, k_v.T) + self.b 111 | 112 | def clip(self, alpha, H, L): 113 | if alpha > H: 114 | alpha = H 115 | if alpha < L: 116 | alpha = L 117 | return alpha 118 | 119 | def _error(self, i): 120 | """Error for single example.""" 121 | return self._predict_row(self.X[i]) - self.y[i] 122 | 123 | def _find_bounds(self, i, j): 124 | """Find L and H such that L <= alpha <= H. 125 | Also, alpha must satisfy the constraint 0 <= αlpha <= C. 126 | """ 127 | if self.y[i] != self.y[j]: 128 | L = max(0, self.alpha[j] - self.alpha[i]) 129 | H = min(self.C, self.C - self.alpha[i] + self.alpha[j]) 130 | else: 131 | L = max(0, self.alpha[i] + self.alpha[j] - self.C) 132 | H = min(self.C, self.alpha[i] + self.alpha[j]) 133 | return L, H 134 | 135 | def random_index(self, z): 136 | i = z 137 | while i == z: 138 | i = np.random.randint(0, self.n_samples - 1) 139 | return i 140 | -------------------------------------------------------------------------------- /mla/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/tests/__init__.py -------------------------------------------------------------------------------- /mla/tests/test_classification_accuracy.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score 2 | 3 | from mla.ensemble import RandomForestClassifier 4 | from mla.ensemble.gbm import GradientBoostingClassifier 5 | from mla.knn import KNNClassifier 6 | from mla.linear_models import LogisticRegression 7 | from mla.metrics import accuracy 8 | from mla.naive_bayes import NaiveBayesClassifier 9 | from mla.neuralnet import NeuralNet 10 | from mla.neuralnet.constraints import MaxNorm 11 | from mla.neuralnet.layers import Activation, Dense, Dropout 12 | from mla.neuralnet.optimizers import Adadelta 13 | from mla.neuralnet.parameters import Parameters 14 | from mla.neuralnet.regularizers import L2 15 | from mla.svm.kernerls import RBF, Linear 16 | from mla.svm.svm import SVM 17 | from mla.utils import one_hot 18 | 19 | try: 20 | from sklearn.model_selection import train_test_split 21 | except ImportError: 22 | from sklearn.cross_validation import train_test_split 23 | from sklearn.datasets import make_classification 24 | 25 | # Generate a random regression problem 26 | X, y = make_classification( 27 | n_samples=750, n_features=10, n_informative=8, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 28 | ) 29 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=1111) 30 | 31 | 32 | # All classifiers except convnet, RNN, LSTM. 
33 | 34 | 35 | def test_linear_model(): 36 | model = LogisticRegression(lr=0.01, max_iters=500, penalty="l1", C=0.01) 37 | model.fit(X_train, y_train) 38 | predictions = model.predict(X_test) 39 | assert roc_auc_score(y_test, predictions) >= 0.95 40 | 41 | 42 | def test_random_forest(): 43 | model = RandomForestClassifier(n_estimators=10, max_depth=4) 44 | model.fit(X_train, y_train) 45 | predictions = model.predict(X_test)[:, 1] 46 | assert roc_auc_score(y_test, predictions) >= 0.95 47 | 48 | 49 | def test_svm_classification(): 50 | y_signed_train = (y_train * 2) - 1 51 | y_signed_test = (y_test * 2) - 1 52 | 53 | for kernel in [RBF(gamma=0.05), Linear()]: 54 | model = SVM(max_iter=500, kernel=kernel) 55 | model.fit(X_train, y_signed_train) 56 | predictions = model.predict(X_test) 57 | assert accuracy(y_signed_test, predictions) >= 0.8 58 | 59 | 60 | def test_mlp(): 61 | y_train_onehot = one_hot(y_train) 62 | y_test_onehot = one_hot(y_test) 63 | 64 | model = NeuralNet( 65 | layers=[ 66 | Dense(256, Parameters(init="uniform", regularizers={"W": L2(0.05)})), 67 | Activation("relu"), 68 | Dropout(0.5), 69 | Dense(128, Parameters(init="normal", constraints={"W": MaxNorm()})), 70 | Activation("relu"), 71 | Dense(2), 72 | Activation("softmax"), 73 | ], 74 | loss="categorical_crossentropy", 75 | optimizer=Adadelta(), 76 | metric="accuracy", 77 | batch_size=64, 78 | max_epochs=25, 79 | ) 80 | model.fit(X_train, y_train_onehot) 81 | predictions = model.predict(X_test) 82 | assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.95 83 | 84 | 85 | def test_gbm(): 86 | model = GradientBoostingClassifier(n_estimators=25, max_depth=3, max_features=5, learning_rate=0.1) 87 | model.fit(X_train, y_train) 88 | predictions = model.predict(X_test) 89 | assert roc_auc_score(y_test, predictions) >= 0.95 90 | 91 | 92 | def test_naive_bayes(): 93 | model = NaiveBayesClassifier() 94 | model.fit(X_train, y_train) 95 | predictions = model.predict(X_test)[:, 1] 96 | assert roc_auc_score(y_test, predictions) >= 0.95 97 | 98 | 99 | def test_knn(): 100 | clf = KNNClassifier(k=5) 101 | 102 | clf.fit(X_train, y_train) 103 | predictions = clf.predict(X_test) 104 | assert accuracy(y_test, predictions) >= 0.95 105 | -------------------------------------------------------------------------------- /mla/tests/test_reduction.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import pytest 3 | from sklearn.datasets import make_classification 4 | from sklearn.metrics import roc_auc_score 5 | 6 | try: 7 | from sklearn.model_selection import train_test_split 8 | except ImportError: 9 | from sklearn.cross_validation import train_test_split 10 | 11 | from mla.ensemble import RandomForestClassifier 12 | from mla.pca import PCA 13 | 14 | 15 | @pytest.fixture 16 | def dataset(): 17 | # Generate a random binary classification problem. 
18 | return make_classification( 19 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 20 | ) 21 | 22 | 23 | # TODO: fix 24 | @pytest.mark.skip() 25 | def test_PCA(dataset): 26 | X, y = dataset 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 28 | p = PCA(50, solver="eigen") 29 | 30 | # fit PCA with training set, not the entire dataset 31 | p.fit(X_train) 32 | X_train_reduced = p.transform(X_train) 33 | X_test_reduced = p.transform(X_test) 34 | 35 | model = RandomForestClassifier(n_estimators=25, max_depth=5) 36 | model.fit(X_train_reduced, y_train) 37 | predictions = model.predict(X_test_reduced)[:, 1] 38 | score = roc_auc_score(y_test, predictions) 39 | assert score >= 0.75 40 | -------------------------------------------------------------------------------- /mla/tests/test_regression_accuracy.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.model_selection import train_test_split 3 | except ImportError: 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.datasets import make_regression 6 | 7 | from mla.knn import KNNRegressor 8 | from mla.linear_models import LinearRegression 9 | from mla.metrics.metrics import mean_squared_error 10 | from mla.neuralnet import NeuralNet 11 | from mla.neuralnet.layers import Activation, Dense 12 | from mla.neuralnet.optimizers import Adam 13 | from mla.neuralnet.parameters import Parameters 14 | 15 | # Generate a random regression problem 16 | X, y = make_regression( 17 | n_samples=1000, n_features=10, n_informative=10, n_targets=1, noise=0.05, random_state=1111, bias=0.5 18 | ) 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 20 | 21 | 22 | def test_linear(): 23 | model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.003) 24 | model.fit(X_train, y_train) 25 | predictions = model.predict(X_test) 26 | assert mean_squared_error(y_test, predictions) < 0.25 27 | 28 | 29 | def test_mlp(): 30 | model = NeuralNet( 31 | layers=[ 32 | Dense(16, Parameters(init="normal")), 33 | Activation("linear"), 34 | Dense(8, Parameters(init="normal")), 35 | Activation("linear"), 36 | Dense(1), 37 | ], 38 | loss="mse", 39 | optimizer=Adam(), 40 | metric="mse", 41 | batch_size=64, 42 | max_epochs=150, 43 | ) 44 | model.fit(X_train, y_train) 45 | predictions = model.predict(X_test) 46 | assert mean_squared_error(y_test, predictions.flatten()) < 1.0 47 | 48 | 49 | def test_knn(): 50 | model = KNNRegressor(k=5) 51 | model.fit(X_train, y_train) 52 | predictions = model.predict(X_test) 53 | assert mean_squared_error(y_test, predictions) < 10000 54 | -------------------------------------------------------------------------------- /mla/tsne.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | 6 | from mla.base import BaseEstimator 7 | from mla.metrics.distance import l2_distance 8 | 9 | np.random.seed(999) 10 | 11 | """ 12 | References: 13 | https://lvdmaaten.github.io/tsne/ 14 | Based on: 15 | https://lvdmaaten.github.io/tsne/code/tsne_python.zip 16 | """ 17 | 18 | 19 | class TSNE(BaseEstimator): 20 | y_required = False 21 | 22 | def __init__(self, n_components=2, perplexity=30.0, max_iter=200, learning_rate=500): 23 | """A t-Distributed Stochastic Neighbor Embedding implementation. 
24 | 25 | Parameters 26 | ---------- 27 | max_iter : int, default 200 28 | perplexity : float, default 30.0 29 | n_components : int, default 2 30 | """ 31 | self.max_iter = max_iter 32 | self.perplexity = perplexity 33 | self.n_components = n_components 34 | self.initial_momentum = 0.5 35 | self.final_momentum = 0.8 36 | self.min_gain = 0.01 37 | self.lr = learning_rate 38 | self.tol = 1e-5 39 | self.perplexity_tries = 50 40 | 41 | def fit_transform(self, X, y=None): 42 | self._setup_input(X, y) 43 | 44 | Y = np.random.randn(self.n_samples, self.n_components) 45 | velocity = np.zeros_like(Y) 46 | gains = np.ones_like(Y) 47 | 48 | P = self._get_pairwise_affinities(X) 49 | 50 | iter_num = 0 51 | while iter_num < self.max_iter: 52 | iter_num += 1 53 | 54 | D = l2_distance(Y) 55 | Q = self._q_distribution(D) 56 | 57 | # Normalizer q distribution 58 | Q_n = Q / np.sum(Q) 59 | 60 | # Early exaggeration & momentum 61 | pmul = 4.0 if iter_num < 100 else 1.0 62 | momentum = 0.5 if iter_num < 20 else 0.8 63 | 64 | # Perform gradient step 65 | grads = np.zeros(Y.shape) 66 | for i in range(self.n_samples): 67 | grad = 4 * np.dot((pmul * P[i] - Q_n[i]) * Q[i], Y[i] - Y) 68 | grads[i] = grad 69 | 70 | gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0)) 71 | gains = gains.clip(min=self.min_gain) 72 | 73 | velocity = momentum * velocity - self.lr * (gains * grads) 74 | Y += velocity 75 | Y = Y - np.mean(Y, 0) 76 | 77 | error = np.sum(P * np.log(P / Q_n)) 78 | logging.info("Iteration %s, error %s" % (iter_num, error)) 79 | return Y 80 | 81 | def _get_pairwise_affinities(self, X): 82 | """Computes pairwise affinities.""" 83 | affines = np.zeros((self.n_samples, self.n_samples), dtype=np.float32) 84 | target_entropy = np.log(self.perplexity) 85 | distances = l2_distance(X) 86 | 87 | for i in range(self.n_samples): 88 | affines[i, :] = self._binary_search(distances[i], target_entropy) 89 | 90 | # Fill diagonal with near zero value 91 | np.fill_diagonal(affines, 1.0e-12) 92 | 93 | affines = affines.clip(min=1e-100) 94 | affines = (affines + affines.T) / (2 * self.n_samples) 95 | return affines 96 | 97 | def _binary_search(self, dist, target_entropy): 98 | """Performs binary search to find suitable precision.""" 99 | precision_min = 0 100 | precision_max = 1.0e15 101 | precision = 1.0e5 102 | 103 | for _ in range(self.perplexity_tries): 104 | denom = np.sum(np.exp(-dist[dist > 0.0] / precision)) 105 | beta = np.exp(-dist / precision) / denom 106 | 107 | # Exclude zeros 108 | g_beta = beta[beta > 0.0] 109 | entropy = -np.sum(g_beta * np.log2(g_beta)) 110 | 111 | error = entropy - target_entropy 112 | 113 | if error > 0: 114 | # Decrease precision 115 | precision_max = precision 116 | precision = (precision + precision_min) / 2.0 117 | else: 118 | # Increase precision 119 | precision_min = precision 120 | precision = (precision + precision_max) / 2.0 121 | 122 | if np.abs(error) < self.tol: 123 | break 124 | 125 | return beta 126 | 127 | def _q_distribution(self, D): 128 | """Computes Student t-distribution.""" 129 | Q = 1.0 / (1.0 + D) 130 | np.fill_diagonal(Q, 0.0) 131 | Q = Q.clip(min=1e-100) 132 | return Q 133 | -------------------------------------------------------------------------------- /mla/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from .main import * 4 | -------------------------------------------------------------------------------- /mla/utils/main.py: 
-------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 5 | def one_hot(y): 6 | n_values = np.max(y) + 1 7 | return np.eye(n_values)[y] 8 | 9 | 10 | def batch_iterator(X, batch_size=64): 11 | """Splits X into equal sized chunks.""" 12 | n_samples = X.shape[0] 13 | n_batches = n_samples // batch_size 14 | batch_end = 0 15 | 16 | for b in range(n_batches): 17 | batch_begin = b * batch_size 18 | batch_end = batch_begin + batch_size 19 | 20 | X_batch = X[batch_begin:batch_end] 21 | 22 | yield X_batch 23 | 24 | if n_batches * batch_size < n_samples: 25 | yield X[batch_end:] 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | matplotlib>=1.5.1 3 | numpy>=1.11.1 4 | scikit-learn>=0.18 5 | scipy>=0.18.0 6 | seaborn>=0.7.1 7 | autograd>=1.1.7 8 | gym 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | description-file=README.md 6 | 7 | [flake8] 8 | max-line-length = 120 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open 3 | from os import path 4 | 5 | __version__ = '0.0.1' 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # Get the long description from the README file 10 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 11 | long_description = f.read() 12 | 13 | # get the dependencies and installs 14 | with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f: 15 | all_reqs = f.read().split('\n') 16 | 17 | install_requires = [x.strip() for x in all_reqs if 'git+' not in x] 18 | dependency_links = [x.strip().replace('git+', '') for x in all_reqs if x.startswith('git+')] 19 | 20 | setup( 21 | name='mla', 22 | version=__version__, 23 | description='A collection of minimal and clean implementations of machine learning algorithms.', 24 | long_description=long_description, 25 | url='https://github.com/rushter/mla', 26 | download_url='https://github.com/rushter/mla/tarball/' + __version__, 27 | license='MIT', 28 | packages=find_packages(exclude=['docs', 'tests*']), 29 | include_package_data=True, 30 | author='Artem Golubin', 31 | install_requires=install_requires, 32 | setup_requires=['numpy>=1.10', 'scipy>=0.17'], 33 | dependency_links=dependency_links, 34 | author_email='gh@rushter.com' 35 | ) 36 | --------------------------------------------------------------------------------
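For completeness, a short standalone sketch of the two helpers defined in mla/utils/main.py, which are used throughout the training code above; the toy arrays are made up purely for illustration.

import numpy as np
from mla.utils import batch_iterator, one_hot

y = np.array([0, 2, 1, 2])
print(one_hot(y))            # identity rows selected by class index
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

X = np.arange(10).reshape(10, 1)
for batch in batch_iterator(X, batch_size=4):
    print(batch.shape)       # (4, 1), (4, 1), then the (2, 1) remainder batch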