├── .github └── workflows │ └── python-app.yml ├── .gitignore ├── AUTHORS ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples ├── __init__.py ├── gaussian_mixture.py ├── gbm.py ├── kmeans.py ├── linear_models.py ├── naive_bayes.py ├── nearest_neighbors.py ├── nnet_convnet_mnist.py ├── nnet_mlp.py ├── nnet_rnn_binary_add.py ├── nnet_rnn_text_generation.py ├── pca.py ├── random_forest.py ├── rbm.py ├── rl_deep_q_learning.py ├── svm.py └── t-sne.py ├── mla ├── __init__.py ├── base │ ├── __init__.py │ └── base.py ├── datasets │ ├── __init__.py │ ├── base.py │ └── data │ │ ├── mnist │ │ ├── t10k-images-idx3-ubyte │ │ ├── t10k-labels-idx1-ubyte │ │ ├── train-images-idx3-ubyte │ │ └── train-labels-idx1-ubyte │ │ └── nietzsche.txt ├── ensemble │ ├── __init__.py │ ├── base.py │ ├── gbm.py │ ├── random_forest.py │ └── tree.py ├── fm.py ├── gaussian_mixture.py ├── kmeans.py ├── knn.py ├── linear_models.py ├── metrics │ ├── __init__.py │ ├── base.py │ ├── distance.py │ ├── metrics.py │ └── tests │ │ ├── __init__.py │ │ └── test_metrics.py ├── naive_bayes.py ├── neuralnet │ ├── __init__.py │ ├── activations.py │ ├── constraints.py │ ├── initializations.py │ ├── layers │ │ ├── __init__.py │ │ ├── basic.py │ │ ├── convnet.py │ │ ├── normalization.py │ │ └── recurrent │ │ │ ├── __init__.py │ │ │ ├── lstm.py │ │ │ └── rnn.py │ ├── loss.py │ ├── nnet.py │ ├── optimizers.py │ ├── parameters.py │ ├── regularizers.py │ └── tests │ │ ├── test_activations.py │ │ └── test_optimizers.py ├── pca.py ├── rbm.py ├── rl │ ├── __init__.py │ └── dqn.py ├── svm │ ├── __init__.py │ ├── kernerls.py │ └── svm.py ├── tests │ ├── __init__.py │ ├── test_classification_accuracy.py │ ├── test_reduction.py │ └── test_regression_accuracy.py ├── tsne.py └── utils │ ├── __init__.py │ └── main.py ├── requirements.txt ├── setup.cfg └── setup.py /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | timeout-minutes: 5 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | pip install -r requirements.txt 24 | - name: Test with pytest 25 | run: | 26 | pytest 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build/ 3 | dist/ 4 | mla.egg-info/ 5 | .cache 6 | *.swp 7 | .idea -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Artem Golubin 2 | Anebi Agbo 3 | Convex Path 4 | James Chevalier 5 | Jiancheng 6 | KaiMin Lai 7 | Nguyễn Tuấn 8 | Nicolas Hug 9 | Xiaochun Ma 10 | Yiran Sheng 11 | brady salz 12 | junwang007 13 | keineahnung2345 14 | lucaskolstad 15 | vincent tang 16 | xq5he 17 | LanderTome 18 | therickli 19 | Andrew Melnik 20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN mkdir -p /var/app 4 | WORKDIR /var/app 5 | COPY . 
/var/app 6 | 7 | # install scipy & numpy 8 | # install required packages 9 | RUN pip install scipy numpy && \ 10 | pip install . 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2020 Artem Golubin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include mla/datasets/data * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine learning algorithms 2 | A collection of minimal and clean implementations of machine learning algorithms. 3 | 4 | ### Why? 5 | This project is targeting people who want to learn internals of ml algorithms or implement them from scratch. 6 | The code is much easier to follow than the optimized libraries and easier to play with. 7 | All algorithms are implemented in Python, using numpy, scipy and autograd. 
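All of the estimators share the same minimal `fit`/`predict` interface, so a typical session looks like the sketch below (adapted from `examples/linear_models.py`; it assumes the package has been installed as described in the Installation section):

```python
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from mla.linear_models import LinearRegression
from mla.metrics.metrics import mean_squared_error

# Toy regression problem split into train/test parts
X, y = make_regression(n_samples=10000, n_features=100, noise=0.05, random_state=1111)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111)

# L2-regularized linear regression trained with gradient descent
model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.03)
model.fit(X_train, y_train)
print("mse:", mean_squared_error(y_test, model.predict(X_test)))
```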
8 | 9 | ### Implemented: 10 | * [Deep learning (MLP, CNN, RNN, LSTM)](mla/neuralnet) 11 | * [Linear regression, logistic regression](mla/linear_models.py) 12 | * [Random Forests](mla/ensemble/random_forest.py) 13 | * [Support vector machine (SVM) with kernels (Linear, Poly, RBF)](mla/svm) 14 | * [K-Means](mla/kmeans.py) 15 | * [Gaussian Mixture Model](mla/gaussian_mixture.py) 16 | * [K-nearest neighbors](mla/knn.py) 17 | * [Naive bayes](mla/naive_bayes.py) 18 | * [Principal component analysis (PCA)](mla/pca.py) 19 | * [Factorization machines](mla/fm.py) 20 | * [Restricted Boltzmann machine (RBM)](mla/rbm.py) 21 | * [t-Distributed Stochastic Neighbor Embedding (t-SNE)](mla/tsne.py) 22 | * [Gradient Boosting trees (also known as GBDT, GBRT, GBM, XGBoost)](mla/ensemble/gbm.py) 23 | * [Reinforcement learning (Deep Q learning)](mla/rl) 24 | 25 | 26 | ### Installation 27 | ```sh 28 | git clone https://github.com/rushter/MLAlgorithms 29 | cd MLAlgorithms 30 | pip install scipy numpy 31 | python setup.py develop 32 | ``` 33 | ### How to run examples without installation 34 | ```sh 35 | cd MLAlgorithms 36 | python -m examples.linear_models 37 | ``` 38 | ### How to run examples within Docker 39 | ```sh 40 | cd MLAlgorithms 41 | docker build -t mlalgorithms . 42 | docker run --rm -it mlalgorithms bash 43 | python -m examples.linear_models 44 | ``` 45 | ### Contributing 46 | 47 | Your contributions are always welcome! 48 | Feel free to improve existing code, documentation or implement new algorithm. 49 | Please open an issue to propose your changes if they are big enough. 50 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | -------------------------------------------------------------------------------- /examples/gaussian_mixture.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn import datasets 5 | from mla.kmeans import KMeans 6 | from mla.gaussian_mixture import GaussianMixture 7 | 8 | random.seed(1) 9 | np.random.seed(6) 10 | 11 | 12 | def make_clusters(skew=True, *arg, **kwargs): 13 | X, y = datasets.make_blobs(*arg, **kwargs) 14 | if skew: 15 | nrow = X.shape[1] 16 | for i in np.unique(y): 17 | X[y == i] = X[y == i].dot(np.random.random((nrow, nrow)) - 0.5) 18 | return X, y 19 | 20 | 21 | def KMeans_and_GMM(K): 22 | COLOR = "bgrcmyk" 23 | 24 | X, y = make_clusters(skew=True, n_samples=1500, centers=K) 25 | _, axes = plt.subplots(1, 3) 26 | 27 | # Ground Truth 28 | axes[0].scatter(X[:, 0], X[:, 1], c=[COLOR[int(assignment)] for assignment in y]) 29 | axes[0].set_title("Ground Truth") 30 | 31 | # KMeans 32 | kmeans = KMeans(K=K, init="++") 33 | kmeans.fit(X) 34 | kmeans.predict() 35 | axes[1].set_title("KMeans") 36 | kmeans.plot(ax=axes[1], holdon=True) 37 | 38 | # Gaussian Mixture 39 | gmm = GaussianMixture(K=K, init="kmeans") 40 | gmm.fit(X) 41 | axes[2].set_title("Gaussian Mixture") 42 | gmm.plot(ax=axes[2]) 43 | 44 | 45 | if __name__ == "__main__": 46 | KMeans_and_GMM(4) 47 | -------------------------------------------------------------------------------- /examples/gbm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from sklearn.datasets import make_classification 4 | from sklearn.datasets import make_regression 5 | from sklearn.metrics import 
roc_auc_score 6 | 7 | try: 8 | from sklearn.model_selection import train_test_split 9 | except ImportError: 10 | from sklearn.cross_validation import train_test_split 11 | 12 | from mla.ensemble.gbm import GradientBoostingClassifier, GradientBoostingRegressor 13 | from mla.metrics.metrics import mean_squared_error 14 | 15 | logging.basicConfig(level=logging.DEBUG) 16 | 17 | 18 | def classification(): 19 | # Generate a random binary classification problem. 20 | X, y = make_classification( 21 | n_samples=350, n_features=15, n_informative=10, random_state=1111, n_classes=2, class_sep=1.0, n_redundant=0 22 | ) 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 24 | 25 | model = GradientBoostingClassifier(n_estimators=50, max_depth=4, max_features=8, learning_rate=0.1) 26 | model.fit(X_train, y_train) 27 | predictions = model.predict(X_test) 28 | print(predictions) 29 | print(predictions.min()) 30 | print(predictions.max()) 31 | print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions)) 32 | 33 | 34 | def regression(): 35 | # Generate a random regression problem 36 | X, y = make_regression( 37 | n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 38 | ) 39 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 40 | 41 | model = GradientBoostingRegressor(n_estimators=25, max_depth=5, max_features=3) 42 | model.fit(X_train, y_train) 43 | predictions = model.predict(X_test) 44 | print("regression, mse: %s" % mean_squared_error(y_test.flatten(), predictions.flatten())) 45 | 46 | 47 | if __name__ == "__main__": 48 | classification() 49 | # regression() 50 | -------------------------------------------------------------------------------- /examples/kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_blobs 3 | 4 | from mla.kmeans import KMeans 5 | 6 | 7 | def kmeans_example(plot=False): 8 | X, y = make_blobs(centers=4, n_samples=500, n_features=2, shuffle=True, random_state=42) 9 | clusters = len(np.unique(y)) 10 | k = KMeans(K=clusters, max_iters=150, init="++") 11 | k.fit(X) 12 | k.predict() 13 | 14 | if plot: 15 | k.plot() 16 | 17 | 18 | if __name__ == "__main__": 19 | kmeans_example(plot=True) 20 | -------------------------------------------------------------------------------- /examples/linear_models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: 4 | from sklearn.model_selection import train_test_split 5 | except ImportError: 6 | from sklearn.cross_validation import train_test_split 7 | from sklearn.datasets import make_classification 8 | from sklearn.datasets import make_regression 9 | 10 | from mla.linear_models import LinearRegression, LogisticRegression 11 | from mla.metrics.metrics import mean_squared_error, accuracy 12 | 13 | # Change to DEBUG to see convergence 14 | logging.basicConfig(level=logging.ERROR) 15 | 16 | 17 | def regression(): 18 | # Generate a random regression problem 19 | X, y = make_regression( 20 | n_samples=10000, n_features=100, n_informative=75, n_targets=1, noise=0.05, random_state=1111, bias=0.5 21 | ) 22 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 23 | 24 | model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.03) 25 | model.fit(X_train, y_train) 26 | predictions = 
model.predict(X_test) 27 | print("regression mse", mean_squared_error(y_test, predictions)) 28 | 29 | 30 | def classification(): 31 | # Generate a random binary classification problem. 32 | X, y = make_classification( 33 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 34 | ) 35 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 36 | 37 | model = LogisticRegression(lr=0.01, max_iters=500, penalty="l1", C=0.01) 38 | model.fit(X_train, y_train) 39 | predictions = model.predict(X_test) 40 | print("classification accuracy", accuracy(y_test, predictions)) 41 | 42 | 43 | if __name__ == "__main__": 44 | regression() 45 | classification() 46 | -------------------------------------------------------------------------------- /examples/naive_bayes.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_classification 2 | from sklearn.metrics import roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | 5 | from mla.naive_bayes import NaiveBayesClassifier 6 | 7 | 8 | def classification(): 9 | # Generate a random binary classification problem. 10 | X, y = make_classification( 11 | n_samples=1000, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 12 | ) 13 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 14 | 15 | model = NaiveBayesClassifier() 16 | model.fit(X_train, y_train) 17 | predictions = model.predict(X_test)[:, 1] 18 | 19 | print("classification accuracy", roc_auc_score(y_test, predictions)) 20 | 21 | 22 | if __name__ == "__main__": 23 | classification() 24 | -------------------------------------------------------------------------------- /examples/nearest_neighbors.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.model_selection import train_test_split 3 | except ImportError: 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.datasets import make_classification 6 | from sklearn.datasets import make_regression 7 | from scipy.spatial import distance 8 | 9 | from mla import knn 10 | from mla.metrics.metrics import mean_squared_error, accuracy 11 | 12 | 13 | def regression(): 14 | # Generate a random regression problem 15 | X, y = make_regression( 16 | n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 17 | ) 18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 19 | 20 | model = knn.KNNRegressor(k=5, distance_func=distance.euclidean) 21 | model.fit(X_train, y_train) 22 | predictions = model.predict(X_test) 23 | print("regression mse", mean_squared_error(y_test, predictions)) 24 | 25 | 26 | def classification(): 27 | X, y = make_classification( 28 | n_samples=500, 29 | n_features=5, 30 | n_informative=5, 31 | n_redundant=0, 32 | n_repeated=0, 33 | n_classes=3, 34 | random_state=1111, 35 | class_sep=1.5, 36 | ) 37 | 38 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 39 | 40 | clf = knn.KNNClassifier(k=5, distance_func=distance.euclidean) 41 | 42 | clf.fit(X_train, y_train) 43 | predictions = clf.predict(X_test) 44 | print("classification accuracy", accuracy(y_test, predictions)) 45 | 46 | 47 | if __name__ == "__main__": 48 | regression() 49 | classification() 50 | 
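# Note: distance_func is assumed here to be any callable that, like the
# scipy.spatial.distance functions used above, takes two 1-D vectors and
# returns a scalar; e.g. distance.cityblock or distance.chebyshev could be
# passed as drop-in replacements for distance.euclidean.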
-------------------------------------------------------------------------------- /examples/nnet_convnet_mnist.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mla.datasets import load_mnist 4 | from mla.metrics import accuracy 5 | from mla.neuralnet import NeuralNet 6 | from mla.neuralnet.layers import Activation, Convolution, MaxPooling, Flatten, Dropout, Parameters 7 | from mla.neuralnet.layers import Dense 8 | from mla.neuralnet.optimizers import Adadelta 9 | from mla.utils import one_hot 10 | 11 | logging.basicConfig(level=logging.DEBUG) 12 | 13 | 14 | # Load MNIST dataset 15 | X_train, X_test, y_train, y_test = load_mnist() 16 | 17 | # Normalize data 18 | X_train /= 255.0 19 | X_test /= 255.0 20 | 21 | y_train = one_hot(y_train.flatten()) 22 | y_test = one_hot(y_test.flatten()) 23 | print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) 24 | 25 | # Approx. 15-20 min. per epoch 26 | model = NeuralNet( 27 | layers=[ 28 | Convolution(n_filters=32, filter_shape=(3, 3), padding=(1, 1), stride=(1, 1)), 29 | Activation("relu"), 30 | Convolution(n_filters=32, filter_shape=(3, 3), padding=(1, 1), stride=(1, 1)), 31 | Activation("relu"), 32 | MaxPooling(pool_shape=(2, 2), stride=(2, 2)), 33 | Dropout(0.5), 34 | Flatten(), 35 | Dense(128), 36 | Activation("relu"), 37 | Dropout(0.5), 38 | Dense(10), 39 | Activation("softmax"), 40 | ], 41 | loss="categorical_crossentropy", 42 | optimizer=Adadelta(), 43 | metric="accuracy", 44 | batch_size=128, 45 | max_epochs=3, 46 | ) 47 | 48 | model.fit(X_train, y_train) 49 | predictions = model.predict(X_test) 50 | print(accuracy(y_test, predictions)) 51 | -------------------------------------------------------------------------------- /examples/nnet_mlp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: 4 | from sklearn.model_selection import train_test_split 5 | except ImportError: 6 | from sklearn.cross_validation import train_test_split 7 | from sklearn.datasets import make_classification 8 | from sklearn.datasets import make_regression 9 | from sklearn.metrics import roc_auc_score 10 | 11 | from mla.metrics.metrics import mean_squared_error 12 | from mla.neuralnet import NeuralNet 13 | from mla.neuralnet.constraints import MaxNorm 14 | from mla.neuralnet.layers import Activation, Dense, Dropout 15 | from mla.neuralnet.optimizers import Adadelta, Adam 16 | from mla.neuralnet.parameters import Parameters 17 | from mla.neuralnet.regularizers import L2 18 | from mla.utils import one_hot 19 | 20 | logging.basicConfig(level=logging.DEBUG) 21 | 22 | 23 | def classification(): 24 | # Generate a random binary classification problem. 
25 | X, y = make_classification( 26 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 27 | ) 28 | y = one_hot(y) 29 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 30 | 31 | model = NeuralNet( 32 | layers=[ 33 | Dense(256, Parameters(init="uniform", regularizers={"W": L2(0.05)})), 34 | Activation("relu"), 35 | Dropout(0.5), 36 | Dense(128, Parameters(init="normal", constraints={"W": MaxNorm()})), 37 | Activation("relu"), 38 | Dense(2), 39 | Activation("softmax"), 40 | ], 41 | loss="categorical_crossentropy", 42 | optimizer=Adadelta(), 43 | metric="accuracy", 44 | batch_size=64, 45 | max_epochs=25, 46 | ) 47 | model.fit(X_train, y_train) 48 | predictions = model.predict(X_test) 49 | print("classification accuracy", roc_auc_score(y_test[:, 0], predictions[:, 0])) 50 | 51 | 52 | def regression(): 53 | # Generate a random regression problem 54 | X, y = make_regression(n_samples=5000, n_features=25, n_informative=25, n_targets=1, random_state=100, noise=0.05) 55 | y *= 0.01 56 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 57 | 58 | model = NeuralNet( 59 | layers=[ 60 | Dense(64, Parameters(init="normal")), 61 | Activation("linear"), 62 | Dense(32, Parameters(init="normal")), 63 | Activation("linear"), 64 | Dense(1), 65 | ], 66 | loss="mse", 67 | optimizer=Adam(), 68 | metric="mse", 69 | batch_size=256, 70 | max_epochs=15, 71 | ) 72 | model.fit(X_train, y_train) 73 | predictions = model.predict(X_test) 74 | print("regression mse", mean_squared_error(y_test, predictions.flatten())) 75 | 76 | 77 | if __name__ == "__main__": 78 | classification() 79 | regression() 80 | -------------------------------------------------------------------------------- /examples/nnet_rnn_binary_add.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from itertools import combinations, islice 3 | 4 | import numpy as np 5 | 6 | try: 7 | from sklearn.model_selection import train_test_split 8 | except ImportError: 9 | from sklearn.cross_validation import train_test_split 10 | 11 | from mla.metrics import accuracy 12 | from mla.neuralnet import NeuralNet 13 | from mla.neuralnet.layers import Activation, TimeDistributedDense 14 | from mla.neuralnet.layers.recurrent import LSTM 15 | from mla.neuralnet.optimizers import Adam 16 | 17 | logging.basicConfig(level=logging.DEBUG) 18 | 19 | 20 | def addition_dataset(dim=10, n_samples=10000, batch_size=64): 21 | """Generate binary addition dataset. 
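Each sample encodes two dim-bit numbers as bit vectors (least-significant
bit first) stacked along the last axis of X, and y holds the bit vector of
their sum.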
22 | http://devankuleindiren.com/Projects/rnn_arithmetic.php 23 | """ 24 | binary_format = "{:0" + str(dim) + "b}" 25 | 26 | # Generate all possible number combinations 27 | combs = list(islice(combinations(range(2 ** (dim - 1)), 2), n_samples)) 28 | 29 | # Initialize empty arrays 30 | X = np.zeros((len(combs), dim, 2), dtype=np.uint8) 31 | y = np.zeros((len(combs), dim, 1), dtype=np.uint8) 32 | 33 | for i, (a, b) in enumerate(combs): 34 | # Convert numbers to binary format 35 | X[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a)])) 36 | X[i, :, 1] = list(reversed([int(x) for x in binary_format.format(b)])) 37 | 38 | # Generate target variable (a+b) 39 | y[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a + b)])) 40 | 41 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111) 42 | 43 | # Round number of examples for batch processing 44 | train_b = (X_train.shape[0] // batch_size) * batch_size 45 | test_b = (X_test.shape[0] // batch_size) * batch_size 46 | X_train = X_train[0:train_b] 47 | y_train = y_train[0:train_b] 48 | 49 | X_test = X_test[0:test_b] 50 | y_test = y_test[0:test_b] 51 | return X_train, X_test, y_train, y_test 52 | 53 | 54 | def addition_problem(ReccurentLayer): 55 | X_train, X_test, y_train, y_test = addition_dataset(8, 5000) 56 | 57 | print(X_train.shape, X_test.shape) 58 | model = NeuralNet( 59 | layers=[ReccurentLayer, TimeDistributedDense(1), Activation("sigmoid")], 60 | loss="mse", 61 | optimizer=Adam(), 62 | metric="mse", 63 | batch_size=64, 64 | max_epochs=15, 65 | ) 66 | model.fit(X_train, y_train) 67 | predictions = np.round(model.predict(X_test)) 68 | predictions = np.packbits(predictions.astype(np.uint8)) 69 | y_test = np.packbits(y_test.astype(np.int)) 70 | print(accuracy(y_test, predictions)) 71 | 72 | 73 | # RNN 74 | # addition_problem(RNN(16, parameters=Parameters(constraints={'W': SmallNorm(), 'U': SmallNorm()}))) 75 | # LSTM 76 | addition_problem(LSTM(16)) 77 | -------------------------------------------------------------------------------- /examples/nnet_rnn_text_generation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import logging 4 | import random 5 | 6 | import numpy as np 7 | import sys 8 | 9 | from mla.datasets import load_nietzsche 10 | from mla.neuralnet import NeuralNet 11 | from mla.neuralnet.constraints import SmallNorm 12 | from mla.neuralnet.layers import Activation, Dense 13 | from mla.neuralnet.layers.recurrent import LSTM, RNN 14 | from mla.neuralnet.optimizers import RMSprop 15 | 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | 19 | # Example taken from: https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py 20 | 21 | 22 | def sample(preds, temperature=1.0): 23 | # helper function to sample an index from a probability array 24 | preds = np.asarray(preds).astype("float64") 25 | preds = np.log(preds) / temperature 26 | exp_preds = np.exp(preds) 27 | preds = exp_preds / np.sum(exp_preds) 28 | probas = np.random.multinomial(1, preds, 1) 29 | return np.argmax(probas) 30 | 31 | 32 | X, y, text, chars, char_indices, indices_char = load_nietzsche() 33 | # Round the number of sequences for batch processing 34 | items_count = X.shape[0] - (X.shape[0] % 64) 35 | maxlen = X.shape[1] 36 | X = X[0:items_count] 37 | y = y[0:items_count] 38 | 39 | print(X.shape, y.shape) 40 | # LSTM OR RNN 41 | # rnn_layer = RNN(128, return_sequences=False) 42 | rnn_layer = LSTM(128, 
return_sequences=False) 43 | 44 | model = NeuralNet( 45 | layers=[ 46 | rnn_layer, 47 | # Flatten(), 48 | # TimeStepSlicer(-1), 49 | Dense(X.shape[2]), 50 | Activation("softmax"), 51 | ], 52 | loss="categorical_crossentropy", 53 | optimizer=RMSprop(learning_rate=0.01), 54 | metric="accuracy", 55 | batch_size=64, 56 | max_epochs=1, 57 | shuffle=False, 58 | ) 59 | 60 | for _ in range(25): 61 | model.fit(X, y) 62 | start_index = random.randint(0, len(text) - maxlen - 1) 63 | 64 | generated = "" 65 | sentence = text[start_index : start_index + maxlen] 66 | generated += sentence 67 | print('----- Generating with seed: "' + sentence + '"') 68 | sys.stdout.write(generated) 69 | for i in range(100): 70 | x = np.zeros((64, maxlen, len(chars))) 71 | for t, char in enumerate(sentence): 72 | x[0, t, char_indices[char]] = 1.0 73 | preds = model.predict(x)[0] 74 | next_index = sample(preds, 0.5) 75 | next_char = indices_char[next_index] 76 | 77 | generated += next_char 78 | sentence = sentence[1:] + next_char 79 | 80 | sys.stdout.write(next_char) 81 | sys.stdout.flush() 82 | print() 83 | -------------------------------------------------------------------------------- /examples/pca.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.model_selection import train_test_split 3 | except ImportError: 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.datasets import make_classification 6 | 7 | from mla.linear_models import LogisticRegression 8 | from mla.metrics import accuracy 9 | from mla.pca import PCA 10 | 11 | # logging.basicConfig(level=logging.DEBUG) 12 | 13 | # Generate a random binary classification problem. 14 | X, y = make_classification( 15 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 16 | ) 17 | 18 | 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 20 | 21 | for s in ["svd", "eigen"]: 22 | p = PCA(15, solver=s) 23 | 24 | # fit PCA with training data, not entire dataset 25 | p.fit(X_train) 26 | X_train_reduced = p.transform(X_train) 27 | X_test_reduced = p.transform(X_test) 28 | 29 | model = LogisticRegression(lr=0.001, max_iters=2500) 30 | model.fit(X_train_reduced, y_train) 31 | predictions = model.predict(X_test_reduced) 32 | print("Classification accuracy for %s PCA: %s" % (s, accuracy(y_test, predictions))) 33 | -------------------------------------------------------------------------------- /examples/random_forest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from sklearn.datasets import make_classification 5 | from sklearn.datasets import make_regression 6 | from sklearn.metrics import roc_auc_score, accuracy_score 7 | 8 | try: 9 | from sklearn.model_selection import train_test_split 10 | except ImportError: 11 | from sklearn.cross_validation import train_test_split 12 | 13 | from mla.ensemble.random_forest import RandomForestClassifier, RandomForestRegressor 14 | from mla.metrics.metrics import mean_squared_error 15 | 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | 19 | def classification(): 20 | # Generate a random binary classification problem. 
21 | X, y = make_classification( 22 | n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 23 | ) 24 | 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 26 | 27 | model = RandomForestClassifier(n_estimators=10, max_depth=4) 28 | model.fit(X_train, y_train) 29 | 30 | predictions_prob = model.predict(X_test)[:, 1] 31 | predictions = np.argmax(model.predict(X_test), axis=1) 32 | #print(predictions.shape) 33 | print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions_prob)) 34 | print("classification, accuracy score: %s" % accuracy_score(y_test, predictions)) 35 | 36 | 37 | def regression(): 38 | # Generate a random regression problem 39 | X, y = make_regression( 40 | n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 41 | ) 42 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) 43 | 44 | model = RandomForestRegressor(n_estimators=50, max_depth=10, max_features=3) 45 | model.fit(X_train, y_train) 46 | predictions = model.predict(X_test) 47 | print("regression, mse: %s" % mean_squared_error(y_test.flatten(), predictions.flatten())) 48 | 49 | 50 | if __name__ == "__main__": 51 | classification() 52 | # regression() 53 | -------------------------------------------------------------------------------- /examples/rbm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | 5 | from mla.rbm import RBM 6 | 7 | logging.basicConfig(level=logging.DEBUG) 8 | 9 | 10 | def print_curve(rbm): 11 | from matplotlib import pyplot as plt 12 | 13 | def moving_average(a, n=25): 14 | ret = np.cumsum(a, dtype=float) 15 | ret[n:] = ret[n:] - ret[:-n] 16 | return ret[n - 1:] / n 17 | 18 | plt.plot(moving_average(rbm.errors)) 19 | plt.show() 20 | 21 | 22 | X = np.random.uniform(0, 1, (1500, 10)) 23 | rbm = RBM(n_hidden=10, max_epochs=200, batch_size=10, learning_rate=0.1) 24 | rbm.fit(X) 25 | print_curve(rbm) 26 | -------------------------------------------------------------------------------- /examples/rl_deep_q_learning.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mla.neuralnet import NeuralNet 4 | from mla.neuralnet.layers import Activation, Dense 5 | from mla.neuralnet.optimizers import Adam 6 | from mla.rl.dqn import DQN 7 | 8 | logging.basicConfig(level=logging.CRITICAL) 9 | 10 | 11 | def mlp_model(n_actions, batch_size=64): 12 | model = NeuralNet( 13 | layers=[Dense(32), Activation("relu"), Dense(n_actions)], 14 | loss="mse", 15 | optimizer=Adam(), 16 | metric="mse", 17 | batch_size=batch_size, 18 | max_epochs=1, 19 | verbose=False, 20 | ) 21 | return model 22 | 23 | 24 | model = DQN(n_episodes=2500, batch_size=64) 25 | model.init_environment("CartPole-v0") 26 | model.init_model(mlp_model) 27 | 28 | try: 29 | # Train the model 30 | # It can take from 300 to 2500 episodes to solve CartPole-v0 problem due to randomness of environment. 
31 | # You can stop training process using Ctrl+C signal 32 | # Read more about this problem: https://gym.openai.com/envs/CartPole-v0 33 | model.train(render=False) 34 | except KeyboardInterrupt: 35 | pass 36 | # Render trained model 37 | model.play(episodes=100) 38 | -------------------------------------------------------------------------------- /examples/svm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: 4 | from sklearn.model_selection import train_test_split 5 | except ImportError: 6 | from sklearn.cross_validation import train_test_split 7 | from sklearn.datasets import make_classification 8 | 9 | from mla.metrics.metrics import accuracy 10 | from mla.svm.kernerls import Linear, RBF 11 | from mla.svm.svm import SVM 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | 15 | 16 | def classification(): 17 | # Generate a random binary classification problem. 18 | X, y = make_classification( 19 | n_samples=1200, n_features=10, n_informative=5, random_state=1111, n_classes=2, class_sep=1.75 20 | ) 21 | # Convert y to {-1, 1} 22 | y = (y * 2) - 1 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111) 24 | 25 | for kernel in [RBF(gamma=0.1), Linear()]: 26 | model = SVM(max_iter=500, kernel=kernel, C=0.6) 27 | model.fit(X_train, y_train) 28 | predictions = model.predict(X_test) 29 | print("Classification accuracy (%s): %s" % (kernel, accuracy(y_test, predictions))) 30 | 31 | 32 | if __name__ == "__main__": 33 | classification() 34 | -------------------------------------------------------------------------------- /examples/t-sne.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import matplotlib.pyplot as plt 4 | from sklearn.datasets import make_classification 5 | 6 | from mla.tsne import TSNE 7 | 8 | logging.basicConfig(level=logging.DEBUG) 9 | 10 | X, y = make_classification( 11 | n_samples=500, n_features=10, n_informative=5, n_redundant=0, random_state=1111, n_classes=2, class_sep=2.5 12 | ) 13 | 14 | p = TSNE(2, max_iter=500) 15 | X = p.fit_transform(X) 16 | 17 | colors = ["red", "green"] 18 | for t in range(2): 19 | t_mask = (y == t).astype(bool) 20 | plt.scatter(X[t_mask, 0], X[t_mask, 1], color=colors[t]) 21 | 22 | plt.show() 23 | -------------------------------------------------------------------------------- /mla/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | # copyright: (c) 2016 by Artem Golubin 3 | # license: MIT, see LICENSE for more details. 4 | -------------------------------------------------------------------------------- /mla/base/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .base import * 3 | -------------------------------------------------------------------------------- /mla/base/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 5 | class BaseEstimator: 6 | y_required = True 7 | fit_required = True 8 | 9 | def _setup_input(self, X, y=None): 10 | """Ensure inputs to an estimator are in the expected format. 11 | 12 | Ensures X and y are stored as numpy ndarrays by converting from an 13 | array-like object if necessary. Enables estimators to define whether 14 | they require a set of y target values or not with y_required, e.g. 
15 | kmeans clustering requires no target labels and is fit against only X. 16 | 17 | Parameters 18 | ---------- 19 | X : array-like 20 | Feature dataset. 21 | y : array-like 22 | Target values. By default is required, but if y_required = false 23 | then may be omitted. 24 | """ 25 | if not isinstance(X, np.ndarray): 26 | X = np.array(X) 27 | 28 | if X.size == 0: 29 | raise ValueError("Got an empty matrix.") 30 | 31 | if X.ndim == 1: 32 | self.n_samples, self.n_features = 1, X.shape 33 | else: 34 | self.n_samples, self.n_features = X.shape[0], np.prod(X.shape[1:]) 35 | 36 | self.X = X 37 | 38 | if self.y_required: 39 | if y is None: 40 | raise ValueError("Missed required argument y") 41 | 42 | if not isinstance(y, np.ndarray): 43 | y = np.array(y) 44 | 45 | if y.size == 0: 46 | raise ValueError("The targets array must be no-empty.") 47 | 48 | self.y = y 49 | 50 | def fit(self, X, y=None): 51 | self._setup_input(X, y) 52 | 53 | def predict(self, X=None): 54 | if not isinstance(X, np.ndarray): 55 | X = np.array(X) 56 | 57 | if self.X is not None or not self.fit_required: 58 | return self._predict(X) 59 | else: 60 | raise ValueError("You must call `fit` before `predict`") 61 | 62 | def _predict(self, X=None): 63 | raise NotImplementedError() 64 | -------------------------------------------------------------------------------- /mla/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from mla.datasets.base import * 3 | -------------------------------------------------------------------------------- /mla/datasets/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import os 3 | 4 | import numpy as np 5 | 6 | 7 | def get_filename(name): 8 | return os.path.join(os.path.dirname(__file__), name) 9 | 10 | 11 | def load_mnist(): 12 | def load(dataset="training", digits=np.arange(10)): 13 | import struct 14 | from array import array as pyarray 15 | from numpy import array, int8, uint8, zeros 16 | 17 | if dataset == "train": 18 | fname_img = get_filename("data/mnist/train-images-idx3-ubyte") 19 | fname_lbl = get_filename("data/mnist/train-labels-idx1-ubyte") 20 | elif dataset == "test": 21 | fname_img = get_filename("data/mnist/t10k-images-idx3-ubyte") 22 | fname_lbl = get_filename("data/mnist/t10k-labels-idx1-ubyte") 23 | else: 24 | raise ValueError("Unexpected dataset name: %r" % dataset) 25 | 26 | flbl = open(fname_lbl, "rb") 27 | magic_nr, size = struct.unpack(">II", flbl.read(8)) 28 | lbl = pyarray("b", flbl.read()) 29 | flbl.close() 30 | 31 | fimg = open(fname_img, "rb") 32 | magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16)) 33 | img = pyarray("B", fimg.read()) 34 | fimg.close() 35 | 36 | ind = [k for k in range(size) if lbl[k] in digits] 37 | N = len(ind) 38 | 39 | images = zeros((N, rows, cols), dtype=uint8) 40 | labels = zeros((N, 1), dtype=int8) 41 | for i in range(len(ind)): 42 | images[i] = array(img[ind[i] * rows * cols: (ind[i] + 1) * rows * cols]).reshape((rows, cols)) 43 | labels[i] = lbl[ind[i]] 44 | 45 | return images, labels 46 | 47 | X_train, y_train = load("train") 48 | X_test, y_test = load("test") 49 | 50 | X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype(np.float32) 51 | X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype(np.float32) 52 | 53 | return X_train, X_test, y_train, y_test 54 | 55 | 56 | def load_nietzsche(): 57 | text = open(get_filename("data/nietzsche.txt"), "rt").read().lower() 58 | chars = 
set(list(text)) 59 | char_indices = {ch: i for i, ch in enumerate(chars)} 60 | indices_char = {i: ch for i, ch in enumerate(chars)} 61 | 62 | maxlen = 40 63 | step = 3 64 | sentences = [] 65 | next_chars = [] 66 | for i in range(0, len(text) - maxlen, step): 67 | sentences.append(text[i: i + maxlen]) 68 | next_chars.append(text[i + maxlen]) 69 | 70 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 71 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 72 | for i, sentence in enumerate(sentences): 73 | for t, char in enumerate(sentence): 74 | X[i, t, char_indices[char]] = 1 75 | y[i, char_indices[next_chars[i]]] = 1 76 | return X, y, text, chars, char_indices, indices_char 77 | -------------------------------------------------------------------------------- /mla/datasets/data/mnist/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/datasets/data/mnist/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /mla/datasets/data/mnist/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             -------------------------------------------------------------------------------- /mla/datasets/data/mnist/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/datasets/data/mnist/train-images-idx3-ubyte -------------------------------------------------------------------------------- /mla/datasets/data/mnist/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/datasets/data/mnist/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /mla/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .random_forest import RandomForestClassifier, RandomForestRegressor 3 | -------------------------------------------------------------------------------- /mla/ensemble/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from scipy import stats 4 | 5 | 6 | def f_entropy(p): 7 | # Convert values to probability 8 | p = np.bincount(p) / 
float(p.shape[0]) 9 | 10 | ep = stats.entropy(p) 11 | if ep == -float("inf"): 12 | return 0.0 13 | return ep 14 | 15 | 16 | def information_gain(y, splits): 17 | splits_entropy = sum([f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits]) 18 | return f_entropy(y) - splits_entropy 19 | 20 | 21 | def mse_criterion(y, splits): 22 | y_mean = np.mean(y) 23 | return -sum([np.sum((split - y_mean) ** 2) * (float(split.shape[0]) / y.shape[0]) for split in splits]) 24 | 25 | 26 | def xgb_criterion(y, left, right, loss): 27 | left = loss.gain(left["actual"], left["y_pred"]) 28 | right = loss.gain(right["actual"], right["y_pred"]) 29 | initial = loss.gain(y["actual"], y["y_pred"]) 30 | gain = left + right - initial 31 | return gain 32 | 33 | 34 | def get_split_mask(X, column, value): 35 | left_mask = X[:, column] < value 36 | right_mask = X[:, column] >= value 37 | return left_mask, right_mask 38 | 39 | 40 | def split(X, y, value): 41 | left_mask = X < value 42 | right_mask = X >= value 43 | return y[left_mask], y[right_mask] 44 | 45 | 46 | def split_dataset(X, target, column, value, return_X=True): 47 | left_mask, right_mask = get_split_mask(X, column, value) 48 | 49 | left, right = {}, {} 50 | for key in target.keys(): 51 | left[key] = target[key][left_mask] 52 | right[key] = target[key][right_mask] 53 | 54 | if return_X: 55 | left_X, right_X = X[left_mask], X[right_mask] 56 | return left_X, right_X, left, right 57 | else: 58 | return left, right 59 | -------------------------------------------------------------------------------- /mla/ensemble/gbm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | # logistic function 4 | from scipy.special import expit 5 | 6 | from mla.base import BaseEstimator 7 | from mla.ensemble.base import mse_criterion 8 | from mla.ensemble.tree import Tree 9 | 10 | """ 11 | References: 12 | https://arxiv.org/pdf/1603.02754v3.pdf 13 | http://www.saedsayad.com/docs/xgboost.pdf 14 | https://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf 15 | http://stats.stackexchange.com/questions/202858/loss-function-approximation-with-taylor-expansion 16 | """ 17 | 18 | 19 | class Loss: 20 | """Base class for loss functions.""" 21 | 22 | def __init__(self, regularization=1.0): 23 | self.regularization = regularization 24 | 25 | def grad(self, actual, predicted): 26 | """First order gradient.""" 27 | raise NotImplementedError() 28 | 29 | def hess(self, actual, predicted): 30 | """Second order gradient.""" 31 | raise NotImplementedError() 32 | 33 | def approximate(self, actual, predicted): 34 | """Approximate leaf value.""" 35 | return self.grad(actual, predicted).sum() / (self.hess(actual, predicted).sum() + self.regularization) 36 | 37 | def transform(self, pred): 38 | """Transform predictions values.""" 39 | return pred 40 | 41 | def gain(self, actual, predicted): 42 | """Calculate gain for split search.""" 43 | nominator = self.grad(actual, predicted).sum() ** 2 44 | denominator = self.hess(actual, predicted).sum() + self.regularization 45 | return 0.5 * (nominator / denominator) 46 | 47 | 48 | class LeastSquaresLoss(Loss): 49 | """Least squares loss""" 50 | 51 | def grad(self, actual, predicted): 52 | return actual - predicted 53 | 54 | def hess(self, actual, predicted): 55 | return np.ones_like(actual) 56 | 57 | 58 | class LogisticLoss(Loss): 59 | """Logistic loss.""" 60 | 61 | def grad(self, actual, predicted): 62 | return actual * expit(-actual * predicted) 63 | 64 | def 
hess(self, actual, predicted): 65 | expits = expit(predicted) 66 | return expits * (1 - expits) 67 | 68 | def transform(self, output): 69 | # Apply logistic (sigmoid) function to the output 70 | return expit(output) 71 | 72 | 73 | class GradientBoosting(BaseEstimator): 74 | """Gradient boosting trees with Taylor's expansion approximation (as in xgboost).""" 75 | 76 | def __init__(self, n_estimators, learning_rate=0.1, max_features=10, max_depth=2, min_samples_split=10): 77 | self.min_samples_split = min_samples_split 78 | self.learning_rate = learning_rate 79 | self.max_depth = max_depth 80 | self.max_features = max_features 81 | self.n_estimators = n_estimators 82 | self.trees = [] 83 | self.loss = None 84 | 85 | def fit(self, X, y=None): 86 | self._setup_input(X, y) 87 | self.y_mean = np.mean(y) 88 | self._train() 89 | 90 | def _train(self): 91 | # Initialize model with zeros 92 | y_pred = np.zeros(self.n_samples, np.float32) 93 | # Or mean 94 | # y_pred = np.full(self.n_samples, self.y_mean) 95 | 96 | for n in range(self.n_estimators): 97 | residuals = self.loss.grad(self.y, y_pred) 98 | tree = Tree(regression=True, criterion=mse_criterion) 99 | # Pass multiple target values to the tree learner 100 | targets = { 101 | # Residual values 102 | "y": residuals, 103 | # Actual target values 104 | "actual": self.y, 105 | # Predictions from previous step 106 | "y_pred": y_pred, 107 | } 108 | tree.train( 109 | self.X, 110 | targets, 111 | max_features=self.max_features, 112 | min_samples_split=self.min_samples_split, 113 | max_depth=self.max_depth, 114 | loss=self.loss, 115 | ) 116 | predictions = tree.predict(self.X) 117 | y_pred += self.learning_rate * predictions 118 | self.trees.append(tree) 119 | 120 | def _predict(self, X=None): 121 | y_pred = np.zeros(X.shape[0], np.float32) 122 | 123 | for i, tree in enumerate(self.trees): 124 | y_pred += self.learning_rate * tree.predict(X) 125 | return y_pred 126 | 127 | def predict(self, X=None): 128 | return self.loss.transform(self._predict(X)) 129 | 130 | 131 | class GradientBoostingRegressor(GradientBoosting): 132 | def fit(self, X, y=None): 133 | self.loss = LeastSquaresLoss() 134 | super(GradientBoostingRegressor, self).fit(X, y) 135 | 136 | 137 | class GradientBoostingClassifier(GradientBoosting): 138 | def fit(self, X, y=None): 139 | # Convert labels from {0, 1} to {-1, 1} 140 | y = (y * 2) - 1 141 | self.loss = LogisticLoss() 142 | super(GradientBoostingClassifier, self).fit(X, y) 143 | -------------------------------------------------------------------------------- /mla/ensemble/random_forest.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | from mla.base import BaseEstimator 5 | from mla.ensemble.base import information_gain, mse_criterion 6 | from mla.ensemble.tree import Tree 7 | 8 | 9 | class RandomForest(BaseEstimator): 10 | def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion=None): 11 | """Base class for RandomForest. 12 | 13 | Parameters 14 | ---------- 15 | n_estimators : int 16 | The number of decision tree. 17 | max_features : int 18 | The number of features to consider when looking for the best split. 19 | min_samples_split : int 20 | The minimum number of samples required to split an internal node. 21 | max_depth : int 22 | Maximum depth of the tree. 23 | criterion : str 24 | The function to measure the quality of a split. 
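Subclasses pass "entropy" (RandomForestClassifier) or "mse"
(RandomForestRegressor); any other value raises a ValueError.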
25 | """ 26 | self.max_depth = max_depth 27 | self.min_samples_split = min_samples_split 28 | self.max_features = max_features 29 | self.n_estimators = n_estimators 30 | self.trees = [] 31 | 32 | def fit(self, X, y): 33 | self._setup_input(X, y) 34 | if self.max_features is None: 35 | self.max_features = int(np.sqrt(X.shape[1])) 36 | else: 37 | assert X.shape[1] > self.max_features 38 | self._train() 39 | 40 | def _train(self): 41 | for tree in self.trees: 42 | tree.train( 43 | self.X, 44 | self.y, 45 | max_features=self.max_features, 46 | min_samples_split=self.min_samples_split, 47 | max_depth=self.max_depth 48 | ) 49 | 50 | def _predict(self, X=None): 51 | raise NotImplementedError() 52 | 53 | 54 | class RandomForestClassifier(RandomForest): 55 | def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="entropy"): 56 | super(RandomForestClassifier, self).__init__( 57 | n_estimators=n_estimators, 58 | max_features=max_features, 59 | min_samples_split=min_samples_split, 60 | max_depth=max_depth, 61 | criterion=criterion, 62 | ) 63 | 64 | if criterion == "entropy": 65 | self.criterion = information_gain 66 | else: 67 | raise ValueError() 68 | 69 | # Initialize empty trees 70 | for _ in range(self.n_estimators): 71 | self.trees.append(Tree(criterion=self.criterion)) 72 | 73 | def _predict(self, X=None): 74 | y_shape = np.unique(self.y).shape[0] 75 | predictions = np.zeros((X.shape[0], y_shape)) 76 | 77 | for i in range(X.shape[0]): 78 | row_pred = np.zeros(y_shape) 79 | for tree in self.trees: 80 | row_pred += tree.predict_row(X[i, :]) 81 | 82 | row_pred /= self.n_estimators 83 | predictions[i, :] = row_pred 84 | return predictions 85 | 86 | 87 | class RandomForestRegressor(RandomForest): 88 | def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="mse"): 89 | super(RandomForestRegressor, self).__init__( 90 | n_estimators=n_estimators, 91 | max_features=max_features, 92 | min_samples_split=min_samples_split, 93 | max_depth=max_depth, 94 | ) 95 | 96 | if criterion == "mse": 97 | self.criterion = mse_criterion 98 | else: 99 | raise ValueError() 100 | 101 | # Initialize empty regression trees 102 | for _ in range(self.n_estimators): 103 | self.trees.append(Tree(regression=True, criterion=self.criterion)) 104 | 105 | def _predict(self, X=None): 106 | predictions = np.zeros((X.shape[0], self.n_estimators)) 107 | for i, tree in enumerate(self.trees): 108 | predictions[:, i] = tree.predict(X) 109 | return predictions.mean(axis=1) 110 | -------------------------------------------------------------------------------- /mla/ensemble/tree.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import random 3 | 4 | import numpy as np 5 | from scipy import stats 6 | 7 | from mla.ensemble.base import split, split_dataset, xgb_criterion 8 | 9 | random.seed(111) 10 | 11 | 12 | class Tree(object): 13 | """Recursive implementation of decision tree.""" 14 | 15 | def __init__(self, regression=False, criterion=None, n_classes=None): 16 | self.regression = regression 17 | self.impurity = None 18 | self.threshold = None 19 | self.column_index = None 20 | self.outcome = None 21 | self.criterion = criterion 22 | self.loss = None 23 | self.n_classes = n_classes # Only for classification 24 | 25 | self.left_child = None 26 | self.right_child = None 27 | 28 | @property 29 | def is_terminal(self): 30 | return not bool(self.left_child and self.right_child) 31 | 32 | def 
_find_splits(self, X): 33 | """Find all possible split values.""" 34 | split_values = set() 35 | 36 | # Get unique values in a sorted order 37 | x_unique = list(np.unique(X)) 38 | for i in range(1, len(x_unique)): 39 | # Find a point between two values 40 | average = (x_unique[i - 1] + x_unique[i]) / 2.0 41 | split_values.add(average) 42 | 43 | return list(split_values) 44 | 45 | def _find_best_split(self, X, target, n_features): 46 | """Find best feature and value for a split. Greedy algorithm.""" 47 | 48 | # Sample random subset of features 49 | subset = random.sample(list(range(0, X.shape[1])), n_features) 50 | max_gain, max_col, max_val = None, None, None 51 | 52 | for column in subset: 53 | split_values = self._find_splits(X[:, column]) 54 | for value in split_values: 55 | if self.loss is None: 56 | # Random forest 57 | splits = split(X[:, column], target["y"], value) 58 | gain = self.criterion(target["y"], splits) 59 | else: 60 | # Gradient boosting 61 | left, right = split_dataset(X, target, column, value, return_X=False) 62 | gain = xgb_criterion(target, left, right, self.loss) 63 | 64 | if (max_gain is None) or (gain > max_gain): 65 | max_col, max_val, max_gain = column, value, gain 66 | return max_col, max_val, max_gain 67 | 68 | def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01): 69 | try: 70 | # Exit from recursion using assert syntax 71 | assert X.shape[0] > min_samples_split 72 | assert max_depth > 0 73 | 74 | if max_features is None: 75 | max_features = X.shape[1] 76 | 77 | column, value, gain = self._find_best_split(X, target, max_features) 78 | assert gain is not None 79 | if self.regression: 80 | assert gain != 0 81 | else: 82 | assert gain > minimum_gain 83 | 84 | self.column_index = column 85 | self.threshold = value 86 | self.impurity = gain 87 | 88 | # Split dataset 89 | left_X, right_X, left_target, right_target = split_dataset(X, target, column, value) 90 | 91 | # Grow left and right child 92 | self.left_child = Tree(self.regression, self.criterion, self.n_classes) 93 | self.left_child._train( 94 | left_X, left_target, max_features, min_samples_split, max_depth - 1, minimum_gain 95 | ) 96 | 97 | self.right_child = Tree(self.regression, self.criterion, self.n_classes) 98 | self.right_child._train( 99 | right_X, right_target, max_features, min_samples_split, max_depth - 1, minimum_gain 100 | ) 101 | except AssertionError: 102 | self._calculate_leaf_value(target) 103 | 104 | def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01, loss=None): 105 | """Build a decision tree from training set. 106 | 107 | Parameters 108 | ---------- 109 | 110 | X : array-like 111 | Feature dataset. 112 | target : dictionary or array-like 113 | Target values. 114 | max_features : int or None 115 | The number of features to consider when looking for the best split. 116 | min_samples_split : int 117 | The minimum number of samples required to split an internal node. 118 | max_depth : int 119 | Maximum depth of the tree. 120 | minimum_gain : float, default 0.01 121 | Minimum gain required for splitting. 122 | loss : function, default None 123 | Loss function for gradient boosting. 
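When a loss is supplied (gradient boosting), target is expected to be a
dictionary with "y", "actual" and "y_pred" arrays; a plain array of target
values is wrapped into {"y": target} automatically.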
124 | """ 125 | 126 | if not isinstance(target, dict): 127 | target = {"y": target} 128 | 129 | # Loss for gradient boosting 130 | if loss is not None: 131 | self.loss = loss 132 | 133 | if not self.regression: 134 | self.n_classes = len(np.unique(target['y'])) 135 | 136 | self._train(X, target, max_features=max_features, min_samples_split=min_samples_split, 137 | max_depth=max_depth, minimum_gain=minimum_gain) 138 | 139 | 140 | def _calculate_leaf_value(self, targets): 141 | """Find optimal value for leaf.""" 142 | if self.loss is not None: 143 | # Gradient boosting 144 | self.outcome = self.loss.approximate(targets["actual"], targets["y_pred"]) 145 | else: 146 | # Random Forest 147 | if self.regression: 148 | # Mean value for regression task 149 | self.outcome = np.mean(targets["y"]) 150 | else: 151 | # Probability for classification task 152 | self.outcome = np.bincount(targets["y"], minlength=self.n_classes) / targets["y"].shape[0] 153 | 154 | def predict_row(self, row): 155 | """Predict single row.""" 156 | if not self.is_terminal: 157 | if row[self.column_index] < self.threshold: 158 | return self.left_child.predict_row(row) 159 | else: 160 | return self.right_child.predict_row(row) 161 | return self.outcome 162 | 163 | def predict(self, X): 164 | result = np.zeros(X.shape[0]) 165 | for i in range(X.shape[0]): 166 | result[i] = self.predict_row(X[i, :]) 167 | return result 168 | -------------------------------------------------------------------------------- /mla/fm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import autograd.numpy as np 4 | from autograd import elementwise_grad 5 | 6 | from mla.base import BaseEstimator 7 | from mla.metrics import mean_squared_error, binary_crossentropy 8 | 9 | np.random.seed(9999) 10 | 11 | """ 12 | References: 13 | Factorization Machines http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf 14 | """ 15 | 16 | 17 | class BaseFM(BaseEstimator): 18 | def __init__( 19 | self, n_components=10, max_iter=100, init_stdev=0.1, learning_rate=0.01, reg_v=0.1, reg_w=0.5, reg_w0=0.0 20 | ): 21 | """Simplified factorization machines implementation using SGD optimizer.""" 22 | self.reg_w0 = reg_w0 23 | self.reg_w = reg_w 24 | self.reg_v = reg_v 25 | self.n_components = n_components 26 | self.lr = learning_rate 27 | self.init_stdev = init_stdev 28 | self.max_iter = max_iter 29 | self.loss = None 30 | self.loss_grad = None 31 | 32 | def fit(self, X, y=None): 33 | self._setup_input(X, y) 34 | # bias 35 | self.wo = 0.0 36 | # Feature weights 37 | self.w = np.zeros(self.n_features) 38 | # Factor weights 39 | self.v = np.random.normal(scale=self.init_stdev, size=(self.n_features, self.n_components)) 40 | self._train() 41 | 42 | def _train(self): 43 | for epoch in range(self.max_iter): 44 | y_pred = self._predict(self.X) 45 | loss = self.loss_grad(self.y, y_pred) 46 | w_grad = np.dot(loss, self.X) / float(self.n_samples) 47 | self.wo -= self.lr * (loss.mean() + 2 * self.reg_w0 * self.wo) 48 | self.w -= self.lr * w_grad + (2 * self.reg_w * self.w) 49 | self._factor_step(loss) 50 | 51 | def _factor_step(self, loss): 52 | for ix, x in enumerate(self.X): 53 | for i in range(self.n_features): 54 | v_grad = loss[ix] * (x.dot(self.v).dot(x[i])[0] - self.v[i] * x[i] ** 2) 55 | self.v[i] -= self.lr * v_grad + (2 * self.reg_v * self.v[i]) 56 | 57 | def _predict(self, X=None): 58 | linear_output = np.dot(X, self.w) 59 | factors_output = np.sum(np.dot(X, self.v) ** 2 - np.dot(X ** 2, self.v ** 2), axis=1) / 2.0 
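# The line above relies on the identity from the FM paper referenced at the top:
# sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [(sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2],
# which evaluates all pairwise interactions in O(n_features * n_components).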
60 | return self.wo + linear_output + factors_output 61 | 62 | 63 | class FMRegressor(BaseFM): 64 | def fit(self, X, y=None): 65 | super(FMRegressor, self).fit(X, y) 66 | self.loss = mean_squared_error 67 | self.loss_grad = elementwise_grad(mean_squared_error) 68 | 69 | 70 | class FMClassifier(BaseFM): 71 | def fit(self, X, y=None): 72 | super(FMClassifier, self).fit(X, y) 73 | self.loss = binary_crossentropy 74 | self.loss_grad = elementwise_grad(binary_crossentropy) 75 | 76 | def predict(self, X=None): 77 | predictions = self._predict(X) 78 | return np.sign(predictions) 79 | -------------------------------------------------------------------------------- /mla/gaussian_mixture.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import random 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from scipy.stats import multivariate_normal 8 | 9 | from mla.base import BaseEstimator 10 | from mla.kmeans import KMeans 11 | 12 | 13 | class GaussianMixture(BaseEstimator): 14 | """Gaussian Mixture Model: clusters with Gaussian prior. 15 | 16 | Finds clusters by repeatedly performing Expectation–Maximization (EM) algorithm 17 | on the dataset. GMM assumes the datasets is distributed in multivariate Gaussian, 18 | and tries to find the underlying structure of the Gaussian, i.e. mean and covariance. 19 | E-step computes the "responsibility" of the data to each cluster, given the mean 20 | and covariance; M-step computes the mean, covariance and weights (prior of each 21 | cluster), given the responsibilities. It iterates until the total likelihood 22 | changes less than the tolerance. 23 | 24 | 25 | Parameters 26 | ---------- 27 | 28 | K : int 29 | The number of clusters into which the dataset is partitioned. 30 | max_iters: int 31 | The maximum iterations of assigning points to the perform EM. 32 | Short-circuited by the assignments converging on their own. 33 | init: str, default 'random' 34 | The name of the method used to initialize the first clustering. 35 | 36 | 'random' - Randomly select values from the dataset as the K centroids. 37 | 'kmeans' - Initialize the centroids, covariances, weights with KMeams's clusters. 38 | tolerance: float, default 1e-3 39 | The tolerance of difference of the two latest likelihood for convergence. 40 | """ 41 | 42 | y_required = False 43 | 44 | def __init__(self, K=4, init="random", max_iters=500, tolerance=1e-3): 45 | self.K = K 46 | self.max_iters = max_iters 47 | self.init = init 48 | self.assignments = None 49 | self.likelihood = [] 50 | self.tolerance = tolerance 51 | 52 | def fit(self, X, y=None): 53 | """Perform Expectation–Maximization (EM) until converged.""" 54 | self._setup_input(X, y) 55 | self._initialize() 56 | for _ in range(self.max_iters): 57 | self._E_step() 58 | self._M_step() 59 | if self._is_converged(): 60 | break 61 | 62 | def _initialize(self): 63 | """Set the initial weights, means and covs (with full covariance matrix). 
64 | 65 | weights: the prior of the clusters (what percentage of data does a cluster have) 66 | means: the mean points of the clusters 67 | covs: the covariance matrix of the clusters 68 | """ 69 | self.weights = np.ones(self.K) 70 | if self.init == "random": 71 | self.means = [self.X[x] for x in random.sample(range(self.n_samples), self.K)] 72 | self.covs = [np.cov(self.X.T) for _ in range(self.K)] 73 | 74 | elif self.init == "kmeans": 75 | kmeans = KMeans(K=self.K, max_iters=self.max_iters // 3, init="++") 76 | kmeans.fit(self.X) 77 | self.assignments = kmeans.predict() 78 | self.means = kmeans.centroids 79 | self.covs = [] 80 | for i in np.unique(self.assignments): 81 | self.weights[int(i)] = (self.assignments == i).sum() 82 | self.covs.append(np.cov(self.X[self.assignments == i].T)) 83 | else: 84 | raise ValueError("Unknown type of init parameter") 85 | self.weights /= self.weights.sum() 86 | 87 | def _E_step(self): 88 | """Expectation(E-step) for Gaussian Mixture.""" 89 | likelihoods = self._get_likelihood(self.X) 90 | self.likelihood.append(likelihoods.sum()) 91 | weighted_likelihoods = self._get_weighted_likelihood(likelihoods) 92 | self.assignments = weighted_likelihoods.argmax(axis=1) 93 | weighted_likelihoods /= weighted_likelihoods.sum(axis=1)[:, np.newaxis] 94 | self.responsibilities = weighted_likelihoods 95 | 96 | def _M_step(self): 97 | """Maximization (M-step) for Gaussian Mixture.""" 98 | weights = self.responsibilities.sum(axis=0) 99 | for assignment in range(self.K): 100 | resp = self.responsibilities[:, assignment][:, np.newaxis] 101 | self.means[assignment] = (resp * self.X).sum(axis=0) / resp.sum() 102 | self.covs[assignment] = (self.X - self.means[assignment]).T.dot( 103 | (self.X - self.means[assignment]) * resp 104 | ) / weights[assignment] 105 | self.weights = weights / weights.sum() 106 | 107 | def _is_converged(self): 108 | """Check if the difference of the latest two likelihood is less than the tolerance.""" 109 | if (len(self.likelihood) > 1) and (self.likelihood[-1] - self.likelihood[-2] <= self.tolerance): 110 | return True 111 | return False 112 | 113 | def _predict(self, X): 114 | """Get the assignments for X with GMM clusters.""" 115 | if not X.shape: 116 | return self.assignments 117 | likelihoods = self._get_likelihood(X) 118 | weighted_likelihoods = self._get_weighted_likelihood(likelihoods) 119 | assignments = weighted_likelihoods.argmax(axis=1) 120 | return assignments 121 | 122 | def _get_likelihood(self, data): 123 | n_data = data.shape[0] 124 | likelihoods = np.zeros([n_data, self.K]) 125 | for c in range(self.K): 126 | likelihoods[:, c] = multivariate_normal.pdf(data, self.means[c], self.covs[c]) 127 | return likelihoods 128 | 129 | def _get_weighted_likelihood(self, likelihood): 130 | return self.weights * likelihood 131 | 132 | def plot(self, data=None, ax=None, holdon=False): 133 | """Plot contour for 2D data.""" 134 | if not (len(self.X.shape) == 2 and self.X.shape[1] == 2): 135 | raise AttributeError("Only support for visualizing 2D data.") 136 | 137 | if ax is None: 138 | _, ax = plt.subplots() 139 | 140 | if data is None: 141 | data = self.X 142 | assignments = self.assignments 143 | else: 144 | assignments = self.predict(data) 145 | 146 | COLOR = "bgrcmyk" 147 | cmap = lambda assignment: COLOR[int(assignment) % len(COLOR)] 148 | 149 | # generate grid 150 | delta = 0.025 151 | margin = 0.2 152 | xmax, ymax = self.X.max(axis=0) + margin 153 | xmin, ymin = self.X.min(axis=0) - margin 154 | axis_X, axis_Y = np.meshgrid(np.arange(xmin, xmax, 
delta), np.arange(ymin, ymax, delta)) 155 | 156 | def grid_gaussian_pdf(mean, cov): 157 | grid_array = np.array(list(zip(axis_X.flatten(), axis_Y.flatten()))) 158 | return multivariate_normal.pdf(grid_array, mean, cov).reshape(axis_X.shape) 159 | 160 | # plot scatters 161 | if assignments is None: 162 | c = None 163 | else: 164 | c = [cmap(assignment) for assignment in assignments] 165 | ax.scatter(data[:, 0], data[:, 1], c=c) 166 | 167 | # plot contours 168 | for assignment in range(self.K): 169 | ax.contour( 170 | axis_X, 171 | axis_Y, 172 | grid_gaussian_pdf(self.means[assignment], self.covs[assignment]), 173 | colors=cmap(assignment), 174 | ) 175 | 176 | if not holdon: 177 | plt.show() 178 | -------------------------------------------------------------------------------- /mla/kmeans.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import random 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn as sns 8 | 9 | from mla.base import BaseEstimator 10 | from mla.metrics.distance import euclidean_distance 11 | 12 | random.seed(1111) 13 | 14 | 15 | class KMeans(BaseEstimator): 16 | """Partition a dataset into K clusters. 17 | 18 | Finds clusters by repeatedly assigning each data point to the cluster with 19 | the nearest centroid and iterating until the assignments converge (meaning 20 | they don't change during an iteration) or the maximum number of iterations 21 | is reached. 22 | 23 | Parameters 24 | ---------- 25 | 26 | K : int 27 | The number of clusters into which the dataset is partitioned. 28 | max_iters: int 29 | The maximum iterations of assigning points to the nearest cluster. 30 | Short-circuited by the assignments converging on their own. 31 | init: str, default 'random' 32 | The name of the method used to initialize the first clustering. 33 | 34 | 'random' - Randomly select values from the dataset as the K centroids. 35 | '++' - Select a random first centroid from the dataset, then select 36 | K - 1 more centroids by choosing values from the dataset with a 37 | probability distribution proportional to the squared distance 38 | from each point's closest existing cluster. Attempts to create 39 | larger distances between initial clusters to improve convergence 40 | rates and avoid degenerate cases. 
41 | """ 42 | 43 | y_required = False 44 | 45 | def __init__(self, K=5, max_iters=100, init="random"): 46 | self.K = K 47 | self.max_iters = max_iters 48 | self.clusters = [[] for _ in range(self.K)] 49 | self.centroids = [] 50 | self.init = init 51 | 52 | def _initialize_centroids(self, init): 53 | """Set the initial centroids.""" 54 | 55 | if init == "random": 56 | self.centroids = [self.X[x] for x in random.sample(range(self.n_samples), self.K)] 57 | elif init == "++": 58 | self.centroids = [random.choice(self.X)] 59 | while len(self.centroids) < self.K: 60 | self.centroids.append(self._choose_next_center()) 61 | else: 62 | raise ValueError("Unknown type of init parameter") 63 | 64 | def _predict(self, X=None): 65 | """Perform clustering on the dataset.""" 66 | self._initialize_centroids(self.init) 67 | centroids = self.centroids 68 | 69 | # Optimize clusters 70 | for _ in range(self.max_iters): 71 | self._assign(centroids) 72 | centroids_old = centroids 73 | centroids = [self._get_centroid(cluster) for cluster in self.clusters] 74 | 75 | if self._is_converged(centroids_old, centroids): 76 | break 77 | 78 | self.centroids = centroids 79 | 80 | return self._get_predictions() 81 | 82 | def _get_predictions(self): 83 | predictions = np.empty(self.n_samples) 84 | 85 | for i, cluster in enumerate(self.clusters): 86 | for index in cluster: 87 | predictions[index] = i 88 | return predictions 89 | 90 | def _assign(self, centroids): 91 | 92 | for row in range(self.n_samples): 93 | for i, cluster in enumerate(self.clusters): 94 | if row in cluster: 95 | self.clusters[i].remove(row) 96 | break 97 | 98 | closest = self._closest(row, centroids) 99 | self.clusters[closest].append(row) 100 | 101 | def _closest(self, fpoint, centroids): 102 | """Find the closest centroid for a point.""" 103 | closest_index = None 104 | closest_distance = None 105 | for i, point in enumerate(centroids): 106 | dist = euclidean_distance(self.X[fpoint], point) 107 | if closest_index is None or dist < closest_distance: 108 | closest_index = i 109 | closest_distance = dist 110 | return closest_index 111 | 112 | def _get_centroid(self, cluster): 113 | """Get values by indices and take the mean.""" 114 | return [np.mean(np.take(self.X[:, i], cluster)) for i in range(self.n_features)] 115 | 116 | def _dist_from_centers(self): 117 | """Calculate distance from centers.""" 118 | return np.array([min([euclidean_distance(x, c) for c in self.centroids]) for x in self.X]) 119 | 120 | def _choose_next_center(self): 121 | distances = self._dist_from_centers() 122 | squared_distances = distances ** 2 123 | probs = squared_distances / squared_distances.sum() 124 | ind = np.random.choice(self.X.shape[0], 1, p=probs)[0] 125 | return self.X[ind] 126 | 127 | def _is_converged(self, centroids_old, centroids): 128 | """Check if the distance between old and new centroids is zero.""" 129 | distance = 0 130 | for i in range(self.K): 131 | distance += euclidean_distance(centroids_old[i], centroids[i]) 132 | return distance == 0 133 | 134 | def plot(self, ax=None, holdon=False): 135 | sns.set(style="white") 136 | palette = sns.color_palette("hls", self.K + 1) 137 | data = self.X 138 | 139 | if ax is None: 140 | _, ax = plt.subplots() 141 | 142 | for i, index in enumerate(self.clusters): 143 | point = np.array(data[index]).T 144 | ax.scatter(*point, c=[palette[i], ]) 145 | 146 | for point in self.centroids: 147 | ax.scatter(*point, marker="x", linewidths=10) 148 | 149 | if not holdon: 150 | plt.show() 151 | 
-------------------------------------------------------------------------------- /mla/knn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from collections import Counter 4 | 5 | import numpy as np 6 | from scipy.spatial.distance import euclidean 7 | 8 | from mla.base import BaseEstimator 9 | 10 | 11 | class KNNBase(BaseEstimator): 12 | def __init__(self, k=5, distance_func=euclidean): 13 | """Base class for Nearest neighbors classifier and regressor. 14 | 15 | Parameters 16 | ---------- 17 | k : int, default 5 18 | The number of neighbors to take into account. If 0, all the 19 | training examples are used. 20 | distance_func : function, default euclidean distance 21 | A distance function taking two arguments. Any function from 22 | scipy.spatial.distance will do. 23 | """ 24 | 25 | self.k = None if k == 0 else k # l[:None] returns the whole list 26 | self.distance_func = distance_func 27 | 28 | def aggregate(self, neighbors_targets): 29 | raise NotImplementedError() 30 | 31 | def _predict(self, X=None): 32 | predictions = [self._predict_x(x) for x in X] 33 | 34 | return np.array(predictions) 35 | 36 | def _predict_x(self, x): 37 | """Predict the label of a single instance x.""" 38 | 39 | # compute distances between x and all examples in the training set. 40 | distances = (self.distance_func(x, example) for example in self.X) 41 | 42 | # Sort all examples by their distance to x and keep their target value. 43 | neighbors = sorted(((dist, target) for (dist, target) in zip(distances, self.y)), key=lambda x: x[0]) 44 | 45 | # Get targets of the k-nn and aggregate them (most common one or 46 | # average). 47 | neighbors_targets = [target for (_, target) in neighbors[: self.k]] 48 | 49 | return self.aggregate(neighbors_targets) 50 | 51 | 52 | class KNNClassifier(KNNBase): 53 | """Nearest neighbors classifier. 54 | 55 | Note: if there is a tie for the most common label among the neighbors, then 56 | the predicted label is arbitrary.""" 57 | 58 | def aggregate(self, neighbors_targets): 59 | """Return the most common target label.""" 60 | 61 | most_common_label = Counter(neighbors_targets).most_common(1)[0][0] 62 | return most_common_label 63 | 64 | 65 | class KNNRegressor(KNNBase): 66 | """Nearest neighbors regressor.""" 67 | 68 | def aggregate(self, neighbors_targets): 69 | """Return the mean of all targets.""" 70 | 71 | return np.mean(neighbors_targets) 72 | -------------------------------------------------------------------------------- /mla/linear_models.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import logging 4 | 5 | import autograd.numpy as np 6 | from autograd import grad 7 | 8 | from mla.base import BaseEstimator 9 | from mla.metrics.metrics import mean_squared_error, binary_crossentropy 10 | 11 | np.random.seed(1000) 12 | 13 | 14 | class BasicRegression(BaseEstimator): 15 | def __init__(self, lr=0.001, penalty="None", C=0.01, tolerance=0.0001, max_iters=1000): 16 | """Basic class for implementing continuous regression estimators which 17 | are trained with gradient descent optimization on their particular loss 18 | function. 19 | 20 | Parameters 21 | ---------- 22 | lr : float, default 0.001 23 | Learning rate. 24 | penalty : str, {'l1', 'l2', None'}, default None 25 | Regularization function name. 26 | C : float, default 0.01 27 | The regularization coefficient. 
28 | tolerance : float, default 0.0001 29 | If the gradient descent updates are smaller than `tolerance`, then 30 | stop optimization process. 31 | max_iters : int, default 10000 32 | The maximum number of iterations. 33 | """ 34 | self.C = C 35 | self.penalty = penalty 36 | self.tolerance = tolerance 37 | self.lr = lr 38 | self.max_iters = max_iters 39 | self.errors = [] 40 | self.theta = [] 41 | self.n_samples, self.n_features = None, None 42 | self.cost_func = None 43 | 44 | def _loss(self, w): 45 | raise NotImplementedError() 46 | 47 | def init_cost(self): 48 | raise NotImplementedError() 49 | 50 | def _add_penalty(self, loss, w): 51 | """Apply regularization to the loss.""" 52 | if self.penalty == "l1": 53 | loss += self.C * np.abs(w[1:]).sum() 54 | elif self.penalty == "l2": 55 | loss += (0.5 * self.C) * (w[1:] ** 2).sum() 56 | return loss 57 | 58 | def _cost(self, X, y, theta): 59 | prediction = X.dot(theta) 60 | error = self.cost_func(y, prediction) 61 | return error 62 | 63 | def fit(self, X, y=None): 64 | self._setup_input(X, y) 65 | self.init_cost() 66 | self.n_samples, self.n_features = X.shape 67 | 68 | # Initialize weights + bias term 69 | self.theta = np.random.normal(size=(self.n_features + 1), scale=0.5) 70 | 71 | # Add an intercept column 72 | self.X = self._add_intercept(self.X) 73 | 74 | self._train() 75 | 76 | @staticmethod 77 | def _add_intercept(X): 78 | b = np.ones([X.shape[0], 1]) 79 | return np.concatenate([b, X], axis=1) 80 | 81 | def _train(self): 82 | self.theta, self.errors = self._gradient_descent() 83 | logging.info(" Theta: %s" % self.theta.flatten()) 84 | 85 | def _predict(self, X=None): 86 | X = self._add_intercept(X) 87 | return X.dot(self.theta) 88 | 89 | def _gradient_descent(self): 90 | theta = self.theta 91 | errors = [self._cost(self.X, self.y, theta)] 92 | # Get derivative of the loss function 93 | cost_d = grad(self._loss) 94 | for i in range(1, self.max_iters + 1): 95 | # Calculate gradient and update theta 96 | delta = cost_d(theta) 97 | theta -= self.lr * delta 98 | 99 | errors.append(self._cost(self.X, self.y, theta)) 100 | logging.info("Iteration %s, error %s" % (i, errors[i])) 101 | 102 | error_diff = np.linalg.norm(errors[i - 1] - errors[i]) 103 | if error_diff < self.tolerance: 104 | logging.info("Convergence has reached.") 105 | break 106 | return theta, errors 107 | 108 | 109 | class LinearRegression(BasicRegression): 110 | """Linear regression with gradient descent optimizer.""" 111 | 112 | def _loss(self, w): 113 | loss = self.cost_func(self.y, np.dot(self.X, w)) 114 | return self._add_penalty(loss, w) 115 | 116 | def init_cost(self): 117 | self.cost_func = mean_squared_error 118 | 119 | 120 | class LogisticRegression(BasicRegression): 121 | """Binary logistic regression with gradient descent optimizer.""" 122 | 123 | def init_cost(self): 124 | self.cost_func = binary_crossentropy 125 | 126 | def _loss(self, w): 127 | loss = self.cost_func(self.y, self.sigmoid(np.dot(self.X, w))) 128 | return self._add_penalty(loss, w) 129 | 130 | @staticmethod 131 | def sigmoid(x): 132 | return 0.5 * (np.tanh(0.5 * x) + 1) 133 | 134 | def _predict(self, X=None): 135 | X = self._add_intercept(X) 136 | return self.sigmoid(X.dot(self.theta)) 137 | -------------------------------------------------------------------------------- /mla/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .metrics import * 3 | 
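
A short usage sketch for LinearRegression and LogisticRegression above (see also examples/linear_models.py). The synthetic data and hyperparameter values are illustrative, and it assumes BaseEstimator exposes a public predict() that forwards to _predict().

import numpy as np

from mla.linear_models import LinearRegression, LogisticRegression

# Illustrative synthetic regression problem: y = 2*x0 - 3*x1 + 1 + noise
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
y = 2 * X[:, 0] - 3 * X[:, 1] + 1 + 0.1 * rng.randn(200)

model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.001)
model.fit(X, y)
y_pred = model.predict(X)           # adds the intercept column and applies the learned theta

# Same interface for binary classification; predictions are sigmoid probabilities
y_bin = (y > y.mean()).astype(int)
clf = LogisticRegression(lr=0.01, max_iters=2000)
clf.fit(X, y_bin)
probs = clf.predict(X)
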
-------------------------------------------------------------------------------- /mla/metrics/base.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 5 | def check_data(a, b): 6 | if not isinstance(a, np.ndarray): 7 | a = np.array(a) 8 | 9 | if not isinstance(b, np.ndarray): 10 | b = np.array(b) 11 | 12 | if type(a) != type(b): 13 | raise ValueError("Type mismatch: %s and %s" % (type(a), type(b))) 14 | 15 | if a.size != b.size: 16 | raise ValueError("Arrays must be equal in length.") 17 | return a, b 18 | 19 | 20 | def validate_input(function): 21 | def wrapper(a, b): 22 | a, b = check_data(a, b) 23 | return function(a, b) 24 | 25 | return wrapper 26 | -------------------------------------------------------------------------------- /mla/metrics/distance.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import math 3 | 4 | import numpy as np 5 | 6 | 7 | def euclidean_distance(a, b): 8 | if isinstance(a, list) and isinstance(b, list): 9 | a = np.array(a) 10 | b = np.array(b) 11 | 12 | return math.sqrt(sum((a - b) ** 2)) 13 | 14 | 15 | def l2_distance(X): 16 | sum_X = np.sum(X * X, axis=1) 17 | return (-2 * np.dot(X, X.T) + sum_X).T + sum_X 18 | -------------------------------------------------------------------------------- /mla/metrics/metrics.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | 4 | EPS = 1e-15 5 | 6 | 7 | def unhot(function): 8 | """Convert one-hot representation into one column.""" 9 | 10 | def wrapper(actual, predicted): 11 | if len(actual.shape) > 1 and actual.shape[1] > 1: 12 | actual = actual.argmax(axis=1) 13 | if len(predicted.shape) > 1 and predicted.shape[1] > 1: 14 | predicted = predicted.argmax(axis=1) 15 | return function(actual, predicted) 16 | 17 | return wrapper 18 | 19 | 20 | def absolute_error(actual, predicted): 21 | return np.abs(actual - predicted) 22 | 23 | 24 | @unhot 25 | def classification_error(actual, predicted): 26 | return (actual != predicted).sum() / float(actual.shape[0]) 27 | 28 | 29 | @unhot 30 | def accuracy(actual, predicted): 31 | return 1.0 - classification_error(actual, predicted) 32 | 33 | 34 | def mean_absolute_error(actual, predicted): 35 | return np.mean(absolute_error(actual, predicted)) 36 | 37 | 38 | def squared_error(actual, predicted): 39 | return (actual - predicted) ** 2 40 | 41 | 42 | def squared_log_error(actual, predicted): 43 | return (np.log(np.array(actual) + 1) - np.log(np.array(predicted) + 1)) ** 2 44 | 45 | 46 | def mean_squared_log_error(actual, predicted): 47 | return np.mean(squared_log_error(actual, predicted)) 48 | 49 | 50 | def mean_squared_error(actual, predicted): 51 | return np.mean(squared_error(actual, predicted)) 52 | 53 | 54 | def root_mean_squared_error(actual, predicted): 55 | return np.sqrt(mean_squared_error(actual, predicted)) 56 | 57 | 58 | def root_mean_squared_log_error(actual, predicted): 59 | return np.sqrt(mean_squared_log_error(actual, predicted)) 60 | 61 | 62 | def logloss(actual, predicted): 63 | predicted = np.clip(predicted, EPS, 1 - EPS) 64 | loss = -np.sum(actual * np.log(predicted)) 65 | return loss / float(actual.shape[0]) 66 | 67 | 68 | def hinge(actual, predicted): 69 | return np.mean(np.max(1.0 - actual * predicted, 0.0)) 70 | 71 | 72 | def binary_crossentropy(actual, predicted): 73 | predicted = np.clip(predicted, EPS, 1 - EPS) 74 | return 
np.mean(-np.sum(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))) 75 | 76 | 77 | # aliases 78 | mse = mean_squared_error 79 | rmse = root_mean_squared_error 80 | mae = mean_absolute_error 81 | 82 | 83 | def get_metric(name): 84 | """Return metric function by name""" 85 | try: 86 | return globals()[name] 87 | except Exception: 88 | raise ValueError("Invalid metric function.") 89 | -------------------------------------------------------------------------------- /mla/metrics/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | -------------------------------------------------------------------------------- /mla/metrics/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import pytest 5 | from numpy.testing import assert_almost_equal 6 | 7 | from mla.metrics.base import check_data, validate_input 8 | from mla.metrics.metrics import get_metric 9 | 10 | 11 | def test_data_validation(): 12 | with pytest.raises(ValueError): 13 | check_data([], 1) 14 | 15 | with pytest.raises(ValueError): 16 | check_data([1, 2, 3], [3, 2]) 17 | 18 | a, b = check_data([1, 2, 3], [3, 2, 1]) 19 | 20 | assert np.all(a == np.array([1, 2, 3])) 21 | assert np.all(b == np.array([3, 2, 1])) 22 | 23 | 24 | def metric(name): 25 | return validate_input(get_metric(name)) 26 | 27 | 28 | def test_classification_error(): 29 | f = metric("classification_error") 30 | assert f([1, 2, 3, 4], [1, 2, 3, 4]) == 0 31 | assert f([1, 2, 3, 4], [1, 2, 3, 5]) == 0.25 32 | assert f([1, 1, 1, 0, 0, 0], [1, 1, 1, 1, 0, 0]) == (1.0 / 6) 33 | 34 | 35 | def test_absolute_error(): 36 | f = metric("absolute_error") 37 | assert f([3], [5]) == [2] 38 | assert f([-1], [-4]) == [3] 39 | 40 | 41 | def test_mean_absolute_error(): 42 | f = metric("mean_absolute_error") 43 | assert f([1, 2, 3], [1, 2, 3]) == 0 44 | assert f([1, 2, 3], [3, 2, 1]) == 4 / 3 45 | 46 | 47 | def test_squared_error(): 48 | f = metric("squared_error") 49 | assert f([1], [1]) == [0] 50 | assert f([3], [1]) == [4] 51 | 52 | 53 | def test_squared_log_error(): 54 | f = metric("squared_log_error") 55 | assert f([1], [1]) == [0] 56 | assert f([3], [1]) == [np.log(2) ** 2] 57 | assert f([np.exp(2) - 1], [np.exp(1) - 1]) == [1.0] 58 | 59 | 60 | def test_mean_squared_log_error(): 61 | f = metric("mean_squared_log_error") 62 | assert f([1, 2, 3], [1, 2, 3]) == 0 63 | assert f([1, 2, 3, np.exp(1) - 1], [1, 2, 3, np.exp(2) - 1]) == 0.25 64 | 65 | 66 | def test_root_mean_squared_log_error(): 67 | f = metric("root_mean_squared_log_error") 68 | assert f([1, 2, 3], [1, 2, 3]) == 0 69 | assert f([1, 2, 3, np.exp(1) - 1], [1, 2, 3, np.exp(2) - 1]) == 0.5 70 | 71 | 72 | def test_mean_squared_error(): 73 | f = metric("mean_squared_error") 74 | assert f([1, 2, 3], [1, 2, 3]) == 0 75 | assert f(range(1, 5), [1, 2, 3, 6]) == 1 76 | 77 | 78 | def test_root_mean_squared_error(): 79 | f = metric("root_mean_squared_error") 80 | assert f([1, 2, 3], [1, 2, 3]) == 0 81 | assert f(range(1, 5), [1, 2, 3, 5]) == 0.5 82 | 83 | 84 | def test_multiclass_logloss(): 85 | f = metric("logloss") 86 | assert_almost_equal(f([1], [1]), 0) 87 | assert_almost_equal(f([1, 1], [1, 1]), 0) 88 | assert_almost_equal(f([1], [0.5]), -np.log(0.5)) 89 | -------------------------------------------------------------------------------- /mla/naive_bayes.py: -------------------------------------------------------------------------------- 
1 | # coding:utf-8 2 | 3 | import numpy as np 4 | 5 | from mla.base import BaseEstimator 6 | from mla.neuralnet.activations import softmax 7 | 8 | 9 | class NaiveBayesClassifier(BaseEstimator): 10 | """Gaussian Naive Bayes.""" 11 | 12 | # Binary problem. 13 | n_classes = 2 14 | 15 | def fit(self, X, y=None): 16 | self._setup_input(X, y) 17 | # Check target labels 18 | assert list(np.unique(y)) == [0, 1] 19 | 20 | # Mean and variance for each class and feature combination 21 | self._mean = np.zeros((self.n_classes, self.n_features), dtype=np.float64) 22 | self._var = np.zeros((self.n_classes, self.n_features), dtype=np.float64) 23 | 24 | self._priors = np.zeros(self.n_classes, dtype=np.float64) 25 | 26 | for c in range(self.n_classes): 27 | # Filter features by class 28 | X_c = X[y == c] 29 | 30 | # Calculate mean, variance, prior for each class 31 | self._mean[c, :] = X_c.mean(axis=0) 32 | self._var[c, :] = X_c.var(axis=0) 33 | self._priors[c] = X_c.shape[0] / float(X.shape[0]) 34 | 35 | def _predict(self, X=None): 36 | # Apply _predict_proba for each row 37 | predictions = np.apply_along_axis(self._predict_row, 1, X) 38 | 39 | # Normalize probabilities so that each row will sum up to 1.0 40 | return softmax(predictions) 41 | 42 | def _predict_row(self, x): 43 | """Predict log likelihood for given row.""" 44 | output = [] 45 | for y in range(self.n_classes): 46 | prior = np.log(self._priors[y]) 47 | posterior = np.log(self._pdf(y, x)).sum() 48 | prediction = prior + posterior 49 | 50 | output.append(prediction) 51 | return output 52 | 53 | def _pdf(self, n_class, x): 54 | """Calculate Gaussian PDF for each feature.""" 55 | 56 | mean = self._mean[n_class] 57 | var = self._var[n_class] 58 | 59 | numerator = np.exp(-(x - mean) ** 2 / (2 * var)) 60 | denominator = np.sqrt(2 * np.pi * var) 61 | return numerator / denominator 62 | -------------------------------------------------------------------------------- /mla/neuralnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .nnet import NeuralNet 2 | -------------------------------------------------------------------------------- /mla/neuralnet/activations.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | """ 4 | References: 5 | https://en.wikipedia.org/wiki/Activation_function 6 | """ 7 | 8 | 9 | def sigmoid(z): 10 | return 1.0 / (1.0 + np.exp(-z)) 11 | 12 | 13 | def softmax(z): 14 | # Avoid numerical overflow by removing max 15 | e = np.exp(z - np.amax(z, axis=1, keepdims=True)) 16 | return e / np.sum(e, axis=1, keepdims=True) 17 | 18 | 19 | def linear(z): 20 | return z 21 | 22 | 23 | def softplus(z): 24 | """Smooth relu.""" 25 | # Avoid numerical overflow, see: 26 | # https://docs.scipy.org/doc/numpy/reference/generated/numpy.logaddexp.html 27 | return np.logaddexp(0.0, z) 28 | 29 | 30 | def softsign(z): 31 | return z / (1 + np.abs(z)) 32 | 33 | 34 | def tanh(z): 35 | return np.tanh(z) 36 | 37 | 38 | def relu(z): 39 | return np.maximum(0, z) 40 | 41 | 42 | def leakyrelu(z, a=0.01): 43 | return np.maximum(z * a, z) 44 | 45 | 46 | def get_activation(name): 47 | """Return activation function by name""" 48 | try: 49 | return globals()[name] 50 | except Exception: 51 | raise ValueError("Invalid activation function.") 52 | -------------------------------------------------------------------------------- /mla/neuralnet/constraints.py: -------------------------------------------------------------------------------- 1 | # 
coding:utf-8 2 | import numpy as np 3 | 4 | EPSILON = 10e-8 5 | 6 | 7 | class Constraint(object): 8 | def clip(self, p): 9 | return p 10 | 11 | 12 | class MaxNorm(object): 13 | def __init__(self, m=2, axis=0): 14 | self.axis = axis 15 | self.m = m 16 | 17 | def clip(self, p): 18 | norms = np.sqrt(np.sum(p ** 2, axis=self.axis)) 19 | desired = np.clip(norms, 0, self.m) 20 | p = p * (desired / (EPSILON + norms)) 21 | return p 22 | 23 | 24 | class NonNeg(object): 25 | def clip(self, p): 26 | p[p < 0.0] = 0.0 27 | return p 28 | 29 | 30 | class SmallNorm(object): 31 | def clip(self, p): 32 | return np.clip(p, -5, 5) 33 | 34 | 35 | class UnitNorm(Constraint): 36 | def __init__(self, axis=0): 37 | self.axis = axis 38 | 39 | def clip(self, p): 40 | return p / (EPSILON + np.sqrt(np.sum(p ** 2, axis=self.axis))) 41 | -------------------------------------------------------------------------------- /mla/neuralnet/initializations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | References: 5 | http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf 6 | 7 | """ 8 | 9 | 10 | def normal(shape, scale=0.5): 11 | return np.random.normal(size=shape, scale=scale) 12 | 13 | 14 | def uniform(shape, scale=0.5): 15 | return np.random.uniform(size=shape, low=-scale, high=scale) 16 | 17 | 18 | def zero(shape, **kwargs): 19 | return np.zeros(shape) 20 | 21 | 22 | def one(shape, **kwargs): 23 | return np.ones(shape) 24 | 25 | 26 | def orthogonal(shape, scale=0.5): 27 | flat_shape = (shape[0], np.prod(shape[1:])) 28 | array = np.random.normal(size=flat_shape) 29 | u, _, v = np.linalg.svd(array, full_matrices=False) 30 | array = u if u.shape == flat_shape else v 31 | return np.reshape(array * scale, shape) 32 | 33 | 34 | def _glorot_fan(shape): 35 | assert len(shape) >= 2 36 | 37 | if len(shape) == 4: 38 | receptive_field_size = np.prod(shape[2:]) 39 | fan_in = shape[1] * receptive_field_size 40 | fan_out = shape[0] * receptive_field_size 41 | else: 42 | fan_in, fan_out = shape[:2] 43 | return float(fan_in), float(fan_out) 44 | 45 | 46 | def glorot_normal(shape, **kwargs): 47 | fan_in, fan_out = _glorot_fan(shape) 48 | s = np.sqrt(2.0 / (fan_in + fan_out)) 49 | return normal(shape, s) 50 | 51 | 52 | def glorot_uniform(shape, **kwargs): 53 | fan_in, fan_out = _glorot_fan(shape) 54 | s = np.sqrt(6.0 / (fan_in + fan_out)) 55 | return uniform(shape, s) 56 | 57 | 58 | def he_normal(shape, **kwargs): 59 | fan_in, fan_out = _glorot_fan(shape) 60 | s = np.sqrt(2.0 / fan_in) 61 | return normal(shape, s) 62 | 63 | 64 | def he_uniform(shape, **kwargs): 65 | fan_in, fan_out = _glorot_fan(shape) 66 | s = np.sqrt(6.0 / fan_in) 67 | return uniform(shape, s) 68 | 69 | 70 | def get_initializer(name): 71 | """Returns initialization function by the name.""" 72 | try: 73 | return globals()[name] 74 | except Exception: 75 | raise ValueError("Invalid initialization function.") 76 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .basic import * 3 | from .convnet import * 4 | from .normalization import * 5 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/basic.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | from autograd import elementwise_grad 
4 | 5 | from mla.neuralnet.activations import get_activation 6 | from mla.neuralnet.parameters import Parameters 7 | 8 | np.random.seed(9999) 9 | 10 | 11 | class Layer(object): 12 | def setup(self, X_shape): 13 | """Allocates initial weights.""" 14 | pass 15 | 16 | def forward_pass(self, x): 17 | raise NotImplementedError() 18 | 19 | def backward_pass(self, delta): 20 | raise NotImplementedError() 21 | 22 | def shape(self, x_shape): 23 | """Returns shape of the current layer.""" 24 | raise NotImplementedError() 25 | 26 | 27 | class ParamMixin(object): 28 | @property 29 | def parameters(self): 30 | return self._params 31 | 32 | 33 | class PhaseMixin(object): 34 | _train = False 35 | 36 | @property 37 | def is_training(self): 38 | return self._train 39 | 40 | @is_training.setter 41 | def is_training(self, is_train=True): 42 | self._train = is_train 43 | 44 | @property 45 | def is_testing(self): 46 | return not self._train 47 | 48 | @is_testing.setter 49 | def is_testing(self, is_test=True): 50 | self._train = not is_test 51 | 52 | 53 | class Dense(Layer, ParamMixin): 54 | def __init__(self, output_dim, parameters=None): 55 | """A fully connected layer. 56 | 57 | Parameters 58 | ---------- 59 | output_dim : int 60 | """ 61 | self._params = parameters 62 | self.output_dim = output_dim 63 | self.last_input = None 64 | 65 | if parameters is None: 66 | self._params = Parameters() 67 | 68 | def setup(self, x_shape): 69 | self._params.setup_weights((x_shape[1], self.output_dim)) 70 | 71 | def forward_pass(self, X): 72 | self.last_input = X 73 | return self.weight(X) 74 | 75 | def weight(self, X): 76 | W = np.dot(X, self._params["W"]) 77 | return W + self._params["b"] 78 | 79 | def backward_pass(self, delta): 80 | dW = np.dot(self.last_input.T, delta) 81 | db = np.sum(delta, axis=0) 82 | 83 | # Update gradient values 84 | self._params.update_grad("W", dW) 85 | self._params.update_grad("b", db) 86 | return np.dot(delta, self._params["W"].T) 87 | 88 | def shape(self, x_shape): 89 | return x_shape[0], self.output_dim 90 | 91 | 92 | class Activation(Layer): 93 | def __init__(self, name): 94 | self.last_input = None 95 | self.activation = get_activation(name) 96 | # Derivative of activation function 97 | self.activation_d = elementwise_grad(self.activation) 98 | 99 | def forward_pass(self, X): 100 | self.last_input = X 101 | return self.activation(X) 102 | 103 | def backward_pass(self, delta): 104 | return self.activation_d(self.last_input) * delta 105 | 106 | def shape(self, x_shape): 107 | return x_shape 108 | 109 | 110 | class Dropout(Layer, PhaseMixin): 111 | """Randomly set a fraction of `p` inputs to 0 at each training update.""" 112 | 113 | def __init__(self, p=0.1): 114 | self.p = p 115 | self._mask = None 116 | 117 | def forward_pass(self, X): 118 | assert self.p > 0 119 | if self.is_training: 120 | self._mask = np.random.uniform(size=X.shape) > self.p 121 | y = X * self._mask 122 | else: 123 | y = X * (1.0 - self.p) 124 | 125 | return y 126 | 127 | def backward_pass(self, delta): 128 | return delta * self._mask 129 | 130 | def shape(self, x_shape): 131 | return x_shape 132 | 133 | 134 | class TimeStepSlicer(Layer): 135 | """Take a specific time step from 3D tensor.""" 136 | 137 | def __init__(self, step=-1): 138 | self.step = step 139 | 140 | def forward_pass(self, x): 141 | return x[:, self.step, :] 142 | 143 | def backward_pass(self, delta): 144 | return np.repeat(delta[:, np.newaxis, :], 2, 1) 145 | 146 | def shape(self, x_shape): 147 | return x_shape[0], x_shape[2] 148 | 149 | 150 | class 
TimeDistributedDense(Layer): 151 | """Apply regular Dense layer to every timestep.""" 152 | 153 | def __init__(self, output_dim): 154 | self.output_dim = output_dim 155 | self.n_timesteps = None 156 | self.dense = None 157 | self.input_dim = None 158 | 159 | def setup(self, X_shape): 160 | self.dense = Dense(self.output_dim) 161 | self.dense.setup((X_shape[0], X_shape[2])) 162 | self.input_dim = X_shape[2] 163 | 164 | def forward_pass(self, X): 165 | n_timesteps = X.shape[1] 166 | X = X.reshape(-1, X.shape[-1]) 167 | y = self.dense.forward_pass(X) 168 | y = y.reshape((-1, n_timesteps, self.output_dim)) 169 | return y 170 | 171 | def backward_pass(self, delta): 172 | n_timesteps = delta.shape[1] 173 | X = delta.reshape(-1, delta.shape[-1]) 174 | y = self.dense.backward_pass(X) 175 | y = y.reshape((-1, n_timesteps, self.input_dim)) 176 | return y 177 | 178 | @property 179 | def parameters(self): 180 | return self.dense._params 181 | 182 | def shape(self, x_shape): 183 | return x_shape[0], x_shape[1], self.output_dim 184 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/convnet.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | 4 | from mla.neuralnet.layers import Layer, ParamMixin 5 | from mla.neuralnet.parameters import Parameters 6 | 7 | 8 | class Convolution(Layer, ParamMixin): 9 | def __init__(self, n_filters=8, filter_shape=(3, 3), padding=(0, 0), stride=(1, 1), parameters=None): 10 | """A 2D convolutional layer. 11 | Input shape: (n_images, n_channels, height, width) 12 | 13 | Parameters 14 | ---------- 15 | n_filters : int, default 8 16 | The number of filters (kernels). 17 | filter_shape : tuple(int, int), default (3, 3) 18 | The shape of the filters. (height, width) 19 | parameters : Parameters instance, default None 20 | stride : tuple(int, int), default (1, 1) 21 | The step of the convolution. (height, width). 22 | padding : tuple(int, int), default (0, 0) 23 | The number of pixel to add to each side of the input. 
(height, weight) 24 | 25 | """ 26 | self.padding = padding 27 | self._params = parameters 28 | self.stride = stride 29 | self.filter_shape = filter_shape 30 | self.n_filters = n_filters 31 | if self._params is None: 32 | self._params = Parameters() 33 | 34 | def setup(self, X_shape): 35 | n_channels, self.height, self.width = X_shape[1:] 36 | 37 | W_shape = (self.n_filters, n_channels) + self.filter_shape 38 | b_shape = self.n_filters 39 | self._params.setup_weights(W_shape, b_shape) 40 | 41 | def forward_pass(self, X): 42 | n_images, n_channels, height, width = self.shape(X.shape) 43 | self.last_input = X 44 | self.col = image_to_column(X, self.filter_shape, self.stride, self.padding) 45 | self.col_W = self._params["W"].reshape(self.n_filters, -1).T 46 | 47 | out = np.dot(self.col, self.col_W) + self._params["b"] 48 | out = out.reshape(n_images, height, width, -1).transpose(0, 3, 1, 2) 49 | return out 50 | 51 | def backward_pass(self, delta): 52 | delta = delta.transpose(0, 2, 3, 1).reshape(-1, self.n_filters) 53 | 54 | d_W = np.dot(self.col.T, delta).transpose(1, 0).reshape(self._params["W"].shape) 55 | d_b = np.sum(delta, axis=0) 56 | self._params.update_grad("b", d_b) 57 | self._params.update_grad("W", d_W) 58 | 59 | d_c = np.dot(delta, self.col_W.T) 60 | return column_to_image(d_c, self.last_input.shape, self.filter_shape, self.stride, self.padding) 61 | 62 | def shape(self, x_shape): 63 | height, width = convoltuion_shape(self.height, self.width, self.filter_shape, self.stride, self.padding) 64 | return x_shape[0], self.n_filters, height, width 65 | 66 | 67 | class MaxPooling(Layer): 68 | def __init__(self, pool_shape=(2, 2), stride=(1, 1), padding=(0, 0)): 69 | """Max pooling layer. 70 | Input shape: (n_images, n_channels, height, width) 71 | 72 | Parameters 73 | ---------- 74 | pool_shape : tuple(int, int), default (2, 2) 75 | stride : tuple(int, int), default (1,1) 76 | padding : tuple(int, int), default (0,0) 77 | """ 78 | self.pool_shape = pool_shape 79 | self.stride = stride 80 | self.padding = padding 81 | 82 | def forward_pass(self, X): 83 | self.last_input = X 84 | 85 | out_height, out_width = pooling_shape(self.pool_shape, X.shape, self.stride) 86 | n_images, n_channels, _, _ = X.shape 87 | 88 | col = image_to_column(X, self.pool_shape, self.stride, self.padding) 89 | col = col.reshape(-1, self.pool_shape[0] * self.pool_shape[1]) 90 | 91 | arg_max = np.argmax(col, axis=1) 92 | out = np.max(col, axis=1) 93 | self.arg_max = arg_max 94 | return out.reshape(n_images, out_height, out_width, n_channels).transpose(0, 3, 1, 2) 95 | 96 | def backward_pass(self, delta): 97 | delta = delta.transpose(0, 2, 3, 1) 98 | 99 | pool_size = self.pool_shape[0] * self.pool_shape[1] 100 | y_max = np.zeros((delta.size, pool_size)) 101 | y_max[np.arange(self.arg_max.size), self.arg_max.flatten()] = delta.flatten() 102 | y_max = y_max.reshape(delta.shape + (pool_size,)) 103 | 104 | dcol = y_max.reshape(y_max.shape[0] * y_max.shape[1] * y_max.shape[2], -1) 105 | return column_to_image(dcol, self.last_input.shape, self.pool_shape, self.stride, self.padding) 106 | 107 | def shape(self, x_shape): 108 | h, w = convoltuion_shape(x_shape[2], x_shape[3], self.pool_shape, self.stride, self.padding) 109 | return x_shape[0], x_shape[1], h, w 110 | 111 | 112 | class Flatten(Layer): 113 | """Flattens multidimensional input into 2D matrix.""" 114 | 115 | def forward_pass(self, X): 116 | self.last_input_shape = X.shape 117 | return X.reshape((X.shape[0], -1)) 118 | 119 | def backward_pass(self, delta): 120 | 
return delta.reshape(self.last_input_shape) 121 | 122 | def shape(self, x_shape): 123 | return x_shape[0], np.prod(x_shape[1:]) 124 | 125 | 126 | def image_to_column(images, filter_shape, stride, padding): 127 | """Rearrange image blocks into columns. 128 | 129 | Parameters 130 | ---------- 131 | 132 | filter_shape : tuple(height, width) 133 | images : np.array, shape (n_images, n_channels, height, width) 134 | padding: tuple(height, width) 135 | stride : tuple (height, width) 136 | 137 | """ 138 | n_images, n_channels, height, width = images.shape 139 | f_height, f_width = filter_shape 140 | out_height, out_width = convoltuion_shape(height, width, (f_height, f_width), stride, padding) 141 | images = np.pad(images, ((0, 0), (0, 0), padding, padding), mode="constant") 142 | 143 | col = np.zeros((n_images, n_channels, f_height, f_width, out_height, out_width)) 144 | for y in range(f_height): 145 | y_bound = y + stride[0] * out_height 146 | for x in range(f_width): 147 | x_bound = x + stride[1] * out_width 148 | col[:, :, y, x, :, :] = images[:, :, y: y_bound: stride[0], x: x_bound: stride[1]] 149 | 150 | col = col.transpose(0, 4, 5, 1, 2, 3).reshape(n_images * out_height * out_width, -1) 151 | return col 152 | 153 | 154 | def column_to_image(columns, images_shape, filter_shape, stride, padding): 155 | """Rearrange columns into image blocks. 156 | 157 | Parameters 158 | ---------- 159 | columns 160 | images_shape : tuple(n_images, n_channels, height, width) 161 | filter_shape : tuple(height, _width) 162 | stride : tuple(height, width) 163 | padding : tuple(height, width) 164 | """ 165 | n_images, n_channels, height, width = images_shape 166 | f_height, f_width = filter_shape 167 | 168 | out_height, out_width = convoltuion_shape(height, width, (f_height, f_width), stride, padding) 169 | columns = columns.reshape(n_images, out_height, out_width, n_channels, f_height, f_width).transpose( 170 | 0, 3, 4, 5, 1, 2 171 | ) 172 | 173 | img_h = height + 2 * padding[0] + stride[0] - 1 174 | img_w = width + 2 * padding[1] + stride[1] - 1 175 | img = np.zeros((n_images, n_channels, img_h, img_w)) 176 | for y in range(f_height): 177 | y_bound = y + stride[0] * out_height 178 | for x in range(f_width): 179 | x_bound = x + stride[1] * out_width 180 | img[:, :, y: y_bound: stride[0], x: x_bound: stride[1]] += columns[:, :, y, x, :, :] 181 | 182 | return img[:, :, padding[0]: height + padding[0], padding[1]: width + padding[1]] 183 | 184 | 185 | def convoltuion_shape(img_height, img_width, filter_shape, stride, padding): 186 | """Calculate output shape for convolution layer.""" 187 | height = (img_height + 2 * padding[0] - filter_shape[0]) / float(stride[0]) + 1 188 | width = (img_width + 2 * padding[1] - filter_shape[1]) / float(stride[1]) + 1 189 | 190 | assert height % 1 == 0 191 | assert width % 1 == 0 192 | 193 | return int(height), int(width) 194 | 195 | 196 | def pooling_shape(pool_shape, image_shape, stride): 197 | """Calculate output shape for pooling layer.""" 198 | n_images, n_channels, height, width = image_shape 199 | 200 | height = (height - pool_shape[0]) / float(stride[0]) + 1 201 | width = (width - pool_shape[1]) / float(stride[1]) + 1 202 | 203 | assert height % 1 == 0 204 | assert width % 1 == 0 205 | 206 | return int(height), int(width) 207 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 
from mla.neuralnet.layers import Layer, PhaseMixin, ParamMixin 5 | from mla.neuralnet.parameters import Parameters 6 | 7 | """ 8 | References: 9 | https://kratzert.github.io/2016/02/12/understanding-the-gradient-flow-through-the-batch-normalization-layer.html 10 | """ 11 | 12 | 13 | class BatchNormalization(Layer, ParamMixin, PhaseMixin): 14 | def __init__(self, momentum=0.9, eps=1e-5, parameters=None): 15 | super().__init__() 16 | self._params = parameters 17 | if self._params is None: 18 | self._params = Parameters() 19 | self.momentum = momentum 20 | self.eps = eps 21 | self.ema_mean = None 22 | self.ema_var = None 23 | 24 | def setup(self, x_shape): 25 | self._params.setup_weights((1, x_shape[1])) 26 | 27 | def _forward_pass(self, X): 28 | gamma = self._params["W"] 29 | beta = self._params["b"] 30 | 31 | if self.is_testing: 32 | mu = self.ema_mean 33 | xmu = X - mu 34 | var = self.ema_var 35 | sqrtvar = np.sqrt(var + self.eps) 36 | ivar = 1.0 / sqrtvar 37 | xhat = xmu * ivar 38 | gammax = gamma * xhat 39 | return gammax + beta 40 | 41 | N, D = X.shape 42 | 43 | # step1: calculate mean 44 | mu = 1.0 / N * np.sum(X, axis=0) 45 | 46 | # step2: subtract mean vector of every trainings example 47 | xmu = X - mu 48 | 49 | # step3: following the lower branch - calculation denominator 50 | sq = xmu ** 2 51 | 52 | # step4: calculate variance 53 | var = 1.0 / N * np.sum(sq, axis=0) 54 | 55 | # step5: add eps for numerical stability, then sqrt 56 | sqrtvar = np.sqrt(var + self.eps) 57 | 58 | # step6: invert sqrtwar 59 | ivar = 1.0 / sqrtvar 60 | 61 | # step7: execute normalization 62 | xhat = xmu * ivar 63 | 64 | # step8: Nor the two transformation steps 65 | gammax = gamma * xhat 66 | 67 | # step9 68 | out = gammax + beta 69 | 70 | # store running averages of mean and variance during training for use during testing 71 | if self.ema_mean is None or self.ema_var is None: 72 | self.ema_mean = mu 73 | self.ema_var = var 74 | else: 75 | self.ema_mean = self.momentum * self.ema_mean + (1 - self.momentum) * mu 76 | self.ema_var = self.momentum * self.ema_var + (1 - self.momentum) * var 77 | # store intermediate 78 | self.cache = (xhat, gamma, xmu, ivar, sqrtvar, var) 79 | 80 | return out 81 | 82 | def forward_pass(self, X): 83 | if len(X.shape) == 2: 84 | # input is a regular layer 85 | return self._forward_pass(X) 86 | elif len(X.shape) == 4: 87 | # input is a convolution layer 88 | N, C, H, W = X.shape 89 | x_flat = X.transpose(0, 2, 3, 1).reshape(-1, C) 90 | out_flat = self._forward_pass(x_flat) 91 | return out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) 92 | else: 93 | raise NotImplementedError("Unknown model with dimensions = {}".format(len(X.shape))) 94 | 95 | def _backward_pass(self, delta): 96 | # unfold the variables stored in cache 97 | xhat, gamma, xmu, ivar, sqrtvar, var = self.cache 98 | 99 | # get the dimensions of the input/output 100 | N, D = delta.shape 101 | 102 | # step9 103 | dbeta = np.sum(delta, axis=0) 104 | dgammax = delta # not necessary, but more understandable 105 | 106 | # step8 107 | dgamma = np.sum(dgammax * xhat, axis=0) 108 | dxhat = dgammax * gamma 109 | 110 | # step7 111 | divar = np.sum(dxhat * xmu, axis=0) 112 | dxmu1 = dxhat * ivar 113 | 114 | # step6 115 | dsqrtvar = -1.0 / (sqrtvar ** 2) * divar 116 | 117 | # step5 118 | dvar = 0.5 * 1.0 / np.sqrt(var + self.eps) * dsqrtvar 119 | 120 | # step4 121 | dsq = 1.0 / N * np.ones((N, D)) * dvar 122 | 123 | # step3 124 | dxmu2 = 2 * xmu * dsq 125 | 126 | # step2 127 | dx1 = dxmu1 + dxmu2 128 | dmu = -1 * np.sum(dxmu1 + 
dxmu2, axis=0) 129 | 130 | # step1 131 | dx2 = 1.0 / N * np.ones((N, D)) * dmu 132 | 133 | # step0 134 | dx = dx1 + dx2 135 | 136 | # Update gradient values 137 | self._params.update_grad("W", dgamma) 138 | self._params.update_grad("b", dbeta) 139 | 140 | return dx 141 | 142 | def backward_pass(self, X): 143 | if len(X.shape) == 2: 144 | # input is a regular layer 145 | return self._backward_pass(X) 146 | elif len(X.shape) == 4: 147 | # input is a convolution layer 148 | N, C, H, W = X.shape 149 | x_flat = X.transpose(0, 2, 3, 1).reshape(-1, C) 150 | out_flat = self._backward_pass(x_flat) 151 | return out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) 152 | else: 153 | raise NotImplementedError("Unknown model shape: {}".format(X.shape)) 154 | 155 | def shape(self, x_shape): 156 | return x_shape 157 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/recurrent/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from .lstm import * 3 | from .rnn import * 4 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/recurrent/lstm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | from autograd import elementwise_grad 4 | 5 | from mla.neuralnet.activations import sigmoid 6 | from mla.neuralnet.initializations import get_initializer 7 | from mla.neuralnet.layers import Layer, get_activation, ParamMixin 8 | from mla.neuralnet.parameters import Parameters 9 | 10 | """ 11 | References: 12 | Understanding LSTM Networks http://colah.github.io/posts/2015-08-Understanding-LSTMs/ 13 | A Critical Review of Recurrent Neural Networks for Sequence Learning http://arxiv.org/pdf/1506.00019v4.pdf 14 | """ 15 | 16 | 17 | class LSTM(Layer, ParamMixin): 18 | def __init__(self, hidden_dim, activation="tanh", inner_init="orthogonal", parameters=None, return_sequences=True): 19 | self.return_sequences = return_sequences 20 | self.hidden_dim = hidden_dim 21 | self.inner_init = get_initializer(inner_init) 22 | self.activation = get_activation(activation) 23 | self.activation_d = elementwise_grad(self.activation) 24 | self.sigmoid_d = elementwise_grad(sigmoid) 25 | 26 | if parameters is None: 27 | self._params = Parameters() 28 | else: 29 | self._params = parameters 30 | 31 | self.last_input = None 32 | self.states = None 33 | self.outputs = None 34 | self.gates = None 35 | self.hprev = None 36 | self.input_dim = None 37 | self.W = None 38 | self.U = None 39 | 40 | def setup(self, x_shape): 41 | """ 42 | Naming convention: 43 | i : input gate 44 | f : forget gate 45 | c : cell 46 | o : output gate 47 | 48 | Parameters 49 | ---------- 50 | x_shape : np.array(batch size, time steps, input shape) 51 | """ 52 | self.input_dim = x_shape[2] 53 | # Input -> Hidden 54 | W_params = ["W_i", "W_f", "W_o", "W_c"] 55 | # Hidden -> Hidden 56 | U_params = ["U_i", "U_f", "U_o", "U_c"] 57 | # Bias terms 58 | b_params = ["b_i", "b_f", "b_o", "b_c"] 59 | 60 | # Initialize params 61 | for param in W_params: 62 | self._params[param] = self._params.init((self.input_dim, self.hidden_dim)) 63 | 64 | for param in U_params: 65 | self._params[param] = self.inner_init((self.hidden_dim, self.hidden_dim)) 66 | 67 | for param in b_params: 68 | self._params[param] = np.full((self.hidden_dim,), self._params.initial_bias) 69 | 70 | # Combine weights for simplicity 71 | self.W = [self._params[param] for 
param in W_params] 72 | self.U = [self._params[param] for param in U_params] 73 | 74 | # Init gradient arrays for all weights 75 | self._params.init_grad() 76 | 77 | self.hprev = np.zeros((x_shape[0], self.hidden_dim)) 78 | self.oprev = np.zeros((x_shape[0], self.hidden_dim)) 79 | 80 | def forward_pass(self, X): 81 | n_samples, n_timesteps, input_shape = X.shape 82 | p = self._params 83 | self.last_input = X 84 | 85 | self.states = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) 86 | self.outputs = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) 87 | self.gates = {k: np.zeros((n_samples, n_timesteps, self.hidden_dim)) for k in ["i", "f", "o", "c"]} 88 | 89 | self.states[:, -1, :] = self.hprev 90 | self.outputs[:, -1, :] = self.oprev 91 | 92 | for i in range(n_timesteps): 93 | t_gates = np.dot(X[:, i, :], self.W) + np.dot(self.outputs[:, i - 1, :], self.U) 94 | 95 | # Input 96 | self.gates["i"][:, i, :] = sigmoid(t_gates[:, 0, :] + p["b_i"]) 97 | # Forget 98 | self.gates["f"][:, i, :] = sigmoid(t_gates[:, 1, :] + p["b_f"]) 99 | # Output 100 | self.gates["o"][:, i, :] = sigmoid(t_gates[:, 2, :] + p["b_o"]) 101 | # Cell 102 | self.gates["c"][:, i, :] = self.activation(t_gates[:, 3, :] + p["b_c"]) 103 | 104 | # (previous state * forget) + input + cell 105 | self.states[:, i, :] = ( 106 | self.states[:, i - 1, :] * self.gates["f"][:, i, :] 107 | + self.gates["i"][:, i, :] * self.gates["c"][:, i, :] 108 | ) 109 | self.outputs[:, i, :] = self.gates["o"][:, i, :] * self.activation(self.states[:, i, :]) 110 | 111 | self.hprev = self.states[:, n_timesteps - 1, :].copy() 112 | self.oprev = self.outputs[:, n_timesteps - 1, :].copy() 113 | 114 | if self.return_sequences: 115 | return self.outputs[:, 0:-1, :] 116 | else: 117 | return self.outputs[:, -2, :] 118 | 119 | def backward_pass(self, delta): 120 | if len(delta.shape) == 2: 121 | delta = delta[:, np.newaxis, :] 122 | 123 | n_samples, n_timesteps, input_shape = delta.shape 124 | 125 | # Temporal gradient arrays 126 | grad = {k: np.zeros_like(self._params[k]) for k in self._params.keys()} 127 | 128 | dh_next = np.zeros((n_samples, input_shape)) 129 | output = np.zeros((n_samples, n_timesteps, self.input_dim)) 130 | 131 | # Backpropagation through time 132 | for i in reversed(range(n_timesteps)): 133 | dhi = delta[:, i, :] * self.gates["o"][:, i, :] * self.activation_d(self.states[:, i, :]) + dh_next 134 | 135 | og = delta[:, i, :] * self.activation(self.states[:, i, :]) 136 | de_o = og * self.sigmoid_d(self.gates["o"][:, i, :]) 137 | 138 | grad["W_o"] += np.dot(self.last_input[:, i, :].T, de_o) 139 | grad["U_o"] += np.dot(self.outputs[:, i - 1, :].T, de_o) 140 | grad["b_o"] += de_o.sum(axis=0) 141 | 142 | de_f = (dhi * self.states[:, i - 1, :]) * self.sigmoid_d(self.gates["f"][:, i, :]) 143 | grad["W_f"] += np.dot(self.last_input[:, i, :].T, de_f) 144 | grad["U_f"] += np.dot(self.outputs[:, i - 1, :].T, de_f) 145 | grad["b_f"] += de_f.sum(axis=0) 146 | 147 | de_i = (dhi * self.gates["c"][:, i, :]) * self.sigmoid_d(self.gates["i"][:, i, :]) 148 | grad["W_i"] += np.dot(self.last_input[:, i, :].T, de_i) 149 | grad["U_i"] += np.dot(self.outputs[:, i - 1, :].T, de_i) 150 | grad["b_i"] += de_i.sum(axis=0) 151 | 152 | de_c = (dhi * self.gates["i"][:, i, :]) * self.activation_d(self.gates["c"][:, i, :]) 153 | grad["W_c"] += np.dot(self.last_input[:, i, :].T, de_c) 154 | grad["U_c"] += np.dot(self.outputs[:, i - 1, :].T, de_c) 155 | grad["b_c"] += de_c.sum(axis=0) 156 | 157 | dh_next = dhi * self.gates["f"][:, i, :] 158 | 159 | # TODO: 
propagate error to the next layer 160 | 161 | # Change actual gradient arrays 162 | for k in grad.keys(): 163 | self._params.update_grad(k, grad[k]) 164 | return output 165 | 166 | def shape(self, x_shape): 167 | if self.return_sequences: 168 | return x_shape[0], x_shape[1], self.hidden_dim 169 | else: 170 | return x_shape[0], self.hidden_dim 171 | -------------------------------------------------------------------------------- /mla/neuralnet/layers/recurrent/rnn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import autograd.numpy as np 3 | from autograd import elementwise_grad 4 | 5 | from mla.neuralnet.initializations import get_initializer 6 | from mla.neuralnet.layers import Layer, get_activation, ParamMixin 7 | from mla.neuralnet.parameters import Parameters 8 | 9 | 10 | class RNN(Layer, ParamMixin): 11 | """Vanilla RNN.""" 12 | 13 | def __init__(self, hidden_dim, activation="tanh", inner_init="orthogonal", parameters=None, return_sequences=True): 14 | self.return_sequences = return_sequences 15 | self.hidden_dim = hidden_dim 16 | self.inner_init = get_initializer(inner_init) 17 | self.activation = get_activation(activation) 18 | self.activation_d = elementwise_grad(self.activation) 19 | if parameters is None: 20 | self._params = Parameters() 21 | else: 22 | self._params = parameters 23 | self.last_input = None 24 | self.states = None 25 | self.hprev = None 26 | self.input_dim = None 27 | 28 | def setup(self, x_shape): 29 | """ 30 | Parameters 31 | ---------- 32 | x_shape : np.array(batch size, time steps, input shape) 33 | """ 34 | self.input_dim = x_shape[2] 35 | 36 | # Input -> Hidden 37 | self._params["W"] = self._params.init((self.input_dim, self.hidden_dim)) 38 | # Bias 39 | self._params["b"] = np.full((self.hidden_dim,), self._params.initial_bias) 40 | # Hidden -> Hidden layer 41 | self._params["U"] = self.inner_init((self.hidden_dim, self.hidden_dim)) 42 | 43 | # Init gradient arrays 44 | self._params.init_grad() 45 | 46 | self.hprev = np.zeros((x_shape[0], self.hidden_dim)) 47 | 48 | def forward_pass(self, X): 49 | self.last_input = X 50 | n_samples, n_timesteps, input_shape = X.shape 51 | states = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) 52 | states[:, -1, :] = self.hprev.copy() 53 | p = self._params 54 | 55 | for i in range(n_timesteps): 56 | states[:, i, :] = np.tanh(np.dot(X[:, i, :], p["W"]) + np.dot(states[:, i - 1, :], p["U"]) + p["b"]) 57 | 58 | self.states = states 59 | self.hprev = states[:, n_timesteps - 1, :].copy() 60 | if self.return_sequences: 61 | return states[:, 0:-1, :] 62 | else: 63 | return states[:, -2, :] 64 | 65 | def backward_pass(self, delta): 66 | if len(delta.shape) == 2: 67 | delta = delta[:, np.newaxis, :] 68 | n_samples, n_timesteps, input_shape = delta.shape 69 | p = self._params 70 | 71 | # Temporal gradient arrays 72 | grad = {k: np.zeros_like(p[k]) for k in p.keys()} 73 | 74 | dh_next = np.zeros((n_samples, input_shape)) 75 | output = np.zeros((n_samples, n_timesteps, self.input_dim)) 76 | 77 | # Backpropagation through time 78 | for i in reversed(range(n_timesteps)): 79 | dhi = self.activation_d(self.states[:, i, :]) * (delta[:, i, :] + dh_next) 80 | 81 | grad["W"] += np.dot(self.last_input[:, i, :].T, dhi) 82 | grad["b"] += delta[:, i, :].sum(axis=0) 83 | grad["U"] += np.dot(self.states[:, i - 1, :].T, dhi) 84 | 85 | dh_next = np.dot(dhi, p["U"].T) 86 | 87 | d = np.dot(delta[:, i, :], p["U"].T) 88 | output[:, i, :] = np.dot(d, p["W"].T) 89 | 90 | # Change actual 
gradient arrays 91 | for k in grad.keys(): 92 | self._params.update_grad(k, grad[k]) 93 | return output 94 | 95 | def shape(self, x_shape): 96 | if self.return_sequences: 97 | return x_shape[0], x_shape[1], self.hidden_dim 98 | else: 99 | return x_shape[0], self.hidden_dim 100 | -------------------------------------------------------------------------------- /mla/neuralnet/loss.py: -------------------------------------------------------------------------------- 1 | from ..metrics import mse, logloss, mae, hinge, binary_crossentropy 2 | categorical_crossentropy = logloss 3 | 4 | 5 | def get_loss(name): 6 | """Returns loss function by the name.""" 7 | try: 8 | return globals()[name] 9 | except KeyError: 10 | raise ValueError("Invalid metric function.") 11 | -------------------------------------------------------------------------------- /mla/neuralnet/nnet.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import numpy as np 4 | from autograd import elementwise_grad 5 | 6 | from mla.base import BaseEstimator 7 | from mla.metrics.metrics import get_metric 8 | from mla.neuralnet.layers import PhaseMixin 9 | from mla.neuralnet.loss import get_loss 10 | from mla.utils import batch_iterator 11 | 12 | np.random.seed(9999) 13 | 14 | """ 15 | Architecture inspired from: 16 | 17 | https://github.com/fchollet/keras 18 | https://github.com/andersbll/deeppy 19 | """ 20 | 21 | 22 | class NeuralNet(BaseEstimator): 23 | fit_required = False 24 | 25 | def __init__( 26 | self, layers, optimizer, loss, max_epochs=10, batch_size=64, metric="mse", shuffle=False, verbose=True 27 | ): 28 | self.verbose = verbose 29 | self.shuffle = shuffle 30 | self.optimizer = optimizer 31 | 32 | self.loss = get_loss(loss) 33 | 34 | # TODO: fix 35 | if loss == "categorical_crossentropy": 36 | self.loss_grad = lambda actual, predicted: -(actual - predicted) 37 | else: 38 | self.loss_grad = elementwise_grad(self.loss, 1) 39 | self.metric = get_metric(metric) 40 | self.layers = layers 41 | self.batch_size = batch_size 42 | self.max_epochs = max_epochs 43 | self._n_layers = 0 44 | self.log_metric = True if loss != metric else False 45 | self.metric_name = metric 46 | self.bprop_entry = self._find_bprop_entry() 47 | self.training = False 48 | self._initialized = False 49 | 50 | def _setup_layers(self, x_shape): 51 | """Initialize model's layers.""" 52 | x_shape = list(x_shape) 53 | x_shape[0] = self.batch_size 54 | 55 | for layer in self.layers: 56 | layer.setup(x_shape) 57 | x_shape = layer.shape(x_shape) 58 | 59 | self._n_layers = len(self.layers) 60 | # Setup optimizer 61 | self.optimizer.setup(self) 62 | self._initialized = True 63 | logging.info("Total parameters: %s" % self.n_params) 64 | 65 | def _find_bprop_entry(self): 66 | """Find entry layer for back propagation.""" 67 | 68 | if len(self.layers) > 0 and not hasattr(self.layers[-1], "parameters"): 69 | return -1 70 | return len(self.layers) 71 | 72 | def fit(self, X, y=None): 73 | if not self._initialized: 74 | self._setup_layers(X.shape) 75 | 76 | if y.ndim == 1: 77 | # Reshape vector to matrix 78 | y = y[:, np.newaxis] 79 | self._setup_input(X, y) 80 | 81 | self.is_training = True 82 | # Pass neural network instance to an optimizer 83 | self.optimizer.optimize(self) 84 | self.is_training = False 85 | 86 | def update(self, X, y): 87 | # Forward pass 88 | y_pred = self.fprop(X) 89 | 90 | # Backward pass 91 | grad = self.loss_grad(y, y_pred) 92 | for layer in reversed(self.layers[: self.bprop_entry]): 93 | grad = 
layer.backward_pass(grad) 94 | return self.loss(y, y_pred) 95 | 96 | def fprop(self, X): 97 | """Forward propagation.""" 98 | for layer in self.layers: 99 | X = layer.forward_pass(X) 100 | return X 101 | 102 | def _predict(self, X=None): 103 | if not self._initialized: 104 | self._setup_layers(X.shape) 105 | 106 | y = [] 107 | X_batch = batch_iterator(X, self.batch_size) 108 | for Xb in X_batch: 109 | y.append(self.fprop(Xb)) 110 | return np.concatenate(y) 111 | 112 | @property 113 | def parametric_layers(self): 114 | for layer in self.layers: 115 | if hasattr(layer, "parameters"): 116 | yield layer 117 | 118 | @property 119 | def parameters(self): 120 | """Returns a list of all parameters.""" 121 | params = [] 122 | for layer in self.parametric_layers: 123 | params.append(layer.parameters) 124 | return params 125 | 126 | def error(self, X=None, y=None): 127 | """Calculate an error for given examples.""" 128 | training_phase = self.is_training 129 | if training_phase: 130 | # Temporally disable training. 131 | # Some layers work differently while training (e.g. Dropout). 132 | self.is_training = False 133 | if X is None and y is None: 134 | y_pred = self._predict(self.X) 135 | score = self.metric(self.y, y_pred) 136 | else: 137 | y_pred = self._predict(X) 138 | score = self.metric(y, y_pred) 139 | if training_phase: 140 | self.is_training = True 141 | return score 142 | 143 | @property 144 | def is_training(self): 145 | return self.training 146 | 147 | @is_training.setter 148 | def is_training(self, train): 149 | self.training = train 150 | for layer in self.layers: 151 | if isinstance(layer, PhaseMixin): 152 | layer.is_training = train 153 | 154 | def shuffle_dataset(self): 155 | """Shuffle rows in the dataset.""" 156 | n_samples = self.X.shape[0] 157 | indices = np.arange(n_samples) 158 | np.random.shuffle(indices) 159 | self.X = self.X.take(indices, axis=0) 160 | self.y = self.y.take(indices, axis=0) 161 | 162 | @property 163 | def n_layers(self): 164 | """Returns the number of layers.""" 165 | return self._n_layers 166 | 167 | @property 168 | def n_params(self): 169 | """Return the number of trainable parameters.""" 170 | return sum([layer.parameters.n_params for layer in self.parametric_layers]) 171 | 172 | def reset(self): 173 | self._initialized = False 174 | -------------------------------------------------------------------------------- /mla/neuralnet/optimizers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from collections import defaultdict 4 | 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from mla.utils import batch_iterator 9 | 10 | """ 11 | References: 12 | 13 | Gradient descent optimization algorithms https://ruder.io/optimizing-gradient-descent/ 14 | """ 15 | 16 | 17 | class Optimizer(object): 18 | def optimize(self, network): 19 | loss_history = [] 20 | for i in range(network.max_epochs): 21 | if network.shuffle: 22 | network.shuffle_dataset() 23 | 24 | start_time = time.time() 25 | loss = self.train_epoch(network) 26 | loss_history.append(loss) 27 | if network.verbose: 28 | msg = "Epoch:%s, train loss: %s" % (i, loss) 29 | if network.log_metric: 30 | msg += ", train %s: %s" % (network.metric_name, network.error()) 31 | msg += ", elapsed: %s sec." 
% (time.time() - start_time) 32 | logging.info(msg) 33 | return loss_history 34 | 35 | def update(self, network): 36 | """Performs an update of parameters.""" 37 | raise NotImplementedError 38 | 39 | def train_epoch(self, network): 40 | losses = [] 41 | 42 | # Create batch iterator 43 | X_batch = batch_iterator(network.X, network.batch_size) 44 | y_batch = batch_iterator(network.y, network.batch_size) 45 | 46 | batch = zip(X_batch, y_batch) 47 | if network.verbose: 48 | batch = tqdm(batch, total=int(np.ceil(network.n_samples / network.batch_size))) 49 | 50 | for X, y in batch: 51 | loss = np.mean(network.update(X, y)) 52 | self.update(network) 53 | losses.append(loss) 54 | 55 | epoch_loss = np.mean(losses) 56 | return epoch_loss 57 | 58 | def train_batch(self, network, X, y): 59 | loss = np.mean(network.update(X, y)) 60 | self.update(network) 61 | return loss 62 | 63 | def setup(self, network): 64 | """Creates additional variables. 65 | Note: Must be called before optimization process.""" 66 | raise NotImplementedError 67 | 68 | 69 | class SGD(Optimizer): 70 | def __init__(self, learning_rate=0.01, momentum=0.9, decay=0.0, nesterov=False): 71 | self.nesterov = nesterov 72 | self.decay = decay 73 | self.momentum = momentum 74 | self.lr = learning_rate 75 | self.iteration = 0 76 | self.velocity = None 77 | 78 | def update(self, network): 79 | lr = self.lr * (1.0 / (1.0 + self.decay * self.iteration)) 80 | 81 | for i, layer in enumerate(network.parametric_layers): 82 | for n in layer.parameters.keys(): 83 | # Get gradient values 84 | grad = layer.parameters.grad[n] 85 | update = self.momentum * self.velocity[i][n] - lr * grad 86 | self.velocity[i][n] = update 87 | if self.nesterov: 88 | # Adjust using updated velocity 89 | update = self.momentum * self.velocity[i][n] - lr * grad 90 | layer.parameters.step(n, update) 91 | self.iteration += 1 92 | 93 | def setup(self, network): 94 | self.velocity = defaultdict(dict) 95 | for i, layer in enumerate(network.parametric_layers): 96 | for n in layer.parameters.keys(): 97 | self.velocity[i][n] = np.zeros_like(layer.parameters[n]) 98 | 99 | 100 | class Adagrad(Optimizer): 101 | def __init__(self, learning_rate=0.01, epsilon=1e-8): 102 | self.eps = epsilon 103 | self.lr = learning_rate 104 | 105 | def update(self, network): 106 | for i, layer in enumerate(network.parametric_layers): 107 | for n in layer.parameters.keys(): 108 | grad = layer.parameters.grad[n] 109 | self.accu[i][n] += grad ** 2 110 | step = self.lr * grad / (np.sqrt(self.accu[i][n]) + self.eps) 111 | layer.parameters.step(n, -step) 112 | 113 | def setup(self, network): 114 | # Accumulators 115 | self.accu = defaultdict(dict) 116 | for i, layer in enumerate(network.parametric_layers): 117 | for n in layer.parameters.keys(): 118 | self.accu[i][n] = np.zeros_like(layer.parameters[n]) 119 | 120 | 121 | class Adadelta(Optimizer): 122 | def __init__(self, learning_rate=1.0, rho=0.95, epsilon=1e-8): 123 | self.rho = rho 124 | self.eps = epsilon 125 | self.lr = learning_rate 126 | 127 | def update(self, network): 128 | for i, layer in enumerate(network.parametric_layers): 129 | for n in layer.parameters.keys(): 130 | grad = layer.parameters.grad[n] 131 | self.accu[i][n] = self.rho * self.accu[i][n] + (1.0 - self.rho) * grad ** 2 132 | step = grad * np.sqrt(self.d_accu[i][n] + self.eps) / np.sqrt(self.accu[i][n] + self.eps) 133 | 134 | layer.parameters.step(n, -step * self.lr) 135 | # Update delta accumulator 136 | self.d_accu[i][n] = self.rho * self.d_accu[i][n] + (1.0 - self.rho) * step ** 2 
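# Note on the two running averages updated just above (Adadelta):
#   accu   <- rho * accu   + (1 - rho) * grad ** 2   (squared gradients)
#   d_accu <- rho * d_accu + (1 - rho) * step ** 2   (squared updates)
# Each parameter step is grad * sqrt(d_accu + eps) / sqrt(accu + eps), scaled by
# learning_rate, which is why the default learning_rate of 1.0 rarely needs tuning.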
137 | 138 | def setup(self, network): 139 | # Accumulators 140 | self.accu = defaultdict(dict) 141 | self.d_accu = defaultdict(dict) 142 | for i, layer in enumerate(network.parametric_layers): 143 | for n in layer.parameters.keys(): 144 | self.accu[i][n] = np.zeros_like(layer.parameters[n]) 145 | self.d_accu[i][n] = np.zeros_like(layer.parameters[n]) 146 | 147 | 148 | class RMSprop(Optimizer): 149 | def __init__(self, learning_rate=0.001, rho=0.9, epsilon=1e-8): 150 | self.eps = epsilon 151 | self.rho = rho 152 | self.lr = learning_rate 153 | 154 | def update(self, network): 155 | for i, layer in enumerate(network.parametric_layers): 156 | for n in layer.parameters.keys(): 157 | grad = layer.parameters.grad[n] 158 | self.accu[i][n] = (self.rho * self.accu[i][n]) + (1.0 - self.rho) * (grad ** 2) 159 | step = self.lr * grad / (np.sqrt(self.accu[i][n]) + self.eps) 160 | layer.parameters.step(n, -step) 161 | 162 | def setup(self, network): 163 | # Accumulators 164 | self.accu = defaultdict(dict) 165 | for i, layer in enumerate(network.parametric_layers): 166 | for n in layer.parameters.keys(): 167 | self.accu[i][n] = np.zeros_like(layer.parameters[n]) 168 | 169 | 170 | class Adam(Optimizer): 171 | def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8): 172 | 173 | self.epsilon = epsilon 174 | self.beta_2 = beta_2 175 | self.beta_1 = beta_1 176 | self.lr = learning_rate 177 | self.iterations = 0 178 | self.t = 1 179 | 180 | def update(self, network): 181 | for i, layer in enumerate(network.parametric_layers): 182 | for n in layer.parameters.keys(): 183 | grad = layer.parameters.grad[n] 184 | self.ms[i][n] = (self.beta_1 * self.ms[i][n]) + (1.0 - self.beta_1) * grad 185 | self.vs[i][n] = (self.beta_2 * self.vs[i][n]) + (1.0 - self.beta_2) * grad ** 2 186 | lr = self.lr * np.sqrt(1.0 - self.beta_2 ** self.t) / (1.0 - self.beta_1 ** self.t) 187 | 188 | step = lr * self.ms[i][n] / (np.sqrt(self.vs[i][n]) + self.epsilon) 189 | layer.parameters.step(n, -step) 190 | self.t += 1 191 | 192 | def setup(self, network): 193 | # Accumulators 194 | self.ms = defaultdict(dict) 195 | self.vs = defaultdict(dict) 196 | for i, layer in enumerate(network.parametric_layers): 197 | for n in layer.parameters.keys(): 198 | self.ms[i][n] = np.zeros_like(layer.parameters[n]) 199 | self.vs[i][n] = np.zeros_like(layer.parameters[n]) 200 | 201 | 202 | class Adamax(Optimizer): 203 | def __init__(self, learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8): 204 | 205 | self.epsilon = epsilon 206 | self.beta_2 = beta_2 207 | self.beta_1 = beta_1 208 | self.lr = learning_rate 209 | self.t = 1 210 | 211 | def update(self, network): 212 | for i, layer in enumerate(network.parametric_layers): 213 | for n in layer.parameters.keys(): 214 | grad = layer.parameters.grad[n] 215 | self.ms[i][n] = self.beta_1 * self.ms[i][n] + (1.0 - self.beta_1) * grad 216 | self.us[i][n] = np.maximum(self.beta_2 * self.us[i][n], np.abs(grad)) 217 | 218 | step = self.lr / (1 - self.beta_1 ** self.t) * self.ms[i][n] / (self.us[i][n] + self.epsilon) 219 | layer.parameters.step(n, -step) 220 | self.t += 1 221 | 222 | def setup(self, network): 223 | self.ms = defaultdict(dict) 224 | self.us = defaultdict(dict) 225 | for i, layer in enumerate(network.parametric_layers): 226 | for n in layer.parameters.keys(): 227 | self.ms[i][n] = np.zeros_like(layer.parameters[n]) 228 | self.us[i][n] = np.zeros_like(layer.parameters[n]) 229 | -------------------------------------------------------------------------------- 
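All optimizers above share the same contract: `setup(network)` allocates per-parameter state, and `update(network)` applies one step using each parametric layer's accumulated gradients; `optimize(network)` drives the epoch loop. A minimal usage sketch follows, mirroring mla/neuralnet/tests/test_optimizers.py; the toy dataset, layer sizes and epoch count are illustrative assumptions, not part of the library.

import numpy as np
from sklearn.datasets import make_classification

from mla.neuralnet import NeuralNet
from mla.neuralnet.layers import Activation, Dense
from mla.neuralnet.optimizers import Adam
from mla.utils import one_hot

# Illustrative toy problem; sizes and the seed are arbitrary.
X, y = make_classification(n_samples=500, n_features=20, n_classes=2, random_state=1111)
X = (X - X.mean(axis=0)) / X.std(axis=0)   # standardize features, as the tests do
y = one_hot(y)                             # (n_samples, 2) targets for softmax output

model = NeuralNet(
    layers=[Dense(32), Activation("relu"), Dense(2), Activation("softmax")],
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.001),   # any optimizer class defined above works here
    metric="accuracy",
    batch_size=64,
    max_epochs=5,
)
model.fit(X, y)                            # optimizer.setup/optimize are invoked internally
probabilities = model.predict(X)           # shape (n_samples, 2)

Swapping Adam() for, e.g., SGD(learning_rate=0.01, momentum=0.9, nesterov=True) or RMSprop() requires no other changes, since the network only interacts with the optimizer through setup and optimize.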
/mla/neuralnet/parameters.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | from mla.neuralnet.initializations import get_initializer 5 | 6 | 7 | class Parameters(object): 8 | def __init__(self, init="glorot_uniform", scale=0.5, bias=1.0, regularizers=None, constraints=None): 9 | """A container for layer's parameters. 10 | 11 | Parameters 12 | ---------- 13 | init : str, default 'glorot_uniform'. 14 | The name of the weight initialization function. 15 | scale : float, default 0.5 16 | bias : float, default 1.0 17 | Initial values for bias. 18 | regularizers : dict 19 | Weight regularizers. 20 | >>> {'W' : L2()} 21 | constraints : dict 22 | Weight constraints. 23 | >>> {'b' : MaxNorm()} 24 | """ 25 | if constraints is None: 26 | self.constraints = {} 27 | else: 28 | self.constraints = constraints 29 | 30 | if regularizers is None: 31 | self.regularizers = {} 32 | else: 33 | self.regularizers = regularizers 34 | 35 | self.initial_bias = bias 36 | self.scale = scale 37 | self.init = get_initializer(init) 38 | 39 | self._params = {} 40 | self._grads = {} 41 | 42 | def setup_weights(self, W_shape, b_shape=None): 43 | if "W" not in self._params: 44 | self._params["W"] = self.init(shape=W_shape, scale=self.scale) 45 | if b_shape is None: 46 | self._params["b"] = np.full(W_shape[1], self.initial_bias) 47 | else: 48 | self._params["b"] = np.full(b_shape, self.initial_bias) 49 | self.init_grad() 50 | 51 | def init_grad(self): 52 | """Init gradient arrays corresponding to each weight array.""" 53 | for key in self._params.keys(): 54 | if key not in self._grads: 55 | self._grads[key] = np.zeros_like(self._params[key]) 56 | 57 | def step(self, name, step): 58 | """Increase specific weight by amount of the step parameter.""" 59 | self._params[name] += step 60 | 61 | if name in self.constraints: 62 | self._params[name] = self.constraints[name].clip(self._params[name]) 63 | 64 | def update_grad(self, name, value): 65 | """Update gradient values.""" 66 | self._grads[name] = value 67 | 68 | if name in self.regularizers: 69 | self._grads[name] += self.regularizers[name](self._params[name]) 70 | 71 | @property 72 | def n_params(self): 73 | """Count the number of parameters in this layer.""" 74 | return sum([np.prod(self._params[x].shape) for x in self._params.keys()]) 75 | 76 | def keys(self): 77 | return self._params.keys() 78 | 79 | @property 80 | def grad(self): 81 | return self._grads 82 | 83 | # Allow access to the fields using dict syntax, e.g. 
parameters['W'] 84 | def __getitem__(self, item): 85 | if item in self._params: 86 | return self._params[item] 87 | else: 88 | raise ValueError 89 | 90 | def __setitem__(self, key, value): 91 | self._params[key] = value 92 | -------------------------------------------------------------------------------- /mla/neuralnet/regularizers.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from autograd import elementwise_grad 4 | 5 | 6 | class Regularizer(object): 7 | def __init__(self, C=0.01): 8 | self.C = C 9 | self._grad = elementwise_grad(self._penalty) 10 | 11 | def _penalty(self, weights): 12 | raise NotImplementedError() 13 | 14 | def grad(self, weights): 15 | return self._grad(weights) 16 | 17 | def __call__(self, weights): 18 | return self.grad(weights) 19 | 20 | 21 | class L1(Regularizer): 22 | def _penalty(self, weights): 23 | return self.C * np.abs(weights) 24 | 25 | 26 | class L2(Regularizer): 27 | def _penalty(self, weights): 28 | return self.C * weights ** 2 29 | 30 | 31 | class ElasticNet(Regularizer): 32 | """Linear combination of L1 and L2 penalties.""" 33 | 34 | def _penalty(self, weights): 35 | return 0.5 * self.C * weights ** 2 + (1.0 - self.C) * np.abs(weights) 36 | -------------------------------------------------------------------------------- /mla/neuralnet/tests/test_activations.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | 5 | from mla.neuralnet.activations import * 6 | 7 | 8 | def test_softplus(): 9 | # np.exp(z_max) will overflow 10 | z_max = np.log(sys.float_info.max) + 1.0e10 11 | # 1.0 / np.exp(z_min) will overflow 12 | z_min = np.log(sys.float_info.min) - 1.0e10 13 | inputs = np.array([0.0, 1.0, -1.0, z_min, z_max]) 14 | # naive implementation of np.log(1 + np.exp(z_max)) will overflow 15 | # naive implementation of z + np.log(1 + 1 / np.exp(z_min)) will 16 | # throw ZeroDivisionError 17 | outputs = np.array([np.log(2.0), np.log1p(np.exp(1.0)), np.log1p(np.exp(-1.0)), 0.0, z_max]) 18 | 19 | assert np.allclose(outputs, softplus(inputs)) 20 | -------------------------------------------------------------------------------- /mla/neuralnet/tests/test_optimizers.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_classification 2 | from sklearn.metrics import roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | 5 | from mla.neuralnet import NeuralNet 6 | from mla.neuralnet.layers import Dense, Activation, Dropout, Parameters 7 | from mla.neuralnet.optimizers import * 8 | from mla.utils import one_hot 9 | 10 | 11 | def clasifier(optimizer): 12 | X, y = make_classification( 13 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 14 | ) 15 | y = one_hot(y) 16 | 17 | X -= np.mean(X, axis=0) 18 | X /= np.std(X, axis=0) 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) 20 | 21 | model = NeuralNet( 22 | layers=[ 23 | Dense(128, Parameters(init="uniform")), 24 | Activation("relu"), 25 | Dropout(0.5), 26 | Dense(64, Parameters(init="normal")), 27 | Activation("relu"), 28 | Dense(2), 29 | Activation("softmax"), 30 | ], 31 | loss="categorical_crossentropy", 32 | optimizer=optimizer, 33 | metric="accuracy", 34 | batch_size=64, 35 | max_epochs=10, 36 | ) 37 | model.fit(X_train, y_train) 38 | predictions = model.predict(X_test) 39 | 
return roc_auc_score(y_test[:, 0], predictions[:, 0]) 40 | 41 | 42 | def test_adadelta(): 43 | assert clasifier(Adadelta()) > 0.9 44 | 45 | 46 | def test_adam(): 47 | assert clasifier(Adam()) > 0.9 48 | 49 | 50 | def test_adamax(): 51 | assert clasifier(Adamax()) > 0.9 52 | 53 | 54 | def test_rmsprop(): 55 | assert clasifier(RMSprop()) > 0.9 56 | 57 | 58 | def test_adagrad(): 59 | assert clasifier(Adagrad()) > 0.9 60 | 61 | 62 | def test_sgd(): 63 | assert clasifier(SGD(learning_rate=0.0001)) > 0.9 64 | assert clasifier(SGD(learning_rate=0.0001, nesterov=True, momentum=0.9)) > 0.9 65 | assert clasifier(SGD(learning_rate=0.0001, nesterov=False, momentum=0.0)) > 0.9 66 | -------------------------------------------------------------------------------- /mla/pca.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | from scipy.linalg import svd 6 | 7 | from mla.base import BaseEstimator 8 | 9 | np.random.seed(1000) 10 | 11 | 12 | class PCA(BaseEstimator): 13 | y_required = False 14 | 15 | def __init__(self, n_components, solver="svd"): 16 | """Principal component analysis (PCA) implementation. 17 | 18 | Transforms a dataset of possibly correlated values into n linearly 19 | uncorrelated components. The components are ordered such that the first 20 | has the largest possible variance and each following component as the 21 | largest possible variance given the previous components. This causes 22 | the early components to contain most of the variability in the dataset. 23 | 24 | Parameters 25 | ---------- 26 | n_components : int 27 | solver : str, default 'svd' 28 | {'svd', 'eigen'} 29 | """ 30 | self.solver = solver 31 | self.n_components = n_components 32 | self.components = None 33 | self.mean = None 34 | 35 | def fit(self, X, y=None): 36 | self.mean = np.mean(X, axis=0) 37 | self._decompose(X) 38 | 39 | def _decompose(self, X): 40 | # Mean centering 41 | X = X.copy() 42 | X -= self.mean 43 | 44 | if self.solver == "svd": 45 | _, s, Vh = svd(X, full_matrices=True) 46 | elif self.solver == "eigen": 47 | s, Vh = np.linalg.eig(np.cov(X.T)) 48 | Vh = Vh.T 49 | 50 | s_squared = s ** 2 51 | variance_ratio = s_squared / s_squared.sum() 52 | logging.info("Explained variance ratio: %s" % (variance_ratio[0: self.n_components])) 53 | self.components = Vh[0: self.n_components] 54 | 55 | def transform(self, X): 56 | X = X.copy() 57 | X -= self.mean 58 | return np.dot(X, self.components.T) 59 | 60 | def _predict(self, X=None): 61 | return self.transform(X) 62 | -------------------------------------------------------------------------------- /mla/rbm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | from scipy.special import expit 6 | 7 | from mla.base import BaseEstimator 8 | from mla.utils import batch_iterator 9 | 10 | np.random.seed(9999) 11 | sigmoid = expit 12 | 13 | """ 14 | References: 15 | A Practical Guide to Training Restricted Boltzmann Machines https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf 16 | """ 17 | 18 | 19 | class RBM(BaseEstimator): 20 | y_required = False 21 | 22 | def __init__(self, n_hidden=128, learning_rate=0.1, batch_size=10, max_epochs=100): 23 | """Bernoulli Restricted Boltzmann Machine (RBM) 24 | 25 | Parameters 26 | ---------- 27 | 28 | n_hidden : int, default 128 29 | The number of hidden units. 
30 | learning_rate : float, default 0.1 31 | batch_size : int, default 10 32 | max_epochs : int, default 100 33 | """ 34 | self.max_epochs = max_epochs 35 | self.batch_size = batch_size 36 | self.lr = learning_rate 37 | self.n_hidden = n_hidden 38 | 39 | def fit(self, X, y=None): 40 | self.n_visible = X.shape[1] 41 | self._init_weights() 42 | self._setup_input(X, y) 43 | self._train() 44 | 45 | def _init_weights(self): 46 | 47 | self.W = np.random.randn(self.n_visible, self.n_hidden) * 0.1 48 | 49 | # Bias for visible and hidden units 50 | self.bias_v = np.zeros(self.n_visible, dtype=np.float32) 51 | self.bias_h = np.zeros(self.n_hidden, dtype=np.float32) 52 | 53 | self.errors = [] 54 | 55 | def _train(self): 56 | """Use CD-1 training procedure, basically an exact inference for `positive_associations`, 57 | followed by a "non burn-in" block Gibbs Sampling for the `negative_associations`.""" 58 | 59 | for i in range(self.max_epochs): 60 | error = 0 61 | for batch in batch_iterator(self.X, batch_size=self.batch_size): 62 | positive_hidden = sigmoid(np.dot(batch, self.W) + self.bias_h) 63 | hidden_states = self._sample(positive_hidden) # sample hidden state h1 64 | positive_associations = np.dot(batch.T, positive_hidden) 65 | 66 | negative_visible = sigmoid(np.dot(hidden_states, self.W.T) + self.bias_v) 67 | negative_visible = self._sample(negative_visible) # use the sampled hidden state h1 to sample v1 68 | negative_hidden = sigmoid(np.dot(negative_visible, self.W) + self.bias_h) 69 | negative_associations = np.dot(negative_visible.T, negative_hidden) 70 | 71 | lr = self.lr / float(batch.shape[0]) 72 | self.W += lr * ((positive_associations - negative_associations) / float(self.batch_size)) 73 | self.bias_h += lr * (negative_hidden.sum(axis=0) - negative_associations.sum(axis=0)) 74 | self.bias_v += lr * (np.asarray(batch.sum(axis=0)).squeeze() - negative_visible.sum(axis=0)) 75 | 76 | error += np.sum((batch - negative_visible) ** 2) 77 | 78 | self.errors.append(error) 79 | logging.info("Iteration %s, error %s" % (i, error)) 80 | logging.debug("Weights: %s" % self.W) 81 | logging.debug("Hidden bias: %s" % self.bias_h) 82 | logging.debug("Visible bias: %s" % self.bias_v) 83 | 84 | def _sample(self, X): 85 | return X > np.random.random_sample(size=X.shape) 86 | 87 | def _predict(self, X=None): 88 | return sigmoid(np.dot(X, self.W) + self.bias_h) 89 | -------------------------------------------------------------------------------- /mla/rl/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | -------------------------------------------------------------------------------- /mla/rl/dqn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | import random 4 | 5 | import gym 6 | import numpy as np 7 | from gym import wrappers 8 | 9 | np.random.seed(9999) 10 | 11 | logger = logging.getLogger() 12 | logger.setLevel(logging.INFO) 13 | 14 | """ 15 | References: 16 | Sutton, Barto (2017). Reinforcement Learning: An Introduction. MIT Press, Cambridge, MA. 17 | """ 18 | 19 | 20 | class DQN(object): 21 | def __init__( 22 | self, n_episodes=500, gamma=0.99, batch_size=32, epsilon=1.0, decay=0.005, min_epsilon=0.1, memory_limit=500 23 | ): 24 | """Deep Q learning implementation. 25 | 26 | Parameters 27 | ---------- 28 | 29 | min_epsilon : float 30 | Minimal value for epsilon. 31 | epsilon : float 32 | ε-greedy value. 33 | decay : float 34 | Epsilon decay rate. 
35 | memory_limit : int 36 | Limit of experience replay memory. 37 | 38 | """ 39 | 40 | self.memory_limit = memory_limit 41 | self.min_epsilon = min_epsilon 42 | self.gamma = gamma 43 | self.epsilon = epsilon 44 | self.n_episodes = n_episodes 45 | self.batch_size = batch_size 46 | self.decay = decay 47 | 48 | def init_environment(self, name="CartPole-v0", monitor=False): 49 | self.env = gym.make(name) 50 | if monitor: 51 | self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False) 52 | 53 | self.n_states = self.env.observation_space.shape[0] 54 | self.n_actions = self.env.action_space.n 55 | 56 | # Experience replay 57 | self.replay = [] 58 | 59 | def init_model(self, model): 60 | self.model = model(self.n_actions, self.batch_size) 61 | 62 | def train(self, render=False): 63 | max_reward = 0 64 | 65 | for ep in range(self.n_episodes): 66 | state = self.env.reset() 67 | 68 | total_reward = 0 69 | 70 | while True: 71 | if render: 72 | self.env.render() 73 | 74 | if np.random.rand() <= self.epsilon: 75 | # Exploration 76 | action = np.random.randint(self.n_actions) 77 | else: 78 | # Exploitation 79 | action = np.argmax(self.model.predict(state[np.newaxis, :])[0]) 80 | 81 | # Run one timestep of the environment 82 | new_state, reward, done, _ = self.env.step(action) 83 | self.replay.append([state, action, reward, new_state, done]) 84 | 85 | # Sample batch from experience replay 86 | batch_size = min(len(self.replay), self.batch_size) 87 | batch = random.sample(self.replay, batch_size) 88 | 89 | X = np.zeros((batch_size, self.n_states)) 90 | y = np.zeros((batch_size, self.n_actions)) 91 | 92 | states = np.array([b[0] for b in batch]) 93 | new_states = np.array([b[3] for b in batch]) 94 | 95 | Q = self.model.predict(states) 96 | new_Q = self.model.predict(new_states) 97 | 98 | # Construct training data 99 | for i in range(batch_size): 100 | state_r, action_r, reward_r, new_state_r, done_r = batch[i] 101 | target = Q[i] 102 | 103 | if done_r: 104 | target[action_r] = reward_r 105 | else: 106 | target[action_r] = reward_r + self.gamma * np.amax(new_Q[i]) 107 | 108 | X[i, :] = state_r 109 | y[i, :] = target 110 | 111 | # Train deep learning model 112 | self.model.fit(X, y) 113 | 114 | total_reward += reward 115 | state = new_state 116 | 117 | if done: 118 | # Exit from current episode 119 | break 120 | 121 | # Remove old entries from replay memory 122 | while len(self.replay) > self.memory_limit: 123 | self.replay.pop(0) 124 | 125 | self.epsilon = self.min_epsilon + (1.0 - self.min_epsilon) * np.exp(-self.decay * ep) 126 | 127 | max_reward = max(max_reward, total_reward) 128 | logger.info( 129 | "Episode: %s, reward %s, epsilon %s, max reward %s" % (ep, total_reward, self.epsilon, max_reward) 130 | ) 131 | logging.info("Training finished.") 132 | 133 | def play(self, episodes): 134 | for i in range(episodes): 135 | state = self.env.reset() 136 | total_reward = 0 137 | 138 | while True: 139 | self.env.render() 140 | action = np.argmax(self.model.predict(state[np.newaxis, :])[0]) 141 | state, reward, done, _ = self.env.step(action) 142 | total_reward += reward 143 | if done: 144 | break 145 | logger.info("Episode: %s, reward %s" % (i, total_reward)) 146 | self.env.close() 147 | -------------------------------------------------------------------------------- /mla/svm/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | -------------------------------------------------------------------------------- /mla/svm/kernerls.py: 
-------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | import scipy.spatial.distance as dist 4 | 5 | 6 | class Linear(object): 7 | def __call__(self, x, y): 8 | return np.dot(x, y.T) 9 | 10 | def __repr__(self): 11 | return "Linear kernel" 12 | 13 | 14 | class Poly(object): 15 | def __init__(self, degree=2): 16 | self.degree = degree 17 | 18 | def __call__(self, x, y): 19 | return np.dot(x, y.T) ** self.degree 20 | 21 | def __repr__(self): 22 | return "Poly kernel" 23 | 24 | 25 | class RBF(object): 26 | def __init__(self, gamma=0.1): 27 | self.gamma = gamma 28 | 29 | def __call__(self, x, y): 30 | x = np.atleast_2d(x) 31 | y = np.atleast_2d(y) 32 | return np.exp(-self.gamma * dist.cdist(x, y) ** 2).flatten() 33 | 34 | def __repr__(self): 35 | return "RBF kernel" 36 | -------------------------------------------------------------------------------- /mla/svm/svm.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | 6 | from mla.base import BaseEstimator 7 | from mla.svm.kernerls import Linear 8 | 9 | np.random.seed(9999) 10 | 11 | """ 12 | References: 13 | The Simplified SMO Algorithm http://cs229.stanford.edu/materials/smo.pdf 14 | """ 15 | 16 | 17 | class SVM(BaseEstimator): 18 | def __init__(self, C=1.0, kernel=None, tol=1e-3, max_iter=100): 19 | """Support vector machines implementation using simplified SMO optimization. 20 | 21 | Parameters 22 | ---------- 23 | C : float, default 1.0 24 | kernel : Kernel object 25 | tol : float , default 1e-3 26 | max_iter : int, default 100 27 | """ 28 | self.C = C 29 | self.tol = tol 30 | self.max_iter = max_iter 31 | if kernel is None: 32 | self.kernel = Linear() 33 | else: 34 | self.kernel = kernel 35 | 36 | self.b = 0 37 | self.alpha = None 38 | self.K = None 39 | 40 | def fit(self, X, y=None): 41 | self._setup_input(X, y) 42 | self.K = np.zeros((self.n_samples, self.n_samples)) 43 | for i in range(self.n_samples): 44 | self.K[:, i] = self.kernel(self.X, self.X[i, :]) 45 | self.alpha = np.zeros(self.n_samples) 46 | self.sv_idx = np.arange(0, self.n_samples) 47 | return self._train() 48 | 49 | def _train(self): 50 | iters = 0 51 | while iters < self.max_iter: 52 | iters += 1 53 | alpha_prev = np.copy(self.alpha) 54 | 55 | for j in range(self.n_samples): 56 | # Pick random i 57 | i = self.random_index(j) 58 | 59 | eta = 2.0 * self.K[i, j] - self.K[i, i] - self.K[j, j] 60 | if eta >= 0: 61 | continue 62 | L, H = self._find_bounds(i, j) 63 | 64 | # Error for current examples 65 | e_i, e_j = self._error(i), self._error(j) 66 | 67 | # Save old alphas 68 | alpha_io, alpha_jo = self.alpha[i], self.alpha[j] 69 | 70 | # Update alpha 71 | self.alpha[j] -= (self.y[j] * (e_i - e_j)) / eta 72 | self.alpha[j] = self.clip(self.alpha[j], H, L) 73 | 74 | self.alpha[i] = self.alpha[i] + self.y[i] * self.y[j] * (alpha_jo - self.alpha[j]) 75 | 76 | # Find intercept 77 | b1 = ( 78 | self.b - e_i - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, i] 79 | - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[i, j] 80 | ) 81 | b2 = ( 82 | self.b - e_j - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[j, j] 83 | - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, j] 84 | ) 85 | if 0 < self.alpha[i] < self.C: 86 | self.b = b1 87 | elif 0 < self.alpha[j] < self.C: 88 | self.b = b2 89 | else: 90 | self.b = 0.5 * (b1 + b2) 91 | 92 | # Check convergence 93 | diff = np.linalg.norm(self.alpha - alpha_prev) 94 | if 
diff < self.tol: 95 | break 96 | logging.info("Convergence has reached after %s." % iters) 97 | 98 | # Save support vectors index 99 | self.sv_idx = np.where(self.alpha > 0)[0] 100 | 101 | def _predict(self, X=None): 102 | n = X.shape[0] 103 | result = np.zeros(n) 104 | for i in range(n): 105 | result[i] = np.sign(self._predict_row(X[i, :])) 106 | return result 107 | 108 | def _predict_row(self, X): 109 | k_v = self.kernel(self.X[self.sv_idx], X) 110 | return np.dot((self.alpha[self.sv_idx] * self.y[self.sv_idx]).T, k_v.T) + self.b 111 | 112 | def clip(self, alpha, H, L): 113 | if alpha > H: 114 | alpha = H 115 | if alpha < L: 116 | alpha = L 117 | return alpha 118 | 119 | def _error(self, i): 120 | """Error for single example.""" 121 | return self._predict_row(self.X[i]) - self.y[i] 122 | 123 | def _find_bounds(self, i, j): 124 | """Find L and H such that L <= alpha <= H. 125 | Also, alpha must satisfy the constraint 0 <= αlpha <= C. 126 | """ 127 | if self.y[i] != self.y[j]: 128 | L = max(0, self.alpha[j] - self.alpha[i]) 129 | H = min(self.C, self.C - self.alpha[i] + self.alpha[j]) 130 | else: 131 | L = max(0, self.alpha[i] + self.alpha[j] - self.C) 132 | H = min(self.C, self.alpha[i] + self.alpha[j]) 133 | return L, H 134 | 135 | def random_index(self, z): 136 | i = z 137 | while i == z: 138 | i = np.random.randint(0, self.n_samples - 1) 139 | return i 140 | -------------------------------------------------------------------------------- /mla/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rushter/MLAlgorithms/035e489a879d01a84fffff74885dc6b1bca3c96f/mla/tests/__init__.py -------------------------------------------------------------------------------- /mla/tests/test_classification_accuracy.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score 2 | 3 | from mla.ensemble import RandomForestClassifier 4 | from mla.ensemble.gbm import GradientBoostingClassifier 5 | from mla.knn import KNNClassifier 6 | from mla.linear_models import LogisticRegression 7 | from mla.metrics import accuracy 8 | from mla.naive_bayes import NaiveBayesClassifier 9 | from mla.neuralnet import NeuralNet 10 | from mla.neuralnet.constraints import MaxNorm 11 | from mla.neuralnet.layers import Activation, Dense, Dropout 12 | from mla.neuralnet.optimizers import Adadelta 13 | from mla.neuralnet.parameters import Parameters 14 | from mla.neuralnet.regularizers import L2 15 | from mla.svm.kernerls import RBF, Linear 16 | from mla.svm.svm import SVM 17 | from mla.utils import one_hot 18 | 19 | try: 20 | from sklearn.model_selection import train_test_split 21 | except ImportError: 22 | from sklearn.cross_validation import train_test_split 23 | from sklearn.datasets import make_classification 24 | 25 | # Generate a random regression problem 26 | X, y = make_classification( 27 | n_samples=750, n_features=10, n_informative=8, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 28 | ) 29 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=1111) 30 | 31 | 32 | # All classifiers except convnet, RNN, LSTM. 
33 | 34 | 35 | def test_linear_model(): 36 | model = LogisticRegression(lr=0.01, max_iters=500, penalty="l1", C=0.01) 37 | model.fit(X_train, y_train) 38 | predictions = model.predict(X_test) 39 | assert roc_auc_score(y_test, predictions) >= 0.95 40 | 41 | 42 | def test_random_forest(): 43 | model = RandomForestClassifier(n_estimators=10, max_depth=4) 44 | model.fit(X_train, y_train) 45 | predictions = model.predict(X_test)[:, 1] 46 | assert roc_auc_score(y_test, predictions) >= 0.95 47 | 48 | 49 | def test_svm_classification(): 50 | y_signed_train = (y_train * 2) - 1 51 | y_signed_test = (y_test * 2) - 1 52 | 53 | for kernel in [RBF(gamma=0.05), Linear()]: 54 | model = SVM(max_iter=500, kernel=kernel) 55 | model.fit(X_train, y_signed_train) 56 | predictions = model.predict(X_test) 57 | assert accuracy(y_signed_test, predictions) >= 0.8 58 | 59 | 60 | def test_mlp(): 61 | y_train_onehot = one_hot(y_train) 62 | y_test_onehot = one_hot(y_test) 63 | 64 | model = NeuralNet( 65 | layers=[ 66 | Dense(256, Parameters(init="uniform", regularizers={"W": L2(0.05)})), 67 | Activation("relu"), 68 | Dropout(0.5), 69 | Dense(128, Parameters(init="normal", constraints={"W": MaxNorm()})), 70 | Activation("relu"), 71 | Dense(2), 72 | Activation("softmax"), 73 | ], 74 | loss="categorical_crossentropy", 75 | optimizer=Adadelta(), 76 | metric="accuracy", 77 | batch_size=64, 78 | max_epochs=25, 79 | ) 80 | model.fit(X_train, y_train_onehot) 81 | predictions = model.predict(X_test) 82 | assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.95 83 | 84 | 85 | def test_gbm(): 86 | model = GradientBoostingClassifier(n_estimators=25, max_depth=3, max_features=5, learning_rate=0.1) 87 | model.fit(X_train, y_train) 88 | predictions = model.predict(X_test) 89 | assert roc_auc_score(y_test, predictions) >= 0.95 90 | 91 | 92 | def test_naive_bayes(): 93 | model = NaiveBayesClassifier() 94 | model.fit(X_train, y_train) 95 | predictions = model.predict(X_test)[:, 1] 96 | assert roc_auc_score(y_test, predictions) >= 0.95 97 | 98 | 99 | def test_knn(): 100 | clf = KNNClassifier(k=5) 101 | 102 | clf.fit(X_train, y_train) 103 | predictions = clf.predict(X_test) 104 | assert accuracy(y_test, predictions) >= 0.95 105 | -------------------------------------------------------------------------------- /mla/tests/test_reduction.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import pytest 3 | from sklearn.datasets import make_classification 4 | from sklearn.metrics import roc_auc_score 5 | 6 | try: 7 | from sklearn.model_selection import train_test_split 8 | except ImportError: 9 | from sklearn.cross_validation import train_test_split 10 | 11 | from mla.ensemble import RandomForestClassifier 12 | from mla.pca import PCA 13 | 14 | 15 | @pytest.fixture 16 | def dataset(): 17 | # Generate a random binary classification problem. 
18 | return make_classification( 19 | n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 20 | ) 21 | 22 | 23 | # TODO: fix 24 | @pytest.mark.skip() 25 | def test_PCA(dataset): 26 | X, y = dataset 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 28 | p = PCA(50, solver="eigen") 29 | 30 | # fit PCA with training set, not the entire dataset 31 | p.fit(X_train) 32 | X_train_reduced = p.transform(X_train) 33 | X_test_reduced = p.transform(X_test) 34 | 35 | model = RandomForestClassifier(n_estimators=25, max_depth=5) 36 | model.fit(X_train_reduced, y_train) 37 | predictions = model.predict(X_test_reduced)[:, 1] 38 | score = roc_auc_score(y_test, predictions) 39 | assert score >= 0.75 40 | -------------------------------------------------------------------------------- /mla/tests/test_regression_accuracy.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.model_selection import train_test_split 3 | except ImportError: 4 | from sklearn.cross_validation import train_test_split 5 | from sklearn.datasets import make_regression 6 | 7 | from mla.knn import KNNRegressor 8 | from mla.linear_models import LinearRegression 9 | from mla.metrics.metrics import mean_squared_error 10 | from mla.neuralnet import NeuralNet 11 | from mla.neuralnet.layers import Activation, Dense 12 | from mla.neuralnet.optimizers import Adam 13 | from mla.neuralnet.parameters import Parameters 14 | 15 | # Generate a random regression problem 16 | X, y = make_regression( 17 | n_samples=1000, n_features=10, n_informative=10, n_targets=1, noise=0.05, random_state=1111, bias=0.5 18 | ) 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) 20 | 21 | 22 | def test_linear(): 23 | model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.003) 24 | model.fit(X_train, y_train) 25 | predictions = model.predict(X_test) 26 | assert mean_squared_error(y_test, predictions) < 0.25 27 | 28 | 29 | def test_mlp(): 30 | model = NeuralNet( 31 | layers=[ 32 | Dense(16, Parameters(init="normal")), 33 | Activation("linear"), 34 | Dense(8, Parameters(init="normal")), 35 | Activation("linear"), 36 | Dense(1), 37 | ], 38 | loss="mse", 39 | optimizer=Adam(), 40 | metric="mse", 41 | batch_size=64, 42 | max_epochs=150, 43 | ) 44 | model.fit(X_train, y_train) 45 | predictions = model.predict(X_test) 46 | assert mean_squared_error(y_test, predictions.flatten()) < 1.0 47 | 48 | 49 | def test_knn(): 50 | model = KNNRegressor(k=5) 51 | model.fit(X_train, y_train) 52 | predictions = model.predict(X_test) 53 | assert mean_squared_error(y_test, predictions) < 10000 54 | -------------------------------------------------------------------------------- /mla/tsne.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import logging 3 | 4 | import numpy as np 5 | 6 | from mla.base import BaseEstimator 7 | from mla.metrics.distance import l2_distance 8 | 9 | np.random.seed(999) 10 | 11 | """ 12 | References: 13 | https://lvdmaaten.github.io/tsne/ 14 | Based on: 15 | https://lvdmaaten.github.io/tsne/code/tsne_python.zip 16 | """ 17 | 18 | 19 | class TSNE(BaseEstimator): 20 | y_required = False 21 | 22 | def __init__(self, n_components=2, perplexity=30.0, max_iter=200, learning_rate=500): 23 | """A t-Distributed Stochastic Neighbor Embedding implementation. 
24 | 25 | Parameters 26 | ---------- 27 | max_iter : int, default 200 28 | perplexity : float, default 30.0 29 | n_components : int, default 2 30 | """ 31 | self.max_iter = max_iter 32 | self.perplexity = perplexity 33 | self.n_components = n_components 34 | self.initial_momentum = 0.5 35 | self.final_momentum = 0.8 36 | self.min_gain = 0.01 37 | self.lr = learning_rate 38 | self.tol = 1e-5 39 | self.perplexity_tries = 50 40 | 41 | def fit_transform(self, X, y=None): 42 | self._setup_input(X, y) 43 | 44 | Y = np.random.randn(self.n_samples, self.n_components) 45 | velocity = np.zeros_like(Y) 46 | gains = np.ones_like(Y) 47 | 48 | P = self._get_pairwise_affinities(X) 49 | 50 | iter_num = 0 51 | while iter_num < self.max_iter: 52 | iter_num += 1 53 | 54 | D = l2_distance(Y) 55 | Q = self._q_distribution(D) 56 | 57 | # Normalizer q distribution 58 | Q_n = Q / np.sum(Q) 59 | 60 | # Early exaggeration & momentum 61 | pmul = 4.0 if iter_num < 100 else 1.0 62 | momentum = 0.5 if iter_num < 20 else 0.8 63 | 64 | # Perform gradient step 65 | grads = np.zeros(Y.shape) 66 | for i in range(self.n_samples): 67 | grad = 4 * np.dot((pmul * P[i] - Q_n[i]) * Q[i], Y[i] - Y) 68 | grads[i] = grad 69 | 70 | gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0)) 71 | gains = gains.clip(min=self.min_gain) 72 | 73 | velocity = momentum * velocity - self.lr * (gains * grads) 74 | Y += velocity 75 | Y = Y - np.mean(Y, 0) 76 | 77 | error = np.sum(P * np.log(P / Q_n)) 78 | logging.info("Iteration %s, error %s" % (iter_num, error)) 79 | return Y 80 | 81 | def _get_pairwise_affinities(self, X): 82 | """Computes pairwise affinities.""" 83 | affines = np.zeros((self.n_samples, self.n_samples), dtype=np.float32) 84 | target_entropy = np.log(self.perplexity) 85 | distances = l2_distance(X) 86 | 87 | for i in range(self.n_samples): 88 | affines[i, :] = self._binary_search(distances[i], target_entropy) 89 | 90 | # Fill diagonal with near zero value 91 | np.fill_diagonal(affines, 1.0e-12) 92 | 93 | affines = affines.clip(min=1e-100) 94 | affines = (affines + affines.T) / (2 * self.n_samples) 95 | return affines 96 | 97 | def _binary_search(self, dist, target_entropy): 98 | """Performs binary search to find suitable precision.""" 99 | precision_min = 0 100 | precision_max = 1.0e15 101 | precision = 1.0e5 102 | 103 | for _ in range(self.perplexity_tries): 104 | denom = np.sum(np.exp(-dist[dist > 0.0] / precision)) 105 | beta = np.exp(-dist / precision) / denom 106 | 107 | # Exclude zeros 108 | g_beta = beta[beta > 0.0] 109 | entropy = -np.sum(g_beta * np.log2(g_beta)) 110 | 111 | error = entropy - target_entropy 112 | 113 | if error > 0: 114 | # Decrease precision 115 | precision_max = precision 116 | precision = (precision + precision_min) / 2.0 117 | else: 118 | # Increase precision 119 | precision_min = precision 120 | precision = (precision + precision_max) / 2.0 121 | 122 | if np.abs(error) < self.tol: 123 | break 124 | 125 | return beta 126 | 127 | def _q_distribution(self, D): 128 | """Computes Student t-distribution.""" 129 | Q = 1.0 / (1.0 + D) 130 | np.fill_diagonal(Q, 0.0) 131 | Q = Q.clip(min=1e-100) 132 | return Q 133 | -------------------------------------------------------------------------------- /mla/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from .main import * 4 | -------------------------------------------------------------------------------- /mla/utils/main.py: 
-------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | 4 | 5 | def one_hot(y): 6 | n_values = np.max(y) + 1 7 | return np.eye(n_values)[y] 8 | 9 | 10 | def batch_iterator(X, batch_size=64): 11 | """Splits X into equal sized chunks.""" 12 | n_samples = X.shape[0] 13 | n_batches = n_samples // batch_size 14 | batch_end = 0 15 | 16 | for b in range(n_batches): 17 | batch_begin = b * batch_size 18 | batch_end = batch_begin + batch_size 19 | 20 | X_batch = X[batch_begin:batch_end] 21 | 22 | yield X_batch 23 | 24 | if n_batches * batch_size < n_samples: 25 | yield X[batch_end:] 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | matplotlib>=1.5.1 3 | numpy>=1.11.1 4 | scikit-learn>=0.18 5 | scipy>=0.18.0 6 | seaborn>=0.7.1 7 | autograd>=1.1.7 8 | gym 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | description-file=README.md 6 | 7 | [flake8] 8 | max-line-length = 120 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open 3 | from os import path 4 | 5 | __version__ = '0.0.1' 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # Get the long description from the README file 10 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 11 | long_description = f.read() 12 | 13 | # get the dependencies and installs 14 | with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f: 15 | all_reqs = f.read().split('\n') 16 | 17 | install_requires = [x.strip() for x in all_reqs if 'git+' not in x] 18 | dependency_links = [x.strip().replace('git+', '') for x in all_reqs if x.startswith('git+')] 19 | 20 | setup( 21 | name='mla', 22 | version=__version__, 23 | description='A collection of minimal and clean implementations of machine learning algorithms.', 24 | long_description=long_description, 25 | url='https://github.com/rushter/mla', 26 | download_url='https://github.com/rushter/mla/tarball/' + __version__, 27 | license='MIT', 28 | packages=find_packages(exclude=['docs', 'tests*']), 29 | include_package_data=True, 30 | author='Artem Golubin', 31 | install_requires=install_requires, 32 | setup_requires=['numpy>=1.10', 'scipy>=0.17'], 33 | dependency_links=dependency_links, 34 | author_email='gh@rushter.com' 35 | ) 36 | --------------------------------------------------------------------------------
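For completeness, a short standalone sketch of the two helpers defined in mla/utils/main.py, which are used throughout the training code above; the toy arrays are made up purely for illustration.

import numpy as np
from mla.utils import batch_iterator, one_hot

y = np.array([0, 2, 1, 2])
print(one_hot(y))            # identity rows selected by class index
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

X = np.arange(10).reshape(10, 1)
for batch in batch_iterator(X, batch_size=4):
    print(batch.shape)       # (4, 1), (4, 1), then the (2, 1) remainder batch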