├── aorun ├── __init__.py ├── datasets.py ├── activations.py ├── initializers.py ├── utils.py ├── optimizers.py ├── losses.py ├── models.py └── layers.py ├── tests ├── __init__.py ├── context.py ├── test_datasets.py ├── test_initializers.py ├── test_utils.py ├── test_optimizers.py ├── test_activations.py ├── test_losses.py ├── test_layers.py └── test_models.py ├── .gitignore ├── setup.cfg ├── .travis.yml ├── setup.py ├── LICENSE ├── ROADMAP.md ├── examples ├── regression.py ├── classification.py ├── probabilistic_regression.py ├── mnist.py └── probabilistic_classification.py └── README.md /aorun/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .coverage 2 | .cache/ 3 | __pycache__/ 4 | build/ 5 | dist/ 6 | aorun.egg-info/ 7 | -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath('..')) 4 | 5 | import aorun 6 | -------------------------------------------------------------------------------- /aorun/datasets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . import utils 3 | 4 | 5 | def load_mnist(): 6 | url = 'https://s3.amazonaws.com/img-datasets/mnist.npz' 7 | filepath = utils.get_file(url, cache_subdir='datasets') 8 | d = np.load(filepath) 9 | return (d['x_train'], d['y_train']), (d['x_test'], d['y_test']) 10 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | from .context import aorun 2 | 3 | from aorun import datasets 4 | 5 | 6 | def test_mnist(): 7 | (X_train, y_train), (X_test, y_test) = datasets.load_mnist() 8 | 9 | assert len(X_train) == 60000 10 | assert len(y_train) == 60000 11 | assert len(X_test) == 10000 12 | assert len(y_test) == 10000 13 | -------------------------------------------------------------------------------- /aorun/activations.py: -------------------------------------------------------------------------------- 1 | from torch.nn.functional import relu, softmax 2 | 3 | 4 | def get(obj): 5 | if callable(obj): 6 | return obj 7 | elif type(obj) is str: 8 | if obj in globals(): 9 | return globals()[obj] 10 | else: 11 | raise Exception(f'Unknown activation: {obj}') 12 | else: 13 | raise Exception('Activation must be a callable or str') 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts=--durations=10 3 | --cov-report term-missing 4 | --cov=aorun 5 | --pep8 6 | --reruns=5 7 | 8 | # PEP-8: only the following code is ignored: 9 | # E402 module level import not at top of file (needed by tests/context.py) 10 | 11 | pep8ignore=* E402 12 | 
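# Running the suite locally needs the pytest plugins referenced above; a
# minimal setup (a sketch mirroring .travis.yml, assuming PyTorch is already
# installed) would be:
#   pip install pytest pytest-cov pytest-pep8 pytest-rerunfailures
#   pytest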
-------------------------------------------------------------------------------- /aorun/initializers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn import Parameter 4 | 5 | 6 | def glorot_uniform(shape, in_units, out_units): 7 | limit = np.sqrt(6 / (in_units + out_units)) 8 | W = np.random.uniform(-limit, limit, size=shape).astype('float32') 9 | W = torch.from_numpy(W) 10 | return Parameter(W) 11 | 12 | 13 | def get(obj): 14 | if callable(obj): 15 | return obj 16 | elif type(obj) is str: 17 | if obj in globals(): 18 | return globals()[obj] 19 | else: 20 | raise Exception(f'Unknown initializer: {obj}') 21 | else: 22 | raise Exception('Initializer must be a callable or str') 23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | 5 | install: 6 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 7 | - bash miniconda.sh -b -p $HOME/miniconda 8 | - export PATH="$HOME/miniconda/bin:$PATH" 9 | - conda config --set always_yes true 10 | - conda update --quiet conda 11 | - conda create --quiet --name tests python=$TRAVIS_PYTHON_VERSION 12 | - source activate tests 13 | - conda install requests numpy pytest pytest-cov pytest-pep8 14 | - pip install pytest-rerunfailures 15 | - conda install pytorch=0.1.10 torchvision -c soumith 16 | - pip install tqdm 17 | - pip install coveralls 18 | 19 | script: pytest 20 | 21 | after_success: coveralls 22 | -------------------------------------------------------------------------------- /tests/test_initializers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | import math 5 | from torch import Tensor 6 | from aorun.layers import Dense 7 | from aorun import initializers 8 | 9 | 10 | def test_initializer_glorot_uniform(): 11 | X = Tensor([[10, 10, 10]]) 12 | input_dim = X.size()[-1] 13 | dense = Dense(10, init='glorot_uniform', input_dim=input_dim) 14 | 15 | assert dense.params[0].max() <= math.sqrt(6 / (input_dim + 10)) 16 | assert dense.params[1].max() <= math.sqrt(6 / (input_dim + 10)) 17 | assert dense.params[0].min() >= -math.sqrt(6 / (input_dim + 10)) 18 | assert dense.params[1].min() >= -math.sqrt(6 / (input_dim + 10)) 19 | 20 | 21 | def test_initializer_get(): 22 | with pytest.raises(Exception) as e: 23 | initializers.get('UNKNOWN_TEST') 24 | with pytest.raises(Exception) as e: 25 | initializers.get(123) 26 | 27 | def init_test(w, a, b): 28 | return w 29 | assert initializers.get(init_test) == init_test 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup( 6 | name='aorun', 7 | version='0.1', 8 | description='Deep Learning over PyTorch', 9 | author='Ramon Oliveira', 10 | author_email='ramon@roliveira.net', 11 | url='https://github.com/ramon-oliveira/aorun', 12 | license='MIT', 13 | keywords='neural-networks deep-learning pytorch', 14 | install_requires=[ 15 | 'numpy>=1.10', 16 | 'tqdm>=4.11', 17 | 'requests>=2.12', 18 | 'torch>=0.1.10', 19 | ], 20 | extras_require={ 21 | 'tests': [ 22 | 'pytest', 23 | 'pytest-cov', 24 | 'pytest-pep8', 25 | ] 26 | 
}, 27 | packages=find_packages(), 28 | classifiers=[ 29 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 30 | 'License :: OSI Approved :: MIT License', 31 | 'Intended Audience :: Developers', 32 | 'Intended Audience :: Science/Research', 33 | 'Programming Language :: Python :: 3.6', 34 | 'Development Status :: 3 - Alpha', 35 | ] 36 | ) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016, Ramon Oliveira. 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | ### Roadmap for v0.1.1: 2 | - Layers: 3 | - [ ] ProbabilisticConv2D 4 | - [x] Recurrent 5 | - [x] Dropout 6 | - Model: 7 | - [ ] Callbacks 8 | - [ ] remove model build or refactoring 9 | - Losses: 10 | - [ ] variational_loss refactoring 11 | - Documentation: 12 | - [ ] Introduction 13 | - [x] Install 14 | - [ ] Classes 15 | - Examples: 16 | - [ ] DQN 17 | 18 | ### Roadmap for v0.1: 19 | - Layers: 20 | - [x] Dense 21 | - [x] ProbabilisticDense 22 | - [x] Conv2D 23 | - Activations: 24 | - [x] Relu 25 | - [x] Softmax 26 | - Optimizers: 27 | - [x] SGD (with momentum) 28 | - [x] Adam 29 | - Losses: 30 | - [x] Mean Squared Error 31 | - [x] Binary Crossentropy 32 | - [x] Categorical Crossentropy 33 | - [x] Variational 34 | - Model fit: 35 | - [x] Training progress bar 36 | - [x] Validation split 37 | - Examples: 38 | - [x] MNIST 39 | - Package: 40 | - [x] Create setup.py 41 | - [x] Continuous integration (Travis-CI) 42 | - [x] Publish on pypi 43 | - README: 44 | - [x] Code coverage badge 45 | - [x] Build badge 46 | - [x] License badge 47 | - [x] Python version badge 48 | -------------------------------------------------------------------------------- /examples/regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath('..')) 4 | 5 | from sklearn import datasets 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn import metrics 9 | 10 | from aorun.models import Model 11 | from aorun.layers import Dense 12 | from aorun.layers import Activation 13 | from 
aorun.optimizers import SGD 14 | 15 | X, y = datasets.load_boston(return_X_y=True) 16 | X = StandardScaler().fit_transform(X).astype('float32') 17 | y = StandardScaler().fit_transform(y.reshape(-1, 1)).astype('float32') 18 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 19 | 20 | model = Model( 21 | Dense(100, input_dim=X_train.shape[-1]), 22 | Activation('relu'), 23 | Dense(100), 24 | Activation('relu'), 25 | Dense(1) 26 | ) 27 | 28 | sgd = SGD(lr=0.1) 29 | history = model.fit(X_train, y_train, loss='mse', optimizer=sgd, epochs=100) 30 | y_pred = model.predict(X_test) 31 | print('r2_score:', metrics.r2_score(y_test, y_pred)) 32 | print('mean_absolute_error:', metrics.mean_absolute_error(y_test, y_pred)) 33 | print('mean_squared_error:', metrics.mean_squared_error(y_test, y_pred)) 34 | -------------------------------------------------------------------------------- /examples/classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath('..')) 4 | 5 | from sklearn import datasets 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn import metrics 9 | 10 | import numpy as np 11 | from aorun.models import Model 12 | from aorun.layers import Dense 13 | from aorun.layers import Activation 14 | 15 | X, y = datasets.load_digits(return_X_y=True) 16 | X = X.astype('float32') 17 | y = np.eye(y.max() + 1)[y].astype('float32') 18 | 19 | X = StandardScaler().fit_transform(X) 20 | 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 22 | print(X_train.shape, y_train.shape) 23 | 24 | model = Model( 25 | Dense(100, input_dim=X_train.shape[-1]), 26 | Activation('relu'), 27 | Dense(100), 28 | Activation('relu'), 29 | Dense(y_test.shape[-1]), 30 | Activation('softmax') 31 | ) 32 | 33 | loss = 'categorical_crossentropy' 34 | history = model.fit(X_train, y_train, loss=loss, val_split=0.1) 35 | 36 | y_pred = model.predict(X_test) 37 | acc = metrics.accuracy_score(y_test.argmax(axis=1), y_pred.argmax(axis=1)) 38 | print('Classes:', y.shape[1]) 39 | print('Accuracy:', acc) 40 | -------------------------------------------------------------------------------- /examples/probabilistic_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath('..')) 4 | 5 | from sklearn import datasets 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn import metrics 9 | 10 | import torch 11 | from aorun.models import Model 12 | from aorun.layers import ProbabilisticDense 13 | from aorun.layers import Activation 14 | from aorun.optimizers import SGD 15 | from aorun.losses import variational_loss 16 | 17 | X, y = datasets.load_boston(return_X_y=True) 18 | X = X.astype('float32') 19 | y = y.astype('float32') 20 | X = StandardScaler().fit_transform(X) 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 22 | 23 | model = Model() 24 | model.add(ProbabilisticDense(100, input_dim=X_train.shape[-1])) 25 | model.add(Activation('relu')) 26 | model.add(ProbabilisticDense(100)) 27 | model.add(Activation('relu')) 28 | model.add(ProbabilisticDense(1)) 29 | 30 | sgd = SGD(lr=0.1) 31 | loss = variational_loss(model, 'mean_squared_error') 32 | history = model.fit(X_train, y_train, loss=loss, optimizer=sgd, epochs=100) 33 | y_pred = 
model.predict(X_test) 34 | print('r2_score:', metrics.r2_score(y_test, y_pred)) 35 | print('mean_absolute_error:', metrics.mean_absolute_error(y_test, y_pred)) 36 | print('mean_squared_error:', metrics.mean_squared_error(y_test, y_pred)) 37 | -------------------------------------------------------------------------------- /examples/mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath('..')) 4 | 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.preprocessing import StandardScaler 7 | from sklearn import metrics 8 | 9 | import numpy as np 10 | from aorun import datasets 11 | from aorun.models import Model 12 | from aorun.layers import Conv2D 13 | from aorun.layers import Dense 14 | from aorun.layers import Flatten 15 | from aorun.layers import Activation 16 | from aorun.layers import Dropout 17 | 18 | (X, y), (X_test, y_test) = datasets.load_mnist() 19 | X = X / 127.0 20 | X_test = X_test / 127.0 21 | y = np.eye(y.max() + 1)[y] 22 | y_test = np.eye(y_test.max() + 1)[y_test] 23 | print(X.shape, X_test.shape) 24 | 25 | X = X.astype('float32') 26 | X_test = X_test.astype('float32') 27 | y = y.astype('float32') 28 | y_test = y_test.astype('float32') 29 | 30 | model = Model( 31 | Conv2D(8, kernel_size=(3, 3), input_dim=X.shape[1:]), 32 | Flatten(), 33 | Activation('relu'), 34 | Dropout(0.5), 35 | Dense(100), 36 | Activation('relu'), 37 | Dropout(0.5), 38 | Dense(y_test.shape[-1]), 39 | Activation('softmax') 40 | ) 41 | 42 | loss = 'categorical_crossentropy' 43 | history = model.fit(X, y, loss=loss, val_data=(X_test, y_test)) 44 | 45 | y_pred = model.predict(X_test) 46 | acc = metrics.accuracy_score(y_test.argmax(axis=1), y_pred.argmax(axis=1)) 47 | print('Classes:', y.shape[1]) 48 | print('Accuracy:', acc) 49 | -------------------------------------------------------------------------------- /examples/probabilistic_classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath('..')) 4 | import functools 5 | from sklearn import datasets 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn import metrics 9 | 10 | import numpy as np 11 | from aorun.models import Model 12 | from aorun.layers import ProbabilisticDense 13 | from aorun.layers import Activation 14 | from aorun.optimizers import SGD 15 | from aorun.losses import variational_loss 16 | 17 | X, y = datasets.load_digits(return_X_y=True) 18 | X = X.astype('float32') 19 | y = np.eye(y.max() + 1)[y].astype('float32') 20 | X = StandardScaler().fit_transform(X) 21 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 22 | print(X_train.shape, y_train.shape) 23 | 24 | model = Model( 25 | ProbabilisticDense(200, input_dim=X_train.shape[-1]), 26 | Activation('relu'), 27 | ProbabilisticDense(200), 28 | Activation('relu'), 29 | ProbabilisticDense(y_test.shape[-1]), 30 | Activation('softmax') 31 | ) 32 | 33 | opt = SGD(lr=0.1, momentum=0.9) 34 | loss = variational_loss(model, 'categorical_crossentropy') 35 | history = model.fit(X_train, y_train, epochs=20, loss=loss, optimizer=opt) 36 | 37 | y_pred = model.predict(X_test) 38 | acc = metrics.accuracy_score(y_test.argmax(axis=1), y_pred.argmax(axis=1)) 39 | print('test samples:', len(y_test)) 40 | print('classes:', len(y_test[0])) 41 | print('Accuracy:', acc) 42 | 
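# ProbabilisticDense resamples its weights on every forward pass, so a single
# model.predict() call is one stochastic draw from the learned posterior. A
# simple way to stabilize the prediction (a sketch, left commented out; it is
# not part of the original example) is to average several draws:
#
#   y_samples = np.stack([model.predict(X_test) for _ in range(10)])
#   acc_mc = metrics.accuracy_score(y_test.argmax(axis=1),
#                                   y_samples.mean(axis=0).argmax(axis=1))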
-------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | import numpy as np 5 | from torch import Tensor 6 | from torch.autograd import Variable 7 | 8 | from aorun import utils 9 | 10 | 11 | def test_to_numpy(): 12 | a = Tensor(10) 13 | a = utils.to_numpy(a) 14 | assert type(a) is np.ndarray 15 | 16 | a = Variable(Tensor(10)) 17 | a = utils.to_numpy(a) 18 | assert type(a) is np.ndarray 19 | 20 | a = np.array([10]) 21 | a = utils.to_numpy(a) 22 | assert type(a) is np.ndarray 23 | 24 | with pytest.raises(ValueError) as e: 25 | a = 'hahaha' 26 | utils.to_numpy(a) 27 | 28 | 29 | def test_to_tensor(): 30 | a = Tensor(10) 31 | a = utils.to_tensor(a) 32 | assert type(a) is Tensor 33 | 34 | a = Variable(Tensor(10)) 35 | a = utils.to_tensor(a) 36 | assert type(a) is Variable 37 | 38 | a = np.array([10.0], dtype='float32') 39 | a = utils.to_tensor(a) 40 | assert type(a) is Tensor 41 | 42 | with pytest.raises(ValueError) as e: 43 | a = 'hahaha' 44 | utils.to_tensor(a) 45 | 46 | 47 | def test_to_variable(): 48 | a = Tensor(10) 49 | a = utils.to_variable(a) 50 | assert type(a) is Variable 51 | 52 | a = Variable(Tensor(10)) 53 | a = utils.to_variable(a) 54 | assert type(a) is Variable 55 | 56 | a = np.array([10.0], dtype='float32') 57 | a = utils.to_variable(a) 58 | assert type(a) is Variable 59 | 60 | with pytest.raises(ValueError) as e: 61 | a = 'hahaha' 62 | utils.to_variable(a) 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aorun: Deep Learning over PyTorch 2 | 3 | [![build](https://travis-ci.org/ramon-oliveira/aorun.svg?branch=master)](https://travis-ci.org/ramon-oliveira/aorun) 4 | [![coverage](https://coveralls.io/repos/github/ramon-oliveira/aorun/badge.svg)](https://coveralls.io/github/ramon-oliveira/aorun) 5 | [![Code Climate](https://codeclimate.com/github/ramon-oliveira/aorun/badges/gpa.svg)](https://codeclimate.com/github/ramon-oliveira/aorun) 6 | [![python](https://img.shields.io/pypi/pyversions/aorun.svg)](https://pypi.python.org/pypi/aorun) 7 | [![license](https://img.shields.io/github/license/ramon-oliveira/aorun.svg)](https://github.com/ramon-oliveira/aorun/blob/master/LICENSE) 8 | 9 | Aorun intends to implement an API similar to [Keras](https://keras.io), with PyTorch as the backend. 10 | 11 | ## Getting started 12 | 13 | Here is a simple regression example: 14 | 15 | ```python 16 | from aorun.models import Model 17 | from aorun.layers import Dense, Activation 18 | 19 | model = Model() 20 | model.add(Dense(10, input_dim=3)) 21 | model.add(Activation('relu')) 22 | model.add(Dense(1)) 23 | 24 | model.fit(X_train, y_train, loss='mse', optimizer='adam') 25 | 26 | y_pred = model.predict(X_test) 27 | ``` 28 | 29 | ## Install 30 | 31 | First of all, it's important to mention that this project is developed with **Python 3.6+** in mind. I do not recommend using Aorun with older versions. 32 | 33 | As a prerequisite, you have to install the latest stable version of [PyTorch](http://pytorch.org). 34 | 35 | Then you can install Aorun like any other Python package, with pip: 36 | ```bash 37 | $ pip install aorun 38 | ``` 39 | 40 | ## Why Aorun? 41 | 42 | [Aorun](https://en.wikipedia.org/wiki/Aorun) is a Dinosaur. Dinosaurs are cool. 
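## Probabilistic layers

Aorun also ships Bayesian layers trained with a variational loss. A minimal sketch, adapted from `examples/probabilistic_regression.py`:

```python
from aorun.models import Model
from aorun.layers import ProbabilisticDense, Activation
from aorun.losses import variational_loss

model = Model()
model.add(ProbabilisticDense(10, input_dim=3))
model.add(Activation('relu'))
model.add(ProbabilisticDense(1))

loss = variational_loss(model, 'mean_squared_error')
model.fit(X_train, y_train, loss=loss)
```

Because the weights are resampled on every forward pass, repeated `model.predict` calls return different draws from the learned posterior.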
43 | -------------------------------------------------------------------------------- /tests/test_optimizers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | import torch 5 | from torch.autograd import Variable 6 | from torch.nn import Parameter 7 | from aorun.losses import mean_squared_error 8 | from aorun import optimizers 9 | from aorun.optimizers import SGD 10 | from aorun.optimizers import Adam 11 | 12 | 13 | def test_optimizers_get(): 14 | with pytest.raises(Exception) as e: 15 | optimizers.get('UNKNOWN_TEST') 16 | 17 | with pytest.raises(Exception) as e: 18 | optimizers.get(123123) 19 | 20 | 21 | def test_sgd_without_params(): 22 | opt = optimizers.get('sgd') 23 | with pytest.raises(Exception) as e: 24 | opt.step() 25 | 26 | 27 | def test_sgd_learning_rate(): 28 | X = Variable(torch.rand(5, 3)) 29 | y = Variable(torch.rand(5)) 30 | w = Variable(torch.rand(3), requires_grad=True) 31 | opt = SGD(lr=0.1, params=[w]) 32 | 33 | o = X @ w 34 | loss = mean_squared_error(y, o) 35 | loss.backward() 36 | opt.step() 37 | 38 | assert loss > mean_squared_error(y, X @ w) 39 | 40 | 41 | def test_sgd_momentum(): 42 | X = Variable(torch.rand(5, 3)) 43 | y = Variable(torch.rand(5)) 44 | w = Parameter(torch.rand(3)) 45 | opt = SGD(lr=0.1, momentum=0.99, params=[w]) 46 | 47 | o = X @ w 48 | loss = mean_squared_error(y, o) 49 | loss.backward() 50 | opt.step() 51 | 52 | assert loss.data[0] > mean_squared_error(y, X @ w).data[0] 53 | 54 | 55 | def test_adam(): 56 | X = Variable(torch.rand(5, 3)) 57 | y = Variable(torch.rand(5)) 58 | w = Parameter(torch.rand(3)) 59 | opt = Adam(params=[w]) 60 | 61 | o = X @ w 62 | loss = mean_squared_error(y, o) 63 | loss.backward() 64 | opt.step() 65 | 66 | assert loss.data[0] > mean_squared_error(y, X @ w).data[0] 67 | -------------------------------------------------------------------------------- /aorun/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import torch 4 | import requests 5 | from torch import Tensor 6 | from torch.autograd import Variable 7 | import numpy as np 8 | from functools import wraps 9 | 10 | 11 | def shuffle_arrays(arrays): 12 | idxs = np.arange(len(arrays[0])) 13 | np.random.shuffle(idxs) 14 | return [a[idxs] for a in arrays] 15 | 16 | 17 | def split_arrays(arrays, proportion): 18 | """ 19 | The `proportion` fraction of samples goes to the last part. 20 | Examples: 21 | proportion = 0.7 22 | [30%] | [70%] 23 | proportion = 0.3 24 | [70%] | [30%] 25 | """ 26 | proportion = 1 - proportion 27 | split = int(len(arrays[0]) * proportion) 28 | return [(a[:split], a[split:]) for a in arrays] 29 | 30 | 31 | def to_tensor(a): 32 | if type(a) is np.ndarray: 33 | return torch.from_numpy(a) 34 | elif type(a) is Tensor or type(a) is Variable: 35 | return a 36 | else: 37 | raise ValueError('Unknown value type: {0}'.format(type(a))) 38 | 39 | 40 | def to_variable(a): 41 | a = to_tensor(a) 42 | if type(a) is Variable: 43 | return a 44 | else: 45 | return Variable(a) 46 | 47 | 48 | def to_numpy(a): 49 | if type(a) is Tensor: 50 | return a.numpy() 51 | elif type(a) is Variable: 52 | return a.data.numpy() 53 | elif type(a) is np.ndarray: 54 | return a 55 | else: 56 | raise ValueError('Unknown value type: {0}'.format(type(a))) 57 | 58 | 59 | def get_file(url, cache_subdir): 60 | path = os.path.expanduser(os.path.join('~/.aorun', cache_subdir)) 61 | os.makedirs(path, exist_ok=True) 62 | filepath = os.path.join(path, 
url.split('/')[-1]) 63 | if not os.path.exists(filepath): 64 | r = requests.get(url, stream=True) 65 | with open(filepath, 'wb') as f: 66 | for chunk in tqdm.tqdm(r.iter_content(chunk_size=1024)): 67 | if chunk: 68 | f.write(chunk) 69 | return filepath 70 | -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | from aorun import activations 5 | from aorun.activations import relu 6 | from aorun.activations import softmax 7 | from aorun.layers import Activation 8 | 9 | import numpy as np 10 | import torch 11 | from torch.autograd import Variable 12 | 13 | 14 | def test_get_unknown(): 15 | with pytest.raises(Exception) as e: 16 | activations.get('UNKNOWN_TEST') 17 | 18 | with pytest.raises(Exception) as e: 19 | activations.get(23452345) 20 | 21 | 22 | def test_custom_activation(): 23 | def custom(x): 24 | return x 25 | 26 | activation = activations.get(custom) 27 | x = torch.randn(10) 28 | assert torch.equal(x, activation(x)) 29 | 30 | 31 | def test_relu(): 32 | x = Variable(torch.randn(10, 10)) 33 | assert np.any(x.data.numpy() < 0.0) 34 | assert np.all(relu(x).data.numpy() >= 0.0) 35 | 36 | 37 | def test_get_relu(): 38 | x = Variable(torch.randn(10, 10)) 39 | assert np.any(x.data.numpy() < 0.0) 40 | assert np.all(activations.get('relu')(x).data.numpy() >= 0.0) 41 | 42 | 43 | def test_layer_relu(): 44 | x = Variable(torch.randn(10, 10)) 45 | l = Activation('relu') 46 | assert np.any(x.data.numpy() < 0.0) 47 | assert np.all(l.forward(x).data.numpy() >= 0.0) 48 | 49 | 50 | def test_softmax(): 51 | x = Variable(torch.randn(10, 10)) 52 | sum_softmax_x = torch.sum(softmax(x), dim=1).data.numpy() 53 | assert np.all(np.abs(sum_softmax_x - 1) <= 1e-6) 54 | 55 | 56 | def test_get_softmax(): 57 | x = Variable(torch.randn(10, 10)) 58 | softmax = activations.get('softmax') 59 | sum_softmax_x = torch.sum(softmax(x), dim=1).data.numpy() 60 | assert np.all(np.abs(sum_softmax_x - 1) <= 1e-6) 61 | 62 | 63 | def test_layer_softmax(): 64 | x = Variable(torch.randn(10, 10)) 65 | l = Activation('softmax') 66 | sum_softmax_x = torch.sum(l.forward(x), dim=1).data.numpy() 67 | assert np.all(np.abs(sum_softmax_x - 1) <= 1e-6) 68 | -------------------------------------------------------------------------------- /tests/test_losses.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import Variable 7 | from torch.nn import Parameter 8 | import functools 9 | from aorun.models import Model 10 | from aorun.layers import ProbabilisticDense 11 | from aorun.optimizers import SGD 12 | from aorun.losses import mean_squared_error 13 | from aorun.losses import binary_crossentropy 14 | from aorun.losses import categorical_crossentropy 15 | from aorun.losses import log_gaussian 16 | from aorun.losses import variational_loss 17 | from aorun import losses 18 | 19 | 20 | def test_get_unknown(): 21 | with pytest.raises(Exception) as e: 22 | losses.get('UNKNOWN_TEST') 23 | 24 | with pytest.raises(Exception) as e: 25 | losses.get(23452345) 26 | 27 | 28 | def test_mse_variable(): 29 | true = Variable(torch.Tensor([11, 11])) 30 | pred = Variable(torch.Tensor([10, 10])) 31 | 32 | loss = mean_squared_error(true, pred) 33 | 34 | assert type(loss) is Variable 35 | assert loss == 1 36 | 37 | 38 | def 
test_mse_tensor(): 39 | true = torch.Tensor([11, 11]) 40 | pred = torch.Tensor([10, 10]) 41 | loss = mean_squared_error(true, pred) 42 | assert loss == 1 43 | 44 | true = torch.Tensor([10, 10]) 45 | pred = torch.Tensor([10, 10]) 46 | loss = mean_squared_error(true, pred) 47 | assert loss == 0 48 | 49 | 50 | def test_binary_crossentropy(): 51 | true = torch.Tensor([1, 1, 1]) 52 | 53 | loss = binary_crossentropy(true, true) 54 | assert loss == 0 55 | 56 | pred = torch.Tensor([[0, 0, 0]]) 57 | loss = binary_crossentropy(true, pred) 58 | assert loss > 1 59 | 60 | 61 | def test_categorical_crossentropy(): 62 | true = torch.Tensor([[1, 0], [0, 1], [1, 0]]) 63 | 64 | loss = categorical_crossentropy(true, true) 65 | assert loss == 0 66 | 67 | pred = torch.Tensor([[0, 1], [0, 1], [1, 0]]) 68 | loss = categorical_crossentropy(true, pred) 69 | assert loss > 1 70 | 71 | 72 | def test_log_gaussian(): 73 | x = Variable(torch.Tensor([1, 1])) 74 | mu = Parameter(torch.Tensor([5, 3])) 75 | sigma = Parameter(torch.Tensor([4, 5])) 76 | loss = log_gaussian(x, mu, sigma) 77 | loss.mean().backward() 78 | assert np.all(loss.data.numpy() < 0) 79 | 80 | 81 | def test_variational_loss(): 82 | X = torch.randn(4, 4) 83 | y = torch.Tensor([[0, 1], [1, 0], [0, 1], [1, 0]]) 84 | 85 | model = Model( 86 | ProbabilisticDense(10, input_dim=4), 87 | ProbabilisticDense(2) 88 | ) 89 | 90 | opt = SGD(lr=0.01) 91 | variational = variational_loss(model, 'categorical_crossentropy') 92 | history = model.fit(X, y, loss=variational, optimizer=opt) 93 | assert history['loss'] == sorted(history['loss'], reverse=True) 94 | -------------------------------------------------------------------------------- /aorun/optimizers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | 4 | 5 | class Optimizer(object): 6 | 7 | def __init__(self, params=None, decay=0.0, epsilon=1e-8): 8 | self.params = params 9 | self.decay = decay 10 | self.epsilon = epsilon 11 | 12 | def clear_gradients(self): 13 | if self.params is not None: 14 | for p in self.params: 15 | p.grad.data.zero_() 16 | 17 | def step(self): 18 | if self.params is None: 19 | raise Exception('None parameters') 20 | 21 | 22 | class SGD(Optimizer): 23 | 24 | def __init__(self, lr=0.001, momentum=0.0, *args, **kwargs): 25 | super(SGD, self).__init__(*args, **kwargs) 26 | self.lr = lr 27 | self.momentum = momentum 28 | self.updates = [] 29 | 30 | def step(self): 31 | super(SGD, self).step() 32 | 33 | if len(self.updates) == 0: 34 | for p in self.params: 35 | update = torch.zeros(p.size()) 36 | self.updates.append(update) 37 | 38 | for i, (p, update) in enumerate(zip(self.params, self.updates)): 39 | cur_update = self.momentum * update + self.lr * p.grad.data 40 | p.data.sub_(cur_update) 41 | self.updates[i] = cur_update 42 | 43 | self.lr = max(1e-9, self.lr - self.decay) 44 | self.clear_gradients() 45 | 46 | 47 | class Adam(Optimizer): 48 | 49 | def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, *args, **kwargs): 50 | super(Adam, self).__init__(*args, **kwargs) 51 | self.lr = lr 52 | self.beta1 = beta1 53 | self.beta2 = beta2 54 | # average gradients 55 | self.m = {} 56 | # average gradients**2 57 | self.v = {} 58 | # timestep 59 | self.t = 0 60 | 61 | def step(self): 62 | super(Adam, self).step() 63 | self.t += 1 64 | 65 | if len(self.m) == 0: 66 | for p in self.params: 67 | self.m[p] = torch.zeros(p.size()) 68 | self.v[p] = torch.zeros(p.size()) 69 | 70 | for p in self.params: 71 | mt = self.beta1 * 
self.m[p] + (1 - self.beta1) * p.grad.data 72 | vt = self.beta2 * self.v[p] + (1 - self.beta2) * p.grad.data**2 73 | m = mt / (1 - self.beta1**self.t) 74 | v = vt / (1 - self.beta2**self.t) 75 | 76 | rate = self.lr / (torch.sqrt(v) + self.epsilon) 77 | p.data.sub_(rate * m) 78 | 79 | self.m[p] = mt 80 | self.v[p] = vt 81 | 82 | self.clear_gradients() 83 | 84 | # Alias 85 | sgd = SGD 86 | adam = Adam 87 | 88 | 89 | def get(obj): 90 | if hasattr(obj, 'step'): 91 | return obj 92 | elif type(obj) is str: 93 | if obj in globals(): 94 | return globals()[obj]() 95 | else: 96 | raise Exception(f'Unknown optimizer: {obj}') 97 | else: 98 | raise Exception('Optimizer must be an optimizer instance or str') 99 | -------------------------------------------------------------------------------- /aorun/losses.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import Tensor 4 | from torch.autograd import Variable 5 | from .layers import ProbabilisticDense 6 | 7 | 8 | def log_gaussian(x, mu, sigma): 9 | assert x.size() == mu.size() == sigma.size() 10 | 11 | log_sigma = torch.log(sigma) 12 | # log(2 * pi) == 1.8378770664093453 13 | log2pi_2 = Variable(Tensor([1.8378770664093453 / 2])) 14 | log2pi_2 = log2pi_2.expand_as(mu) 15 | 16 | return -log_sigma - log2pi_2 - (x - mu)**2 / (2 * sigma**2) 17 | 18 | 19 | def mean_squared_error(true, pred): 20 | return ((true - pred)**2).mean() 21 | 22 | 23 | def binary_crossentropy(true, pred, eps=1e-9): 24 | p1 = true * torch.log(pred + eps) 25 | p2 = (1 - true) * torch.log(1 - pred + eps) 26 | return torch.mean(-(p1 + p2)) 27 | 28 | 29 | def categorical_crossentropy(true, pred, eps=1e-9): 30 | return torch.mean(-torch.sum(true * torch.log(pred + eps), dim=1)) 31 | 32 | 33 | def variational_loss(model, negative_log_likelihood): 34 | negative_log_likelihood = get(negative_log_likelihood) 35 | prior_ratio = 0.5 36 | prior_mu = Variable(Tensor([0.0])) 37 | prior_sigma1 = Variable(Tensor([1.0])) 38 | prior_sigma2 = Variable(Tensor([0.5])) 39 | 40 | def loss(true, pred): 41 | log_p = Variable(torch.Tensor([0.0])) 42 | log_q = Variable(torch.Tensor([0.0])) 43 | for layer in model.layers: 44 | if type(layer) is ProbabilisticDense: 45 | # prior 46 | mu = prior_mu.expand_as(layer.W) 47 | sigma1 = prior_sigma1.expand_as(layer.W) 48 | sigma2 = prior_sigma2.expand_as(layer.W) 49 | p1 = prior_ratio * log_gaussian(layer.W, mu, sigma1) 50 | p2 = (1 - prior_ratio) * log_gaussian(layer.W, mu, sigma2) 51 | log_p += torch.sum(p1 + p2) 52 | 53 | mu = prior_mu.expand_as(layer.b) 54 | sigma1 = prior_sigma1.expand_as(layer.b) 55 | sigma2 = prior_sigma2.expand_as(layer.b) 56 | p1 = prior_ratio * log_gaussian(layer.b, mu, sigma1) 57 | p2 = (1 - prior_ratio) * log_gaussian(layer.b, mu, sigma2) 58 | log_p += torch.sum(p1 + p2) 59 | 60 | # posterior 61 | sigma = torch.log1p(torch.exp(layer.W_rho)) 62 | log_q += log_gaussian(layer.W, layer.W_mu, sigma).sum() 63 | sigma = torch.log1p(torch.exp(layer.b_rho)) 64 | log_q += log_gaussian(layer.b, layer.b_mu, sigma).sum() 65 | 66 | ll = -negative_log_likelihood(true, pred) 67 | return ((log_q - log_p) / model.batches - ll) / model.batch_size 68 | return loss 69 | 70 | # short-name aliases 71 | mse = mean_squared_error 72 | 73 | 74 | def get(obj): 75 | if callable(obj): 76 | return obj 77 | elif type(obj) is str: 78 | if obj in globals(): 79 | return globals()[obj] 80 | else: 81 | raise Exception(f'Unknown loss: {obj}') 82 | else: 83 | raise Exception('Loss must be a callable or str') 84
| -------------------------------------------------------------------------------- /tests/test_layers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import Variable 7 | from aorun.layers import Dense 8 | from aorun.layers import ProbabilisticDense 9 | from aorun.layers import Conv2D 10 | from aorun.layers import Activation 11 | from aorun.layers import Dropout 12 | from aorun.layers import Recurrent 13 | 14 | 15 | def test_dense_layer_output_dim(): 16 | l = Dense(5, input_dim=10) 17 | 18 | assert l.output_dim == 5 19 | 20 | 21 | def test_dense_layer_forward(): 22 | x = torch.randn(2, 10) 23 | l = Dense(5, input_dim=10) 24 | y = l.forward(x) 25 | 26 | assert y.size() == (2, 5) 27 | 28 | 29 | def test_dense_multiple_layers(): 30 | x = torch.randn(2, 10) 31 | l1 = Dense(5, input_dim=10) 32 | l2 = Dense(3, input_dim=5) 33 | 34 | y = l1.forward(x) 35 | assert y.size() == (2, 5) 36 | 37 | y = l2.forward(y) 38 | assert y.size() == (2, 3) 39 | 40 | 41 | def test_relu_output_size(): 42 | x = torch.randn(2, 2) 43 | l1 = Dense(3, input_dim=2) 44 | l2 = Activation('relu') 45 | 46 | y = l1.forward(x) 47 | y = l2.forward(y) 48 | 49 | assert y.size() == (2, 3) 50 | assert (y.data >= 0).sum() == 6 51 | 52 | 53 | def test_layer_get_params(): 54 | l = Dense(3, input_dim=3) 55 | assert len(l.params) == 2 56 | 57 | l = Activation('relu') 58 | assert len(l.params) == 0 59 | 60 | 61 | def test_layer_probabilistic_dense(): 62 | x = torch.randn(2, 10) 63 | l = ProbabilisticDense(5, input_dim=10) 64 | 65 | y1 = l.forward(x) 66 | assert y1.size() == (2, 5) 67 | 68 | y2 = l.forward(x) 69 | assert y2.size() == (2, 5) 70 | assert not torch.equal(y1.data, y2.data) 71 | 72 | 73 | def test_layer_probabilistic_dense_build(): 74 | x = torch.randn(2, 10) 75 | l = ProbabilisticDense(5) 76 | l.build(10) 77 | 78 | y1 = l.forward(x) 79 | assert y1.size() == (2, 5) 80 | 81 | y2 = l.forward(x) 82 | assert y2.size() == (2, 5) 83 | assert not torch.equal(y1.data, y2.data) 84 | 85 | 86 | def test_layer_conv2d(): 87 | x = torch.randn(2, 9, 9) 88 | layer = Conv2D(64, (3, 3), input_dim=[9, 9]) 89 | 90 | y1 = layer.forward(x) 91 | assert y1.size() == (2, 64, 7, 7) 92 | 93 | 94 | def test_layer_conv2d_params(): 95 | x = torch.randn(2, 3, 9, 9) 96 | layer = Conv2D(64, (3, 3), input_dim=[3, 9, 9]) 97 | 98 | assert len(layer.params) == 2 99 | 100 | 101 | def test_layer_dropout(): 102 | before = torch.ones(2, 3, 9, 9) 103 | layer = Dropout(p=0.6, input_dim=before.size()[1:]) 104 | after = layer.forward(before).data.numpy() 105 | assert np.sum(after == 0) <= np.prod(before.size()) / 2 106 | 107 | before = Variable(torch.ones(2, 3, 9, 9), requires_grad=True) 108 | layer = Dropout(p=0.6, input_dim=before.size()[1:]) 109 | after = layer.forward(before) 110 | loss = torch.mean(after) 111 | loss.backward() 112 | after = after.data.numpy() 113 | assert np.sum(after == 0) <= np.prod(before.size()) / 2 114 | 115 | 116 | def test_layer_recurrent(): 117 | X = Variable(torch.ones(2, 10, 3)) 118 | layer = Recurrent(units=20, length=10, input_dim=3) 119 | y1 = layer.forward(X) 120 | y2 = layer.forward(X) 121 | assert y1.size() == (2, 10, 20) 122 | assert torch.equal(y1.data, y2.data) 123 | 124 | 125 | def test_layer_recurrent_stateful(): 126 | X = Variable(torch.ones(2, 10, 3)) 127 | layer = Recurrent(units=20, length=10, input_dim=3, stateful=True) 128 | y1 = layer.forward(X) 129 | y2 = 
layer.forward(X) 130 | assert y1.size() == (2, 10, 20) 131 | assert not torch.equal(y1.data, y2.data) 132 | 133 | 134 | def test_layer_recurrent_stateful_clear_states(): 135 | X = Variable(torch.ones(2, 10, 3)) 136 | layer = Recurrent(units=20, length=10, input_dim=3, stateful=True) 137 | y1 = layer.forward(X) 138 | layer.clear_states() 139 | y2 = layer.forward(X) 140 | assert y1.size() == (2, 10, 20) 141 | assert torch.equal(y1.data, y2.data) 142 | -------------------------------------------------------------------------------- /aorun/models.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from tqdm import tqdm, trange 3 | import numpy as np 4 | import torch 5 | from torch.nn import MSELoss 6 | from torch.autograd import Variable 7 | from torch.optim import SGD 8 | 9 | from . import losses 10 | from . import optimizers 11 | from . import utils 12 | from .layers import Dropout 13 | 14 | 15 | class Model(object): 16 | 17 | def __init__(self, *layers): 18 | self.layers = list(layers) 19 | self.ready = False 20 | self.train = False 21 | 22 | @property 23 | def params(self): 24 | if not self.ready: 25 | self._build() 26 | return [p for layer in self.layers for p in layer.params] 27 | 28 | def _build(self): 29 | assert not self.ready 30 | for prev_layer, next_layer in zip(self.layers[:-1], self.layers[1:]): 31 | next_layer.build(prev_layer.output_dim) 32 | self.ready = True 33 | 34 | def add(self, layer): 35 | self.layers.append(layer) 36 | self.ready = False 37 | 38 | def evaluate(self, X, y, metric, batch_size=32): 39 | n_samples, *_ = X.size() 40 | begin = min(batch_size, n_samples) 41 | end = n_samples + (n_samples % batch_size) + 1 42 | metric_sum = 0 43 | for split in range(begin, end, batch_size): 44 | X_batch = Variable(X[(split - batch_size):split], volatile=True) 45 | y_batch = Variable(y[(split - batch_size):split], volatile=True) 46 | 47 | out_batch = self.predict(X_batch) 48 | value = metric(y_batch, out_batch) 49 | metric_sum += value.data[0] 50 | 51 | return metric_sum / max(1, (end // batch_size)) 52 | 53 | def forward(self, X): 54 | if not self.ready: 55 | self._build() 56 | y = self.layers[0].forward(X) 57 | for layer in self.layers[1:]: 58 | y = layer.forward(y) 59 | return y 60 | 61 | def fit(self, X, y, loss, optimizer='adam', batch_size=32, epochs=10, 62 | shuffle=True, val_split=0.0, val_data=None, verbose=2): 63 | """ 64 | verbose: 0, 1 or 2 65 | - 0 total silence 66 | - 1 only a progress bar for all training 67 | - 2 one progress bar for each epoch 68 | """ 69 | X = utils.to_numpy(X) 70 | y = utils.to_numpy(y) 71 | 72 | if shuffle: 73 | X, y = utils.shuffle_arrays([X, y]) 74 | 75 | if val_data is not None: 76 | X_val, y_val = val_data 77 | elif val_split > 0.0: 78 | (X, X_val), (y, y_val) = utils.split_arrays([X, y], val_split) 79 | else: 80 | X_val, y_val = None, None 81 | 82 | X = utils.to_tensor(X) 83 | y = utils.to_tensor(y) 84 | if X_val is not None: 85 | X_val = utils.to_tensor(X_val) 86 | y_val = utils.to_tensor(y_val) 87 | 88 | # create layers params if not created already 89 | if not self.ready: 90 | self._build() 91 | 92 | # set model params 93 | loss = losses.get(loss) 94 | optimizer = optimizers.get(optimizer) 95 | optimizer.params = self.params 96 | n_samples, *_ = X.size() 97 | batches = n_samples // batch_size + min(n_samples % batch_size, 1) 98 | self.loss = loss 99 | self.optimizer = optimizer 100 | self.batches = batches # used by optimizers 101 | self.batch_size = 
batch_size 102 | 103 | progress_bar = None 104 | epochs_iterator = range(epochs) 105 | if verbose == 1: 106 | epochs_iterator = trange(epochs, desc=f'Epoch 0') 107 | progress_bar = epochs_iterator 108 | 109 | # batches 110 | begin = min(batch_size, n_samples) 111 | end = n_samples + (n_samples % batch_size) + 1 112 | step = batch_size 113 | history = defaultdict(list) 114 | for epoch in epochs_iterator: 115 | epoch_iterator = range(begin, end, step) 116 | if verbose == 2: 117 | progress_bar = tqdm(epoch_iterator) 118 | if progress_bar is not None: 119 | progress_bar.set_description(f'Epoch {epoch+1}') 120 | 121 | loss_sum = 0 122 | for batch, split in enumerate(epoch_iterator, start=1): 123 | X_batch = Variable(X[(split - batch_size):split]) 124 | y_batch = Variable(y[(split - batch_size):split]) 125 | 126 | out_batch = self.forward(X_batch) 127 | loss_value = loss(y_batch, out_batch) 128 | loss_value.backward() 129 | optimizer.step() 130 | loss_sum += loss_value.data[0] 131 | if progress_bar is not None: 132 | progress_bar.set_postfix(loss=f'{loss_sum/batch:.4f}') 133 | progress_bar.update() 134 | 135 | history['loss'].append(loss_sum / batches) 136 | if X_val is not None: 137 | val_loss = self.evaluate(X_val, y_val, loss, batch_size) 138 | history['val_loss'].append(val_loss) 139 | progress_bar.set_postfix(loss=f'{loss_sum/batches:.4f}', 140 | val_loss=f'{val_loss:.4f}') 141 | progress_bar.refresh() 142 | 143 | if verbose == 2: 144 | progress_bar.close() 145 | 146 | return history 147 | 148 | def predict(self, X, probabilistic=False): 149 | if not self.ready: 150 | self._build() 151 | return_np = False 152 | if type(X) is np.ndarray: 153 | return_np = True 154 | y = self.layers[0].forward(X) 155 | for layer in self.layers[1:]: 156 | if probabilistic or type(layer) is not Dropout: 157 | y = layer.forward(y) 158 | if return_np: 159 | return utils.to_numpy(y) 160 | else: 161 | return y 162 | -------------------------------------------------------------------------------- /aorun/layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | from torch.nn import Parameter 6 | from torch.nn import Conv2d as TorchConv2D 7 | from torch.nn import RNN as TorchRecurrent 8 | from torch.nn import Linear as TorchDense 9 | from . import activations 10 | from . import initializers 11 | from . 
import utils 12 | 13 | 14 | class Layer(object): 15 | 16 | def __init__(self, input_dim=None, init='glorot_uniform'): 17 | self.input_dim = input_dim 18 | self.init = initializers.get(init) 19 | 20 | def forward(self, X): 21 | X = utils.to_variable(X) 22 | return X 23 | 24 | def build(self, input_dim): 25 | self.output_dim = input_dim 26 | 27 | @property 28 | def params(self): 29 | return tuple() 30 | 31 | 32 | class Activation(Layer): 33 | 34 | def __init__(self, activation, *args, **kwargs): 35 | super(Activation, self).__init__(*args, **kwargs) 36 | self.activation = activations.get(activation) 37 | 38 | def forward(self, X): 39 | X = super(Activation, self).forward(X) 40 | return self.activation(X) 41 | 42 | 43 | class Dense(Layer): 44 | 45 | def __init__(self, units, *args, **kwargs): 46 | super(Dense, self).__init__(*args, **kwargs) 47 | self.units = units 48 | self.output_dim = units 49 | if self.input_dim: 50 | self.build(self.input_dim) 51 | 52 | @property 53 | def params(self): 54 | return list(self.layer.parameters()) 55 | 56 | def build(self, input_dim): 57 | assert type(input_dim) is int 58 | self.input_dim = input_dim 59 | self.layer = TorchDense(self.input_dim, self.units) 60 | 61 | def forward(self, X): 62 | X = super(Dense, self).forward(X) 63 | return self.layer.forward(X) 64 | 65 | 66 | class ProbabilisticDense(Layer): 67 | 68 | def __init__(self, units, init='glorot_uniform', *args, **kwargs): 69 | super(ProbabilisticDense, self).__init__(*args, **kwargs) 70 | self.units = units 71 | self.output_dim = units 72 | self.init = initializers.get(init) 73 | if self.input_dim: 74 | self.build(self.input_dim) 75 | 76 | @property 77 | def params(self): 78 | return (self.W_mu, self.W_rho, self.b_mu, self.b_rho) 79 | 80 | def build(self, input_dim): 81 | self.input_dim = input_dim 82 | W_shape = [self.input_dim, self.output_dim] 83 | b_shape = [self.output_dim] 84 | self.W_mu = self.init(W_shape, self.input_dim, self.output_dim) 85 | self.W_rho = self.init(W_shape, self.input_dim, self.output_dim) 86 | self.b_mu = self.init(b_shape, self.input_dim, self.output_dim) 87 | self.b_rho = self.init(b_shape, self.input_dim, self.output_dim) 88 | 89 | def forward(self, X): 90 | X = super(ProbabilisticDense, self).forward(X) 91 | sigma_prior = math.exp(-3) 92 | W_eps = Variable(torch.zeros(self.input_dim, self.output_dim)) 93 | W_eps = torch.normal(W_eps, std=sigma_prior) 94 | self.W = W = self.W_mu + torch.log1p(torch.exp(self.W_rho)) * W_eps 95 | b_eps = Variable(torch.zeros(self.output_dim)) 96 | b_eps = torch.normal(b_eps, std=sigma_prior) 97 | self.b = b = self.b_mu + torch.log1p(torch.exp(self.b_rho)) * b_eps 98 | XW = X @ W 99 | return XW + b.expand_as(XW) 100 | 101 | 102 | class Conv2D(Layer): 103 | 104 | def __init__(self, filters, kernel_size, stride=1, *args, **kwargs): 105 | super(Conv2D, self).__init__(*args, **kwargs) 106 | self.filters = filters 107 | self.kernel_size = kernel_size 108 | self.stride = stride 109 | if self.input_dim is not None: 110 | self.build(self.input_dim) 111 | 112 | @property 113 | def params(self): 114 | return list(self.layer.parameters()) 115 | 116 | def build(self, input_dim): 117 | input_dim = list(input_dim) 118 | assert len(input_dim) >= 2 119 | if len(input_dim) == 2: 120 | in_channels = 1 121 | input_dim = [1] + input_dim 122 | else: 123 | in_channels = input_dim[0] 124 | self.input_dim = input_dim 125 | d1 = (input_dim[1] - self.kernel_size[0]) / self.stride + 1 126 | d2 = (input_dim[2] - self.kernel_size[1]) / self.stride + 1 127 | 
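# 'valid' (unpadded) convolution: each spatial dimension shrinks to (input - kernel) / stride + 1, which is what d1 and d2 compute above.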
self.output_dim = [self.filters, d1, d2] 128 | self.layer = TorchConv2D(in_channels, self.filters, 129 | kernel_size=self.kernel_size, 130 | stride=self.stride) 131 | 132 | def forward(self, X): 133 | X = super(Conv2D, self).forward(X) 134 | X = X.view(-1, *self.input_dim) 135 | return self.layer.forward(X) 136 | 137 | 138 | class Dropout(Layer): 139 | 140 | def __init__(self, p=0.5, *args, **kwargs): 141 | super(Dropout, self).__init__(*args, **kwargs) 142 | self.p = p 143 | 144 | def forward(self, X): 145 | X = super(Dropout, self).forward(X) 146 | eps = torch.Tensor(*X.size()) 147 | eps.fill_(self.p) 148 | eps = Variable(torch.bernoulli(eps)) 149 | return X * eps 150 | 151 | 152 | class Recurrent(Layer): 153 | 154 | def __init__(self, units, length, stateful=False, *args, **kwargs): 155 | super(Recurrent, self).__init__(*args, **kwargs) 156 | self.units = units 157 | self.length = length 158 | self.output_dim = [length, units] 159 | self.stateful = stateful 160 | self.states = None 161 | if self.input_dim is not None: 162 | self.build(self.input_dim) 163 | 164 | @property 165 | def params(self): 166 | return list(self.layer.parameters()) 167 | 168 | def build(self, input_dim): 169 | self.input_dim = input_dim 170 | self.layer = TorchRecurrent(self.input_dim, self.units, self.length) 171 | 172 | def clear_states(self): 173 | self.states = None 174 | 175 | def forward(self, X): 176 | X = super(Recurrent, self).forward(X) 177 | if self.stateful and self.states is not None: 178 | outputs, self.states = self.layer.forward(X, self.states) 179 | else: 180 | outputs, self.states = self.layer.forward(X) 181 | 182 | return outputs 183 | 184 | 185 | class Flatten(Layer): 186 | 187 | def __init__(self, *args, **kwargs): 188 | super(Flatten, self).__init__(*args, **kwargs) 189 | 190 | def build(self, input_dim): 191 | self.input_dim = input_dim 192 | self.output_dim = int(np.prod(input_dim)) 193 | 194 | def forward(self, X): 195 | X = super(Flatten, self).forward(X) 196 | X = X.view(X.size()[0], self.output_dim) 197 | return X 198 | 199 | 200 | class TimeDistributed(object): 201 | 202 | def __init__(self, layer): 203 | self.layer = layer 204 | 205 | @property 206 | def params(self): 207 | return self.layer.params 208 | 209 | def build(self, input_dim): 210 | length, dim = input_dim 211 | return self.layer.build(dim) 212 | 213 | def forward(self, X): 214 | batch_size, length, dim = X.size() 215 | out = self.layer.forward(X.view(batch_size * length, dim)) 216 | return out.view(batch_size, length, out.size(1)) 217 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from .context import aorun 3 | 4 | import numpy as np 5 | import torch 6 | from aorun.models import Model 7 | from aorun.layers import Dense 8 | from aorun.layers import Conv2D 9 | from aorun.layers import Dropout 10 | from aorun.layers import Recurrent 11 | from aorun.layers import Activation 12 | from aorun.layers import Flatten 13 | from aorun.layers import TimeDistributed 14 | from aorun.optimizers import SGD 15 | from aorun.losses import mean_squared_error 16 | 17 | 18 | def test_model_constructor_empty(): 19 | model = Model() 20 | 21 | assert len(model.layers) == 0 22 | 23 | 24 | def test_model_constructor_layers(): 25 | model = Model( 26 | Dense(10), 27 | Activation('relu'), 28 | Dense(1) 29 | ) 30 | 31 | assert len(model.layers) == 3 32 | assert type(model.layers[0]) == Dense 33 
| assert type(model.layers[1]) == Activation 34 | 35 | 36 | def test_model_add_layers(): 37 | model = Model() 38 | model.add(Dense(10)) 39 | model.add(Activation('relu')) 40 | model.add(Dense(1)) 41 | 42 | assert len(model.layers) == 3 43 | assert type(model.layers[0]) == Dense 44 | assert type(model.layers[1]) == Activation 45 | 46 | 47 | def test_model_forward(): 48 | model = Model( 49 | Dense(10, input_dim=4), 50 | Dense(1), 51 | Dense(20) 52 | ) 53 | 54 | x = torch.randn(2, 4) 55 | y = model.forward(x) 56 | 57 | assert y.size() == (2, 20) 58 | 59 | model = Model( 60 | Dense(10, input_dim=4), 61 | Dense(1), 62 | Dense(20) 63 | ) 64 | 65 | x = torch.randn(2, 4) 66 | y = model.predict(x) 67 | 68 | assert y.size() == (2, 20) 69 | 70 | 71 | def test_model_simple_fit(): 72 | x = torch.rand(20, 4) 73 | y = torch.rand(20, 10) 74 | 75 | model = Model( 76 | Dense(10, input_dim=x.size()[-1]), 77 | Activation('relu'), 78 | Dense(5), 79 | Activation('relu'), 80 | Dense(y.size()[-1]) 81 | ) 82 | 83 | opt = SGD(lr=0.01, momentum=0.9) 84 | loss = mean_squared_error 85 | history = model.fit(x, y, loss=loss, optimizer='sgd', epochs=10, verbose=1) 86 | 87 | assert len(history['loss']) == 10 88 | assert all(type(v) is float for v in history['loss']) 89 | assert history['loss'] == sorted(history['loss'], reverse=True) 90 | 91 | 92 | def test_model_fit_unknown_loss(): 93 | x = torch.rand(20, 4) 94 | y = torch.rand(20, 10) 95 | 96 | model = Model( 97 | Dense(10, input_dim=x.size()[-1]), 98 | Activation('relu'), 99 | Dense(5), 100 | Activation('relu'), 101 | Dense(y.size()[-1]) 102 | ) 103 | 104 | assert len(model.params) > 0 105 | 106 | with pytest.raises(Exception) as e: 107 | model.fit(x, y, loss='UNKNOWN_TEST', batch_size=10, epochs=5) 108 | 109 | 110 | def test_model_loss_str_param(): 111 | x = torch.rand(20, 4) 112 | y = torch.rand(20, 10) 113 | 114 | model = Model( 115 | Dense(10, input_dim=x.size()[-1]), 116 | Activation('relu'), 117 | Dense(5), 118 | Activation('relu'), 119 | Dense(y.size()[-1]) 120 | ) 121 | 122 | opt = SGD(lr=0.01, momentum=0.9) 123 | 124 | history = model.fit(x, y, loss='mse', optimizer=opt, epochs=10) 125 | assert len(history['loss']) == 10 126 | assert all(type(v) is float for v in history['loss']) 127 | assert history['loss'] == sorted(history['loss'], reverse=True) 128 | 129 | loss = 'mean_squared_error' 130 | history = model.fit(x, y, loss=loss, optimizer=opt, epochs=10) 131 | assert len(history['loss']) == 10 132 | assert all(type(v) is float for v in history['loss']) 133 | assert history['loss'] == sorted(history['loss'], reverse=True) 134 | 135 | 136 | def test_model_custom_loss(): 137 | x = torch.rand(20, 4) 138 | y = torch.rand(20, 10) 139 | 140 | model = Model( 141 | Dense(10, input_dim=x.size()[-1]), 142 | Activation('relu'), 143 | Dense(5), 144 | Activation('relu'), 145 | Dense(y.size()[-1]) 146 | ) 147 | 148 | opt = SGD(lr=0.01, momentum=0.9) 149 | 150 | def mae(y_true, y_pred): 151 | return torch.mean(torch.abs(y_true - y_pred)) 152 | 153 | history = model.fit(x, y, loss=mae, optimizer=opt, epochs=10) 154 | assert len(history['loss']) == 10 155 | assert all(type(v) is float for v in history['loss']) 156 | assert history['loss'] == sorted(history['loss'], reverse=True) 157 | 158 | 159 | def test_model_numpy_friendly(): 160 | X = np.random.normal(size=[10, 10]).astype('float32') 161 | y = np.random.normal(size=[10, 1]).astype('float32') 162 | 163 | model = Model( 164 | Dense(10, input_dim=X.shape[-1]), 165 | Activation('relu'), 166 | Dense(5), 167 | 
Activation('relu'), 168 | Dense(y.shape[-1]) 169 | ) 170 | history = model.fit(X, y=y, loss='mse', optimizer='sgd', epochs=10) 171 | 172 | y_pred = model.predict(X) 173 | assert type(y_pred) is np.ndarray 174 | 175 | assert len(history['loss']) == 10 176 | assert all(type(v) is float for v in history['loss']) 177 | assert history['loss'] == sorted(history['loss'], reverse=True) 178 | 179 | 180 | def test_model_adam_optmizer(): 181 | X = np.random.normal(size=[10, 10]).astype('float32') 182 | y = np.random.normal(size=[10, 1]).astype('float32') 183 | 184 | model = Model( 185 | Dense(10, input_dim=X.shape[-1]), 186 | Activation('relu'), 187 | Dense(5), 188 | Activation('relu'), 189 | Dense(y.shape[-1]) 190 | ) 191 | history = model.fit(X, y=y, loss='mse', optimizer='adam', epochs=10) 192 | 193 | y_pred = model.predict(X) 194 | assert type(y_pred) is np.ndarray 195 | 196 | assert len(history['loss']) == 10 197 | assert all(type(v) is float for v in history['loss']) 198 | assert history['loss'] == sorted(history['loss'], reverse=True) 199 | 200 | 201 | def test_model_validation_split(): 202 | X = np.random.normal(size=[10, 10]).astype('float32') 203 | y = np.random.normal(size=[10, 1]).astype('float32') 204 | 205 | model = Model( 206 | Dense(10, input_dim=X.shape[-1]), 207 | Activation('relu'), 208 | Dense(5), 209 | Activation('relu'), 210 | Dense(y.shape[-1]) 211 | ) 212 | history = model.fit(X, y=y, loss='mse', val_split=0.1) 213 | 214 | y_pred = model.predict(X) 215 | assert type(y_pred) is np.ndarray 216 | 217 | assert 'loss' in history 218 | assert 'val_loss' in history 219 | assert all(type(v) is float for v in history['loss']) 220 | assert all(type(v) is float for v in history['val_loss']) 221 | assert history['loss'] == sorted(history['loss'], reverse=True) 222 | 223 | 224 | def test_model_validation_data(): 225 | X = np.random.normal(size=[10, 10]).astype('float32') 226 | y = np.random.normal(size=[10, 1]).astype('float32') 227 | 228 | model = Model( 229 | Dense(10, input_dim=X.shape[-1]), 230 | Activation('relu'), 231 | Dense(5), 232 | Activation('relu'), 233 | Dense(y.shape[-1]) 234 | ) 235 | history = model.fit(X, y=y, loss='mse', val_data=(X, y)) 236 | 237 | y_pred = model.predict(X) 238 | assert type(y_pred) is np.ndarray 239 | 240 | assert 'loss' in history 241 | assert 'val_loss' in history 242 | assert all(type(v) is float for v in history['loss']) 243 | assert all(type(v) is float for v in history['val_loss']) 244 | assert history['loss'] == sorted(history['loss'], reverse=True) 245 | 246 | 247 | def test_model_conv2d(): 248 | X = np.random.normal(size=[10, 3, 10, 10]).astype('float32') 249 | y = np.random.normal(size=[10, 1]).astype('float32') 250 | 251 | model = Model( 252 | Conv2D(4, kernel_size=(3, 3), input_dim=X.shape[1:]), 253 | Flatten(), 254 | Dense(5), 255 | Activation('relu'), 256 | Dense(y.shape[-1]) 257 | ) 258 | history = model.fit(X, y=y, loss='mse', val_data=(X, y)) 259 | 260 | y_pred = model.predict(X) 261 | assert type(y_pred) is np.ndarray 262 | 263 | assert 'loss' in history 264 | assert 'val_loss' in history 265 | assert all(type(v) is float for v in history['loss']) 266 | assert all(type(v) is float for v in history['val_loss']) 267 | assert history['loss'] == sorted(history['loss'], reverse=True) 268 | 269 | 270 | def test_model_conv2d_dropout(): 271 | X = np.random.normal(size=[10, 3, 10, 10]).astype('float32') 272 | y = np.random.normal(size=[10, 1]).astype('float32') 273 | 274 | model = Model( 275 | Conv2D(4, kernel_size=(3, 3), 
input_dim=X.shape[1:]), 276 | Flatten(), 277 | Dense(5), 278 | Activation('relu'), 279 | Dense(5), 280 | Dropout(0.5), 281 | Activation('relu'), 282 | Dense(y.shape[-1]) 283 | ) 284 | history = model.fit(X, y=y, loss='mse', epochs=10, val_data=(X, y)) 285 | 286 | y_pred = model.predict(X) 287 | assert type(y_pred) is np.ndarray 288 | 289 | assert 'loss' in history 290 | assert 'val_loss' in history 291 | assert all(type(v) is float for v in history['loss']) 292 | assert all(type(v) is float for v in history['val_loss']) 293 | assert history['val_loss'] == sorted(history['val_loss'], reverse=True) 294 | 295 | 296 | def test_model_recurrent(): 297 | X = np.random.normal(size=[2, 3, 4]).astype('float32') 298 | y = np.random.normal(size=[2, 3, 2]).astype('float32') 299 | 300 | model = Model( 301 | Recurrent(units=2, length=3, input_dim=4), 302 | Activation('relu') 303 | ) 304 | history = model.fit(X, y, loss='mse') 305 | 306 | y_pred = model.predict(X) 307 | assert type(y_pred) is np.ndarray 308 | 309 | assert 'loss' in history 310 | assert history['loss'] == sorted(history['loss'], reverse=True) 311 | 312 | 313 | def test_model_recurrent_time_distributed(): 314 | X = np.random.normal(size=[2, 3, 4]).astype('float32') 315 | y = np.random.normal(size=[2, 3, 10]).astype('float32') 316 | 317 | model = Model( 318 | Recurrent(units=2, length=3, input_dim=4), 319 | Activation('relu'), 320 | TimeDistributed(Dense(units=10)), 321 | ) 322 | history = model.fit(X, y, loss='mse') 323 | y_pred = model.predict(X) 324 | assert history['loss'] == sorted(history['loss'], reverse=True) 325 | --------------------------------------------------------------------------------