├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── pyfunt ├── __init__.py ├── affine.py ├── batch_normalization.py ├── c_add_table.py ├── class_nll_criterion.py ├── concat_table.py ├── container.py ├── criterion.py ├── dropout.py ├── examples │ ├── __init__.py │ ├── model_testing │ │ └── test_model.py │ └── residual_networks │ │ ├── __init__.py │ │ ├── resnet.py │ │ └── train-cifar.py ├── identity.py ├── im2col.py ├── im2col_cyt.pyx ├── linear.py ├── log_soft_max.py ├── module.py ├── mul_constant.py ├── optim.py ├── padding.py ├── parallel.py ├── relu.py ├── reshape.py ├── sequential.py ├── setup.py ├── sigmoid.py ├── soft_max.py ├── solver.py ├── spatial_average_pooling.py ├── spatial_batch_normalitazion.py ├── spatial_convolution.py ├── spatial_full_convolution.py ├── spatial_max_pooling.py ├── spatial_reflection_padding.py ├── spatial_replication_padding.py ├── spatial_up_sampling_nearest.py ├── tanh.py ├── threshold.py ├── utils │ ├── __init__.py │ ├── gradient_check.py │ ├── load_torch_model.py │ └── vis_utils.py └── view.py ├── requirements.txt ├── setup.py └── tools └── cythonize.py /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | CIFAR_DATASET_PATH 3 | .ipynb_checkpoints/ 4 | *.ipynb 5 | test.py 6 | 7 | # Editor temporary/working/backup files # 8 | ######################################### 9 | .#* 10 | [#]*# 11 | *~ 12 | *$ 13 | *.bak 14 | .idea/* 15 | *.kdev4 16 | *.org 17 | .project 18 | .pydevproject 19 | *.rej 20 | .settings/ 21 | .*.sw[nop] 22 | .sw[nop] 23 | *.tmp 24 | *.vim 25 | tags 26 | 27 | # Compiled source # 28 | ################### 29 | *.a 30 | *.com 31 | *.class 32 | *.dll 33 | *.exe 34 | *.l[ao] 35 | *.o 36 | *.py[ocd] 37 | *.so 38 | _configtest.c 39 | 40 | # Packages # 41 | ############ 42 | # it's better to unpack these files and commit the raw source 43 | # git has its own built in compression methods 44 | *.7z 45 | *.bz2 46 | *.bzip2 47 | *.dmg 48 | *.gz 49 | *.iso 50 | *.jar 51 | *.rar 52 | *.tar 53 | *.tbz2 54 | *.tgz 55 | *.zip 56 | 57 | # Python files # 58 | ################ 59 | # setup.py working directory 60 | build 61 | # sphinx build directory 62 | doc/_build 63 | # cython files 64 | cythonize.dat 65 | # setup.py dist directory 66 | dist 67 | # Egg metadata 68 | *.egg-info 69 | # tox testing tool 70 | .tox 71 | # The shelf plugin uses this dir 72 | ./.shelf 73 | MANIFEST 74 | # distutils configuration 75 | site.cfg 76 | # other temporary files 77 | .coverage 78 | .deps 79 | .libs 80 | 81 | # Paver generated files # 82 | ######################### 83 | /release 84 | 85 | # Logs and databases # 86 | ###################### 87 | *.log 88 | *.sql 89 | *.sqlite 90 | 91 | # Patches # 92 | ########### 93 | *.patch 94 | *.diff 95 | 96 | # OS generated files # 97 | ###################### 98 | .directory 99 | .fseventsd 100 | .DS_Store* 101 | .gdb_history 102 | .VolumeIcon.icns 103 | ehthumbs.db 104 | Icon? 
105 | Thumbs.db 106 | 107 | # Documentation generated files # 108 | ################################# 109 | doc/frontpage/build 110 | doc/source/generated 111 | 112 | # Things specific to this project # 113 | ################################### 114 | 115 | pyfunt/version.py 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Daniele Ciriello 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include *.txt 3 | # Top-level build scripts 4 | include setup.py setupegg.py bscript bento.info 5 | # All source files 6 | recursive-include pyfunt * 7 | # All documentation 8 | recursive-include doc * 9 | # Add build and testing tools 10 | include tox.ini 11 | recursive-include tools * 12 | # Cached Cython signatures 13 | include cythonize.dat 14 | # Exclude what we don't want to include 15 | recursive-exclude pyfunt/linalg/src/id_dist/src *_subr_*.f 16 | prune doc/build 17 | prune doc/source/generated 18 | prune */__pycache__ 19 | global-exclude *.pyc *~ *.bak *.swp *.pyo -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyFunt (/paɪfʊnt/) 2 | 3 | ![Project frozen](https://img.shields.io/badge/status-frozen-blue.png) ![Project unmaintained](https://img.shields.io/badge/project-unmaintained-red.svg) 4 | 5 | Pythonic Deep Learning Framework (WIP and CPU only) inspired by [Torch](http://torch.ch)'s [Neural Network package](https://github.com/torch/nn). 
6 | 7 | ## Requirements 8 | 9 | - [Python 2.7](https://www.python.org/) 10 | - [Cython](cython.org/) 11 | - [numpy](www.numpy.org/) 12 | - [torchfile](https://github.com/bshillingford/python-torchfile/) 13 | 14 | 15 | ## Installation 16 | 17 | Get [pip](https://pypi.python.org/pypi/pip) and run: 18 | 19 | pip install git+git://github.com/dnlcrl/PyFunt.git 20 | 21 | ## Usage 22 | 23 | Check the [examples folder](https://github.com/dnlcrl/PyFunt/tree/master/pyfunt/examples) 24 | 25 | ### Example: Parametric Residual Model 26 | 27 | Parametric models can be built easily thanks to the module structure: 28 | 29 | from pyfunt import (SpatialConvolution, SpatialBatchNormalization, 30 | SpatialAveragePooling, Sequential, ReLU, Linear, 31 | Reshape, LogSoftMax, Padding, Identity, ConcatTable, 32 | CAddTable) 33 | 34 | def residual_layer(n_channels, n_out_channels=None, stride=None): 35 | n_out_channels = n_out_channels or n_channels 36 | stride = stride or 1 37 | 38 | convs = Sequential() 39 | add = convs.add 40 | add(SpatialConvolution( 41 | n_channels, n_out_channels, 3, 3, stride, stride, 1, 1)) 42 | add(SpatialBatchNormalization(n_out_channels)) 43 | add(SpatialConvolution(n_out_channels, n_out_channels, 3, 3, 1, 1, 1, 1)) 44 | add(SpatialBatchNormalization(n_out_channels)) 45 | 46 | if stride > 1: 47 | shortcut = Sequential() 48 | shortcut.add(SpatialAveragePooling(2, 2, stride, stride)) 49 | shortcut.add(Padding(1, (n_out_channels - n_channels)/2, 3)) 50 | else: 51 | shortcut = Identity() 52 | 53 | res = Sequential() 54 | res.add(ConcatTable().add(convs).add(shortcut)).add(CAddTable()) 55 | res.add(ReLU(True)) 56 | return res 57 | 58 | 59 | def resnet(n_size, num_starting_filters, reg): 60 | nfs = num_starting_filters 61 | model = Sequential() 62 | add = model.add 63 | add(SpatialConvolution(3, nfs, 3, 3, 1, 1, 1, 1)) 64 | add(SpatialBatchNormalization(nfs)) 65 | add(ReLU()) 66 | 67 | for i in xrange(1, n_size): 68 | add(residual_layer(nfs)) 69 | add(residual_layer(nfs, 2*nfs, 2)) 70 | 71 | for i in xrange(1, n_size-1): 72 | add(residual_layer(2*nfs)) 73 | add(residual_layer(2*nfs, 4*nfs, 2)) 74 | 75 | for i in xrange(1, n_size-1): 76 | add(residual_layer(4*nfs)) 77 | 78 | add(SpatialAveragePooling(8, 8)) 79 | add(Reshape(nfs*4)) 80 | add(Linear(nfs*4, 10)) 81 | add(LogSoftMax()) 82 | return model 83 | 84 | --- 85 | 86 | Check the Torch documentation for more informations about the implemented layers (pyfunt is more or less a python port of torch/nn): [https://github.com/torch/nn/blob/master/doc/index.md](https://github.com/torch/nn/blob/master/doc/index.md) 87 | -------------------------------------------------------------------------------- /pyfunt/__init__.py: -------------------------------------------------------------------------------- 1 | from affine import Affine 2 | from batch_normalization import BatchNormalization 3 | from c_add_table import CAddTable 4 | from class_nll_criterion import ClassNLLCriterion 5 | from concat_table import ConcatTable 6 | from container import Container 7 | from criterion import Criterion 8 | from dropout import Dropout 9 | from identity import Identity 10 | from linear import Linear 11 | from log_soft_max import LogSoftMax 12 | from module import Module 13 | from mul_constant import MulConstant 14 | from padding import Padding 15 | from parallel import Parallel 16 | from relu import ReLU 17 | from reshape import Reshape 18 | from sequential import Sequential 19 | from sigmoid import Sigmoid 20 | from soft_max import SoftMax 21 | from solver import 
Solver 22 | from spatial_average_pooling import SpatialAveragePooling 23 | from spatial_batch_normalitazion import SpatialBatchNormalization 24 | from spatial_convolution import SpatialConvolution 25 | from spatial_full_convolution import SpatialFullConvolution 26 | from spatial_replication_padding import SpatialReplicationPadding 27 | from spatial_reflection_padding import SpatialReflectionPadding 28 | from spatial_max_pooling import SpatialMaxPooling 29 | from spatial_up_sampling_nearest import SpatialUpSamplingNearest 30 | from tanh import Tanh 31 | from threshold import Threshold 32 | from view import View 33 | 34 | from . import * 35 | -------------------------------------------------------------------------------- /pyfunt/affine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | 7 | 8 | class Affine(Module): 9 | 10 | def __init__(self, input_size, output_size, bias=False): 11 | super(Affine, self).__init__() 12 | self.weight = np.ndarray(output_size, input_size) 13 | self.grad_weight = np.ndarray(output_size, input_size) 14 | if bias: 15 | self.bias = np.ndarray(output_size) 16 | self.grad_bias = np.ndarray(output_size) 17 | self.reset() 18 | 19 | def no_bias(self): 20 | self.bias = None 21 | self.grad_bias = None 22 | return self 23 | 24 | def reset(self, stdv=None): 25 | if not stdv: 26 | stdv = 1./np.sqrt(self.weight.shape[2]) 27 | self.weight = np.uniform(-stdv, stdv, self.weight.shape) 28 | self.bias = np.uniform(-stdv, stdv, self.weight.shape[0]) 29 | 30 | def update_output(self, x): 31 | w = self.weight 32 | b = self.bias or np.zeros(self.weight.shape[0]) 33 | self.out = x.reshape(x.shape[0], -1).dot(w) + b 34 | self.x = x 35 | return self.output 36 | 37 | def update_grad_input(self, input, grad_output): 38 | x, w = self.x, self.weight 39 | self.grad_input = grad_output.dot(w.T).reshape(x.shape) 40 | return self.grad_input 41 | 42 | def acc_grad_parameters(self, x, grad_output, scale=1): 43 | x = self.x 44 | self.grad_weight = x.reshape(x.shape[0], -1).T.dot(grad_output) 45 | if self.bias: 46 | self.grad_bias = np.sum(grad_output, axis=0) 47 | 48 | def clear_state(self): 49 | pass 50 | 51 | def __str__(self): 52 | pass 53 | -------------------------------------------------------------------------------- /pyfunt/batch_normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | 7 | 8 | class BatchNormalization(Module): 9 | 10 | def __init__(self, n_output, eps=1e-5, momentum=0.1, affine=True): 11 | super(BatchNormalization, self).__init__() 12 | self.eps = eps 13 | self.momentum = momentum 14 | self.train = True 15 | self.running_mean = np.zeros(n_output) 16 | self.running_var = np.zeros(n_output) 17 | n_dim = 2 18 | if affine: 19 | self.weight = np.ndarray(n_output) 20 | self.bias = np.ndarray(n_output) 21 | self.grad_weight = np.ndarray(n_output) 22 | self.grad_bias = np.ndarray(n_output) 23 | else: 24 | self.weight = None 25 | self.bias = None 26 | self.grad_weight = None 27 | self.grad_bias = None 28 | self.reset() 29 | 30 | def reset(self): 31 | if self.weight is not None: 32 | self.weight[:] = np.random.uniform(size=len(self.weight))[:] 33 | if self.bias is not None: 34 | self.bias[:] = np.zeros(len(self.bias))[:] 35 | self.running_mean = np.zeros(len(self.running_mean)) 36 | 
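        # Running estimates restart at mean 0 / variance 1 here; update_output()
        # then blends fresh batch statistics into them using `momentum`.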
self.running_var = np.ones(len(self.running_var)) 37 | 38 | def check_input_dim(self, x): 39 | i_dim = len(x.shape) 40 | if i_dim != self.n_dim or (i_dim != self.n_dim - 1 and self.train is not False): 41 | raise Exception('TODO ERROR :(') 42 | # feast_dim = (i_dim == self.n_dim -1) and 1 or 2 43 | # local featDim = (iDim == self.nDim - 1) and 1 or 2 44 | # assert(input:size(featDim) == self.running_mean:nElement(), string.format( 45 | # 'got %d-feature tensor, expected %d', 46 | # input:size(featDim), self.running_mean:nElement())) 47 | 48 | def make_contigous(self, x, grad_output): 49 | #TODO 50 | pass 51 | 52 | 53 | def update_output(self, x): 54 | 55 | eps = self.eps 56 | momentum = self.momentum 57 | N, D = x.shape 58 | running_mean = self.running_mean 59 | running_var = self.running_var 60 | 61 | if self.train: 62 | mean = 1. / N * np.sum(x, axis=0) 63 | 64 | xmu = x - mean 65 | 66 | carre = xmu*xmu 67 | 68 | var = 1. / N * np.sum(carre, axis=0) 69 | 70 | sqrtvar = np.sqrt(var + eps) 71 | 72 | invstd = 1. / sqrtvar 73 | 74 | running_mean = momentum * mean + (1. - momentum) * running_mean 75 | 76 | unbiased_var = np.sum(carre, axis=0)/(N - 1.) 77 | 78 | running_var = momentum * unbiased_var + \ 79 | (1. - momentum) * running_var 80 | 81 | self.xmu = xmu 82 | self.invstd = invstd 83 | 84 | else: 85 | mean = running_mean 86 | invstd = 1. / np.sqrt(running_var + eps) 87 | 88 | out = ((x - mean) * invstd) 89 | if self.weight is not None: 90 | out *= self.weight 91 | if self.bias is not None: 92 | out += self.bias 93 | #out = ((x - mean) * invstd) * self.weight + self.bias 94 | # Store the updated running means back into bn_param 95 | self.running_mean = np.array(running_mean, copy=True) 96 | self.running_var = np.array(running_var, copy=True) 97 | self.output = out 98 | 99 | return self.output 100 | 101 | def update_grad_input(self, x, grad_output, scale=1): 102 | 103 | xmu, invstd = self.xmu, self.invstd 104 | 105 | N, D = grad_output.shape 106 | 107 | _sum = np.sum(grad_output, axis=0) 108 | dotp = np.sum((xmu * grad_output), axis=0) 109 | 110 | k = 1. / N * dotp * invstd * invstd 111 | dx = xmu * k 112 | 113 | dmean = 1. 
/ N * _sum 114 | dx = (grad_output - dmean - dx) * invstd * self.weight 115 | 116 | self.grad_weight[:] = dotp * invstd 117 | 118 | self.grad_bias[:] = _sum 119 | self.grad_input = dx 120 | 121 | return self.grad_input 122 | 123 | # def backward(self, x, grad_output, scale=1): 124 | # return self.update_grad_input(x, grad_output, scale) 125 | 126 | def acc_grad_input(self, x, grad_output, scale): 127 | return self.backward(x, grad_output, scale, None, self.grad_weight, self.grad_bias) 128 | 129 | def clear_state(self): 130 | pass 131 | -------------------------------------------------------------------------------- /pyfunt/c_add_table.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class CAddTable(Module): 6 | 7 | def __init__(self): 8 | super(CAddTable, self).__init__() 9 | self.grad_input = None 10 | 11 | def update_output(self, x): 12 | self.output = np.sum(x, axis=0) 13 | return self.output 14 | 15 | def update_grad_input(self, x, grad_output): 16 | self.grad_input = np.zeros_like(x) 17 | for i in xrange(len(x)): 18 | self.grad_input[i] = np.copy(grad_output) 19 | return self.grad_input 20 | 21 | def reset(self): 22 | pass 23 | -------------------------------------------------------------------------------- /pyfunt/class_nll_criterion.py: -------------------------------------------------------------------------------- 1 | from criterion import Criterion 2 | import numpy as np 3 | 4 | 5 | class ClassNLLCriterion(Criterion): 6 | 7 | """docstring for ClassNLLCriterion""" 8 | 9 | def __init__(self, weights=None, size_average=None): 10 | super(ClassNLLCriterion, self).__init__() 11 | if size_average: 12 | self.size_average = size_average 13 | else: 14 | self.size_average = True 15 | 16 | if weights: 17 | # assert(weights:dim() == 1, "weights input should be 1-D Tensor") 18 | self.weights = weights 19 | self.output_tensor = np.zeros(1) 20 | self.total_weight_tensor = np.ones(1) 21 | self.target = np.zeros(1) # , dtype=np.long) 22 | 23 | def __len__(self): 24 | if self.weights: 25 | return len(self.weights) 26 | else: 27 | return 0 28 | 29 | def update_output(self, x, target): 30 | 31 | # probs=np.exp(scores - np.max(scores, axis=1, keepdims=True)) 32 | # probs /= np.sum(probs, axis=1, keepdims=True) 33 | # return probs 34 | # # N = x.shape[0] 35 | # # loss = -np.mean(self.logp[np.arange(N), target]) 36 | # # self.output = -x 37 | # # return loss 38 | 39 | # N = x.shape[0] 40 | # xdev = x - x.max(1, keepdims=True) 41 | # self.logp = xdev - np.log(np.sum(np.exp(xdev), axis=1, keepdims=True)) 42 | # loss = -np.mean(self.logp[np.arange(N), target]) 43 | # self.output = loss 44 | # import pdb; pdb.set_trace() 45 | # return self.output 46 | 47 | self.output = - np.mean(x[np.arange(x.shape[0]), target]) 48 | return self.output 49 | 50 | def update_grad_input(self, x, target): 51 | N = x.shape[0] 52 | dx = np.exp(x) 53 | dx[np.arange(N), target] -= 1 54 | dx /= N 55 | self.grad_input = dx 56 | #import pdb; pdb.set_trace() 57 | return self.grad_input 58 | -------------------------------------------------------------------------------- /pyfunt/concat_table.py: -------------------------------------------------------------------------------- 1 | from container import Container 2 | import numpy as np 3 | 4 | 5 | class ConcatTable(Container): 6 | 7 | """docstring for ConcatTable""" 8 | 9 | def __init__(self): 10 | super(ConcatTable, self).__init__() 11 | self.modules = [] 12 | 13 | def update_output(self, 
x): 14 | self.output = [] 15 | for i in xrange(len(self.modules)): 16 | current_output = self.rethrow_errors( 17 | self.modules[i], i, 'update_output', x) 18 | self.output.append(current_output) 19 | # if i == 0: 20 | # self.output = current_output 21 | # else: 22 | # np.concatenate((self.output, current_output), axis=0) 23 | # self.output.append(self.rethrow_errors( 24 | # self.modules[i], i, 'update_output', x)) 25 | return self.output 26 | 27 | def _backward(self, method, x, grad_output, scale=1): 28 | for i, module in enumerate(self.modules): 29 | if method == 'update_grad_input': 30 | args = self.modules[i], i, method, x, grad_output[i] 31 | else: 32 | args = self.modules[i], i, method, x, grad_output[i], scale 33 | current_grad_input = self.rethrow_errors(*args) 34 | if i == 0: 35 | self.grad_input = current_grad_input 36 | else: 37 | self.grad_input += current_grad_input 38 | return self.grad_input 39 | 40 | def update_grad_input(self, x, grad_output): 41 | return self._backward('update_grad_input', x, grad_output) 42 | 43 | def backward(self, x, grad_output, scale=1): 44 | return self._backward('backward', x, grad_output, scale) 45 | 46 | def acc_grad_parameters(self, x, grad_output, scale=1): 47 | for i, module in enumerate(self.modules): 48 | self.rethrow_errors( 49 | self.modules[i], i, 'acc_grad_parameters', x, grad_output[i], scale) 50 | -------------------------------------------------------------------------------- /pyfunt/container.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from module import Module 3 | from types import DictType 4 | import sys 5 | import traceback 6 | import numpy as np 7 | import abc 8 | 9 | 10 | class Container(Module): 11 | """docstring for Container""" 12 | def __init__(self): 13 | super(Container, self).__init__() 14 | self.modules = [] 15 | 16 | def add(self, module): 17 | self.modules.append(module) 18 | return self 19 | 20 | def get(self, index): 21 | return self.modules[index] 22 | 23 | def size(self): 24 | return len(self.modules) 25 | 26 | def rethrow_errors(self, module, module_index, func_name, *args): 27 | def handle_error(err): 28 | # TODO 29 | return err 30 | func = getattr(module, func_name) 31 | try: 32 | result = func(*args) 33 | except Exception as e: 34 | print('In %d module (%s) of %s:' % (module_index, type(module).__name__, type(self).__name__)) 35 | traceback.print_exc() 36 | raise e 37 | 38 | return result 39 | 40 | def apply_to_modules(self, func): 41 | for module in self.modules: 42 | func(module) 43 | 44 | def zero_grad_parameters(self): 45 | self.apply_to_modules(lambda module: module.zero_grad_parameters()) 46 | 47 | def update_parameters(self, lr): 48 | self.apply_to_modules(lambda module: module.update_parameters(lr)) 49 | 50 | def training(self): 51 | super(Container, self).training() 52 | self.apply_to_modules(lambda module: module.training()) 53 | 54 | def evaluate(self): 55 | super(Container, self).evaluate() 56 | self.apply_to_modules(lambda module: module.evaluate()) 57 | 58 | def share(self, mlp, args): 59 | pass 60 | 61 | def reset(self, stdv): 62 | self.apply_to_modules(lambda module: module.reset(stdv)) 63 | 64 | def parameters(self): 65 | def tinsert(to, _from): 66 | if isinstance(_from, list): 67 | for i in xrange(len(_from)): 68 | tinsert(to, _from[i]) 69 | else: 70 | to.append(_from) 71 | 72 | w = [] 73 | gw = [] 74 | for i in xrange(len(self.modules)): 75 | 76 | res = self.modules[i].parameters() 77 | if res: 78 | mw, mgw 
= res 79 | tinsert(w, mw) 80 | tinsert(gw, mgw) 81 | return w, gw 82 | 83 | def clear_state(self): 84 | pass 85 | -------------------------------------------------------------------------------- /pyfunt/criterion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import abc 3 | 4 | 5 | class Criterion(object): 6 | __metaclass__ = abc.ABCMeta 7 | """docstring for Criterion""" 8 | def __init__(self): 9 | super(Criterion, self).__init__() 10 | self.output = 0 11 | 12 | @abc.abstractmethod 13 | def update_output(self, x, target): 14 | pass 15 | 16 | def forward(self, x, target): 17 | return self.update_output(x, target) 18 | 19 | def backward(self, x, target): 20 | return self.update_grad_input(x, target) 21 | 22 | @abc.abstractmethod 23 | def update_grad_input(self, x, target): 24 | pass 25 | 26 | def clone(self): 27 | pass 28 | 29 | def __call__(self, x, target): 30 | self.output = self.forward(x, target) 31 | self.grad_input = self.backward(x, target) 32 | return self.output, self.grad_input 33 | -------------------------------------------------------------------------------- /pyfunt/dropout.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Dropout(Module): 6 | 7 | def __init__(self, p=0.5, v1=False, stochastic_inference=False): 8 | super(Dropout, self).__init__() 9 | self.p = p 10 | self.train = True 11 | self.stochastic_inference = stochastic_inference 12 | # version 2 scales output during training instead of evaluation 13 | self.v2 = not v1 14 | if self.p >= 1 or self.p < 0: 15 | raise(' illegal percentage, must be 0 <= p < 1') 16 | self.noise = None 17 | 18 | def update_output(self, x): 19 | self.output = x.copy() 20 | if self.p > 0: 21 | if self.train or self.stochastic_inference: 22 | self.noise = np.random.binomial(1, p=1-self.p, size=x.shape) # bernoulli 23 | if self.v2: 24 | self.noise /= 1-self.p 25 | self.output *= self.noise 26 | elif not self.v2: 27 | self.output *= 1-self.p 28 | return self.output 29 | 30 | def update_grad_input(self, x, grad_output): 31 | self.grad_input = grad_output.copy() 32 | if self.train: 33 | if self.p > 0: 34 | self.grad_input *= self.noise 35 | else: 36 | if not self.v2 and self.p > 0: 37 | self.grad_input *= 1-self.p 38 | return self.grad_input 39 | 40 | def __str__(self): 41 | return '%s(%f)' % (type(self), self.p) 42 | 43 | def reset(self): 44 | pass 45 | -------------------------------------------------------------------------------- /pyfunt/examples/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /pyfunt/examples/model_testing/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyfunt import (SpatialConvolution, SpatialBatchNormalization, 3 | SpatialAveragePooling, Sequential, ReLU, Linear, 4 | Reshape, LogSoftMax) 5 | from pyfunt.utils import eval_numerical_gradient_array 6 | 7 | def rel_error(x, y): 8 | """ returns relative error """ 9 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 10 | 11 | x = np.random.randn(3, 4, 8, 8) 12 | # x = np.random.randn(3, 2, 8, 8) 13 | dout = np.random.randn(3, 10) 14 | pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2} 15 | 16 | s = Sequential() 17 | s.add(SpatialConvolution(4, 2, 1, 1, 1, 1)) 18 | s.add(SpatialAveragePooling(2, 2, 2, 2, 0, 0)) 19 | s.add(SpatialBatchNormalization(2)) 20 | s.add(ReLU()) 21 | s.add(Reshape(2*4*4)) 22 | s.add(Linear(2*4*4, 10)) 23 | s.add(LogSoftMax()) 24 | 25 | dx_num = eval_numerical_gradient_array(lambda x: s.update_output(x), x, dout) 26 | 27 | out = s.update_output(x) 28 | dx = s.update_grad_input(x, dout) 29 | # Your error should be around 1e-8 30 | print('Testing net backward function:') 31 | print('dx error: ', rel_error(dx, dx_num)) 32 | # import pdb; pdb.set_trace() 33 | -------------------------------------------------------------------------------- /pyfunt/examples/residual_networks/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | -------------------------------------------------------------------------------- /pyfunt/examples/residual_networks/resnet.py: -------------------------------------------------------------------------------- 1 | from pyfunt import (SpatialConvolution, SpatialBatchNormalization, 2 | SpatialAveragePooling, Sequential, ReLU, Linear, 3 | Reshape, LogSoftMax, Padding, Identity, ConcatTable, 4 | CAddTable) 5 | 6 | 7 | def residual_layer(n_channels, n_out_channels=None, stride=None): 8 | n_out_channels = n_out_channels or n_channels 9 | stride = stride or 1 10 | 11 | convs = Sequential() 12 | add = convs.add 13 | add(SpatialConvolution( 14 | n_channels, n_out_channels, 3, 3, stride, stride, 1, 1)) 15 | add(SpatialBatchNormalization(n_out_channels)) 16 | add(SpatialConvolution(n_out_channels, n_out_channels, 3, 3, 1, 1, 1, 1)) 17 | add(SpatialBatchNormalization(n_out_channels)) 18 | 19 | if stride > 1: 20 | shortcut = Sequential() 21 | shortcut.add(SpatialAveragePooling(2, 2, stride, stride)) 22 | shortcut.add(Padding(1, (n_out_channels - n_channels)/2, 3)) 23 | else: 24 | shortcut = Identity() 25 | 26 | res = Sequential() 27 | res.add(ConcatTable().add(convs).add(shortcut)).add(CAddTable()) 28 | # https://github.com/szagoruyko/wide-residual-networks/blob/master/models/resnet-pre-act.lua 29 | 30 | res.add(ReLU(True)) 31 | 32 | return res 33 | 34 | 35 | def resnet(n_size, num_starting_filters, reg): 36 | ''' 37 | Implementation of ["Deep Residual Learning for Image Recognition",Kaiming \ 38 | He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - http://arxiv.org/abs/1512.03385 39 | 40 | Inspired by https://github.com/gcr/torch-residual-networks 41 | 42 | This network should model a similiar behaviour of gcr's implementation. 43 | Check https://github.com/gcr/torch-residual-networks for more infos about \ 44 | the structure. 
45 | 46 | The network operates on minibatches of data that have shape (N, C, H, W) 47 | consisting of N images, each with height H and width W and with C input 48 | channels. 49 | 50 | The network has, like in the reference paper (except for the final optional 51 | affine layers), (6*n)+2 layers, composed as below: 52 | 53 | (image_dim: 3, 32, 32; F=16) 54 | (input_dim: N, *image_dim) 55 | INPUT 56 | | 57 | v 58 | +-------------------+ 59 | |conv[F, *image_dim]| (out_shape: N, 16, 32, 32) 60 | +-------------------+ 61 | | 62 | v 63 | +-------------------------+ 64 | |n * res_block[F, F, 3, 3]| (out_shape: N, 16, 32, 32) 65 | +-------------------------+ 66 | | 67 | v 68 | +-------------------------+ 69 | |res_block[2*F, F, 3, 3] | (out_shape: N, 32, 16, 16) 70 | +-------------------------+ 71 | | 72 | v 73 | +---------------------------------+ 74 | |(n-1) * res_block[2*F, 2*F, 3, 3]| (out_shape: N, 32, 16, 16) 75 | +---------------------------------+ 76 | | 77 | v 78 | +-------------------------+ 79 | |res_block[4*F, 2*F, 3, 3]| (out_shape: N, 64, 8, 8) 80 | +-------------------------+ 81 | | 82 | v 83 | +---------------------------------+ 84 | |(n-1) * res_block[4*F, 4*F, 3, 3]| (out_shape: N, 64, 8, 8) 85 | +---------------------------------+ 86 | | 87 | v 88 | +-------------+ 89 | |pool[1, 8, 8]| (out_shape: N, 64, 1, 1) 90 | +-------------+ 91 | | 92 | v 93 | +- - - - - - - - -+ 94 | |(opt) m * affine | (out_shape: N, 64, 1, 1) 95 | +- - - - - - - - -+ 96 | | 97 | v 98 | +-------+ 99 | |softmax| (out_shape: N, num_classes) 100 | +-------+ 101 | | 102 | v 103 | OUTPUT 104 | 105 | Every convolution layer has a pad=1 and stride=1, except for the dimension 106 | enhancning layers which has a stride of 2 to mantain the computational 107 | complexity. 108 | Optionally, there is the possibility of setting m affine layers immediatley 109 | before the softmax layer by setting the hidden_dims parameter, which should 110 | be a list of integers representing the numbe of neurons for each affine 111 | layer. 112 | 113 | Each residual block is composed as below: 114 | 115 | Input 116 | | 117 | ,-------+-----. 118 | Downsampling 3x3 convolution+dimensionality reduction 119 | | | 120 | v v 121 | Zero-padding 3x3 convolution 122 | | | 123 | `-----( Add )---' 124 | | 125 | Output 126 | 127 | After every layer, a batch normalization with momentum .1 is applied. 128 | 129 | Weight initialization (check also layers/init.py and layers/README.md): 130 | - Inizialize the weights and biases for the affine layers in the same 131 | way of torch's default mode by calling _init_affine_wb that returns a 132 | tuple (w, b). 133 | - Inizialize the weights for the conv layers in the same 134 | way of torch's default mode by calling init_conv_w. 135 | - Inizialize the weights for the conv layers in the same 136 | way of kaiming's mode by calling init_conv_w_kaiming 137 | (http://arxiv.org/abs/1502.01852 and 138 | http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-\ 139 | initialization) 140 | - Initialize batch normalization layer's weights like torch's default by 141 | calling init_bn_w 142 | - Initialize batch normalization layer's weights like cgr's first resblock\ 143 | 's bn (https://github.com/gcr/torch-residual-networks/blob/master/residual\ 144 | -layers.lua#L57-L59) by calling init_bn_w_gcr. 145 | 146 | num_filters=[16, 16, 32, 32, 64, 64], 147 | Initialize a new network. 148 | 149 | Inputs: 150 | - input_dim: Tuple (C, H, W) giving size of input data. 
151 | - num_starting_filters: Number of filters for the first convolution 152 | layer. 153 | - n_size: nSize for the residual network like in the reference paper 154 | - hidden_dims: Optional list number of units to use in the 155 | fully-connected hidden layers between the fianl pool and the sofmatx 156 | layer. 157 | - num_classes: Number of scores to produce from the final affine layer. 158 | - reg: Scalar giving L2 regularization strength 159 | - dtype: numpy datatype to use for computation. 160 | ''' 161 | 162 | nfs = num_starting_filters 163 | model = Sequential() 164 | add = model.add 165 | add(SpatialConvolution(3, nfs, 3, 3, 1, 1, 1, 1)) 166 | add(SpatialBatchNormalization(nfs)) 167 | add(ReLU()) 168 | 169 | for i in xrange(1, n_size): 170 | add(residual_layer(nfs)) 171 | add(residual_layer(nfs, 2*nfs, 2)) 172 | 173 | for i in xrange(1, n_size-1): 174 | add(residual_layer(2*nfs)) 175 | add(residual_layer(2*nfs, 4*nfs, 2)) 176 | 177 | for i in xrange(1, n_size-1): 178 | add(residual_layer(4*nfs)) 179 | 180 | add(SpatialAveragePooling(8, 8)) 181 | add(Reshape(nfs*4)) 182 | add(Linear(nfs*4, 10)) 183 | add(LogSoftMax()) 184 | return model 185 | -------------------------------------------------------------------------------- /pyfunt/examples/residual_networks/train-cifar.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import uuid 5 | import numpy as np 6 | # import matplotlib.pyplot as plt 7 | from pydatset.cifar10 import get_CIFAR10_data 8 | from pydatset.data_augmentation import (random_flips, 9 | random_crops) 10 | from resnet import resnet 11 | from pyfunt.solver import Solver as Solver 12 | 13 | import inspect 14 | import argparse 15 | 16 | from pyfunt.class_nll_criterion import ClassNLLCriterion 17 | 18 | np.seterr(all='raise') 19 | 20 | np.random.seed(0) 21 | 22 | DATA_PATH = '../CIFAR_DATASET_PATH' 23 | 24 | path_set = False 25 | while not path_set: 26 | try: 27 | with open(DATA_PATH) as f: 28 | DATASET_PATH = f.read() 29 | path_set = True 30 | except: 31 | data_path = raw_input('Enter the path for the CIFAR10 dataset: ') 32 | with open(DATA_PATH, "w") as f: 33 | f.write(data_path) 34 | 35 | 36 | EXPERIMENT_PATH = '../Experiments/' + str(uuid.uuid4())[-10:] 37 | 38 | # residual network constants 39 | NSIZE = 3 40 | N_STARTING_FILTERS = 16 41 | 42 | # solver constants 43 | NUM_PROCESSES = 4 44 | 45 | NUM_TRAIN = 50000 46 | NUM_TEST = 10000 47 | 48 | WEIGHT_DEACY = 1e-4 49 | REGULARIZATION = 0 50 | LEARNING_RATE = .1 51 | MOMENTUM = .99 52 | NUM_EPOCHS = 160 53 | BATCH_SIZE = 64 54 | CHECKPOINT_EVERY = 20 55 | 56 | XH, XW = 32, 32 57 | 58 | args = argparse.Namespace() 59 | 60 | 61 | def parse_args(): 62 | """ 63 | Parse the options for running the Residual Network on CIFAR-10. 64 | """ 65 | desc = 'Train a Residual Network on CIFAR-10.' 
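    # Most flags below default to the module-level constants defined at the top
    # of this script, so the command line only has to override what changes.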
66 | parser = argparse.ArgumentParser(description=desc, 67 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 68 | add = parser.add_argument 69 | add('--dataset_path', 70 | metavar='DIRECOTRY', 71 | default=DATASET_PATH, 72 | type=str, 73 | help='directory where results will be saved') 74 | add('--experiment_path', 75 | metavar='DIRECOTRY', 76 | default=EXPERIMENT_PATH, 77 | type=str, 78 | help='directory where results will be saved') 79 | add('-load', '--load_checkpoint', 80 | metavar='DIRECOTRY', 81 | default='', 82 | type=str, 83 | help='load checkpoint from load_checkpoint') 84 | add('--n_size', 85 | metavar='INT', 86 | default=NSIZE, 87 | type=int, 88 | help='Network will have (6*n)+2 conv layers') 89 | add('--n_starting_filters', 90 | metavar='INT', 91 | default=N_STARTING_FILTERS, 92 | type=int, 93 | help='Network will starts with those number of filters') 94 | add('--n_processes', '-np', 95 | metavar='INT', 96 | default=NUM_PROCESSES, 97 | type=int, 98 | help='Number of processes for each step') 99 | add('--n_train', 100 | metavar='INT', 101 | default=NUM_TRAIN, 102 | type=int, 103 | help='Number of total images to select for training') 104 | add('--n_test', 105 | metavar='INT', 106 | default=NUM_TEST, 107 | type=int, 108 | help='Number of total images to select for validation') 109 | add('-wd', '--weight_decay', 110 | metavar='FLOAT', 111 | default=WEIGHT_DEACY, 112 | type=float, 113 | help='Weight decay for sgd_th') 114 | add('-reg', '--network_regularization', 115 | metavar='FLOAT', 116 | default=REGULARIZATION, 117 | type=float, 118 | help='L2 regularization term for the network') 119 | add('-lr', '--learning_rate', 120 | metavar='FLOAT', 121 | default=LEARNING_RATE, 122 | type=float, 123 | help='Learning rate to use with sgd_th') 124 | add('-mom', '--momentum', 125 | metavar='FLOAT', 126 | default=MOMENTUM, 127 | type=float, 128 | help='Nesterov momentum use with sgd_th') 129 | add('--n_epochs', '-nep', 130 | metavar='INT', 131 | default=NUM_EPOCHS, 132 | type=int, 133 | help='Number of epochs for training') 134 | add('--batch_size', '-bs', 135 | metavar='INT', 136 | default=BATCH_SIZE, 137 | type=int, 138 | help='Number of images for each iteration') 139 | add('--checkpoint_every', '-cp', 140 | metavar='INT', 141 | default=CHECKPOINT_EVERY, 142 | type=int, 143 | help='Number of epochs between each checkpoint') 144 | parser.parse_args(namespace=args) 145 | assert not (args.network_regularization and args.weight_decay) 146 | 147 | 148 | def data_augm(batch): 149 | p = 2 150 | h, w = XH, XW 151 | 152 | # batch = random_tint(batch) 153 | # batch = random_contrast(batch) 154 | batch = random_flips(batch) 155 | # batch = random_rotate(batch, 10) 156 | batch = random_crops(batch, (h, w), pad=p) 157 | return batch 158 | 159 | 160 | def custom_update_decay(epoch): 161 | if epoch in (80, 120): 162 | return 0.1 163 | return 1 164 | 165 | 166 | def print_infos(solver): 167 | print('Model: \n%s' % solver.model) 168 | 169 | print('Solver: \n%s' % solver) 170 | 171 | print('Data Augmentation Function: \n') 172 | print(''.join(['\t' + i for i in inspect.getsourcelines(data_augm)[0]])) 173 | print('Custom Weight Decay Update Rule: \n') 174 | print(''.join(['\t' + i for i in inspect.getsourcelines(custom_update_decay)[0]])) 175 | 176 | 177 | def main(): 178 | parse_args() 179 | 180 | data = get_CIFAR10_data(args.dataset_path, 181 | num_training=args.n_train, num_validation=0, num_test=args.n_test) 182 | 183 | data = { 184 | 'X_train': data['X_train'], 185 | 'y_train': data['y_train'], 
186 | 'X_val': data['X_test'], 187 | 'y_val': data['y_test'], 188 | } 189 | 190 | exp_path = args.experiment_path 191 | nf = args.n_starting_filters 192 | reg = args.network_regularization 193 | 194 | model = resnet(n_size=args.n_size, 195 | num_starting_filters=nf, 196 | reg=reg) 197 | 198 | wd = args.weight_decay 199 | lr = args.learning_rate 200 | mom = args.momentum 201 | 202 | optim_config = {'learning_rate': lr, 'nesterov': True, 203 | 'momentum': mom, 'weight_decay': wd} 204 | 205 | epochs = args.n_epochs 206 | bs = args.batch_size 207 | num_p = args.n_processes 208 | cp = args.checkpoint_every 209 | criterion = ClassNLLCriterion() 210 | solver = Solver(model, data, args.load_checkpoint, 211 | criterion=criterion, 212 | num_epochs=epochs, batch_size=bs, # 20 213 | update_rule='sgd_th', 214 | optim_config=optim_config, 215 | custom_update_ld=custom_update_decay, 216 | batch_augment_func=data_augm, 217 | checkpoint_every=cp, 218 | num_processes=num_p) 219 | 220 | print_infos(solver) 221 | solver.train() 222 | 223 | solver.export_model(exp_path) 224 | solver.export_histories(exp_path) 225 | 226 | print('finish') 227 | 228 | 229 | if __name__ == '__main__': 230 | main() 231 | -------------------------------------------------------------------------------- /pyfunt/identity.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | 3 | 4 | class Identity(Module): 5 | """docstring for Identity""" 6 | def __init__(self): 7 | super(Identity, self).__init__() 8 | 9 | def update_output(self, x): 10 | self.output = x.copy() 11 | return self.output 12 | 13 | def update_grad_input(self, x, grad_output): 14 | self.grad_input = grad_output.copy() 15 | return self.grad_input 16 | 17 | def clear_state(self): 18 | pass 19 | 20 | def reset(self): 21 | pass 22 | -------------------------------------------------------------------------------- /pyfunt/im2col.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | 5 | 6 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 7 | # First figure out what the size of the output should be 8 | N, C, H, W = x_shape 9 | assert (H + 2 * padding - field_height) % stride == 0 10 | assert (W + 2 * padding - field_height) % stride == 0 11 | out_height = (H + 2 * padding - field_height) / stride + 1 12 | out_width = (W + 2 * padding - field_width) / stride + 1 13 | 14 | i0 = np.repeat(np.arange(field_height), field_width) 15 | i0 = np.tile(i0, C) 16 | i1 = stride * np.repeat(np.arange(out_height), out_width) 17 | j0 = np.tile(np.arange(field_width), field_height * C) 18 | j1 = stride * np.tile(np.arange(out_width), out_height) 19 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 20 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 21 | 22 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 23 | 24 | return (k, i, j) 25 | 26 | 27 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 28 | ''' An implementation of im2col based on some fancy indexing ''' 29 | # Zero-pad the input 30 | p = padding 31 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 32 | 33 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, 34 | stride) 35 | 36 | cols = x_padded[:, k, i, j] 37 | C = x.shape[1] 38 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 39 | return cols 40 | 41 | 42 | def 
col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, 43 | stride=1): 44 | ''' An implementation of col2im based on fancy indexing and np.add.at ''' 45 | N, C, H, W = x_shape 46 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 47 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 48 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, 49 | stride) 50 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 51 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 52 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 53 | if padding == 0: 54 | return x_padded 55 | return x_padded[:, :, padding:-padding, padding:-padding] 56 | 57 | -------------------------------------------------------------------------------- /pyfunt/im2col_cyt.pyx: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | cimport numpy as np 4 | cimport cython 5 | 6 | # DTYPE = np.float64 7 | # ctypedef np.float64_t DTYPE_t 8 | 9 | ctypedef fused DTYPE_t: 10 | np.float32_t 11 | np.float64_t 12 | 13 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 14 | int field_width, int padding, int stride): 15 | cdef int N = x.shape[0] 16 | cdef int C = x.shape[1] 17 | cdef int H = x.shape[2] 18 | cdef int W = x.shape[3] 19 | 20 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 21 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 22 | 23 | cdef int p = padding 24 | cdef double c = 0.0 25 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 26 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant', constant_values=(c,)) 27 | 28 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 29 | (C * field_height * field_width, N * HH * WW), 30 | dtype=x.dtype) 31 | 32 | # Moving the inner loop to a C function with no bounds checking works, but does 33 | # not seem to help performance in any measurable way. 34 | 35 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 36 | field_height, field_width, padding, stride) 37 | return cols 38 | 39 | 40 | @cython.boundscheck(False) 41 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 42 | np.ndarray[DTYPE_t, ndim=4] x_padded, 43 | int N, int C, int H, int W, int HH, int WW, 44 | int field_height, int field_width, int padding, int stride) except? -1: 45 | cdef int c, ii, jj, row, yy, xx, i, col 46 | 47 | for c in range(C): 48 | for yy in range(HH): 49 | for xx in range(WW): 50 | for ii in range(field_height): 51 | for jj in range(field_width): 52 | row = c * field_width * field_height + ii * field_height + jj 53 | for i in range(N): 54 | col = yy * WW * N + xx * N + i 55 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 56 | 57 | 58 | 59 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 60 | int field_height, int field_width, int padding, int stride): 61 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 62 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 63 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 64 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 65 | dtype=cols.dtype) 66 | 67 | # Moving the inner loop to a C-function with no bounds checking improves 68 | # performance quite a bit for col2im. 
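    # col2im_cython_inner scatter-adds every column of `cols` back into the
    # padded image buffer, so overlapping receptive fields accumulate their
    # contributions; the padding border is cropped off below.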
69 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 70 | field_height, field_width, padding, stride) 71 | if padding > 0: 72 | return x_padded[:, :, padding:-padding, padding:-padding] 73 | return x_padded 74 | 75 | 76 | @cython.boundscheck(False) 77 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 78 | np.ndarray[DTYPE_t, ndim=4] x_padded, 79 | int N, int C, int H, int W, int HH, int WW, 80 | int field_height, int field_width, int padding, int stride) except? -1: 81 | cdef int c, ii, jj, row, yy, xx, i, col 82 | 83 | for c in range(C): 84 | for ii in range(field_height): 85 | for jj in range(field_width): 86 | row = c * field_width * field_height + ii * field_height + jj 87 | for yy in range(HH): 88 | for xx in range(WW): 89 | for i in range(N): 90 | col = yy * WW * N + xx * N + i 91 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 92 | 93 | 94 | @cython.boundscheck(False) 95 | @cython.wraparound(False) 96 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 97 | np.ndarray[DTYPE_t, ndim=4] x_padded, 98 | int N, int C, int H, int W, int HH, int WW, 99 | int out_h, int out_w, int pad, int stride): 100 | 101 | cdef int c, hh, ww, n, h, w 102 | for n in range(N): 103 | for c in range(C): 104 | for hh in range(HH): 105 | for ww in range(WW): 106 | for h in range(out_h): 107 | for w in range(out_w): 108 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 109 | 110 | 111 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 112 | int HH, int WW, int pad, int stride): 113 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 114 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 115 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 116 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 117 | dtype=cols.dtype) 118 | 119 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 120 | 121 | if pad > 0: 122 | return x_padded[:, :, pad:-pad, pad:-pad] 123 | return x_padded -------------------------------------------------------------------------------- /pyfunt/linear.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Linear(Module): 6 | 7 | def __init__(self, input_size, output_size, bias=True): 8 | super(Linear, self).__init__() 9 | self.weight = np.ndarray((input_size, output_size)) 10 | self.grad_weight = np.ndarray((input_size, output_size)) 11 | if bias: 12 | self.bias = np.ndarray(output_size) 13 | self.grad_bias = np.ndarray(output_size) 14 | else: 15 | self.bias = None 16 | self.grad_bias = None 17 | self.reset() 18 | 19 | def no_bias(self): 20 | self.bias = None 21 | self.grad_bias = None 22 | 23 | def reset(self, stdv=None): 24 | if stdv: 25 | stdv = stdv * np.sqrt(3) 26 | else: 27 | std = 1./np.sqrt(self.weight.shape[0]) 28 | # stdv = 1./np.sqrt(self.weight.shape[1]) 29 | self.weight = np.random.uniform(-std, std, self.weight.shape) 30 | if self.bias is not None: 31 | self.bias = np.random.uniform(-std, std, self.bias.shape) 32 | 33 | def update_output(self, x): 34 | out = x.reshape(x.shape[0], -1) 35 | out = out.dot(self.weight) 36 | if self.bias is not None: 37 | out += self.bias 38 | self.output = out 39 | return self.output 40 | 41 | def update_grad_input(self, x, grad_output): 42 | dx = grad_output.dot(self.weight.T).reshape(x.shape) 43 | self.grad_weight[:] = x.reshape(x.shape[0], 
-1).T.dot(grad_output)[:] 44 | if self.bias is not None: 45 | self.grad_bias[:] = np.sum(grad_output, axis=0)[:] 46 | self.grad_input = dx 47 | return dx 48 | 49 | def acc_grad_parameters(self, x, grad_output, scale=None): 50 | pass 51 | -------------------------------------------------------------------------------- /pyfunt/log_soft_max.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class LogSoftMax(Module): 6 | """docstring for LogSoftMax""" 7 | def __init__(self): 8 | super(LogSoftMax, self).__init__() 9 | 10 | def update_output(self, x): 11 | max_input = x.max(1, keepdims=True) 12 | log_sum = np.sum(np.exp(x - max_input), axis=1, keepdims=True) 13 | log_sum = max_input + np.log(log_sum) 14 | self.output = x - log_sum 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | _sum = np.sum(grad_output, axis=1, keepdims=True) 19 | 20 | max_input = x.max(1, keepdims=True) 21 | log_sum = np.sum(np.exp(x - max_input), axis=1, keepdims=True) 22 | log_sum = max_input + np.log(log_sum) 23 | self.output = x - log_sum 24 | 25 | self.grad_input = grad_output - np.exp(self.output)*_sum 26 | return self.grad_input 27 | 28 | def reset(self): 29 | pass 30 | -------------------------------------------------------------------------------- /pyfunt/module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import abc 5 | import numpy as np 6 | from copy import deepcopy 7 | from types import DictType 8 | 9 | 10 | class Module(object): 11 | __metaclass__ = abc.ABCMeta 12 | 13 | def __init__(self): 14 | self.grad_input = None # np.ndarray() 15 | self.output = None # np.ndarray() 16 | self._type = np.float 17 | 18 | def parameters(self): 19 | if hasattr(self, 'weight'): 20 | if self.weight is not None and self.bias is not None: 21 | return [self.weight, self.bias], [self.grad_weight, self.grad_bias] 22 | if self.weight is not None: 23 | return [self.weight], [self.grad_weight] 24 | if self.bias is not None: 25 | return [self.bias], [self.grad_bias] 26 | 27 | @abc.abstractmethod 28 | def update_output(self, _input=None): 29 | # return self.output 30 | raise NotImplementedError() 31 | 32 | def forward(self, x): 33 | return self.update_output(x) 34 | 35 | def backward(self, _input, grad_output, scale=1): 36 | self.grad_input = self.update_grad_input(_input, grad_output) 37 | self.acc_grad_parameters(_input, grad_output, scale) 38 | return self.grad_input 39 | 40 | def backward_update(self, _input, grad_output, lr): 41 | grad_weight = self.grad_weight 42 | grad_bias = self.grad_bias 43 | self.grad_weight = self.weight 44 | self.grad_bias = self.bias 45 | self.acc_grad_parameters(_input, grad_output, -lr) 46 | self.grad_weight = grad_weight 47 | self.grad_bias = grad_bias 48 | 49 | @abc.abstractmethod 50 | def update_grad_input(self, _input, grad_output): 51 | # return self.grad_input 52 | raise NotImplementedError() 53 | 54 | def acc_grad_parameters(self, _input, grad_output, scale): 55 | pass 56 | 57 | def acc_update_grad_parameters(self, _input, grad_output, lr): 58 | grad_weight = self.grad_weight 59 | grad_bias = self.grad_bias 60 | self.grad_weight = self.weight 61 | self.grad_bias = self.bias 62 | self.acc_grad_parameters(_input, grad_output, -lr) 63 | self.grad_weight = grad_weight 64 | self.grad_bias = grad_bias 65 | 66 | def shared_acc_update_grad_parameters(self, _input, grad_output, lr): 67 
| if self.parameters(): 68 | self.zero_grad_parameters() 69 | self.acc_grad_parameters(_input, grad_output, 1) 70 | self.update_parameters(lr) 71 | 72 | def zero_grad_parameters(self): 73 | _, grad_params = self.parameters() 74 | if grad_params: 75 | for g in grad_params: 76 | g.zero() 77 | 78 | def update_parameters(self, lr): 79 | res = self.parameters() 80 | if res: 81 | params, grad_params = res 82 | for i, p in enumerate(params): 83 | p -= lr*grad_params[i] 84 | 85 | def training(self): 86 | self.train = True 87 | 88 | def evaluate(self): 89 | self.train = False 90 | 91 | def share(self, mlp, p_names): 92 | for i, v in enumerate(p_names): 93 | if self[v] is not None: 94 | self[v].set(mlp[v]) 95 | self.acc_update_grad_parameters = self.shared_acc_update_grad_parameters 96 | mlp.acc_update_grad_parameters = self.acc_update_grad_parameters 97 | return self 98 | 99 | def clone(self, p_names=None): 100 | clone = deepcopy(self) 101 | if p_names: 102 | clone.share(self, p_names) 103 | return clone 104 | 105 | # def type(self, type=None, cache=None): 106 | # if type is not None: 107 | # return self._type 108 | # cache = cache or {} 109 | # # find all tensors and convert them 110 | # for key, param in pairs(self): 111 | # self[key] = utils.recursive_type(param, type, cache) 112 | 113 | # self._type = type 114 | # return self 115 | 116 | 117 | # function Module:float(...) 118 | # return self:type('torch.FloatTensor',...) 119 | # end 120 | 121 | # function Module:double(...) 122 | # return self:type('torch.DoubleTensor',...) 123 | # end 124 | 125 | # function Module:cuda(...) 126 | # return self:type('torch.CudaTensor',...) 127 | # end 128 | 129 | def reset(self): 130 | raise NotImplementedError() 131 | 132 | def write(self, file): 133 | np.save(file, self) 134 | 135 | def read(self, file): 136 | obj = np.load(file)[0] 137 | for k, v in enumerate(obj): 138 | self[k] = v 139 | 140 | 141 | # -- This function is not easy to understand. It works as follows: 142 | # -- 143 | # -- - gather all parameter tensors for this module (and children); 144 | # -- count all parameter values (floats) 145 | # -- - create one ginormous memory area (Storage object) with room for all 146 | # -- parameters 147 | # -- - remap each parameter tensor to point to an area within the ginormous 148 | # -- Storage, and copy it there 149 | # -- 150 | # -- It has the effect of making all parameters point to the same memory area, 151 | # -- which is then returned. 152 | # -- 153 | # -- The purpose is to allow operations over all parameters (such as momentum 154 | # -- updates and serialization), but it assumes that all parameters are of 155 | # -- the same type (and, in the case of CUDA, on the same device), which 156 | # -- is not always true. Use for_each() to iterate over this module and 157 | # -- children instead. 158 | # -- 159 | # -- Module._flattenTensorBuffer can be used by other packages (e.g. cunn) 160 | # -- to specify the type of temporary buffers. For example, the temporary 161 | # -- buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage. 162 | # -- 163 | # -- TODO: This logically belongs to torch.Tensor, not nn. 
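# -- The commented Lua below (Module.flatten) is kept from torch/nn as a
# -- porting reference. A rough NumPy sketch of the same idea (illustrative
# -- only, not used by this class; `flatten_params` is a hypothetical helper):
#
#   def flatten_params(params):
#       sizes = [p.size for p in params]
#       flat = np.zeros(sum(sizes), dtype=params[0].dtype)
#       views, offset = [], 0
#       for p, n in zip(params, sizes):
#           flat[offset:offset + n] = p.ravel()
#           views.append(flat[offset:offset + n].reshape(p.shape))
#           offset += n
#       # every entry of `views` aliases `flat`, so a single contiguous update
#       # (e.g. a momentum step on `flat`) reaches all parameters at once
#       return flat, views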
164 | # Module._flattenTensorBuffer = {} 165 | 166 | 167 | # function Module.flatten(parameters) 168 | 169 | # -- returns true if tensor occupies a contiguous region of memory (no holes) 170 | # local function isCompact(tensor) 171 | # local sortedStride, perm = torch.sort( 172 | # torch.LongTensor(tensor:nDimension()):set(tensor:stride()), 1, true) 173 | # local sortedSize = torch.LongTensor(tensor:nDimension()):set( 174 | # tensor:size()):index(1, perm) 175 | # local nRealDim = torch.clamp(sortedStride, 0, 1):sum() 176 | # sortedStride = sortedStride:narrow(1, 1, nRealDim):clone() 177 | # sortedSize = sortedSize:narrow(1, 1, nRealDim):clone() 178 | # local t = tensor.new():set(tensor:storage(), 1, 179 | # sortedSize:storage(), 180 | # sortedStride:storage()) 181 | # return t:isContiguous() 182 | # end 183 | 184 | # if not parameters or #parameters == 0 then 185 | # return torch.Tensor() 186 | # end 187 | # local Tensor = parameters[1].new 188 | # local TmpTensor = Module._flattenTensorBuffer[torch.type(parameters[1])] 189 | # or Tensor 190 | 191 | # -- 1. construct the set of all unique storages referenced by parameter tensors 192 | # local storages = {} 193 | # local nParameters = 0 194 | # local parameterMeta = {} 195 | # for k = 1,#parameters do 196 | # local param = parameters[k] 197 | # local storage = parameters[k]:storage() 198 | # local storageKey = torch.pointer(storage) 199 | 200 | # if not storages[storageKey] then 201 | # storages[storageKey] = {storage, nParameters} 202 | # nParameters = nParameters + storage:size() 203 | # end 204 | 205 | # parameterMeta[k] = {storageOffset = param:storageOffset() + 206 | # storages[storageKey][2], 207 | # size = param:size(), 208 | # stride = param:stride()} 209 | # end 210 | 211 | # -- 2. construct a single tensor that will hold all the parameters 212 | # local flatParameters = TmpTensor(nParameters):zero() 213 | 214 | # -- 3. determine if there are elements in the storage that none of the 215 | # -- parameter tensors reference ('holes') 216 | # local tensorsCompact = true 217 | # for k = 1,#parameters do 218 | # local meta = parameterMeta[k] 219 | # local tmp = TmpTensor():set( 220 | # flatParameters:storage(), meta.storageOffset, meta.size, meta.stride) 221 | # tmp:fill(1) 222 | # tensorsCompact = tensorsCompact and isCompact(tmp) 223 | # end 224 | 225 | # local maskParameters = flatParameters:byte():clone() 226 | # local compactOffsets = flatParameters:long():cumsum(1) 227 | # local nUsedParameters = compactOffsets[-1] 228 | 229 | # -- 4. copy storages into the flattened parameter tensor 230 | # for _, storageAndOffset in pairs(storages) do 231 | # local storage, offset = table.unpack(storageAndOffset) 232 | # flatParameters[{{offset+1,offset+storage:size()}}]:copy(Tensor():set(storage)) 233 | # end 234 | 235 | # -- 5. allow garbage collection 236 | # storages = nil 237 | # for k = 1,#parameters do 238 | # parameters[k]:set(Tensor()) 239 | # end 240 | 241 | # -- 6. 
compact the flattened parameters if there were holes 242 | # if nUsedParameters ~= nParameters then 243 | # assert(tensorsCompact, 244 | # "Cannot gather tensors that are not compact") 245 | 246 | # flatParameters = TmpTensor(nUsedParameters):copy( 247 | # flatParameters:maskedSelect(maskParameters)) 248 | # for k = 1,#parameters do 249 | # parameterMeta[k].storageOffset = 250 | # compactOffsets[parameterMeta[k].storageOffset] 251 | # end 252 | # end 253 | 254 | # if TmpTensor ~= Tensor then 255 | # flatParameters = Tensor(flatParameters:nElement()):copy(flatParameters) 256 | # end 257 | 258 | # -- 7. fix up the parameter tensors to point at the flattened parameters 259 | # for k = 1,#parameters do 260 | # parameters[k]:set(flatParameters:storage(), 261 | # parameterMeta[k].storageOffset, 262 | # parameterMeta[k].size, 263 | # parameterMeta[k].stride) 264 | # end 265 | 266 | # return flatParameters 267 | # end 268 | 269 | def get_parameters(self): 270 | parameters, grad_parameters = self.parameters() 271 | #p, g = Module.flatten(parameters), Module.flatten(grad_parameters) 272 | #if not p.n_element() == g.n_element(): 273 | # raise Exception('check that you are sharing parameters and gradParameters') 274 | return parameters, grad_parameters 275 | 276 | def __call__(self, _input=None, grad_output=None): 277 | self.forward(_input) 278 | if self.grad_output: 279 | self.backward(_input, grad_output) 280 | return self.output, self.grad_input 281 | else: 282 | return self.output 283 | 284 | # Run a callback (called with the module as an argument) in preorder over this 285 | # module and its children. 286 | def apply(self, callback): 287 | callback(self) 288 | if self.modules: 289 | for module in self.modules: 290 | module.apply(callback) 291 | 292 | def find_modules(self, type_c, container): 293 | container = container or self 294 | nodes = {} 295 | containers = {} 296 | mod_type = type(self) 297 | if mod_type == type_c: 298 | nodes[len(nodes)+1] = self 299 | containers[len(containers)] = container 300 | # Recurse on nodes with 'modules' 301 | if self.modules is not None: 302 | if type(self.modules) is DictType: 303 | for i in xrange(len(self.modules)): 304 | child = self.modules[i] 305 | cur_nodes, cur_containers = child.find_modules( 306 | type_c, self) 307 | 308 | # This shouldn't happen 309 | if not len(cur_nodes) == len(cur_containers): 310 | raise Exception('Internal error: incorrect return length') 311 | 312 | # add the list items from our child to our list (ie return a 313 | # flattened table of the return nodes). 
314 | for j in xrange(len(cur_nodes)): 315 | nodes[len(cur_nodes)+1] = cur_nodes[j] 316 | containers[len(containers)+1] = cur_containers[j] 317 | 318 | return nodes, containers 319 | 320 | def list_modules(self): 321 | def tinsert(to, _from): 322 | if type(_from) == DictType: 323 | for i in xrange(len(_from)): 324 | tinsert(to, _from[i]) 325 | else: 326 | to.update(_from) 327 | 328 | modules = self 329 | if self.modules: 330 | for i in xrange(len(self.modules)): 331 | modulas = self.modules[i].list_modules() 332 | if modulas: 333 | tinsert(modules, modulas) 334 | return modules 335 | 336 | def clear_state(self): 337 | return # clear utils clear(self, 'output', 'gradInput') 338 | 339 | # similar to apply, recursively goes over network and calls 340 | # a callback function which returns a new module replacing the old one 341 | 342 | def replace(self, callback): 343 | callback(self) 344 | if self.modules: 345 | for i, m in enumerate(self.modules): 346 | self.modules[i] = Module.replace(callback) 347 | -------------------------------------------------------------------------------- /pyfunt/mul_constant.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class MulConstant(Module): 6 | 7 | def __init__(self, constant_scalar): 8 | super(MulConstant, self).__init__() 9 | if not np.isscalar(constant_scalar): 10 | raise Exception('Constant is not a scalar: ' + constant_scalar) 11 | self.constant_scalar = constant_scalar 12 | 13 | def update_output(self, x): 14 | self.output = x * self.constant_scalar 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | self.grad_input = grad_output * self.constant_scalar 19 | return self.grad_input 20 | 21 | def validate_parameters(self): 22 | if self.inplace: 23 | if self.val > self.th: 24 | raise Exception('in-place processing requires value not exceed threshold') 25 | 26 | def reset(self): 27 | pass 28 | -------------------------------------------------------------------------------- /pyfunt/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ''' 4 | This file implements various first-order update rules that are commonly used for 5 | training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning rate, 16 | momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not perform 25 | well; however the default values of the other hyperparameters should work well 26 | for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 
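Example (a minimal sketch of the calling convention; the array shapes, the
iteration count and compute_gradient below are illustrative only):

    w = np.random.randn(10, 10)
    config = None
    for _ in range(100):
        dw = compute_gradient(w)                 # hypothetical gradient of the loss w.r.t. w
        w, config = sgd_momentum(w, dw, config)  # config carries the velocity between calls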
30 | ''' 31 | 32 | 33 | def sgd_th(w, dw, config=None): 34 | ''' 35 | Performs stochastic gradient descent with nesterov momentum, 36 | like Torch's optim.sgd: 37 | https://github.com/torch/optim/blob/master/sgd.lua 38 | 39 | config format: 40 | - learning_rate: Scalar learning rate. 41 | - momentum: Scalar between 0 and 1 giving the momentum value. 42 | Setting momentum = 0 reduces to sgd. 43 | - nesterov: Boolean to indicate if nesterov momentum should be applied 44 | - dampening: default equal to momentum. 45 | - weight_decay: apply weight_decay in place. 46 | - state_dw: stored gradients for the next update. 47 | ''' 48 | if config is None: 49 | config = {} 50 | 51 | learning_rate = config.get('learning_rate', 1e-2) 52 | momentum = config.get('momentum', 0) 53 | nesterov = config.get('nesterov', False) 54 | dampening = config.get('dampening', 0) 55 | weight_decay = config.get('weight_decay', 0) 56 | state_dw = config.get('state_dw', None) 57 | assert (not nesterov or (momentum > 0 and dampening == 0) 58 | ), 'Nesterov momentum requires a momentum and zero dampening' 59 | dampening = dampening or momentum 60 | dw = dw.copy() 61 | if weight_decay: 62 | dw += weight_decay * w 63 | 64 | if momentum: 65 | if state_dw is None: 66 | state_dw = dw 67 | else: 68 | state_dw *= momentum 69 | state_dw += (1 - dampening) * dw 70 | if nesterov: 71 | dw = dw + momentum * state_dw 72 | else: 73 | dw = state_dw 74 | 75 | next_w = w - learning_rate * dw 76 | 77 | config['state_dw'] = state_dw 78 | 79 | return next_w, config 80 | 81 | 82 | def nesterov(w, dw, config=None): 83 | ''' 84 | Performs stochastic gradient descent with nesterov momentum. 85 | 86 | config format: 87 | - learning_rate: Scalar learning rate. 88 | - momentum: Scalar between 0 and 1 giving the momentum value. 89 | Setting momentum = 0 reduces to sgd. 90 | - velocity: A numpy array of the same shape as w and dw used to store a moving 91 | average of the gradients. 92 | ''' 93 | if config is None: 94 | config = {} 95 | config.setdefault('learning_rate', 1e-2) 96 | config.setdefault('momentum', 0.9) 97 | v = config.get('velocity', np.zeros_like(w, dtype=np.float64)) 98 | 99 | next_w = None 100 | prev_v = v 101 | v = config['momentum'] * v - config['learning_rate'] * dw 102 | next_w = w - config['momentum'] * prev_v + (1 + config['momentum']) * v 103 | config['velocity'] = v 104 | 105 | return next_w, config 106 | 107 | 108 | def sgd(w, dw, config=None, p=-1): 109 | ''' 110 | Performs vanilla stochastic gradient descent. 111 | 112 | config format: 113 | - learning_rate: Scalar learning rate. 114 | ''' 115 | if config is None: 116 | config = {} 117 | config.setdefault('learning_rate', 1e-2) 118 | 119 | w -= config['learning_rate'] * dw 120 | return w, config 121 | 122 | 123 | def sgd_momentum(w, dw, config=None): 124 | ''' 125 | Performs stochastic gradient descent with momentum. 126 | 127 | config format: 128 | - learning_rate: Scalar learning rate. 129 | - momentum: Scalar between 0 and 1 giving the momentum value. 130 | Setting momentum = 0 reduces to sgd. 131 | - velocity: A numpy array of the same shape as w and dw used to store a moving 132 | average of the gradients. 
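    The step applied below is the classic heavy-ball update (this restates the
    code that follows; it adds nothing new):

        v = momentum * v + learning_rate * dw
        next_w = w - v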
133 | ''' 134 | if config is None: 135 | config = {} 136 | config.setdefault('learning_rate', 1e-2) 137 | config.setdefault('momentum', 0.9) 138 | v = config.get('velocity', np.zeros_like(w)) 139 | 140 | next_w = None 141 | v = config['momentum'] * v + config['learning_rate'] * dw 142 | next_w = w - v 143 | config['velocity'] = v 144 | 145 | return next_w, config 146 | 147 | 148 | 149 | def rmsprop(x, dx, config=None): 150 | ''' 151 | Uses the RMSProp update rule, which uses a moving average of squared gradient 152 | values to set adaptive per-parameter learning rates. 153 | 154 | config format: 155 | - learning_rate: Scalar learning rate. 156 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 157 | gradient cache. 158 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 159 | - cache: Moving average of second moments of gradients. 160 | ''' 161 | if config is None: 162 | config = {} 163 | config.setdefault('learning_rate', 1e-2) 164 | config.setdefault('decay_rate', 0.99) 165 | config.setdefault('epsilon', 1e-8) 166 | config.setdefault('cache', np.zeros_like(x)) 167 | 168 | next_x = None 169 | cache = config['cache'] 170 | decay_rate = config['decay_rate'] 171 | learning_rate = config['learning_rate'] 172 | cache = decay_rate * cache + (1 - decay_rate) * dx**2 173 | x += - learning_rate * dx / (np.sqrt(cache) + 1e-8) 174 | 175 | config['cache'] = cache 176 | next_x = x 177 | 178 | return next_x, config 179 | 180 | 181 | def adam(x, dx, config=None): 182 | ''' 183 | Uses the Adam update rule, which incorporates moving averages of both the 184 | gradient and its square and a bias correction term. 185 | 186 | config format: 187 | - learning_rate: Scalar learning rate. 188 | - beta1: Decay rate for moving average of first moment of gradient. 189 | - beta2: Decay rate for moving average of second moment of gradient. 190 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 191 | - m: Moving average of gradient. 192 | - v: Moving average of squared gradient. 193 | - t: Iteration number. 
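    Note: the body below applies the raw moving averages m and v directly. Full
    Adam additionally bias-corrects them; a sketch of that variant, reusing the
    names from the config above, would be:

        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)
        next_x = x - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)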
194 | ''' 195 | if config is None: 196 | config = {} 197 | config.setdefault('learning_rate', 1e-3) 198 | config.setdefault('beta1', 0.9) 199 | config.setdefault('beta2', 0.999) 200 | config.setdefault('epsilon', 1e-8) 201 | config.setdefault('m', np.zeros_like(x)) 202 | config.setdefault('v', np.zeros_like(x)) 203 | config.setdefault('t', 1) 204 | 205 | next_x = None 206 | m = config['m'] 207 | v = config['v'] 208 | t = config['t'] 209 | beta1 = config['beta1'] 210 | beta2 = config['beta2'] 211 | 212 | # update parameters 213 | learning_rate = config['learning_rate'] 214 | epsilon = config['epsilon'] 215 | m = beta1*m + (1-beta1)*dx 216 | v = beta2*v + (1-beta2)*dx**2 217 | t = t + 1 218 | next_x = x - learning_rate*m/(np.sqrt(v) + epsilon) 219 | 220 | # Writing back in config 221 | config['m'] = m 222 | config['v'] = v 223 | config['t'] = t 224 | 225 | return next_x, config 226 | -------------------------------------------------------------------------------- /pyfunt/padding.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Padding(Module): 6 | 7 | def __init__(self, dim, pad, n_input_dim, value=None, index=None): 8 | super(Padding, self).__init__() 9 | self.value = value or 0 10 | self.index = index or 1 11 | self.dim = [dim] if type(dim) == int else dim 12 | self.pad = pad if pad > 0 else -pad 13 | self.n_input_dim = n_input_dim 14 | 15 | def update_output(self, x): 16 | pads = [] 17 | for axis in range(x.ndim): 18 | if axis in self.dim: 19 | pads += [(self.pad, self.pad)] 20 | else: 21 | pads += [(0, 0)] 22 | pads = tuple(pads) 23 | self.output = np.pad(x, pads, mode='constant') 24 | return self.output 25 | 26 | def update_grad_input(self, x, grad_output): 27 | slc = [slice(None)] * x.ndim 28 | self.grad_input = grad_output 29 | for axis in range(x.ndim): 30 | if axis in self.dim: 31 | slc[axis] = slice(self.pad, -self.pad) 32 | self.grad_input = grad_output[slc] 33 | return self.grad_input 34 | 35 | def reset(self): 36 | pass 37 | -------------------------------------------------------------------------------- /pyfunt/parallel.py: -------------------------------------------------------------------------------- 1 | from container import Container 2 | 3 | 4 | class Parallel(Container): 5 | """docstring for Parallel""" 6 | def __init__(self): 7 | super(Parallel, self).__init__() 8 | 9 | def len(self): 10 | return len(self.modules) 11 | 12 | def add(self, module): 13 | pass 14 | 15 | def insert(self, modules, module): 16 | pass 17 | 18 | def remove(self, index): 19 | pass 20 | 21 | def update_output(self, x): 22 | pass 23 | 24 | def update_grad_input(self, grad_output): 25 | pass 26 | 27 | def acc_grad_parameters(self, grad_output, scale): 28 | pass 29 | 30 | def backward(self, grad_output, scale): 31 | pass 32 | 33 | def __str__(self): 34 | pass 35 | -------------------------------------------------------------------------------- /pyfunt/relu.py: -------------------------------------------------------------------------------- 1 | from threshold import Threshold 2 | 3 | 4 | class ReLU(Threshold): 5 | def __init__(self, ip=False): 6 | super(ReLU, self).__init__(0, 0, ip) 7 | -------------------------------------------------------------------------------- /pyfunt/reshape.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | 3 | 4 | class Reshape(Module): 5 | 6 | def __init__(self, shape): 7 | super(Reshape, 
self).__init__() 8 | if type(shape) is not tuple: 9 | shape = (shape,) 10 | self.shape = shape 11 | 12 | def update_output(self, x): 13 | self.output = x.reshape((x.shape[0],) + self.shape) 14 | return self.output 15 | 16 | def update_grad_input(self, x, grad_output): 17 | self.grad_input = grad_output.reshape(x.shape) 18 | return self.grad_input 19 | 20 | def reset(self): 21 | pass 22 | -------------------------------------------------------------------------------- /pyfunt/sequential.py: -------------------------------------------------------------------------------- 1 | from container import Container 2 | import numpy as np 3 | 4 | 5 | class Sequential(Container): 6 | 7 | """docstring for Sequential""" 8 | 9 | def __init__(self): 10 | super(Sequential, self).__init__() 11 | 12 | def len(self): 13 | return len(self.modules) 14 | 15 | def add(self, module): 16 | if len(self.modules) == 0: 17 | self.grad_input = module.grad_input 18 | self.modules.append(module) 19 | self.output = module.output 20 | return self 21 | 22 | def insert(self, module, index=None): 23 | index = index or len(self.modules) + 1 24 | if index > len(self.modules) + 1 or index < 1: 25 | raise Exception('index should be contiguous to existing modules') 26 | self.modules.insert(module, index) 27 | self.output = self.modules[len(self.modules)].output 28 | self.grad_input = self.modules[0].grad_input # 1?? 29 | 30 | def remove(self, index): 31 | if index > len(self.modules) or index < 1: 32 | raise Exception('index out of range') 33 | self.modules.remove(index) 34 | if len(self.modules) > 0: 35 | self.output = self.modules[-1].output 36 | self.grad_input = self.modules[0].grad_input 37 | else: 38 | self.output = np.ndarray() 39 | self.grad_input = np.ndarray() 40 | 41 | def update_output(self, x): 42 | current_output = x 43 | for i in xrange(len(self.modules)): 44 | current_output = self.rethrow_errors(self.modules[i], i, 'update_output', current_output) 45 | self.output = current_output 46 | return self.output 47 | 48 | def update_grad_input(self, x, grad_output): 49 | current_grad_output = grad_output 50 | current_module = self.modules[-1] 51 | for i in range(len(self.modules)-2, -1, -1): 52 | previous_module = self.modules[i] 53 | current_grad_output = self.rethrow_errors(current_module, i, 'update_grad_input', previous_module.output, current_grad_output) 54 | current_module = previous_module 55 | current_grad_output = self.rethrow_errors(current_module, 0, 'update_grad_input', x, current_grad_output) 56 | self.grad_input = current_grad_output 57 | return current_grad_output 58 | 59 | def acc_grad_parameters(self, x, grad_output, scale=1): 60 | current_grad_output = grad_output 61 | current_module = self.modules[-1] 62 | for i in range(len(self.modules)-2, -1, -1): 63 | previous_module = self.modules[i] 64 | self.rethrow_errors(current_module, i, 'acc_grad_parameters', previous_module.output, current_grad_output, scale) 65 | current_grad_output = current_module.grad_input 66 | current_module = previous_module 67 | self.rethrow_errors(current_module, 0, 'acc_grad_parameters', x, current_grad_output, scale) 68 | 69 | def backward(self, x, grad_output, scale=1): 70 | current_grad_output = grad_output 71 | current_module = self.modules[-1] 72 | for i in range(len(self.modules)-2, -1, -1): 73 | previous_module = self.modules[i] 74 | current_grad_output = self.rethrow_errors(current_module, i, 'backward', previous_module.output, current_grad_output, scale) 75 | current_module.grad_input[:] = current_grad_output[:] 76 | 
current_module = previous_module 77 | 78 | current_grad_output = self.rethrow_errors(current_module, 0, 'backward', x, current_grad_output, scale) 79 | self.grad_input = current_grad_output 80 | return current_grad_output 81 | 82 | def acc_update_grad_parameters(self, x, grad_output, lr): 83 | current_grad_output = grad_output 84 | current_module = self.modules[-1] 85 | for i in range(len(self.modules)-2, -1, -1): 86 | previous_module = self.modules[i] 87 | self.rethrow_errors(current_module, i, 'acc_update_grad_parameters', previous_module.output, current_grad_output, lr) 88 | current_grad_output = current_module.grad_input 89 | current_module = previous_module 90 | self.rethrow_errors(current_module, 0, 'acc_update_grad_parameters', x, current_grad_output, lr) 91 | 92 | def __str__(self): 93 | return 'temporary string for Sequential class' 94 |
-------------------------------------------------------------------------------- /pyfunt/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | from distutils.core import setup 3 | 4 | 5 | def configuration(parent_package='', top_path=None): 6 | from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs 7 | 8 | config = Configuration('pyfunt', parent_package, top_path) 9 | config.add_subpackage('examples') 10 | config.add_extension('im2col_cyt', 11 | sources=[('im2col_cyt.c')], 12 | include_dirs=[get_numpy_include_dirs()]) 13 | 14 | return config 15 | 16 | if __name__ == '__main__': 17 | setup(**configuration(top_path='').todict()) 18 |
-------------------------------------------------------------------------------- /pyfunt/sigmoid.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Sigmoid(Module): 6 | 7 | def __init__(self): 8 | super(Sigmoid, self).__init__() 9 | 10 | def update_output(self, x): 11 | self.output = 1 / (1 + np.exp(-x)) 12 | return self.output 13 | 14 | def update_grad_input(self, x, grad_output): 15 | self.grad_input = grad_output * self.output * (1.0 - self.output) 16 | return self.grad_input 17 |
-------------------------------------------------------------------------------- /pyfunt/soft_max.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class SoftMax(Module): 6 | """docstring for SoftMax""" 7 | def __init__(self): 8 | super(SoftMax, self).__init__() 9 | 10 | def update_output(self, x): 11 | max_input = x.max(1, keepdims=True) 12 | z = np.exp(x - max_input) 13 | log_sum = np.sum(z, axis=1, keepdims=True) 14 | # log_sum = max_input + np.log(log_sum) 15 | self.output = z * 1/log_sum 16 | return self.output 17 | 18 | def update_grad_input(self, x, grad_output): 19 | _sum = np.sum(grad_output*self.output, axis=1, keepdims=True) 20 | self.grad_input = self.output * (grad_output - _sum) 21 | 22 | # max_input = x.max(1, keepdims=True) 23 | # log_sum = np.sum(np.exp(x - max_input), axis=1, keepdims=True) 24 | # log_sum = max_input + np.log(log_sum) 25 | # self.output = x - log_sum 26 | 27 | # self.grad_input = grad_output - np.exp(self.output)*_sum 28 | return self.grad_input 29 | 30 | def reset(self): 31 | pass 32 |
-------------------------------------------------------------------------------- /pyfunt/solver.py: -------------------------------------------------------------------------------- 1 | from __future__
import print_function 2 | import numpy as np 3 | from datetime import datetime 4 | import optim 5 | import os 6 | import multiprocessing as mp 7 | import signal 8 | from copy_reg import pickle 9 | from types import MethodType 10 | import sys 11 | from tqdm import tqdm 12 | 13 | def rel_error(x, y): 14 | """ returns relative error """ 15 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 16 | 17 | 18 | def _pickle_method(method): 19 | ''' 20 | Helper for multiprocessing ops, for more infos, check answer and comments 21 | here: 22 | http://stackoverflow.com/a/1816969/1142814 23 | ''' 24 | func_name = method.im_func.__name__ 25 | obj = method.im_self 26 | cls = method.im_class 27 | return _unpickle_method, (func_name, obj, cls) 28 | 29 | 30 | def _unpickle_method(func_name, obj, cls): 31 | ''' 32 | Helper for multiprocessing ops, for more infos, check answer and comments 33 | here: 34 | http://stackoverflow.com/a/1816969/1142814 35 | ''' 36 | for cls in cls.mro(): 37 | try: 38 | func = cls.__dict__[func_name] 39 | except KeyError: 40 | pass 41 | else: 42 | break 43 | return func.__get__(obj, cls) 44 | 45 | 46 | def init_worker(): 47 | ''' 48 | Permit to interrupt all processes trough ^C. 49 | ''' 50 | signal.signal(signal.SIGINT, signal.SIG_IGN) 51 | 52 | 53 | def loss_helper(args): 54 | model, criterion, x, y = args 55 | preds = model.forward(x) 56 | loss = criterion.forward(preds, y) 57 | dout = criterion.backward(preds, y) 58 | _ = model.backward(x, dout) 59 | _, grads = model.get_parameters() 60 | return loss, grads 61 | 62 | 63 | class Solver(object): 64 | 65 | ''' 66 | A Solver encapsulates all the logic necessary for training classification 67 | models. The Solver performs stochastic gradient descent using different 68 | update rules defined in optim.py. 69 | 70 | The solver accepts both training and validataion data and labels so it can 71 | periodically check classification accuracy on both training and validation 72 | data to watch out for overfitting. 73 | 74 | To train a model, you will first construct a Solver instance, passing the 75 | model, dataset, and various optoins (learning rate, batch size, etc) to the 76 | constructor. You will then call the train() method to run the optimization 77 | procedure and train the model. 78 | 79 | After the train() method returns, model.params will contain the parameters 80 | that performed best on the validation set over the course of training. 81 | In addition, the instance variable solver.loss_history will contain a list 82 | of all losses encountered during training and the instance variables 83 | solver.train_acc_history and solver.val_acc_history will be lists containing 84 | the accuracies of the model on the training and validation set at each epoch. 85 | 86 | Example usage might look something like this: 87 | 88 | data = { 89 | 'X_train': # training data 90 | 'y_train': # training labels 91 | 'X_val': # validation data 92 | 'X_train': # validation labels 93 | } 94 | model = MyAwesomeModel(hidden_size=100, reg=10) 95 | solver = Solver(model, data, 96 | update_rule='sgd', 97 | optim_config={ 98 | 'learning_rate': 1e-3, 99 | }, 100 | lr_decay=0.95, 101 | num_epochs=10, batch_size=100, 102 | print_every=100) 103 | solver.train() 104 | 105 | 106 | A Solver works on a model object that must conform to the following API: 107 | 108 | - model.params must be a dictionary mapping string parameter names to numpy 109 | arrays containing parameter values. 
110 | 111 | - model.loss(X, y) must be a function that computes training-time loss and 112 | gradients, and test-time classification scores, with the following inputs 113 | and outputs: 114 | 115 | Inputs: 116 | - X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k) 117 | - y: Array of labels, of shape (N,) giving labels for X where y[i] is the 118 | label for X[i]. 119 | 120 | Returns: 121 | If y is None, run a test-time forward pass and return: 122 | - scores: Array of shape (N, C) giving classification scores for X where 123 | scores[i, c] gives the score of class c for X[i]. 124 | 125 | If y is not None, run a training time forward and backward pass and return 126 | a tuple of: 127 | - loss: Scalar giving the loss 128 | - grads: Dictionary with the same keys as self.params mapping parameter 129 | names to gradients of the loss with respect to those parameters. 130 | ''' 131 | 132 | def __init__(self, model, data=None, load_dir=None, **kwargs): 133 | ''' 134 | Construct a new Solver instance. 135 | 136 | Required arguments: 137 | - model: A model object conforming to the API described above 138 | - data: A dictionary of training and validation data with the following: 139 | 'X_train': Array of shape (N_train, d_1, ..., d_k) giving training images 140 | 'X_val': Array of shape (N_val, d_1, ..., d_k) giving validation images 141 | 'y_train': Array of shape (N_train,) giving labels for training images 142 | 'y_val': Array of shape (N_val,) giving labels for validation images 143 | 144 | Optional arguments: Arguments you also find in the Stanford's 145 | cs231n assignments' Solver 146 | - update_rule: A string giving the name of an update rule in optim.py. 147 | Default is 'sgd_th'. 148 | - optim_config: A dictionary containing hyperparameters that will be 149 | passed to the chosen update rule. Each update rule requires different 150 | hyperparameters (see optim.py) but all update rules require a 151 | 'learning_rate' parameter so that should always be present. 152 | - lr_decay: A scalar for learning rate decay; after each epoch the learning 153 | rate is multiplied by this value. 154 | - batch_size: Size of minibatches used to compute loss and gradient during 155 | training. 156 | - num_epochs: The number of epochs to run for during training. 157 | Custom arguments: 158 | - load_dir: root directory for the checkpoints folder, if is not False, 159 | the instance tries to load the most recent checkpoint found in load_dir. 160 | - path_checkpoints: root directory where the checkpoints folder resides. 161 | - check_point_every: save a checkpoint every check_point_every epochs. 162 | - custom_update_ld: optional function to update the learning rate decay 163 | parameter, if not False the instruction 164 | self.lr_decay = custom_update_ld(self.epoch) is executed at the and 165 | of each epoch. 166 | - acc_check_train_pre_process: optional function to pre-process the 167 | training subset for checking accuracy on training data. 168 | If not False acc_check_train_pre_process is called before each 169 | accuracy check. 170 | - acc_check_val_pre_process: optional function to pre-process the 171 | validation data. 172 | If not False acc_check_val_pre_process is called on the validation 173 | before each accuracy check. 174 | - batch_augment_func: optional function to augment the batch data. 175 | If not False X_batch = batch_augment_func(X_batch) is called before 176 | each training step. 177 | - num_processes: optional number of parallel processes for each 178 | training step. 
If not 1, at each training/accuracy_check step, each 179 | batch is divided by the number of processes and losses (and grads) 180 | are computed in parallel when all processes finish we compute the 181 | mean for the loss (and grads) and continue as usual. 182 | ''' 183 | self.model = model 184 | if data: 185 | self.X_train = data['X_train'] 186 | self.y_train = data['y_train'] 187 | self.X_val = data['X_val'] 188 | self.y_val = data['y_val'] 189 | 190 | # Unpack keyword arguments 191 | self.criterion = kwargs.pop('criterion', None) 192 | if self.criterion is None: 193 | raise(Exception('Criterion cannot be None')) 194 | 195 | self.update_rule = kwargs.pop('update_rule', 'sgd') 196 | self.optim_config = kwargs.pop('optim_config', {}) 197 | self.learning_rate = self.optim_config['learning_rate'] 198 | self.lr_decay = kwargs.pop('lr_decay', 1.0) 199 | self.batch_size = kwargs.pop('batch_size', 100) 200 | self.num_epochs = kwargs.pop('num_epochs', 10) 201 | 202 | # Personal Edits 203 | self.path_checkpoints = kwargs.pop('path_checkpoints', 'checkpoints') 204 | self.checkpoint_every = kwargs.pop('checkpoint_every', 0) 205 | self.check_and_swap_every = kwargs.pop('check_and_swap_every', 0) 206 | self.silent_train = kwargs.pop('silent_train', False) 207 | self.custom_update_ld = kwargs.pop('custom_update_ld', False) 208 | self.acc_check_train_pre_process = kwargs.pop( 209 | 'acc_check_train_pre_process', False) 210 | self.acc_check_val_pre_process = kwargs.pop( 211 | 'acc_check_val_pre_process', False) 212 | self.batch_augment_func = kwargs.pop('batch_augment_func', False) 213 | self.num_processes = kwargs.pop('num_processes', 1) 214 | 215 | # Throw an error if there are extra keyword arguments 216 | if len(kwargs) > 0: 217 | extra = ', '.join('"%s"' % k for k in kwargs.keys()) 218 | raise ValueError('Unrecognized arguments %s' % extra) 219 | 220 | # Make sure the update rule exists, then replace the string 221 | # name with the actual function 222 | if not hasattr(optim, self.update_rule): 223 | raise ValueError('Invalid update_rule "%s"' % self.update_rule) 224 | self.update_rule = getattr(optim, self.update_rule) 225 | self._reset() 226 | if load_dir: 227 | self.load_dir = load_dir 228 | self.load_current_checkpoint() 229 | 230 | def __str__(self): 231 | return """ 232 | Number of processes: %d; 233 | Update Rule: %s; 234 | Optim Config: %s; 235 | Learning Rate Decay: %d; 236 | Batch Size: %d; 237 | Number of Epochs: %d; 238 | """ % ( 239 | self.num_processes, 240 | self.update_rule.__name__, 241 | str(self.optim_config), 242 | self.lr_decay, 243 | self.batch_size, 244 | self.num_epochs 245 | ) 246 | 247 | def _reset(self): 248 | ''' 249 | Set up some book-keeping variables for optimization. Don't call this 250 | manually. 
251 | ''' 252 | # Set up some variables for book-keeping 253 | self.epoch = 0 254 | self.best_val_acc = 0 255 | self.best_params = {} 256 | self.loss_history = [] 257 | self.val_acc_history = [] 258 | self.train_acc_history = [] 259 | self.pbar = None 260 | 261 | # Make a deep copy of the optim_config for each parameter 262 | self.optim_configs = {} 263 | self.params, self.grad_params = self.model.get_parameters() 264 | # self.weights, _ = self.model.get_parameters() 265 | for p in range(len(self.params)): 266 | d = {k: v for k, v in self.optim_config.iteritems()} 267 | self.optim_configs[p] = d 268 | 269 | self.multiprocessing = bool(self.num_processes-1) 270 | if self.multiprocessing: 271 | self.pool = mp.Pool(self.num_processes, init_worker) 272 | 273 | def load_current_checkpoint(self): 274 | ''' 275 | Return the current checkpoint 276 | ''' 277 | checkpoints = [f for f in os.listdir( 278 | self.load_dir) if not f.startswith('.')] 279 | 280 | try: 281 | num = max([int(f.split('_')[1]) for f in checkpoints]) 282 | name = 'check_' + str(num) 283 | try: 284 | cp = np.load( 285 | os.path.join(self.path_checkpoints, name, name + '.pkl')) 286 | except: 287 | print('sorry, I haven\'t fixed this line, but it should be easy to fix, if you want you can try now and make a pull request') 288 | raise() 289 | # Set up some variables for book-keeping 290 | 291 | self.epoch = cp['epoch'] 292 | self.best_val_acc = cp['best_val_acc'] 293 | self.best_params = cp['best_params'] 294 | self.loss_history = cp['loss_history'] 295 | self.val_acc_history = cp['val_acc_history'] 296 | self.train_acc_history = cp['train_acc_history'] 297 | self.model = cp['model'] 298 | 299 | except Exception, e: 300 | raise e 301 | 302 | def make_check_point(self): 303 | ''' 304 | Save the solver's current status 305 | ''' 306 | checkpoints = { 307 | 'model': self.model, 308 | 'epoch': self.epoch, 309 | 'best_params': self.best_params, 310 | 'best_val_acc': self.best_val_acc, 311 | 'loss_history': self.loss_history, 312 | 'val_acc_history': self.val_acc_history, 313 | 'train_acc_history': self.train_acc_history} 314 | 315 | name = 'check_' + str(self.epoch) 316 | directory = os.path.join(self.path_checkpoints, name) 317 | if not os.path.exists(directory): 318 | os.makedirs(directory) 319 | try: 320 | np.save(checkpoints, os.path.join( 321 | directory, name + '.pkl')) 322 | except: 323 | print('sorry, I haven\'t fixed this line, but it should be easy to fix, if you want you can try now and make a pull request') 324 | raise() 325 | 326 | def export_model(self, path): 327 | if not os.path.exists(path): 328 | os.makedirs(path) 329 | np.save('%smodel' % path, self.best_params) 330 | 331 | def export_histories(self, path): 332 | if not os.path.exists(path): 333 | os.makedirs(path) 334 | i = np.arange(len(self.loss_history)) + 1 335 | z = np.array(zip(i, i*self.batch_size, self.loss_history)) 336 | np.savetxt(path + 'loss_history.csv', z, delimiter=',', fmt=[ 337 | '%d', '%d', '%f'], header='iteration, n_images, loss') 338 | 339 | i = np.arange(len(self.train_acc_history), dtype=np.int) 340 | 341 | z = np.array(zip(i, self.train_acc_history)) 342 | np.savetxt(path + 'train_acc_history.csv', z, delimiter=',', fmt=[ 343 | '%d', '%f'], header='epoch, train_acc') 344 | 345 | z = np.array(zip(i, self.val_acc_history)) 346 | np.savetxt(path + 'val_acc_history.csv', z, delimiter=',', fmt=[ 347 | '%d', '%f'], header='epoch, val_acc') 348 | np.save(path + 'loss', self.loss_history) 349 | np.save(path + 'train_acc_history', 
self.train_acc_history) 350 | np.save(path + 'val_acc_history', self.val_acc_history) 351 | 352 | def _loss_helper(self, args): 353 | x, y = args 354 | preds = self.model.forward(x) 355 | loss = self.criterion.forward(preds, y) 356 | dout = self.criterion.backward(preds, y) 357 | self.model.backward(x, dout) 358 | return loss, self.grad_params 359 | 360 | def _step(self): 361 | ''' 362 | Make a single gradient update. This is called by train() and should not 363 | be called manually. 364 | ''' 365 | # Make a minibatch of training data 366 | num_train = self.X_train.shape[0] 367 | batch_mask = np.random.choice(num_train, self.batch_size) 368 | X_batch = self.X_train[batch_mask] 369 | y_batch = self.y_train[batch_mask] 370 | 371 | if not self.multiprocessing: 372 | # pred = model.forward(X_batch) 373 | # loss = self.criterion.forward(pred, y_batch) 374 | loss, grads = self._loss_helper((X_batch, y_batch)) 375 | else: 376 | n = self.num_processes 377 | pool = self.pool 378 | 379 | X_batches = np.split(X_batch, n) 380 | # sub_weights = np.array([len(x) 381 | # for x in X_batches], dtype=np.float32) 382 | # sub_weights /= sub_weights.sum() 383 | 384 | y_batches = np.split(y_batch, n) 385 | try: 386 | job_args = [(self.model, self.criterion, X_batches[i], y_batches[i]) for i in range(n)] 387 | results = pool.map_async(loss_helper, job_args).get() 388 | losses = np.zeros(len(results)) 389 | gradses = [] 390 | i = 0 391 | for i, r in enumerate(results): 392 | l, g = r 393 | losses[i] = l 394 | gradses.append(g) 395 | i += 1 396 | except Exception, e: 397 | self.pool.terminate() 398 | self.pool.join() 399 | raise e 400 | loss = np.mean(losses) 401 | grads = [] 402 | for p, w in enumerate(gradses[0]): 403 | grad = np.mean([grad[p] for grad in gradses], axis=0) 404 | grads.append(grad) 405 | self.grad_params[p][:] = grad 406 | 407 | self.loss_history.append(loss) 408 | return loss, grads 409 | 410 | def eval_model(self, X, y, num_samples=None, batch_size=100, return_preds=False): 411 | ''' 412 | Check accuracy of the model on the provided data. 413 | 414 | Inputs: 415 | - X: Array of data, of shape (N, d_1, ..., d_k) 416 | - y: Array of labels, of shape (N,) 417 | - num_samples: If not None, subsample the data and only test the model 418 | on num_samples datapoints. TODO 419 | - batch_size: Split X and y into batches of this size to avoid using too 420 | much memory. TODO 421 | - return_preds: if True returns predictions probabilities 422 | 423 | Returns: 424 | - acc: Scalar giving the fraction of instances that were correctly 425 | classified by the model. 
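    Example (illustrative only; assumes a constructed solver and a held-out
    X_val, y_val of the shapes described above):

        top1, top5 = solver.eval_model(X_val, y_val)
        preds = solver.eval_model(X_val, y_val, return_preds=True)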
426 | ''' 427 | N = X.shape[0] 428 | batch_size = self.batch_size 429 | num_batches = N / batch_size 430 | if N % batch_size != 0: 431 | num_batches += 1 432 | y_pred1 = [] 433 | y_pred5 = [] 434 | self.pbar = tqdm(total=N, desc='Accuracy Check', unit='im') 435 | for i in xrange(num_batches): 436 | start = i * batch_size 437 | end = (i + 1) * batch_size 438 | 439 | if not self.multiprocessing: 440 | scores = self.model.forward(X[start:end]) 441 | y_pred1.append(np.argmax(scores, axis=1)) 442 | y_pred5.append(scores.argsort()[-5:][::-1]) 443 | else: 444 | n = self.num_processes 445 | pool = self.pool 446 | X_batches = np.split(X[start:end], n) 447 | try: 448 | results = pool.map_async(self.model.forward, X_batches).get() 449 | scores = np.vstack(results) 450 | y_pred1.append(np.argmax(scores, axis=1)) 451 | y_pred5.append(scores.argsort()[-5:][::-1]) 452 | 453 | except Exception, e: 454 | self.pool.terminate() 455 | self.pool.join() 456 | raise e 457 | 458 | self.pbar.update(end - start) 459 | print() 460 | y_pred1 = np.hstack(y_pred1) 461 | if return_preds: 462 | return y_pred1 463 | acc1 = np.mean(y_pred1 == y) 464 | acc5 = np.mean(np.any(y_pred5 == y)) 465 | return acc1, acc5 466 | 467 | def _check_and_swap(self, it=0): 468 | ''' 469 | Check accuracy for both X_train[:1000] and X_val. 470 | ''' 471 | if self.acc_check_train_pre_process: 472 | X_tr_check = self.acc_check_train_pre_process(self.X_train[:1000]) 473 | else: 474 | X_tr_check = self.X_train[:1000] 475 | if self.acc_check_val_pre_process: 476 | X_val_check = self.acc_check_val_pre_process(self.X_val) 477 | else: 478 | X_val_check = self.X_val 479 | 480 | train_acc, val_acc = 0, 0 481 | 482 | train_acc, _ = self.eval_model( 483 | X_tr_check, self.y_train[:1000]) 484 | val_acc, _ = self.eval_model(X_val_check, self.y_val) 485 | 486 | self.train_acc_history.append(train_acc) 487 | self.val_acc_history.append(val_acc) 488 | 489 | self.emit_sound() 490 | # Keep track of the best model 491 | if val_acc > self.best_val_acc: 492 | self.best_val_acc = val_acc 493 | # self.best_params = {} 494 | for p, w in enumerate(self.params): 495 | self.best_params[p] = w.copy() 496 | # for k, v in self.model.params.iteritems(): 497 | # self.best_params[k] = v.copy() 498 | 499 | loss = '%.4f' % self.loss_history[it-1] if it > 0 else '-' 500 | print('%s - iteration %d: loss:%s, train_acc:%.4f, val_acc: %.4f, best_val_acc: %.4f;\n' % ( 501 | # print('%s - iteration %d: loss:%s, train_acc: %.4f, val_acc: %.4f, best_val_acc: %.4f;\n' % () 502 | # str(datetime.now()), it, loss, val_acc, self.best_val_acc) 503 | str(datetime.now()), it, loss, train_acc, val_acc, self.best_val_acc)) 504 | 505 | def _new_training_bar(self, total): 506 | ''' 507 | Create a new loading bar. 508 | ''' 509 | if not self.silent_train: 510 | d = 'Epoch %d / %d' % ( 511 | self.epoch + 1, self.num_epochs) 512 | self.pbar = tqdm(total=total, desc=d, unit='s.') 513 | 514 | def _update_bar(self, amount): 515 | if not self.silent_train: 516 | self.pbar.update(amount) 517 | 518 | def train(self): 519 | ''' 520 | Run optimization to train the model. 
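    One epoch corresponds to ceil(num_train / batch_size) iterations. At the
    end of every epoch the learning rate stored in each parameter's optim
    config is multiplied by lr_decay, a checkpoint is written every
    checkpoint_every epochs, and accuracy on a training subset and on the
    validation set is re-checked, keeping track of the best parameters (which
    are copied back into the model when training finishes).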
521 | ''' 522 | num_train = self.X_train.shape[0] 523 | iterations_per_epoch = int(np.ceil(num_train / float(self.batch_size))) 524 | images_per_epochs = iterations_per_epoch * self.batch_size 525 | num_iterations = self.num_epochs * iterations_per_epoch 526 | 527 | print('Training for %d epochs (%d iterations).\n' % 528 | (self.num_epochs, num_iterations)) 529 | epoch_end = True 530 | lr_decay_updated = False 531 | self._check_and_swap() 532 | self._new_training_bar(images_per_epochs) 533 | # self.params, self.grad_params = self.model.get_parameters() 534 | self.best_params = np.copy(self.params) 535 | for it in xrange(num_iterations): 536 | 537 | loss, _ = self._step() 538 | 539 | # self.loss_history.append(loss) 540 | 541 | # Perform a parameter update 542 | # self.model.params.iteritems(): 543 | for p, w in enumerate(self.params): 544 | dw = self.grad_params[p] 545 | config = self.optim_configs[p] 546 | next_w, next_config = self.update_rule(w, dw, config) 547 | self.params[p][:] = next_w[:] 548 | self.optim_configs[p] = next_config 549 | 550 | self.pbar.update(self.batch_size) 551 | 552 | epoch_end = (it + 1) % iterations_per_epoch == 0 553 | 554 | if epoch_end: 555 | print() 556 | self.epoch += 1 557 | 558 | if self.custom_update_ld: 559 | self.lr_decay = self.custom_update_ld(self.epoch) 560 | lr_decay_updated = self.lr_decay != 1 561 | 562 | for k in self.optim_configs: 563 | self.optim_configs[k]['learning_rate'] *= self.lr_decay 564 | 565 | if self.checkpoint_every and (self.epoch % self.checkpoint_every == 0): 566 | self.make_check_point() 567 | 568 | if not self.check_and_swap_every or (self.epoch % self.check_and_swap_every == 0): 569 | self._check_and_swap(it) 570 | 571 | finish = it == num_iterations - 1 572 | if not finish: 573 | if lr_decay_updated: 574 | print('learning_rate updated: ', next( 575 | self.optim_configs.itervalues())['learning_rate']) 576 | lr_decay_updated = False 577 | print() 578 | self._new_training_bar(images_per_epochs) 579 | 580 | # At the end of training swap the best params into the model 581 | self.params[:] = self.best_params[:] 582 | if self.multiprocessing: 583 | try: 584 | self.pool.terminate() 585 | self.pool.join() 586 | except: 587 | pass 588 | 589 | def emit_sound(self): 590 | ''' 591 | Emit sound when epoch end. 
592 | ''' 593 | sys.stdout.write('\a') 594 | 595 | 596 | # again, check http://stackoverflow.com/a/1816969/1142814 and comments 597 | pickle(MethodType, _pickle_method, _unpickle_method) 598 | -------------------------------------------------------------------------------- /pyfunt/spatial_average_pooling.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | try: 5 | from im2col_cyt import im2col_cython, col2im_cython 6 | except ImportError: 7 | print('Installation broken, please reinstall PyFunt') 8 | 9 | 10 | class SpatialAveragePooling(Module): 11 | 12 | """docstring for SpatialAveragePooling""" 13 | 14 | def __init__(self, kW, kH, dW=1, dH=1, padW=0, padH=0): 15 | super(SpatialAveragePooling, self).__init__() 16 | self.kW = kW 17 | self.kH = kH 18 | self.dW = dW 19 | self.dH = dH 20 | self.padW = padW 21 | self.padH = padH 22 | self.ceil_mode = False 23 | self.count_include_pad = True 24 | self.divide = True 25 | 26 | def reset(self): 27 | #TODO 28 | pass 29 | 30 | def ceil(self): 31 | self.ceil_mode = True 32 | 33 | def floor(self): 34 | self.ceil_mode = False 35 | 36 | def set_count_include_pad(self): 37 | self.count_include_pad = True 38 | 39 | def set_count_exclude_pad(self): 40 | self.count_include_pad = False 41 | 42 | def update_output(self, x): 43 | N, C, H, W = x.shape 44 | pool_height, pool_width = self.kW, self.kH 45 | stride = self.dW 46 | 47 | assert ( 48 | H - pool_height) % stride == 0 or H == pool_height, 'Invalid height' 49 | assert ( 50 | W - pool_width) % stride == 0 or W == pool_width, 'Invalid width' 51 | 52 | out_height = int(np.floor((H - pool_height) / stride + 1)) 53 | out_width = int(np.floor((W - pool_width) / stride + 1)) 54 | 55 | x_split = x.reshape(N * C, 1, H, W) 56 | x_cols = im2col_cython( 57 | x_split, pool_height, pool_width, padding=0, stride=stride) 58 | x_cols_avg = np.mean(x_cols, axis=0) 59 | out = x_cols_avg.reshape( 60 | out_height, out_width, N, C).transpose(2, 3, 0, 1) 61 | 62 | self.x_shape = x.shape 63 | self.x_cols = x_cols 64 | self.output = out 65 | return self.output 66 | 67 | def update_grad_input(self, x, grad_output, scale=1): 68 | x_cols = self.x_cols 69 | dout = grad_output 70 | N, C, H, W = self.x_shape 71 | pool_height, pool_width = self.kW, self.kH 72 | stride = self.dW 73 | pool_dim = pool_height * pool_width 74 | 75 | dout_reshaped = dout.transpose(2, 3, 0, 1).flatten() 76 | dx_cols = np.zeros_like(x_cols) 77 | dx_cols[:, np.arange(dx_cols.shape[1])] = 1. 
/ pool_dim * dout_reshaped 78 | dx = col2im_cython(dx_cols, N * C, 1, H, W, pool_height, pool_width, 79 | padding=0, stride=stride) 80 | 81 | self.grad_input = dx.reshape(self.x_shape) 82 | 83 | return self.grad_input 84 | 85 | def __str__(self): 86 | pass 87 | -------------------------------------------------------------------------------- /pyfunt/spatial_batch_normalitazion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import numpy as np 4 | from batch_normalization import BatchNormalization 5 | 6 | 7 | class SpatialBatchNormalization(BatchNormalization): 8 | n_dim = 4 9 | 10 | def __init__(self, *args): 11 | super(SpatialBatchNormalization, self).__init__(*args) 12 | 13 | def update_output(self, x): 14 | N, C, H, W = x.shape 15 | x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C) 16 | x_flat = np.ascontiguousarray(x_flat, dtype=x.dtype) 17 | super(SpatialBatchNormalization, self).update_output(x_flat) 18 | self.output = self.output.reshape(N, H, W, C).transpose(0, 3, 1, 2) 19 | return self.output 20 | 21 | def update_grad_input(self, x, grad_output, scale=1): 22 | N, C, H, W = grad_output.shape 23 | dout_flat = grad_output.transpose(0, 2, 3, 1).reshape(-1, C) 24 | dout_flat = np.ascontiguousarray(dout_flat, dtype=dout_flat.dtype) 25 | super(SpatialBatchNormalization, self).update_grad_input(x, dout_flat, scale) 26 | self.grad_input = self.grad_input.reshape(N, H, W, C).transpose(0, 3, 1, 2) 27 | return self.grad_input 28 | 29 | def backward(self, x, grad_output, scale=1): 30 | return self.update_grad_input(x, grad_output, scale) 31 | -------------------------------------------------------------------------------- /pyfunt/spatial_convolution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | try: 7 | from im2col_cyt import col2im_cython 8 | except ImportError: 9 | print('Installation broken, please reinstall PyFunt') 10 | 11 | 12 | class SpatialConvolution(Module): 13 | 14 | n_dim = 2 15 | 16 | def __init__(self, n_input_plane, n_output_plane, kW, kH, dW=1, dH=1, padW=0, padH=0): 17 | super(SpatialConvolution, self).__init__() 18 | 19 | self.n_input_plane = n_input_plane 20 | self.n_output_plane = n_output_plane 21 | self.kW = kW 22 | self.kH = kH 23 | 24 | self.dW = dW 25 | self.dH = dH 26 | self.padW = padW 27 | self.padH = padH or self.padW 28 | 29 | self.weight = np.ndarray((n_output_plane, n_input_plane, kH, kW)) 30 | self.bias = np.ndarray(n_output_plane) 31 | self.grad_weight = np.ndarray((n_output_plane, n_input_plane, kH, kW)) 32 | self.grad_bias = np.ndarray(n_output_plane) 33 | 34 | self.reset() 35 | 36 | def no_bias(self): 37 | self.bias = None 38 | self.grad_bias = None 39 | 40 | def reset(self, stdv=None): 41 | if not stdv: 42 | stdv = 1/np.sqrt(self.kW*self.kH*self.n_input_plane) 43 | self.weight = np.random.normal( 44 | 0, stdv, (self.n_output_plane, self.n_input_plane, self.kH, self.kW)) 45 | self.bias = np.zeros(self.n_output_plane) 46 | 47 | def check_input_dim(self, x): 48 | pass 49 | 50 | def make_contigous(self, input, grad_output): 51 | pass 52 | 53 | def update_output(self, x): 54 | w, b = self.weight, self.bias 55 | # input = make_contigous (input)N, C, H, W = x.shape 56 | self.x_shape = N, C, H, W = x.shape 57 | 58 | F, _, HH, WW = w.shape 59 | stride, pad = self.dW, self.padW 60 | #assert (W + 2 * pad - WW) % stride == 0, 'width does not 
work' 61 | #assert (H + 2 * pad - HH) % stride == 0, 'height does not work' 62 | 63 | p = pad 64 | x_padded = np.pad( 65 | x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 66 | 67 | self.tiles_w = (W + (2 * pad) - WW) % stride 68 | self.tiles_h = (H + (2 * pad) - HH) % stride 69 | if not self.tiles_w == 0: 70 | x_padded = x_padded[:, :, :, :-self.tiles_w] 71 | if not self.tiles_h == 0: 72 | x_padded = x_padded[:, :, :-self.tiles_h, :] 73 | 74 | N, C, H, W = x_padded.shape 75 | if (W + (2 * pad) - WW) % stride != 0: 76 | raise Exception('width does not work') 77 | 78 | # H += 2 * pad 79 | # W += 2 * pad 80 | out_h = (H - HH) / stride + 1 81 | out_w = (W - WW) / stride + 1 82 | 83 | # Perform an im2col operation by picking clever strides 84 | shape = (C, HH, WW, N, out_h, out_w) 85 | strides = (H * W, W, 1, C * H * W, stride * W, stride) 86 | strides = x.itemsize * np.array(strides) 87 | x_stride = np.lib.stride_tricks.as_strided(x_padded, 88 | shape=shape, strides=strides) 89 | x_cols = np.ascontiguousarray(x_stride) 90 | x_cols.shape = (C * HH * WW, N * out_h * out_w) 91 | 92 | # Now all our convolutions are a big matrix multiply 93 | res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1) 94 | 95 | res.shape = (F, N, out_h, out_w) 96 | out = res.transpose(1, 0, 2, 3) 97 | 98 | self.output = np.ascontiguousarray(out) 99 | 100 | self.x_cols = x_cols 101 | return self.output 102 | 103 | def update_grad_input(self, input, grad_output, scale=1): 104 | x_shape, x_cols = self.x_shape, self.x_cols 105 | w = self.weight 106 | 107 | stride, pad = self.dW, self.padW 108 | 109 | N, C, H, W = x_shape 110 | F, _, HH, WW = w.shape 111 | _, _, out_h, out_w = grad_output.shape 112 | 113 | self.grad_bias[:] = np.sum(grad_output, axis=(0, 2, 3))[:] 114 | 115 | dout_reshaped = grad_output.transpose(1, 0, 2, 3).reshape(F, -1) 116 | self.grad_weight[:] = dout_reshaped.dot(x_cols.T).reshape(w.shape)[:] 117 | 118 | dx_cols = w.reshape(F, -1).T.dot(dout_reshaped) 119 | #dx_cols.shape = (C, HH, WW, N, out_h, out_w) 120 | # dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride) 121 | dx = col2im_cython(dx_cols, N, C, H, W, HH, WW, pad, stride) 122 | self.grad_input = dx 123 | return dx 124 | 125 | def type(self, type, cache): 126 | pass 127 | 128 | def __str__(self): 129 | pass 130 | 131 | def clear_state(self): 132 | pass 133 | -------------------------------------------------------------------------------- /pyfunt/spatial_full_convolution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | try: 7 | from im2col_cyt import col2im_cython 8 | except ImportError: 9 | print('Installation broken, please reinstall PyFunt') 10 | 11 | 12 | class SpatialFullConvolution(Module): 13 | 14 | '''implementation of layer described in https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf"''' 15 | n_dim = 2 16 | 17 | def __init__(self, n_input_plane, n_output_plane, kW, kH, dW=1, dH=1, padW=0, padH=0, adjW=0, adjH=0): 18 | super(SpatialFullConvolution, self).__init__() 19 | 20 | self.n_input_plane = n_input_plane 21 | self.n_output_plane = n_output_plane 22 | self.kW = kW 23 | self.kH = kH 24 | 25 | self.dW = dW 26 | self.dH = dH 27 | if padH != padW or dH != dW: 28 | raise Exception('padH != padW or dH != dW, behaviout not implemented ') 29 | self.padW = padW 30 | self.padH = padH or self.padW 31 | self.adjW = adjW 32 | self.adjH = adjH 33 | 34 | if self.adjW > self.dW - 
1 or self.adjH > self.dH - 1: 35 | raise Exception( 36 | 'adjW and adjH must be smaller than self.dW - 1 and self.dH - 1 respectively') 37 | 38 | self.weight = np.ndarray((n_input_plane, n_output_plane, kH, kW)) 39 | self.bias = np.ndarray(n_output_plane) 40 | self.grad_weight = np.ndarray((n_input_plane, n_output_plane, kH, kW)) 41 | self.grad_bias = np.ndarray(n_output_plane) 42 | 43 | self.reset() 44 | 45 | def no_bias(self): 46 | self.bias = None 47 | self.grad_bias = None 48 | 49 | def reset(self, stdv=None): 50 | if not stdv: 51 | stdv = 1/np.sqrt(self.kW*self.kH*self.n_input_plane) 52 | self.weight = np.random.normal( 53 | 0, stdv, (self.n_output_plane, self.n_input_plane, self.kH, self.kW)) 54 | self.bias = np.zeros(self.n_output_plane) 55 | 56 | def check_input_dim(self, x): 57 | pass 58 | 59 | def make_contigous(self, input, grad_output): 60 | pass 61 | 62 | def calcula_adj(self, target_size, ker, pad, stride): 63 | return (target_size + 2 * pad - ker) % stride 64 | 65 | def update_output(self, x): 66 | 67 | w = self.weight 68 | F, FF, HH, WW = w.shape 69 | 70 | stride, pad = self.dW, self.padW 71 | N, in_C, inH, inW = x.shape 72 | C = self.n_output_plane 73 | W = (inW - 1) * self.dW - 2*self.padW + WW # x_shape 74 | H = (inH - 1) * self.dH - 2*self.padH + HH # x_shape 75 | _, _, in_h, in_w = x.shape 76 | #assert (H + 2 * pad - HH) % stride == 0, 'height does not work' 77 | x_reshaped = x.transpose(1, 0, 2, 3).reshape(F, -1) 78 | out_cols = w.reshape(F, -1).T.dot(x_reshaped) 79 | # out_cols.shape = (C, HH, WW, N, in_h, in_w) 80 | self.output = col2im_cython(out_cols, N, C, H, W, HH, WW, pad, stride) 81 | self.output += self.bias.reshape(1, -1, 1, 1) 82 | if self.adjH: 83 | self.output = np.pad( 84 | self.output, ((0, 0), (0, 0), (0, self.adjH), (0, 0)), mode='constant') 85 | if self.adjW: 86 | self.output = np.pad( 87 | self.output, ((0, 0), (0, 0), (0, 0), (0, self.adjW)), mode='constant') 88 | return self.output 89 | 90 | 91 | 92 | # w, b = self.weight, self.bias 93 | # # input = make_contigous (input)N, C, H, W = x.shape 94 | # N, C, H, W = x.shape 95 | # outW = (W - 1) * self.dW - 2*self.padW + self.kW + self.adjW 96 | 97 | # F, FF, HH, WW = w.shape 98 | # stride, pad = self.dW, self.padW 99 | 100 | # p = pad 101 | # x = np.pad( 102 | # x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 103 | 104 | # self.tiles_w = (W + 2 * pad - WW) % stride 105 | # self.tiles_h = (H + 2 * pad - HH) % stride 106 | 107 | # stride, pad = self.dW, self.padW 108 | 109 | # out_w = (W - 1) * self.dW - 2*self.padW + WW; 110 | # out_h = (H - 1) * self.dH - 2*self.padH + HH; 111 | # _, _, out_h, out_w = x.shape 112 | # import pdb; pdb.set_trace() 113 | # x_reshaped = x.transpose(1, 0, 2, 3).reshape(F, -1) 114 | # out_cols = w.reshape(F, -1).T.dot(x_reshaped)# + b.reshape(-1, 1) 115 | # out_cols.shape = (self.n_output_plane, -1) 116 | # b_reshaped = b.reshape(self.n_output_plane, -1) 117 | # out_cols += b_reshaped 118 | # out_cols.shape = (self.n_output_plane, HH, WW, N, out_h, out_w) 119 | # #out_cols.shape = (C, HH, WW, N, out_h, out_w) 120 | # self.output = col2im_6d_cython(out_cols, N, self.n_output_plane, H, W, HH, WW, pad, stride) 121 | 122 | # if self.output.shape[1] != self.n_output: 123 | # import pdb; pdb.set_trace() 124 | 125 | def update_grad_input(self, input, grad_output, scale=1): 126 | raise NotImplementedError 127 | # TODO THIS IS BROKEN FIXME PLEASE :() 128 | w = self.bias 129 | F, _, HH, WW = w.shape 130 | stride = self.stride 131 | 132 | if not self.adjH == 0: 133 | grad_output 
= grad_output[:, :, :-self.adjH, :] 134 | if not self.adjW == 0: 135 | grad_output = grad_output[:, :, :, :-self.adjW] 136 | 137 | N, C, H, W = grad_output.shape 138 | 139 | # H += 2 * pad 140 | # W += 2 * pad 141 | out_h = (H - HH) / stride + 1 142 | out_w = (W - WW) / stride + 1 143 | 144 | # Perform an im2col operation by picking clever strides 145 | shape = (C, HH, WW, N, out_h, out_w) 146 | strides = (H * W, W, 1, C * H * W, stride * W, stride) 147 | strides = grad_output.itemsize * np.array(strides) 148 | dout_stride = np.lib.stride_tricks.as_strided( 149 | grad_output, shape=shape, strides=strides) 150 | dout_cols = np.ascontiguousarray(dout_stride) 151 | dout_cols.shape = (C * HH * WW, N * out_h * out_w) 152 | 153 | # Now all our convolutions are a big matrix multiply 154 | res = w.reshape(F, -1).dot(dout_cols) 155 | 156 | res.shape = (F, N, out_h, out_w) 157 | out = res.transpose(1, 0, 2, 3) 158 | 159 | self.grad_input = np.ascontiguousarray(out) 160 | return self.grad_input 161 | 162 | def type(self, type, cache): 163 | pass 164 | 165 | def __str__(self): 166 | pass 167 | 168 | def clear_state(self): 169 | pass 170 | -------------------------------------------------------------------------------- /pyfunt/spatial_max_pooling.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | try: 4 | from im2col_cyt import im2col_cython, col2im_cython 5 | except ImportError: 6 | print('Installation broken, please reinstall PyFunt') 7 | 8 | 9 | class SpatialMaxPooling(Module): 10 | 11 | """docstring for SpatialMaxPooling""" 12 | 13 | def __init__(self, kW, kH, dW=1, dH=1, padW=0, padH=0): 14 | super(SpatialMaxPooling, self).__init__() 15 | self.kW = kW 16 | self.kH = kH 17 | self.dW = dW 18 | self.dH = dH 19 | self.padW = padW 20 | self.padH = padH 21 | self.ceil_mode = False 22 | self.count_include_pad = True 23 | self.divide = True 24 | 25 | def ceil(self): 26 | # TODO: 27 | self.ceil_mode = True 28 | 29 | def floor(self): 30 | # TODO: 31 | self.ceil_mode = False 32 | 33 | def set_count_include_pad(self): 34 | # TODO: 35 | self.count_include_pad = True 36 | 37 | def set_count_exclude_pad(self): 38 | # TODO: 39 | self.count_include_pad = False 40 | 41 | def update_output(self, x): 42 | N, C, H, W = x.shape 43 | pool_height, pool_width = self.kW, self.kH 44 | stride = self.dW 45 | 46 | assert (H - pool_height) % stride == 0, 'Invalid height' 47 | assert (W - pool_width) % stride == 0, 'Invalid width' 48 | 49 | out_height = (H - pool_height) / stride + 1 50 | out_width = (W - pool_width) / stride + 1 51 | 52 | x_split = x.reshape(N * C, 1, H, W) 53 | x_cols = im2col_cython( 54 | x_split, pool_height, pool_width, padding=0, stride=stride) 55 | x_cols_argmax = np.argmax(x_cols, axis=0) 56 | x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])] 57 | out = x_cols_max.reshape( 58 | out_height, out_width, N, C).transpose(2, 3, 0, 1) 59 | 60 | self.x_shape = x.shape 61 | self.x_cols = x_cols 62 | self.x_cols_argmax = x_cols_argmax 63 | self.output = out 64 | return self.output 65 | 66 | def update_grad_input(self, x, grad_output, scale=1): 67 | x_cols = self.x_cols 68 | x_cols_argmax = self.x_cols_argmax 69 | dout = grad_output 70 | N, C, H, W = x.shape 71 | pool_height, pool_width = self.kW, self.kH 72 | stride = self.dW 73 | 74 | dout_reshaped = dout.transpose(2, 3, 0, 1).flatten() 75 | dx_cols = np.zeros_like(x_cols) 76 | dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped 77 | dx = 
col2im_cython(dx_cols, N * C, 1, H, W, pool_height, pool_width, 78 | padding=0, stride=stride) 79 | dx = dx.reshape(self.x_shape) 80 | self.grad_input = dx 81 | return self.grad_input 82 | 83 | def reset(self): 84 | pass 85 | 86 | def __str__(self): 87 | pass 88 | -------------------------------------------------------------------------------- /pyfunt/spatial_reflection_padding.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class SpatialReflectionPadding(Module): 6 | 7 | def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): 8 | super(SpatialReflectionPadding, self).__init__() 9 | self.pad_l = pad_l 10 | self.pad_r = pad_r or self.pad_l 11 | self.pad_t = pad_t or self.pad_l 12 | self.pad_b = pad_b or self.pad_l 13 | 14 | def update_output(self, x): 15 | if x.ndim == 3: 16 | self.output = np.pad( 17 | x, ((0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'reflect') 18 | elif x.ndim == 4: 19 | self.output = np.pad( 20 | x, ((0, 0), (0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'reflect') 21 | 22 | else: 23 | raise Exception('input must be 3 or 4-dimensional') 24 | return self.output 25 | 26 | def update_grad_input(self, x, grad_output): 27 | if x.ndim == grad_output.ndim == 3: 28 | if not (x.shape[0] == grad_output.shape[0] and 29 | x.shape[1] + self.pad_t + self.pad_b == grad_output.shape[1] and 30 | x.shape[2] + self.pad_l + self.pad_r == grad_output.shape[2]): 31 | raise Exception('input and gradOutput must be compatible in size') 32 | self.grad_input = grad_output[:, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 33 | elif x.ndim == grad_output.ndim == 4: 34 | if not (x.shape[0] == grad_output.shape[0] and 35 | x.shape[1] == grad_output.shape[1] and 36 | x.shape[2] + self.pad_t + self.pad_b == grad_output.shape[2] and 37 | x.shape[3] + self.pad_l + self.pad_r == grad_output.shape[3]): 38 | raise Exception('input and gradOutput must be compatible in size') 39 | self.grad_input = grad_output[:, :, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 40 | else: 41 | raise Exception( 42 | 'input and gradOutput must be 3 or 4-dimensional and have equal number of dimensions') 43 | return self.grad_input 44 | 45 | def __str__(self): 46 | return str(type(self)) + '(l=%d, r=%d, t=%d, b=%d)' % (self.pad_l, self.pad_r, self.pad_t, self.pad_b) 47 | -------------------------------------------------------------------------------- /pyfunt/spatial_replication_padding.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class SpatialReplicationPadding(Module): 6 | 7 | def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): 8 | super(SpatialReplicationPadding, self).__init__() 9 | self.pad_l = pad_l 10 | self.pad_r = pad_r or self.pad_l 11 | self.pad_t = pad_t or self.pad_l 12 | self.pad_b = pad_b or self.pad_l 13 | 14 | def update_output(self, x): 15 | if x.ndim == 3: 16 | self.output = np.pad( 17 | x, ((0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'edge') 18 | elif x.ndim == 4: 19 | self.output = np.pad( 20 | x, ((0, 0), (0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'edge') 21 | 22 | else: 23 | raise Exception('input must be 3 or 4-dimensional') 24 | return self.output 25 | 26 | def update_grad_input(self, x, grad_output): 27 | if x.ndim == grad_output.ndim == 3: 28 | if not (x.shape[0] == grad_output.shape[0] and 29 | x.shape[1] + self.pad_t + 
self.pad_b == grad_output.shape[1] and 30 | x.shape[2] + self.pad_l + self.pad_r == grad_output.shape[2]): 31 | raise Exception('input and gradOutput must be compatible in size') 32 | self.grad_input = grad_output[:, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 33 | elif x.ndim == grad_output.ndim == 4: 34 | if not (x.shape[0] == grad_output.shape[0] and 35 | x.shape[1] == grad_output.shape[1] and 36 | x.shape[2] + self.pad_t + self.pad_b == grad_output.shape[2] and 37 | x.shape[3] + self.pad_l + self.pad_r == grad_output.shape[3]): 38 | raise Exception('input and gradOutput must be compatible in size') 39 | self.grad_input = grad_output[:, :, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 40 | else: 41 | raise Exception( 42 | 'input and gradOutput must be 3 or 4-dimensional and have equal number of dimensions') 43 | return self.grad_input 44 | 45 | def __str__(self): 46 | return str(type(self)) + '(l=%d, r=%d, t=%d, b=%d)' % (self.pad_l, self.pad_r, self.pad_t, self.pad_b) 47 | -------------------------------------------------------------------------------- /pyfunt/spatial_up_sampling_nearest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | from module import Module 4 | import numpy as np 5 | try: 6 | from im2col_cyt import im2col_cython, col2im_cython 7 | except ImportError: 8 | print('Installation broken, please reinstall PyFunt') 9 | 10 | from numpy.lib.stride_tricks import as_strided 11 | 12 | 13 | def tile_array(a, b1, b2): 14 | r, c = a.shape 15 | rs, cs = a.strides 16 | x = as_strided(a, (r, b1, c, b2), (rs, 0, cs, 0)) 17 | return x.reshape(r*b1, c*b2) 18 | 19 | 20 | class SpatialUpSamplingNearest(Module): 21 | 22 | def __init__(self, scale): 23 | super(SpatialUpSamplingNearest, self).__init__() 24 | self.scale_factor = scale 25 | if self.scale_factor < 1: 26 | raise Exception('scale_factor must be greater than 1') 27 | if np.floor(self.scale_factor) != self.scale_factor: 28 | raise Exception('scale_factor must be integer') 29 | 30 | def update_output(self, x): 31 | out_size = x.shape 32 | out_size[x.ndim - 1] *= self.scale_factor 33 | out_size[x.ndim - 2] *= self.scale_factor 34 | N, C, H, W = out_size 35 | 36 | stride = self.scale_factor 37 | pool_height = pool_width = stride 38 | 39 | x_reshaped = x.transpose(2, 3, 0, 1).flatten() 40 | out_cols = np.zeros(out_size) 41 | out_cols[:, np.arange(out_cols.shape[1])] = x_reshaped 42 | out = col2im_cython(out_cols, N * C, 1, H, W, pool_height, pool_width, 43 | padding=0, stride=stride) 44 | out = out.reshape(out_size) 45 | return self.grad_input 46 | 47 | return self.output 48 | 49 | def update_grad_input(self, x, grad_output, scale=1): 50 | 51 | N, C, H, W = grad_output.shape 52 | pool_height = pool_width = self.scale_factor 53 | stride = self.scale_factor 54 | 55 | out_height = (H - pool_height) / stride + 1 56 | out_width = (W - pool_width) / stride + 1 57 | 58 | grad_output_split = grad_output.reshape(N * C, 1, H, W) 59 | grad_output_cols = im2col_cython( 60 | grad_output_split, pool_height, pool_width, padding=0, stride=stride) 61 | grad_intput_cols = grad_output_cols[0, np.arange(grad_output_cols.shape[1])] 62 | grad_input = grad_intput_cols.reshape( 63 | out_height, out_width, N, C).transpose(2, 3, 0, 1) 64 | 65 | self.output = grad_input 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /pyfunt/tanh.py: -------------------------------------------------------------------------------- 1 | from 
module import Module 2 | import numpy as np 3 | 4 | 5 | class Tanh(Module): 6 | 7 | def __init__(self, th=1e-6, v=0, ip=False): 8 | super(Tanh, self).__init__() 9 | self.th = th 10 | self.val = v 11 | self.inplace = ip 12 | 13 | def update_output(self, x): 14 | self.output = np.tanh(x) 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | self.grad_input = grad_output * (1 - np.power(self.output, 2)) 19 | return self.grad_input 20 | -------------------------------------------------------------------------------- /pyfunt/threshold.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Threshold(Module): 6 | 7 | def __init__(self, th=1e-6, v=0, ip=False): 8 | super(Threshold, self).__init__() 9 | self.th = th 10 | self.val = v 11 | self.inplace = ip 12 | 13 | def update_output(self, x): 14 | self.output = np.maximum(self.th, x) 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | dx = np.array(grad_output, copy=True) 19 | dx[x <= 0] = 0 20 | self.grad_input = dx 21 | return self.grad_input 22 | 23 | def validate_parameters(self): 24 | if self.inplace: 25 | if self.val > self.th: 26 | raise Exception('in-place processing requires value not exceed threshold') 27 | 28 | def reset(self): 29 | pass 30 | -------------------------------------------------------------------------------- /pyfunt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from load_torch_model import (load_t7model, load_t7checkpoint, load_parser_init, load_parser_vals) 2 | from gradient_check import eval_numerical_gradient_array 3 | from . import * 4 | -------------------------------------------------------------------------------- /pyfunt/utils/gradient_check.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import randrange 3 | 4 | 5 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 6 | ''' 7 | a naive implementation of numerical gradient of f at x 8 | - f should be a function that takes a single argument 9 | - x is the point (numpy array) to evaluate the gradient at 10 | ''' 11 | grad = np.zeros_like(x) 12 | # iterate over all indexes in x 13 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 14 | while not it.finished: 15 | 16 | # evaluate function at x+h 17 | ix = it.multi_index 18 | oldval = x[ix] 19 | x[ix] = oldval + h # increment by h 20 | fxph = f(x) # evalute f(x + h) 21 | x[ix] = oldval - h 22 | fxmh = f(x) # evaluate f(x - h) 23 | x[ix] = oldval # restore 24 | 25 | # compute the partial derivative with centered formula 26 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 27 | if verbose: 28 | print(x), grad[ix] 29 | it.iternext() # step to next dimension 30 | 31 | return grad 32 | 33 | 34 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 35 | ''' 36 | Evaluate a numeric gradient for a function that accepts a numpy 37 | array and returns a numpy array. 
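    A minimal usage sketch (illustrative only; `f`, `x` and `dout` below are
    made-up inputs, not part of this module): numerically differentiate an
    element-wise ReLU and compare against its known analytic gradient.

        f = lambda t: np.maximum(t, 0)   # forward function to differentiate
        x = np.random.randn(4, 5)        # point at which to evaluate the gradient
        dout = np.random.randn(4, 5)     # upstream gradient
        dx_num = eval_numerical_gradient_array(f, x, dout)
        # dx_num should closely match the analytic result dout * (x > 0)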
38 | ''' 39 | grad = np.zeros_like(x) 40 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 41 | while not it.finished: 42 | ix = it.multi_index 43 | 44 | oldval = x[ix] 45 | x[ix] = oldval + h 46 | pos = f(x).copy() 47 | x[ix] = oldval - h 48 | neg = f(x).copy() 49 | x[ix] = oldval 50 | 51 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 52 | it.iternext() 53 | return grad 54 | 55 | 56 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 57 | ''' 58 | Compute numeric gradients for a function that operates on input 59 | and output blobs. 60 | 61 | We assume that f accepts several input blobs as arguments, followed by a blob 62 | into which outputs will be written. For example, f might be called like this: 63 | 64 | f(x, w, out) 65 | 66 | where x and w are input Blobs, and the result of f will be written to out. 67 | 68 | Inputs: 69 | - f: function 70 | - inputs: tuple of input blobs 71 | - output: output blob 72 | - h: step size 73 | ''' 74 | numeric_diffs = [] 75 | for input_blob in inputs: 76 | diff = np.zeros_like(input_blob.diffs) 77 | it = np.nditer(input_blob.vals, flags=['multi_index'], 78 | op_flags=['readwrite']) 79 | while not it.finished: 80 | idx = it.multi_index 81 | orig = input_blob.vals[idx] 82 | 83 | input_blob.vals[idx] = orig + h 84 | f(*(inputs + (output,))) 85 | pos = np.copy(output.vals) 86 | input_blob.vals[idx] = orig - h 87 | f(*(inputs + (output,))) 88 | neg = np.copy(output.vals) 89 | input_blob.vals[idx] = orig 90 | 91 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 92 | 93 | it.iternext() 94 | numeric_diffs.append(diff) 95 | return numeric_diffs 96 | 97 | 98 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 99 | return eval_numerical_gradient_blobs(lambda *args: net.forward(), 100 | inputs, output, h=h) 101 | 102 | 103 | def grad_check_sparse(f, x, analytic_grad, num_checks): 104 | ''' 105 | sample a few random elements and only return numerical 106 | in this dimensions. 107 | ''' 108 | h = 1e-5 109 | 110 | x.shape 111 | for i in xrange(num_checks): 112 | ix = tuple([randrange(m) for m in x.shape]) 113 | 114 | oldval = x[ix] 115 | x[ix] = oldval + h # increment by h 116 | fxph = f(x) # evaluate f(x + h) 117 | x[ix] = oldval - h # increment by h 118 | fxmh = f(x) # evaluate f(x - h) 119 | x[ix] = oldval # reset 120 | 121 | grad_numerical = (fxph - fxmh) / (2 * h) 122 | grad_analytic = analytic_grad[ix] 123 | rel_error = abs(grad_numerical - grad_analytic) / \ 124 | (abs(grad_numerical) + abs(grad_analytic)) 125 | print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error)) 126 | -------------------------------------------------------------------------------- /pyfunt/utils/load_torch_model.py: -------------------------------------------------------------------------------- 1 | import torchfile 2 | import pyfunt 3 | import pdb 4 | import re 5 | 6 | please_contribute = 'If you want you can fix it and make a pull request ;)' 7 | 8 | 9 | ''' 10 | _init (module) takes a dict for the torch layer and returns a tuple 11 | containing the values for the pyfunt layer initialization funciton. 12 | Once you wrote the function, add the reation in the load_parser_init dict. 13 | The same mechanism goes for the layer values using load_parser_vals dictt 14 | (gard input, output, weight, bias already get added). 
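A minimal sketch of that registration step (the layer name 'MyScale' and its
'constant' field are hypothetical, used only to illustrate the mechanism
described above):

    def my_scale_init(m):
        # return the positional args expected by the corresponding pyfunt constructor
        return (m['constant'],)

    load_parser_init['MyScale'] = my_scale_init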
15 | ''' 16 | 17 | 18 | def conv_init(m): 19 | return m['nInputPlane'], m['nOutputPlane'], m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'] 20 | 21 | 22 | def dropout_init(m): 23 | return m['p'], not m['v2'] 24 | 25 | 26 | def linear_init(m): 27 | return m['weight'].shape[1], m['weight'].shape[0], len(m['bias']) != 0 28 | 29 | 30 | def mul_constant_init(m): 31 | return (m['constant_scalar'],) 32 | 33 | 34 | def relu_init(m): 35 | return (m['inplace'],) 36 | 37 | 38 | def spatial_max_pooling_init(m): 39 | return m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'] 40 | 41 | 42 | def spatial_batch_normalization_init(m): 43 | return len(m['running_mean']), m['eps'], m['momentum'], len(m['weight']) > 0 44 | 45 | 46 | def spatial_average_pooling_init(m): 47 | return m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'] 48 | 49 | 50 | def spatial_full_convolution_init(m): 51 | return m['nInputPlane'], m['nOutputPlane'], m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'], m['adjW'], m['adjH'] 52 | 53 | 54 | def spatial_padding_init(m): 55 | return m['pad_l'], m['pad_r'], m['pad_t'], m['pad_b'] 56 | 57 | 58 | def view_init(m): 59 | return (m['size'],) 60 | 61 | 62 | load_parser_init = { 63 | 'Dropout': dropout_init, 64 | 'Linear': linear_init, 65 | 'MulConstant': mul_constant_init, 66 | 'ReLU': relu_init, 67 | 'SpatialConvolution': conv_init, 68 | 'SpatialMaxPooling': spatial_max_pooling_init, 69 | 'SpatialAvergaePooling': spatial_average_pooling_init, 70 | 'SpatialBatchNormalization': spatial_batch_normalization_init, 71 | 'SpatialFullConvolution': spatial_full_convolution_init, 72 | 'SpatialReflectionPadding': spatial_padding_init, 73 | 'SpatialReplicationPadding': spatial_padding_init, 74 | 'View': view_init 75 | } 76 | 77 | 78 | # def add_possible_values(module, tmodule): 79 | # print(len(dir(tmodule))) 80 | # for k in dir(tmodule): 81 | # if any(x.isupper() for x in k): 82 | # ourk = re.sub('([A-Z]+)', r'_\1', k).lower() 83 | # add_value(module, tmodule, ourk, k) 84 | # else: 85 | # add_value(module, tmodule, k) 86 | 87 | 88 | def dropout_vals(module, tmodule): 89 | add_value(module, tmodule, 'noise') 90 | 91 | 92 | def spatial_batch_normalization_vals(module, tmodule): 93 | add_value(module, tmodule, 'running_mean') 94 | add_value(module, tmodule, 'running_var') 95 | 96 | 97 | load_parser_vals = { 98 | 'Droput': dropout_vals, 99 | 'SpatialBatchNormalization': spatial_batch_normalization_vals 100 | } 101 | 102 | 103 | def load_t7model(path=None, obj=None, model=None, custom_layers=None): 104 | if not (path is None or obj is None): 105 | raise Exception('you must pass a path or a TorchObject') 106 | if path: 107 | o = torchfile.load(path) 108 | else: 109 | o = obj 110 | 111 | # import pdb; pdb.set_trace() 112 | if type(o) is torchfile.TorchObject: 113 | class_name = o._typename.split('.')[-1] 114 | tmodule = o._obj 115 | 116 | if not hasattr(pyfunt, class_name): 117 | print('class %s not found' % class_name) 118 | print(please_contribute) 119 | raise NotImplementedError 120 | 121 | Module = getattr(pyfunt, class_name) 122 | if not is_container(Module): 123 | raise Exception('model is a torchobj but not a container') 124 | model = Module() 125 | add_inout(model, tmodule) 126 | 127 | m = load_t7model(obj=tmodule, model=model, custom_layers=custom_layers) 128 | if not model: 129 | model = m 130 | else: 131 | 132 | for i, tmodule in enumerate(o.modules): 133 | if type(tmodule) is torchfile.TorchObject: 134 | class_name = tmodule._typename.split('.')[-1] 135 | tmodule_o = 
tmodule._obj 136 | 137 | if hasattr(pyfunt, class_name): 138 | Module = getattr(pyfunt, class_name) 139 | elif custom_layers and hasattr(custom_layers, class_name): 140 | Module = getattr(custom_layers, class_name) 141 | else: 142 | print('class %s not found' % class_name) 143 | print(please_contribute) 144 | raise NotImplementedError 145 | 146 | if i == 0 and model is None: 147 | if not is_container(Module): 148 | model = pyfunt.Sequential() 149 | # else: 150 | # model = Module() 151 | # model = load_t7model(obj=tmodule, model=model) 152 | # else: 153 | if is_container(Module): 154 | model.add( 155 | load_t7model(obj=tmodule, model=model, custom_layers=custom_layers)) 156 | else: 157 | if class_name in load_parser_init: 158 | args = load_parser_init[class_name](tmodule_o) 159 | module = Module(*args) 160 | else: 161 | try: 162 | module = Module() 163 | except: 164 | print('parser for %s not found' % class_name) 165 | print('%s cannot be initialized with no args' % 166 | class_name) 167 | print(please_contribute) 168 | raise NotImplementedError 169 | 170 | #add_possible_values(module, tmodule) 171 | add_inout(module, tmodule_o) 172 | add_w(module, tmodule_o) 173 | if class_name in load_parser_vals: 174 | load_parser_vals[class_name](module, tmodule_o) 175 | model.add(module) 176 | else: 177 | print('oops!') 178 | print(please_contribute) 179 | pdb.set_trace() 180 | raise NotImplementedError 181 | return model 182 | 183 | 184 | def is_container(tm): 185 | return pyfunt.container.Container in tm.__bases__ 186 | 187 | 188 | def add_value(module, tmodule, pname, tpname=None): 189 | tpname = tpname or pname 190 | if hasattr(module, pname): 191 | if tpname in tmodule: 192 | setattr(module, pname, tmodule[tpname]) 193 | 194 | 195 | def add_inout(module, tmodule): 196 | add_value(module, tmodule, 'output') 197 | add_value(module, tmodule, 'grad_input', 'gradInput') 198 | 199 | 200 | def add_w(module, tmodule): 201 | add_value(module, tmodule, 'weight') 202 | add_value(module, tmodule, 'bias') 203 | add_value(module, tmodule, 'grad_weight', 'gradWeight') 204 | add_value(module, tmodule, 'grad_bias', 'gradBias') 205 | 206 | 207 | def load_t7checkpoint(path, models_keys=['model'], custom_layers=None): 208 | # model_keys iterable that contains for example the word 'model' 209 | # the model to load in pyfunt 210 | cp = torchfile.load(path) 211 | for model in models_keys: 212 | cp[model] = load_t7model(obj=cp[model], custom_layers=custom_layers) 213 | return cp 214 | -------------------------------------------------------------------------------- /pyfunt/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | from math import sqrt, ceil 2 | import numpy as np 3 | 4 | 5 | def visualize_grid(Xs, ubound=255.0, padding=1, grid_size=None): 6 | ''' 7 | Reshape a 4D tensor of image data to a grid for easy visualization. 
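    For example (an illustrative calculation, not from the original docstring):
    with the default padding of 1, a batch of N = 16 images of shape
    (16, 32, 32, 3) gives grid_size_x = grid_size_y = 4 and an output grid of
    shape (4*32 + 3, 4*32 + 3, 3) = (131, 131, 3).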
8 | 9 | Inputs: 10 | - Xs: Data of shape (N, H, W, C) 11 | - ubound: Output grid will have values scaled to the range [0, ubound] 12 | - padding: The number of blank pixels between elements of the grid 13 | ''' 14 | (N, H, W, C) = Xs.shape 15 | 16 | if grid_size is None: 17 | grid_size_y = grid_size_x = int(ceil(sqrt(N))) 18 | elif type(grid_size) == tuple: 19 | grid_size_x = grid_size[0] 20 | grid_size_y = grid_size[1] 21 | else: 22 | grid_size_y = grid_size_x = grid_size 23 | 24 | grid_height = H * grid_size_y + padding * (grid_size_y - 1) 25 | grid_width = W * grid_size_x + padding * (grid_size_x - 1) 26 | grid = np.zeros((grid_height, grid_width, C)) 27 | next_idx = 0 28 | y0, y1 = 0, H 29 | for y in xrange(grid_size_y): 30 | x0, x1 = 0, W 31 | for x in xrange(grid_size_x): 32 | if next_idx < N: 33 | img = Xs[next_idx] 34 | low, high = np.min(img), np.max(img) 35 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 36 | # grid[y0:y1, x0:x1] = Xs[next_idx] 37 | next_idx += 1 38 | x0 += W + padding 39 | x1 += W + padding 40 | y0 += H + padding 41 | y1 += H + padding 42 | # grid_max = np.max(grid) 43 | # grid_min = np.min(grid) 44 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 45 | return grid 46 | 47 | 48 | def vis_grid(Xs): 49 | ''' visualize a grid of images ''' 50 | (N, H, W, C) = Xs.shape 51 | A = int(ceil(sqrt(N))) 52 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 53 | G *= np.min(Xs) 54 | n = 0 55 | for y in range(A): 56 | for x in range(A): 57 | if n < N: 58 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n, :, :, :] 59 | n += 1 60 | # normalize to [0,1] 61 | maxg = G.max() 62 | ming = G.min() 63 | G = (G - ming)/(maxg-ming) 64 | return G 65 | 66 | 67 | def vis_nn(rows): 68 | ''' visualize array of arrays of images ''' 69 | N = len(rows) 70 | D = len(rows[0]) 71 | H, W, C = rows[0][0].shape 72 | Xs = rows[0][0] 73 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 74 | for y in range(N): 75 | for x in range(D): 76 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 77 | # normalize to [0,1] 78 | maxg = G.max() 79 | ming = G.min() 80 | G = (G - ming)/(maxg-ming) 81 | return G 82 | -------------------------------------------------------------------------------- /pyfunt/view.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | 3 | 4 | class View(Module): 5 | 6 | def __init__(self, shape): 7 | super(View, self).__init__() 8 | if type(shape) is not tuple: 9 | shape = (shape,) 10 | self.shape = shape 11 | 12 | def update_output(self, x): 13 | self.output = x.view().reshape((x.shape[0],) + self.shape) 14 | return self.output 15 | 16 | def update_grad_input(self, x, grad_output): 17 | self.grad_input = grad_output.view().reshape(x.shape) 18 | return self.grad_input 19 | 20 | def reset(self): 21 | pass 22 | 23 | 24 | # class View(Module): 25 | 26 | # def __init__(self, args): 27 | # super(View, self).__init__() 28 | # self.reset_size(args) 29 | # self.num_input.ndim = None 30 | 31 | # def reset_size(self, args): 32 | # if len(args) == 1 and type(args[0]) == 'float64': 33 | # self.size = args[0] 34 | # else: 35 | # self.size = None 36 | # self.num_elements = 1 37 | # inferdim = False 38 | # for i in xrange(self.size): 39 | # szi = self.size[i] 40 | # if szi >= 0: 41 | # self.num_elements *= self.size[i] 42 | # else: 43 | # if szi != -1: 44 | # raise Exception('size should be positive or -1') 45 | # if inferdim: 46 | # raise Exception('only one dimension can be at -1') 47 | # inferdim = True 48 | 49 | # def 
update_output(self, x): 50 | # self.output = self.output or np.zeros_like(x) 51 | # batch_size = None 52 | # if batch_size: 53 | # self.output = x.view(batch_size, *self.size) 54 | # else: 55 | # self.output = x.view(self.size) 56 | # return self.output 57 | 58 | # def update_grad_input(self, x, grad_output): 59 | # self.grad_input = self.grad_input or np.zeros_like(grad_output) 60 | # self.grad_input = grad_output.view(x.size) 61 | # return self.grad_input 62 | 63 | # def __str__(self): 64 | # return '%s(%s)' % (type(self), self.size) 65 | 66 | # def reset(self): 67 | # pass 68 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.11.0 2 | tqdm == 3.8.0 3 | cython >= 0.24.1 4 | torchfile -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import subprocess 7 | 8 | ''' 9 | Original Source: https://github.com/scipy/scipy/blob/master/setup.py 10 | ''' 11 | 12 | if sys.version_info[:2] < (2, 6) or (3, 0) <= sys.version_info[0:2] < (3, 2): 13 | raise RuntimeError("Python version 2.6, 2.7 (TODO: >= 3.2) required.") 14 | 15 | if sys.version_info[0] < 3: 16 | import __builtin__ as builtins 17 | else: 18 | import builtins 19 | 20 | MAJOR = 0 21 | MINOR = 1 22 | MICRO = 0 23 | ISRELEASED = False 24 | 25 | VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) 26 | 27 | with open('./requirements.txt') as f: 28 | required = f.read().splitlines() 29 | 30 | # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly 31 | # update it when the contents of directories change. 32 | if os.path.exists('MANIFEST'): 33 | os.remove('MANIFEST') 34 | 35 | 36 | # Return the git revision as a string 37 | def git_version(): 38 | def _minimal_ext_cmd(cmd): 39 | # construct minimal environment 40 | env = {} 41 | for k in ['SYSTEMROOT', 'PATH']: 42 | v = os.environ.get(k) 43 | if v is not None: 44 | env[k] = v 45 | # LANGUAGE is used on win32 46 | env['LANGUAGE'] = 'C' 47 | env['LANG'] = 'C' 48 | env['LC_ALL'] = 'C' 49 | out = subprocess.Popen( 50 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 51 | return out 52 | 53 | try: 54 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 55 | GIT_REVISION = out.strip().decode('ascii') 56 | except OSError: 57 | GIT_REVISION = "Unknown" 58 | 59 | return GIT_REVISION 60 | 61 | 62 | # This is a bit hackish: we are setting a global variable so that the main 63 | # pyfunt __init__ can detect if it is being loaded by the setup routine, to 64 | # avoid attempting to load components that aren't built yet. While ugly, it's 65 | # a lot more robust than what was previously being used. 66 | builtins.__PUFUNT_SETUP__ = True 67 | 68 | 69 | def get_version_info(): 70 | # Adding the git rev number needs to be done inside 71 | # write_version_py(), otherwise the import of pyfunt.version messes 72 | # up the build under Python 3. 
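    # Illustrative note (not in the original source): with the defaults above
    # (MAJOR.MINOR.MICRO = 0.1.0, ISRELEASED = False), a development build gets
    # a version string of the form '0.1.0.dev0+1a2b3c4', i.e. the base version
    # plus the first seven characters of the current git revision.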
73 | FULLVERSION = VERSION 74 | if os.path.exists('.git'): 75 | GIT_REVISION = git_version() 76 | elif os.path.exists('pyfunt/version.py'): 77 | # must be a source distribution, use existing version file 78 | # load it as a separate module to not load pyfunt/__init__.py 79 | import imp 80 | version = imp.load_source('pyfunt.version', 'pyfunt/version.py') 81 | GIT_REVISION = version.git_revision 82 | else: 83 | GIT_REVISION = "Unknown" 84 | 85 | if not ISRELEASED: 86 | FULLVERSION += '.dev0+' + GIT_REVISION[:7] 87 | 88 | return FULLVERSION, GIT_REVISION 89 | 90 | 91 | def write_version_py(filename='pyfunt/version.py'): 92 | cnt = """\ 93 | # THIS FILE IS GENERATED FROM PYFUNT SETUP.PY\ 94 | short_version = '%(version)s'\ 95 | version = '%(version)s'\ 96 | full_version = '%(full_version)s'\ 97 | git_revision = '%(git_revision)s'\ 98 | release = %(isrelease)s\ 99 | if not release:\ 100 | version = full_version\ 101 | """ 102 | FULLVERSION, GIT_REVISION = get_version_info() 103 | 104 | a = open(filename, 'w') 105 | try: 106 | a.write(cnt % {'version': VERSION, 107 | 'full_version': FULLVERSION, 108 | 'git_revision': GIT_REVISION, 109 | 'isrelease': str(ISRELEASED)}) 110 | finally: 111 | a.close() 112 | 113 | 114 | def generate_cython(): 115 | cwd = os.path.abspath(os.path.dirname(__file__)) 116 | print("Cythonizing sources") 117 | p = subprocess.call([sys.executable, 118 | os.path.join(cwd, 'tools', 'cythonize.py'), 119 | 'pyfunt'], 120 | cwd=cwd) 121 | if p != 0: 122 | raise RuntimeError("Running cythonize failed!") 123 | 124 | 125 | def configuration(parent_package='', top_path=None): 126 | from numpy.distutils.misc_util import Configuration 127 | config = Configuration(None, parent_package, top_path) 128 | config.set_options(ignore_setup_xxx_py=True, 129 | assume_default_configuration=True, 130 | delegate_options_to_subpackages=True, 131 | quiet=True) 132 | 133 | config.add_subpackage('pyfunt') 134 | config.add_data_files(('pyfunt', '*.txt')) 135 | 136 | config.get_version('pyfunt/version.py') 137 | 138 | return config 139 | 140 | 141 | def setup_package(): 142 | 143 | # Rewrite the version file every time 144 | 145 | write_version_py() 146 | cmdclass = {} 147 | 148 | # Figure out whether to add ``*_requires = ['numpy']``. 149 | # We don't want to do that unconditionally, because we risk updating 150 | # an installed numpy which fails too often. Just if it's not installed, we 151 | # may give it a try. See gh-3379. 152 | build_requires = [] 153 | try: 154 | import numpy 155 | if (len(sys.argv) >= 2 and sys.argv[1] == 'bdist_wheel' and 156 | sys.platform == 'darwin'): 157 | # We're ony building wheels for platforms where we know there's 158 | # also a Numpy wheel, so do this unconditionally. See gh-5184. 
159 | build_requires = ['numpy>=1.7.1'] 160 | except: 161 | build_requires = ['numpy>=1.7.1'] 162 | 163 | metadata = dict( 164 | name="pyfunt", 165 | author="Daniele Ettore Ciriello", 166 | author_email="ciriello.daniele@gmail.com", 167 | version="1.1.0", 168 | license="MIT", 169 | url="https://github.com/dnlcrl/PyFunt", 170 | download_url="https://github.com/dnlcrl/PyFunt", 171 | description="Pythonic Deep Learning Framework", 172 | packages=['pyfunt', 'pyfunt/examples', 'pyfunt/utils', 'pyfunt/examples/residual_networks', ], 173 | cmdclass=cmdclass, # {'build_ext': build_ext}, 174 | platforms=["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"], 175 | setup_requires=build_requires, 176 | install_requires=required, 177 | # ext_modules=extensions, 178 | keywords='pyfunt deep learning artificial neural network convolution', 179 | ) 180 | 181 | if len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or 182 | sys.argv[1] in ('--help-commands', 'egg_info', '--version', 183 | 'clean')): 184 | # For these actions, NumPy is not required. 185 | # 186 | # They are required to succeed without Numpy for example when 187 | # pip is used to install Scipy when Numpy is not yet present in 188 | # the system. 189 | try: 190 | from setuptools import setup 191 | except ImportError: 192 | from distutils.core import setup 193 | else: 194 | if (len(sys.argv) >= 2 and sys.argv[1] in ('bdist_wheel', 'bdist_egg')) or ( 195 | 'develop' in sys.argv): 196 | # bdist_wheel/bdist_egg needs setuptools 197 | import setuptools 198 | 199 | from numpy.distutils.core import setup 200 | 201 | cwd = os.path.abspath(os.path.dirname(__file__)) 202 | if not os.path.exists(os.path.join(cwd, 'PKG-INFO')): 203 | # Generate Cython sources, unless building from source release 204 | generate_cython() 205 | 206 | metadata['configuration'] = configuration 207 | 208 | print 'setup complete' 209 | setup(**metadata) 210 | 211 | if __name__ == '__main__': 212 | setup_package() 213 | -------------------------------------------------------------------------------- /tools/cythonize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ cythonize 3 | 4 | SOURCE: https://github.com/scipy/scipy/blob/master/setup.py 5 | 6 | Cythonize pyx files into C files as needed. 7 | 8 | Usage: cythonize [root_dir] 9 | 10 | Default [root_dir] is 'pyfunt'. 11 | 12 | Checks pyx files to see if they have been changed relative to their 13 | corresponding C files. If they have, then runs cython on these files to 14 | recreate the C files. 15 | 16 | The script thinks that the pyx files have changed relative to the C files 17 | by comparing hashes stored in a database file. 18 | 19 | Simple script to invoke Cython (and Tempita) on all .pyx (.pyx.in) 20 | files; while waiting for a proper build system. Uses file hashes to 21 | figure out if rebuild is needed. 22 | 23 | For now, this script should be run by developers when changing Cython files 24 | only, and the resulting C files checked in, so that end-users (and Python-only 25 | developers) do not get the Cython/Tempita dependencies. 26 | 27 | Originally written by Dag Sverre Seljebotn, and copied here from: 28 | 29 | https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py 30 | 31 | Note: this script does not check any of the dependent C libraries; it only 32 | operates on the Cython .pyx files. 
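Example invocation from the repository root (the argument is optional, since
the default root_dir is 'pyfunt'; setup.py's generate_cython() calls this
script in the same way):

    $ python tools/cythonize.py pyfunt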
33 | """ 34 | 35 | from __future__ import division, print_function, absolute_import 36 | 37 | import os 38 | import re 39 | import sys 40 | import hashlib 41 | import subprocess 42 | 43 | HASH_FILE = 'cythonize.dat' 44 | DEFAULT_ROOT = 'pyfunt' 45 | 46 | # WindowsError is not defined on unix systems 47 | try: 48 | WindowsError 49 | except NameError: 50 | WindowsError = None 51 | 52 | # 53 | # Rules 54 | # 55 | 56 | 57 | def process_pyx(fromfile, tofile): 58 | try: 59 | from Cython.Compiler.Version import version as cython_version 60 | from distutils.version import LooseVersion 61 | if LooseVersion(cython_version) < LooseVersion('0.22'): 62 | raise Exception('Building PyFunt requires Cython >= 0.22') 63 | 64 | except ImportError: 65 | pass 66 | 67 | flags = ['--fast-fail'] 68 | if tofile.endswith('.cxx'): 69 | flags += ['--cplus'] 70 | 71 | try: 72 | try: 73 | # if fromfile == 'im2col_cython.pyx': 74 | # print('compiling im2col_cython') 75 | # r = subprocess.call( 76 | # ['python', 'pyfunt/layers/setup.py', 'build_ext', '--inplace']) 77 | # else: 78 | r = subprocess.call( 79 | ['cython'] + flags + ["-o", tofile, fromfile]) 80 | if r != 0: 81 | raise Exception('Cython failed') 82 | 83 | except OSError: 84 | # There are ways of installing Cython that don't result in a cython 85 | # executable on the path, see gh-2397. 86 | r = subprocess.call([sys.executable, '-c', 87 | 'import sys; from Cython.Compiler.Main import ' 88 | 'setuptools_main as main; sys.exit(main())'] + flags + 89 | ["-o", tofile, fromfile]) 90 | if r != 0: 91 | raise Exception("Cython either isn't installed or it failed.") 92 | except OSError: 93 | raise OSError('Cython needs to be installed') 94 | 95 | 96 | def process_tempita_pyx(fromfile, tofile): 97 | try: 98 | try: 99 | from Cython import Tempita as tempita 100 | except ImportError: 101 | import tempita 102 | except ImportError: 103 | raise Exception('Building PyFunt requires Tempita: ' 104 | 'pip install --user Tempita') 105 | from_filename = tempita.Template.from_filename 106 | template = from_filename(fromfile, encoding=sys.getdefaultencoding()) 107 | pyxcontent = template.substitute() 108 | assert fromfile.endswith('.pyx.in') 109 | pyxfile = fromfile[:-len('.pyx.in')] + '.pyx' 110 | with open(pyxfile, "w") as f: 111 | f.write(pyxcontent) 112 | process_pyx(pyxfile, tofile) 113 | 114 | rules = { 115 | # fromext : function 116 | '.pyx': process_pyx, 117 | '.pyx.in': process_tempita_pyx 118 | } 119 | # 120 | # Hash db 121 | # 122 | 123 | 124 | def load_hashes(filename): 125 | # Return { filename : (sha1 of input, sha1 of output) } 126 | if os.path.isfile(filename): 127 | hashes = {} 128 | with open(filename, 'r') as f: 129 | for line in f: 130 | filename, inhash, outhash = line.split() 131 | hashes[filename] = (inhash, outhash) 132 | else: 133 | hashes = {} 134 | return hashes 135 | 136 | 137 | def save_hashes(hash_db, filename): 138 | with open(filename, 'w') as f: 139 | for key, value in sorted(hash_db.items()): 140 | f.write("%s %s %s\n" % (key, value[0], value[1])) 141 | 142 | 143 | def sha1_of_file(filename): 144 | h = hashlib.sha1() 145 | with open(filename, "rb") as f: 146 | h.update(f.read()) 147 | return h.hexdigest() 148 | 149 | # 150 | # Main program 151 | # 152 | 153 | 154 | def normpath(path): 155 | path = path.replace(os.sep, '/') 156 | if path.startswith('./'): 157 | path = path[2:] 158 | return path 159 | 160 | 161 | def get_hash(frompath, topath): 162 | from_hash = sha1_of_file(frompath) 163 | to_hash = sha1_of_file(topath) if os.path.exists(topath) else 
None 164 | return (from_hash, to_hash) 165 | 166 | 167 | def process(path, fromfile, tofile, processor_function, hash_db): 168 | fullfrompath = os.path.join(path, fromfile) 169 | fulltopath = os.path.join(path, tofile) 170 | current_hash = get_hash(fullfrompath, fulltopath) 171 | if current_hash == hash_db.get(normpath(fullfrompath), None): 172 | print('%s has not changed' % fullfrompath) 173 | return 174 | 175 | orig_cwd = os.getcwd() 176 | try: 177 | os.chdir(path) 178 | print('Processing %s to %s' % (fullfrompath, fulltopath)) 179 | processor_function(fromfile, tofile) 180 | finally: 181 | os.chdir(orig_cwd) 182 | # changed target file, recompute hash 183 | current_hash = get_hash(fullfrompath, fulltopath) 184 | # store hash in db 185 | hash_db[normpath(fullfrompath)] = current_hash 186 | 187 | 188 | def find_process_files(root_dir): 189 | hash_db = load_hashes(HASH_FILE) 190 | for cur_dir, dirs, files in os.walk(root_dir): 191 | for filename in files: 192 | in_file = os.path.join(cur_dir, filename + ".in") 193 | if filename.endswith('.pyx') and os.path.isfile(in_file): 194 | continue 195 | for fromext, function in rules.items(): 196 | if filename.endswith(fromext): 197 | toext = ".c" 198 | with open(os.path.join(cur_dir, filename), 'rb') as f: 199 | data = f.read() 200 | m = re.search( 201 | br"^\s*#\s*distutils:\s*language\s*=\s*c\+\+\s*$", data, re.I | re.M) 202 | if m: 203 | toext = ".cxx" 204 | fromfile = filename 205 | tofile = filename[:-len(fromext)] + toext 206 | process(cur_dir, fromfile, tofile, function, hash_db) 207 | save_hashes(hash_db, HASH_FILE) 208 | 209 | 210 | def main(): 211 | try: 212 | root_dir = sys.argv[1] 213 | except IndexError: 214 | root_dir = DEFAULT_ROOT 215 | find_process_files(root_dir) 216 | 217 | 218 | if __name__ == '__main__': 219 | main() 220 | --------------------------------------------------------------------------------
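A closing note on the hash database used by tools/cythonize.py above: save_hashes()
writes one whitespace-separated record per processed file (source path, SHA-1 of the
.pyx input, SHA-1 of the generated C/C++ output), and load_hashes() reads the same
format back. A sketch of what cythonize.dat might contain (the path is illustrative,
taken from the Cython module imported elsewhere in this package; the digests are
placeholders, not real hashes):

    pyfunt/im2col_cyt.pyx 3f786850e387550fdab836ed7e6dc881de23001b 89e6c98d92887913cadf06b2adb97f26cde4849b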