├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── pyfunt ├── __init__.py ├── affine.py ├── batch_normalization.py ├── c_add_table.py ├── class_nll_criterion.py ├── concat_table.py ├── container.py ├── criterion.py ├── dropout.py ├── examples │ ├── __init__.py │ ├── model_testing │ │ └── test_model.py │ └── residual_networks │ │ ├── __init__.py │ │ ├── resnet.py │ │ └── train-cifar.py ├── identity.py ├── im2col.py ├── im2col_cyt.pyx ├── linear.py ├── log_soft_max.py ├── module.py ├── mul_constant.py ├── optim.py ├── padding.py ├── parallel.py ├── relu.py ├── reshape.py ├── sequential.py ├── setup.py ├── sigmoid.py ├── soft_max.py ├── solver.py ├── spatial_average_pooling.py ├── spatial_batch_normalitazion.py ├── spatial_convolution.py ├── spatial_full_convolution.py ├── spatial_max_pooling.py ├── spatial_reflection_padding.py ├── spatial_replication_padding.py ├── spatial_up_sampling_nearest.py ├── tanh.py ├── threshold.py ├── utils │ ├── __init__.py │ ├── gradient_check.py │ ├── load_torch_model.py │ └── vis_utils.py └── view.py ├── requirements.txt ├── setup.py └── tools └── cythonize.py /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | CIFAR_DATASET_PATH 3 | .ipynb_checkpoints/ 4 | *.ipynb 5 | test.py 6 | 7 | # Editor temporary/working/backup files # 8 | ######################################### 9 | .#* 10 | [#]*# 11 | *~ 12 | *$ 13 | *.bak 14 | .idea/* 15 | *.kdev4 16 | *.org 17 | .project 18 | .pydevproject 19 | *.rej 20 | .settings/ 21 | .*.sw[nop] 22 | .sw[nop] 23 | *.tmp 24 | *.vim 25 | tags 26 | 27 | # Compiled source # 28 | ################### 29 | *.a 30 | *.com 31 | *.class 32 | *.dll 33 | *.exe 34 | *.l[ao] 35 | *.o 36 | *.py[ocd] 37 | *.so 38 | _configtest.c 39 | 40 | # Packages # 41 | ############ 42 | # it's better to unpack these files and commit the raw source 43 | # git has its own built in compression methods 44 | *.7z 45 | *.bz2 46 | *.bzip2 47 | *.dmg 48 | *.gz 49 | *.iso 50 | *.jar 51 | *.rar 52 | *.tar 53 | *.tbz2 54 | *.tgz 55 | *.zip 56 | 57 | # Python files # 58 | ################ 59 | # setup.py working directory 60 | build 61 | # sphinx build directory 62 | doc/_build 63 | # cython files 64 | cythonize.dat 65 | # setup.py dist directory 66 | dist 67 | # Egg metadata 68 | *.egg-info 69 | # tox testing tool 70 | .tox 71 | # The shelf plugin uses this dir 72 | ./.shelf 73 | MANIFEST 74 | # distutils configuration 75 | site.cfg 76 | # other temporary files 77 | .coverage 78 | .deps 79 | .libs 80 | 81 | # Paver generated files # 82 | ######################### 83 | /release 84 | 85 | # Logs and databases # 86 | ###################### 87 | *.log 88 | *.sql 89 | *.sqlite 90 | 91 | # Patches # 92 | ########### 93 | *.patch 94 | *.diff 95 | 96 | # OS generated files # 97 | ###################### 98 | .directory 99 | .fseventsd 100 | .DS_Store* 101 | .gdb_history 102 | .VolumeIcon.icns 103 | ehthumbs.db 104 | Icon? 
105 | Thumbs.db 106 | 107 | # Documentation generated files # 108 | ################################# 109 | doc/frontpage/build 110 | doc/source/generated 111 | 112 | # Things specific to this project # 113 | ################################### 114 | 115 | pyfunt/version.py 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Daniele Ciriello 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include *.txt 3 | # Top-level build scripts 4 | include setup.py setupegg.py bscript bento.info 5 | # All source files 6 | recursive-include pyfunt * 7 | # All documentation 8 | recursive-include doc * 9 | # Add build and testing tools 10 | include tox.ini 11 | recursive-include tools * 12 | # Cached Cython signatures 13 | include cythonize.dat 14 | # Exclude what we don't want to include 15 | recursive-exclude pyfunt/linalg/src/id_dist/src *_subr_*.f 16 | prune doc/build 17 | prune doc/source/generated 18 | prune */__pycache__ 19 | global-exclude *.pyc *~ *.bak *.swp *.pyo -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyFunt (/paɪfʊnt/) 2 | 3 | ![Project frozen](https://img.shields.io/badge/status-frozen-blue.png) ![Project unmaintained](https://img.shields.io/badge/project-unmaintained-red.svg) 4 | 5 | Pythonic Deep Learning Framework (WIP and CPU only) inspired by [Torch](http://torch.ch)'s [Neural Network package](https://github.com/torch/nn). 
6 | 7 | ## Requirements 8 | 9 | - [Python 2.7](https://www.python.org/) 10 | - [Cython](cython.org/) 11 | - [numpy](www.numpy.org/) 12 | - [torchfile](https://github.com/bshillingford/python-torchfile/) 13 | 14 | 15 | ## Installation 16 | 17 | Get [pip](https://pypi.python.org/pypi/pip) and run: 18 | 19 | pip install git+git://github.com/dnlcrl/PyFunt.git 20 | 21 | ## Usage 22 | 23 | Check the [examples folder](https://github.com/dnlcrl/PyFunt/tree/master/pyfunt/examples) 24 | 25 | ### Example: Parametric Residual Model 26 | 27 | Parametric models can be built easily thanks to the module structure: 28 | 29 | from pyfunt import (SpatialConvolution, SpatialBatchNormalization, 30 | SpatialAveragePooling, Sequential, ReLU, Linear, 31 | Reshape, LogSoftMax, Padding, Identity, ConcatTable, 32 | CAddTable) 33 | 34 | def residual_layer(n_channels, n_out_channels=None, stride=None): 35 | n_out_channels = n_out_channels or n_channels 36 | stride = stride or 1 37 | 38 | convs = Sequential() 39 | add = convs.add 40 | add(SpatialConvolution( 41 | n_channels, n_out_channels, 3, 3, stride, stride, 1, 1)) 42 | add(SpatialBatchNormalization(n_out_channels)) 43 | add(SpatialConvolution(n_out_channels, n_out_channels, 3, 3, 1, 1, 1, 1)) 44 | add(SpatialBatchNormalization(n_out_channels)) 45 | 46 | if stride > 1: 47 | shortcut = Sequential() 48 | shortcut.add(SpatialAveragePooling(2, 2, stride, stride)) 49 | shortcut.add(Padding(1, (n_out_channels - n_channels)/2, 3)) 50 | else: 51 | shortcut = Identity() 52 | 53 | res = Sequential() 54 | res.add(ConcatTable().add(convs).add(shortcut)).add(CAddTable()) 55 | res.add(ReLU(True)) 56 | return res 57 | 58 | 59 | def resnet(n_size, num_starting_filters, reg): 60 | nfs = num_starting_filters 61 | model = Sequential() 62 | add = model.add 63 | add(SpatialConvolution(3, nfs, 3, 3, 1, 1, 1, 1)) 64 | add(SpatialBatchNormalization(nfs)) 65 | add(ReLU()) 66 | 67 | for i in xrange(1, n_size): 68 | add(residual_layer(nfs)) 69 | add(residual_layer(nfs, 2*nfs, 2)) 70 | 71 | for i in xrange(1, n_size-1): 72 | add(residual_layer(2*nfs)) 73 | add(residual_layer(2*nfs, 4*nfs, 2)) 74 | 75 | for i in xrange(1, n_size-1): 76 | add(residual_layer(4*nfs)) 77 | 78 | add(SpatialAveragePooling(8, 8)) 79 | add(Reshape(nfs*4)) 80 | add(Linear(nfs*4, 10)) 81 | add(LogSoftMax()) 82 | return model 83 | 84 | --- 85 | 86 | Check the Torch documentation for more informations about the implemented layers (pyfunt is more or less a python port of torch/nn): [https://github.com/torch/nn/blob/master/doc/index.md](https://github.com/torch/nn/blob/master/doc/index.md) 87 | -------------------------------------------------------------------------------- /pyfunt/__init__.py: -------------------------------------------------------------------------------- 1 | from affine import Affine 2 | from batch_normalization import BatchNormalization 3 | from c_add_table import CAddTable 4 | from class_nll_criterion import ClassNLLCriterion 5 | from concat_table import ConcatTable 6 | from container import Container 7 | from criterion import Criterion 8 | from dropout import Dropout 9 | from identity import Identity 10 | from linear import Linear 11 | from log_soft_max import LogSoftMax 12 | from module import Module 13 | from mul_constant import MulConstant 14 | from padding import Padding 15 | from parallel import Parallel 16 | from relu import ReLU 17 | from reshape import Reshape 18 | from sequential import Sequential 19 | from sigmoid import Sigmoid 20 | from soft_max import SoftMax 21 | from solver import 
Solver 22 | from spatial_average_pooling import SpatialAveragePooling 23 | from spatial_batch_normalitazion import SpatialBatchNormalization 24 | from spatial_convolution import SpatialConvolution 25 | from spatial_full_convolution import SpatialFullConvolution 26 | from spatial_replication_padding import SpatialReplicationPadding 27 | from spatial_reflection_padding import SpatialReflectionPadding 28 | from spatial_max_pooling import SpatialMaxPooling 29 | from spatial_up_sampling_nearest import SpatialUpSamplingNearest 30 | from tanh import Tanh 31 | from threshold import Threshold 32 | from view import View 33 | 34 | from . import * 35 | -------------------------------------------------------------------------------- /pyfunt/affine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | 7 | 8 | class Affine(Module): 9 | 10 | def __init__(self, input_size, output_size, bias=False): 11 | super(Affine, self).__init__() 12 | self.weight = np.ndarray(output_size, input_size) 13 | self.grad_weight = np.ndarray(output_size, input_size) 14 | if bias: 15 | self.bias = np.ndarray(output_size) 16 | self.grad_bias = np.ndarray(output_size) 17 | self.reset() 18 | 19 | def no_bias(self): 20 | self.bias = None 21 | self.grad_bias = None 22 | return self 23 | 24 | def reset(self, stdv=None): 25 | if not stdv: 26 | stdv = 1./np.sqrt(self.weight.shape[2]) 27 | self.weight = np.uniform(-stdv, stdv, self.weight.shape) 28 | self.bias = np.uniform(-stdv, stdv, self.weight.shape[0]) 29 | 30 | def update_output(self, x): 31 | w = self.weight 32 | b = self.bias or np.zeros(self.weight.shape[0]) 33 | self.out = x.reshape(x.shape[0], -1).dot(w) + b 34 | self.x = x 35 | return self.output 36 | 37 | def update_grad_input(self, input, grad_output): 38 | x, w = self.x, self.weight 39 | self.grad_input = grad_output.dot(w.T).reshape(x.shape) 40 | return self.grad_input 41 | 42 | def acc_grad_parameters(self, x, grad_output, scale=1): 43 | x = self.x 44 | self.grad_weight = x.reshape(x.shape[0], -1).T.dot(grad_output) 45 | if self.bias: 46 | self.grad_bias = np.sum(grad_output, axis=0) 47 | 48 | def clear_state(self): 49 | pass 50 | 51 | def __str__(self): 52 | pass 53 | -------------------------------------------------------------------------------- /pyfunt/batch_normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | 7 | 8 | class BatchNormalization(Module): 9 | 10 | def __init__(self, n_output, eps=1e-5, momentum=0.1, affine=True): 11 | super(BatchNormalization, self).__init__() 12 | self.eps = eps 13 | self.momentum = momentum 14 | self.train = True 15 | self.running_mean = np.zeros(n_output) 16 | self.running_var = np.zeros(n_output) 17 | n_dim = 2 18 | if affine: 19 | self.weight = np.ndarray(n_output) 20 | self.bias = np.ndarray(n_output) 21 | self.grad_weight = np.ndarray(n_output) 22 | self.grad_bias = np.ndarray(n_output) 23 | else: 24 | self.weight = None 25 | self.bias = None 26 | self.grad_weight = None 27 | self.grad_bias = None 28 | self.reset() 29 | 30 | def reset(self): 31 | if self.weight is not None: 32 | self.weight[:] = np.random.uniform(size=len(self.weight))[:] 33 | if self.bias is not None: 34 | self.bias[:] = np.zeros(len(self.bias))[:] 35 | self.running_mean = np.zeros(len(self.running_mean)) 36 | 
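        # Running estimates restart at mean 0 / variance 1 here; update_output()
        # then blends fresh batch statistics into them using `momentum`.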
self.running_var = np.ones(len(self.running_var)) 37 | 38 | def check_input_dim(self, x): 39 | i_dim = len(x.shape) 40 | if i_dim != self.n_dim or (i_dim != self.n_dim - 1 and self.train is not False): 41 | raise Exception('TODO ERROR :(') 42 | # feast_dim = (i_dim == self.n_dim -1) and 1 or 2 43 | # local featDim = (iDim == self.nDim - 1) and 1 or 2 44 | # assert(input:size(featDim) == self.running_mean:nElement(), string.format( 45 | # 'got %d-feature tensor, expected %d', 46 | # input:size(featDim), self.running_mean:nElement())) 47 | 48 | def make_contigous(self, x, grad_output): 49 | #TODO 50 | pass 51 | 52 | 53 | def update_output(self, x): 54 | 55 | eps = self.eps 56 | momentum = self.momentum 57 | N, D = x.shape 58 | running_mean = self.running_mean 59 | running_var = self.running_var 60 | 61 | if self.train: 62 | mean = 1. / N * np.sum(x, axis=0) 63 | 64 | xmu = x - mean 65 | 66 | carre = xmu*xmu 67 | 68 | var = 1. / N * np.sum(carre, axis=0) 69 | 70 | sqrtvar = np.sqrt(var + eps) 71 | 72 | invstd = 1. / sqrtvar 73 | 74 | running_mean = momentum * mean + (1. - momentum) * running_mean 75 | 76 | unbiased_var = np.sum(carre, axis=0)/(N - 1.) 77 | 78 | running_var = momentum * unbiased_var + \ 79 | (1. - momentum) * running_var 80 | 81 | self.xmu = xmu 82 | self.invstd = invstd 83 | 84 | else: 85 | mean = running_mean 86 | invstd = 1. / np.sqrt(running_var + eps) 87 | 88 | out = ((x - mean) * invstd) 89 | if self.weight is not None: 90 | out *= self.weight 91 | if self.bias is not None: 92 | out += self.bias 93 | #out = ((x - mean) * invstd) * self.weight + self.bias 94 | # Store the updated running means back into bn_param 95 | self.running_mean = np.array(running_mean, copy=True) 96 | self.running_var = np.array(running_var, copy=True) 97 | self.output = out 98 | 99 | return self.output 100 | 101 | def update_grad_input(self, x, grad_output, scale=1): 102 | 103 | xmu, invstd = self.xmu, self.invstd 104 | 105 | N, D = grad_output.shape 106 | 107 | _sum = np.sum(grad_output, axis=0) 108 | dotp = np.sum((xmu * grad_output), axis=0) 109 | 110 | k = 1. / N * dotp * invstd * invstd 111 | dx = xmu * k 112 | 113 | dmean = 1. 
/ N * _sum 114 | dx = (grad_output - dmean - dx) * invstd * self.weight 115 | 116 | self.grad_weight[:] = dotp * invstd 117 | 118 | self.grad_bias[:] = _sum 119 | self.grad_input = dx 120 | 121 | return self.grad_input 122 | 123 | # def backward(self, x, grad_output, scale=1): 124 | # return self.update_grad_input(x, grad_output, scale) 125 | 126 | def acc_grad_input(self, x, grad_output, scale): 127 | return self.backward(x, grad_output, scale, None, self.grad_weight, self.grad_bias) 128 | 129 | def clear_state(self): 130 | pass 131 | -------------------------------------------------------------------------------- /pyfunt/c_add_table.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class CAddTable(Module): 6 | 7 | def __init__(self): 8 | super(CAddTable, self).__init__() 9 | self.grad_input = None 10 | 11 | def update_output(self, x): 12 | self.output = np.sum(x, axis=0) 13 | return self.output 14 | 15 | def update_grad_input(self, x, grad_output): 16 | self.grad_input = np.zeros_like(x) 17 | for i in xrange(len(x)): 18 | self.grad_input[i] = np.copy(grad_output) 19 | return self.grad_input 20 | 21 | def reset(self): 22 | pass 23 | -------------------------------------------------------------------------------- /pyfunt/class_nll_criterion.py: -------------------------------------------------------------------------------- 1 | from criterion import Criterion 2 | import numpy as np 3 | 4 | 5 | class ClassNLLCriterion(Criterion): 6 | 7 | """docstring for ClassNLLCriterion""" 8 | 9 | def __init__(self, weights=None, size_average=None): 10 | super(ClassNLLCriterion, self).__init__() 11 | if size_average: 12 | self.size_average = size_average 13 | else: 14 | self.size_average = True 15 | 16 | if weights: 17 | # assert(weights:dim() == 1, "weights input should be 1-D Tensor") 18 | self.weights = weights 19 | self.output_tensor = np.zeros(1) 20 | self.total_weight_tensor = np.ones(1) 21 | self.target = np.zeros(1) # , dtype=np.long) 22 | 23 | def __len__(self): 24 | if self.weights: 25 | return len(self.weights) 26 | else: 27 | return 0 28 | 29 | def update_output(self, x, target): 30 | 31 | # probs=np.exp(scores - np.max(scores, axis=1, keepdims=True)) 32 | # probs /= np.sum(probs, axis=1, keepdims=True) 33 | # return probs 34 | # # N = x.shape[0] 35 | # # loss = -np.mean(self.logp[np.arange(N), target]) 36 | # # self.output = -x 37 | # # return loss 38 | 39 | # N = x.shape[0] 40 | # xdev = x - x.max(1, keepdims=True) 41 | # self.logp = xdev - np.log(np.sum(np.exp(xdev), axis=1, keepdims=True)) 42 | # loss = -np.mean(self.logp[np.arange(N), target]) 43 | # self.output = loss 44 | # import pdb; pdb.set_trace() 45 | # return self.output 46 | 47 | self.output = - np.mean(x[np.arange(x.shape[0]), target]) 48 | return self.output 49 | 50 | def update_grad_input(self, x, target): 51 | N = x.shape[0] 52 | dx = np.exp(x) 53 | dx[np.arange(N), target] -= 1 54 | dx /= N 55 | self.grad_input = dx 56 | #import pdb; pdb.set_trace() 57 | return self.grad_input 58 | -------------------------------------------------------------------------------- /pyfunt/concat_table.py: -------------------------------------------------------------------------------- 1 | from container import Container 2 | import numpy as np 3 | 4 | 5 | class ConcatTable(Container): 6 | 7 | """docstring for ConcatTable""" 8 | 9 | def __init__(self): 10 | super(ConcatTable, self).__init__() 11 | self.modules = [] 12 | 13 | def update_output(self, 
x): 14 | self.output = [] 15 | for i in xrange(len(self.modules)): 16 | current_output = self.rethrow_errors( 17 | self.modules[i], i, 'update_output', x) 18 | self.output.append(current_output) 19 | # if i == 0: 20 | # self.output = current_output 21 | # else: 22 | # np.concatenate((self.output, current_output), axis=0) 23 | # self.output.append(self.rethrow_errors( 24 | # self.modules[i], i, 'update_output', x)) 25 | return self.output 26 | 27 | def _backward(self, method, x, grad_output, scale=1): 28 | for i, module in enumerate(self.modules): 29 | if method == 'update_grad_input': 30 | args = self.modules[i], i, method, x, grad_output[i] 31 | else: 32 | args = self.modules[i], i, method, x, grad_output[i], scale 33 | current_grad_input = self.rethrow_errors(*args) 34 | if i == 0: 35 | self.grad_input = current_grad_input 36 | else: 37 | self.grad_input += current_grad_input 38 | return self.grad_input 39 | 40 | def update_grad_input(self, x, grad_output): 41 | return self._backward('update_grad_input', x, grad_output) 42 | 43 | def backward(self, x, grad_output, scale=1): 44 | return self._backward('backward', x, grad_output, scale) 45 | 46 | def acc_grad_parameters(self, x, grad_output, scale=1): 47 | for i, module in enumerate(self.modules): 48 | self.rethrow_errors( 49 | self.modules[i], i, 'acc_grad_parameters', x, grad_output[i], scale) 50 | -------------------------------------------------------------------------------- /pyfunt/container.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from module import Module 3 | from types import DictType 4 | import sys 5 | import traceback 6 | import numpy as np 7 | import abc 8 | 9 | 10 | class Container(Module): 11 | """docstring for Container""" 12 | def __init__(self): 13 | super(Container, self).__init__() 14 | self.modules = [] 15 | 16 | def add(self, module): 17 | self.modules.append(module) 18 | return self 19 | 20 | def get(self, index): 21 | return self.modules[index] 22 | 23 | def size(self): 24 | return len(self.modules) 25 | 26 | def rethrow_errors(self, module, module_index, func_name, *args): 27 | def handle_error(err): 28 | # TODO 29 | return err 30 | func = getattr(module, func_name) 31 | try: 32 | result = func(*args) 33 | except Exception as e: 34 | print('In %d module (%s) of %s:' % (module_index, type(module).__name__, type(self).__name__)) 35 | traceback.print_exc() 36 | raise e 37 | 38 | return result 39 | 40 | def apply_to_modules(self, func): 41 | for module in self.modules: 42 | func(module) 43 | 44 | def zero_grad_parameters(self): 45 | self.apply_to_modules(lambda module: module.zero_grad_parameters()) 46 | 47 | def update_parameters(self, lr): 48 | self.apply_to_modules(lambda module: module.update_parameters(lr)) 49 | 50 | def training(self): 51 | super(Container, self).training() 52 | self.apply_to_modules(lambda module: module.training()) 53 | 54 | def evaluate(self): 55 | super(Container, self).evaluate() 56 | self.apply_to_modules(lambda module: module.evaluate()) 57 | 58 | def share(self, mlp, args): 59 | pass 60 | 61 | def reset(self, stdv): 62 | self.apply_to_modules(lambda module: module.reset(stdv)) 63 | 64 | def parameters(self): 65 | def tinsert(to, _from): 66 | if isinstance(_from, list): 67 | for i in xrange(len(_from)): 68 | tinsert(to, _from[i]) 69 | else: 70 | to.append(_from) 71 | 72 | w = [] 73 | gw = [] 74 | for i in xrange(len(self.modules)): 75 | 76 | res = self.modules[i].parameters() 77 | if res: 78 | mw, mgw 
= res 79 | tinsert(w, mw) 80 | tinsert(gw, mgw) 81 | return w, gw 82 | 83 | def clear_state(self): 84 | pass 85 | -------------------------------------------------------------------------------- /pyfunt/criterion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import abc 3 | 4 | 5 | class Criterion(object): 6 | __metaclass__ = abc.ABCMeta 7 | """docstring for Criterion""" 8 | def __init__(self): 9 | super(Criterion, self).__init__() 10 | self.output = 0 11 | 12 | @abc.abstractmethod 13 | def update_output(self, x, target): 14 | pass 15 | 16 | def forward(self, x, target): 17 | return self.update_output(x, target) 18 | 19 | def backward(self, x, target): 20 | return self.update_grad_input(x, target) 21 | 22 | @abc.abstractmethod 23 | def update_grad_input(self, x, target): 24 | pass 25 | 26 | def clone(self): 27 | pass 28 | 29 | def __call__(self, x, target): 30 | self.output = self.forward(x, target) 31 | self.grad_input = self.backward(x, target) 32 | return self.output, self.grad_input 33 | -------------------------------------------------------------------------------- /pyfunt/dropout.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Dropout(Module): 6 | 7 | def __init__(self, p=0.5, v1=False, stochastic_inference=False): 8 | super(Dropout, self).__init__() 9 | self.p = p 10 | self.train = True 11 | self.stochastic_inference = stochastic_inference 12 | # version 2 scales output during training instead of evaluation 13 | self.v2 = not v1 14 | if self.p >= 1 or self.p < 0: 15 | raise(' illegal percentage, must be 0 <= p < 1') 16 | self.noise = None 17 | 18 | def update_output(self, x): 19 | self.output = x.copy() 20 | if self.p > 0: 21 | if self.train or self.stochastic_inference: 22 | self.noise = np.random.binomial(1, p=1-self.p, size=x.shape) # bernoulli 23 | if self.v2: 24 | self.noise /= 1-self.p 25 | self.output *= self.noise 26 | elif not self.v2: 27 | self.output *= 1-self.p 28 | return self.output 29 | 30 | def update_grad_input(self, x, grad_output): 31 | self.grad_input = grad_output.copy() 32 | if self.train: 33 | if self.p > 0: 34 | self.grad_input *= self.noise 35 | else: 36 | if not self.v2 and self.p > 0: 37 | self.grad_input *= 1-self.p 38 | return self.grad_input 39 | 40 | def __str__(self): 41 | return '%s(%f)' % (type(self), self.p) 42 | 43 | def reset(self): 44 | pass 45 | -------------------------------------------------------------------------------- /pyfunt/examples/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /pyfunt/examples/model_testing/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyfunt import (SpatialConvolution, SpatialBatchNormalization, 3 | SpatialAveragePooling, Sequential, ReLU, Linear, 4 | Reshape, LogSoftMax) 5 | from pyfunt.utils import eval_numerical_gradient_array 6 | 7 | def rel_error(x, y): 8 | """ returns relative error """ 9 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 10 | 11 | x = np.random.randn(3, 4, 8, 8) 12 | # x = np.random.randn(3, 2, 8, 8) 13 | dout = np.random.randn(3, 10) 14 | pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2} 15 | 16 | s = Sequential() 17 | s.add(SpatialConvolution(4, 2, 1, 1, 1, 1)) 18 | s.add(SpatialAveragePooling(2, 2, 2, 2, 0, 0)) 19 | s.add(SpatialBatchNormalization(2)) 20 | s.add(ReLU()) 21 | s.add(Reshape(2*4*4)) 22 | s.add(Linear(2*4*4, 10)) 23 | s.add(LogSoftMax()) 24 | 25 | dx_num = eval_numerical_gradient_array(lambda x: s.update_output(x), x, dout) 26 | 27 | out = s.update_output(x) 28 | dx = s.update_grad_input(x, dout) 29 | # Your error should be around 1e-8 30 | print('Testing net backward function:') 31 | print('dx error: ', rel_error(dx, dx_num)) 32 | # import pdb; pdb.set_trace() 33 | -------------------------------------------------------------------------------- /pyfunt/examples/residual_networks/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | -------------------------------------------------------------------------------- /pyfunt/examples/residual_networks/resnet.py: -------------------------------------------------------------------------------- 1 | from pyfunt import (SpatialConvolution, SpatialBatchNormalization, 2 | SpatialAveragePooling, Sequential, ReLU, Linear, 3 | Reshape, LogSoftMax, Padding, Identity, ConcatTable, 4 | CAddTable) 5 | 6 | 7 | def residual_layer(n_channels, n_out_channels=None, stride=None): 8 | n_out_channels = n_out_channels or n_channels 9 | stride = stride or 1 10 | 11 | convs = Sequential() 12 | add = convs.add 13 | add(SpatialConvolution( 14 | n_channels, n_out_channels, 3, 3, stride, stride, 1, 1)) 15 | add(SpatialBatchNormalization(n_out_channels)) 16 | add(SpatialConvolution(n_out_channels, n_out_channels, 3, 3, 1, 1, 1, 1)) 17 | add(SpatialBatchNormalization(n_out_channels)) 18 | 19 | if stride > 1: 20 | shortcut = Sequential() 21 | shortcut.add(SpatialAveragePooling(2, 2, stride, stride)) 22 | shortcut.add(Padding(1, (n_out_channels - n_channels)/2, 3)) 23 | else: 24 | shortcut = Identity() 25 | 26 | res = Sequential() 27 | res.add(ConcatTable().add(convs).add(shortcut)).add(CAddTable()) 28 | # https://github.com/szagoruyko/wide-residual-networks/blob/master/models/resnet-pre-act.lua 29 | 30 | res.add(ReLU(True)) 31 | 32 | return res 33 | 34 | 35 | def resnet(n_size, num_starting_filters, reg): 36 | ''' 37 | Implementation of ["Deep Residual Learning for Image Recognition",Kaiming \ 38 | He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - http://arxiv.org/abs/1512.03385 39 | 40 | Inspired by https://github.com/gcr/torch-residual-networks 41 | 42 | This network should model a similiar behaviour of gcr's implementation. 43 | Check https://github.com/gcr/torch-residual-networks for more infos about \ 44 | the structure. 
45 | 46 | The network operates on minibatches of data that have shape (N, C, H, W) 47 | consisting of N images, each with height H and width W and with C input 48 | channels. 49 | 50 | The network has, like in the reference paper (except for the final optional 51 | affine layers), (6*n)+2 layers, composed as below: 52 | 53 | (image_dim: 3, 32, 32; F=16) 54 | (input_dim: N, *image_dim) 55 | INPUT 56 | | 57 | v 58 | +-------------------+ 59 | |conv[F, *image_dim]| (out_shape: N, 16, 32, 32) 60 | +-------------------+ 61 | | 62 | v 63 | +-------------------------+ 64 | |n * res_block[F, F, 3, 3]| (out_shape: N, 16, 32, 32) 65 | +-------------------------+ 66 | | 67 | v 68 | +-------------------------+ 69 | |res_block[2*F, F, 3, 3] | (out_shape: N, 32, 16, 16) 70 | +-------------------------+ 71 | | 72 | v 73 | +---------------------------------+ 74 | |(n-1) * res_block[2*F, 2*F, 3, 3]| (out_shape: N, 32, 16, 16) 75 | +---------------------------------+ 76 | | 77 | v 78 | +-------------------------+ 79 | |res_block[4*F, 2*F, 3, 3]| (out_shape: N, 64, 8, 8) 80 | +-------------------------+ 81 | | 82 | v 83 | +---------------------------------+ 84 | |(n-1) * res_block[4*F, 4*F, 3, 3]| (out_shape: N, 64, 8, 8) 85 | +---------------------------------+ 86 | | 87 | v 88 | +-------------+ 89 | |pool[1, 8, 8]| (out_shape: N, 64, 1, 1) 90 | +-------------+ 91 | | 92 | v 93 | +- - - - - - - - -+ 94 | |(opt) m * affine | (out_shape: N, 64, 1, 1) 95 | +- - - - - - - - -+ 96 | | 97 | v 98 | +-------+ 99 | |softmax| (out_shape: N, num_classes) 100 | +-------+ 101 | | 102 | v 103 | OUTPUT 104 | 105 | Every convolution layer has a pad=1 and stride=1, except for the dimension 106 | enhancning layers which has a stride of 2 to mantain the computational 107 | complexity. 108 | Optionally, there is the possibility of setting m affine layers immediatley 109 | before the softmax layer by setting the hidden_dims parameter, which should 110 | be a list of integers representing the numbe of neurons for each affine 111 | layer. 112 | 113 | Each residual block is composed as below: 114 | 115 | Input 116 | | 117 | ,-------+-----. 118 | Downsampling 3x3 convolution+dimensionality reduction 119 | | | 120 | v v 121 | Zero-padding 3x3 convolution 122 | | | 123 | `-----( Add )---' 124 | | 125 | Output 126 | 127 | After every layer, a batch normalization with momentum .1 is applied. 128 | 129 | Weight initialization (check also layers/init.py and layers/README.md): 130 | - Inizialize the weights and biases for the affine layers in the same 131 | way of torch's default mode by calling _init_affine_wb that returns a 132 | tuple (w, b). 133 | - Inizialize the weights for the conv layers in the same 134 | way of torch's default mode by calling init_conv_w. 135 | - Inizialize the weights for the conv layers in the same 136 | way of kaiming's mode by calling init_conv_w_kaiming 137 | (http://arxiv.org/abs/1502.01852 and 138 | http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-\ 139 | initialization) 140 | - Initialize batch normalization layer's weights like torch's default by 141 | calling init_bn_w 142 | - Initialize batch normalization layer's weights like cgr's first resblock\ 143 | 's bn (https://github.com/gcr/torch-residual-networks/blob/master/residual\ 144 | -layers.lua#L57-L59) by calling init_bn_w_gcr. 145 | 146 | num_filters=[16, 16, 32, 32, 64, 64], 147 | Initialize a new network. 148 | 149 | Inputs: 150 | - input_dim: Tuple (C, H, W) giving size of input data. 
151 | - num_starting_filters: Number of filters for the first convolution 152 | layer. 153 | - n_size: nSize for the residual network like in the reference paper 154 | - hidden_dims: Optional list number of units to use in the 155 | fully-connected hidden layers between the fianl pool and the sofmatx 156 | layer. 157 | - num_classes: Number of scores to produce from the final affine layer. 158 | - reg: Scalar giving L2 regularization strength 159 | - dtype: numpy datatype to use for computation. 160 | ''' 161 | 162 | nfs = num_starting_filters 163 | model = Sequential() 164 | add = model.add 165 | add(SpatialConvolution(3, nfs, 3, 3, 1, 1, 1, 1)) 166 | add(SpatialBatchNormalization(nfs)) 167 | add(ReLU()) 168 | 169 | for i in xrange(1, n_size): 170 | add(residual_layer(nfs)) 171 | add(residual_layer(nfs, 2*nfs, 2)) 172 | 173 | for i in xrange(1, n_size-1): 174 | add(residual_layer(2*nfs)) 175 | add(residual_layer(2*nfs, 4*nfs, 2)) 176 | 177 | for i in xrange(1, n_size-1): 178 | add(residual_layer(4*nfs)) 179 | 180 | add(SpatialAveragePooling(8, 8)) 181 | add(Reshape(nfs*4)) 182 | add(Linear(nfs*4, 10)) 183 | add(LogSoftMax()) 184 | return model 185 | -------------------------------------------------------------------------------- /pyfunt/examples/residual_networks/train-cifar.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import uuid 5 | import numpy as np 6 | # import matplotlib.pyplot as plt 7 | from pydatset.cifar10 import get_CIFAR10_data 8 | from pydatset.data_augmentation import (random_flips, 9 | random_crops) 10 | from resnet import resnet 11 | from pyfunt.solver import Solver as Solver 12 | 13 | import inspect 14 | import argparse 15 | 16 | from pyfunt.class_nll_criterion import ClassNLLCriterion 17 | 18 | np.seterr(all='raise') 19 | 20 | np.random.seed(0) 21 | 22 | DATA_PATH = '../CIFAR_DATASET_PATH' 23 | 24 | path_set = False 25 | while not path_set: 26 | try: 27 | with open(DATA_PATH) as f: 28 | DATASET_PATH = f.read() 29 | path_set = True 30 | except: 31 | data_path = raw_input('Enter the path for the CIFAR10 dataset: ') 32 | with open(DATA_PATH, "w") as f: 33 | f.write(data_path) 34 | 35 | 36 | EXPERIMENT_PATH = '../Experiments/' + str(uuid.uuid4())[-10:] 37 | 38 | # residual network constants 39 | NSIZE = 3 40 | N_STARTING_FILTERS = 16 41 | 42 | # solver constants 43 | NUM_PROCESSES = 4 44 | 45 | NUM_TRAIN = 50000 46 | NUM_TEST = 10000 47 | 48 | WEIGHT_DEACY = 1e-4 49 | REGULARIZATION = 0 50 | LEARNING_RATE = .1 51 | MOMENTUM = .99 52 | NUM_EPOCHS = 160 53 | BATCH_SIZE = 64 54 | CHECKPOINT_EVERY = 20 55 | 56 | XH, XW = 32, 32 57 | 58 | args = argparse.Namespace() 59 | 60 | 61 | def parse_args(): 62 | """ 63 | Parse the options for running the Residual Network on CIFAR-10. 64 | """ 65 | desc = 'Train a Residual Network on CIFAR-10.' 
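    # Most flags below default to the module-level constants defined at the top
    # of this script, so the command line only has to override what changes.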
66 | parser = argparse.ArgumentParser(description=desc, 67 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 68 | add = parser.add_argument 69 | add('--dataset_path', 70 | metavar='DIRECOTRY', 71 | default=DATASET_PATH, 72 | type=str, 73 | help='directory where results will be saved') 74 | add('--experiment_path', 75 | metavar='DIRECOTRY', 76 | default=EXPERIMENT_PATH, 77 | type=str, 78 | help='directory where results will be saved') 79 | add('-load', '--load_checkpoint', 80 | metavar='DIRECOTRY', 81 | default='', 82 | type=str, 83 | help='load checkpoint from load_checkpoint') 84 | add('--n_size', 85 | metavar='INT', 86 | default=NSIZE, 87 | type=int, 88 | help='Network will have (6*n)+2 conv layers') 89 | add('--n_starting_filters', 90 | metavar='INT', 91 | default=N_STARTING_FILTERS, 92 | type=int, 93 | help='Network will starts with those number of filters') 94 | add('--n_processes', '-np', 95 | metavar='INT', 96 | default=NUM_PROCESSES, 97 | type=int, 98 | help='Number of processes for each step') 99 | add('--n_train', 100 | metavar='INT', 101 | default=NUM_TRAIN, 102 | type=int, 103 | help='Number of total images to select for training') 104 | add('--n_test', 105 | metavar='INT', 106 | default=NUM_TEST, 107 | type=int, 108 | help='Number of total images to select for validation') 109 | add('-wd', '--weight_decay', 110 | metavar='FLOAT', 111 | default=WEIGHT_DEACY, 112 | type=float, 113 | help='Weight decay for sgd_th') 114 | add('-reg', '--network_regularization', 115 | metavar='FLOAT', 116 | default=REGULARIZATION, 117 | type=float, 118 | help='L2 regularization term for the network') 119 | add('-lr', '--learning_rate', 120 | metavar='FLOAT', 121 | default=LEARNING_RATE, 122 | type=float, 123 | help='Learning rate to use with sgd_th') 124 | add('-mom', '--momentum', 125 | metavar='FLOAT', 126 | default=MOMENTUM, 127 | type=float, 128 | help='Nesterov momentum use with sgd_th') 129 | add('--n_epochs', '-nep', 130 | metavar='INT', 131 | default=NUM_EPOCHS, 132 | type=int, 133 | help='Number of epochs for training') 134 | add('--batch_size', '-bs', 135 | metavar='INT', 136 | default=BATCH_SIZE, 137 | type=int, 138 | help='Number of images for each iteration') 139 | add('--checkpoint_every', '-cp', 140 | metavar='INT', 141 | default=CHECKPOINT_EVERY, 142 | type=int, 143 | help='Number of epochs between each checkpoint') 144 | parser.parse_args(namespace=args) 145 | assert not (args.network_regularization and args.weight_decay) 146 | 147 | 148 | def data_augm(batch): 149 | p = 2 150 | h, w = XH, XW 151 | 152 | # batch = random_tint(batch) 153 | # batch = random_contrast(batch) 154 | batch = random_flips(batch) 155 | # batch = random_rotate(batch, 10) 156 | batch = random_crops(batch, (h, w), pad=p) 157 | return batch 158 | 159 | 160 | def custom_update_decay(epoch): 161 | if epoch in (80, 120): 162 | return 0.1 163 | return 1 164 | 165 | 166 | def print_infos(solver): 167 | print('Model: \n%s' % solver.model) 168 | 169 | print('Solver: \n%s' % solver) 170 | 171 | print('Data Augmentation Function: \n') 172 | print(''.join(['\t' + i for i in inspect.getsourcelines(data_augm)[0]])) 173 | print('Custom Weight Decay Update Rule: \n') 174 | print(''.join(['\t' + i for i in inspect.getsourcelines(custom_update_decay)[0]])) 175 | 176 | 177 | def main(): 178 | parse_args() 179 | 180 | data = get_CIFAR10_data(args.dataset_path, 181 | num_training=args.n_train, num_validation=0, num_test=args.n_test) 182 | 183 | data = { 184 | 'X_train': data['X_train'], 185 | 'y_train': data['y_train'], 
186 | 'X_val': data['X_test'], 187 | 'y_val': data['y_test'], 188 | } 189 | 190 | exp_path = args.experiment_path 191 | nf = args.n_starting_filters 192 | reg = args.network_regularization 193 | 194 | model = resnet(n_size=args.n_size, 195 | num_starting_filters=nf, 196 | reg=reg) 197 | 198 | wd = args.weight_decay 199 | lr = args.learning_rate 200 | mom = args.momentum 201 | 202 | optim_config = {'learning_rate': lr, 'nesterov': True, 203 | 'momentum': mom, 'weight_decay': wd} 204 | 205 | epochs = args.n_epochs 206 | bs = args.batch_size 207 | num_p = args.n_processes 208 | cp = args.checkpoint_every 209 | criterion = ClassNLLCriterion() 210 | solver = Solver(model, data, args.load_checkpoint, 211 | criterion=criterion, 212 | num_epochs=epochs, batch_size=bs, # 20 213 | update_rule='sgd_th', 214 | optim_config=optim_config, 215 | custom_update_ld=custom_update_decay, 216 | batch_augment_func=data_augm, 217 | checkpoint_every=cp, 218 | num_processes=num_p) 219 | 220 | print_infos(solver) 221 | solver.train() 222 | 223 | solver.export_model(exp_path) 224 | solver.export_histories(exp_path) 225 | 226 | print('finish') 227 | 228 | 229 | if __name__ == '__main__': 230 | main() 231 | -------------------------------------------------------------------------------- /pyfunt/identity.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | 3 | 4 | class Identity(Module): 5 | """docstring for Identity""" 6 | def __init__(self): 7 | super(Identity, self).__init__() 8 | 9 | def update_output(self, x): 10 | self.output = x.copy() 11 | return self.output 12 | 13 | def update_grad_input(self, x, grad_output): 14 | self.grad_input = grad_output.copy() 15 | return self.grad_input 16 | 17 | def clear_state(self): 18 | pass 19 | 20 | def reset(self): 21 | pass 22 | -------------------------------------------------------------------------------- /pyfunt/im2col.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | 5 | 6 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 7 | # First figure out what the size of the output should be 8 | N, C, H, W = x_shape 9 | assert (H + 2 * padding - field_height) % stride == 0 10 | assert (W + 2 * padding - field_height) % stride == 0 11 | out_height = (H + 2 * padding - field_height) / stride + 1 12 | out_width = (W + 2 * padding - field_width) / stride + 1 13 | 14 | i0 = np.repeat(np.arange(field_height), field_width) 15 | i0 = np.tile(i0, C) 16 | i1 = stride * np.repeat(np.arange(out_height), out_width) 17 | j0 = np.tile(np.arange(field_width), field_height * C) 18 | j1 = stride * np.tile(np.arange(out_width), out_height) 19 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 20 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 21 | 22 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 23 | 24 | return (k, i, j) 25 | 26 | 27 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 28 | ''' An implementation of im2col based on some fancy indexing ''' 29 | # Zero-pad the input 30 | p = padding 31 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 32 | 33 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, 34 | stride) 35 | 36 | cols = x_padded[:, k, i, j] 37 | C = x.shape[1] 38 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 39 | return cols 40 | 41 | 42 | def 
col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, 43 | stride=1): 44 | ''' An implementation of col2im based on fancy indexing and np.add.at ''' 45 | N, C, H, W = x_shape 46 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 47 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 48 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, 49 | stride) 50 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 51 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 52 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 53 | if padding == 0: 54 | return x_padded 55 | return x_padded[:, :, padding:-padding, padding:-padding] 56 | 57 | -------------------------------------------------------------------------------- /pyfunt/im2col_cyt.pyx: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | cimport numpy as np 4 | cimport cython 5 | 6 | # DTYPE = np.float64 7 | # ctypedef np.float64_t DTYPE_t 8 | 9 | ctypedef fused DTYPE_t: 10 | np.float32_t 11 | np.float64_t 12 | 13 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 14 | int field_width, int padding, int stride): 15 | cdef int N = x.shape[0] 16 | cdef int C = x.shape[1] 17 | cdef int H = x.shape[2] 18 | cdef int W = x.shape[3] 19 | 20 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 21 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 22 | 23 | cdef int p = padding 24 | cdef double c = 0.0 25 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 26 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant', constant_values=(c,)) 27 | 28 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 29 | (C * field_height * field_width, N * HH * WW), 30 | dtype=x.dtype) 31 | 32 | # Moving the inner loop to a C function with no bounds checking works, but does 33 | # not seem to help performance in any measurable way. 34 | 35 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 36 | field_height, field_width, padding, stride) 37 | return cols 38 | 39 | 40 | @cython.boundscheck(False) 41 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 42 | np.ndarray[DTYPE_t, ndim=4] x_padded, 43 | int N, int C, int H, int W, int HH, int WW, 44 | int field_height, int field_width, int padding, int stride) except? -1: 45 | cdef int c, ii, jj, row, yy, xx, i, col 46 | 47 | for c in range(C): 48 | for yy in range(HH): 49 | for xx in range(WW): 50 | for ii in range(field_height): 51 | for jj in range(field_width): 52 | row = c * field_width * field_height + ii * field_height + jj 53 | for i in range(N): 54 | col = yy * WW * N + xx * N + i 55 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 56 | 57 | 58 | 59 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 60 | int field_height, int field_width, int padding, int stride): 61 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 62 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 63 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 64 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 65 | dtype=cols.dtype) 66 | 67 | # Moving the inner loop to a C-function with no bounds checking improves 68 | # performance quite a bit for col2im. 
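    # col2im_cython_inner scatter-adds every column of `cols` back into the
    # padded image buffer, so overlapping receptive fields accumulate their
    # contributions; the padding border is cropped off below.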
69 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 70 | field_height, field_width, padding, stride) 71 | if padding > 0: 72 | return x_padded[:, :, padding:-padding, padding:-padding] 73 | return x_padded 74 | 75 | 76 | @cython.boundscheck(False) 77 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 78 | np.ndarray[DTYPE_t, ndim=4] x_padded, 79 | int N, int C, int H, int W, int HH, int WW, 80 | int field_height, int field_width, int padding, int stride) except? -1: 81 | cdef int c, ii, jj, row, yy, xx, i, col 82 | 83 | for c in range(C): 84 | for ii in range(field_height): 85 | for jj in range(field_width): 86 | row = c * field_width * field_height + ii * field_height + jj 87 | for yy in range(HH): 88 | for xx in range(WW): 89 | for i in range(N): 90 | col = yy * WW * N + xx * N + i 91 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 92 | 93 | 94 | @cython.boundscheck(False) 95 | @cython.wraparound(False) 96 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 97 | np.ndarray[DTYPE_t, ndim=4] x_padded, 98 | int N, int C, int H, int W, int HH, int WW, 99 | int out_h, int out_w, int pad, int stride): 100 | 101 | cdef int c, hh, ww, n, h, w 102 | for n in range(N): 103 | for c in range(C): 104 | for hh in range(HH): 105 | for ww in range(WW): 106 | for h in range(out_h): 107 | for w in range(out_w): 108 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 109 | 110 | 111 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 112 | int HH, int WW, int pad, int stride): 113 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 114 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 115 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 116 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 117 | dtype=cols.dtype) 118 | 119 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 120 | 121 | if pad > 0: 122 | return x_padded[:, :, pad:-pad, pad:-pad] 123 | return x_padded -------------------------------------------------------------------------------- /pyfunt/linear.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Linear(Module): 6 | 7 | def __init__(self, input_size, output_size, bias=True): 8 | super(Linear, self).__init__() 9 | self.weight = np.ndarray((input_size, output_size)) 10 | self.grad_weight = np.ndarray((input_size, output_size)) 11 | if bias: 12 | self.bias = np.ndarray(output_size) 13 | self.grad_bias = np.ndarray(output_size) 14 | else: 15 | self.bias = None 16 | self.grad_bias = None 17 | self.reset() 18 | 19 | def no_bias(self): 20 | self.bias = None 21 | self.grad_bias = None 22 | 23 | def reset(self, stdv=None): 24 | if stdv: 25 | stdv = stdv * np.sqrt(3) 26 | else: 27 | std = 1./np.sqrt(self.weight.shape[0]) 28 | # stdv = 1./np.sqrt(self.weight.shape[1]) 29 | self.weight = np.random.uniform(-std, std, self.weight.shape) 30 | if self.bias is not None: 31 | self.bias = np.random.uniform(-std, std, self.bias.shape) 32 | 33 | def update_output(self, x): 34 | out = x.reshape(x.shape[0], -1) 35 | out = out.dot(self.weight) 36 | if self.bias is not None: 37 | out += self.bias 38 | self.output = out 39 | return self.output 40 | 41 | def update_grad_input(self, x, grad_output): 42 | dx = grad_output.dot(self.weight.T).reshape(x.shape) 43 | self.grad_weight[:] = x.reshape(x.shape[0], 
-1).T.dot(grad_output)[:] 44 | if self.bias is not None: 45 | self.grad_bias[:] = np.sum(grad_output, axis=0)[:] 46 | self.grad_input = dx 47 | return dx 48 | 49 | def acc_grad_parameters(self, x, grad_output, scale=None): 50 | pass 51 | -------------------------------------------------------------------------------- /pyfunt/log_soft_max.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class LogSoftMax(Module): 6 | """docstring for LogSoftMax""" 7 | def __init__(self): 8 | super(LogSoftMax, self).__init__() 9 | 10 | def update_output(self, x): 11 | max_input = x.max(1, keepdims=True) 12 | log_sum = np.sum(np.exp(x - max_input), axis=1, keepdims=True) 13 | log_sum = max_input + np.log(log_sum) 14 | self.output = x - log_sum 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | _sum = np.sum(grad_output, axis=1, keepdims=True) 19 | 20 | max_input = x.max(1, keepdims=True) 21 | log_sum = np.sum(np.exp(x - max_input), axis=1, keepdims=True) 22 | log_sum = max_input + np.log(log_sum) 23 | self.output = x - log_sum 24 | 25 | self.grad_input = grad_output - np.exp(self.output)*_sum 26 | return self.grad_input 27 | 28 | def reset(self): 29 | pass 30 | -------------------------------------------------------------------------------- /pyfunt/module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import abc 5 | import numpy as np 6 | from copy import deepcopy 7 | from types import DictType 8 | 9 | 10 | class Module(object): 11 | __metaclass__ = abc.ABCMeta 12 | 13 | def __init__(self): 14 | self.grad_input = None # np.ndarray() 15 | self.output = None # np.ndarray() 16 | self._type = np.float 17 | 18 | def parameters(self): 19 | if hasattr(self, 'weight'): 20 | if self.weight is not None and self.bias is not None: 21 | return [self.weight, self.bias], [self.grad_weight, self.grad_bias] 22 | if self.weight is not None: 23 | return [self.weight], [self.grad_weight] 24 | if self.bias is not None: 25 | return [self.bias], [self.grad_bias] 26 | 27 | @abc.abstractmethod 28 | def update_output(self, _input=None): 29 | # return self.output 30 | raise NotImplementedError() 31 | 32 | def forward(self, x): 33 | return self.update_output(x) 34 | 35 | def backward(self, _input, grad_output, scale=1): 36 | self.grad_input = self.update_grad_input(_input, grad_output) 37 | self.acc_grad_parameters(_input, grad_output, scale) 38 | return self.grad_input 39 | 40 | def backward_update(self, _input, grad_output, lr): 41 | grad_weight = self.grad_weight 42 | grad_bias = self.grad_bias 43 | self.grad_weight = self.weight 44 | self.grad_bias = self.bias 45 | self.acc_grad_parameters(_input, grad_output, -lr) 46 | self.grad_weight = grad_weight 47 | self.grad_bias = grad_bias 48 | 49 | @abc.abstractmethod 50 | def update_grad_input(self, _input, grad_output): 51 | # return self.grad_input 52 | raise NotImplementedError() 53 | 54 | def acc_grad_parameters(self, _input, grad_output, scale): 55 | pass 56 | 57 | def acc_update_grad_parameters(self, _input, grad_output, lr): 58 | grad_weight = self.grad_weight 59 | grad_bias = self.grad_bias 60 | self.grad_weight = self.weight 61 | self.grad_bias = self.bias 62 | self.acc_grad_parameters(_input, grad_output, -lr) 63 | self.grad_weight = grad_weight 64 | self.grad_bias = grad_bias 65 | 66 | def shared_acc_update_grad_parameters(self, _input, grad_output, lr): 67 
| if self.parameters(): 68 | self.zero_grad_parameters() 69 | self.acc_grad_parameters(_input, grad_output, 1) 70 | self.update_parameters(lr) 71 | 72 | def zero_grad_parameters(self): 73 | _, grad_params = self.parameters() 74 | if grad_params: 75 | for g in grad_params: 76 | g.zero() 77 | 78 | def update_parameters(self, lr): 79 | res = self.parameters() 80 | if res: 81 | params, grad_params = res 82 | for i, p in enumerate(params): 83 | p -= lr*grad_params[i] 84 | 85 | def training(self): 86 | self.train = True 87 | 88 | def evaluate(self): 89 | self.train = False 90 | 91 | def share(self, mlp, p_names): 92 | for i, v in enumerate(p_names): 93 | if self[v] is not None: 94 | self[v].set(mlp[v]) 95 | self.acc_update_grad_parameters = self.shared_acc_update_grad_parameters 96 | mlp.acc_update_grad_parameters = self.acc_update_grad_parameters 97 | return self 98 | 99 | def clone(self, p_names=None): 100 | clone = deepcopy(self) 101 | if p_names: 102 | clone.share(self, p_names) 103 | return clone 104 | 105 | # def type(self, type=None, cache=None): 106 | # if type is not None: 107 | # return self._type 108 | # cache = cache or {} 109 | # # find all tensors and convert them 110 | # for key, param in pairs(self): 111 | # self[key] = utils.recursive_type(param, type, cache) 112 | 113 | # self._type = type 114 | # return self 115 | 116 | 117 | # function Module:float(...) 118 | # return self:type('torch.FloatTensor',...) 119 | # end 120 | 121 | # function Module:double(...) 122 | # return self:type('torch.DoubleTensor',...) 123 | # end 124 | 125 | # function Module:cuda(...) 126 | # return self:type('torch.CudaTensor',...) 127 | # end 128 | 129 | def reset(self): 130 | raise NotImplementedError() 131 | 132 | def write(self, file): 133 | np.save(file, self) 134 | 135 | def read(self, file): 136 | obj = np.load(file)[0] 137 | for k, v in enumerate(obj): 138 | self[k] = v 139 | 140 | 141 | # -- This function is not easy to understand. It works as follows: 142 | # -- 143 | # -- - gather all parameter tensors for this module (and children); 144 | # -- count all parameter values (floats) 145 | # -- - create one ginormous memory area (Storage object) with room for all 146 | # -- parameters 147 | # -- - remap each parameter tensor to point to an area within the ginormous 148 | # -- Storage, and copy it there 149 | # -- 150 | # -- It has the effect of making all parameters point to the same memory area, 151 | # -- which is then returned. 152 | # -- 153 | # -- The purpose is to allow operations over all parameters (such as momentum 154 | # -- updates and serialization), but it assumes that all parameters are of 155 | # -- the same type (and, in the case of CUDA, on the same device), which 156 | # -- is not always true. Use for_each() to iterate over this module and 157 | # -- children instead. 158 | # -- 159 | # -- Module._flattenTensorBuffer can be used by other packages (e.g. cunn) 160 | # -- to specify the type of temporary buffers. For example, the temporary 161 | # -- buffers for CudaTensor could be FloatTensor, to avoid GPU memory usage. 162 | # -- 163 | # -- TODO: This logically belongs to torch.Tensor, not nn. 
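# -- The commented Lua below (Module.flatten) is kept from torch/nn as a
# -- porting reference. A rough NumPy sketch of the same idea (illustrative
# -- only, not used by this class; `flatten_params` is a hypothetical helper):
#
#   def flatten_params(params):
#       sizes = [p.size for p in params]
#       flat = np.zeros(sum(sizes), dtype=params[0].dtype)
#       views, offset = [], 0
#       for p, n in zip(params, sizes):
#           flat[offset:offset + n] = p.ravel()
#           views.append(flat[offset:offset + n].reshape(p.shape))
#           offset += n
#       # every entry of `views` aliases `flat`, so a single contiguous update
#       # (e.g. a momentum step on `flat`) reaches all parameters at once
#       return flat, views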
164 | # Module._flattenTensorBuffer = {} 165 | 166 | 167 | # function Module.flatten(parameters) 168 | 169 | # -- returns true if tensor occupies a contiguous region of memory (no holes) 170 | # local function isCompact(tensor) 171 | # local sortedStride, perm = torch.sort( 172 | # torch.LongTensor(tensor:nDimension()):set(tensor:stride()), 1, true) 173 | # local sortedSize = torch.LongTensor(tensor:nDimension()):set( 174 | # tensor:size()):index(1, perm) 175 | # local nRealDim = torch.clamp(sortedStride, 0, 1):sum() 176 | # sortedStride = sortedStride:narrow(1, 1, nRealDim):clone() 177 | # sortedSize = sortedSize:narrow(1, 1, nRealDim):clone() 178 | # local t = tensor.new():set(tensor:storage(), 1, 179 | # sortedSize:storage(), 180 | # sortedStride:storage()) 181 | # return t:isContiguous() 182 | # end 183 | 184 | # if not parameters or #parameters == 0 then 185 | # return torch.Tensor() 186 | # end 187 | # local Tensor = parameters[1].new 188 | # local TmpTensor = Module._flattenTensorBuffer[torch.type(parameters[1])] 189 | # or Tensor 190 | 191 | # -- 1. construct the set of all unique storages referenced by parameter tensors 192 | # local storages = {} 193 | # local nParameters = 0 194 | # local parameterMeta = {} 195 | # for k = 1,#parameters do 196 | # local param = parameters[k] 197 | # local storage = parameters[k]:storage() 198 | # local storageKey = torch.pointer(storage) 199 | 200 | # if not storages[storageKey] then 201 | # storages[storageKey] = {storage, nParameters} 202 | # nParameters = nParameters + storage:size() 203 | # end 204 | 205 | # parameterMeta[k] = {storageOffset = param:storageOffset() + 206 | # storages[storageKey][2], 207 | # size = param:size(), 208 | # stride = param:stride()} 209 | # end 210 | 211 | # -- 2. construct a single tensor that will hold all the parameters 212 | # local flatParameters = TmpTensor(nParameters):zero() 213 | 214 | # -- 3. determine if there are elements in the storage that none of the 215 | # -- parameter tensors reference ('holes') 216 | # local tensorsCompact = true 217 | # for k = 1,#parameters do 218 | # local meta = parameterMeta[k] 219 | # local tmp = TmpTensor():set( 220 | # flatParameters:storage(), meta.storageOffset, meta.size, meta.stride) 221 | # tmp:fill(1) 222 | # tensorsCompact = tensorsCompact and isCompact(tmp) 223 | # end 224 | 225 | # local maskParameters = flatParameters:byte():clone() 226 | # local compactOffsets = flatParameters:long():cumsum(1) 227 | # local nUsedParameters = compactOffsets[-1] 228 | 229 | # -- 4. copy storages into the flattened parameter tensor 230 | # for _, storageAndOffset in pairs(storages) do 231 | # local storage, offset = table.unpack(storageAndOffset) 232 | # flatParameters[{{offset+1,offset+storage:size()}}]:copy(Tensor():set(storage)) 233 | # end 234 | 235 | # -- 5. allow garbage collection 236 | # storages = nil 237 | # for k = 1,#parameters do 238 | # parameters[k]:set(Tensor()) 239 | # end 240 | 241 | # -- 6. 
compact the flattened parameters if there were holes 242 | # if nUsedParameters ~= nParameters then 243 | # assert(tensorsCompact, 244 | # "Cannot gather tensors that are not compact") 245 | 246 | # flatParameters = TmpTensor(nUsedParameters):copy( 247 | # flatParameters:maskedSelect(maskParameters)) 248 | # for k = 1,#parameters do 249 | # parameterMeta[k].storageOffset = 250 | # compactOffsets[parameterMeta[k].storageOffset] 251 | # end 252 | # end 253 | 254 | # if TmpTensor ~= Tensor then 255 | # flatParameters = Tensor(flatParameters:nElement()):copy(flatParameters) 256 | # end 257 | 258 | # -- 7. fix up the parameter tensors to point at the flattened parameters 259 | # for k = 1,#parameters do 260 | # parameters[k]:set(flatParameters:storage(), 261 | # parameterMeta[k].storageOffset, 262 | # parameterMeta[k].size, 263 | # parameterMeta[k].stride) 264 | # end 265 | 266 | # return flatParameters 267 | # end 268 | 269 | def get_parameters(self): 270 | parameters, grad_parameters = self.parameters() 271 | #p, g = Module.flatten(parameters), Module.flatten(grad_parameters) 272 | #if not p.n_element() == g.n_element(): 273 | # raise Exception('check that you are sharing parameters and gradParameters') 274 | return parameters, grad_parameters 275 | 276 | def __call__(self, _input=None, grad_output=None): 277 | self.forward(_input) 278 | if self.grad_output: 279 | self.backward(_input, grad_output) 280 | return self.output, self.grad_input 281 | else: 282 | return self.output 283 | 284 | # Run a callback (called with the module as an argument) in preorder over this 285 | # module and its children. 286 | def apply(self, callback): 287 | callback(self) 288 | if self.modules: 289 | for module in self.modules: 290 | module.apply(callback) 291 | 292 | def find_modules(self, type_c, container): 293 | container = container or self 294 | nodes = {} 295 | containers = {} 296 | mod_type = type(self) 297 | if mod_type == type_c: 298 | nodes[len(nodes)+1] = self 299 | containers[len(containers)] = container 300 | # Recurse on nodes with 'modules' 301 | if self.modules is not None: 302 | if type(self.modules) is DictType: 303 | for i in xrange(len(self.modules)): 304 | child = self.modules[i] 305 | cur_nodes, cur_containers = child.find_modules( 306 | type_c, self) 307 | 308 | # This shouldn't happen 309 | if not len(cur_nodes) == len(cur_containers): 310 | raise Exception('Internal error: incorrect return length') 311 | 312 | # add the list items from our child to our list (ie return a 313 | # flattened table of the return nodes). 
314 | for j in xrange(len(cur_nodes)): 315 | nodes[len(cur_nodes)+1] = cur_nodes[j] 316 | containers[len(containers)+1] = cur_containers[j] 317 | 318 | return nodes, containers 319 | 320 | def list_modules(self): 321 | def tinsert(to, _from): 322 | if type(_from) == DictType: 323 | for i in xrange(len(_from)): 324 | tinsert(to, _from[i]) 325 | else: 326 | to.update(_from) 327 | 328 | modules = self 329 | if self.modules: 330 | for i in xrange(len(self.modules)): 331 | modulas = self.modules[i].list_modules() 332 | if modulas: 333 | tinsert(modules, modulas) 334 | return modules 335 | 336 | def clear_state(self): 337 | return # clear utils clear(self, 'output', 'gradInput') 338 | 339 | # similar to apply, recursively goes over network and calls 340 | # a callback function which returns a new module replacing the old one 341 | 342 | def replace(self, callback): 343 | callback(self) 344 | if self.modules: 345 | for i, m in enumerate(self.modules): 346 | self.modules[i] = Module.replace(callback) 347 | -------------------------------------------------------------------------------- /pyfunt/mul_constant.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class MulConstant(Module): 6 | 7 | def __init__(self, constant_scalar): 8 | super(MulConstant, self).__init__() 9 | if not np.isscalar(constant_scalar): 10 | raise Exception('Constant is not a scalar: ' + constant_scalar) 11 | self.constant_scalar = constant_scalar 12 | 13 | def update_output(self, x): 14 | self.output = x * self.constant_scalar 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | self.grad_input = grad_output * self.constant_scalar 19 | return self.grad_input 20 | 21 | def validate_parameters(self): 22 | if self.inplace: 23 | if self.val > self.th: 24 | raise Exception('in-place processing requires value not exceed threshold') 25 | 26 | def reset(self): 27 | pass 28 | -------------------------------------------------------------------------------- /pyfunt/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ''' 4 | This file implements various first-order update rules that are commonly used for 5 | training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning rate, 16 | momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not perform 25 | well; however the default values of the other hyperparameters should work well 26 | for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 
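Example (a minimal sketch of the calling convention; the array shapes, the
iteration count and compute_gradient below are illustrative only):

    w = np.random.randn(10, 10)
    config = None
    for _ in range(100):
        dw = compute_gradient(w)                 # hypothetical gradient of the loss w.r.t. w
        w, config = sgd_momentum(w, dw, config)  # config carries the velocity between calls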
30 | ''' 31 | 32 | 33 | def sgd_th(w, dw, config=None): 34 | ''' 35 | Performs stochastic gradient descent with nesterov momentum, 36 | like Torch's optim.sgd: 37 | https://github.com/torch/optim/blob/master/sgd.lua 38 | 39 | config format: 40 | - learning_rate: Scalar learning rate. 41 | - momentum: Scalar between 0 and 1 giving the momentum value. 42 | Setting momentum = 0 reduces to sgd. 43 | - nesterov: Boolean to indicate if nesterov momentum should be applied 44 | - dampening: default equal to momentum. 45 | - weight_decay: apply weight_decay in place. 46 | - state_dw: stored gradients for the next update. 47 | ''' 48 | if config is None: 49 | config = {} 50 | 51 | learning_rate = config.get('learning_rate', 1e-2) 52 | momentum = config.get('momentum', 0) 53 | nesterov = config.get('nesterov', False) 54 | dampening = config.get('dampening', 0) 55 | weight_decay = config.get('weight_decay', 0) 56 | state_dw = config.get('state_dw', None) 57 | assert (not nesterov or (momentum > 0 and dampening == 0) 58 | ), 'Nesterov momentum requires a momentum and zero dampening' 59 | dampening = dampening or momentum 60 | dw = dw.copy() 61 | if weight_decay: 62 | dw += weight_decay * w 63 | 64 | if momentum: 65 | if state_dw is None: 66 | state_dw = dw 67 | else: 68 | state_dw *= momentum 69 | state_dw += (1 - dampening) * dw 70 | if nesterov: 71 | dw = dw + momentum * state_dw 72 | else: 73 | dw = state_dw 74 | 75 | next_w = w - learning_rate * dw 76 | 77 | config['state_dw'] = state_dw 78 | 79 | return next_w, config 80 | 81 | 82 | def nesterov(w, dw, config=None): 83 | ''' 84 | Performs stochastic gradient descent with nesterov momentum. 85 | 86 | config format: 87 | - learning_rate: Scalar learning rate. 88 | - momentum: Scalar between 0 and 1 giving the momentum value. 89 | Setting momentum = 0 reduces to sgd. 90 | - velocity: A numpy array of the same shape as w and dw used to store a moving 91 | average of the gradients. 92 | ''' 93 | if config is None: 94 | config = {} 95 | config.setdefault('learning_rate', 1e-2) 96 | config.setdefault('momentum', 0.9) 97 | v = config.get('velocity', np.zeros_like(w, dtype=np.float64)) 98 | 99 | next_w = None 100 | prev_v = v 101 | v = config['momentum'] * v - config['learning_rate'] * dw 102 | next_w = w - config['momentum'] * prev_v + (1 + config['momentum']) * v 103 | config['velocity'] = v 104 | 105 | return next_w, config 106 | 107 | 108 | def sgd(w, dw, config=None, p=-1): 109 | ''' 110 | Performs vanilla stochastic gradient descent. 111 | 112 | config format: 113 | - learning_rate: Scalar learning rate. 114 | ''' 115 | if config is None: 116 | config = {} 117 | config.setdefault('learning_rate', 1e-2) 118 | 119 | w -= config['learning_rate'] * dw 120 | return w, config 121 | 122 | 123 | def sgd_momentum(w, dw, config=None): 124 | ''' 125 | Performs stochastic gradient descent with momentum. 126 | 127 | config format: 128 | - learning_rate: Scalar learning rate. 129 | - momentum: Scalar between 0 and 1 giving the momentum value. 130 | Setting momentum = 0 reduces to sgd. 131 | - velocity: A numpy array of the same shape as w and dw used to store a moving 132 | average of the gradients. 
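    The step applied below is the classic heavy-ball update (this restates the
    code that follows; it adds nothing new):

        v = momentum * v + learning_rate * dw
        next_w = w - v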
133 | ''' 134 | if config is None: 135 | config = {} 136 | config.setdefault('learning_rate', 1e-2) 137 | config.setdefault('momentum', 0.9) 138 | v = config.get('velocity', np.zeros_like(w)) 139 | 140 | next_w = None 141 | v = config['momentum'] * v + config['learning_rate'] * dw 142 | next_w = w - v 143 | config['velocity'] = v 144 | 145 | return next_w, config 146 | 147 | 148 | 149 | def rmsprop(x, dx, config=None): 150 | ''' 151 | Uses the RMSProp update rule, which uses a moving average of squared gradient 152 | values to set adaptive per-parameter learning rates. 153 | 154 | config format: 155 | - learning_rate: Scalar learning rate. 156 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 157 | gradient cache. 158 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 159 | - cache: Moving average of second moments of gradients. 160 | ''' 161 | if config is None: 162 | config = {} 163 | config.setdefault('learning_rate', 1e-2) 164 | config.setdefault('decay_rate', 0.99) 165 | config.setdefault('epsilon', 1e-8) 166 | config.setdefault('cache', np.zeros_like(x)) 167 | 168 | next_x = None 169 | cache = config['cache'] 170 | decay_rate = config['decay_rate'] 171 | learning_rate = config['learning_rate'] 172 | cache = decay_rate * cache + (1 - decay_rate) * dx**2 173 | x += - learning_rate * dx / (np.sqrt(cache) + 1e-8) 174 | 175 | config['cache'] = cache 176 | next_x = x 177 | 178 | return next_x, config 179 | 180 | 181 | def adam(x, dx, config=None): 182 | ''' 183 | Uses the Adam update rule, which incorporates moving averages of both the 184 | gradient and its square and a bias correction term. 185 | 186 | config format: 187 | - learning_rate: Scalar learning rate. 188 | - beta1: Decay rate for moving average of first moment of gradient. 189 | - beta2: Decay rate for moving average of second moment of gradient. 190 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 191 | - m: Moving average of gradient. 192 | - v: Moving average of squared gradient. 193 | - t: Iteration number. 
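    Note: the body below applies the raw moving averages m and v directly. Full
    Adam additionally bias-corrects them; a sketch of that variant, reusing the
    names from the config above, would be:

        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)
        next_x = x - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)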
194 | ''' 195 | if config is None: 196 | config = {} 197 | config.setdefault('learning_rate', 1e-3) 198 | config.setdefault('beta1', 0.9) 199 | config.setdefault('beta2', 0.999) 200 | config.setdefault('epsilon', 1e-8) 201 | config.setdefault('m', np.zeros_like(x)) 202 | config.setdefault('v', np.zeros_like(x)) 203 | config.setdefault('t', 1) 204 | 205 | next_x = None 206 | m = config['m'] 207 | v = config['v'] 208 | t = config['t'] 209 | beta1 = config['beta1'] 210 | beta2 = config['beta2'] 211 | 212 | # update parameters 213 | learning_rate = config['learning_rate'] 214 | epsilon = config['epsilon'] 215 | m = beta1*m + (1-beta1)*dx 216 | v = beta2*v + (1-beta2)*dx**2 217 | t = t + 1 218 | next_x = x - learning_rate*m/(np.sqrt(v) + epsilon) 219 | 220 | # Writing back in config 221 | config['m'] = m 222 | config['v'] = v 223 | config['t'] = t 224 | 225 | return next_x, config 226 | -------------------------------------------------------------------------------- /pyfunt/padding.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Padding(Module): 6 | 7 | def __init__(self, dim, pad, n_input_dim, value=None, index=None): 8 | super(Padding, self).__init__() 9 | self.value = value or 0 10 | self.index = index or 1 11 | self.dim = [dim] if type(dim) == int else dim 12 | self.pad = pad if pad > 0 else -pad 13 | self.n_input_dim = n_input_dim 14 | 15 | def update_output(self, x): 16 | pads = [] 17 | for axis in range(x.ndim): 18 | if axis in self.dim: 19 | pads += [(self.pad, self.pad)] 20 | else: 21 | pads += [(0, 0)] 22 | pads = tuple(pads) 23 | self.output = np.pad(x, pads, mode='constant') 24 | return self.output 25 | 26 | def update_grad_input(self, x, grad_output): 27 | slc = [slice(None)] * x.ndim 28 | self.grad_input = grad_output 29 | for axis in range(x.ndim): 30 | if axis in self.dim: 31 | slc[axis] = slice(self.pad, -self.pad) 32 | self.grad_input = grad_output[slc] 33 | return self.grad_input 34 | 35 | def reset(self): 36 | pass 37 | -------------------------------------------------------------------------------- /pyfunt/parallel.py: -------------------------------------------------------------------------------- 1 | from container import Container 2 | 3 | 4 | class Parallel(Container): 5 | """docstring for Parallel""" 6 | def __init__(self): 7 | super(Parallel, self).__init__() 8 | 9 | def len(self): 10 | return len(self.modules) 11 | 12 | def add(self, module): 13 | pass 14 | 15 | def insert(self, modules, module): 16 | pass 17 | 18 | def remove(self, index): 19 | pass 20 | 21 | def update_output(self, x): 22 | pass 23 | 24 | def update_grad_input(self, grad_output): 25 | pass 26 | 27 | def acc_grad_parameters(self, grad_output, scale): 28 | pass 29 | 30 | def backward(self, grad_output, scale): 31 | pass 32 | 33 | def __str__(self): 34 | pass 35 | -------------------------------------------------------------------------------- /pyfunt/relu.py: -------------------------------------------------------------------------------- 1 | from threshold import Threshold 2 | 3 | 4 | class ReLU(Threshold): 5 | def __init__(self, ip=False): 6 | super(ReLU, self).__init__(0, 0, ip) 7 | -------------------------------------------------------------------------------- /pyfunt/reshape.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | 3 | 4 | class Reshape(Module): 5 | 6 | def __init__(self, shape): 7 | super(Reshape, 
self).__init__() 8 | if type(shape) is not tuple: 9 | shape = (shape,) 10 | self.shape = shape 11 | 12 | def update_output(self, x): 13 | self.output = x.reshape((x.shape[0],) + self.shape) 14 | return self.output 15 | 16 | def update_grad_input(self, x, grad_output): 17 | self.grad_input = grad_output.reshape(x.shape) 18 | return self.grad_input 19 | 20 | def reset(self): 21 | pass 22 | -------------------------------------------------------------------------------- /pyfunt/sequential.py: -------------------------------------------------------------------------------- 1 | from container import Container 2 | import numpy as np 3 | 4 | 5 | class Sequential(Container): 6 | 7 | """docstring for Sequential""" 8 | 9 | def __init__(self): 10 | super(Sequential, self).__init__() 11 | 12 | def len(self): 13 | return len(self.modules) 14 | 15 | def add(self, module): 16 | if len(self.modules) == 0: 17 | self.grad_input = module.grad_input 18 | self.modules.append(module) 19 | self.output = module.output 20 | return self 21 | 22 | def insert(self, module, index=None): 23 | index = index or len(self.modules) + 1 24 | if index > len(self.modules) + 1 or index < 1: 25 | raise Exception('index should be contiguous to existing modules') 26 | self.modules.insert(module, index) 27 | self.output = self.modules[len(self.modules)].output 28 | self.grad_input = self.modules[0].grad_input # 1?? 29 | 30 | def remove(self, index): 31 | if index > len(self.modules) or index < 1: 32 | raise Exception('index out of range') 33 | self.modules.remove(index) 34 | if len(self.modules) > 0: 35 | self.output = self.modules[-1].output 36 | self.grad_input = self.modules[0].grad_input 37 | else: 38 | self.output = np.ndarray() 39 | self.grad_input = np.ndarray() 40 | 41 | def update_output(self, x): 42 | current_output = x 43 | for i in xrange(len(self.modules)): 44 | current_output = self.rethrow_errors(self.modules[i], i, 'update_output', current_output) 45 | self.output = current_output 46 | return self.output 47 | 48 | def update_grad_input(self, x, grad_output): 49 | current_grad_output = grad_output 50 | current_module = self.modules[-1] 51 | for i in range(len(self.modules)-2, -1, -1): 52 | previous_module = self.modules[i] 53 | current_grad_output = self.rethrow_errors(current_module, i, 'update_grad_input', previous_module.output, current_grad_output) 54 | current_module = previous_module 55 | current_grad_output = self.rethrow_errors(current_module, 0, 'update_grad_input', x, current_grad_output) 56 | self.grad_input = current_grad_output 57 | return current_grad_output 58 | 59 | def acc_grad_parameters(self, x, grad_output, scale=1): 60 | current_grad_output = grad_output 61 | current_module = self.modules[-1] 62 | for i in range(len(self.modules)-2, -1, -1): 63 | previous_module = self.modules[i] 64 | self.rethrow_errors(current_module, i, 'acc_grad_parameters', previous_module.output, current_grad_output, scale) 65 | current_grad_output = current_module.grad_input 66 | current_module = previous_module 67 | self.rethrow_errors(current_module, 0, 'acc_grad_parameters', x, current_grad_output, scale) 68 | 69 | def backward(self, x, grad_output, scale=1): 70 | current_grad_output = grad_output 71 | current_module = self.modules[-1] 72 | for i in range(len(self.modules)-2, -1, -1): 73 | previous_module = self.modules[i] 74 | current_grad_output = self.rethrow_errors(current_module, i, 'backward', previous_module.output, current_grad_output, scale) 75 | current_module.grad_input[:] = current_grad_output[:] 76 | 
current_module = previous_module 77 | 78 | current_grad_output = self.rethrow_errors(current_module, 0, 'backward', x, current_grad_output, scale) 79 | self.grad_input = current_grad_output 80 | return current_grad_output 81 | 82 | def acc_update_grad_parameters(self, x, grad_output, lr): 83 | current_grad_output = grad_output 84 | current_module = self.modules[-1] 85 | for i in range(len(self.modules)-2, -1, -1): 86 | previous_module = self.modules[i] 87 | self.rethrow_errors(current_module, i, 'acc_update_grad_parameters', previous_module.output, current_grad_output, lr) 88 | current_grad_output = current_module.grad_input 89 | current_module = previous_module 90 | self.rethrow_errors(current_module, 0, 'acc_update_grad_parameters', x, current_grad_output, lr) 91 | 92 | def __str__(self): 93 | return 'temporary string for Sequential class' 94 |
-------------------------------------------------------------------------------- /pyfunt/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | from distutils.core import setup 3 | 4 | 5 | def configuration(parent_package='', top_path=None): 6 | from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs 7 | 8 | config = Configuration('pyfunt', parent_package, top_path) 9 | config.add_subpackage('examples') 10 | config.add_extension('im2col_cyt', 11 | sources=[('im2col_cyt.c')], 12 | include_dirs=[get_numpy_include_dirs()]) 13 | 14 | return config 15 | 16 | if __name__ == '__main__': 17 | setup(**configuration(top_path='').todict()) 18 |
-------------------------------------------------------------------------------- /pyfunt/sigmoid.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Sigmoid(Module): 6 | 7 | def __init__(self): 8 | super(Sigmoid, self).__init__() 9 | 10 | def update_output(self, x): 11 | self.output = 1 / (1 + np.exp(-x)) 12 | return self.output 13 | 14 | def update_grad_input(self, x, grad_output): 15 | self.grad_input = grad_output * self.output * (1.0 - self.output) 16 | return self.grad_input 17 |
-------------------------------------------------------------------------------- /pyfunt/soft_max.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class SoftMax(Module): 6 | """docstring for SoftMax""" 7 | def __init__(self): 8 | super(SoftMax, self).__init__() 9 | 10 | def update_output(self, x): 11 | max_input = x.max(1, keepdims=True) 12 | z = np.exp(x - max_input) 13 | log_sum = np.sum(z, axis=1, keepdims=True) 14 | # log_sum = max_input + np.log(log_sum) 15 | self.output = z * 1/log_sum 16 | return self.output 17 | 18 | def update_grad_input(self, x, grad_output): 19 | _sum = np.sum(grad_output*self.output, axis=1, keepdims=True) 20 | self.grad_input = self.output * (grad_output - _sum) 21 | 22 | # max_input = x.max(1, keepdims=True) 23 | # log_sum = np.sum(np.exp(x - max_input), axis=1, keepdims=True) 24 | # log_sum = max_input + np.log(log_sum) 25 | # self.output = x - log_sum 26 | 27 | # self.grad_input = grad_output - np.exp(self.output)*_sum 28 | return self.grad_input 29 | 30 | def reset(self): 31 | pass 32 |
-------------------------------------------------------------------------------- /pyfunt/solver.py: -------------------------------------------------------------------------------- 1 | from __future__
import print_function 2 | import numpy as np 3 | from datetime import datetime 4 | import optim 5 | import os 6 | import multiprocessing as mp 7 | import signal 8 | from copy_reg import pickle 9 | from types import MethodType 10 | import sys 11 | from tqdm import tqdm 12 | 13 | def rel_error(x, y): 14 | """ returns relative error """ 15 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 16 | 17 | 18 | def _pickle_method(method): 19 | ''' 20 | Helper for multiprocessing ops, for more infos, check answer and comments 21 | here: 22 | http://stackoverflow.com/a/1816969/1142814 23 | ''' 24 | func_name = method.im_func.__name__ 25 | obj = method.im_self 26 | cls = method.im_class 27 | return _unpickle_method, (func_name, obj, cls) 28 | 29 | 30 | def _unpickle_method(func_name, obj, cls): 31 | ''' 32 | Helper for multiprocessing ops, for more infos, check answer and comments 33 | here: 34 | http://stackoverflow.com/a/1816969/1142814 35 | ''' 36 | for cls in cls.mro(): 37 | try: 38 | func = cls.__dict__[func_name] 39 | except KeyError: 40 | pass 41 | else: 42 | break 43 | return func.__get__(obj, cls) 44 | 45 | 46 | def init_worker(): 47 | ''' 48 | Permit to interrupt all processes trough ^C. 49 | ''' 50 | signal.signal(signal.SIGINT, signal.SIG_IGN) 51 | 52 | 53 | def loss_helper(args): 54 | model, criterion, x, y = args 55 | preds = model.forward(x) 56 | loss = criterion.forward(preds, y) 57 | dout = criterion.backward(preds, y) 58 | _ = model.backward(x, dout) 59 | _, grads = model.get_parameters() 60 | return loss, grads 61 | 62 | 63 | class Solver(object): 64 | 65 | ''' 66 | A Solver encapsulates all the logic necessary for training classification 67 | models. The Solver performs stochastic gradient descent using different 68 | update rules defined in optim.py. 69 | 70 | The solver accepts both training and validataion data and labels so it can 71 | periodically check classification accuracy on both training and validation 72 | data to watch out for overfitting. 73 | 74 | To train a model, you will first construct a Solver instance, passing the 75 | model, dataset, and various optoins (learning rate, batch size, etc) to the 76 | constructor. You will then call the train() method to run the optimization 77 | procedure and train the model. 78 | 79 | After the train() method returns, model.params will contain the parameters 80 | that performed best on the validation set over the course of training. 81 | In addition, the instance variable solver.loss_history will contain a list 82 | of all losses encountered during training and the instance variables 83 | solver.train_acc_history and solver.val_acc_history will be lists containing 84 | the accuracies of the model on the training and validation set at each epoch. 85 | 86 | Example usage might look something like this: 87 | 88 | data = { 89 | 'X_train': # training data 90 | 'y_train': # training labels 91 | 'X_val': # validation data 92 | 'X_train': # validation labels 93 | } 94 | model = MyAwesomeModel(hidden_size=100, reg=10) 95 | solver = Solver(model, data, 96 | update_rule='sgd', 97 | optim_config={ 98 | 'learning_rate': 1e-3, 99 | }, 100 | lr_decay=0.95, 101 | num_epochs=10, batch_size=100, 102 | print_every=100) 103 | solver.train() 104 | 105 | 106 | A Solver works on a model object that must conform to the following API: 107 | 108 | - model.params must be a dictionary mapping string parameter names to numpy 109 | arrays containing parameter values. 
110 | 111 | - model.loss(X, y) must be a function that computes training-time loss and 112 | gradients, and test-time classification scores, with the following inputs 113 | and outputs: 114 | 115 | Inputs: 116 | - X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k) 117 | - y: Array of labels, of shape (N,) giving labels for X where y[i] is the 118 | label for X[i]. 119 | 120 | Returns: 121 | If y is None, run a test-time forward pass and return: 122 | - scores: Array of shape (N, C) giving classification scores for X where 123 | scores[i, c] gives the score of class c for X[i]. 124 | 125 | If y is not None, run a training time forward and backward pass and return 126 | a tuple of: 127 | - loss: Scalar giving the loss 128 | - grads: Dictionary with the same keys as self.params mapping parameter 129 | names to gradients of the loss with respect to those parameters. 130 | ''' 131 | 132 | def __init__(self, model, data=None, load_dir=None, **kwargs): 133 | ''' 134 | Construct a new Solver instance. 135 | 136 | Required arguments: 137 | - model: A model object conforming to the API described above 138 | - data: A dictionary of training and validation data with the following: 139 | 'X_train': Array of shape (N_train, d_1, ..., d_k) giving training images 140 | 'X_val': Array of shape (N_val, d_1, ..., d_k) giving validation images 141 | 'y_train': Array of shape (N_train,) giving labels for training images 142 | 'y_val': Array of shape (N_val,) giving labels for validation images 143 | 144 | Optional arguments: Arguments you also find in the Stanford's 145 | cs231n assignments' Solver 146 | - update_rule: A string giving the name of an update rule in optim.py. 147 | Default is 'sgd_th'. 148 | - optim_config: A dictionary containing hyperparameters that will be 149 | passed to the chosen update rule. Each update rule requires different 150 | hyperparameters (see optim.py) but all update rules require a 151 | 'learning_rate' parameter so that should always be present. 152 | - lr_decay: A scalar for learning rate decay; after each epoch the learning 153 | rate is multiplied by this value. 154 | - batch_size: Size of minibatches used to compute loss and gradient during 155 | training. 156 | - num_epochs: The number of epochs to run for during training. 157 | Custom arguments: 158 | - load_dir: root directory for the checkpoints folder, if is not False, 159 | the instance tries to load the most recent checkpoint found in load_dir. 160 | - path_checkpoints: root directory where the checkpoints folder resides. 161 | - check_point_every: save a checkpoint every check_point_every epochs. 162 | - custom_update_ld: optional function to update the learning rate decay 163 | parameter, if not False the instruction 164 | self.lr_decay = custom_update_ld(self.epoch) is executed at the and 165 | of each epoch. 166 | - acc_check_train_pre_process: optional function to pre-process the 167 | training subset for checking accuracy on training data. 168 | If not False acc_check_train_pre_process is called before each 169 | accuracy check. 170 | - acc_check_val_pre_process: optional function to pre-process the 171 | validation data. 172 | If not False acc_check_val_pre_process is called on the validation 173 | before each accuracy check. 174 | - batch_augment_func: optional function to augment the batch data. 175 | If not False X_batch = batch_augment_func(X_batch) is called before 176 | each training step. 177 | - num_processes: optional number of parallel processes for each 178 | training step. 
If not 1, at each training/accuracy_check step, each 179 | batch is divided by the number of processes and losses (and grads) 180 | are computed in parallel when all processes finish we compute the 181 | mean for the loss (and grads) and continue as usual. 182 | ''' 183 | self.model = model 184 | if data: 185 | self.X_train = data['X_train'] 186 | self.y_train = data['y_train'] 187 | self.X_val = data['X_val'] 188 | self.y_val = data['y_val'] 189 | 190 | # Unpack keyword arguments 191 | self.criterion = kwargs.pop('criterion', None) 192 | if self.criterion is None: 193 | raise(Exception('Criterion cannot be None')) 194 | 195 | self.update_rule = kwargs.pop('update_rule', 'sgd') 196 | self.optim_config = kwargs.pop('optim_config', {}) 197 | self.learning_rate = self.optim_config['learning_rate'] 198 | self.lr_decay = kwargs.pop('lr_decay', 1.0) 199 | self.batch_size = kwargs.pop('batch_size', 100) 200 | self.num_epochs = kwargs.pop('num_epochs', 10) 201 | 202 | # Personal Edits 203 | self.path_checkpoints = kwargs.pop('path_checkpoints', 'checkpoints') 204 | self.checkpoint_every = kwargs.pop('checkpoint_every', 0) 205 | self.check_and_swap_every = kwargs.pop('check_and_swap_every', 0) 206 | self.silent_train = kwargs.pop('silent_train', False) 207 | self.custom_update_ld = kwargs.pop('custom_update_ld', False) 208 | self.acc_check_train_pre_process = kwargs.pop( 209 | 'acc_check_train_pre_process', False) 210 | self.acc_check_val_pre_process = kwargs.pop( 211 | 'acc_check_val_pre_process', False) 212 | self.batch_augment_func = kwargs.pop('batch_augment_func', False) 213 | self.num_processes = kwargs.pop('num_processes', 1) 214 | 215 | # Throw an error if there are extra keyword arguments 216 | if len(kwargs) > 0: 217 | extra = ', '.join('"%s"' % k for k in kwargs.keys()) 218 | raise ValueError('Unrecognized arguments %s' % extra) 219 | 220 | # Make sure the update rule exists, then replace the string 221 | # name with the actual function 222 | if not hasattr(optim, self.update_rule): 223 | raise ValueError('Invalid update_rule "%s"' % self.update_rule) 224 | self.update_rule = getattr(optim, self.update_rule) 225 | self._reset() 226 | if load_dir: 227 | self.load_dir = load_dir 228 | self.load_current_checkpoint() 229 | 230 | def __str__(self): 231 | return """ 232 | Number of processes: %d; 233 | Update Rule: %s; 234 | Optim Config: %s; 235 | Learning Rate Decay: %d; 236 | Batch Size: %d; 237 | Number of Epochs: %d; 238 | """ % ( 239 | self.num_processes, 240 | self.update_rule.__name__, 241 | str(self.optim_config), 242 | self.lr_decay, 243 | self.batch_size, 244 | self.num_epochs 245 | ) 246 | 247 | def _reset(self): 248 | ''' 249 | Set up some book-keeping variables for optimization. Don't call this 250 | manually. 
251 | ''' 252 | # Set up some variables for book-keeping 253 | self.epoch = 0 254 | self.best_val_acc = 0 255 | self.best_params = {} 256 | self.loss_history = [] 257 | self.val_acc_history = [] 258 | self.train_acc_history = [] 259 | self.pbar = None 260 | 261 | # Make a deep copy of the optim_config for each parameter 262 | self.optim_configs = {} 263 | self.params, self.grad_params = self.model.get_parameters() 264 | # self.weights, _ = self.model.get_parameters() 265 | for p in range(len(self.params)): 266 | d = {k: v for k, v in self.optim_config.iteritems()} 267 | self.optim_configs[p] = d 268 | 269 | self.multiprocessing = bool(self.num_processes-1) 270 | if self.multiprocessing: 271 | self.pool = mp.Pool(self.num_processes, init_worker) 272 | 273 | def load_current_checkpoint(self): 274 | ''' 275 | Return the current checkpoint 276 | ''' 277 | checkpoints = [f for f in os.listdir( 278 | self.load_dir) if not f.startswith('.')] 279 | 280 | try: 281 | num = max([int(f.split('_')[1]) for f in checkpoints]) 282 | name = 'check_' + str(num) 283 | try: 284 | cp = np.load( 285 | os.path.join(self.path_checkpoints, name, name + '.pkl')) 286 | except: 287 | print('sorry, I haven\'t fixed this line, but it should be easy to fix, if you want you can try now and make a pull request') 288 | raise() 289 | # Set up some variables for book-keeping 290 | 291 | self.epoch = cp['epoch'] 292 | self.best_val_acc = cp['best_val_acc'] 293 | self.best_params = cp['best_params'] 294 | self.loss_history = cp['loss_history'] 295 | self.val_acc_history = cp['val_acc_history'] 296 | self.train_acc_history = cp['train_acc_history'] 297 | self.model = cp['model'] 298 | 299 | except Exception, e: 300 | raise e 301 | 302 | def make_check_point(self): 303 | ''' 304 | Save the solver's current status 305 | ''' 306 | checkpoints = { 307 | 'model': self.model, 308 | 'epoch': self.epoch, 309 | 'best_params': self.best_params, 310 | 'best_val_acc': self.best_val_acc, 311 | 'loss_history': self.loss_history, 312 | 'val_acc_history': self.val_acc_history, 313 | 'train_acc_history': self.train_acc_history} 314 | 315 | name = 'check_' + str(self.epoch) 316 | directory = os.path.join(self.path_checkpoints, name) 317 | if not os.path.exists(directory): 318 | os.makedirs(directory) 319 | try: 320 | np.save(checkpoints, os.path.join( 321 | directory, name + '.pkl')) 322 | except: 323 | print('sorry, I haven\'t fixed this line, but it should be easy to fix, if you want you can try now and make a pull request') 324 | raise() 325 | 326 | def export_model(self, path): 327 | if not os.path.exists(path): 328 | os.makedirs(path) 329 | np.save('%smodel' % path, self.best_params) 330 | 331 | def export_histories(self, path): 332 | if not os.path.exists(path): 333 | os.makedirs(path) 334 | i = np.arange(len(self.loss_history)) + 1 335 | z = np.array(zip(i, i*self.batch_size, self.loss_history)) 336 | np.savetxt(path + 'loss_history.csv', z, delimiter=',', fmt=[ 337 | '%d', '%d', '%f'], header='iteration, n_images, loss') 338 | 339 | i = np.arange(len(self.train_acc_history), dtype=np.int) 340 | 341 | z = np.array(zip(i, self.train_acc_history)) 342 | np.savetxt(path + 'train_acc_history.csv', z, delimiter=',', fmt=[ 343 | '%d', '%f'], header='epoch, train_acc') 344 | 345 | z = np.array(zip(i, self.val_acc_history)) 346 | np.savetxt(path + 'val_acc_history.csv', z, delimiter=',', fmt=[ 347 | '%d', '%f'], header='epoch, val_acc') 348 | np.save(path + 'loss', self.loss_history) 349 | np.save(path + 'train_acc_history', 
self.train_acc_history) 350 | np.save(path + 'val_acc_history', self.val_acc_history) 351 | 352 | def _loss_helper(self, args): 353 | x, y = args 354 | preds = self.model.forward(x) 355 | loss = self.criterion.forward(preds, y) 356 | dout = self.criterion.backward(preds, y) 357 | self.model.backward(x, dout) 358 | return loss, self.grad_params 359 | 360 | def _step(self): 361 | ''' 362 | Make a single gradient update. This is called by train() and should not 363 | be called manually. 364 | ''' 365 | # Make a minibatch of training data 366 | num_train = self.X_train.shape[0] 367 | batch_mask = np.random.choice(num_train, self.batch_size) 368 | X_batch = self.X_train[batch_mask] 369 | y_batch = self.y_train[batch_mask] 370 | 371 | if not self.multiprocessing: 372 | # pred = model.forward(X_batch) 373 | # loss = self.criterion.forward(pred, y_batch) 374 | loss, grads = self._loss_helper((X_batch, y_batch)) 375 | else: 376 | n = self.num_processes 377 | pool = self.pool 378 | 379 | X_batches = np.split(X_batch, n) 380 | # sub_weights = np.array([len(x) 381 | # for x in X_batches], dtype=np.float32) 382 | # sub_weights /= sub_weights.sum() 383 | 384 | y_batches = np.split(y_batch, n) 385 | try: 386 | job_args = [(self.model, self.criterion, X_batches[i], y_batches[i]) for i in range(n)] 387 | results = pool.map_async(loss_helper, job_args).get() 388 | losses = np.zeros(len(results)) 389 | gradses = [] 390 | i = 0 391 | for i, r in enumerate(results): 392 | l, g = r 393 | losses[i] = l 394 | gradses.append(g) 395 | i += 1 396 | except Exception, e: 397 | self.pool.terminate() 398 | self.pool.join() 399 | raise e 400 | loss = np.mean(losses) 401 | grads = [] 402 | for p, w in enumerate(gradses[0]): 403 | grad = np.mean([grad[p] for grad in gradses], axis=0) 404 | grads.append(grad) 405 | self.grad_params[p][:] = grad 406 | 407 | self.loss_history.append(loss) 408 | return loss, grads 409 | 410 | def eval_model(self, X, y, num_samples=None, batch_size=100, return_preds=False): 411 | ''' 412 | Check accuracy of the model on the provided data. 413 | 414 | Inputs: 415 | - X: Array of data, of shape (N, d_1, ..., d_k) 416 | - y: Array of labels, of shape (N,) 417 | - num_samples: If not None, subsample the data and only test the model 418 | on num_samples datapoints. TODO 419 | - batch_size: Split X and y into batches of this size to avoid using too 420 | much memory. TODO 421 | - return_preds: if True returns predictions probabilities 422 | 423 | Returns: 424 | - acc: Scalar giving the fraction of instances that were correctly 425 | classified by the model. 
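    Example (illustrative only; assumes a constructed solver and a held-out
    X_val, y_val of the shapes described above):

        top1, top5 = solver.eval_model(X_val, y_val)
        preds = solver.eval_model(X_val, y_val, return_preds=True)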
426 | ''' 427 | N = X.shape[0] 428 | batch_size = self.batch_size 429 | num_batches = N / batch_size 430 | if N % batch_size != 0: 431 | num_batches += 1 432 | y_pred1 = [] 433 | y_pred5 = [] 434 | self.pbar = tqdm(total=N, desc='Accuracy Check', unit='im') 435 | for i in xrange(num_batches): 436 | start = i * batch_size 437 | end = (i + 1) * batch_size 438 | 439 | if not self.multiprocessing: 440 | scores = self.model.forward(X[start:end]) 441 | y_pred1.append(np.argmax(scores, axis=1)) 442 | y_pred5.append(scores.argsort()[-5:][::-1]) 443 | else: 444 | n = self.num_processes 445 | pool = self.pool 446 | X_batches = np.split(X[start:end], n) 447 | try: 448 | results = pool.map_async(self.model.forward, X_batches).get() 449 | scores = np.vstack(results) 450 | y_pred1.append(np.argmax(scores, axis=1)) 451 | y_pred5.append(scores.argsort()[-5:][::-1]) 452 | 453 | except Exception, e: 454 | self.pool.terminate() 455 | self.pool.join() 456 | raise e 457 | 458 | self.pbar.update(end - start) 459 | print() 460 | y_pred1 = np.hstack(y_pred1) 461 | if return_preds: 462 | return y_pred1 463 | acc1 = np.mean(y_pred1 == y) 464 | acc5 = np.mean(np.any(y_pred5 == y)) 465 | return acc1, acc5 466 | 467 | def _check_and_swap(self, it=0): 468 | ''' 469 | Check accuracy for both X_train[:1000] and X_val. 470 | ''' 471 | if self.acc_check_train_pre_process: 472 | X_tr_check = self.acc_check_train_pre_process(self.X_train[:1000]) 473 | else: 474 | X_tr_check = self.X_train[:1000] 475 | if self.acc_check_val_pre_process: 476 | X_val_check = self.acc_check_val_pre_process(self.X_val) 477 | else: 478 | X_val_check = self.X_val 479 | 480 | train_acc, val_acc = 0, 0 481 | 482 | train_acc, _ = self.eval_model( 483 | X_tr_check, self.y_train[:1000]) 484 | val_acc, _ = self.eval_model(X_val_check, self.y_val) 485 | 486 | self.train_acc_history.append(train_acc) 487 | self.val_acc_history.append(val_acc) 488 | 489 | self.emit_sound() 490 | # Keep track of the best model 491 | if val_acc > self.best_val_acc: 492 | self.best_val_acc = val_acc 493 | # self.best_params = {} 494 | for p, w in enumerate(self.params): 495 | self.best_params[p] = w.copy() 496 | # for k, v in self.model.params.iteritems(): 497 | # self.best_params[k] = v.copy() 498 | 499 | loss = '%.4f' % self.loss_history[it-1] if it > 0 else '-' 500 | print('%s - iteration %d: loss:%s, train_acc:%.4f, val_acc: %.4f, best_val_acc: %.4f;\n' % ( 501 | # print('%s - iteration %d: loss:%s, train_acc: %.4f, val_acc: %.4f, best_val_acc: %.4f;\n' % () 502 | # str(datetime.now()), it, loss, val_acc, self.best_val_acc) 503 | str(datetime.now()), it, loss, train_acc, val_acc, self.best_val_acc)) 504 | 505 | def _new_training_bar(self, total): 506 | ''' 507 | Create a new loading bar. 508 | ''' 509 | if not self.silent_train: 510 | d = 'Epoch %d / %d' % ( 511 | self.epoch + 1, self.num_epochs) 512 | self.pbar = tqdm(total=total, desc=d, unit='s.') 513 | 514 | def _update_bar(self, amount): 515 | if not self.silent_train: 516 | self.pbar.update(amount) 517 | 518 | def train(self): 519 | ''' 520 | Run optimization to train the model. 
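    One epoch corresponds to ceil(num_train / batch_size) iterations. At the
    end of every epoch the learning rate stored in each parameter's optim
    config is multiplied by lr_decay, a checkpoint is written every
    checkpoint_every epochs, and accuracy on a training subset and on the
    validation set is re-checked, keeping track of the best parameters (which
    are copied back into the model when training finishes).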
521 | ''' 522 | num_train = self.X_train.shape[0] 523 | iterations_per_epoch = int(np.ceil(num_train / float(self.batch_size))) 524 | images_per_epochs = iterations_per_epoch * self.batch_size 525 | num_iterations = self.num_epochs * iterations_per_epoch 526 | 527 | print('Training for %d epochs (%d iterations).\n' % 528 | (self.num_epochs, num_iterations)) 529 | epoch_end = True 530 | lr_decay_updated = False 531 | self._check_and_swap() 532 | self._new_training_bar(images_per_epochs) 533 | # self.params, self.grad_params = self.model.get_parameters() 534 | self.best_params = np.copy(self.params) 535 | for it in xrange(num_iterations): 536 | 537 | loss, _ = self._step() 538 | 539 | # self.loss_history.append(loss) 540 | 541 | # Perform a parameter update 542 | # self.model.params.iteritems(): 543 | for p, w in enumerate(self.params): 544 | dw = self.grad_params[p] 545 | config = self.optim_configs[p] 546 | next_w, next_config = self.update_rule(w, dw, config) 547 | self.params[p][:] = next_w[:] 548 | self.optim_configs[p] = next_config 549 | 550 | self.pbar.update(self.batch_size) 551 | 552 | epoch_end = (it + 1) % iterations_per_epoch == 0 553 | 554 | if epoch_end: 555 | print() 556 | self.epoch += 1 557 | 558 | if self.custom_update_ld: 559 | self.lr_decay = self.custom_update_ld(self.epoch) 560 | lr_decay_updated = self.lr_decay != 1 561 | 562 | for k in self.optim_configs: 563 | self.optim_configs[k]['learning_rate'] *= self.lr_decay 564 | 565 | if self.checkpoint_every and (self.epoch % self.checkpoint_every == 0): 566 | self.make_check_point() 567 | 568 | if not self.check_and_swap_every or (self.epoch % self.check_and_swap_every == 0): 569 | self._check_and_swap(it) 570 | 571 | finish = it == num_iterations - 1 572 | if not finish: 573 | if lr_decay_updated: 574 | print('learning_rate updated: ', next( 575 | self.optim_configs.itervalues())['learning_rate']) 576 | lr_decay_updated = False 577 | print() 578 | self._new_training_bar(images_per_epochs) 579 | 580 | # At the end of training swap the best params into the model 581 | self.params[:] = self.best_params[:] 582 | if self.multiprocessing: 583 | try: 584 | self.pool.terminate() 585 | self.pool.join() 586 | except: 587 | pass 588 | 589 | def emit_sound(self): 590 | ''' 591 | Emit sound when epoch end. 
592 | ''' 593 | sys.stdout.write('\a') 594 | 595 | 596 | # again, check http://stackoverflow.com/a/1816969/1142814 and comments 597 | pickle(MethodType, _pickle_method, _unpickle_method) 598 | -------------------------------------------------------------------------------- /pyfunt/spatial_average_pooling.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | try: 5 | from im2col_cyt import im2col_cython, col2im_cython 6 | except ImportError: 7 | print('Installation broken, please reinstall PyFunt') 8 | 9 | 10 | class SpatialAveragePooling(Module): 11 | 12 | """docstring for SpatialAveragePooling""" 13 | 14 | def __init__(self, kW, kH, dW=1, dH=1, padW=0, padH=0): 15 | super(SpatialAveragePooling, self).__init__() 16 | self.kW = kW 17 | self.kH = kH 18 | self.dW = dW 19 | self.dH = dH 20 | self.padW = padW 21 | self.padH = padH 22 | self.ceil_mode = False 23 | self.count_include_pad = True 24 | self.divide = True 25 | 26 | def reset(self): 27 | #TODO 28 | pass 29 | 30 | def ceil(self): 31 | self.ceil_mode = True 32 | 33 | def floor(self): 34 | self.ceil_mode = False 35 | 36 | def set_count_include_pad(self): 37 | self.count_include_pad = True 38 | 39 | def set_count_exclude_pad(self): 40 | self.count_include_pad = False 41 | 42 | def update_output(self, x): 43 | N, C, H, W = x.shape 44 | pool_height, pool_width = self.kW, self.kH 45 | stride = self.dW 46 | 47 | assert ( 48 | H - pool_height) % stride == 0 or H == pool_height, 'Invalid height' 49 | assert ( 50 | W - pool_width) % stride == 0 or W == pool_width, 'Invalid width' 51 | 52 | out_height = int(np.floor((H - pool_height) / stride + 1)) 53 | out_width = int(np.floor((W - pool_width) / stride + 1)) 54 | 55 | x_split = x.reshape(N * C, 1, H, W) 56 | x_cols = im2col_cython( 57 | x_split, pool_height, pool_width, padding=0, stride=stride) 58 | x_cols_avg = np.mean(x_cols, axis=0) 59 | out = x_cols_avg.reshape( 60 | out_height, out_width, N, C).transpose(2, 3, 0, 1) 61 | 62 | self.x_shape = x.shape 63 | self.x_cols = x_cols 64 | self.output = out 65 | return self.output 66 | 67 | def update_grad_input(self, x, grad_output, scale=1): 68 | x_cols = self.x_cols 69 | dout = grad_output 70 | N, C, H, W = self.x_shape 71 | pool_height, pool_width = self.kW, self.kH 72 | stride = self.dW 73 | pool_dim = pool_height * pool_width 74 | 75 | dout_reshaped = dout.transpose(2, 3, 0, 1).flatten() 76 | dx_cols = np.zeros_like(x_cols) 77 | dx_cols[:, np.arange(dx_cols.shape[1])] = 1. 
/ pool_dim * dout_reshaped 78 | dx = col2im_cython(dx_cols, N * C, 1, H, W, pool_height, pool_width, 79 | padding=0, stride=stride) 80 | 81 | self.grad_input = dx.reshape(self.x_shape) 82 | 83 | return self.grad_input 84 | 85 | def __str__(self): 86 | pass 87 | -------------------------------------------------------------------------------- /pyfunt/spatial_batch_normalitazion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import numpy as np 4 | from batch_normalization import BatchNormalization 5 | 6 | 7 | class SpatialBatchNormalization(BatchNormalization): 8 | n_dim = 4 9 | 10 | def __init__(self, *args): 11 | super(SpatialBatchNormalization, self).__init__(*args) 12 | 13 | def update_output(self, x): 14 | N, C, H, W = x.shape 15 | x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C) 16 | x_flat = np.ascontiguousarray(x_flat, dtype=x.dtype) 17 | super(SpatialBatchNormalization, self).update_output(x_flat) 18 | self.output = self.output.reshape(N, H, W, C).transpose(0, 3, 1, 2) 19 | return self.output 20 | 21 | def update_grad_input(self, x, grad_output, scale=1): 22 | N, C, H, W = grad_output.shape 23 | dout_flat = grad_output.transpose(0, 2, 3, 1).reshape(-1, C) 24 | dout_flat = np.ascontiguousarray(dout_flat, dtype=dout_flat.dtype) 25 | super(SpatialBatchNormalization, self).update_grad_input(x, dout_flat, scale) 26 | self.grad_input = self.grad_input.reshape(N, H, W, C).transpose(0, 3, 1, 2) 27 | return self.grad_input 28 | 29 | def backward(self, x, grad_output, scale=1): 30 | return self.update_grad_input(x, grad_output, scale) 31 | -------------------------------------------------------------------------------- /pyfunt/spatial_convolution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | try: 7 | from im2col_cyt import col2im_cython 8 | except ImportError: 9 | print('Installation broken, please reinstall PyFunt') 10 | 11 | 12 | class SpatialConvolution(Module): 13 | 14 | n_dim = 2 15 | 16 | def __init__(self, n_input_plane, n_output_plane, kW, kH, dW=1, dH=1, padW=0, padH=0): 17 | super(SpatialConvolution, self).__init__() 18 | 19 | self.n_input_plane = n_input_plane 20 | self.n_output_plane = n_output_plane 21 | self.kW = kW 22 | self.kH = kH 23 | 24 | self.dW = dW 25 | self.dH = dH 26 | self.padW = padW 27 | self.padH = padH or self.padW 28 | 29 | self.weight = np.ndarray((n_output_plane, n_input_plane, kH, kW)) 30 | self.bias = np.ndarray(n_output_plane) 31 | self.grad_weight = np.ndarray((n_output_plane, n_input_plane, kH, kW)) 32 | self.grad_bias = np.ndarray(n_output_plane) 33 | 34 | self.reset() 35 | 36 | def no_bias(self): 37 | self.bias = None 38 | self.grad_bias = None 39 | 40 | def reset(self, stdv=None): 41 | if not stdv: 42 | stdv = 1/np.sqrt(self.kW*self.kH*self.n_input_plane) 43 | self.weight = np.random.normal( 44 | 0, stdv, (self.n_output_plane, self.n_input_plane, self.kH, self.kW)) 45 | self.bias = np.zeros(self.n_output_plane) 46 | 47 | def check_input_dim(self, x): 48 | pass 49 | 50 | def make_contigous(self, input, grad_output): 51 | pass 52 | 53 | def update_output(self, x): 54 | w, b = self.weight, self.bias 55 | # input = make_contigous (input)N, C, H, W = x.shape 56 | self.x_shape = N, C, H, W = x.shape 57 | 58 | F, _, HH, WW = w.shape 59 | stride, pad = self.dW, self.padW 60 | #assert (W + 2 * pad - WW) % stride == 0, 'width does not 
work' 61 | #assert (H + 2 * pad - HH) % stride == 0, 'height does not work' 62 | 63 | p = pad 64 | x_padded = np.pad( 65 | x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 66 | 67 | self.tiles_w = (W + (2 * pad) - WW) % stride 68 | self.tiles_h = (H + (2 * pad) - HH) % stride 69 | if not self.tiles_w == 0: 70 | x_padded = x_padded[:, :, :, :-self.tiles_w] 71 | if not self.tiles_h == 0: 72 | x_padded = x_padded[:, :, :-self.tiles_h, :] 73 | 74 | N, C, H, W = x_padded.shape 75 | if (W + (2 * pad) - WW) % stride != 0: 76 | raise Exception('width does not work') 77 | 78 | # H += 2 * pad 79 | # W += 2 * pad 80 | out_h = (H - HH) / stride + 1 81 | out_w = (W - WW) / stride + 1 82 | 83 | # Perform an im2col operation by picking clever strides 84 | shape = (C, HH, WW, N, out_h, out_w) 85 | strides = (H * W, W, 1, C * H * W, stride * W, stride) 86 | strides = x.itemsize * np.array(strides) 87 | x_stride = np.lib.stride_tricks.as_strided(x_padded, 88 | shape=shape, strides=strides) 89 | x_cols = np.ascontiguousarray(x_stride) 90 | x_cols.shape = (C * HH * WW, N * out_h * out_w) 91 | 92 | # Now all our convolutions are a big matrix multiply 93 | res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1) 94 | 95 | res.shape = (F, N, out_h, out_w) 96 | out = res.transpose(1, 0, 2, 3) 97 | 98 | self.output = np.ascontiguousarray(out) 99 | 100 | self.x_cols = x_cols 101 | return self.output 102 | 103 | def update_grad_input(self, input, grad_output, scale=1): 104 | x_shape, x_cols = self.x_shape, self.x_cols 105 | w = self.weight 106 | 107 | stride, pad = self.dW, self.padW 108 | 109 | N, C, H, W = x_shape 110 | F, _, HH, WW = w.shape 111 | _, _, out_h, out_w = grad_output.shape 112 | 113 | self.grad_bias[:] = np.sum(grad_output, axis=(0, 2, 3))[:] 114 | 115 | dout_reshaped = grad_output.transpose(1, 0, 2, 3).reshape(F, -1) 116 | self.grad_weight[:] = dout_reshaped.dot(x_cols.T).reshape(w.shape)[:] 117 | 118 | dx_cols = w.reshape(F, -1).T.dot(dout_reshaped) 119 | #dx_cols.shape = (C, HH, WW, N, out_h, out_w) 120 | # dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride) 121 | dx = col2im_cython(dx_cols, N, C, H, W, HH, WW, pad, stride) 122 | self.grad_input = dx 123 | return dx 124 | 125 | def type(self, type, cache): 126 | pass 127 | 128 | def __str__(self): 129 | pass 130 | 131 | def clear_state(self): 132 | pass 133 | -------------------------------------------------------------------------------- /pyfunt/spatial_full_convolution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | from module import Module 5 | import numpy as np 6 | try: 7 | from im2col_cyt import col2im_cython 8 | except ImportError: 9 | print('Installation broken, please reinstall PyFunt') 10 | 11 | 12 | class SpatialFullConvolution(Module): 13 | 14 | '''implementation of layer described in https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf"''' 15 | n_dim = 2 16 | 17 | def __init__(self, n_input_plane, n_output_plane, kW, kH, dW=1, dH=1, padW=0, padH=0, adjW=0, adjH=0): 18 | super(SpatialFullConvolution, self).__init__() 19 | 20 | self.n_input_plane = n_input_plane 21 | self.n_output_plane = n_output_plane 22 | self.kW = kW 23 | self.kH = kH 24 | 25 | self.dW = dW 26 | self.dH = dH 27 | if padH != padW or dH != dW: 28 | raise Exception('padH != padW or dH != dW, behaviout not implemented ') 29 | self.padW = padW 30 | self.padH = padH or self.padW 31 | self.adjW = adjW 32 | self.adjH = adjH 33 | 34 | if self.adjW > self.dW - 
1 or self.adjH > self.dH - 1: 35 | raise Exception( 36 | 'adjW and adjH must be smaller than self.dW - 1 and self.dH - 1 respectively') 37 | 38 | self.weight = np.ndarray((n_input_plane, n_output_plane, kH, kW)) 39 | self.bias = np.ndarray(n_output_plane) 40 | self.grad_weight = np.ndarray((n_input_plane, n_output_plane, kH, kW)) 41 | self.grad_bias = np.ndarray(n_output_plane) 42 | 43 | self.reset() 44 | 45 | def no_bias(self): 46 | self.bias = None 47 | self.grad_bias = None 48 | 49 | def reset(self, stdv=None): 50 | if not stdv: 51 | stdv = 1/np.sqrt(self.kW*self.kH*self.n_input_plane) 52 | self.weight = np.random.normal( 53 | 0, stdv, (self.n_output_plane, self.n_input_plane, self.kH, self.kW)) 54 | self.bias = np.zeros(self.n_output_plane) 55 | 56 | def check_input_dim(self, x): 57 | pass 58 | 59 | def make_contigous(self, input, grad_output): 60 | pass 61 | 62 | def calcula_adj(self, target_size, ker, pad, stride): 63 | return (target_size + 2 * pad - ker) % stride 64 | 65 | def update_output(self, x): 66 | 67 | w = self.weight 68 | F, FF, HH, WW = w.shape 69 | 70 | stride, pad = self.dW, self.padW 71 | N, in_C, inH, inW = x.shape 72 | C = self.n_output_plane 73 | W = (inW - 1) * self.dW - 2*self.padW + WW # x_shape 74 | H = (inH - 1) * self.dH - 2*self.padH + HH # x_shape 75 | _, _, in_h, in_w = x.shape 76 | #assert (H + 2 * pad - HH) % stride == 0, 'height does not work' 77 | x_reshaped = x.transpose(1, 0, 2, 3).reshape(F, -1) 78 | out_cols = w.reshape(F, -1).T.dot(x_reshaped) 79 | # out_cols.shape = (C, HH, WW, N, in_h, in_w) 80 | self.output = col2im_cython(out_cols, N, C, H, W, HH, WW, pad, stride) 81 | self.output += self.bias.reshape(1, -1, 1, 1) 82 | if self.adjH: 83 | self.output = np.pad( 84 | self.output, ((0, 0), (0, 0), (0, self.adjH), (0, 0)), mode='constant') 85 | if self.adjW: 86 | self.output = np.pad( 87 | self.output, ((0, 0), (0, 0), (0, 0), (0, self.adjW)), mode='constant') 88 | return self.output 89 | 90 | 91 | 92 | # w, b = self.weight, self.bias 93 | # # input = make_contigous (input)N, C, H, W = x.shape 94 | # N, C, H, W = x.shape 95 | # outW = (W - 1) * self.dW - 2*self.padW + self.kW + self.adjW 96 | 97 | # F, FF, HH, WW = w.shape 98 | # stride, pad = self.dW, self.padW 99 | 100 | # p = pad 101 | # x = np.pad( 102 | # x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 103 | 104 | # self.tiles_w = (W + 2 * pad - WW) % stride 105 | # self.tiles_h = (H + 2 * pad - HH) % stride 106 | 107 | # stride, pad = self.dW, self.padW 108 | 109 | # out_w = (W - 1) * self.dW - 2*self.padW + WW; 110 | # out_h = (H - 1) * self.dH - 2*self.padH + HH; 111 | # _, _, out_h, out_w = x.shape 112 | # import pdb; pdb.set_trace() 113 | # x_reshaped = x.transpose(1, 0, 2, 3).reshape(F, -1) 114 | # out_cols = w.reshape(F, -1).T.dot(x_reshaped)# + b.reshape(-1, 1) 115 | # out_cols.shape = (self.n_output_plane, -1) 116 | # b_reshaped = b.reshape(self.n_output_plane, -1) 117 | # out_cols += b_reshaped 118 | # out_cols.shape = (self.n_output_plane, HH, WW, N, out_h, out_w) 119 | # #out_cols.shape = (C, HH, WW, N, out_h, out_w) 120 | # self.output = col2im_6d_cython(out_cols, N, self.n_output_plane, H, W, HH, WW, pad, stride) 121 | 122 | # if self.output.shape[1] != self.n_output: 123 | # import pdb; pdb.set_trace() 124 | 125 | def update_grad_input(self, input, grad_output, scale=1): 126 | raise NotImplementedError 127 | # TODO THIS IS BROKEN FIXME PLEASE :() 128 | w = self.bias 129 | F, _, HH, WW = w.shape 130 | stride = self.stride 131 | 132 | if not self.adjH == 0: 133 | grad_output 
= grad_output[:, :, :-self.adjH, :] 134 | if not self.adjW == 0: 135 | grad_output = grad_output[:, :, :, :-self.adjW] 136 | 137 | N, C, H, W = grad_output.shape 138 | 139 | # H += 2 * pad 140 | # W += 2 * pad 141 | out_h = (H - HH) / stride + 1 142 | out_w = (W - WW) / stride + 1 143 | 144 | # Perform an im2col operation by picking clever strides 145 | shape = (C, HH, WW, N, out_h, out_w) 146 | strides = (H * W, W, 1, C * H * W, stride * W, stride) 147 | strides = grad_output.itemsize * np.array(strides) 148 | dout_stride = np.lib.stride_tricks.as_strided( 149 | grad_output, shape=shape, strides=strides) 150 | dout_cols = np.ascontiguousarray(dout_stride) 151 | dout_cols.shape = (C * HH * WW, N * out_h * out_w) 152 | 153 | # Now all our convolutions are a big matrix multiply 154 | res = w.reshape(F, -1).dot(dout_cols) 155 | 156 | res.shape = (F, N, out_h, out_w) 157 | out = res.transpose(1, 0, 2, 3) 158 | 159 | self.grad_input = np.ascontiguousarray(out) 160 | return self.grad_input 161 | 162 | def type(self, type, cache): 163 | pass 164 | 165 | def __str__(self): 166 | pass 167 | 168 | def clear_state(self): 169 | pass 170 | -------------------------------------------------------------------------------- /pyfunt/spatial_max_pooling.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | try: 4 | from im2col_cyt import im2col_cython, col2im_cython 5 | except ImportError: 6 | print('Installation broken, please reinstall PyFunt') 7 | 8 | 9 | class SpatialMaxPooling(Module): 10 | 11 | """docstring for SpatialMaxPooling""" 12 | 13 | def __init__(self, kW, kH, dW=1, dH=1, padW=0, padH=0): 14 | super(SpatialMaxPooling, self).__init__() 15 | self.kW = kW 16 | self.kH = kH 17 | self.dW = dW 18 | self.dH = dH 19 | self.padW = padW 20 | self.padH = padH 21 | self.ceil_mode = False 22 | self.count_include_pad = True 23 | self.divide = True 24 | 25 | def ceil(self): 26 | # TODO: 27 | self.ceil_mode = True 28 | 29 | def floor(self): 30 | # TODO: 31 | self.ceil_mode = False 32 | 33 | def set_count_include_pad(self): 34 | # TODO: 35 | self.count_include_pad = True 36 | 37 | def set_count_exclude_pad(self): 38 | # TODO: 39 | self.count_include_pad = False 40 | 41 | def update_output(self, x): 42 | N, C, H, W = x.shape 43 | pool_height, pool_width = self.kW, self.kH 44 | stride = self.dW 45 | 46 | assert (H - pool_height) % stride == 0, 'Invalid height' 47 | assert (W - pool_width) % stride == 0, 'Invalid width' 48 | 49 | out_height = (H - pool_height) / stride + 1 50 | out_width = (W - pool_width) / stride + 1 51 | 52 | x_split = x.reshape(N * C, 1, H, W) 53 | x_cols = im2col_cython( 54 | x_split, pool_height, pool_width, padding=0, stride=stride) 55 | x_cols_argmax = np.argmax(x_cols, axis=0) 56 | x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])] 57 | out = x_cols_max.reshape( 58 | out_height, out_width, N, C).transpose(2, 3, 0, 1) 59 | 60 | self.x_shape = x.shape 61 | self.x_cols = x_cols 62 | self.x_cols_argmax = x_cols_argmax 63 | self.output = out 64 | return self.output 65 | 66 | def update_grad_input(self, x, grad_output, scale=1): 67 | x_cols = self.x_cols 68 | x_cols_argmax = self.x_cols_argmax 69 | dout = grad_output 70 | N, C, H, W = x.shape 71 | pool_height, pool_width = self.kW, self.kH 72 | stride = self.dW 73 | 74 | dout_reshaped = dout.transpose(2, 3, 0, 1).flatten() 75 | dx_cols = np.zeros_like(x_cols) 76 | dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped 77 | dx = 
col2im_cython(dx_cols, N * C, 1, H, W, pool_height, pool_width, 78 | padding=0, stride=stride) 79 | dx = dx.reshape(self.x_shape) 80 | self.grad_input = dx 81 | return self.grad_input 82 | 83 | def reset(self): 84 | pass 85 | 86 | def __str__(self): 87 | pass 88 | -------------------------------------------------------------------------------- /pyfunt/spatial_reflection_padding.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class SpatialReflectionPadding(Module): 6 | 7 | def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): 8 | super(SpatialReflectionPadding, self).__init__() 9 | self.pad_l = pad_l 10 | self.pad_r = pad_r or self.pad_l 11 | self.pad_t = pad_t or self.pad_l 12 | self.pad_b = pad_b or self.pad_l 13 | 14 | def update_output(self, x): 15 | if x.ndim == 3: 16 | self.output = np.pad( 17 | x, ((0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'reflect') 18 | elif x.ndim == 4: 19 | self.output = np.pad( 20 | x, ((0, 0), (0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'reflect') 21 | 22 | else: 23 | raise Exception('input must be 3 or 4-dimensional') 24 | return self.output 25 | 26 | def update_grad_input(self, x, grad_output): 27 | if x.ndim == grad_output.ndim == 3: 28 | if not (x.shape[0] == grad_output.shape[0] and 29 | x.shape[1] + self.pad_t + self.pad_b == grad_output.shape[1] and 30 | x.shape[2] + self.pad_l + self.pad_r == grad_output.shape[2]): 31 | raise Exception('input and gradOutput must be compatible in size') 32 | self.grad_input = grad_output[:, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 33 | elif x.ndim == grad_output.ndim == 4: 34 | if not (x.shape[0] == grad_output.shape[0] and 35 | x.shape[1] == grad_output.shape[1] and 36 | x.shape[2] + self.pad_t + self.pad_b == grad_output.shape[2] and 37 | x.shape[3] + self.pad_l + self.pad_r == grad_output.shape[3]): 38 | raise Exception('input and gradOutput must be compatible in size') 39 | self.grad_input = grad_output[:, :, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 40 | else: 41 | raise Exception( 42 | 'input and gradOutput must be 3 or 4-dimensional and have equal number of dimensions') 43 | return self.grad_input 44 | 45 | def __str__(self): 46 | return str(type(self)) + '(l=%d, r=%d, t=%d, b=%d)' % (self.pad_l, self.pad_r, self.pad_t, self.pad_b) 47 | -------------------------------------------------------------------------------- /pyfunt/spatial_replication_padding.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class SpatialReplicationPadding(Module): 6 | 7 | def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): 8 | super(SpatialReplicationPadding, self).__init__() 9 | self.pad_l = pad_l 10 | self.pad_r = pad_r or self.pad_l 11 | self.pad_t = pad_t or self.pad_l 12 | self.pad_b = pad_b or self.pad_l 13 | 14 | def update_output(self, x): 15 | if x.ndim == 3: 16 | self.output = np.pad( 17 | x, ((0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'edge') 18 | elif x.ndim == 4: 19 | self.output = np.pad( 20 | x, ((0, 0), (0, 0), (self.pad_t, self.pad_b), (self.pad_l, self.pad_r)), 'edge') 21 | 22 | else: 23 | raise Exception('input must be 3 or 4-dimensional') 24 | return self.output 25 | 26 | def update_grad_input(self, x, grad_output): 27 | if x.ndim == grad_output.ndim == 3: 28 | if not (x.shape[0] == grad_output.shape[0] and 29 | x.shape[1] + self.pad_t + 
self.pad_b == grad_output.shape[1] and 30 | x.shape[2] + self.pad_l + self.pad_r == grad_output.shape[2]): 31 | raise Exception('input and gradOutput must be compatible in size') 32 | self.grad_input = grad_output[:, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 33 | elif x.ndim == grad_output.ndim == 4: 34 | if not (x.shape[0] == grad_output.shape[0] and 35 | x.shape[1] == grad_output.shape[1] and 36 | x.shape[2] + self.pad_t + self.pad_b == grad_output.shape[2] and 37 | x.shape[3] + self.pad_l + self.pad_r == grad_output.shape[3]): 38 | raise Exception('input and gradOutput must be compatible in size') 39 | self.grad_input = grad_output[:, :, self.pad_t:self.pad_b, self.pad_l:self.pad_r] 40 | else: 41 | raise Exception( 42 | 'input and gradOutput must be 3 or 4-dimensional and have equal number of dimensions') 43 | return self.grad_input 44 | 45 | def __str__(self): 46 | return str(type(self)) + '(l=%d, r=%d, t=%d, b=%d)' % (self.pad_l, self.pad_r, self.pad_t, self.pad_b) 47 | -------------------------------------------------------------------------------- /pyfunt/spatial_up_sampling_nearest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | from module import Module 4 | import numpy as np 5 | try: 6 | from im2col_cyt import im2col_cython, col2im_cython 7 | except ImportError: 8 | print('Installation broken, please reinstall PyFunt') 9 | 10 | from numpy.lib.stride_tricks import as_strided 11 | 12 | 13 | def tile_array(a, b1, b2): 14 | r, c = a.shape 15 | rs, cs = a.strides 16 | x = as_strided(a, (r, b1, c, b2), (rs, 0, cs, 0)) 17 | return x.reshape(r*b1, c*b2) 18 | 19 | 20 | class SpatialUpSamplingNearest(Module): 21 | 22 | def __init__(self, scale): 23 | super(SpatialUpSamplingNearest, self).__init__() 24 | self.scale_factor = scale 25 | if self.scale_factor < 1: 26 | raise Exception('scale_factor must be greater than 1') 27 | if np.floor(self.scale_factor) != self.scale_factor: 28 | raise Exception('scale_factor must be integer') 29 | 30 | def update_output(self, x): 31 | out_size = x.shape 32 | out_size[x.ndim - 1] *= self.scale_factor 33 | out_size[x.ndim - 2] *= self.scale_factor 34 | N, C, H, W = out_size 35 | 36 | stride = self.scale_factor 37 | pool_height = pool_width = stride 38 | 39 | x_reshaped = x.transpose(2, 3, 0, 1).flatten() 40 | out_cols = np.zeros(out_size) 41 | out_cols[:, np.arange(out_cols.shape[1])] = x_reshaped 42 | out = col2im_cython(out_cols, N * C, 1, H, W, pool_height, pool_width, 43 | padding=0, stride=stride) 44 | out = out.reshape(out_size) 45 | return self.grad_input 46 | 47 | return self.output 48 | 49 | def update_grad_input(self, x, grad_output, scale=1): 50 | 51 | N, C, H, W = grad_output.shape 52 | pool_height = pool_width = self.scale_factor 53 | stride = self.scale_factor 54 | 55 | out_height = (H - pool_height) / stride + 1 56 | out_width = (W - pool_width) / stride + 1 57 | 58 | grad_output_split = grad_output.reshape(N * C, 1, H, W) 59 | grad_output_cols = im2col_cython( 60 | grad_output_split, pool_height, pool_width, padding=0, stride=stride) 61 | grad_intput_cols = grad_output_cols[0, np.arange(grad_output_cols.shape[1])] 62 | grad_input = grad_intput_cols.reshape( 63 | out_height, out_width, N, C).transpose(2, 3, 0, 1) 64 | 65 | self.output = grad_input 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /pyfunt/tanh.py: -------------------------------------------------------------------------------- 1 | from 
module import Module 2 | import numpy as np 3 | 4 | 5 | class Tanh(Module): 6 | 7 | def __init__(self, th=1e-6, v=0, ip=False): 8 | super(Tanh, self).__init__() 9 | self.th = th 10 | self.val = v 11 | self.inplace = ip 12 | 13 | def update_output(self, x): 14 | self.output = np.tanh(x) 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | self.grad_input = grad_output * (1 - np.power(self.output, 2)) 19 | return self.grad_input 20 | -------------------------------------------------------------------------------- /pyfunt/threshold.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | import numpy as np 3 | 4 | 5 | class Threshold(Module): 6 | 7 | def __init__(self, th=1e-6, v=0, ip=False): 8 | super(Threshold, self).__init__() 9 | self.th = th 10 | self.val = v 11 | self.inplace = ip 12 | 13 | def update_output(self, x): 14 | self.output = np.maximum(self.th, x) 15 | return self.output 16 | 17 | def update_grad_input(self, x, grad_output): 18 | dx = np.array(grad_output, copy=True) 19 | dx[x <= 0] = 0 20 | self.grad_input = dx 21 | return self.grad_input 22 | 23 | def validate_parameters(self): 24 | if self.inplace: 25 | if self.val > self.th: 26 | raise Exception('in-place processing requires value not exceed threshold') 27 | 28 | def reset(self): 29 | pass 30 | -------------------------------------------------------------------------------- /pyfunt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from load_torch_model import (load_t7model, load_t7checkpoint, load_parser_init, load_parser_vals) 2 | from gradient_check import eval_numerical_gradient_array 3 | from . import * 4 | -------------------------------------------------------------------------------- /pyfunt/utils/gradient_check.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import randrange 3 | 4 | 5 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 6 | ''' 7 | a naive implementation of numerical gradient of f at x 8 | - f should be a function that takes a single argument 9 | - x is the point (numpy array) to evaluate the gradient at 10 | ''' 11 | grad = np.zeros_like(x) 12 | # iterate over all indexes in x 13 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 14 | while not it.finished: 15 | 16 | # evaluate function at x+h 17 | ix = it.multi_index 18 | oldval = x[ix] 19 | x[ix] = oldval + h # increment by h 20 | fxph = f(x) # evalute f(x + h) 21 | x[ix] = oldval - h 22 | fxmh = f(x) # evaluate f(x - h) 23 | x[ix] = oldval # restore 24 | 25 | # compute the partial derivative with centered formula 26 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 27 | if verbose: 28 | print(x), grad[ix] 29 | it.iternext() # step to next dimension 30 | 31 | return grad 32 | 33 | 34 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 35 | ''' 36 | Evaluate a numeric gradient for a function that accepts a numpy 37 | array and returns a numpy array. 
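    A minimal usage sketch (illustrative only; `f`, `x` and `dout` below are
    made-up inputs, not part of this module): numerically differentiate an
    element-wise ReLU and compare against its known analytic gradient.

        f = lambda t: np.maximum(t, 0)   # forward function to differentiate
        x = np.random.randn(4, 5)        # point at which to evaluate the gradient
        dout = np.random.randn(4, 5)     # upstream gradient
        dx_num = eval_numerical_gradient_array(f, x, dout)
        # dx_num should closely match the analytic result dout * (x > 0)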
38 | ''' 39 | grad = np.zeros_like(x) 40 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 41 | while not it.finished: 42 | ix = it.multi_index 43 | 44 | oldval = x[ix] 45 | x[ix] = oldval + h 46 | pos = f(x).copy() 47 | x[ix] = oldval - h 48 | neg = f(x).copy() 49 | x[ix] = oldval 50 | 51 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 52 | it.iternext() 53 | return grad 54 | 55 | 56 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 57 | ''' 58 | Compute numeric gradients for a function that operates on input 59 | and output blobs. 60 | 61 | We assume that f accepts several input blobs as arguments, followed by a blob 62 | into which outputs will be written. For example, f might be called like this: 63 | 64 | f(x, w, out) 65 | 66 | where x and w are input Blobs, and the result of f will be written to out. 67 | 68 | Inputs: 69 | - f: function 70 | - inputs: tuple of input blobs 71 | - output: output blob 72 | - h: step size 73 | ''' 74 | numeric_diffs = [] 75 | for input_blob in inputs: 76 | diff = np.zeros_like(input_blob.diffs) 77 | it = np.nditer(input_blob.vals, flags=['multi_index'], 78 | op_flags=['readwrite']) 79 | while not it.finished: 80 | idx = it.multi_index 81 | orig = input_blob.vals[idx] 82 | 83 | input_blob.vals[idx] = orig + h 84 | f(*(inputs + (output,))) 85 | pos = np.copy(output.vals) 86 | input_blob.vals[idx] = orig - h 87 | f(*(inputs + (output,))) 88 | neg = np.copy(output.vals) 89 | input_blob.vals[idx] = orig 90 | 91 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 92 | 93 | it.iternext() 94 | numeric_diffs.append(diff) 95 | return numeric_diffs 96 | 97 | 98 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 99 | return eval_numerical_gradient_blobs(lambda *args: net.forward(), 100 | inputs, output, h=h) 101 | 102 | 103 | def grad_check_sparse(f, x, analytic_grad, num_checks): 104 | ''' 105 | sample a few random elements and only return numerical 106 | in this dimensions. 107 | ''' 108 | h = 1e-5 109 | 110 | x.shape 111 | for i in xrange(num_checks): 112 | ix = tuple([randrange(m) for m in x.shape]) 113 | 114 | oldval = x[ix] 115 | x[ix] = oldval + h # increment by h 116 | fxph = f(x) # evaluate f(x + h) 117 | x[ix] = oldval - h # increment by h 118 | fxmh = f(x) # evaluate f(x - h) 119 | x[ix] = oldval # reset 120 | 121 | grad_numerical = (fxph - fxmh) / (2 * h) 122 | grad_analytic = analytic_grad[ix] 123 | rel_error = abs(grad_numerical - grad_analytic) / \ 124 | (abs(grad_numerical) + abs(grad_analytic)) 125 | print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error)) 126 | -------------------------------------------------------------------------------- /pyfunt/utils/load_torch_model.py: -------------------------------------------------------------------------------- 1 | import torchfile 2 | import pyfunt 3 | import pdb 4 | import re 5 | 6 | please_contribute = 'If you want you can fix it and make a pull request ;)' 7 | 8 | 9 | ''' 10 | _init (module) takes a dict for the torch layer and returns a tuple 11 | containing the values for the pyfunt layer initialization funciton. 12 | Once you wrote the function, add the reation in the load_parser_init dict. 13 | The same mechanism goes for the layer values using load_parser_vals dictt 14 | (gard input, output, weight, bias already get added). 
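A minimal sketch of that registration step (the layer name 'MyScale' and its
'constant' field are hypothetical, used only to illustrate the mechanism
described above):

    def my_scale_init(m):
        # return the positional args expected by the corresponding pyfunt constructor
        return (m['constant'],)

    load_parser_init['MyScale'] = my_scale_init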
15 | ''' 16 | 17 | 18 | def conv_init(m): 19 | return m['nInputPlane'], m['nOutputPlane'], m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'] 20 | 21 | 22 | def dropout_init(m): 23 | return m['p'], not m['v2'] 24 | 25 | 26 | def linear_init(m): 27 | return m['weight'].shape[1], m['weight'].shape[0], len(m['bias']) != 0 28 | 29 | 30 | def mul_constant_init(m): 31 | return (m['constant_scalar'],) 32 | 33 | 34 | def relu_init(m): 35 | return (m['inplace'],) 36 | 37 | 38 | def spatial_max_pooling_init(m): 39 | return m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'] 40 | 41 | 42 | def spatial_batch_normalization_init(m): 43 | return len(m['running_mean']), m['eps'], m['momentum'], len(m['weight']) > 0 44 | 45 | 46 | def spatial_average_pooling_init(m): 47 | return m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'] 48 | 49 | 50 | def spatial_full_convolution_init(m): 51 | return m['nInputPlane'], m['nOutputPlane'], m['kW'], m['kH'], m['dW'], m['dH'], m['padW'], m['padH'], m['adjW'], m['adjH'] 52 | 53 | 54 | def spatial_padding_init(m): 55 | return m['pad_l'], m['pad_r'], m['pad_t'], m['pad_b'] 56 | 57 | 58 | def view_init(m): 59 | return (m['size'],) 60 | 61 | 62 | load_parser_init = { 63 | 'Dropout': dropout_init, 64 | 'Linear': linear_init, 65 | 'MulConstant': mul_constant_init, 66 | 'ReLU': relu_init, 67 | 'SpatialConvolution': conv_init, 68 | 'SpatialMaxPooling': spatial_max_pooling_init, 69 | 'SpatialAvergaePooling': spatial_average_pooling_init, 70 | 'SpatialBatchNormalization': spatial_batch_normalization_init, 71 | 'SpatialFullConvolution': spatial_full_convolution_init, 72 | 'SpatialReflectionPadding': spatial_padding_init, 73 | 'SpatialReplicationPadding': spatial_padding_init, 74 | 'View': view_init 75 | } 76 | 77 | 78 | # def add_possible_values(module, tmodule): 79 | # print(len(dir(tmodule))) 80 | # for k in dir(tmodule): 81 | # if any(x.isupper() for x in k): 82 | # ourk = re.sub('([A-Z]+)', r'_\1', k).lower() 83 | # add_value(module, tmodule, ourk, k) 84 | # else: 85 | # add_value(module, tmodule, k) 86 | 87 | 88 | def dropout_vals(module, tmodule): 89 | add_value(module, tmodule, 'noise') 90 | 91 | 92 | def spatial_batch_normalization_vals(module, tmodule): 93 | add_value(module, tmodule, 'running_mean') 94 | add_value(module, tmodule, 'running_var') 95 | 96 | 97 | load_parser_vals = { 98 | 'Droput': dropout_vals, 99 | 'SpatialBatchNormalization': spatial_batch_normalization_vals 100 | } 101 | 102 | 103 | def load_t7model(path=None, obj=None, model=None, custom_layers=None): 104 | if not (path is None or obj is None): 105 | raise Exception('you must pass a path or a TorchObject') 106 | if path: 107 | o = torchfile.load(path) 108 | else: 109 | o = obj 110 | 111 | # import pdb; pdb.set_trace() 112 | if type(o) is torchfile.TorchObject: 113 | class_name = o._typename.split('.')[-1] 114 | tmodule = o._obj 115 | 116 | if not hasattr(pyfunt, class_name): 117 | print('class %s not found' % class_name) 118 | print(please_contribute) 119 | raise NotImplementedError 120 | 121 | Module = getattr(pyfunt, class_name) 122 | if not is_container(Module): 123 | raise Exception('model is a torchobj but not a container') 124 | model = Module() 125 | add_inout(model, tmodule) 126 | 127 | m = load_t7model(obj=tmodule, model=model, custom_layers=custom_layers) 128 | if not model: 129 | model = m 130 | else: 131 | 132 | for i, tmodule in enumerate(o.modules): 133 | if type(tmodule) is torchfile.TorchObject: 134 | class_name = tmodule._typename.split('.')[-1] 135 | tmodule_o = 
tmodule._obj 136 | 137 | if hasattr(pyfunt, class_name): 138 | Module = getattr(pyfunt, class_name) 139 | elif custom_layers and hasattr(custom_layers, class_name): 140 | Module = getattr(custom_layers, class_name) 141 | else: 142 | print('class %s not found' % class_name) 143 | print(please_contribute) 144 | raise NotImplementedError 145 | 146 | if i == 0 and model is None: 147 | if not is_container(Module): 148 | model = pyfunt.Sequential() 149 | # else: 150 | # model = Module() 151 | # model = load_t7model(obj=tmodule, model=model) 152 | # else: 153 | if is_container(Module): 154 | model.add( 155 | load_t7model(obj=tmodule, model=model, custom_layers=custom_layers)) 156 | else: 157 | if class_name in load_parser_init: 158 | args = load_parser_init[class_name](tmodule_o) 159 | module = Module(*args) 160 | else: 161 | try: 162 | module = Module() 163 | except: 164 | print('parser for %s not found' % class_name) 165 | print('%s cannot be initialized with no args' % 166 | class_name) 167 | print(please_contribute) 168 | raise NotImplementedError 169 | 170 | #add_possible_values(module, tmodule) 171 | add_inout(module, tmodule_o) 172 | add_w(module, tmodule_o) 173 | if class_name in load_parser_vals: 174 | load_parser_vals[class_name](module, tmodule_o) 175 | model.add(module) 176 | else: 177 | print('oops!') 178 | print(please_contribute) 179 | pdb.set_trace() 180 | raise NotImplementedError 181 | return model 182 | 183 | 184 | def is_container(tm): 185 | return pyfunt.container.Container in tm.__bases__ 186 | 187 | 188 | def add_value(module, tmodule, pname, tpname=None): 189 | tpname = tpname or pname 190 | if hasattr(module, pname): 191 | if tpname in tmodule: 192 | setattr(module, pname, tmodule[tpname]) 193 | 194 | 195 | def add_inout(module, tmodule): 196 | add_value(module, tmodule, 'output') 197 | add_value(module, tmodule, 'grad_input', 'gradInput') 198 | 199 | 200 | def add_w(module, tmodule): 201 | add_value(module, tmodule, 'weight') 202 | add_value(module, tmodule, 'bias') 203 | add_value(module, tmodule, 'grad_weight', 'gradWeight') 204 | add_value(module, tmodule, 'grad_bias', 'gradBias') 205 | 206 | 207 | def load_t7checkpoint(path, models_keys=['model'], custom_layers=None): 208 | # model_keys iterable that contains for example the word 'model' 209 | # the model to load in pyfunt 210 | cp = torchfile.load(path) 211 | for model in models_keys: 212 | cp[model] = load_t7model(obj=cp[model], custom_layers=custom_layers) 213 | return cp 214 | -------------------------------------------------------------------------------- /pyfunt/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | from math import sqrt, ceil 2 | import numpy as np 3 | 4 | 5 | def visualize_grid(Xs, ubound=255.0, padding=1, grid_size=None): 6 | ''' 7 | Reshape a 4D tensor of image data to a grid for easy visualization. 
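    For example (an illustrative calculation, not from the original docstring):
    with the default padding of 1, a batch of N = 16 images of shape
    (16, 32, 32, 3) gives grid_size_x = grid_size_y = 4 and an output grid of
    shape (4*32 + 3, 4*32 + 3, 3) = (131, 131, 3).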
8 | 9 | Inputs: 10 | - Xs: Data of shape (N, H, W, C) 11 | - ubound: Output grid will have values scaled to the range [0, ubound] 12 | - padding: The number of blank pixels between elements of the grid 13 | ''' 14 | (N, H, W, C) = Xs.shape 15 | 16 | if grid_size is None: 17 | grid_size_y = grid_size_x = int(ceil(sqrt(N))) 18 | elif type(grid_size) == tuple: 19 | grid_size_x = grid_size[0] 20 | grid_size_y = grid_size[1] 21 | else: 22 | grid_size_y = grid_size_x = grid_size 23 | 24 | grid_height = H * grid_size_y + padding * (grid_size_y - 1) 25 | grid_width = W * grid_size_x + padding * (grid_size_x - 1) 26 | grid = np.zeros((grid_height, grid_width, C)) 27 | next_idx = 0 28 | y0, y1 = 0, H 29 | for y in xrange(grid_size_y): 30 | x0, x1 = 0, W 31 | for x in xrange(grid_size_x): 32 | if next_idx < N: 33 | img = Xs[next_idx] 34 | low, high = np.min(img), np.max(img) 35 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 36 | # grid[y0:y1, x0:x1] = Xs[next_idx] 37 | next_idx += 1 38 | x0 += W + padding 39 | x1 += W + padding 40 | y0 += H + padding 41 | y1 += H + padding 42 | # grid_max = np.max(grid) 43 | # grid_min = np.min(grid) 44 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 45 | return grid 46 | 47 | 48 | def vis_grid(Xs): 49 | ''' visualize a grid of images ''' 50 | (N, H, W, C) = Xs.shape 51 | A = int(ceil(sqrt(N))) 52 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 53 | G *= np.min(Xs) 54 | n = 0 55 | for y in range(A): 56 | for x in range(A): 57 | if n < N: 58 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n, :, :, :] 59 | n += 1 60 | # normalize to [0,1] 61 | maxg = G.max() 62 | ming = G.min() 63 | G = (G - ming)/(maxg-ming) 64 | return G 65 | 66 | 67 | def vis_nn(rows): 68 | ''' visualize array of arrays of images ''' 69 | N = len(rows) 70 | D = len(rows[0]) 71 | H, W, C = rows[0][0].shape 72 | Xs = rows[0][0] 73 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 74 | for y in range(N): 75 | for x in range(D): 76 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 77 | # normalize to [0,1] 78 | maxg = G.max() 79 | ming = G.min() 80 | G = (G - ming)/(maxg-ming) 81 | return G 82 | -------------------------------------------------------------------------------- /pyfunt/view.py: -------------------------------------------------------------------------------- 1 | from module import Module 2 | 3 | 4 | class View(Module): 5 | 6 | def __init__(self, shape): 7 | super(View, self).__init__() 8 | if type(shape) is not tuple: 9 | shape = (shape,) 10 | self.shape = shape 11 | 12 | def update_output(self, x): 13 | self.output = x.view().reshape((x.shape[0],) + self.shape) 14 | return self.output 15 | 16 | def update_grad_input(self, x, grad_output): 17 | self.grad_input = grad_output.view().reshape(x.shape) 18 | return self.grad_input 19 | 20 | def reset(self): 21 | pass 22 | 23 | 24 | # class View(Module): 25 | 26 | # def __init__(self, args): 27 | # super(View, self).__init__() 28 | # self.reset_size(args) 29 | # self.num_input.ndim = None 30 | 31 | # def reset_size(self, args): 32 | # if len(args) == 1 and type(args[0]) == 'float64': 33 | # self.size = args[0] 34 | # else: 35 | # self.size = None 36 | # self.num_elements = 1 37 | # inferdim = False 38 | # for i in xrange(self.size): 39 | # szi = self.size[i] 40 | # if szi >= 0: 41 | # self.num_elements *= self.size[i] 42 | # else: 43 | # if szi != -1: 44 | # raise Exception('size should be positive or -1') 45 | # if inferdim: 46 | # raise Exception('only one dimension can be at -1') 47 | # inferdim = True 48 | 49 | # def 
update_output(self, x): 50 | # self.output = self.output or np.zeros_like(x) 51 | # batch_size = None 52 | # if batch_size: 53 | # self.output = x.view(batch_size, *self.size) 54 | # else: 55 | # self.output = x.view(self.size) 56 | # return self.output 57 | 58 | # def update_grad_input(self, x, grad_output): 59 | # self.grad_input = self.grad_input or np.zeros_like(grad_output) 60 | # self.grad_input = grad_output.view(x.size) 61 | # return self.grad_input 62 | 63 | # def __str__(self): 64 | # return '%s(%s)' % (type(self), self.size) 65 | 66 | # def reset(self): 67 | # pass 68 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.11.0 2 | tqdm == 3.8.0 3 | cython >= 0.24.1 4 | torchfile -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import subprocess 7 | 8 | ''' 9 | Original Source: https://github.com/scipy/scipy/blob/master/setup.py 10 | ''' 11 | 12 | if sys.version_info[:2] < (2, 6) or (3, 0) <= sys.version_info[0:2] < (3, 2): 13 | raise RuntimeError("Python version 2.6, 2.7 (TODO: >= 3.2) required.") 14 | 15 | if sys.version_info[0] < 3: 16 | import __builtin__ as builtins 17 | else: 18 | import builtins 19 | 20 | MAJOR = 0 21 | MINOR = 1 22 | MICRO = 0 23 | ISRELEASED = False 24 | 25 | VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) 26 | 27 | with open('./requirements.txt') as f: 28 | required = f.read().splitlines() 29 | 30 | # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly 31 | # update it when the contents of directories change. 32 | if os.path.exists('MANIFEST'): 33 | os.remove('MANIFEST') 34 | 35 | 36 | # Return the git revision as a string 37 | def git_version(): 38 | def _minimal_ext_cmd(cmd): 39 | # construct minimal environment 40 | env = {} 41 | for k in ['SYSTEMROOT', 'PATH']: 42 | v = os.environ.get(k) 43 | if v is not None: 44 | env[k] = v 45 | # LANGUAGE is used on win32 46 | env['LANGUAGE'] = 'C' 47 | env['LANG'] = 'C' 48 | env['LC_ALL'] = 'C' 49 | out = subprocess.Popen( 50 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 51 | return out 52 | 53 | try: 54 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 55 | GIT_REVISION = out.strip().decode('ascii') 56 | except OSError: 57 | GIT_REVISION = "Unknown" 58 | 59 | return GIT_REVISION 60 | 61 | 62 | # This is a bit hackish: we are setting a global variable so that the main 63 | # pyfunt __init__ can detect if it is being loaded by the setup routine, to 64 | # avoid attempting to load components that aren't built yet. While ugly, it's 65 | # a lot more robust than what was previously being used. 66 | builtins.__PUFUNT_SETUP__ = True 67 | 68 | 69 | def get_version_info(): 70 | # Adding the git rev number needs to be done inside 71 | # write_version_py(), otherwise the import of pyfunt.version messes 72 | # up the build under Python 3. 
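    # Illustrative note (not in the original source): with the defaults above
    # (MAJOR.MINOR.MICRO = 0.1.0, ISRELEASED = False), a development build gets
    # a version string of the form '0.1.0.dev0+1a2b3c4', i.e. the base version
    # plus the first seven characters of the current git revision.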
73 | FULLVERSION = VERSION 74 | if os.path.exists('.git'): 75 | GIT_REVISION = git_version() 76 | elif os.path.exists('pyfunt/version.py'): 77 | # must be a source distribution, use existing version file 78 | # load it as a separate module to not load pyfunt/__init__.py 79 | import imp 80 | version = imp.load_source('pyfunt.version', 'pyfunt/version.py') 81 | GIT_REVISION = version.git_revision 82 | else: 83 | GIT_REVISION = "Unknown" 84 | 85 | if not ISRELEASED: 86 | FULLVERSION += '.dev0+' + GIT_REVISION[:7] 87 | 88 | return FULLVERSION, GIT_REVISION 89 | 90 | 91 | def write_version_py(filename='pyfunt/version.py'): 92 | cnt = """\ 93 | # THIS FILE IS GENERATED FROM PYFUNT SETUP.PY\ 94 | short_version = '%(version)s'\ 95 | version = '%(version)s'\ 96 | full_version = '%(full_version)s'\ 97 | git_revision = '%(git_revision)s'\ 98 | release = %(isrelease)s\ 99 | if not release:\ 100 | version = full_version\ 101 | """ 102 | FULLVERSION, GIT_REVISION = get_version_info() 103 | 104 | a = open(filename, 'w') 105 | try: 106 | a.write(cnt % {'version': VERSION, 107 | 'full_version': FULLVERSION, 108 | 'git_revision': GIT_REVISION, 109 | 'isrelease': str(ISRELEASED)}) 110 | finally: 111 | a.close() 112 | 113 | 114 | def generate_cython(): 115 | cwd = os.path.abspath(os.path.dirname(__file__)) 116 | print("Cythonizing sources") 117 | p = subprocess.call([sys.executable, 118 | os.path.join(cwd, 'tools', 'cythonize.py'), 119 | 'pyfunt'], 120 | cwd=cwd) 121 | if p != 0: 122 | raise RuntimeError("Running cythonize failed!") 123 | 124 | 125 | def configuration(parent_package='', top_path=None): 126 | from numpy.distutils.misc_util import Configuration 127 | config = Configuration(None, parent_package, top_path) 128 | config.set_options(ignore_setup_xxx_py=True, 129 | assume_default_configuration=True, 130 | delegate_options_to_subpackages=True, 131 | quiet=True) 132 | 133 | config.add_subpackage('pyfunt') 134 | config.add_data_files(('pyfunt', '*.txt')) 135 | 136 | config.get_version('pyfunt/version.py') 137 | 138 | return config 139 | 140 | 141 | def setup_package(): 142 | 143 | # Rewrite the version file every time 144 | 145 | write_version_py() 146 | cmdclass = {} 147 | 148 | # Figure out whether to add ``*_requires = ['numpy']``. 149 | # We don't want to do that unconditionally, because we risk updating 150 | # an installed numpy which fails too often. Just if it's not installed, we 151 | # may give it a try. See gh-3379. 152 | build_requires = [] 153 | try: 154 | import numpy 155 | if (len(sys.argv) >= 2 and sys.argv[1] == 'bdist_wheel' and 156 | sys.platform == 'darwin'): 157 | # We're ony building wheels for platforms where we know there's 158 | # also a Numpy wheel, so do this unconditionally. See gh-5184. 
159 | build_requires = ['numpy>=1.7.1'] 160 | except: 161 | build_requires = ['numpy>=1.7.1'] 162 | 163 | metadata = dict( 164 | name="pyfunt", 165 | author="Daniele Ettore Ciriello", 166 | author_email="ciriello.daniele@gmail.com", 167 | version="1.1.0", 168 | license="MIT", 169 | url="https://github.com/dnlcrl/PyFunt", 170 | download_url="https://github.com/dnlcrl/PyFunt", 171 | description="Pythonic Deep Learning Framework", 172 | packages=['pyfunt', 'pyfunt/examples', 'pyfunt/utils', 'pyfunt/examples/residual_networks', ], 173 | cmdclass=cmdclass, # {'build_ext': build_ext}, 174 | platforms=["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"], 175 | setup_requires=build_requires, 176 | install_requires=required, 177 | # ext_modules=extensions, 178 | keywords='pyfunt deep learning artificial neural network convolution', 179 | ) 180 | 181 | if len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or 182 | sys.argv[1] in ('--help-commands', 'egg_info', '--version', 183 | 'clean')): 184 | # For these actions, NumPy is not required. 185 | # 186 | # They are required to succeed without Numpy for example when 187 | # pip is used to install Scipy when Numpy is not yet present in 188 | # the system. 189 | try: 190 | from setuptools import setup 191 | except ImportError: 192 | from distutils.core import setup 193 | else: 194 | if (len(sys.argv) >= 2 and sys.argv[1] in ('bdist_wheel', 'bdist_egg')) or ( 195 | 'develop' in sys.argv): 196 | # bdist_wheel/bdist_egg needs setuptools 197 | import setuptools 198 | 199 | from numpy.distutils.core import setup 200 | 201 | cwd = os.path.abspath(os.path.dirname(__file__)) 202 | if not os.path.exists(os.path.join(cwd, 'PKG-INFO')): 203 | # Generate Cython sources, unless building from source release 204 | generate_cython() 205 | 206 | metadata['configuration'] = configuration 207 | 208 | print 'setup complete' 209 | setup(**metadata) 210 | 211 | if __name__ == '__main__': 212 | setup_package() 213 | -------------------------------------------------------------------------------- /tools/cythonize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ cythonize 3 | 4 | SOURCE: https://github.com/scipy/scipy/blob/master/setup.py 5 | 6 | Cythonize pyx files into C files as needed. 7 | 8 | Usage: cythonize [root_dir] 9 | 10 | Default [root_dir] is 'pyfunt'. 11 | 12 | Checks pyx files to see if they have been changed relative to their 13 | corresponding C files. If they have, then runs cython on these files to 14 | recreate the C files. 15 | 16 | The script thinks that the pyx files have changed relative to the C files 17 | by comparing hashes stored in a database file. 18 | 19 | Simple script to invoke Cython (and Tempita) on all .pyx (.pyx.in) 20 | files; while waiting for a proper build system. Uses file hashes to 21 | figure out if rebuild is needed. 22 | 23 | For now, this script should be run by developers when changing Cython files 24 | only, and the resulting C files checked in, so that end-users (and Python-only 25 | developers) do not get the Cython/Tempita dependencies. 26 | 27 | Originally written by Dag Sverre Seljebotn, and copied here from: 28 | 29 | https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py 30 | 31 | Note: this script does not check any of the dependent C libraries; it only 32 | operates on the Cython .pyx files. 
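Example invocation from the repository root (the argument is optional, since
the default root_dir is 'pyfunt'; setup.py's generate_cython() calls this
script in the same way):

    $ python tools/cythonize.py pyfunt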
33 | """ 34 | 35 | from __future__ import division, print_function, absolute_import 36 | 37 | import os 38 | import re 39 | import sys 40 | import hashlib 41 | import subprocess 42 | 43 | HASH_FILE = 'cythonize.dat' 44 | DEFAULT_ROOT = 'pyfunt' 45 | 46 | # WindowsError is not defined on unix systems 47 | try: 48 | WindowsError 49 | except NameError: 50 | WindowsError = None 51 | 52 | # 53 | # Rules 54 | # 55 | 56 | 57 | def process_pyx(fromfile, tofile): 58 | try: 59 | from Cython.Compiler.Version import version as cython_version 60 | from distutils.version import LooseVersion 61 | if LooseVersion(cython_version) < LooseVersion('0.22'): 62 | raise Exception('Building PyFunt requires Cython >= 0.22') 63 | 64 | except ImportError: 65 | pass 66 | 67 | flags = ['--fast-fail'] 68 | if tofile.endswith('.cxx'): 69 | flags += ['--cplus'] 70 | 71 | try: 72 | try: 73 | # if fromfile == 'im2col_cython.pyx': 74 | # print('compiling im2col_cython') 75 | # r = subprocess.call( 76 | # ['python', 'pyfunt/layers/setup.py', 'build_ext', '--inplace']) 77 | # else: 78 | r = subprocess.call( 79 | ['cython'] + flags + ["-o", tofile, fromfile]) 80 | if r != 0: 81 | raise Exception('Cython failed') 82 | 83 | except OSError: 84 | # There are ways of installing Cython that don't result in a cython 85 | # executable on the path, see gh-2397. 86 | r = subprocess.call([sys.executable, '-c', 87 | 'import sys; from Cython.Compiler.Main import ' 88 | 'setuptools_main as main; sys.exit(main())'] + flags + 89 | ["-o", tofile, fromfile]) 90 | if r != 0: 91 | raise Exception("Cython either isn't installed or it failed.") 92 | except OSError: 93 | raise OSError('Cython needs to be installed') 94 | 95 | 96 | def process_tempita_pyx(fromfile, tofile): 97 | try: 98 | try: 99 | from Cython import Tempita as tempita 100 | except ImportError: 101 | import tempita 102 | except ImportError: 103 | raise Exception('Building PyFunt requires Tempita: ' 104 | 'pip install --user Tempita') 105 | from_filename = tempita.Template.from_filename 106 | template = from_filename(fromfile, encoding=sys.getdefaultencoding()) 107 | pyxcontent = template.substitute() 108 | assert fromfile.endswith('.pyx.in') 109 | pyxfile = fromfile[:-len('.pyx.in')] + '.pyx' 110 | with open(pyxfile, "w") as f: 111 | f.write(pyxcontent) 112 | process_pyx(pyxfile, tofile) 113 | 114 | rules = { 115 | # fromext : function 116 | '.pyx': process_pyx, 117 | '.pyx.in': process_tempita_pyx 118 | } 119 | # 120 | # Hash db 121 | # 122 | 123 | 124 | def load_hashes(filename): 125 | # Return { filename : (sha1 of input, sha1 of output) } 126 | if os.path.isfile(filename): 127 | hashes = {} 128 | with open(filename, 'r') as f: 129 | for line in f: 130 | filename, inhash, outhash = line.split() 131 | hashes[filename] = (inhash, outhash) 132 | else: 133 | hashes = {} 134 | return hashes 135 | 136 | 137 | def save_hashes(hash_db, filename): 138 | with open(filename, 'w') as f: 139 | for key, value in sorted(hash_db.items()): 140 | f.write("%s %s %s\n" % (key, value[0], value[1])) 141 | 142 | 143 | def sha1_of_file(filename): 144 | h = hashlib.sha1() 145 | with open(filename, "rb") as f: 146 | h.update(f.read()) 147 | return h.hexdigest() 148 | 149 | # 150 | # Main program 151 | # 152 | 153 | 154 | def normpath(path): 155 | path = path.replace(os.sep, '/') 156 | if path.startswith('./'): 157 | path = path[2:] 158 | return path 159 | 160 | 161 | def get_hash(frompath, topath): 162 | from_hash = sha1_of_file(frompath) 163 | to_hash = sha1_of_file(topath) if os.path.exists(topath) else 
None 164 | return (from_hash, to_hash) 165 | 166 | 167 | def process(path, fromfile, tofile, processor_function, hash_db): 168 | fullfrompath = os.path.join(path, fromfile) 169 | fulltopath = os.path.join(path, tofile) 170 | current_hash = get_hash(fullfrompath, fulltopath) 171 | if current_hash == hash_db.get(normpath(fullfrompath), None): 172 | print('%s has not changed' % fullfrompath) 173 | return 174 | 175 | orig_cwd = os.getcwd() 176 | try: 177 | os.chdir(path) 178 | print('Processing %s to %s' % (fullfrompath, fulltopath)) 179 | processor_function(fromfile, tofile) 180 | finally: 181 | os.chdir(orig_cwd) 182 | # changed target file, recompute hash 183 | current_hash = get_hash(fullfrompath, fulltopath) 184 | # store hash in db 185 | hash_db[normpath(fullfrompath)] = current_hash 186 | 187 | 188 | def find_process_files(root_dir): 189 | hash_db = load_hashes(HASH_FILE) 190 | for cur_dir, dirs, files in os.walk(root_dir): 191 | for filename in files: 192 | in_file = os.path.join(cur_dir, filename + ".in") 193 | if filename.endswith('.pyx') and os.path.isfile(in_file): 194 | continue 195 | for fromext, function in rules.items(): 196 | if filename.endswith(fromext): 197 | toext = ".c" 198 | with open(os.path.join(cur_dir, filename), 'rb') as f: 199 | data = f.read() 200 | m = re.search( 201 | br"^\s*#\s*distutils:\s*language\s*=\s*c\+\+\s*$", data, re.I | re.M) 202 | if m: 203 | toext = ".cxx" 204 | fromfile = filename 205 | tofile = filename[:-len(fromext)] + toext 206 | process(cur_dir, fromfile, tofile, function, hash_db) 207 | save_hashes(hash_db, HASH_FILE) 208 | 209 | 210 | def main(): 211 | try: 212 | root_dir = sys.argv[1] 213 | except IndexError: 214 | root_dir = DEFAULT_ROOT 215 | find_process_files(root_dir) 216 | 217 | 218 | if __name__ == '__main__': 219 | main() 220 | --------------------------------------------------------------------------------
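A closing note on the hash database used by tools/cythonize.py above: save_hashes()
writes one whitespace-separated record per processed file (source path, SHA-1 of the
.pyx input, SHA-1 of the generated C/C++ output), and load_hashes() reads the same
format back. A sketch of what cythonize.dat might contain (the path is illustrative,
taken from the Cython module imported elsewhere in this package; the digests are
placeholders, not real hashes):

    pyfunt/im2col_cyt.pyx 3f786850e387550fdab836ed7e6dc881de23001b 89e6c98d92887913cadf06b2adb97f26cde4849b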