├── ABOUT ├── Makefile ├── README.md ├── build.sh ├── common-gcc-cuda-4.0.mk ├── convdata.py ├── convnet.py ├── data.py ├── example-layers ├── layer-params-18pct.cfg ├── layer-params-19pct.cfg ├── layer-params-80sec.cfg ├── layer-params-conv-local-11pct.cfg ├── layer-params-conv-local-13pct.cfg ├── layer-params-example.cfg ├── layer-params.gc.cfg ├── layers-18pct.cfg ├── layers-19pct.cfg ├── layers-80sec.cfg ├── layers-conv-local-11pct.cfg ├── layers-conv-local-13pct.cfg ├── layers-example.cfg └── layers.gc.cfg ├── gpumodel.py ├── include ├── common │ ├── matrix.h │ ├── matrix_funcs.h │ ├── queue.h │ └── thread.h ├── convnet.cuh ├── cost.cuh ├── cudaconv2 │ ├── conv_util.cuh │ └── cudaconv2.cuh ├── data.cuh ├── layer.cuh ├── layer_kernels.cuh ├── neuron.cuh ├── nvmatrix │ ├── nvmatrix.cuh │ ├── nvmatrix_kernels.cuh │ └── nvmatrix_operators.cuh ├── pyconvnet.cuh ├── util.cuh ├── weights.cuh └── worker.cuh ├── layer.py ├── options.py ├── ordereddict.py ├── shownet.py ├── src ├── common │ └── matrix.cpp ├── convnet.cu ├── cost.cu ├── cudaconv2 │ ├── conv_util.cu │ ├── filter_acts.cu │ ├── img_acts.cu │ └── weight_acts.cu ├── data.cu ├── layer.cu ├── layer_kernels.cu ├── neuron.cu ├── nvmatrix │ ├── nvmatrix.cu │ └── nvmatrix_kernels.cu ├── pyconvnet.cu ├── util.cu ├── weights.cu └── worker.cu └── util.py /ABOUT: -------------------------------------------------------------------------------- 1 | cuda-convnet 2 | High-performance C++/CUDA implementation of abstract convolutional neural networks 3 | 4 | See http://code.google.com/p/cuda-convnet/ for documentation. 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MODELNAME := _ConvNet 2 | 3 | INCLUDES := -I$(PYTHON_INCLUDE_PATH) -I$(NUMPY_INCLUDE_PATH) -I./include -I./include/common -I./include/cudaconv2 -I./include/nvmatrix 4 | LIB := -lpthread -L$(ATLAS_LIB_PATH) -L$(CUDA_INSTALL_PATH)/lib64 -lcblas 5 | 6 | USECUBLAS := 1 7 | 8 | PYTHON_VERSION=$(shell python -V 2>&1 | cut -d ' ' -f 2 | cut -d '.' -f 1,2) 9 | LIB += -lpython$(PYTHON_VERSION) 10 | 11 | GENCODE_ARCH := -gencode=arch=compute_20,code=\"sm_20,compute_20\" 12 | COMMONFLAGS := -DNUMPY_INTERFACE -DMODELNAME=$(MODELNAME) -DINITNAME=init$(MODELNAME) 13 | 14 | EXECUTABLE := $(MODELNAME).so 15 | 16 | CUFILES := $(shell echo src/*.cu src/cudaconv2/*.cu src/nvmatrix/*.cu) 17 | CU_DEPS := $(shell echo include/*.cuh include/cudaconv2/*.cuh include/nvmatrix/*.cuh) 18 | CCFILES := $(shell echo src/common/*.cpp) 19 | C_DEPS := $(shell echo include/common/*.h) 20 | 21 | include common-gcc-cuda-4.0.mk 22 | 23 | makedirectories: 24 | $(VERBOSE)mkdir -p $(LIBDIR) 25 | $(VERBOSE)mkdir -p $(OBJDIR)/src/cudaconv2 26 | $(VERBOSE)mkdir -p $(OBJDIR)/src/nvmatrix 27 | $(VERBOSE)mkdir -p $(OBJDIR)/src/common 28 | $(VERBOSE)mkdir -p $(TARGETDIR) 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cuda-convnet 2 | Alex Krizhevsky's original code from Google Code. Required for [yanglab-convnet](https://github.com/ulrichstern/yanglab-convnet). 3 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Fill in these environment variables. 4 | # I have tested this code with CUDA 4.0, 4.1, and 4.2. 5 | # Only use Fermi-generation cards. Older cards won't work. 6 | 7 | # If you're not sure what these paths should be, 8 | # you can use the find command to try to locate them. 9 | # For example, NUMPY_INCLUDE_PATH contains the file 10 | # arrayobject.h. So you can search for it like this: 11 | # 12 | # find /usr -name arrayobject.h 13 | # 14 | # (it'll almost certainly be under /usr) 15 | 16 | # CUDA toolkit installation directory. 17 | export CUDA_INSTALL_PATH=/usr/local/cuda 18 | 19 | # CUDA SDK installation directory. 20 | export CUDA_SDK_PATH=/home/spoon/NVIDIA_GPU_Computing_SDK 21 | 22 | # Python include directory. This should contain the file Python.h, among others. 23 | export PYTHON_INCLUDE_PATH=/usr/include/python2.7 24 | 25 | # Numpy include directory. This should contain the file arrayobject.h, among others. 26 | export NUMPY_INCLUDE_PATH=/usr/lib/pymodules/python2.7/numpy/core/include/numpy 27 | 28 | # ATLAS library directory. This should contain the file libcblas.so, among others. 29 | export ATLAS_LIB_PATH=/usr/lib/atlas-base 30 | 31 | make $* 32 | 33 | -------------------------------------------------------------------------------- /convdata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | from data import * 26 | import numpy.random as nr 27 | import numpy as n 28 | import random as r 29 | 30 | class CIFARDataProvider(LabeledMemoryDataProvider): 31 | def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 32 | LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 33 | self.data_mean = self.batch_meta['data_mean'] 34 | self.num_colors = 3 35 | self.img_size = 32 36 | # Subtract the mean from the data and make sure that both data and 37 | # labels are in single-precision floating point. 38 | for d in self.data_dic: 39 | # This converts the data matrix to single precision and makes sure that it is C-ordered 40 | d['data'] = n.require((d['data'] - self.data_mean), dtype=n.single, requirements='C') 41 | d['labels'] = n.require(d['labels'].reshape((1, d['data'].shape[1])), dtype=n.single, requirements='C') 42 | 43 | def get_next_batch(self): 44 | epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self) 45 | return epoch, batchnum, [datadic['data'], datadic['labels']] 46 | 47 | # Returns the dimensionality of the two data matrices returned by get_next_batch 48 | # idx is the index of the matrix. 49 | def get_data_dims(self, idx=0): 50 | return self.img_size**2 * self.num_colors if idx == 0 else 1 51 | 52 | # Takes as input an array returned by get_next_batch 53 | # Returns a (numCases, imgSize, imgSize, 3) array which can be 54 | # fed to pylab for plotting. 55 | # This is used by shownet.py to plot test case predictions. 56 | def get_plottable_data(self, data): 57 | return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) 58 | 59 | class CroppedCIFARDataProvider(LabeledMemoryDataProvider): 60 | def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False): 61 | LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 62 | 63 | self.border_size = dp_params['crop_border'] 64 | self.inner_size = 32 - self.border_size*2 65 | self.multiview = dp_params['multiview_test'] and test 66 | self.num_views = 5*2 67 | self.data_mult = self.num_views if self.multiview else 1 68 | self.num_colors = 3 69 | 70 | for d in self.data_dic: 71 | d['data'] = n.require(d['data'], requirements='C') 72 | d['labels'] = n.require(n.tile(d['labels'].reshape((1, d['data'].shape[1])), (1, self.data_mult)), requirements='C') 73 | 74 | self.cropped_data = [n.zeros((self.get_data_dims(), self.data_dic[0]['data'].shape[1]*self.data_mult), dtype=n.single) for x in xrange(2)] 75 | 76 | self.batches_generated = 0 77 | self.data_mean = self.batch_meta['data_mean'].reshape((3,32,32))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((self.get_data_dims(), 1)) 78 | 79 | def get_next_batch(self): 80 | epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self) 81 | 82 | cropped = self.cropped_data[self.batches_generated % 2] 83 | 84 | self.__trim_borders(datadic['data'], cropped) 85 | cropped -= self.data_mean 86 | self.batches_generated += 1 87 | return epoch, batchnum, [cropped, datadic['labels']] 88 | 89 | def get_data_dims(self, idx=0): 90 | return self.inner_size**2 * 3 if idx == 0 else 1 91 | 92 | # Takes as input an array returned by get_next_batch 93 | # Returns a (numCases, imgSize, imgSize, 3) array which can be 94 | # fed to pylab for plotting. 95 | # This is used by shownet.py to plot test case predictions. 96 | def get_plottable_data(self, data): 97 | return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single) 98 | 99 | def __trim_borders(self, x, target): 100 | y = x.reshape(3, 32, 32, x.shape[1]) 101 | 102 | if self.test: # don't need to loop over cases 103 | if self.multiview: 104 | start_positions = [(0,0), (0, self.border_size*2), 105 | (self.border_size, self.border_size), 106 | (self.border_size*2, 0), (self.border_size*2, self.border_size*2)] 107 | end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions] 108 | for i in xrange(self.num_views/2): 109 | pic = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:] 110 | target[:,i * x.shape[1]:(i+1)* x.shape[1]] = pic.reshape((self.get_data_dims(),x.shape[1])) 111 | target[:,(self.num_views/2 + i) * x.shape[1]:(self.num_views/2 +i+1)* x.shape[1]] = pic[:,:,::-1,:].reshape((self.get_data_dims(),x.shape[1])) 112 | else: 113 | pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now 114 | target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1])) 115 | else: 116 | for c in xrange(x.shape[1]): # loop over cases 117 | startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1) 118 | endY, endX = startY + self.inner_size, startX + self.inner_size 119 | pic = y[:,startY:endY,startX:endX, c] 120 | if nr.randint(2) == 0: # also flip the image with 50% probability 121 | pic = pic[:,:,::-1] 122 | target[:,c] = pic.reshape((self.get_data_dims(),)) 123 | 124 | class DummyConvNetDataProvider(LabeledDummyDataProvider): 125 | def __init__(self, data_dim): 126 | LabeledDummyDataProvider.__init__(self, data_dim) 127 | 128 | def get_next_batch(self): 129 | epoch, batchnum, dic = LabeledDummyDataProvider.get_next_batch(self) 130 | 131 | dic['data'] = n.require(dic['data'].T, requirements='C') 132 | dic['labels'] = n.require(dic['labels'].T, requirements='C') 133 | 134 | return epoch, batchnum, [dic['data'], dic['labels']] 135 | 136 | # Returns the dimensionality of the two data matrices returned by get_next_batch 137 | def get_data_dims(self, idx=0): 138 | return self.batch_meta['num_vis'] if idx == 0 else 1 139 | -------------------------------------------------------------------------------- /convnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | import numpy as n 26 | import numpy.random as nr 27 | from util import * 28 | from data import * 29 | from options import * 30 | from gpumodel import * 31 | import sys 32 | import math as m 33 | import layer as lay 34 | from convdata import * 35 | from os import linesep as NL 36 | #import pylab as pl 37 | 38 | class ConvNet(IGPUModel): 39 | def __init__(self, op, load_dic, dp_params={}): 40 | filename_options = [] 41 | dp_params['multiview_test'] = op.get_value('multiview_test') 42 | dp_params['crop_border'] = op.get_value('crop_border') 43 | IGPUModel.__init__(self, "ConvNet", op, load_dic, filename_options, dp_params=dp_params) 44 | 45 | def import_model(self): 46 | lib_name = "pyconvnet" if is_windows_machine() else "_ConvNet" 47 | print "=========================" 48 | print "Importing %s C++ module" % lib_name 49 | self.libmodel = __import__(lib_name) 50 | 51 | def init_model_lib(self): 52 | self.libmodel.initModel(self.layers, self.minibatch_size, self.device_ids[0]) 53 | 54 | def init_model_state(self): 55 | ms = self.model_state 56 | if self.load_file: 57 | ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self, ms['layers']) 58 | else: 59 | ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self) 60 | self.layers_dic = dict(zip([l['name'] for l in ms['layers']], ms['layers'])) 61 | 62 | logreg_name = self.op.get_value('logreg_name') 63 | if logreg_name: 64 | self.logreg_idx = self.get_layer_idx(logreg_name, check_type='cost.logreg') 65 | 66 | # Convert convolutional layers to local 67 | if len(self.op.get_value('conv_to_local')) > 0: 68 | for i, layer in enumerate(ms['layers']): 69 | if layer['type'] == 'conv' and layer['name'] in self.op.get_value('conv_to_local'): 70 | lay.LocalLayerParser.conv_to_local(ms['layers'], i) 71 | # Decouple weight matrices 72 | if len(self.op.get_value('unshare_weights')) > 0: 73 | for name_str in self.op.get_value('unshare_weights'): 74 | if name_str: 75 | name = lay.WeightLayerParser.get_layer_name(name_str) 76 | if name is not None: 77 | name, idx = name[0], name[1] 78 | if name not in self.layers_dic: 79 | raise ModelStateException("Layer '%s' does not exist; unable to unshare" % name) 80 | layer = self.layers_dic[name] 81 | lay.WeightLayerParser.unshare_weights(layer, ms['layers'], matrix_idx=idx) 82 | else: 83 | raise ModelStateException("Invalid layer name '%s'; unable to unshare." % name_str) 84 | self.op.set_value('conv_to_local', [], parse=False) 85 | self.op.set_value('unshare_weights', [], parse=False) 86 | 87 | def get_layer_idx(self, layer_name, check_type=None): 88 | try: 89 | layer_idx = [l['name'] for l in self.model_state['layers']].index(layer_name) 90 | if check_type: 91 | layer_type = self.model_state['layers'][layer_idx]['type'] 92 | if layer_type != check_type: 93 | raise ModelStateException("Layer with name '%s' has type '%s'; should be '%s'." % (layer_name, layer_type, check_type)) 94 | return layer_idx 95 | except ValueError: 96 | raise ModelStateException("Layer with name '%s' not defined." % layer_name) 97 | 98 | def fill_excused_options(self): 99 | if self.op.get_value('check_grads'): 100 | self.op.set_value('save_path', '') 101 | self.op.set_value('train_batch_range', '0') 102 | self.op.set_value('test_batch_range', '0') 103 | self.op.set_value('data_path', '') 104 | 105 | # Make sure the data provider returned data in proper format 106 | def parse_batch_data(self, batch_data, train=True): 107 | if max(d.dtype != n.single for d in batch_data[2]): 108 | raise DataProviderException("All matrices returned by data provider must consist of single-precision floats.") 109 | return batch_data 110 | 111 | def start_batch(self, batch_data, train=True): 112 | data = batch_data[2] 113 | if self.check_grads: 114 | self.libmodel.checkGradients(data) 115 | elif not train and self.multiview_test: 116 | self.libmodel.startMultiviewTest(data, self.train_data_provider.num_views, self.logreg_idx) 117 | else: 118 | self.libmodel.startBatch(data, not train) 119 | 120 | def print_iteration(self): 121 | print "%d.%d..." % (self.epoch, self.batchnum), 122 | 123 | def print_train_time(self, compute_time_py): 124 | print "(%.3f sec)" % (compute_time_py) 125 | 126 | def print_costs(self, cost_outputs): 127 | costs, num_cases = cost_outputs[0], cost_outputs[1] 128 | for errname in costs.keys(): 129 | costs[errname] = [(v/num_cases) for v in costs[errname]] 130 | print "%s: " % errname, 131 | print ", ".join("%6f" % v for v in costs[errname]), 132 | if sum(m.isnan(v) for v in costs[errname]) > 0 or sum(m.isinf(v) for v in costs[errname]): 133 | print "^ got nan or inf!" 134 | sys.exit(1) 135 | 136 | def print_train_results(self): 137 | self.print_costs(self.train_outputs[-1]) 138 | 139 | def print_test_status(self): 140 | pass 141 | 142 | def print_test_results(self): 143 | print "" 144 | print "======================Test output======================" 145 | self.print_costs(self.test_outputs[-1]) 146 | print "" 147 | print "-------------------------------------------------------", 148 | for i,l in enumerate(self.layers): # This is kind of hacky but will do for now. 149 | if 'weights' in l: 150 | if type(l['weights']) == n.ndarray: 151 | print "%sLayer '%s' weights: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['weights'])), n.mean(n.abs(l['weightsInc']))), 152 | elif type(l['weights']) == list: 153 | print "" 154 | print NL.join("Layer '%s' weights[%d]: %e [%e]" % (l['name'], i, n.mean(n.abs(w)), n.mean(n.abs(wi))) for i,(w,wi) in enumerate(zip(l['weights'],l['weightsInc']))), 155 | print "%sLayer '%s' biases: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['biases'])), n.mean(n.abs(l['biasesInc']))), 156 | print "" 157 | 158 | def conditional_save(self): 159 | self.save_state() 160 | print "-------------------------------------------------------" 161 | print "Saved checkpoint to %s" % os.path.join(self.save_path, self.save_file) 162 | print "=======================================================", 163 | 164 | def aggregate_test_outputs(self, test_outputs): 165 | num_cases = sum(t[1] for t in test_outputs) 166 | for i in xrange(1 ,len(test_outputs)): 167 | for k,v in test_outputs[i][0].items(): 168 | for j in xrange(len(v)): 169 | test_outputs[0][0][k][j] += test_outputs[i][0][k][j] 170 | return (test_outputs[0][0], num_cases) 171 | 172 | @classmethod 173 | def get_options_parser(cls): 174 | op = IGPUModel.get_options_parser() 175 | op.add_option("mini", "minibatch_size", IntegerOptionParser, "Minibatch size", default=128) 176 | op.add_option("layer-def", "layer_def", StringOptionParser, "Layer definition file", set_once=True) 177 | op.add_option("layer-params", "layer_params", StringOptionParser, "Layer parameter file") 178 | op.add_option("check-grads", "check_grads", BooleanOptionParser, "Check gradients and quit?", default=0, excuses=['data_path','save_path','train_batch_range','test_batch_range']) 179 | op.add_option("multiview-test", "multiview_test", BooleanOptionParser, "Cropped DP: test on multiple patches?", default=0, requires=['logreg_name']) 180 | op.add_option("crop-border", "crop_border", IntegerOptionParser, "Cropped DP: crop border size", default=4, set_once=True) 181 | op.add_option("logreg-name", "logreg_name", StringOptionParser, "Cropped DP: logreg layer name (for --multiview-test)", default="") 182 | op.add_option("conv-to-local", "conv_to_local", ListOptionParser(StringOptionParser), "Convert given conv layers to unshared local", default=[]) 183 | op.add_option("unshare-weights", "unshare_weights", ListOptionParser(StringOptionParser), "Unshare weight matrices in given layers", default=[]) 184 | op.add_option("conserve-mem", "conserve_mem", BooleanOptionParser, "Conserve GPU memory (slower)?", default=0) 185 | 186 | op.delete_option('max_test_err') 187 | op.options["max_filesize_mb"].default = 0 188 | op.options["testing_freq"].default = 50 189 | op.options["num_epochs"].default = 50000 190 | op.options['dp_type'].default = None 191 | 192 | DataProvider.register_data_provider('cifar', 'CIFAR', CIFARDataProvider) 193 | DataProvider.register_data_provider('dummy-cn-n', 'Dummy ConvNet', DummyConvNetDataProvider) 194 | DataProvider.register_data_provider('cifar-cropped', 'Cropped CIFAR', CroppedCIFARDataProvider) 195 | 196 | return op 197 | 198 | if __name__ == "__main__": 199 | #nr.seed(5) 200 | op = ConvNet.get_options_parser() 201 | 202 | op, load_dic = IGPUModel.parse_options(op) 203 | model = ConvNet(op, load_dic) 204 | model.start() 205 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | import numpy as n 26 | from numpy.random import randn, rand, random_integers 27 | import os 28 | from util import * 29 | 30 | BATCH_META_FILE = "batches.meta" 31 | 32 | class DataProvider: 33 | BATCH_REGEX = re.compile('^data_batch_(\d+)(\.\d+)?$') 34 | def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 35 | if batch_range == None: 36 | batch_range = DataProvider.get_batch_nums(data_dir) 37 | if init_batchnum is None or init_batchnum not in batch_range: 38 | init_batchnum = batch_range[0] 39 | 40 | self.data_dir = data_dir 41 | self.batch_range = batch_range 42 | self.curr_epoch = init_epoch 43 | self.curr_batchnum = init_batchnum 44 | self.dp_params = dp_params 45 | self.batch_meta = self.get_batch_meta(data_dir) 46 | self.data_dic = None 47 | self.test = test 48 | self.batch_idx = batch_range.index(init_batchnum) 49 | 50 | def get_next_batch(self): 51 | if self.data_dic is None or len(self.batch_range) > 1: 52 | self.data_dic = self.get_batch(self.curr_batchnum) 53 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 54 | self.advance_batch() 55 | 56 | return epoch, batchnum, self.data_dic 57 | 58 | def __add_subbatch(self, batch_num, sub_batchnum, batch_dic): 59 | subbatch_path = "%s.%d" % (os.path.join(self.data_dir, self.get_data_file_name(batch_num)), sub_batchnum) 60 | if os.path.exists(subbatch_path): 61 | sub_dic = unpickle(subbatch_path) 62 | self._join_batches(batch_dic, sub_dic) 63 | else: 64 | raise IndexError("Sub-batch %d.%d does not exist in %s" % (batch_num,sub_batchnum, self.data_dir)) 65 | 66 | def _join_batches(self, main_batch, sub_batch): 67 | main_batch['data'] = n.r_[main_batch['data'], sub_batch['data']] 68 | 69 | def get_batch(self, batch_num): 70 | if os.path.exists(self.get_data_file_name(batch_num) + '.1'): # batch in sub-batches 71 | dic = unpickle(self.get_data_file_name(batch_num) + '.1') 72 | sb_idx = 2 73 | while True: 74 | try: 75 | self.__add_subbatch(batch_num, sb_idx, dic) 76 | sb_idx += 1 77 | except IndexError: 78 | break 79 | else: 80 | dic = unpickle(self.get_data_file_name(batch_num)) 81 | return dic 82 | 83 | def get_data_dims(self): 84 | return self.batch_meta['num_vis'] 85 | 86 | def advance_batch(self): 87 | self.batch_idx = self.get_next_batch_idx() 88 | self.curr_batchnum = self.batch_range[self.batch_idx] 89 | if self.batch_idx == 0: # we wrapped 90 | self.curr_epoch += 1 91 | 92 | def get_next_batch_idx(self): 93 | return (self.batch_idx + 1) % len(self.batch_range) 94 | 95 | def get_next_batch_num(self): 96 | return self.batch_range[self.get_next_batch_idx()] 97 | 98 | # get filename of current batch 99 | def get_data_file_name(self, batchnum=None): 100 | if batchnum is None: 101 | batchnum = self.curr_batchnum 102 | return os.path.join(self.data_dir, 'data_batch_%d' % batchnum) 103 | 104 | @classmethod 105 | def get_instance(cls, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, type="default", dp_params={}, test=False): 106 | # why the fuck can't i reference DataProvider in the original definition? 107 | #cls.dp_classes['default'] = DataProvider 108 | type = type or DataProvider.get_batch_meta(data_dir)['dp_type'] # allow data to decide data provider 109 | if type.startswith("dummy-"): 110 | name = "-".join(type.split('-')[:-1]) + "-n" 111 | if name not in dp_types: 112 | raise DataProviderException("No such data provider: %s" % type) 113 | _class = dp_classes[name] 114 | dims = int(type.split('-')[-1]) 115 | return _class(dims) 116 | elif type in dp_types: 117 | _class = dp_classes[type] 118 | return _class(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 119 | 120 | raise DataProviderException("No such data provider: %s" % type) 121 | 122 | @classmethod 123 | def register_data_provider(cls, name, desc, _class): 124 | if name in dp_types: 125 | raise DataProviderException("Data provider %s already registered" % name) 126 | dp_types[name] = desc 127 | dp_classes[name] = _class 128 | 129 | @staticmethod 130 | def get_batch_meta(data_dir): 131 | return unpickle(os.path.join(data_dir, BATCH_META_FILE)) 132 | 133 | @staticmethod 134 | def get_batch_filenames(srcdir): 135 | return sorted([f for f in os.listdir(srcdir) if DataProvider.BATCH_REGEX.match(f)], key=alphanum_key) 136 | 137 | @staticmethod 138 | def get_batch_nums(srcdir): 139 | names = DataProvider.get_batch_filenames(srcdir) 140 | return sorted(list(set(int(DataProvider.BATCH_REGEX.match(n).group(1)) for n in names))) 141 | 142 | @staticmethod 143 | def get_num_batches(srcdir): 144 | return len(DataProvider.get_batch_nums(srcdir)) 145 | 146 | class DummyDataProvider(DataProvider): 147 | def __init__(self, data_dim): 148 | #self.data_dim = data_dim 149 | self.batch_range = [1] 150 | self.batch_meta = {'num_vis': data_dim, 'data_in_rows':True} 151 | self.curr_epoch = 1 152 | self.curr_batchnum = 1 153 | self.batch_idx = 0 154 | 155 | def get_next_batch(self): 156 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 157 | self.advance_batch() 158 | data = rand(512, self.get_data_dims()).astype(n.single) 159 | return self.curr_epoch, self.curr_batchnum, {'data':data} 160 | 161 | 162 | class LabeledDummyDataProvider(DummyDataProvider): 163 | def __init__(self, data_dim, num_classes=10, num_cases=512): 164 | #self.data_dim = data_dim 165 | self.batch_range = [1] 166 | self.batch_meta = {'num_vis': data_dim, 167 | 'label_names': [str(x) for x in range(num_classes)], 168 | 'data_in_rows':True} 169 | self.num_cases = num_cases 170 | self.num_classes = num_classes 171 | self.curr_epoch = 1 172 | self.curr_batchnum = 1 173 | self.batch_idx=0 174 | 175 | def get_num_classes(self): 176 | return self.num_classes 177 | 178 | def get_next_batch(self): 179 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 180 | self.advance_batch() 181 | data = rand(self.num_cases, self.get_data_dims()).astype(n.single) # <--changed to rand 182 | labels = n.require(n.c_[random_integers(0,self.num_classes-1,self.num_cases)], requirements='C', dtype=n.single) 183 | 184 | return self.curr_epoch, self.curr_batchnum, {'data':data, 'labels':labels} 185 | 186 | class MemoryDataProvider(DataProvider): 187 | def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params=None, test=False): 188 | DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 189 | self.data_dic = [] 190 | for i in self.batch_range: 191 | self.data_dic += [self.get_batch(i)] 192 | 193 | def get_next_batch(self): 194 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 195 | self.advance_batch() 196 | 197 | return epoch, batchnum, self.data_dic[batchnum - self.batch_range[0]] 198 | 199 | class LabeledDataProvider(DataProvider): 200 | def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 201 | DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 202 | 203 | def get_num_classes(self): 204 | return len(self.batch_meta['label_names']) 205 | 206 | class LabeledMemoryDataProvider(LabeledDataProvider): 207 | def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False): 208 | LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test) 209 | self.data_dic = [] 210 | for i in batch_range: 211 | self.data_dic += [unpickle(self.get_data_file_name(i))] 212 | self.data_dic[-1]["labels"] = n.c_[n.require(self.data_dic[-1]['labels'], dtype=n.single)] 213 | 214 | def get_next_batch(self): 215 | epoch, batchnum = self.curr_epoch, self.curr_batchnum 216 | self.advance_batch() 217 | bidx = batchnum - self.batch_range[0] 218 | return epoch, batchnum, self.data_dic[bidx] 219 | 220 | dp_types = {"default": "The default data provider; loads one batch into memory at a time", 221 | "memory": "Loads the entire dataset into memory", 222 | "labeled": "Returns data and labels (used by classifiers)", 223 | "labeled-memory": "Combination labeled + memory", 224 | "dummy-n": "Dummy data provider for n-dimensional data", 225 | "dummy-labeled-n": "Labeled dummy data provider for n-dimensional data"} 226 | dp_classes = {"default": DataProvider, 227 | "memory": MemoryDataProvider, 228 | "labeled": LabeledDataProvider, 229 | "labeled-memory": LabeledMemoryDataProvider, 230 | "dummy-n": DummyDataProvider, 231 | "dummy-labeled-n": LabeledDummyDataProvider} 232 | 233 | class DataProviderException(Exception): 234 | pass 235 | -------------------------------------------------------------------------------- /example-layers/layer-params-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | # Reduce all learning rates by factor of 10 after 120 epochs. 4 | # Then another factor of 10 after 10 more epochs. 5 | 6 | [conv1] 7 | epsW=0.001 8 | epsB=0.002 9 | momW=0.9 10 | momB=0.9 11 | wc=0.004 12 | 13 | [conv2] 14 | epsW=0.001 15 | epsB=0.002 16 | momW=0.9 17 | momB=0.9 18 | wc=0.004 19 | 20 | [conv3] 21 | epsW=0.001 22 | epsB=0.002 23 | momW=0.9 24 | momB=0.9 25 | wc=0.004 26 | 27 | [fc10] 28 | epsW=0.001 29 | epsB=0.002 30 | momW=0.9 31 | momB=0.9 32 | wc=1 33 | 34 | [logprob] 35 | coeff=1 36 | 37 | [rnorm1] 38 | scale=0.00005 39 | pow=.75 40 | 41 | [rnorm2] 42 | scale=0.00005 43 | pow=.75 44 | -------------------------------------------------------------------------------- /example-layers/layer-params-19pct.cfg: -------------------------------------------------------------------------------- 1 | # 19% error on CIFAR-10 in 20 minutes - layer parameter file 2 | # Set wc to 0 for translations -- 14.2% 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.004 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.004 17 | 18 | [conv3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [fc10] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=3 31 | 32 | [logprob] 33 | coeff=1 34 | -------------------------------------------------------------------------------- /example-layers/layer-params-80sec.cfg: -------------------------------------------------------------------------------- 1 | # 26% error on CIFAR-10 in 80 seconds - layer parameter file 2 | # You should reduce the learning rate after 8 epochs by a factor of 10. 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.004 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.004 17 | 18 | [conv3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [fc64] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=.03 31 | 32 | [fc10] 33 | epsW=0.001 34 | epsB=0.002 35 | momW=0.9 36 | momB=0.9 37 | wc=.03 38 | 39 | [logprob] 40 | coeff=1 41 | -------------------------------------------------------------------------------- /example-layers/layer-params-conv-local-11pct.cfg: -------------------------------------------------------------------------------- 1 | # 11% error on CIFAR-10 - layer parameter file 2 | # Methodology: 3 | # 1. Train on batches 1-4, use batch 5 for validation. 4 | # 2. After about 350 epochs, validation error no longer making improvements. 5 | # 3. Fold in batch 5. 6 | # 4. Train on batches 1-5 for about 150 more epochs, until the batch 5 error is near the errors for batches 1-4. It takes forever to actually get there but after 150 epochs it's close enough. 7 | # 5. Lower learning rates (epsW) by a factor of 10 to 0.0001, train for 10 more epochs. 8 | # 6. Lower learning rates (epsW) by another factor of 10 to 0.00001, train for 10 more epochs. 9 | # 7. Stop. Test on batch 6 with --test-range=6 --multiview-test=1 --logreg-name=logprob (read more about what this does here: http://code.google.com/p/cuda-convnet/wiki/TrainingNet#Training_on_image_translations ) 10 | 11 | # More details about methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 12 | 13 | [conv1] 14 | epsW=0.001 15 | epsB=0.002 16 | momW=0.9 17 | momB=0.9 18 | wc=0.000 19 | 20 | [conv2] 21 | epsW=0.001 22 | epsB=0.002 23 | momW=0.9 24 | momB=0.9 25 | wc=0.000 26 | 27 | [local3] 28 | epsW=0.001 29 | epsB=0.002 30 | momW=0.9 31 | momB=0.9 32 | wc=0.004 33 | 34 | [local4] 35 | epsW=0.001 36 | epsB=0.002 37 | momW=0.9 38 | momB=0.9 39 | wc=0.004 40 | 41 | [fc10] 42 | epsW=0.001 43 | epsB=0.002 44 | momW=0.9 45 | momB=0.9 46 | wc=0.01 47 | 48 | [logprob] 49 | coeff=1 50 | 51 | [rnorm1] 52 | scale=0.001 53 | pow=0.75 54 | 55 | [rnorm2] 56 | scale=0.001 57 | pow=0.75 58 | -------------------------------------------------------------------------------- /example-layers/layer-params-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 - layer parameter file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.00 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.00 17 | 18 | [local3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [local4] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=0.004 31 | 32 | [fc10] 33 | epsW=0.001 34 | epsB=0.002 35 | momW=0.9 36 | momB=0.9 37 | wc=0.004 38 | 39 | [logprob] 40 | coeff=1 41 | -------------------------------------------------------------------------------- /example-layers/layer-params-example.cfg: -------------------------------------------------------------------------------- 1 | [conv32] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0 7 | 8 | [local32] 9 | epsW=0.001 10 | epsB=0.002 11 | momW=0.9 12 | momB=0.9 13 | wc=0 14 | 15 | [fc1024] 16 | momW=0.9 17 | momB=0.9 18 | epsW=0.00001 19 | epsB=0.00002 20 | wc=0 21 | 22 | [conv32-2] 23 | epsW=0.001 24 | epsB=0.002 25 | momW=0.9 26 | momB=0.9 27 | wc=0 28 | 29 | [conv32-3] 30 | epsW=0.001 31 | epsB=0.002 32 | momW=0.9 33 | momB=0.9 34 | wc=0 35 | 36 | [fc10] 37 | epsW=0.0001,0.001 38 | epsB=0.002 39 | momW=0.5,0.9 40 | momB=0.9 41 | wc=0,0 42 | 43 | [logprob] 44 | coeff=1 45 | -------------------------------------------------------------------------------- /example-layers/layer-params.gc.cfg: -------------------------------------------------------------------------------- 1 | [conv32] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0 7 | 8 | [local32] 9 | epsW=0.001 10 | epsB=0.002 11 | momW=0.9 12 | momB=0.9 13 | wc=0 14 | 15 | [fc10] 16 | wc=0,0 17 | momB=0 18 | momW=0,0 19 | epsW=0.00001,0.00001 20 | epsB=0.00002 21 | 22 | [logprob] 23 | coeff=1 24 | -------------------------------------------------------------------------------- /example-layers/layers-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=4 21 | sharedBiases=1 22 | 23 | [pool1] 24 | type=pool 25 | pool=max 26 | inputs=conv1 27 | start=0 28 | sizeX=3 29 | stride=2 30 | outputsX=0 31 | channels=32 32 | neuron=relu 33 | 34 | [rnorm1] 35 | type=rnorm 36 | inputs=pool1 37 | channels=32 38 | size=3 39 | 40 | [conv2] 41 | type=conv 42 | inputs=rnorm1 43 | filters=32 44 | padding=2 45 | stride=1 46 | filterSize=5 47 | channels=32 48 | neuron=relu 49 | initW=0.01 50 | partialSum=4 51 | sharedBiases=1 52 | 53 | [pool2] 54 | type=pool 55 | pool=avg 56 | inputs=conv2 57 | start=0 58 | sizeX=3 59 | stride=2 60 | outputsX=0 61 | channels=32 62 | 63 | [rnorm2] 64 | type=rnorm 65 | inputs=pool2 66 | channels=32 67 | size=3 68 | 69 | [conv3] 70 | type=conv 71 | inputs=rnorm2 72 | filters=64 73 | padding=2 74 | stride=1 75 | filterSize=5 76 | channels=32 77 | neuron=relu 78 | initW=0.01 79 | partialSum=4 80 | sharedBiases=1 81 | 82 | [pool3] 83 | type=pool 84 | pool=avg 85 | inputs=conv3 86 | start=0 87 | sizeX=3 88 | stride=2 89 | outputsX=0 90 | channels=64 91 | 92 | [fc10] 93 | type=fc 94 | outputs=10 95 | inputs=pool3 96 | initW=0.01 97 | 98 | [probs] 99 | type=softmax 100 | inputs=fc10 101 | 102 | [logprob] 103 | type=cost.logreg 104 | inputs=labels,probs 105 | -------------------------------------------------------------------------------- /example-layers/layers-19pct.cfg: -------------------------------------------------------------------------------- 1 | # 19% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=1 21 | sharedBiases=1 22 | 23 | [pool1] 24 | type=pool 25 | pool=max 26 | inputs=conv1 27 | start=0 28 | sizeX=3 29 | stride=2 30 | outputsX=0 31 | channels=32 32 | neuron=relu 33 | 34 | [conv2] 35 | type=conv 36 | inputs=pool1 37 | filters=32 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=32 42 | neuron=relu 43 | initW=0.01 44 | partialSum=1 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=avg 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [conv3] 58 | type=conv 59 | inputs=pool2 60 | filters=64 61 | padding=2 62 | stride=1 63 | filterSize=5 64 | channels=32 65 | neuron=relu 66 | initW=0.01 67 | partialSum=1 68 | sharedBiases=1 69 | 70 | [pool3] 71 | type=pool 72 | pool=avg 73 | inputs=conv3 74 | start=0 75 | sizeX=3 76 | stride=2 77 | outputsX=0 78 | channels=64 79 | 80 | [fc10] 81 | type=fc 82 | outputs=10 83 | inputs=pool3 84 | initW=0.01 85 | 86 | [probs] 87 | type=softmax 88 | inputs=fc10 89 | 90 | [logprob] 91 | type=cost.logreg 92 | inputs=labels,probs 93 | -------------------------------------------------------------------------------- /example-layers/layers-80sec.cfg: -------------------------------------------------------------------------------- 1 | # 26% error on CIFAR-10 in 80 seconds - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=4 21 | sharedBiases=1 22 | 23 | [pool1] 24 | type=pool 25 | pool=max 26 | inputs=conv1 27 | start=0 28 | sizeX=3 29 | stride=2 30 | outputsX=0 31 | channels=32 32 | neuron=relu 33 | 34 | [conv2] 35 | type=conv 36 | inputs=pool1 37 | filters=32 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=32 42 | neuron=relu 43 | initW=0.01 44 | partialSum=4 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=avg 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [conv3] 58 | type=conv 59 | inputs=pool2 60 | filters=64 61 | padding=2 62 | stride=1 63 | filterSize=5 64 | channels=32 65 | neuron=relu 66 | initW=0.01 67 | partialSum=4 68 | sharedBiases=1 69 | 70 | [pool3] 71 | type=pool 72 | pool=avg 73 | inputs=conv3 74 | start=0 75 | sizeX=3 76 | stride=2 77 | outputsX=0 78 | channels=64 79 | 80 | [fc64] 81 | type=fc 82 | outputs=64 83 | inputs=pool3 84 | initW=0.1 85 | neuron=relu 86 | 87 | [fc10] 88 | type=fc 89 | outputs=10 90 | inputs=fc64 91 | initW=0.1 92 | 93 | [probs] 94 | type=softmax 95 | inputs=fc10 96 | 97 | [logprob] 98 | type=cost.logreg 99 | inputs=labels,probs 100 | -------------------------------------------------------------------------------- /example-layers/layers-conv-local-11pct.cfg: -------------------------------------------------------------------------------- 1 | [data] 2 | type=data 3 | dataIdx=0 4 | 5 | [labels] 6 | type=data 7 | dataIdx=1 8 | 9 | [conv1] 10 | type=conv 11 | inputs=data 12 | channels=3 13 | filters=64 14 | padding=2 15 | stride=1 16 | filterSize=5 17 | neuron=relu 18 | initW=0.0001 19 | partialSum=4 20 | sharedBiases=1 21 | 22 | [pool1] 23 | type=pool 24 | pool=max 25 | inputs=conv1 26 | start=0 27 | sizeX=3 28 | stride=2 29 | outputsX=0 30 | channels=64 31 | 32 | [rnorm1] 33 | type=cmrnorm 34 | inputs=pool1 35 | channels=64 36 | size=9 37 | 38 | [conv2] 39 | type=conv 40 | inputs=rnorm1 41 | filters=64 42 | padding=2 43 | stride=1 44 | filterSize=5 45 | channels=64 46 | neuron=relu 47 | initW=0.01 48 | partialSum=8 49 | sharedBiases=1 50 | 51 | [rnorm2] 52 | type=cmrnorm 53 | inputs=conv2 54 | channels=64 55 | size=9 56 | 57 | [pool2] 58 | type=pool 59 | pool=max 60 | inputs=rnorm2 61 | start=0 62 | sizeX=3 63 | stride=2 64 | outputsX=0 65 | channels=64 66 | 67 | [local3] 68 | type=local 69 | inputs=pool2 70 | filters=64 71 | padding=1 72 | stride=1 73 | filterSize=3 74 | channels=64 75 | neuron=relu 76 | initW=0.04 77 | 78 | [local4] 79 | type=local 80 | inputs=local3 81 | filters=32 82 | padding=1 83 | stride=1 84 | filterSize=3 85 | channels=64 86 | neuron=relu 87 | initW=0.04 88 | 89 | [fc10] 90 | type=fc 91 | outputs=10 92 | inputs=local4 93 | initW=0.01 94 | 95 | [probs] 96 | type=softmax 97 | inputs=fc10 98 | 99 | [logprob] 100 | type=cost.logreg 101 | inputs=labels,probs 102 | -------------------------------------------------------------------------------- /example-layers/layers-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 in 20 minutes - layer definition file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [data] 5 | type=data 6 | dataIdx=0 7 | 8 | [labels] 9 | type=data 10 | dataIdx=1 11 | 12 | [conv1] 13 | type=conv 14 | inputs=data 15 | channels=3 16 | filters=64 17 | padding=2 18 | stride=1 19 | filterSize=5 20 | neuron=relu 21 | initW=0.0001 22 | partialSum=4 23 | sharedBiases=1 24 | 25 | [pool1] 26 | type=pool 27 | pool=max 28 | inputs=conv1 29 | start=0 30 | sizeX=3 31 | stride=2 32 | outputsX=0 33 | channels=64 34 | 35 | [conv2] 36 | type=conv 37 | inputs=pool1 38 | filters=64 39 | padding=2 40 | stride=1 41 | filterSize=5 42 | channels=64 43 | neuron=relu 44 | initW=0.01 45 | partialSum=8 46 | sharedBiases=1 47 | 48 | [pool2] 49 | type=pool 50 | pool=max 51 | inputs=conv2 52 | start=0 53 | sizeX=3 54 | stride=2 55 | outputsX=0 56 | channels=64 57 | 58 | [local3] 59 | type=local 60 | inputs=pool2 61 | filters=32 62 | padding=1 63 | stride=1 64 | filterSize=3 65 | channels=64 66 | neuron=relu 67 | initW=0.04 68 | 69 | [local4] 70 | type=local 71 | inputs=local3 72 | filters=32 73 | padding=1 74 | stride=1 75 | filterSize=3 76 | channels=32 77 | neuron=relu 78 | initW=0.04 79 | 80 | [fc10] 81 | type=fc 82 | outputs=10 83 | inputs=local4 84 | initW=0.01 85 | neuron=ident 86 | 87 | [probs] 88 | type=softmax 89 | inputs=fc10 90 | 91 | [logprob] 92 | type=cost.logreg 93 | inputs=labels,probs 94 | -------------------------------------------------------------------------------- /example-layers/layers-example.cfg: -------------------------------------------------------------------------------- 1 | # This is a layer configuration file that contains all the 2 | # layer types supported by this code. It's not actually good for anything 3 | # other than demonstrating how layers are specified and connected to one another. 4 | 5 | # Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580. 6 | # But there's no particular reason to run the net specified by this file. It's not actually good. 7 | 8 | [data] 9 | type=data 10 | dataIdx=0 11 | 12 | [labels] 13 | type=data 14 | dataIdx=1 15 | 16 | [conv32] 17 | type=conv 18 | inputs=data 19 | channels=3 20 | filters=32 21 | padding=4 22 | stride=1 23 | filterSize=9 24 | neuron=logistic 25 | initW=0.00001 26 | partialSum=1 27 | sharedBiases=true 28 | 29 | [local32] 30 | type=local 31 | inputs=conv32 32 | channels=32 33 | filters=32 34 | padding=4 35 | stride=1 36 | filterSize=9 37 | neuron=logistic 38 | initW=0.00001 39 | 40 | [fc1024] 41 | type=fc 42 | outputs=1024 43 | inputs=data 44 | initW=0.001 45 | neuron=relu 46 | 47 | [maxpool] 48 | type=pool 49 | pool=max 50 | inputs=local32 51 | start=0 52 | sizeX=4 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [rnorm1] 58 | type=rnorm 59 | inputs=maxpool 60 | channels=32 61 | sizeX=5 62 | scale=0.0000125 63 | pow=0.75 64 | 65 | [cnorm1] 66 | type=cnorm 67 | inputs=rnorm1 68 | channels=32 69 | sizeX=7 70 | scale=0.001 71 | pow=0.5 72 | 73 | [conv32-2] 74 | type=conv 75 | inputs=cnorm1 76 | groups=4 77 | channels=32 78 | filters=32 79 | padding=2 80 | stride=1 81 | filterSize=5 82 | neuron=relu 83 | initW=0.0001 84 | partialSum=1 85 | sharedBiases=false 86 | 87 | [conv32-3] 88 | type=conv 89 | inputs=conv32-2 90 | groups=4 91 | channels=128 92 | filters=32 93 | padding=2 94 | stride=2 95 | filterSize=5 96 | neuron=relu 97 | initW=0.0001 98 | partialSum=1 99 | randSparse=true 100 | filterChannels=64 101 | 102 | [fc10] 103 | type=fc 104 | outputs=10 105 | inputs=conv32-3,fc1024 106 | initW=0.0001,0.0001 107 | neuron=ident 108 | 109 | [probs] 110 | type=softmax 111 | inputs=fc10 112 | 113 | [logprob] 114 | type=cost.logreg 115 | inputs=labels,probs 116 | -------------------------------------------------------------------------------- /example-layers/layers.gc.cfg: -------------------------------------------------------------------------------- 1 | [data] 2 | type=data 3 | dataIdx=0 4 | 5 | [labels] 6 | type=data 7 | dataIdx=1 8 | 9 | [conv32] 10 | type=conv 11 | inputs=data 12 | filters=16 13 | padding=0 14 | stride=1 15 | filterSize=3 16 | channels=3 17 | neuron=linear[3,2.2] 18 | initW=0.8 19 | partialSum=1 20 | sharedBiases=true 21 | 22 | [avgpool] 23 | type=pool 24 | pool=avg 25 | inputs=conv32 26 | start=-2 27 | sizeX=4 28 | stride=4 29 | outputsX=0 30 | channels=16 31 | 32 | [local32] 33 | type=local 34 | inputs=avgpool 35 | filters=32 36 | padding=2 37 | stride=3 38 | filterSize=5 39 | channels=16 40 | neuron=tanh[1.79,-0.66] 41 | initW=0.4 42 | #partialSum=1 43 | #sharedBiases=true 44 | groups=2 45 | randSparse=true 46 | 47 | [fc10] 48 | type=fc 49 | outputs=10 50 | inputs=local32,conv32 51 | initW=0.8,0.008 52 | 53 | [probs] 54 | type=softmax 55 | inputs=fc10 56 | 57 | [logprob] 58 | type=cost.logreg 59 | inputs=labels,probs 60 | -------------------------------------------------------------------------------- /gpumodel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | import numpy as n 26 | import os 27 | from time import time, asctime, localtime, strftime 28 | from numpy.random import randn, rand 29 | from numpy import s_, dot, tile, zeros, ones, zeros_like, array, ones_like 30 | from util import * 31 | from data import * 32 | from options import * 33 | from math import ceil, floor, sqrt 34 | from data import DataProvider, dp_types 35 | import sys 36 | import shutil 37 | import platform 38 | from os import linesep as NL 39 | 40 | class ModelStateException(Exception): 41 | pass 42 | 43 | # GPU Model interface 44 | class IGPUModel: 45 | def __init__(self, model_name, op, load_dic, filename_options=None, dp_params={}): 46 | # these are input parameters 47 | self.model_name = model_name 48 | self.op = op 49 | self.options = op.options 50 | self.load_dic = load_dic 51 | self.filename_options = filename_options 52 | self.dp_params = dp_params 53 | self.get_gpus() 54 | self.fill_excused_options() 55 | #assert self.op.all_values_given() 56 | 57 | for o in op.get_options_list(): 58 | setattr(self, o.name, o.value) 59 | 60 | # these are things that the model must remember but they're not input parameters 61 | if load_dic: 62 | self.model_state = load_dic["model_state"] 63 | self.save_file = self.options["load_file"].value 64 | if not os.path.isdir(self.save_file): 65 | self.save_file = os.path.dirname(self.save_file) 66 | else: 67 | self.model_state = {} 68 | if filename_options is not None: 69 | self.save_file = model_name + "_" + '_'.join(['%s_%s' % (char, self.options[opt].get_str_value()) for opt, char in filename_options]) + '_' + strftime('%Y-%m-%d_%H.%M.%S') 70 | self.model_state["train_outputs"] = [] 71 | self.model_state["test_outputs"] = [] 72 | self.model_state["epoch"] = 1 73 | self.model_state["batchnum"] = self.train_batch_range[0] 74 | 75 | self.init_data_providers() 76 | if load_dic: 77 | self.train_data_provider.advance_batch() 78 | 79 | # model state often requries knowledge of data provider, so it's initialized after 80 | try: 81 | self.init_model_state() 82 | except ModelStateException, e: 83 | print e 84 | sys.exit(1) 85 | for var, val in self.model_state.iteritems(): 86 | setattr(self, var, val) 87 | 88 | self.import_model() 89 | self.init_model_lib() 90 | 91 | def import_model(self): 92 | print "=========================" 93 | print "Importing %s C++ module" % ('_' + self.model_name) 94 | self.libmodel = __import__('_' + self.model_name) 95 | 96 | def fill_excused_options(self): 97 | pass 98 | 99 | def init_data_providers(self): 100 | self.dp_params['convnet'] = self 101 | try: 102 | self.test_data_provider = DataProvider.get_instance(self.data_path, self.test_batch_range, 103 | type=self.dp_type, dp_params=self.dp_params, test=True) 104 | self.train_data_provider = DataProvider.get_instance(self.data_path, self.train_batch_range, 105 | self.model_state["epoch"], self.model_state["batchnum"], 106 | type=self.dp_type, dp_params=self.dp_params, test=False) 107 | except DataProviderException, e: 108 | print "Unable to create data provider: %s" % e 109 | self.print_data_providers() 110 | sys.exit() 111 | 112 | def init_model_state(self): 113 | pass 114 | 115 | def init_model_lib(self): 116 | pass 117 | 118 | def start(self): 119 | if self.test_only: 120 | self.test_outputs += [self.get_test_error()] 121 | self.print_test_results() 122 | sys.exit(0) 123 | self.train() 124 | 125 | def train(self): 126 | print "=========================" 127 | print "Training %s" % self.model_name 128 | self.op.print_values() 129 | print "=========================" 130 | self.print_model_state() 131 | print "Running on CUDA device(s) %s" % ", ".join("%d" % d for d in self.device_ids) 132 | print "Current time: %s" % asctime(localtime()) 133 | print "Saving checkpoints to %s" % os.path.join(self.save_path, self.save_file) 134 | print "=========================" 135 | next_data = self.get_next_batch() 136 | while self.epoch <= self.num_epochs: 137 | data = next_data 138 | self.epoch, self.batchnum = data[0], data[1] 139 | self.print_iteration() 140 | sys.stdout.flush() 141 | 142 | compute_time_py = time() 143 | self.start_batch(data) 144 | 145 | # load the next batch while the current one is computing 146 | next_data = self.get_next_batch() 147 | 148 | batch_output = self.finish_batch() 149 | self.train_outputs += [batch_output] 150 | self.print_train_results() 151 | 152 | if self.get_num_batches_done() % self.testing_freq == 0: 153 | self.sync_with_host() 154 | self.test_outputs += [self.get_test_error()] 155 | self.print_test_results() 156 | self.print_test_status() 157 | self.conditional_save() 158 | 159 | self.print_train_time(time() - compute_time_py) 160 | self.cleanup() 161 | 162 | def cleanup(self): 163 | sys.exit(0) 164 | 165 | def sync_with_host(self): 166 | self.libmodel.syncWithHost() 167 | 168 | def print_model_state(self): 169 | pass 170 | 171 | def get_num_batches_done(self): 172 | return len(self.train_batch_range) * (self.epoch - 1) + self.batchnum - self.train_batch_range[0] + 1 173 | 174 | def get_next_batch(self, train=True): 175 | dp = self.train_data_provider 176 | if not train: 177 | dp = self.test_data_provider 178 | return self.parse_batch_data(dp.get_next_batch(), train=train) 179 | 180 | def parse_batch_data(self, batch_data, train=True): 181 | return batch_data[0], batch_data[1], batch_data[2]['data'] 182 | 183 | def start_batch(self, batch_data, train=True): 184 | self.libmodel.startBatch(batch_data[2], not train) 185 | 186 | def finish_batch(self): 187 | return self.libmodel.finishBatch() 188 | 189 | def print_iteration(self): 190 | print "\t%d.%d..." % (self.epoch, self.batchnum), 191 | 192 | def print_train_time(self, compute_time_py): 193 | print "(%.3f sec)" % (compute_time_py) 194 | 195 | def print_train_results(self): 196 | batch_error = self.train_outputs[-1][0] 197 | if not (batch_error > 0 and batch_error < 2e20): 198 | print "Crazy train error: %.6f" % batch_error 199 | self.cleanup() 200 | 201 | print "Train error: %.6f " % (batch_error), 202 | 203 | def print_test_results(self): 204 | batch_error = self.test_outputs[-1][0] 205 | print "%s\t\tTest error: %.6f" % (NL, batch_error), 206 | 207 | def print_test_status(self): 208 | status = (len(self.test_outputs) == 1 or self.test_outputs[-1][0] < self.test_outputs[-2][0]) and "ok" or "WORSE" 209 | print status, 210 | 211 | def conditional_save(self): 212 | batch_error = self.test_outputs[-1][0] 213 | if batch_error > 0 and batch_error < self.max_test_err: 214 | self.save_state() 215 | else: 216 | print "\tTest error > %g, not saving." % self.max_test_err, 217 | 218 | def aggregate_test_outputs(self, test_outputs): 219 | test_error = tuple([sum(t[r] for t in test_outputs) / (1 if self.test_one else len(self.test_batch_range)) for r in range(len(test_outputs[-1]))]) 220 | return test_error 221 | 222 | def get_test_error(self): 223 | next_data = self.get_next_batch(train=False) 224 | test_outputs = [] 225 | while True: 226 | data = next_data 227 | self.start_batch(data, train=False) 228 | load_next = not self.test_one and data[1] < self.test_batch_range[-1] 229 | if load_next: # load next batch 230 | next_data = self.get_next_batch(train=False) 231 | test_outputs += [self.finish_batch()] 232 | if self.test_only: # Print the individual batch results for safety 233 | print "batch %d: %s" % (data[1], str(test_outputs[-1])) 234 | if not load_next: 235 | break 236 | sys.stdout.flush() 237 | 238 | return self.aggregate_test_outputs(test_outputs) 239 | 240 | def set_var(self, var_name, var_val): 241 | setattr(self, var_name, var_val) 242 | self.model_state[var_name] = var_val 243 | return var_val 244 | 245 | def get_var(self, var_name): 246 | return self.model_state[var_name] 247 | 248 | def has_var(self, var_name): 249 | return var_name in self.model_state 250 | 251 | def save_state(self): 252 | for att in self.model_state: 253 | if hasattr(self, att): 254 | self.model_state[att] = getattr(self, att) 255 | 256 | dic = {"model_state": self.model_state, 257 | "op": self.op} 258 | 259 | checkpoint_dir = os.path.join(self.save_path, self.save_file) 260 | checkpoint_file = "%d.%d" % (self.epoch, self.batchnum) 261 | checkpoint_file_full_path = os.path.join(checkpoint_dir, checkpoint_file) 262 | if not os.path.exists(checkpoint_dir): 263 | os.makedirs(checkpoint_dir) 264 | 265 | pickle(checkpoint_file_full_path, dic,compress=self.zip_save) 266 | 267 | for f in sorted(os.listdir(checkpoint_dir), key=alphanum_key): 268 | if sum(os.path.getsize(os.path.join(checkpoint_dir, f2)) for f2 in os.listdir(checkpoint_dir)) > self.max_filesize_mb*1024*1024 and f != checkpoint_file: 269 | os.remove(os.path.join(checkpoint_dir, f)) 270 | else: 271 | break 272 | 273 | @staticmethod 274 | def load_checkpoint(load_dir): 275 | if os.path.isdir(load_dir): 276 | return unpickle(os.path.join(load_dir, sorted(os.listdir(load_dir), key=alphanum_key)[-1])) 277 | return unpickle(load_dir) 278 | 279 | @staticmethod 280 | def get_options_parser(): 281 | op = OptionsParser() 282 | op.add_option("f", "load_file", StringOptionParser, "Load file", default="", excuses=OptionsParser.EXCLUDE_ALL) 283 | op.add_option("train-range", "train_batch_range", RangeOptionParser, "Data batch range: training") 284 | op.add_option("test-range", "test_batch_range", RangeOptionParser, "Data batch range: testing") 285 | op.add_option("data-provider", "dp_type", StringOptionParser, "Data provider", default="default") 286 | op.add_option("test-freq", "testing_freq", IntegerOptionParser, "Testing frequency", default=25) 287 | op.add_option("epochs", "num_epochs", IntegerOptionParser, "Number of epochs", default=500) 288 | op.add_option("data-path", "data_path", StringOptionParser, "Data path") 289 | op.add_option("save-path", "save_path", StringOptionParser, "Save path") 290 | op.add_option("max-filesize", "max_filesize_mb", IntegerOptionParser, "Maximum save file size (MB)", default=5000) 291 | op.add_option("max-test-err", "max_test_err", FloatOptionParser, "Maximum test error for saving") 292 | op.add_option("num-gpus", "num_gpus", IntegerOptionParser, "Number of GPUs", default=1) 293 | op.add_option("test-only", "test_only", BooleanOptionParser, "Test and quit?", default=0) 294 | op.add_option("zip-save", "zip_save", BooleanOptionParser, "Compress checkpoints?", default=0) 295 | op.add_option("test-one", "test_one", BooleanOptionParser, "Test on one batch at a time?", default=1) 296 | op.add_option("gpu", "gpu", ListOptionParser(IntegerOptionParser), "GPU override", default=OptionExpression("[-1] * num_gpus")) 297 | return op 298 | 299 | @staticmethod 300 | def print_data_providers(): 301 | print "Available data providers:" 302 | for dp, desc in dp_types.iteritems(): 303 | print " %s: %s" % (dp, desc) 304 | 305 | def get_gpus(self): 306 | self.device_ids = [get_gpu_lock(g) for g in self.op.get_value('gpu')] 307 | if GPU_LOCK_NO_LOCK in self.device_ids: 308 | print "Not enough free GPUs!" 309 | sys.exit() 310 | 311 | @staticmethod 312 | def parse_options(op): 313 | try: 314 | load_dic = None 315 | options = op.parse() 316 | if options["load_file"].value_given: 317 | load_dic = IGPUModel.load_checkpoint(options["load_file"].value) 318 | old_op = load_dic["op"] 319 | old_op.merge_from(op) 320 | op = old_op 321 | op.eval_expr_defaults() 322 | return op, load_dic 323 | except OptionMissingException, e: 324 | print e 325 | op.print_usage() 326 | except OptionException, e: 327 | print e 328 | except UnpickleError, e: 329 | print "Error loading checkpoint:" 330 | print e 331 | sys.exit() 332 | 333 | -------------------------------------------------------------------------------- /include/common/matrix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef MATRIX_H_ 28 | #define MATRIX_H_ 29 | 30 | #include 31 | #ifdef NUMPY_INTERFACE 32 | #include 33 | #include 34 | #endif 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #if defined(_WIN64) || defined(_WIN32) 42 | #include 43 | #define isnan(_X) (_isnan(_X)) 44 | #define isinf(_X) (!_finite(_X)) 45 | #define uint unsigned int 46 | double sqrt(int _X); 47 | double log(int _X); 48 | #endif 49 | 50 | #ifdef USE_MKL 51 | #include 52 | #include 53 | #include 54 | #include 55 | 56 | #define IS_MKL true 57 | 58 | #ifdef DOUBLE_PRECISION 59 | #define MKL_UNIFORM vdRngUniform 60 | #define MKL_NORMAL vdRngGaussian 61 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_DUNIFORM_STD_ACCURATE 62 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_DGAUSSIAN_BOXMULLER 63 | #define MKL_EXP vdExp 64 | #define MKL_RECIP vdInv 65 | #define MKL_SQUARE vdSqr 66 | #define MKL_TANH vdTanh 67 | #define MKL_LOG vdLn 68 | #define MKL_VECMUL vdMul 69 | #define MKL_VECDIV vdDiv 70 | #else 71 | #define MKL_UNIFORM vsRngUniform 72 | #define MKL_NORMAL vsRngGaussian 73 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_SUNIFORM_STD_ACCURATE 74 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_SGAUSSIAN_BOXMULLER 75 | #define MKL_EXP vsExp 76 | #define MKL_RECIP vsInv 77 | #define MKL_SQUARE vsSqr 78 | #define MKL_TANH vsTanh 79 | #define MKL_LOG vsLn 80 | #define MKL_VECMUL vsMul 81 | #define MKL_VECDIV vsDiv 82 | #endif /* DOUBLE_PRECISION */ 83 | 84 | #else 85 | extern "C" { 86 | #include 87 | } 88 | #define IS_MKL false 89 | #endif /* USE_MKL */ 90 | 91 | #ifdef DOUBLE_PRECISION 92 | #define CBLAS_GEMM cblas_dgemm 93 | #define CBLAS_SCAL cblas_dscal 94 | #define CBLAS_AXPY cblas_daxpy 95 | #else 96 | #define CBLAS_GEMM cblas_sgemm 97 | #define CBLAS_SCAL cblas_sscal 98 | #define CBLAS_AXPY cblas_saxpy 99 | #endif /* DOUBLE_PRECISION */ 100 | 101 | #define MTYPE_MAX numeric_limits::max() 102 | 103 | class Matrix { 104 | private: 105 | MTYPE* _data; 106 | bool _ownsData; 107 | long int _numRows, _numCols; 108 | long int _numElements; 109 | CBLAS_TRANSPOSE _trans; 110 | 111 | void _init(MTYPE* data, long int numRows, long int numCols, bool transpose, bool ownsData); 112 | void _tileTo2(Matrix& target) const; 113 | void _copyAllTo(Matrix& target) const; 114 | MTYPE _sum_column(long int col) const; 115 | MTYPE _sum_row(long int row) const; 116 | MTYPE _aggregate(MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 117 | void _aggregate(long int axis, Matrix& target, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 118 | MTYPE _aggregateRow(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 119 | MTYPE _aggregateCol(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; 120 | void _updateDims(long int numRows, long int numCols); 121 | void _applyLoop(MTYPE(*func)(MTYPE)); 122 | void _applyLoop(MTYPE (*func)(MTYPE), Matrix& target); 123 | void _applyLoop2(const Matrix& a, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const; 124 | void _applyLoop2(const Matrix& a, MTYPE (*func)(MTYPE,MTYPE, MTYPE), MTYPE scalar, Matrix& target) const; 125 | void _applyLoopScalar(const MTYPE scalar, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const; 126 | void _checkBounds(long int startRow, long int endRow, long int startCol, long int endCol) const; 127 | void _divideByVector(const Matrix& vec, Matrix& target); 128 | inline long int _getNumColsBackEnd() const { 129 | return _trans == CblasNoTrans ? _numCols : _numRows; 130 | } 131 | public: 132 | enum FUNCTION { 133 | TANH, RECIPROCAL, SQUARE, ABS, EXP, LOG, ZERO, ONE, LOGISTIC1, LOGISTIC2, SIGN 134 | }; 135 | Matrix(); 136 | Matrix(long int numRows, long int numCols); 137 | #ifdef NUMPY_INTERFACE 138 | Matrix(const PyArrayObject *src); 139 | #endif 140 | Matrix(const Matrix &like); 141 | Matrix(MTYPE* data, long int numRows, long int numCols); 142 | Matrix(MTYPE* data, long int numRows, long int numCols, bool transpose); 143 | ~Matrix(); 144 | 145 | inline MTYPE& getCell(long int i, long int j) const { 146 | assert(i >= 0 && i < _numRows); 147 | assert(j >= 0 && j < _numCols); 148 | if (_trans == CblasTrans) { 149 | return _data[j * _numRows + i]; 150 | } 151 | return _data[i * _numCols + j]; 152 | } 153 | 154 | MTYPE& operator()(long int i, long int j) const { 155 | return getCell(i, j); 156 | } 157 | 158 | inline MTYPE* getData() const { 159 | return _data; 160 | } 161 | 162 | inline bool isView() const { 163 | return !_ownsData; 164 | } 165 | 166 | inline long int getNumRows() const { 167 | return _numRows; 168 | } 169 | 170 | inline long int getNumCols() const { 171 | return _numCols; 172 | } 173 | 174 | inline long int getNumDataBytes() const { 175 | return _numElements * sizeof(MTYPE); 176 | } 177 | 178 | inline long int getNumElements() const { 179 | return _numElements; 180 | } 181 | 182 | inline long int getLeadingDim() const { 183 | return _trans == CblasTrans ? _numRows : _numCols; 184 | } 185 | 186 | inline long int getFollowingDim() const { 187 | return _trans == CblasTrans ? _numCols : _numRows; 188 | } 189 | 190 | inline CBLAS_TRANSPOSE getBLASTrans() const { 191 | return _trans; 192 | } 193 | 194 | inline bool isSameDims(const Matrix& a) const { 195 | return a.getNumRows() == getNumRows() && a.getNumCols() == getNumCols(); 196 | } 197 | 198 | inline bool isTrans() const { 199 | return _trans == CblasTrans; 200 | } 201 | 202 | /* 203 | * Only use if you know what you're doing! 204 | * Does not update any dimensions. Just flips the _trans flag. 205 | * 206 | * Use transpose() if you want to get the transpose of this matrix. 207 | */ 208 | inline void setTrans(bool trans) { 209 | assert(isTrans() == trans || !isView()); 210 | _trans = trans ? CblasTrans : CblasNoTrans; 211 | } 212 | 213 | void apply(FUNCTION f); 214 | void apply(Matrix::FUNCTION f, Matrix& target); 215 | void subtractFromScalar(MTYPE scalar); 216 | void subtractFromScalar(MTYPE scalar, Matrix &target) const; 217 | void biggerThanScalar(MTYPE scalar); 218 | void smallerThanScalar(MTYPE scalar); 219 | void equalsScalar(MTYPE scalar); 220 | void biggerThanScalar(MTYPE scalar, Matrix& target) const; 221 | void smallerThanScalar(MTYPE scalar, Matrix& target) const; 222 | void equalsScalar(MTYPE scalar, Matrix& target) const; 223 | void biggerThan(Matrix& a); 224 | void biggerThan(Matrix& a, Matrix& target) const; 225 | void smallerThan(Matrix& a); 226 | void smallerThan(Matrix& a, Matrix& target) const; 227 | void minWith(Matrix &a); 228 | void minWith(Matrix &a, Matrix &target) const; 229 | void maxWith(Matrix &a); 230 | void maxWith(Matrix &a, Matrix &target) const; 231 | void equals(Matrix& a); 232 | void equals(Matrix& a, Matrix& target) const; 233 | void notEquals(Matrix& a) ; 234 | void notEquals(Matrix& a, Matrix& target) const; 235 | void add(const Matrix &m); 236 | void add(const Matrix &m, MTYPE scale); 237 | void add(const Matrix &m, Matrix& target); 238 | void add(const Matrix &m, MTYPE scale, Matrix& target); 239 | void subtract(const Matrix &m); 240 | void subtract(const Matrix &m, Matrix& target); 241 | void subtract(const Matrix &m, MTYPE scale); 242 | void subtract(const Matrix &m, MTYPE scale, Matrix& target); 243 | void addVector(const Matrix& vec, MTYPE scale); 244 | void addVector(const Matrix& vec, MTYPE scale, Matrix& target); 245 | void addVector(const Matrix& vec); 246 | void addVector(const Matrix& vec, Matrix& target); 247 | void addScalar(MTYPE scalar); 248 | void addScalar(MTYPE scalar, Matrix& target) const; 249 | void maxWithScalar(MTYPE scalar); 250 | void maxWithScalar(MTYPE scalar, Matrix &target) const; 251 | void minWithScalar(MTYPE scalar); 252 | void minWithScalar(MTYPE scalar, Matrix &target) const; 253 | void eltWiseMultByVector(const Matrix& vec); 254 | void eltWiseMultByVector(const Matrix& vec, Matrix& target); 255 | void eltWiseDivideByVector(const Matrix& vec); 256 | void eltWiseDivideByVector(const Matrix& vec, Matrix& target); 257 | void resize(long int newNumRows, long int newNumCols); 258 | void resize(const Matrix& like); 259 | Matrix& slice(long int startRow, long int endRow, long int startCol, long int endCol) const; 260 | void slice(long int startRow, long int endRow, long int startCol, long int endCol, Matrix &target) const; 261 | Matrix& sliceRows(long int startRow, long int endRow) const; 262 | void sliceRows(long int startRow, long int endRow, Matrix& target) const; 263 | Matrix& sliceCols(long int startCol, long int endCol) const; 264 | void sliceCols(long int startCol, long int endCol, Matrix& target) const; 265 | void rightMult(const Matrix &b, MTYPE scale); 266 | void rightMult(const Matrix &b, Matrix &target) const; 267 | void rightMult(const Matrix &b); 268 | void rightMult(const Matrix &b, MTYPE scaleAB, Matrix &target) const; 269 | void addProduct(const Matrix &a, const Matrix &b, MTYPE scaleAB, MTYPE scaleThis); 270 | void addProduct(const Matrix& a, const Matrix& b); 271 | void eltWiseMult(const Matrix& a); 272 | void eltWiseMult(const Matrix& a, Matrix& target) const; 273 | void eltWiseDivide(const Matrix& a); 274 | void eltWiseDivide(const Matrix& a, Matrix &target) const; 275 | Matrix& transpose() const; 276 | Matrix& transpose(bool hard) const; 277 | Matrix& tile(long int timesY, long int timesX) const; 278 | void tile(long int timesY, long int timesX, Matrix& target) const; 279 | void copy(Matrix &dest, long int srcStartRow, long int srcEndRow, long int srcStartCol, long int srcEndCol, long int destStartRow, long int destStartCol) const; 280 | Matrix& copy() const; 281 | void copy(Matrix& target) const; 282 | Matrix& sum(long int axis) const; 283 | void sum(long int axis, Matrix &target) const; 284 | MTYPE sum() const; 285 | MTYPE max() const; 286 | Matrix& max(long int axis) const; 287 | void max(long int axis, Matrix& target) const; 288 | MTYPE min() const; 289 | Matrix& min(long int axis) const; 290 | void min(long int axis, Matrix& target) const; 291 | MTYPE norm() const; 292 | MTYPE norm2() const; 293 | void scale(MTYPE scale); 294 | void scale(MTYPE alpha, Matrix& target); 295 | void reshape(long int numRows, long int numCols); 296 | Matrix& reshaped(long int numRows, long int numCols); 297 | void printShape(const char* name) const; 298 | bool hasNan() const; 299 | bool hasInf() const; 300 | #ifdef USE_MKL 301 | void randomizeNormal(VSLStreamStatePtr stream, MTYPE mean, MTYPE stdev); 302 | void randomizeUniform(VSLStreamStatePtr stream); 303 | void randomizeNormal(VSLStreamStatePtr stream); 304 | #else 305 | void randomizeNormal(MTYPE mean, MTYPE stdev); 306 | void randomizeUniform(); 307 | void randomizeNormal(); 308 | #endif 309 | void print() const; 310 | void print(long int startRow,long int rows, long int startCol,long int cols) const; 311 | void print(long int rows, long int cols) const; 312 | }; 313 | 314 | #endif /* MATRIX_H_ */ 315 | -------------------------------------------------------------------------------- /include/common/matrix_funcs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef MATRIX_FUNCS_H_ 28 | #define MATRIX_FUNCS_H_ 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #ifdef DOUBLE_PRECISION 35 | #define MTYPE double 36 | #else 37 | #define MTYPE float 38 | #endif 39 | 40 | #define MYRAND ((double)rand() / ((double)RAND_MAX + 1)) 41 | 42 | inline MTYPE _zero(MTYPE x) { 43 | return 0; 44 | } 45 | 46 | inline MTYPE _one(MTYPE x) { 47 | return 1; 48 | } 49 | 50 | inline MTYPE _abs(MTYPE x) { 51 | return x > 0 ? x : -x; 52 | } 53 | 54 | inline MTYPE _square(MTYPE x) { 55 | return x * x; 56 | } 57 | 58 | inline MTYPE _sigma1(MTYPE x) { 59 | return (tanh(x / 2) + 1) / 2; 60 | } 61 | 62 | inline MTYPE _sigma2(MTYPE x) { 63 | return 1 / (1 + exp(-x)); 64 | } 65 | 66 | inline MTYPE _recip(MTYPE x) { 67 | return 1 / x; 68 | } 69 | 70 | inline MTYPE _exp(MTYPE x) { 71 | return exp(x); 72 | } 73 | 74 | inline MTYPE _log(MTYPE x) { 75 | return log(x); 76 | } 77 | 78 | inline MTYPE _tanh(MTYPE x) { 79 | return tanh(x); 80 | } 81 | 82 | inline MTYPE _sign(MTYPE x) { 83 | return x > 0 ? 1 : -1; 84 | } 85 | 86 | inline MTYPE _rand(MTYPE x) { 87 | return MYRAND; 88 | } 89 | 90 | inline MTYPE _divide(MTYPE x, MTYPE y) { 91 | return x / y; 92 | } 93 | 94 | inline MTYPE _mult(MTYPE x, MTYPE y) { 95 | return x * y; 96 | } 97 | 98 | inline MTYPE _add(MTYPE x, MTYPE y) { 99 | return x + y; 100 | } 101 | 102 | inline MTYPE _addSquare(MTYPE x, MTYPE y) { 103 | return x*x + y; 104 | } 105 | 106 | inline MTYPE _addWithScale(MTYPE x, MTYPE y, MTYPE scale) { 107 | return x + scale*y; 108 | } 109 | 110 | inline MTYPE _max(MTYPE x, MTYPE y) { 111 | return std::max(x, y); 112 | } 113 | 114 | inline MTYPE _min(MTYPE x, MTYPE y) { 115 | return std::min(x, y); 116 | } 117 | 118 | inline MTYPE _bigger(MTYPE x, MTYPE y) { 119 | return x > y; 120 | } 121 | 122 | inline MTYPE _smaller(MTYPE x, MTYPE y) { 123 | return x < y; 124 | } 125 | 126 | inline MTYPE _equal(MTYPE x, MTYPE y) { 127 | return x == y; 128 | } 129 | 130 | inline MTYPE _notEqual(MTYPE x, MTYPE y) { 131 | return x != y; 132 | } 133 | 134 | #endif /* MATRIX_FUNCS_H_ */ 135 | -------------------------------------------------------------------------------- /include/common/queue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef QUEUE_H_ 28 | #define QUEUE_H_ 29 | #include 30 | #include 31 | 32 | /* 33 | * A thread-safe circular queue that automatically grows but never shrinks. 34 | */ 35 | template 36 | class Queue { 37 | private: 38 | T *_elements; 39 | int _numElements; 40 | int _head, _tail; 41 | int _maxSize; 42 | pthread_mutex_t *_queueMutex; 43 | pthread_cond_t *_queueCV; 44 | 45 | void _init(int initialSize) { 46 | _numElements = 0; 47 | _head = 0; 48 | _tail = 0; 49 | _maxSize = initialSize; 50 | _elements = new T[initialSize]; 51 | _queueCV = (pthread_cond_t*)(malloc(sizeof (pthread_cond_t))); 52 | _queueMutex = (pthread_mutex_t*)(malloc(sizeof (pthread_mutex_t))); 53 | pthread_mutex_init(_queueMutex, NULL); 54 | pthread_cond_init(_queueCV, NULL); 55 | } 56 | 57 | void expand() { 58 | T *newStorage = new T[_maxSize * 2]; 59 | memcpy(newStorage, _elements + _head, (_maxSize - _head) * sizeof(T)); 60 | memcpy(newStorage + _maxSize - _head, _elements, _tail * sizeof(T)); 61 | delete[] _elements; 62 | _elements = newStorage; 63 | _head = 0; 64 | _tail = _numElements; 65 | _maxSize *= 2; 66 | } 67 | public: 68 | Queue(int initialSize) { 69 | _init(initialSize); 70 | } 71 | 72 | Queue() { 73 | _init(1); 74 | } 75 | 76 | ~Queue() { 77 | pthread_mutex_destroy(_queueMutex); 78 | pthread_cond_destroy(_queueCV); 79 | delete[] _elements; 80 | free(_queueMutex); 81 | free(_queueCV); 82 | } 83 | 84 | void enqueue(T el) { 85 | pthread_mutex_lock(_queueMutex); 86 | if(_numElements == _maxSize) { 87 | expand(); 88 | } 89 | _elements[_tail] = el; 90 | _tail = (_tail + 1) % _maxSize; 91 | _numElements++; 92 | 93 | pthread_cond_signal(_queueCV); 94 | pthread_mutex_unlock(_queueMutex); 95 | } 96 | 97 | /* 98 | * Blocks until not empty. 99 | */ 100 | T dequeue() { 101 | pthread_mutex_lock(_queueMutex); 102 | if(_numElements == 0) { 103 | pthread_cond_wait(_queueCV, _queueMutex); 104 | } 105 | T el = _elements[_head]; 106 | _head = (_head + 1) % _maxSize; 107 | _numElements--; 108 | pthread_mutex_unlock(_queueMutex); 109 | return el; 110 | } 111 | 112 | /* 113 | * Obviously this number can change by the time you actually look at it. 114 | */ 115 | inline int getNumElements() const { 116 | return _numElements; 117 | } 118 | }; 119 | 120 | #endif /* QUEUE_H_ */ 121 | -------------------------------------------------------------------------------- /include/common/thread.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef THREAD_H_ 28 | #define THREAD_H_ 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | /* 35 | * Abstract joinable thread class. 36 | * The only thing the implementer has to fill in is the run method. 37 | */ 38 | class Thread { 39 | private: 40 | pthread_attr_t _pthread_attr; 41 | pthread_t _threadID; 42 | bool _joinable, _startable; 43 | 44 | static void* start_pthread_func(void *obj) { 45 | void* retval = reinterpret_cast(obj)->run(); 46 | pthread_exit(retval); 47 | return retval; 48 | } 49 | protected: 50 | virtual void* run() = 0; 51 | public: 52 | Thread(bool joinable) : _joinable(joinable), _startable(true) { 53 | pthread_attr_init(&_pthread_attr); 54 | pthread_attr_setdetachstate(&_pthread_attr, joinable ? PTHREAD_CREATE_JOINABLE : PTHREAD_CREATE_DETACHED); 55 | } 56 | 57 | virtual ~Thread() { 58 | } 59 | 60 | pthread_t start() { 61 | assert(_startable); 62 | _startable = false; 63 | int n; 64 | if ((n = pthread_create(&_threadID, &_pthread_attr, &Thread::start_pthread_func, (void*)this))) { 65 | errno = n; 66 | perror("pthread_create error"); 67 | } 68 | return _threadID; 69 | } 70 | 71 | void join(void **status) { 72 | assert(_joinable); 73 | int n; 74 | if((n = pthread_join(_threadID, status))) { 75 | errno = n; 76 | perror("pthread_join error"); 77 | } 78 | } 79 | 80 | void join() { 81 | join(NULL); 82 | } 83 | 84 | pthread_t getThreadID() const { 85 | return _threadID; 86 | } 87 | }; 88 | 89 | #endif /* THREAD_H_ */ 90 | -------------------------------------------------------------------------------- /include/convnet.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef CONVNET3 28 | #define CONVNET3 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "layer.cuh" 39 | #include "data.cuh" 40 | #include "worker.cuh" 41 | #include "weights.cuh" 42 | 43 | class Worker; 44 | class WorkResult; 45 | class Layer; 46 | class DataLayer; 47 | class CostLayer; 48 | 49 | class ConvNet : public Thread { 50 | protected: 51 | std::vector _layers; 52 | std::vector _dataLayers; 53 | std::vector _costs; 54 | GPUData* _data; 55 | 56 | DataProvider* _dp; 57 | int _deviceID; 58 | 59 | Queue _workerQueue; 60 | Queue _resultQueue; 61 | 62 | // For gradient checking 63 | int _numFailures; 64 | int _numTests; 65 | double _baseErr; 66 | 67 | virtual Layer* initLayer(string& layerType, PyObject* paramsDict); 68 | void initCuda(); 69 | void* run(); 70 | public: 71 | ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID); 72 | 73 | Queue& getWorkerQueue(); 74 | Queue& getResultQueue(); 75 | DataProvider& getDataProvider(); 76 | 77 | Layer& operator[](int idx); 78 | Layer& getLayer(int idx); 79 | void copyToCPU(); 80 | void copyToGPU(); 81 | void updateWeights(); 82 | void reset(); 83 | int getNumLayers(); 84 | 85 | void bprop(PASS_TYPE passType); 86 | void fprop(PASS_TYPE passType); 87 | void fprop(int miniIdx, PASS_TYPE passType); 88 | void fprop(GPUData& data, PASS_TYPE passType); 89 | 90 | bool checkGradient(const std::string& name, float eps, Weights& weights); 91 | void checkGradients(); 92 | Cost& getCost(); 93 | Cost& getCost(Cost& cost); 94 | double getCostValue(); 95 | }; 96 | 97 | #endif /* CONVNET3 */ 98 | 99 | -------------------------------------------------------------------------------- /include/cost.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef COST_CUH 28 | #define COST_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #include "layer.cuh" 35 | #include "util.cuh" 36 | 37 | class CostLayer; 38 | 39 | /* 40 | * Wrapper for dictionary mapping cost name to vector of returned values. 41 | */ 42 | class Cost { 43 | private: 44 | int _numCases; 45 | CostMap _costMap; 46 | CostCoeffMap _costCoeffMap; 47 | public: 48 | Cost(int numCases); 49 | Cost(int numCases, std::vector& costs); 50 | doublev& operator [](const std::string s); 51 | CostMap& getCostMap(); 52 | CostCoeffMap& getCostCoeffMap(); 53 | int getNumCases(); 54 | /* 55 | * Returns sum of first values returned by all the costs, weighted by the cost coefficients. 56 | */ 57 | double getValue(); 58 | Cost& operator += (Cost& er); 59 | Cost& operator /= (const double v); 60 | virtual ~Cost(); 61 | }; 62 | 63 | 64 | #endif /* COST_CUH */ 65 | 66 | -------------------------------------------------------------------------------- /include/cudaconv2/cudaconv2.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef COMMON_CUH 28 | #define COMMON_CUH 29 | 30 | #include 31 | #include 32 | #include "conv_util.cuh" 33 | 34 | enum FILTER_OUTPUT_ORDER {MODULE_FILTER_IMAGE, FILTER_MODULE_IMAGE}; 35 | 36 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 37 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 38 | int numImgColors, int numGroups); 39 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 40 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 41 | int numImgColors, int numGroups, 42 | float scaleTargets, float scaleOutput); 43 | 44 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 45 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 46 | int numImgColors, int numGroups); 47 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, 48 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 49 | int numImgColors, int numGroups, 50 | float scaleTargets, float scaleOutput); 51 | 52 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 53 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups); 54 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 55 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups, 56 | float scaleTargets, float scaleOutput); 57 | 58 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 59 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups); 60 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, 61 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups, 62 | float scaleTargets, float scaleOutput); 63 | 64 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 65 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, 66 | int moduleStride, int numImgColors, int numGroups, int partialSum); 67 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 68 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 69 | int numImgColors, int numGroups, int partialSum, 70 | float scaleTargets, float scaleOutput); 71 | 72 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 73 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, 74 | int moduleStride, int numImgColors, int numGroups); 75 | 76 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, 77 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 78 | int numImgColors, int numGroups, float scaleTargets, float scaleOutput); 79 | 80 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 81 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 82 | int numImgColors, int numFilterColors, int numGroups); 83 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 84 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 85 | int numImgColors, int numFilterColors, int numGroups, 86 | float scaleTargets, float scaleOutput); 87 | 88 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 89 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 90 | int numImgColors, int numFilterColors, int numGroups, 91 | float scaleTargets, float scaleOutput); 92 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 93 | int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride, 94 | int numImgColors, int numFilterColors, int numGroups); 95 | 96 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 97 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 98 | int numImgColors, int numFilterColors, int numGroups); 99 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 100 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, 101 | int numGroups, int partialSum, float scaleTargets, float scaleOutput); 102 | 103 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 104 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, 105 | int numImgColors, int numFilterColors, int numGroups); 106 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices, 107 | int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, 108 | int numGroups, float scaleTargets, float scaleOutput); 109 | 110 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 111 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups); 112 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 113 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups, 114 | float scaleTargets, float scaleOutput); 115 | 116 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 117 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups); 118 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices, 119 | int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups, 120 | float scaleTargets, float scaleOutput); 121 | 122 | 123 | #endif /* COMMON_CUH */ 124 | 125 | -------------------------------------------------------------------------------- /include/data.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef DATA_CUH 28 | #define DATA_CUH 29 | 30 | #include 31 | #include 32 | #include "util.cuh" 33 | 34 | template 35 | class Data { 36 | protected: 37 | std::vector* _data; 38 | public: 39 | typedef typename std::vector::iterator T_iter; 40 | 41 | Data(std::vector& data) : _data(&data) { 42 | assert(_data->size() > 0); 43 | for (int i = 1; i < data.size(); i++) { 44 | assert(data[i-1]->getLeadingDim() == data[i]->getLeadingDim()); 45 | } 46 | assert(data[0]->getLeadingDim() > 0); 47 | } 48 | 49 | ~Data() { 50 | for (T_iter it = _data->begin(); it != _data->end(); ++it) { 51 | delete *it; 52 | } 53 | delete _data; 54 | } 55 | 56 | T& operator [](int idx) { 57 | return *_data->at(idx); 58 | } 59 | 60 | int getSize() { 61 | return _data->size(); 62 | } 63 | 64 | std::vector& getData() { 65 | return *_data; 66 | } 67 | 68 | int getNumCases() { 69 | return _data->at(0)->getLeadingDim(); 70 | } 71 | }; 72 | 73 | typedef Data GPUData; 74 | typedef Data CPUData; 75 | 76 | class DataProvider { 77 | protected: 78 | CPUData* _hData; 79 | NVMatrixV _data; 80 | int _minibatchSize; 81 | long int _dataSize; 82 | public: 83 | DataProvider(int minibatchSize); 84 | GPUData& operator[](int idx); 85 | void setData(CPUData&); 86 | void clearData(); 87 | GPUData& getMinibatch(int idx); 88 | GPUData& getDataSlice(int startCase, int endCase); 89 | int getNumMinibatches(); 90 | int getMinibatchSize(); 91 | int getNumCases(); 92 | int getNumCasesInMinibatch(int idx); 93 | }; 94 | 95 | #endif /* DATA_CUH */ 96 | 97 | -------------------------------------------------------------------------------- /include/layer.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef LAYER_CUH 28 | #define LAYER_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "convnet.cuh" 37 | #include "cost.cuh" 38 | #include "weights.cuh" 39 | #include "neuron.cuh" 40 | 41 | class Cost; 42 | class ConvNet; 43 | class CostLayer; 44 | class DataLayer; 45 | 46 | /* 47 | * Abstract layer. 48 | */ 49 | class Layer { 50 | protected: 51 | ConvNet* _convNet; 52 | std::vector _prev, _next; 53 | int _rcvdFInputs, _rcvdBInputs; 54 | 55 | NVMatrixV _inputs; 56 | NVMatrix *_outputs; // TODO: make this a pointer so you can reuse previous layers' matrices 57 | NVMatrix *_actsGrad; // Layer activity gradients 58 | bool _gradConsumer, _foundGradConsumers, _trans; 59 | bool _conserveMem; 60 | int _numGradProducersNext; 61 | int _actsTarget, _actsGradTarget; 62 | std::string _name, _type; 63 | void fpropNext(PASS_TYPE passType); 64 | virtual void truncBwdActs(); 65 | virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) = 0; 66 | 67 | virtual void bpropCommon(NVMatrix& v, PASS_TYPE passType) { 68 | // Do nothing by default 69 | } 70 | virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) { 71 | assert(!isGradProducer()); // Only do nothing if not grad producer 72 | } 73 | public: 74 | static bool _saveActsGrad, _saveActs; 75 | 76 | Layer(ConvNet* convNet, PyObject* paramsDict, bool trans); 77 | 78 | virtual void fprop(PASS_TYPE passType); 79 | void fprop(NVMatrix& v, PASS_TYPE passType); 80 | virtual void fprop(NVMatrixV& v, PASS_TYPE passType); 81 | virtual void bprop(PASS_TYPE passType); 82 | void bprop(NVMatrix& v, PASS_TYPE passType); 83 | virtual void reset(); 84 | int incRcvdBInputs(); 85 | int getRcvdFInputs(); 86 | int getRcvdBInputs(); 87 | bool isGradConsumer(); 88 | virtual bool isGradProducer(); 89 | std::string& getName(); 90 | std::string& getType(); 91 | void addNext(Layer* l); 92 | void addPrev(Layer* l); 93 | std::vector& getPrev(); 94 | std::vector& getNext(); 95 | virtual NVMatrix& getActs(); 96 | virtual NVMatrix& getActsGrad(); 97 | virtual void postInit(); 98 | 99 | // Do nothing if this layer has no weights 100 | virtual void updateWeights() { 101 | } 102 | virtual void checkGradients() { 103 | } 104 | virtual void copyToCPU() { 105 | } 106 | virtual void copyToGPU() { 107 | } 108 | }; 109 | 110 | class NeuronLayer : public Layer { 111 | protected: 112 | Neuron* _neuron; 113 | 114 | virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 115 | virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 116 | public: 117 | NeuronLayer(ConvNet* convNet, PyObject* paramsDict); 118 | }; 119 | 120 | class WeightLayer : public Layer { 121 | protected: 122 | WeightList _weights; 123 | Weights *_biases; 124 | float _wStep, _bStep; 125 | 126 | void bpropCommon(NVMatrix& v, PASS_TYPE passType); 127 | virtual void bpropBiases(NVMatrix& v, PASS_TYPE passType) = 0; 128 | virtual void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) = 0; 129 | public: 130 | WeightLayer(ConvNet* convNet, PyObject* paramsDict, bool trans, bool useGrad); 131 | virtual void updateWeights(); 132 | virtual void copyToCPU(); 133 | virtual void copyToGPU(); 134 | void checkGradients(); 135 | Weights& getWeights(int idx); 136 | }; 137 | 138 | class FCLayer : public WeightLayer { 139 | protected: 140 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 141 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 142 | void bpropBiases(NVMatrix& v, PASS_TYPE passType); 143 | void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); 144 | public: 145 | FCLayer(ConvNet* convNet, PyObject* paramsDict); 146 | }; 147 | 148 | class SoftmaxLayer : public Layer { 149 | protected: 150 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 151 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 152 | public: 153 | SoftmaxLayer(ConvNet* convNet, PyObject* paramsDict); 154 | }; 155 | 156 | class EltwiseSumLayer : public Layer { 157 | protected: 158 | vector* _coeffs; 159 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 160 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 161 | public: 162 | EltwiseSumLayer(ConvNet* convNet, PyObject* paramsDict); 163 | }; 164 | 165 | class EltwiseMaxLayer : public Layer { 166 | protected: 167 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 168 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 169 | public: 170 | EltwiseMaxLayer(ConvNet* convNet, PyObject* paramsDict); 171 | }; 172 | 173 | class DataLayer : public Layer { 174 | private: 175 | int _dataIdx; 176 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 177 | public: 178 | DataLayer(ConvNet* convNet, PyObject* paramsDict); 179 | 180 | bool isGradProducer(); 181 | void fprop(PASS_TYPE passType); 182 | void fprop(NVMatrixV& data, PASS_TYPE passType); 183 | }; 184 | 185 | class LocalLayer : public WeightLayer { 186 | protected: 187 | struct FilterConns { 188 | int* hFilterConns; 189 | int* dFilterConns; 190 | }; 191 | vector* _filterConns; 192 | 193 | intv* _padding, *_stride, *_filterSize, *_channels, *_imgSize, *_groups; 194 | intv* _imgPixels, *_filterPixels, *_filterChannels, *_overSample, *_randSparse; 195 | int _modulesX, _modules, _numFilters; 196 | 197 | void copyToGPU(); 198 | 199 | public: 200 | LocalLayer(ConvNet* convNet, PyObject* paramsDict, bool useGrad); 201 | }; 202 | 203 | class ConvLayer : public LocalLayer { 204 | protected: 205 | int _partialSum; 206 | bool _sharedBiases; 207 | 208 | NVMatrix _weightGradTmp, _actGradTmp; 209 | 210 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 211 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 212 | void bpropBiases(NVMatrix& v, PASS_TYPE passType); 213 | void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); 214 | void truncBwdActs(); 215 | 216 | public: 217 | ConvLayer(ConvNet* convNet, PyObject* paramsDict); 218 | }; 219 | 220 | class LocalUnsharedLayer : public LocalLayer { 221 | protected: 222 | NVMatrix _sexMask; 223 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 224 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 225 | void bpropBiases(NVMatrix& v, PASS_TYPE passType); 226 | void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType); 227 | public: 228 | LocalUnsharedLayer(ConvNet* convNet, PyObject* paramsDict); 229 | }; 230 | 231 | class PoolLayer : public Layer { 232 | protected: 233 | int _channels, _sizeX, _start, _stride, _outputsX; 234 | int _imgSize; 235 | string _pool; 236 | public: 237 | PoolLayer(ConvNet* convNet, PyObject* paramsDict, bool trans); 238 | 239 | static PoolLayer& makePoolLayer(ConvNet* convNet, PyObject* paramsDict); 240 | }; 241 | 242 | class AvgPoolLayer : public PoolLayer { 243 | protected: 244 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 245 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 246 | public: 247 | AvgPoolLayer(ConvNet* convNet, PyObject* paramsDict); 248 | }; 249 | 250 | class MaxPoolLayer : public PoolLayer { 251 | protected: 252 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 253 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 254 | public: 255 | MaxPoolLayer(ConvNet* convNet, PyObject* paramsDict); 256 | }; 257 | 258 | class NailbedLayer : public Layer { 259 | protected: 260 | int _channels, _start, _stride, _outputsX; 261 | int _imgSize; 262 | public: 263 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 264 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 265 | 266 | NailbedLayer(ConvNet* convNet, PyObject* paramsDict); 267 | }; 268 | 269 | class GaussianBlurLayer : public Layer { 270 | protected: 271 | int _channels; 272 | Matrix* _hFilter; 273 | NVMatrix _filter; 274 | NVMatrix _actGradsTmp; 275 | public: 276 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 277 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 278 | void copyToGPU(); 279 | 280 | GaussianBlurLayer(ConvNet* convNet, PyObject* paramsDict); 281 | }; 282 | 283 | class ResizeLayer : public Layer { 284 | protected: 285 | int _channels; 286 | float _scale; 287 | int _imgSize, _tgtSize; 288 | public: 289 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 290 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 291 | 292 | ResizeLayer(ConvNet* convNet, PyObject* paramsDict); 293 | }; 294 | 295 | class RGBToYUVLayer : public Layer { 296 | public: 297 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 298 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 299 | 300 | RGBToYUVLayer(ConvNet* convNet, PyObject* paramsDict); 301 | }; 302 | 303 | class RGBToLABLayer : public Layer { 304 | protected: 305 | bool _center; 306 | public: 307 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 308 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 309 | 310 | RGBToLABLayer(ConvNet* convNet, PyObject* paramsDict); 311 | }; 312 | 313 | class ResponseNormLayer : public Layer { 314 | protected: 315 | int _channels, _size; 316 | float _scale, _pow; 317 | NVMatrix _denoms; 318 | 319 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 320 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 321 | void truncBwdActs(); 322 | public: 323 | ResponseNormLayer(ConvNet* convNet, PyObject* paramsDict); 324 | }; 325 | 326 | class CrossMapResponseNormLayer : public ResponseNormLayer { 327 | protected: 328 | bool _blocked; 329 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 330 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 331 | public: 332 | CrossMapResponseNormLayer(ConvNet* convNet, PyObject* paramsDict); 333 | }; 334 | 335 | class ContrastNormLayer : public ResponseNormLayer { 336 | protected: 337 | int _imgSize; 338 | NVMatrix _meanDiffs; 339 | 340 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 341 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 342 | void truncBwdActs(); 343 | public: 344 | ContrastNormLayer(ConvNet* convNet, PyObject* paramsDict); 345 | }; 346 | 347 | class CostLayer : public Layer { 348 | protected: 349 | float _coeff; 350 | doublev _costv; 351 | public: 352 | CostLayer(ConvNet* convNet, PyObject* paramsDict, bool trans); 353 | void bprop(PASS_TYPE passType); 354 | virtual doublev& getCost(); 355 | float getCoeff(); 356 | bool isGradProducer(); 357 | 358 | static CostLayer& makeCostLayer(ConvNet* convNet, string& type, PyObject* paramsDict); 359 | }; 360 | 361 | /* 362 | * Input 0: labels 363 | * Input 1: softmax outputs 364 | */ 365 | class LogregCostLayer : public CostLayer { 366 | protected: 367 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 368 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 369 | public: 370 | LogregCostLayer(ConvNet* convNet, PyObject* paramsDict); 371 | }; 372 | 373 | class SumOfSquaresCostLayer : public CostLayer { 374 | protected: 375 | void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); 376 | void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); 377 | public: 378 | SumOfSquaresCostLayer(ConvNet* convNet, PyObject* paramsDict); 379 | }; 380 | 381 | #endif /* LAYER_CUH */ 382 | 383 | -------------------------------------------------------------------------------- /include/layer_kernels.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef LAYER_KERNELS_CUH 28 | #define LAYER_KERNELS_CUH 29 | 30 | #include 31 | #include 32 | 33 | #define LOGREG_GRAD_THREADS_X 32 34 | #define LOGREG_GRAD_THREADS_Y 4 35 | 36 | #define LOGREG_ERR_THREADS_X 128 37 | #define LOGREG_ERR_THREADS_Y 1 38 | 39 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out); 40 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); 41 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add); 42 | 43 | // Numerical stability optimization: this routine combines computeLogregGrad with computeSoftmaxGrad 44 | // to avoi dividing and then multiplying by quantities that may be near zero. 45 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff); 46 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add); 47 | 48 | #endif /* LAYER_KERNELS_CUH */ 49 | 50 | -------------------------------------------------------------------------------- /include/nvmatrix/nvmatrix_operators.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef NVMATRIX_OPERATORS_CUH 28 | #define NVMATRIX_OPERATORS_CUH 29 | 30 | #include 31 | 32 | class NVMatrixOps { 33 | public: 34 | class Exp { 35 | public: 36 | __device__ inline float operator()(const float a) const { 37 | return __expf(a); 38 | } 39 | }; 40 | 41 | class Logistic { 42 | public: 43 | __device__ inline float operator()(const float a) const { 44 | return __fdividef(1.0f, 1.0f + __expf(-a)); 45 | } 46 | }; 47 | 48 | class Log { 49 | public: 50 | __device__ inline float operator()(const float a) const { 51 | return __logf(a); 52 | } 53 | }; 54 | 55 | class Square { 56 | public: 57 | __device__ inline float operator()(const float a) const { 58 | return a * a; 59 | } 60 | }; 61 | 62 | class Sqrt { 63 | public: 64 | __device__ inline float operator()(const float a) const { 65 | return sqrtf(a); 66 | } 67 | }; 68 | 69 | class Reciprocal { 70 | public: 71 | __device__ inline float operator()(const float a) const { 72 | return 1.0f / a; 73 | } 74 | }; 75 | 76 | class Abs { 77 | public: 78 | __device__ inline float operator()(const float a) const { 79 | return a > 0 ? a : -a; 80 | } 81 | }; 82 | 83 | class Sign { 84 | public: 85 | __device__ inline float operator()(const float a) const { 86 | return (a > 0) - (a < 0); 87 | } 88 | }; 89 | 90 | class Identity { 91 | public: 92 | __device__ inline float operator()(const float a) const { 93 | return a; 94 | } 95 | }; 96 | 97 | class Zero { 98 | public: 99 | __device__ inline float operator()(const float a) const { 100 | return 0; 101 | } 102 | }; 103 | 104 | class One { 105 | public: 106 | __device__ inline float operator()(const float a) const { 107 | return 1; 108 | } 109 | }; 110 | 111 | class SmallerThanScalar { 112 | private: 113 | const float scalar; 114 | public: 115 | SmallerThanScalar(const float _scalar) : scalar(_scalar) { 116 | } 117 | __device__ inline float operator()(const float a) const { 118 | return a < scalar; 119 | } 120 | }; 121 | 122 | class BiggerThanScalar { 123 | private: 124 | const float scalar; 125 | public: 126 | BiggerThanScalar(const float _scalar) : scalar(_scalar) { 127 | } 128 | __device__ inline float operator()(const float a) const { 129 | return a > scalar; 130 | } 131 | }; 132 | 133 | class AddScalar { 134 | private: 135 | const float scalar; 136 | public: 137 | AddScalar(const float _scalar) : scalar(_scalar) { 138 | } 139 | __device__ inline float operator()(const float a) const { 140 | return a + scalar; 141 | } 142 | }; 143 | 144 | class WeightedAddScalar { 145 | private: 146 | const float weight, scalar; 147 | public: 148 | WeightedAddScalar(const float _weight, const float _scalar) : weight(_weight), scalar(_scalar) { 149 | } 150 | __device__ inline float operator()(const float a) const { 151 | return weight * a + scalar; 152 | } 153 | }; 154 | 155 | class MultByScalar { 156 | private: 157 | const float scalar; 158 | public: 159 | MultByScalar(const float _scalar) : scalar(_scalar) { 160 | } 161 | __device__ inline float operator()(const float a) const { 162 | return a * scalar; 163 | } 164 | }; 165 | 166 | class Pow { 167 | private: 168 | const float p; 169 | public: 170 | Pow(const float _p) : p(_p) { 171 | } 172 | __device__ inline float operator()(const float a) const { 173 | return __powf(a, p); 174 | } 175 | }; 176 | 177 | template 178 | class InRange { 179 | private: 180 | const float lower, upper; 181 | public: 182 | InRange(const float _lower, const float _upper) : lower(_lower), upper(_upper) { 183 | } 184 | __device__ inline float operator()(const float a) const { 185 | return exclusive ? a > lower && a < upper : a >= lower && a <= upper; 186 | } 187 | }; 188 | 189 | class MinWithScalar { 190 | private: 191 | const float scalar; 192 | public: 193 | MinWithScalar(const float _scalar) : scalar(_scalar) { 194 | } 195 | __device__ inline float operator()(const float a) const { 196 | return a > scalar ? scalar : a; 197 | } 198 | }; 199 | 200 | class MaxWithScalar { 201 | private: 202 | const float scalar; 203 | public: 204 | MaxWithScalar(const float _scalar) : scalar(_scalar) { 205 | } 206 | __device__ inline float operator()(const float a) const { 207 | return a > scalar ? a : scalar; 208 | } 209 | }; 210 | }; 211 | 212 | class NVMatrixBinaryOps { 213 | public: 214 | class Equals { 215 | public: 216 | __device__ inline float operator()(const float a, const float b) const { 217 | return a == b; 218 | } 219 | }; 220 | 221 | class BiggerThan { 222 | public: 223 | __device__ inline float operator()(const float a, const float b) const { 224 | return a > b; 225 | } 226 | }; 227 | 228 | class Divide { 229 | public: 230 | __device__ inline float operator()(const float a, const float b) const { 231 | return __fdividef(a, b); 232 | } 233 | }; 234 | 235 | class Multiply { 236 | public: 237 | __device__ inline float operator()(const float a, const float b) const { 238 | return a * b; 239 | } 240 | }; 241 | 242 | class SquaredDiff { 243 | public: 244 | __device__ inline float operator()(const float a, const float b) const { 245 | return (a - b) * (a - b); 246 | } 247 | }; 248 | 249 | class WeightedAdd { 250 | private: 251 | const float scaleA, scaleB; 252 | public: 253 | WeightedAdd(const float _scaleA, const float _scaleB) : scaleA(_scaleA), scaleB(_scaleB) { 254 | } 255 | __device__ inline float operator()(const float a, const float b) const { 256 | return a * scaleA + b * scaleB; 257 | } 258 | }; 259 | 260 | class Add { 261 | public: 262 | __device__ inline float operator()(const float a, const float b) const { 263 | return a + b; 264 | } 265 | }; 266 | 267 | class First { 268 | public: 269 | __device__ inline float operator()(const float a, const float b) const { 270 | return a; 271 | } 272 | }; 273 | 274 | class Second { 275 | public: 276 | __device__ inline float operator()(const float a, const float b) const { 277 | return b; 278 | } 279 | }; 280 | 281 | class SecondScaled { 282 | private: 283 | const float scale; 284 | public: 285 | SecondScaled(const float _scale) : scale(_scale) { 286 | } 287 | __device__ inline float operator()(const float a, const float b) const { 288 | return scale * b; 289 | } 290 | }; 291 | }; 292 | 293 | class NVMatrixAggs { 294 | public: 295 | class Sum { 296 | public: 297 | __device__ inline float operator()(const float a, const float b) const { 298 | return a + b; 299 | } 300 | __device__ inline float getBaseValue() { 301 | return 0; 302 | } 303 | }; 304 | 305 | class Max { 306 | public: 307 | __device__ inline float operator()(const float a, const float b) const { 308 | return a > b ? a : b; 309 | } 310 | __device__ inline float getBaseValue() { 311 | return -2e38; 312 | } 313 | }; 314 | 315 | class Min { 316 | public: 317 | __device__ inline float operator()(const float a, const float b) const { 318 | return a > b ? b : a; 319 | } 320 | __device__ inline float getBaseValue() { 321 | return 2e38; 322 | } 323 | }; 324 | 325 | template 326 | class ArgMax { 327 | private: 328 | UnaryOperator u; 329 | public: 330 | ArgMax(UnaryOperator _u) : u(_u) { 331 | } 332 | __device__ inline float operator()(const float a, const float b) const { 333 | return u(a) > u(b) ? a : b; 334 | } 335 | __device__ inline float getBaseValue() { 336 | return u.getArgMin(); 337 | } 338 | }; 339 | }; 340 | 341 | class NVMatrixTernaryOps { 342 | public: 343 | class Add { 344 | public: 345 | __device__ inline float operator()(const float a, const float b, const float c) const { 346 | return a + b + c; 347 | } 348 | }; 349 | }; 350 | 351 | #endif /* NVMATRIX_OPERATORS_CUH */ 352 | 353 | -------------------------------------------------------------------------------- /include/pyconvnet.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef PYCONVNET3_CUH 28 | #define PYCONVNET3_CUH 29 | 30 | #define _QUOTEME(x) #x 31 | #define QUOTEME(x) _QUOTEME(x) 32 | 33 | extern "C" void INITNAME(); 34 | 35 | PyObject* initModel(PyObject *self, PyObject *args); 36 | PyObject* startBatch(PyObject *self, PyObject *args); 37 | PyObject* finishBatch(PyObject *self, PyObject *args); 38 | PyObject* checkGradients(PyObject *self, PyObject *args); 39 | PyObject* syncWithHost(PyObject *self, PyObject *args); 40 | PyObject* startMultiviewTest(PyObject *self, PyObject *args); 41 | PyObject* startFeatureWriter(PyObject *self, PyObject *args); 42 | 43 | #endif /* PYCONVNET3_CUH */ 44 | 45 | -------------------------------------------------------------------------------- /include/util.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef UTIL_H 28 | #define UTIL_H 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | /* 40 | * The types of passes that the convnet supports. Used in the fprop and bprop functions in 41 | * ConvNet class. Most of the layers ignore the pass type, but some make use of it. 42 | */ 43 | enum PASS_TYPE {PASS_TRAIN, PASS_TEST, PASS_GC}; 44 | 45 | // For gradient checking 46 | #define GC_SUPPRESS_PASSES true 47 | #define GC_REL_ERR_THRESH 0.02 48 | 49 | /* 50 | * Store entire data matrix on GPU if its size does not exceed this many MB. 51 | * Otherwise store only one minibatch at a time. 52 | */ 53 | #define MAX_DATA_ON_GPU 200 54 | 55 | typedef std::vector MatrixV; 56 | typedef std::vector NVMatrixV; 57 | typedef std::map*> CostMap; 58 | typedef std::map CostCoeffMap; 59 | typedef std::vector doublev; 60 | typedef std::vector floatv; 61 | typedef std::vector intv; 62 | 63 | floatv* getFloatV(PyObject* pyList); 64 | intv* getIntV(PyObject* pyList); 65 | MatrixV* getMatrixV(PyObject* pyList); 66 | int* getIntA(PyObject* pyList); 67 | 68 | int pyDictGetInt(PyObject* dict, const char* key); 69 | intv* pyDictGetIntV(PyObject* dict, const char* key); 70 | std::string pyDictGetString(PyObject* dict, const char* key); 71 | float pyDictGetFloat(PyObject* dict, const char* key); 72 | floatv* pyDictGetFloatV(PyObject* dict, const char* key); 73 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key); 74 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key); 75 | int* pyDictGetIntA(PyObject* dict, const char* key); 76 | 77 | template 78 | std::string tostr(T n) { 79 | std::ostringstream result; 80 | result << n; 81 | return result.str(); 82 | } 83 | 84 | #endif /* UTIL_H */ 85 | 86 | -------------------------------------------------------------------------------- /include/weights.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef WEIGHTS_CUH 28 | #define WEIGHTS_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include "util.cuh" 38 | 39 | using namespace std; 40 | 41 | class Weights { 42 | private: 43 | Matrix* _hWeights, *_hWeightsInc; 44 | NVMatrix* _weights, *_weightsInc, *_weightsGrad; 45 | 46 | float _epsW, _wc, _mom; 47 | bool _onGPU, _useGrad; 48 | int _numUpdates; 49 | static bool _autoCopyToGPU; 50 | 51 | // Non-NULL if these weights are really shared from some other layer 52 | Weights* _srcWeights; 53 | 54 | public: 55 | NVMatrix& operator*() { 56 | return getW(); 57 | } 58 | 59 | Weights(Weights& srcWeights, float epsW) : _srcWeights(&srcWeights), _epsW(epsW), _wc(0), _onGPU(false), _numUpdates(0), 60 | _weights(NULL), _weightsInc(NULL), _weightsGrad(NULL){ 61 | _hWeights = &srcWeights.getCPUW(); 62 | _hWeightsInc = &srcWeights.getCPUWInc(); 63 | _mom = srcWeights.getMom(); 64 | _useGrad = srcWeights.isUseGrad(); 65 | if (_autoCopyToGPU) { 66 | copyToGPU(); 67 | } 68 | } 69 | 70 | Weights(Matrix& hWeights, Matrix& hWeightsInc, float epsW, float wc, float mom, bool useGrad) 71 | : _srcWeights(NULL), _hWeights(&hWeights), _hWeightsInc(&hWeightsInc), _numUpdates(0), 72 | _epsW(epsW), _wc(wc), _mom(mom), _useGrad(useGrad), _onGPU(false), _weights(NULL), 73 | _weightsInc(NULL), _weightsGrad(NULL) { 74 | if (_autoCopyToGPU) { 75 | copyToGPU(); 76 | } 77 | } 78 | 79 | ~Weights() { 80 | delete _hWeights; 81 | delete _hWeightsInc; 82 | if (_srcWeights == NULL) { 83 | delete _weights; 84 | delete _weightsInc; 85 | delete _weightsGrad; 86 | } 87 | } 88 | 89 | static void setAutoCopyToGPU(bool autoCopyToGPU) { 90 | _autoCopyToGPU = autoCopyToGPU; 91 | } 92 | 93 | NVMatrix& getW() { 94 | assert(_onGPU); 95 | return *_weights; 96 | } 97 | 98 | NVMatrix& getInc() { 99 | assert(_onGPU); 100 | return *_weightsInc; 101 | } 102 | 103 | NVMatrix& getGrad() { 104 | assert(_onGPU); 105 | return _useGrad ? *_weightsGrad : *_weightsInc; 106 | } 107 | 108 | Matrix& getCPUW() { 109 | return *_hWeights; 110 | } 111 | 112 | Matrix& getCPUWInc() { 113 | return *_hWeightsInc; 114 | } 115 | 116 | int getNumRows() const { 117 | return _hWeights->getNumRows(); 118 | } 119 | 120 | int getNumCols() const { 121 | return _hWeights->getNumCols(); 122 | } 123 | 124 | void copyToCPU() { 125 | if (_srcWeights == NULL) { 126 | assert(_onGPU); 127 | _weights->copyToHost(*_hWeights); 128 | _weightsInc->copyToHost(*_hWeightsInc); 129 | } 130 | } 131 | 132 | // This function is assumed to be called in the order in which the layers 133 | // were defined 134 | void copyToGPU() { 135 | if (_srcWeights == NULL) { 136 | _weights = new NVMatrix(); 137 | _weightsInc = new NVMatrix(); 138 | _weights->copyFromHost(*_hWeights, true); 139 | _weightsInc->copyFromHost(*_hWeightsInc, true); 140 | _weightsGrad = _useGrad ? new NVMatrix() : NULL; 141 | } else { 142 | _weights = _srcWeights->_weights; 143 | _weightsInc = _srcWeights->_weightsInc; 144 | _weightsGrad = _srcWeights->_weightsGrad; 145 | } 146 | _onGPU = true; 147 | } 148 | 149 | // Scale your gradient by epsW / numCases! 150 | void update() { 151 | // Only true owner of weights updates 152 | if (_srcWeights == NULL && _epsW > 0) { 153 | assert(_onGPU); 154 | if (_useGrad) { 155 | _weightsInc->add(*_weightsGrad, _mom, 1); 156 | } 157 | if (_wc > 0) { 158 | _weightsInc->add(*_weights, -_wc * _epsW); 159 | } 160 | _weights->add(*_weightsInc); 161 | _numUpdates = 0; 162 | } 163 | } 164 | 165 | int incNumUpdates() { 166 | if (_srcWeights != NULL) { 167 | return _srcWeights->incNumUpdates(); 168 | } 169 | return _numUpdates++; 170 | } 171 | 172 | // Returns the number of times a gradient has been computed for this 173 | // weight matrix during the current pass (interval between two calls of update()) 174 | // through the net. This number will only be greater than 1 if this weight matrix 175 | // is *shared* by multiple layers in the net. 176 | int getNumUpdates() const { 177 | if (_srcWeights != NULL) { 178 | return _srcWeights->getNumUpdates(); 179 | } 180 | return _numUpdates; 181 | } 182 | 183 | float getEps() const { 184 | return _epsW; 185 | } 186 | 187 | float getMom() const { 188 | return _mom; 189 | } 190 | 191 | float getWC() const { 192 | return _wc; 193 | } 194 | 195 | bool isUseGrad() const { // is good grammar 196 | return _useGrad; 197 | } 198 | }; 199 | 200 | class WeightList { 201 | private: 202 | std::vector _weightList; 203 | 204 | public: 205 | Weights& operator[](const int idx) const { 206 | return *_weightList[idx]; 207 | } 208 | 209 | ~WeightList() { 210 | for (int i = 0; i < _weightList.size(); i++) { 211 | delete _weightList[i]; 212 | } 213 | } 214 | 215 | // WeightList(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) : _initialized(false) { 216 | // initialize(hWeights, hWeightsInc, epsW, wc, mom, useGrads); 217 | // } 218 | 219 | WeightList() { 220 | } 221 | 222 | // void initialize(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) { 223 | // for (int i = 0; i < hWeights.size(); i++) { 224 | // _weightList.push_back(new Weights(*hWeights[i], *hWeightsInc[i], epsW[i], wc[i], mom[i], useGrads)); 225 | // } 226 | // _initialized = true; 227 | // delete &hWeights; 228 | // delete &hWeightsInc; 229 | // delete &epsW; 230 | // delete &wc; 231 | // delete &mom; 232 | // } 233 | 234 | void addWeights(Weights& w) { 235 | _weightList.push_back(&w); 236 | } 237 | 238 | // void addWeights(WeightList& wl) { 239 | // for (int i = 0; i < wl.getSize(); i++) { 240 | // addWeights(wl[i]); 241 | // } 242 | // } 243 | 244 | void update() { 245 | for (int i = 0; i < getSize(); i++) { 246 | _weightList[i]->update(); 247 | } 248 | } 249 | 250 | void copyToCPU() { 251 | for (int i = 0; i < getSize(); i++) { 252 | _weightList[i]->copyToCPU(); 253 | } 254 | } 255 | 256 | void copyToGPU() { 257 | for (int i = 0; i < getSize(); i++) { 258 | _weightList[i]->copyToGPU(); 259 | } 260 | } 261 | 262 | int getSize() { 263 | return _weightList.size(); 264 | } 265 | }; 266 | 267 | #endif /* WEIGHTS_CUH */ -------------------------------------------------------------------------------- /include/worker.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef WORKER_CUH 28 | #define WORKER_CUH 29 | 30 | #include "convnet.cuh" 31 | #include "cost.cuh" 32 | #include "data.cuh" 33 | 34 | class ConvNet; 35 | class Cost; 36 | 37 | class WorkResult { 38 | public: 39 | enum RESULTS {BATCH_DONE, SYNC_DONE}; 40 | protected: 41 | WorkResult::RESULTS _resultType; 42 | Cost* _results; 43 | public: 44 | WorkResult(WorkResult::RESULTS resultType, Cost& results); 45 | WorkResult(WorkResult::RESULTS resultType); 46 | virtual ~WorkResult(); 47 | Cost& getResults() const; 48 | WorkResult::RESULTS getResultType() const; 49 | }; 50 | 51 | class Worker { 52 | protected: 53 | ConvNet* _convNet; 54 | public: 55 | Worker(ConvNet& convNet); 56 | virtual void run() = 0; 57 | }; 58 | 59 | class DataWorker : public Worker { 60 | protected: 61 | CPUData* _data; 62 | DataProvider* _dp; 63 | public: 64 | DataWorker(ConvNet& convNet, CPUData& data); 65 | virtual ~DataWorker(); 66 | }; 67 | 68 | class TrainingWorker : public DataWorker { 69 | protected: 70 | bool _test; 71 | public: 72 | TrainingWorker(ConvNet& convNet, CPUData& data, bool test); 73 | void run(); 74 | }; 75 | 76 | class SyncWorker : public Worker { 77 | public: 78 | SyncWorker(ConvNet& convNet); 79 | void run(); 80 | }; 81 | 82 | class GradCheckWorker : public DataWorker { 83 | public: 84 | GradCheckWorker(ConvNet& convNet, CPUData& data); 85 | void run(); 86 | }; 87 | 88 | class MultiviewTestWorker : public DataWorker { 89 | protected: 90 | int _numViews, _logregIdx; 91 | public: 92 | MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx); 93 | void run(); 94 | }; 95 | 96 | class FeatureWorker : public DataWorker { 97 | protected: 98 | Matrix* _ftrs; 99 | int _layerIdx; 100 | public: 101 | FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx); 102 | ~FeatureWorker(); 103 | void run(); 104 | }; 105 | 106 | #endif /* WORKER_CUH */ 107 | 108 | -------------------------------------------------------------------------------- /ordereddict.py: -------------------------------------------------------------------------------- 1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. 2 | # Passes Python2.7's test suite and incorporates all the latest updates. 3 | 4 | try: 5 | from thread import get_ident as _get_ident 6 | except ImportError: 7 | from dummy_thread import get_ident as _get_ident 8 | 9 | try: 10 | from _abcoll import KeysView, ValuesView, ItemsView 11 | except ImportError: 12 | pass 13 | 14 | 15 | class OrderedDict(dict): 16 | 'Dictionary that remembers insertion order' 17 | # An inherited dict maps keys to values. 18 | # The inherited dict provides __getitem__, __len__, __contains__, and get. 19 | # The remaining methods are order-aware. 20 | # Big-O running times for all methods are the same as for regular dictionaries. 21 | 22 | # The internal self.__map dictionary maps keys to links in a doubly linked list. 23 | # The circular doubly linked list starts and ends with a sentinel element. 24 | # The sentinel element never gets deleted (this simplifies the algorithm). 25 | # Each link is stored as a list of length three: [PREV, NEXT, KEY]. 26 | 27 | def __init__(self, *args, **kwds): 28 | '''Initialize an ordered dictionary. Signature is the same as for 29 | regular dictionaries, but keyword arguments are not recommended 30 | because their insertion order is arbitrary. 31 | 32 | ''' 33 | if len(args) > 1: 34 | raise TypeError('expected at most 1 arguments, got %d' % len(args)) 35 | try: 36 | self.__root 37 | except AttributeError: 38 | self.__root = root = [] # sentinel node 39 | root[:] = [root, root, None] 40 | self.__map = {} 41 | self.__update(*args, **kwds) 42 | 43 | def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 44 | 'od.__setitem__(i, y) <==> od[i]=y' 45 | # Setting a new item creates a new link which goes at the end of the linked 46 | # list, and the inherited dictionary is updated with the new key/value pair. 47 | if key not in self: 48 | root = self.__root 49 | last = root[0] 50 | last[1] = root[0] = self.__map[key] = [last, root, key] 51 | dict_setitem(self, key, value) 52 | 53 | def __delitem__(self, key, dict_delitem=dict.__delitem__): 54 | 'od.__delitem__(y) <==> del od[y]' 55 | # Deleting an existing item uses self.__map to find the link which is 56 | # then removed by updating the links in the predecessor and successor nodes. 57 | dict_delitem(self, key) 58 | link_prev, link_next, key = self.__map.pop(key) 59 | link_prev[1] = link_next 60 | link_next[0] = link_prev 61 | 62 | def __iter__(self): 63 | 'od.__iter__() <==> iter(od)' 64 | root = self.__root 65 | curr = root[1] 66 | while curr is not root: 67 | yield curr[2] 68 | curr = curr[1] 69 | 70 | def __reversed__(self): 71 | 'od.__reversed__() <==> reversed(od)' 72 | root = self.__root 73 | curr = root[0] 74 | while curr is not root: 75 | yield curr[2] 76 | curr = curr[0] 77 | 78 | def clear(self): 79 | 'od.clear() -> None. Remove all items from od.' 80 | try: 81 | for node in self.__map.itervalues(): 82 | del node[:] 83 | root = self.__root 84 | root[:] = [root, root, None] 85 | self.__map.clear() 86 | except AttributeError: 87 | pass 88 | dict.clear(self) 89 | 90 | def popitem(self, last=True): 91 | '''od.popitem() -> (k, v), return and remove a (key, value) pair. 92 | Pairs are returned in LIFO order if last is true or FIFO order if false. 93 | 94 | ''' 95 | if not self: 96 | raise KeyError('dictionary is empty') 97 | root = self.__root 98 | if last: 99 | link = root[0] 100 | link_prev = link[0] 101 | link_prev[1] = root 102 | root[0] = link_prev 103 | else: 104 | link = root[1] 105 | link_next = link[1] 106 | root[1] = link_next 107 | link_next[0] = root 108 | key = link[2] 109 | del self.__map[key] 110 | value = dict.pop(self, key) 111 | return key, value 112 | 113 | # -- the following methods do not depend on the internal structure -- 114 | 115 | def keys(self): 116 | 'od.keys() -> list of keys in od' 117 | return list(self) 118 | 119 | def values(self): 120 | 'od.values() -> list of values in od' 121 | return [self[key] for key in self] 122 | 123 | def items(self): 124 | 'od.items() -> list of (key, value) pairs in od' 125 | return [(key, self[key]) for key in self] 126 | 127 | def iterkeys(self): 128 | 'od.iterkeys() -> an iterator over the keys in od' 129 | return iter(self) 130 | 131 | def itervalues(self): 132 | 'od.itervalues -> an iterator over the values in od' 133 | for k in self: 134 | yield self[k] 135 | 136 | def iteritems(self): 137 | 'od.iteritems -> an iterator over the (key, value) items in od' 138 | for k in self: 139 | yield (k, self[k]) 140 | 141 | def update(*args, **kwds): 142 | '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 143 | 144 | If E is a dict instance, does: for k in E: od[k] = E[k] 145 | If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] 146 | Or if E is an iterable of items, does: for k, v in E: od[k] = v 147 | In either case, this is followed by: for k, v in F.items(): od[k] = v 148 | 149 | ''' 150 | if len(args) > 2: 151 | raise TypeError('update() takes at most 2 positional ' 152 | 'arguments (%d given)' % (len(args),)) 153 | elif not args: 154 | raise TypeError('update() takes at least 1 argument (0 given)') 155 | self = args[0] 156 | # Make progressively weaker assumptions about "other" 157 | other = () 158 | if len(args) == 2: 159 | other = args[1] 160 | if isinstance(other, dict): 161 | for key in other: 162 | self[key] = other[key] 163 | elif hasattr(other, 'keys'): 164 | for key in other.keys(): 165 | self[key] = other[key] 166 | else: 167 | for key, value in other: 168 | self[key] = value 169 | for key, value in kwds.items(): 170 | self[key] = value 171 | 172 | __update = update # let subclasses override update without breaking __init__ 173 | 174 | __marker = object() 175 | 176 | def pop(self, key, default=__marker): 177 | '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. 178 | If key is not found, d is returned if given, otherwise KeyError is raised. 179 | 180 | ''' 181 | if key in self: 182 | result = self[key] 183 | del self[key] 184 | return result 185 | if default is self.__marker: 186 | raise KeyError(key) 187 | return default 188 | 189 | def setdefault(self, key, default=None): 190 | 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' 191 | if key in self: 192 | return self[key] 193 | self[key] = default 194 | return default 195 | 196 | def __repr__(self, _repr_running={}): 197 | 'od.__repr__() <==> repr(od)' 198 | call_key = id(self), _get_ident() 199 | if call_key in _repr_running: 200 | return '...' 201 | _repr_running[call_key] = 1 202 | try: 203 | if not self: 204 | return '%s()' % (self.__class__.__name__,) 205 | return '%s(%r)' % (self.__class__.__name__, self.items()) 206 | finally: 207 | del _repr_running[call_key] 208 | 209 | def __reduce__(self): 210 | 'Return state information for pickling' 211 | items = [[k, self[k]] for k in self] 212 | inst_dict = vars(self).copy() 213 | for k in vars(OrderedDict()): 214 | inst_dict.pop(k, None) 215 | if inst_dict: 216 | return (self.__class__, (items,), inst_dict) 217 | return self.__class__, (items,) 218 | 219 | def copy(self): 220 | 'od.copy() -> a shallow copy of od' 221 | return self.__class__(self) 222 | 223 | @classmethod 224 | def fromkeys(cls, iterable, value=None): 225 | '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S 226 | and values equal to v (which defaults to None). 227 | 228 | ''' 229 | d = cls() 230 | for key in iterable: 231 | d[key] = value 232 | return d 233 | 234 | def __eq__(self, other): 235 | '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive 236 | while comparison to a regular mapping is order-insensitive. 237 | 238 | ''' 239 | if isinstance(other, OrderedDict): 240 | return len(self)==len(other) and self.items() == other.items() 241 | return dict.__eq__(self, other) 242 | 243 | def __ne__(self, other): 244 | return not self == other 245 | 246 | # -- the following methods are only used in Python 2.7 -- 247 | 248 | def viewkeys(self): 249 | "od.viewkeys() -> a set-like object providing a view on od's keys" 250 | return KeysView(self) 251 | 252 | def viewvalues(self): 253 | "od.viewvalues() -> an object providing a view on od's values" 254 | return ValuesView(self) 255 | 256 | def viewitems(self): 257 | "od.viewitems() -> a set-like object providing a view on od's items" 258 | return ItemsView(self) 259 | -------------------------------------------------------------------------------- /src/convnet.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | using namespace std; 38 | 39 | /* 40 | * ======================= 41 | * ConvNet 42 | * ======================= 43 | */ 44 | ConvNet::ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID) : Thread(false), _deviceID(deviceID), _data(NULL) { 45 | try { 46 | int numLayers = PyList_GET_SIZE(layerParams); 47 | 48 | for (int i = 0; i < numLayers; i++) { 49 | PyObject* paramsDict = PyList_GET_ITEM(layerParams, i); 50 | string layerType = pyDictGetString(paramsDict, "type"); 51 | 52 | Layer* l = initLayer(layerType, paramsDict); 53 | // Connect backward links in graph for this layer 54 | intv* inputLayers = pyDictGetIntV(paramsDict, "inputs"); 55 | if (inputLayers != NULL) { 56 | for (int i = 0; i < inputLayers->size(); i++) { 57 | l->addPrev(&getLayer(inputLayers->at(i))); 58 | } 59 | } 60 | delete inputLayers; 61 | } 62 | 63 | // Connect the forward links in the graph 64 | for (int i = 0; i < _layers.size(); i++) { 65 | vector& prev = _layers[i]->getPrev(); 66 | for (int j = 0; j < prev.size(); j++) { 67 | prev[j]->addNext(_layers[i]); 68 | } 69 | } 70 | 71 | // Execute post-initialization stuff 72 | for (int i = 0; i < _layers.size(); i++) { 73 | _layers[i]->postInit(); 74 | } 75 | 76 | _dp = new DataProvider(minibatchSize); 77 | } catch (string& s) { 78 | cout << "Error creating ConvNet: " << s << endl; 79 | exit(1); 80 | } 81 | } 82 | 83 | /* 84 | * Override this in derived classes 85 | */ 86 | Layer* ConvNet::initLayer(string& layerType, PyObject* paramsDict) { 87 | if (layerType == "fc") { 88 | _layers.push_back(new FCLayer(this, paramsDict)); 89 | } else if (layerType == "conv") { 90 | _layers.push_back(new ConvLayer(this, paramsDict)); 91 | } else if (layerType == "local") { 92 | _layers.push_back(new LocalUnsharedLayer(this, paramsDict)); 93 | } else if (layerType == "pool") { 94 | _layers.push_back(&PoolLayer::makePoolLayer(this, paramsDict)); 95 | } else if (layerType == "rnorm") { 96 | _layers.push_back(new ResponseNormLayer(this, paramsDict)); 97 | } else if (layerType == "cmrnorm") { 98 | _layers.push_back(new CrossMapResponseNormLayer(this, paramsDict)); 99 | } else if (layerType == "cnorm") { 100 | _layers.push_back(new ContrastNormLayer(this, paramsDict)); 101 | } else if (layerType == "softmax") { 102 | _layers.push_back(new SoftmaxLayer(this, paramsDict)); 103 | } else if (layerType == "eltsum") { 104 | _layers.push_back(new EltwiseSumLayer(this, paramsDict)); 105 | } else if (layerType == "eltmax") { 106 | _layers.push_back(new EltwiseMaxLayer(this, paramsDict)); 107 | } else if (layerType == "neuron") { 108 | _layers.push_back(new NeuronLayer(this, paramsDict)); 109 | } else if (layerType == "nailbed") { 110 | _layers.push_back(new NailbedLayer(this, paramsDict)); 111 | } else if (layerType == "blur") { 112 | _layers.push_back(new GaussianBlurLayer(this, paramsDict)); 113 | } else if (layerType == "resize") { 114 | _layers.push_back(new ResizeLayer(this, paramsDict)); 115 | } else if (layerType == "rgb2yuv") { 116 | _layers.push_back(new RGBToYUVLayer(this, paramsDict)); 117 | } else if (layerType == "rgb2lab") { 118 | _layers.push_back(new RGBToLABLayer(this, paramsDict)); 119 | } else if (layerType == "data") { 120 | DataLayer *d = new DataLayer(this, paramsDict); 121 | _layers.push_back(d); 122 | _dataLayers.push_back(d); 123 | } else if (strncmp(layerType.c_str(), "cost.", 5) == 0) { 124 | CostLayer *c = &CostLayer::makeCostLayer(this, layerType, paramsDict); 125 | _layers.push_back(c); 126 | _costs.push_back(c); 127 | } else { 128 | throw string("Unknown layer type ") + layerType; 129 | } 130 | 131 | return _layers.back(); 132 | } 133 | 134 | /* 135 | * This executes in a new CPU thread so it's OK to initialize CUDA stuff here. 136 | */ 137 | void ConvNet::initCuda() { 138 | cudaSetDevice(_deviceID < 0 ? cutGetMaxGflopsDeviceId() : _deviceID); 139 | cudaDeviceSetCacheConfig(cudaFuncCachePreferShared); 140 | cublasInit(); 141 | NVMatrix::initRandom(time(0)); 142 | copyToGPU(); 143 | } 144 | 145 | void* ConvNet::run() { 146 | initCuda(); 147 | 148 | while (true) { 149 | Worker* worker = _workerQueue.dequeue(); 150 | worker->run(); 151 | delete worker; 152 | } 153 | return NULL; 154 | } 155 | 156 | Queue& ConvNet::getWorkerQueue() { 157 | return _workerQueue; 158 | } 159 | 160 | Queue& ConvNet::getResultQueue() { 161 | return _resultQueue; 162 | } 163 | 164 | DataProvider& ConvNet::getDataProvider() { 165 | return *_dp; 166 | } 167 | 168 | Layer& ConvNet::operator[](int idx) { 169 | return *_layers[idx]; 170 | } 171 | 172 | Layer& ConvNet::getLayer(int idx) { 173 | return *_layers[idx]; 174 | } 175 | 176 | void ConvNet::copyToCPU() { 177 | for (int i = 0; i < _layers.size(); i++) { 178 | _layers[i]->copyToCPU(); 179 | } 180 | } 181 | 182 | void ConvNet::copyToGPU() { 183 | for (int i = 0; i < _layers.size(); i++) { 184 | _layers[i]->copyToGPU(); 185 | } 186 | } 187 | 188 | void ConvNet::updateWeights() { 189 | for (int i = 0; i < _layers.size(); i++) { 190 | _layers[i]->updateWeights(); 191 | } 192 | } 193 | 194 | void ConvNet::reset() { 195 | for (int i = 0; i < _layers.size(); i++) { 196 | _layers[i]->reset(); 197 | } 198 | } 199 | 200 | int ConvNet::getNumLayers() { 201 | return _layers.size(); 202 | } 203 | 204 | void ConvNet::bprop(PASS_TYPE passType) { 205 | for (int i = 0; i < _costs.size(); i++) { 206 | _costs[i]->bprop(passType); 207 | } 208 | reset(); 209 | } 210 | 211 | void ConvNet::fprop(PASS_TYPE passType) { 212 | assert(_data != NULL); 213 | reset(); 214 | for (int i = 0; i < _dataLayers.size(); i++) { 215 | _dataLayers[i]->fprop(_data->getData(), passType); 216 | } 217 | } 218 | 219 | void ConvNet::fprop(GPUData& data, PASS_TYPE passType) { 220 | if (&data != _data) { 221 | delete _data; 222 | } 223 | _data = &data; 224 | fprop(passType); 225 | } 226 | 227 | void ConvNet::fprop(int miniIdx, PASS_TYPE passType) { 228 | delete _data; 229 | _data = &_dp->getMinibatch(miniIdx); 230 | fprop(passType); 231 | } 232 | 233 | Cost& ConvNet::getCost() { 234 | return *new Cost(_data->getNumCases(), _costs); 235 | } 236 | 237 | // Same as getCost() but adds results to given cost and returns it 238 | Cost& ConvNet::getCost(Cost& cost) { 239 | Cost& newCost = getCost(); 240 | cost += newCost; 241 | delete &newCost; 242 | return cost; 243 | } 244 | 245 | double ConvNet::getCostValue() { 246 | Cost& cost = getCost(); 247 | double val = cost.getValue(); 248 | delete &cost; 249 | return val; 250 | } 251 | 252 | /* 253 | * Gradient checking stuff 254 | */ 255 | void ConvNet::checkGradients() { 256 | _numFailures = 0; 257 | _numTests = 0; 258 | fprop(0, PASS_GC); 259 | _baseErr = getCostValue(); 260 | bprop(PASS_GC); 261 | 262 | for (vector::iterator it = _layers.begin(); it != _layers.end(); ++it) { 263 | (*it)->checkGradients(); 264 | } 265 | 266 | cout << "------------------------" << endl; 267 | if (_numFailures > 0) { 268 | cout << _numFailures << "/" << _numTests << " TESTS FAILED" << endl; 269 | } else { 270 | cout << "ALL " << _numTests << " TESTS PASSED" << endl; 271 | } 272 | } 273 | 274 | /* 275 | * name: weight matrix name 276 | * eps: finite difference step 277 | */ 278 | bool ConvNet::checkGradient(const string& name, float eps, Weights& weights) { 279 | Matrix numGrad(weights.getNumRows(), weights.getNumCols()); 280 | Matrix diff(numGrad); 281 | numGrad.apply(Matrix::ZERO); 282 | Matrix weightsCPU; 283 | 284 | weights.getW().copyToHost(weightsCPU, true); 285 | 286 | for(int i = 0; i < weights.getNumRows(); i++) { 287 | for (int j = 0; j < weights.getNumCols(); j++) { 288 | float v = weightsCPU(i,j); 289 | weightsCPU(i,j) += eps; 290 | weights.getW().copyFromHost(weightsCPU); 291 | weightsCPU(i,j) = v; 292 | fprop(PASS_GC); 293 | double err = getCostValue(); 294 | numGrad(i,j) = (err - _baseErr) / (_data->getNumCases() * eps); 295 | if (isnan(numGrad(i,j)) || isinf(numGrad(i,j))) { 296 | cout << "Numerical computation produced nan or inf when checking '" << name << "': " << numGrad(i,j) << endl; 297 | cout << "Consider reducing the sizes of the weights or finite difference steps." << endl; 298 | cout << "Exiting." << endl; 299 | exit(1); 300 | } 301 | weights.getW().copyFromHost(weightsCPU); 302 | } 303 | } 304 | 305 | Matrix gradCPU; 306 | weights.getGrad().copyToHost(gradCPU, true); 307 | gradCPU.scale(-1.0 / _data->getNumCases()); 308 | float analNorm = gradCPU.norm(); 309 | float numNorm = numGrad.norm(); 310 | numGrad.subtract(gradCPU, diff); 311 | float relErr = diff.norm() / analNorm; 312 | bool fail = relErr >= GC_REL_ERR_THRESH; 313 | if (fail || !GC_SUPPRESS_PASSES) { 314 | cout << "========================" << endl; 315 | printf("(%s) %s GRADIENT CHECK\n", fail ? "****FAIL****" : "PASS", name.c_str()); 316 | cout << "========================" << endl; 317 | cout << "Analytic:" << endl; 318 | gradCPU.print(6,4); 319 | cout << "Numeric:" << endl; 320 | numGrad.print(6,4); 321 | printf("Analytic norm: %e\n", analNorm); 322 | printf("Numeric norm: %e\n", numNorm); 323 | printf("Relative error: %e\n", relErr); 324 | } 325 | _numTests++; 326 | _numFailures += fail; 327 | return fail; 328 | } 329 | -------------------------------------------------------------------------------- /src/cost.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | /* 33 | * ===================== 34 | * Cost 35 | * ===================== 36 | */ 37 | 38 | Cost::Cost(int numCases) : _numCases(numCases) { 39 | } 40 | 41 | Cost::Cost(int numCases, vector& costs) : _numCases(numCases) { 42 | for (vector::iterator it = costs.begin(); it != costs.end(); ++it) { 43 | _costMap[(*it)->getName()] = &(*it)->getCost(); 44 | _costCoeffMap[(*it)->getName()] = (*it)->getCoeff(); 45 | } 46 | } 47 | 48 | int Cost::getNumCases() { 49 | return _numCases; 50 | } 51 | 52 | doublev& Cost::operator [](const string s) { 53 | return *_costMap[s]; 54 | } 55 | 56 | CostMap& Cost::getCostMap() { 57 | return _costMap; 58 | } 59 | 60 | CostCoeffMap& Cost::getCostCoeffMap() { 61 | return _costCoeffMap; 62 | } 63 | 64 | double Cost::getValue() { 65 | double val = 0; 66 | for (CostMap::iterator it = _costMap.begin(); it != _costMap.end(); ++it) { 67 | val += _costCoeffMap[it->first] * it->second->at(0); 68 | } 69 | return val; 70 | } 71 | 72 | Cost& Cost::operator += (Cost& er) { 73 | CostMap& otherMap = er.getCostMap(); 74 | CostCoeffMap& otherCoeffMap = er.getCostCoeffMap(); 75 | for (CostMap::const_iterator it = otherMap.begin(); it != otherMap.end(); ++it) { 76 | if (_costMap.count(it->first) == 0) { 77 | _costMap[it->first] = new doublev(); 78 | _costCoeffMap[it->first] = otherCoeffMap[it->first]; 79 | } 80 | 81 | vector& myVec = *_costMap[it->first]; 82 | vector& otherVec = *otherMap[it->first]; 83 | for (int i = 0; i < otherVec.size(); i++) { 84 | if (myVec.size() <= i) { 85 | myVec.push_back(0); 86 | } 87 | myVec[i] += otherVec[i]; 88 | } 89 | } 90 | _numCases += er.getNumCases(); 91 | return *this; 92 | } 93 | 94 | Cost& Cost::operator /= (const double v) { 95 | for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) { 96 | for (doublev::iterator it2 = it->second->begin(); it2 != it->second->end(); ++it2) { 97 | *it2 /= v; 98 | } 99 | } 100 | return *this; 101 | } 102 | 103 | Cost::~Cost() { 104 | for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) { 105 | delete it->second; 106 | } 107 | } -------------------------------------------------------------------------------- /src/data.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | DataProvider::DataProvider(int minibatchSize) : 33 | _minibatchSize(minibatchSize), _hData(NULL) { 34 | 35 | } 36 | 37 | GPUData& DataProvider::operator[](int idx) { 38 | return getMinibatch(idx); 39 | } 40 | 41 | void DataProvider::clearData() { 42 | delete _hData; 43 | _hData = NULL; 44 | _dataSize = 0; 45 | } 46 | 47 | void DataProvider::setData(CPUData& hData) { 48 | // This is now deleted by the DataWorker's destructor 49 | // delete _hData; // Delete old CPU matrices 50 | 51 | _hData = &hData; 52 | _dataSize = 0; 53 | for (int i = 0; i < hData.getSize(); i++) { 54 | _dataSize += hData[i].getNumDataBytes(); 55 | } 56 | _dataSize /= 1024 * 1024; 57 | if (_dataSize < MAX_DATA_ON_GPU) { 58 | for (int i = 0; i < hData.getSize(); i++) { 59 | if (i >= _data.size()) { 60 | _data.push_back(new NVMatrix()); 61 | } 62 | _data[i]->copyFromHost(hData[i], true); 63 | } 64 | } 65 | } 66 | 67 | GPUData& DataProvider::getMinibatch(int idx) { 68 | assert(idx >= 0 && idx < getNumMinibatches()); 69 | return getDataSlice(idx * _minibatchSize, (idx + 1) * _minibatchSize); 70 | } 71 | 72 | GPUData& DataProvider::getDataSlice(int startCase, int endCase) { 73 | assert(_hData != NULL); 74 | assert(_hData->getNumCases() > 0); 75 | 76 | NVMatrixV& miniData = *new NVMatrixV(); 77 | 78 | for (int i = 0; i < _hData->getData().size(); i++) { 79 | miniData.push_back(new NVMatrix()); 80 | if (_dataSize < MAX_DATA_ON_GPU) { 81 | if (_data[i]->isTrans()) { 82 | _data[i]->sliceRows(startCase, min(_hData->getNumCases(), endCase), *miniData[i]); 83 | } else { 84 | _data[i]->sliceCols(startCase, min(_hData->getNumCases(), endCase), *miniData[i]); 85 | } 86 | } else { 87 | Matrix tmp; 88 | if ((*_hData)[i].isTrans()) { 89 | (*_hData)[i].sliceRows(startCase, min(_hData->getNumCases(), endCase), tmp); 90 | } else { 91 | (*_hData)[i].sliceCols(startCase, min(_hData->getNumCases(), endCase), tmp); 92 | } 93 | miniData.back()->copyFromHost(tmp, true); 94 | } 95 | } 96 | 97 | return *new GPUData(miniData); 98 | } 99 | 100 | int DataProvider::getNumMinibatches() { 101 | assert(_hData != NULL); 102 | assert(_hData->getNumCases() > 0); 103 | return DIVUP(_hData->getNumCases(), _minibatchSize); 104 | } 105 | 106 | int DataProvider::getMinibatchSize() { 107 | return _minibatchSize; 108 | } 109 | 110 | int DataProvider::getNumCases() { 111 | assert(_hData != NULL); 112 | assert(_hData->getNumCases() > 0); 113 | return _hData->getNumCases(); 114 | } 115 | 116 | int DataProvider::getNumCasesInMinibatch(int idx) { 117 | assert(_hData != NULL); 118 | assert(_hData->getNumCases() > 0); 119 | assert(idx >= 0 && idx < getNumMinibatches()); 120 | return min(_minibatchSize, max(0, _hData->getNumCases() - idx * _minibatchSize)); 121 | } -------------------------------------------------------------------------------- /src/layer_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | 29 | #include 30 | 31 | /* 32 | * E = -log(y_t) 33 | * probs: (numOut, numCases) 34 | * labels: (1, numCases) 35 | * maxProbs: (1, numCases) 36 | * labelLogProbs: (1, numCases) (*out) 37 | * correctProbs: (1, numCases) (*out) 38 | * 39 | * target: (1, numCases) 40 | */ 41 | __global__ void kLogregCost(float* probs, float* labels, float* maxProbs, float* labelLogProbs, float* correctProbs, 42 | const int numCases, const int numOut) { 43 | const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x; 44 | 45 | if (tx < numCases) { 46 | const int label = int(labels[tx]); 47 | const float maxp = maxProbs[tx]; 48 | const float labelp = probs[label * numCases + tx]; 49 | 50 | labelLogProbs[tx] = __logf(labelp); 51 | 52 | /* 53 | * Compute the probability of guessing the correct case if you take the most-probable label. 54 | * 55 | * This is done like this: 56 | * 57 | * - If the most probable label is not equal to the true label, then the probability is zero. 58 | * - Otherwise, the probability is 1 / (number of labels whose probability is equal to the maximum). 59 | * 60 | * This is certainly overkill -- in practice, it's just about impossible for two labels to get assigned 61 | * maximum probability. But it's a safety measure to prevent over-estimating your accuracy. 62 | * Though it could never happen in reality. Well it could. But it wouldn't. Cool? 63 | */ 64 | if (labelp != maxp) { 65 | correctProbs[tx] = 0; 66 | } else { 67 | int numMax = 0; 68 | for (int i = 0; i < numOut; i++) { 69 | numMax += probs[i * numCases + tx] == maxp; 70 | } 71 | correctProbs[tx] = 1.0f / float(numMax); 72 | } 73 | } 74 | } 75 | 76 | /* 77 | * E = -log(y_t) 78 | * y_l: (numOut, numCases) 79 | * labels: (1, numCases) 80 | * 81 | * dE_dy_l: (numOut, numCases) 82 | */ 83 | template 84 | __global__ void kLogregCostGrad(float* y_l, float* labels, float* dE_dy_l, const int numCases, 85 | const int numOut, const float gradCoeff) { 86 | const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; 87 | const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; 88 | const int tidx = ty * numCases + tx; 89 | 90 | if (ty < numOut && tx < numCases) { 91 | const int label = int(labels[tx]); 92 | float v = gradCoeff * (label == ty); 93 | v = __fdividef(v, y_l[tidx]); 94 | if (add) { 95 | dE_dy_l[tidx] += v; 96 | } else { 97 | dE_dy_l[tidx] = v; 98 | } 99 | } 100 | } 101 | 102 | /* 103 | * dE_dy_l: (numOut, numCases) 104 | * y_l: (numOut, numCases) 105 | * 106 | * dE_dx_l: (numOut, numCases) 107 | */ 108 | template 109 | __global__ void kSoftmaxGrad(float* dE_dy_l, float* y_l, float* dE_dx_l, const int numCases, const int numOut) { 110 | const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; 111 | const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; 112 | const int tidx = ty * numCases + tx; 113 | 114 | if (ty < numOut && tx < numCases) { 115 | float v = 0; 116 | for (int j = 0; j < numOut; j++) { 117 | v += dE_dy_l[j * numCases + tx] * ((j == ty) - y_l[j * numCases + tx]); 118 | } 119 | v *= y_l[tidx]; 120 | 121 | if (add) { 122 | dE_dx_l[tidx] += v; 123 | } else { 124 | dE_dx_l[tidx] = v; 125 | } 126 | } 127 | } 128 | 129 | /* 130 | * E = -log(y_t) 131 | * y_l: (numOut, numCases) 132 | * labels: (1, numCases) 133 | * 134 | * dE_dx_l: (numOut, numCases) 135 | */ 136 | template 137 | __global__ void kLogregSoftmaxGrad(float* y_l, float* labels, float* dE_dx_l, const int numCases, 138 | const int numOut, const float gradCoeff) { 139 | const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x; 140 | const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y; 141 | const int tidx = ty * numCases + tx; 142 | 143 | if (ty < numOut && tx < numCases) { 144 | const int label = int(labels[tx]); 145 | float v = gradCoeff * ((label == ty) - y_l[tidx]); 146 | if (add) { 147 | dE_dx_l[tidx] += v; 148 | } else { 149 | dE_dx_l[tidx] = v; 150 | } 151 | } 152 | } 153 | 154 | template 155 | __global__ void kEltwiseMaxGrad(float* actGrad, float* input, float* output, float* target, 156 | const int numElements) { 157 | for (int i = B_X * blockIdx.x + threadIdx.x; i < numElements; i += B_X * gridDim.x) { 158 | if (add) { 159 | target[i] += actGrad[i] * (output[i] == input[i]); 160 | } else { 161 | target[i] = actGrad[i] * (output[i] == input[i]); 162 | } 163 | } 164 | } 165 | 166 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add) { 167 | assert(actGrad.isContiguous()); 168 | assert(output.isContiguous()); 169 | assert(input.isContiguous()); 170 | assert(actGrad.isSameDims(input)); 171 | assert(actGrad.isSameDims(output)); 172 | 173 | dim3 blocks(DIVUP(actGrad.getNumElements(), 128)); 174 | dim3 threads(128); 175 | if (add) { 176 | assert(actGrad.isSameDims(target)); 177 | cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, true>, cudaFuncCachePreferL1); 178 | kEltwiseMaxGrad<128, true><<>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements()); 179 | } else { 180 | target.resize(actGrad); 181 | cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, false>, cudaFuncCachePreferL1); 182 | kEltwiseMaxGrad<128, false><<>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements()); 183 | } 184 | 185 | cutilCheckMsg("computeEltwiseMaxGrad: Kernel execution failed"); 186 | } 187 | 188 | /* 189 | * E = -log(y_t) 190 | * probs: (numOut, numCases) 191 | * labels: (1, numCases) 192 | * maxProbs: (1, numCases) 193 | * labelLogProbs: (1, numCases) (*out) 194 | * correctProbs: (1, numCases) (*out) 195 | * 196 | * target: (1, numCases) 197 | */ 198 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out) { 199 | int numCases = probs.getNumCols(); 200 | int numOut = probs.getNumRows(); 201 | 202 | assert(labels.getNumElements() == numCases); 203 | assert(!labels.isTrans()); 204 | assert(!probs.isTrans()); 205 | assert(labels.isContiguous()); 206 | assert(probs.isContiguous()); 207 | 208 | NVMatrix& maxProbs = probs.max(0); 209 | 210 | labelLogProbs_out.resize(1, numCases); 211 | correctProbs_out.resize(1, numCases); 212 | dim3 threads(LOGREG_ERR_THREADS_X, 1); 213 | dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1); 214 | cudaFuncSetCacheConfig(kLogregCost, cudaFuncCachePreferL1); 215 | kLogregCost<<>>(probs.getDevData(), labels.getDevData(), maxProbs.getDevData(), 216 | labelLogProbs_out.getDevData(), correctProbs_out.getDevData(), 217 | numCases, numOut); 218 | cutilCheckMsg("computeLogregCost: Kernel execution failed"); 219 | // cudaThreadSynchronize(); 220 | delete &maxProbs; 221 | } 222 | 223 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { 224 | int numCases = probs.getLeadingDim(); 225 | int numOut = probs.getFollowingDim(); 226 | assert(labels.getNumElements() == numCases); 227 | assert(probs.isContiguous()); 228 | assert(target.isContiguous()); 229 | assert(labels.isContiguous()); 230 | assert(!labels.isTrans()); 231 | assert(!probs.isTrans()); 232 | 233 | dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); 234 | dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); 235 | if (!add) { 236 | target.resize(probs); 237 | kLogregCostGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 238 | numCases, numOut, coeff); 239 | } else { 240 | kLogregCostGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 241 | numCases, numOut, coeff); 242 | } 243 | 244 | cutilCheckMsg("computeLogregGrad: Kernel execution failed"); 245 | } 246 | 247 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add) { 248 | int numCases = acts.getLeadingDim(); 249 | int numOut = acts.getFollowingDim(); 250 | 251 | assert(acts.isSameDims(actsGrad)); 252 | assert(acts.isContiguous()); 253 | assert(actsGrad.isContiguous()); 254 | assert(target.isContiguous()); 255 | assert(acts.isTrans()); 256 | assert(actsGrad.isTrans()); 257 | 258 | dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); 259 | dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); 260 | if (!add) { 261 | target.resize(acts); 262 | kSoftmaxGrad<<>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut); 263 | } else { 264 | kSoftmaxGrad<<>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut); 265 | } 266 | cutilCheckMsg("computeSoftmaxGrad: Kernel execution failed"); 267 | } 268 | 269 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) { 270 | int numCases = probs.getLeadingDim(); 271 | int numOut = probs.getFollowingDim(); 272 | assert(labels.getNumElements() == numCases); 273 | assert(probs.isContiguous()); 274 | assert(target.isContiguous()); 275 | assert(labels.isContiguous()); 276 | assert(probs.isTrans()); 277 | 278 | dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y); 279 | dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y)); 280 | if (!add) { 281 | target.resize(probs); 282 | kLogregSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 283 | numCases, numOut, coeff); 284 | } else { 285 | kLogregSoftmaxGrad<<>>(probs.getDevData(), labels.getDevData(), target.getDevData(), 286 | numCases, numOut, coeff); 287 | } 288 | 289 | cutilCheckMsg("computeLogregSoftmaxGrad: Kernel execution failed"); 290 | } -------------------------------------------------------------------------------- /src/neuron.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | Neuron& Neuron::makeNeuron(PyObject* neuronDict) { 33 | string type = pyDictGetString(neuronDict, "type"); 34 | PyObject* neuronParamsDict = PyDict_GetItemString(neuronDict, "params"); 35 | 36 | if (type == "relu") { 37 | return *new ReluNeuron(); 38 | } 39 | 40 | if (type == "softrelu") { 41 | return *new SoftReluNeuron(); 42 | } 43 | 44 | if (type == "brelu") { 45 | float a = pyDictGetFloat(neuronParamsDict, "a"); 46 | return *new BoundedReluNeuron(a); 47 | } 48 | 49 | if (type == "abs") { 50 | return *new AbsNeuron(); 51 | } 52 | 53 | if (type == "logistic") { 54 | return *new LogisticNeuron(); 55 | } 56 | 57 | if (type == "tanh") { 58 | float a = pyDictGetFloat(neuronParamsDict, "a"); 59 | float b = pyDictGetFloat(neuronParamsDict, "b"); 60 | 61 | return *new TanhNeuron(a, b); 62 | } 63 | 64 | if (type == "square") { 65 | return *new SquareNeuron(); 66 | } 67 | 68 | if (type == "sqrt") { 69 | return *new SqrtNeuron(); 70 | } 71 | 72 | if (type == "linear") { 73 | float a = pyDictGetFloat(neuronParamsDict, "a"); 74 | float b = pyDictGetFloat(neuronParamsDict, "b"); 75 | return *new LinearNeuron(a, b); 76 | } 77 | 78 | if (type == "ident") { 79 | return *new Neuron(); 80 | } 81 | 82 | throw string("Unknown neuron type: ") + type; 83 | } 84 | -------------------------------------------------------------------------------- /src/nvmatrix/nvmatrix_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | __global__ void kTile(const float* src, float* tgt, const uint srcWidth, const uint srcHeight, const uint tgtWidth, const uint tgtHeight) { 32 | const int idx = blockIdx.x * blockDim.x + threadIdx.x; 33 | const int numThreads = blockDim.x * gridDim.x; 34 | // const unsigned int numEls = tgtWidth * tgtHeight; 35 | for (uint i = idx; i < tgtWidth * tgtHeight; i += numThreads) { 36 | const uint y = i / tgtWidth; 37 | const uint x = i % tgtWidth; 38 | const uint srcY = y % srcHeight; 39 | const uint srcX = x % srcWidth; 40 | tgt[i] = src[srcY * srcWidth + srcX]; 41 | } 42 | } 43 | 44 | __global__ void kDotProduct_r(float* a, float* b, float* target, const uint numCols, const uint numElements) { 45 | __shared__ float shmem[DP_BLOCKSIZE]; 46 | 47 | uint eidx = DP_BLOCKSIZE * blockIdx.x + threadIdx.x; 48 | shmem[threadIdx.x] = 0; 49 | if (eidx < numCols) { 50 | for (; eidx < numElements; eidx += numCols) { 51 | shmem[threadIdx.x] += a[eidx] * b[eidx]; 52 | } 53 | } 54 | __syncthreads(); 55 | if (threadIdx.x < 256) { 56 | shmem[threadIdx.x] += shmem[threadIdx.x + 256]; 57 | } 58 | __syncthreads(); 59 | if (threadIdx.x < 128) { 60 | shmem[threadIdx.x] += shmem[threadIdx.x + 128]; 61 | } 62 | __syncthreads(); 63 | if (threadIdx.x < 64) { 64 | shmem[threadIdx.x] += shmem[threadIdx.x + 64]; 65 | } 66 | __syncthreads(); 67 | if (threadIdx.x < 32) { 68 | volatile float* mysh = &shmem[threadIdx.x]; 69 | *mysh += mysh[32]; 70 | *mysh += mysh[16]; 71 | *mysh += mysh[8]; 72 | *mysh += mysh[4]; 73 | *mysh += mysh[2]; 74 | *mysh += mysh[1]; 75 | if (threadIdx.x == 0) { 76 | target[blockIdx.x] = *mysh; 77 | } 78 | } 79 | } 80 | 81 | __global__ void kSetupCurand(curandState *state, unsigned long long seed) { 82 | const uint tidx = NUM_RND_THREADS_PER_BLOCK * blockIdx.x + threadIdx.x; 83 | /* Each thread gets same seed, a different sequence number, 84 | no offset */ 85 | curand_init(seed, tidx, 0, &state[tidx]); 86 | } 87 | 88 | -------------------------------------------------------------------------------- /src/pyconvnet.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | #include 43 | 44 | using namespace std; 45 | static ConvNet* model = NULL; 46 | 47 | static PyMethodDef _ConvNetMethods[] = { { "initModel", initModel, METH_VARARGS }, 48 | { "startBatch", startBatch, METH_VARARGS }, 49 | { "finishBatch", finishBatch, METH_VARARGS }, 50 | { "checkGradients", checkGradients, METH_VARARGS }, 51 | { "startMultiviewTest", startMultiviewTest, METH_VARARGS }, 52 | { "startFeatureWriter", startFeatureWriter, METH_VARARGS }, 53 | { "syncWithHost", syncWithHost, METH_VARARGS }, 54 | { NULL, NULL } 55 | }; 56 | 57 | #if defined(_WIN64) || defined(_WIN32) 58 | extern "C" __declspec(dllexport) void initpyconvnet() { 59 | (void) Py_InitModule("pyconvnet", _ConvNetMethods); 60 | import_array(); 61 | } 62 | #else 63 | void INITNAME() { 64 | (void) Py_InitModule(QUOTEME(MODELNAME), _ConvNetMethods); 65 | import_array(); 66 | } 67 | #endif 68 | 69 | PyObject* initModel(PyObject *self, PyObject *args) { 70 | assert(model == NULL); 71 | 72 | PyListObject* pyLayerParams; 73 | int pyMinibatchSize; 74 | int pyDeviceID; 75 | 76 | if (!PyArg_ParseTuple(args, "O!ii", 77 | &PyList_Type, &pyLayerParams, 78 | &pyMinibatchSize, 79 | &pyDeviceID)) { 80 | return NULL; 81 | } 82 | model = new ConvNet(pyLayerParams, 83 | pyMinibatchSize, 84 | pyDeviceID); 85 | 86 | model->start(); 87 | return Py_BuildValue("i", 0); 88 | } 89 | 90 | /* 91 | * Starts training/testing on the given batch (asynchronous -- returns immediately). 92 | */ 93 | PyObject* startBatch(PyObject *self, PyObject *args) { 94 | assert(model != NULL); 95 | PyListObject* data; 96 | int test = 0; 97 | if (!PyArg_ParseTuple(args, "O!|i", 98 | &PyList_Type, &data, 99 | &test)) { 100 | return NULL; 101 | } 102 | MatrixV& mvec = *getMatrixV((PyObject*)data); 103 | 104 | TrainingWorker* wr = new TrainingWorker(*model, *new CPUData(mvec), test); 105 | model->getWorkerQueue().enqueue(wr); 106 | return Py_BuildValue("i", 0); 107 | } 108 | 109 | /* 110 | * Starts testing on the given batch (asynchronous -- returns immediately). 111 | */ 112 | PyObject* startMultiviewTest(PyObject *self, PyObject *args) { 113 | assert(model != NULL); 114 | PyListObject* data; 115 | int numViews, logregIdx; 116 | if (!PyArg_ParseTuple(args, "O!ii", 117 | &PyList_Type, &data, 118 | &numViews, 119 | &logregIdx)) { 120 | return NULL; 121 | } 122 | MatrixV& mvec = *getMatrixV((PyObject*)data); 123 | 124 | MultiviewTestWorker* wr = new MultiviewTestWorker(*model, *new CPUData(mvec), numViews, logregIdx); 125 | model->getWorkerQueue().enqueue(wr); 126 | return Py_BuildValue("i", 0); 127 | } 128 | 129 | PyObject* startFeatureWriter(PyObject *self, PyObject *args) { 130 | assert(model != NULL); 131 | PyListObject* data; 132 | int layerIdx; 133 | if (!PyArg_ParseTuple(args, "O!i", 134 | &PyList_Type, &data, 135 | &layerIdx)) { 136 | return NULL; 137 | } 138 | MatrixV& mvec = *getMatrixV((PyObject*)data); 139 | Matrix& ftrs = *mvec.back(); 140 | mvec.pop_back(); 141 | 142 | FeatureWorker* wr = new FeatureWorker(*model, *new CPUData(mvec), ftrs, layerIdx); 143 | model->getWorkerQueue().enqueue(wr); 144 | return Py_BuildValue("i", 0); 145 | } 146 | 147 | /* 148 | * Waits for the trainer to finish training on the batch given to startBatch. 149 | */ 150 | PyObject* finishBatch(PyObject *self, PyObject *args) { 151 | assert(model != NULL); 152 | WorkResult* res = model->getResultQueue().dequeue(); 153 | assert(res != NULL); 154 | assert(res->getResultType() == WorkResult::BATCH_DONE); 155 | 156 | Cost& cost = res->getResults(); 157 | PyObject* dict = PyDict_New(); 158 | CostMap& costMap = cost.getCostMap(); 159 | for (CostMap::const_iterator it = costMap.begin(); it != costMap.end(); ++it) { 160 | PyObject* v = PyList_New(0); 161 | for (vector::const_iterator iv = it->second->begin(); iv != it->second->end(); ++iv) { 162 | PyObject* f = PyFloat_FromDouble(*iv); 163 | PyList_Append(v, f); 164 | } 165 | PyDict_SetItemString(dict, it->first.c_str(), v); 166 | } 167 | 168 | PyObject* retVal = Py_BuildValue("Ni", dict, cost.getNumCases()); 169 | delete res; // Deletes cost too 170 | return retVal; 171 | } 172 | 173 | PyObject* checkGradients(PyObject *self, PyObject *args) { 174 | assert(model != NULL); 175 | PyListObject* data; 176 | if (!PyArg_ParseTuple(args, "O!", 177 | &PyList_Type, &data)) { 178 | return NULL; 179 | } 180 | MatrixV& mvec = *getMatrixV((PyObject*)data); 181 | 182 | GradCheckWorker* wr = new GradCheckWorker(*model, *new CPUData(mvec)); 183 | model->getWorkerQueue().enqueue(wr); 184 | WorkResult* res = model->getResultQueue().dequeue(); 185 | assert(res != NULL); 186 | assert(res->getResultType() == WorkResult::BATCH_DONE); 187 | delete res; 188 | return Py_BuildValue("i", 0); 189 | } 190 | 191 | /* 192 | * Copies weight matrices from GPU to system memory. 193 | */ 194 | PyObject* syncWithHost(PyObject *self, PyObject *args) { 195 | assert(model != NULL); 196 | SyncWorker* wr = new SyncWorker(*model); 197 | model->getWorkerQueue().enqueue(wr); 198 | WorkResult* res = model->getResultQueue().dequeue(); 199 | assert(res != NULL); 200 | assert(res->getResultType() == WorkResult::SYNC_DONE); 201 | 202 | delete res; 203 | return Py_BuildValue("i", 0); 204 | } 205 | 206 | -------------------------------------------------------------------------------- /src/util.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | 29 | using namespace std; 30 | 31 | floatv* getFloatV(PyObject* pyList) { 32 | if (pyList == NULL) { 33 | return NULL; 34 | } 35 | floatv* vec = new floatv(); 36 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 37 | vec->push_back(PyFloat_AS_DOUBLE(PyList_GET_ITEM(pyList, i))); 38 | } 39 | return vec; 40 | } 41 | 42 | intv* getIntV(PyObject* pyList) { 43 | if (pyList == NULL) { 44 | return NULL; 45 | } 46 | intv* vec = new intv(); 47 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 48 | vec->push_back(PyInt_AS_LONG(PyList_GET_ITEM(pyList, i))); 49 | } 50 | return vec; 51 | } 52 | 53 | int* getIntA(PyObject* pyList) { 54 | if (pyList == NULL) { 55 | return NULL; 56 | } 57 | int* arr = new int[PyList_GET_SIZE(pyList)]; 58 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 59 | arr[i] = PyInt_AS_LONG(PyList_GET_ITEM(pyList, i)); 60 | } 61 | return arr; 62 | } 63 | MatrixV* getMatrixV(PyObject* pyList) { 64 | if (pyList == NULL) { 65 | return NULL; 66 | } 67 | MatrixV* vec = new MatrixV(); 68 | for (int i = 0; i < PyList_GET_SIZE(pyList); i++) { 69 | vec->push_back(new Matrix((PyArrayObject*)PyList_GET_ITEM(pyList, i))); 70 | } 71 | return vec; 72 | } 73 | 74 | int pyDictGetInt(PyObject* dict, const char* key) { 75 | return PyInt_AS_LONG(PyDict_GetItemString(dict, key)); 76 | } 77 | 78 | intv* pyDictGetIntV(PyObject* dict, const char* key) { 79 | return getIntV(PyDict_GetItemString(dict, key)); 80 | } 81 | 82 | int* pyDictGetIntA(PyObject* dict, const char* key) { 83 | return getIntA(PyDict_GetItemString(dict, key)); 84 | } 85 | 86 | string pyDictGetString(PyObject* dict, const char* key) { 87 | return string(PyString_AS_STRING(PyDict_GetItemString(dict, key))); 88 | } 89 | 90 | float pyDictGetFloat(PyObject* dict, const char* key) { 91 | return PyFloat_AS_DOUBLE(PyDict_GetItemString(dict, key)); 92 | } 93 | 94 | floatv* pyDictGetFloatV(PyObject* dict, const char* key) { 95 | return getFloatV(PyDict_GetItemString(dict, key)); 96 | } 97 | 98 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key) { 99 | return new Matrix((PyArrayObject*)PyDict_GetItemString(dict, key)); 100 | } 101 | 102 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key) { 103 | return getMatrixV(PyDict_GetItemString(dict, key)); 104 | } -------------------------------------------------------------------------------- /src/weights.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | 29 | bool Weights::_autoCopyToGPU = false; -------------------------------------------------------------------------------- /src/worker.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | using namespace std; 32 | 33 | /* 34 | * ==================== 35 | * WorkResult 36 | * ==================== 37 | */ 38 | WorkResult::WorkResult(WorkResult::RESULTS resultType, Cost& results) : _resultType(resultType), _results(&results) { 39 | } 40 | 41 | WorkResult::WorkResult(WorkResult::RESULTS resultType) : _resultType(resultType), _results(NULL) { 42 | } 43 | 44 | WorkResult::~WorkResult() { 45 | delete _results; // delete NULL is ok 46 | } 47 | 48 | Cost& WorkResult::getResults() const { 49 | return *_results; 50 | } 51 | 52 | WorkResult::RESULTS WorkResult::getResultType() const { 53 | return _resultType; 54 | } 55 | 56 | /* 57 | * ==================== 58 | * Worker 59 | * ==================== 60 | */ 61 | Worker::Worker(ConvNet& convNet) : _convNet(&convNet) { 62 | } 63 | 64 | /* 65 | * ==================== 66 | * DataWorker 67 | * ==================== 68 | */ 69 | DataWorker::DataWorker(ConvNet& convNet, CPUData& data) : Worker(convNet), _data(&data) { 70 | _dp = &convNet.getDataProvider(); 71 | } 72 | 73 | DataWorker::~DataWorker() { 74 | _dp->clearData(); 75 | } 76 | 77 | /* 78 | * ==================== 79 | * TrainingWorker 80 | * ==================== 81 | */ 82 | TrainingWorker::TrainingWorker(ConvNet& convNet, CPUData& data, bool test) 83 | : DataWorker(convNet, data), _test(test) { 84 | } 85 | 86 | // Need to setData here (as opposed to the constructor) because the constructor executes in 87 | // the original CPU thread, which is not the one with GPU access. 88 | void TrainingWorker::run() { 89 | _dp->setData(*_data); 90 | Cost& batchCost = *new Cost(0); 91 | for (int i = 0; i < _dp->getNumMinibatches(); i++) { 92 | _convNet->fprop(i, _test ? PASS_TEST : PASS_TRAIN); 93 | _convNet->getCost(batchCost); 94 | 95 | if (!_test) { 96 | _convNet->bprop(PASS_TRAIN); 97 | _convNet->updateWeights(); 98 | } 99 | } 100 | cudaThreadSynchronize(); 101 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); 102 | } 103 | 104 | /* 105 | * ==================== 106 | * SyncWorker 107 | * ==================== 108 | */ 109 | SyncWorker::SyncWorker(ConvNet& convNet) : Worker(convNet) { 110 | } 111 | 112 | void SyncWorker::run() { 113 | _convNet->copyToCPU(); 114 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::SYNC_DONE)); 115 | } 116 | 117 | /* 118 | * ==================== 119 | * GradCheckWorker 120 | * ==================== 121 | */ 122 | GradCheckWorker::GradCheckWorker(ConvNet& convNet, CPUData& data) 123 | : DataWorker(convNet, data) { 124 | } 125 | 126 | void GradCheckWorker::run() { 127 | _dp->setData(*_data); 128 | _convNet->checkGradients(); 129 | exit(0); 130 | } 131 | 132 | /* 133 | * ==================== 134 | * MultiviewTestWorker 135 | * ==================== 136 | */ 137 | MultiviewTestWorker::MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx) 138 | : DataWorker(convNet, data), _numViews(numViews), _logregIdx(logregIdx) { 139 | assert(_data->getNumCases() % _numViews == 0); 140 | } 141 | 142 | void MultiviewTestWorker::run() { 143 | _dp->setData(*_data); 144 | Layer& logregLayer = _convNet->getLayer(_logregIdx); 145 | 146 | int numCasesReal = _dp->getNumCases() / _numViews; 147 | int numMiniReal = DIVUP(numCasesReal, _dp->getMinibatchSize()); 148 | 149 | Cost& batchCost = *new Cost(0); 150 | for (int i = 0; i < numMiniReal; i++) { 151 | NVMatrix softmaxActs; 152 | for (int v = 0; v < _numViews; v++) { 153 | GPUData& mini = _dp->getDataSlice(v * numCasesReal + i * _dp->getMinibatchSize(), 154 | min((v + 1) * numCasesReal, v * numCasesReal + (i + 1) * _dp->getMinibatchSize())); 155 | _convNet->fprop(mini, PASS_TEST); 156 | if (v == 0) { 157 | logregLayer.getPrev()[1]->getActs().copy(softmaxActs); 158 | } else { 159 | softmaxActs.add(logregLayer.getPrev()[1]->getActs()); 160 | } 161 | } 162 | softmaxActs.scale(1.0 / _numViews); 163 | NVMatrixV logregInput; 164 | logregInput.push_back(&logregLayer.getPrev()[0]->getActs()); 165 | logregInput.push_back(&softmaxActs); 166 | 167 | logregLayer.fprop(logregInput, PASS_TEST); 168 | 169 | _convNet->getCost(batchCost); 170 | } 171 | cudaThreadSynchronize(); 172 | 173 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); 174 | } 175 | 176 | /* 177 | * ==================== 178 | * FeatureWorker 179 | * ==================== 180 | */ 181 | FeatureWorker::FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx) 182 | : DataWorker(convNet, data), _ftrs(&ftrs), _layerIdx(layerIdx) { 183 | assert(ftrs.getNumRows() == data.getNumCases()); 184 | assert(!ftrs.isTrans()); 185 | } 186 | 187 | FeatureWorker::~FeatureWorker() { 188 | delete _ftrs; 189 | } 190 | 191 | void FeatureWorker::run() { 192 | _dp->setData(*_data); 193 | Layer& ftrLayer = _convNet->getLayer(_layerIdx); 194 | Cost& batchCost = *new Cost(0); 195 | for (int i = 0; i < _dp->getNumMinibatches(); i++) { 196 | _convNet->fprop(i, PASS_TEST); 197 | _convNet->getCost(batchCost); 198 | Matrix& miniFtrs = _ftrs->sliceRows(i * _dp->getMinibatchSize(), 199 | min(_dp->getNumCases(), (i + 1) * _dp->getMinibatchSize())); 200 | NVMatrix& acts = ftrLayer.getActs(); 201 | NVMatrix acts_T; 202 | if (acts.isTrans()) { 203 | NVMatrix& soft_T = acts.getTranspose(); 204 | soft_T.transpose(acts_T); 205 | delete &soft_T; 206 | } else { 207 | acts.transpose(acts_T); 208 | } 209 | acts_T.copyToHost(miniFtrs); 210 | delete &miniFtrs; 211 | } 212 | cudaThreadSynchronize(); 213 | _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost)); 214 | } -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without modification, 5 | # are permitted provided that the following conditions are met: 6 | # 7 | # - Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # - Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | import re 26 | import cPickle 27 | import os 28 | import numpy as n 29 | from math import sqrt 30 | 31 | import gzip 32 | import zipfile 33 | 34 | class UnpickleError(Exception): 35 | pass 36 | 37 | VENDOR_ID_REGEX = re.compile('^vendor_id\s+: (\S+)') 38 | GPU_LOCK_NO_SCRIPT = -2 39 | GPU_LOCK_NO_LOCK = -1 40 | 41 | try: 42 | import magic 43 | ms = magic.open(magic.MAGIC_NONE) 44 | ms.load() 45 | except ImportError: # no magic module 46 | ms = None 47 | 48 | def get_gpu_lock(id=-1): 49 | import imp 50 | lock_script_path = '/u/tang/bin/gpu_lock2.py' 51 | if os.path.exists(lock_script_path): 52 | locker = imp.load_source("", lock_script_path) 53 | if id == -1: 54 | return locker.obtain_lock_id() 55 | print id 56 | got_id = locker._obtain_lock(id) 57 | return id if got_id else GPU_LOCK_NO_LOCK 58 | return GPU_LOCK_NO_SCRIPT if id < 0 else id 59 | 60 | def pickle(filename, data, compress=False): 61 | if compress: 62 | fo = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) 63 | fo.writestr('data', cPickle.dumps(data, -1)) 64 | else: 65 | fo = open(filename, "wb") 66 | cPickle.dump(data, fo, protocol=cPickle.HIGHEST_PROTOCOL) 67 | fo.close() 68 | 69 | def unpickle(filename): 70 | if not os.path.exists(filename): 71 | raise UnpickleError("Path '%s' does not exist." % filename) 72 | if ms is not None and ms.file(filename).startswith('gzip'): 73 | fo = gzip.open(filename, 'rb') 74 | dict = cPickle.load(fo) 75 | elif ms is not None and ms.file(filename).startswith('Zip'): 76 | fo = zipfile.ZipFile(filename, 'r', zipfile.ZIP_DEFLATED) 77 | dict = cPickle.loads(fo.read('data')) 78 | else: 79 | fo = open(filename, 'rb') 80 | dict = cPickle.load(fo) 81 | 82 | fo.close() 83 | return dict 84 | 85 | def tryint(s): 86 | try: 87 | return int(s) 88 | except: 89 | return s 90 | 91 | def alphanum_key(s): 92 | return [tryint(c) for c in re.split('([0-9]+)', s)] 93 | 94 | def is_intel_machine(): 95 | f = open('/proc/cpuinfo') 96 | for line in f: 97 | m = VENDOR_ID_REGEX.match(line) 98 | if m: 99 | f.close() 100 | return m.group(1) == 'GenuineIntel' 101 | f.close() 102 | return False 103 | 104 | def get_cpu(): 105 | if is_intel_machine(): 106 | return 'intel' 107 | return 'amd' 108 | 109 | def is_windows_machine(): 110 | return os.name == 'nt' 111 | --------------------------------------------------------------------------------