├── ABOUT
├── Makefile
├── README.md
├── build.sh
├── common-gcc-cuda-4.0.mk
├── convdata.py
├── convnet.py
├── data.py
├── example-layers
    ├── layer-params-18pct.cfg
    ├── layer-params-19pct.cfg
    ├── layer-params-80sec.cfg
    ├── layer-params-conv-local-11pct.cfg
    ├── layer-params-conv-local-13pct.cfg
    ├── layer-params-example.cfg
    ├── layer-params.gc.cfg
    ├── layers-18pct.cfg
    ├── layers-19pct.cfg
    ├── layers-80sec.cfg
    ├── layers-conv-local-11pct.cfg
    ├── layers-conv-local-13pct.cfg
    ├── layers-example.cfg
    └── layers.gc.cfg
├── gpumodel.py
├── include
    ├── common
    │   ├── matrix.h
    │   ├── matrix_funcs.h
    │   ├── queue.h
    │   └── thread.h
    ├── convnet.cuh
    ├── cost.cuh
    ├── cudaconv2
    │   ├── conv_util.cuh
    │   └── cudaconv2.cuh
    ├── data.cuh
    ├── layer.cuh
    ├── layer_kernels.cuh
    ├── neuron.cuh
    ├── nvmatrix
    │   ├── nvmatrix.cuh
    │   ├── nvmatrix_kernels.cuh
    │   └── nvmatrix_operators.cuh
    ├── pyconvnet.cuh
    ├── util.cuh
    ├── weights.cuh
    └── worker.cuh
├── layer.py
├── options.py
├── ordereddict.py
├── shownet.py
├── src
    ├── common
    │   └── matrix.cpp
    ├── convnet.cu
    ├── cost.cu
    ├── cudaconv2
    │   ├── conv_util.cu
    │   ├── filter_acts.cu
    │   ├── img_acts.cu
    │   └── weight_acts.cu
    ├── data.cu
    ├── layer.cu
    ├── layer_kernels.cu
    ├── neuron.cu
    ├── nvmatrix
    │   ├── nvmatrix.cu
    │   └── nvmatrix_kernels.cu
    ├── pyconvnet.cu
    ├── util.cu
    ├── weights.cu
    └── worker.cu
└── util.py


/ABOUT:
--------------------------------------------------------------------------------
1 | cuda-convnet
2 | High-performance C++/CUDA implementation of abstract convolutional neural networks
3 | 
4 | See http://code.google.com/p/cuda-convnet/ for documentation.
5 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | MODELNAME := _ConvNet
 2 | 
 3 | INCLUDES :=  -I$(PYTHON_INCLUDE_PATH) -I$(NUMPY_INCLUDE_PATH) -I./include -I./include/common -I./include/cudaconv2 -I./include/nvmatrix
 4 | LIB := -lpthread -L$(ATLAS_LIB_PATH) -L$(CUDA_INSTALL_PATH)/lib64 -lcblas
 5 | 
 6 | USECUBLAS   := 1
 7 | 
 8 | PYTHON_VERSION=$(shell python -V 2>&1 | cut -d ' ' -f 2 | cut -d '.' -f 1,2)
 9 | LIB += -lpython$(PYTHON_VERSION)
10 | 
11 | GENCODE_ARCH := -gencode=arch=compute_20,code=\"sm_20,compute_20\"
12 | COMMONFLAGS := -DNUMPY_INTERFACE -DMODELNAME=$(MODELNAME) -DINITNAME=init$(MODELNAME)
13 | 
14 | EXECUTABLE	:= $(MODELNAME).so
15 | 
16 | CUFILES				:= $(shell echo src/*.cu src/cudaconv2/*.cu src/nvmatrix/*.cu)
17 | CU_DEPS				:= $(shell echo include/*.cuh include/cudaconv2/*.cuh include/nvmatrix/*.cuh)
18 | CCFILES				:= $(shell echo src/common/*.cpp)
19 | C_DEPS				:= $(shell echo include/common/*.h)
20 | 
21 | include common-gcc-cuda-4.0.mk
22 | 	
23 | makedirectories:
24 | 	$(VERBOSE)mkdir -p $(LIBDIR)
25 | 	$(VERBOSE)mkdir -p $(OBJDIR)/src/cudaconv2
26 | 	$(VERBOSE)mkdir -p $(OBJDIR)/src/nvmatrix
27 | 	$(VERBOSE)mkdir -p $(OBJDIR)/src/common
28 | 	$(VERBOSE)mkdir -p $(TARGETDIR)
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cuda-convnet
2 | Alex Krizhevsky's original code from Google Code.  Required for [yanglab-convnet](https://github.com/ulrichstern/yanglab-convnet).
3 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Fill in these environment variables.
 4 | # I have tested this code with CUDA 4.0, 4.1, and 4.2. 
 5 | # Only use Fermi-generation cards. Older cards won't work.
 6 | 
 7 | # If you're not sure what these paths should be, 
 8 | # you can use the find command to try to locate them.
 9 | # For example, NUMPY_INCLUDE_PATH contains the file
10 | # arrayobject.h. So you can search for it like this:
11 | # 
12 | # find /usr -name arrayobject.h
13 | # 
14 | # (it'll almost certainly be under /usr)
15 | 
16 | # CUDA toolkit installation directory.
17 | export CUDA_INSTALL_PATH=/usr/local/cuda
18 | 
19 | # CUDA SDK installation directory.
20 | export CUDA_SDK_PATH=/home/spoon/NVIDIA_GPU_Computing_SDK
21 | 
22 | # Python include directory. This should contain the file Python.h, among others.
23 | export PYTHON_INCLUDE_PATH=/usr/include/python2.7
24 | 
25 | # Numpy include directory. This should contain the file arrayobject.h, among others.
26 | export NUMPY_INCLUDE_PATH=/usr/lib/pymodules/python2.7/numpy/core/include/numpy
27 | 
28 | # ATLAS library directory. This should contain the file libcblas.so, among others.
29 | export ATLAS_LIB_PATH=/usr/lib/atlas-base
30 | 
31 | make $*
32 | 
33 | 


--------------------------------------------------------------------------------
/convdata.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | # 
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | from data import *
 26 | import numpy.random as nr
 27 | import numpy as n
 28 | import random as r
 29 | 
 30 | class CIFARDataProvider(LabeledMemoryDataProvider):
 31 |     def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
 32 |         LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
 33 |         self.data_mean = self.batch_meta['data_mean']
 34 |         self.num_colors = 3
 35 |         self.img_size = 32
 36 |         # Subtract the mean from the data and make sure that both data and
 37 |         # labels are in single-precision floating point.
 38 |         for d in self.data_dic:
 39 |             # This converts the data matrix to single precision and makes sure that it is C-ordered
 40 |             d['data'] = n.require((d['data'] - self.data_mean), dtype=n.single, requirements='C')
 41 |             d['labels'] = n.require(d['labels'].reshape((1, d['data'].shape[1])), dtype=n.single, requirements='C')
 42 | 
 43 |     def get_next_batch(self):
 44 |         epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self)
 45 |         return epoch, batchnum, [datadic['data'], datadic['labels']]
 46 | 
 47 |     # Returns the dimensionality of the two data matrices returned by get_next_batch
 48 |     # idx is the index of the matrix. 
 49 |     def get_data_dims(self, idx=0):
 50 |         return self.img_size**2 * self.num_colors if idx == 0 else 1
 51 |     
 52 |     # Takes as input an array returned by get_next_batch
 53 |     # Returns a (numCases, imgSize, imgSize, 3) array which can be
 54 |     # fed to pylab for plotting.
 55 |     # This is used by shownet.py to plot test case predictions.
 56 |     def get_plottable_data(self, data):
 57 |         return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.img_size, self.img_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
 58 |     
 59 | class CroppedCIFARDataProvider(LabeledMemoryDataProvider):
 60 |     def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
 61 |         LabeledMemoryDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
 62 | 
 63 |         self.border_size = dp_params['crop_border']
 64 |         self.inner_size = 32 - self.border_size*2
 65 |         self.multiview = dp_params['multiview_test'] and test
 66 |         self.num_views = 5*2
 67 |         self.data_mult = self.num_views if self.multiview else 1
 68 |         self.num_colors = 3
 69 |         
 70 |         for d in self.data_dic:
 71 |             d['data'] = n.require(d['data'], requirements='C')
 72 |             d['labels'] = n.require(n.tile(d['labels'].reshape((1, d['data'].shape[1])), (1, self.data_mult)), requirements='C')
 73 |         
 74 |         self.cropped_data = [n.zeros((self.get_data_dims(), self.data_dic[0]['data'].shape[1]*self.data_mult), dtype=n.single) for x in xrange(2)]
 75 | 
 76 |         self.batches_generated = 0
 77 |         self.data_mean = self.batch_meta['data_mean'].reshape((3,32,32))[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size].reshape((self.get_data_dims(), 1))
 78 | 
 79 |     def get_next_batch(self):
 80 |         epoch, batchnum, datadic = LabeledMemoryDataProvider.get_next_batch(self)
 81 | 
 82 |         cropped = self.cropped_data[self.batches_generated % 2]
 83 | 
 84 |         self.__trim_borders(datadic['data'], cropped)
 85 |         cropped -= self.data_mean
 86 |         self.batches_generated += 1
 87 |         return epoch, batchnum, [cropped, datadic['labels']]
 88 |         
 89 |     def get_data_dims(self, idx=0):
 90 |         return self.inner_size**2 * 3 if idx == 0 else 1
 91 | 
 92 |     # Takes as input an array returned by get_next_batch
 93 |     # Returns a (numCases, imgSize, imgSize, 3) array which can be
 94 |     # fed to pylab for plotting.
 95 |     # This is used by shownet.py to plot test case predictions.
 96 |     def get_plottable_data(self, data):
 97 |         return n.require((data + self.data_mean).T.reshape(data.shape[1], 3, self.inner_size, self.inner_size).swapaxes(1,3).swapaxes(1,2) / 255.0, dtype=n.single)
 98 |     
 99 |     def __trim_borders(self, x, target):
100 |         y = x.reshape(3, 32, 32, x.shape[1])
101 | 
102 |         if self.test: # don't need to loop over cases
103 |             if self.multiview:
104 |                 start_positions = [(0,0),  (0, self.border_size*2),
105 |                                    (self.border_size, self.border_size),
106 |                                   (self.border_size*2, 0), (self.border_size*2, self.border_size*2)]
107 |                 end_positions = [(sy+self.inner_size, sx+self.inner_size) for (sy,sx) in start_positions]
108 |                 for i in xrange(self.num_views/2):
109 |                     pic = y[:,start_positions[i][0]:end_positions[i][0],start_positions[i][1]:end_positions[i][1],:]
110 |                     target[:,i * x.shape[1]:(i+1)* x.shape[1]] = pic.reshape((self.get_data_dims(),x.shape[1]))
111 |                     target[:,(self.num_views/2 + i) * x.shape[1]:(self.num_views/2 +i+1)* x.shape[1]] = pic[:,:,::-1,:].reshape((self.get_data_dims(),x.shape[1]))
112 |             else:
113 |                 pic = y[:,self.border_size:self.border_size+self.inner_size,self.border_size:self.border_size+self.inner_size, :] # just take the center for now
114 |                 target[:,:] = pic.reshape((self.get_data_dims(), x.shape[1]))
115 |         else:
116 |             for c in xrange(x.shape[1]): # loop over cases
117 |                 startY, startX = nr.randint(0,self.border_size*2 + 1), nr.randint(0,self.border_size*2 + 1)
118 |                 endY, endX = startY + self.inner_size, startX + self.inner_size
119 |                 pic = y[:,startY:endY,startX:endX, c]
120 |                 if nr.randint(2) == 0: # also flip the image with 50% probability
121 |                     pic = pic[:,:,::-1]
122 |                 target[:,c] = pic.reshape((self.get_data_dims(),))
123 |     
class DummyConvNetDataProvider(LabeledDummyDataProvider):
    """Dummy labeled provider whose matrices are transposed for the convnet.

    Each matrix produced by LabeledDummyDataProvider is transposed and made
    C-ordered before being handed out.
    """
    def __init__(self, data_dim):
        LabeledDummyDataProvider.__init__(self, data_dim)

    def get_next_batch(self):
        """Return (epoch, batchnum, [data, labels]) with transposed matrices."""
        epoch, batchnum, batch = LabeledDummyDataProvider.get_next_batch(self)

        for key in ('data', 'labels'):
            batch[key] = n.require(batch[key].T, requirements='C')

        return epoch, batchnum, [batch['data'], batch['labels']]

    def get_data_dims(self, idx=0):
        """Dimensionality of the idx-th matrix returned by get_next_batch."""
        if idx != 0:
            return 1
        return self.batch_meta['num_vis']
139 | 


--------------------------------------------------------------------------------
/convnet.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | # 
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import numpy as n
 26 | import numpy.random as nr
 27 | from util import *
 28 | from data import *
 29 | from options import *
 30 | from gpumodel import *
 31 | import sys
 32 | import math as m
 33 | import layer as lay
 34 | from convdata import *
 35 | from os import linesep as NL
 36 | #import pylab as pl
 37 | 
 38 | class ConvNet(IGPUModel):
 39 |     def __init__(self, op, load_dic, dp_params={}):
 40 |         filename_options = []
 41 |         dp_params['multiview_test'] = op.get_value('multiview_test')
 42 |         dp_params['crop_border'] = op.get_value('crop_border')
 43 |         IGPUModel.__init__(self, "ConvNet", op, load_dic, filename_options, dp_params=dp_params)
 44 |         
 45 |     def import_model(self):
 46 |         lib_name = "pyconvnet" if is_windows_machine() else "_ConvNet"
 47 |         print "========================="
 48 |         print "Importing %s C++ module" % lib_name
 49 |         self.libmodel = __import__(lib_name) 
 50 |         
 51 |     def init_model_lib(self):
 52 |         self.libmodel.initModel(self.layers, self.minibatch_size, self.device_ids[0])
 53 |         
 54 |     def init_model_state(self):
 55 |         ms = self.model_state
 56 |         if self.load_file:
 57 |             ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self, ms['layers'])
 58 |         else:
 59 |             ms['layers'] = lay.LayerParser.parse_layers(self.layer_def, self.layer_params, self)
 60 |         self.layers_dic = dict(zip([l['name'] for l in ms['layers']], ms['layers']))
 61 |         
 62 |         logreg_name = self.op.get_value('logreg_name')
 63 |         if logreg_name:
 64 |             self.logreg_idx = self.get_layer_idx(logreg_name, check_type='cost.logreg')
 65 |         
 66 |         # Convert convolutional layers to local
 67 |         if len(self.op.get_value('conv_to_local')) > 0:
 68 |             for i, layer in enumerate(ms['layers']):
 69 |                 if layer['type'] == 'conv' and layer['name'] in self.op.get_value('conv_to_local'):
 70 |                     lay.LocalLayerParser.conv_to_local(ms['layers'], i)
 71 |         # Decouple weight matrices
 72 |         if len(self.op.get_value('unshare_weights')) > 0:
 73 |             for name_str in self.op.get_value('unshare_weights'):
 74 |                 if name_str:
 75 |                     name = lay.WeightLayerParser.get_layer_name(name_str)
 76 |                     if name is not None:
 77 |                         name, idx = name[0], name[1]
 78 |                         if name not in self.layers_dic:
 79 |                             raise ModelStateException("Layer '%s' does not exist; unable to unshare" % name)
 80 |                         layer = self.layers_dic[name]
 81 |                         lay.WeightLayerParser.unshare_weights(layer, ms['layers'], matrix_idx=idx)
 82 |                     else:
 83 |                         raise ModelStateException("Invalid layer name '%s'; unable to unshare." % name_str)
 84 |         self.op.set_value('conv_to_local', [], parse=False)
 85 |         self.op.set_value('unshare_weights', [], parse=False)
 86 |     
 87 |     def get_layer_idx(self, layer_name, check_type=None):
 88 |         try:
 89 |             layer_idx = [l['name'] for l in self.model_state['layers']].index(layer_name)
 90 |             if check_type:
 91 |                 layer_type = self.model_state['layers'][layer_idx]['type']
 92 |                 if layer_type != check_type:
 93 |                     raise ModelStateException("Layer with name '%s' has type '%s'; should be '%s'." % (layer_name, layer_type, check_type))
 94 |             return layer_idx
 95 |         except ValueError:
 96 |             raise ModelStateException("Layer with name '%s' not defined." % layer_name)
 97 | 
 98 |     def fill_excused_options(self):
 99 |         if self.op.get_value('check_grads'):
100 |             self.op.set_value('save_path', '')
101 |             self.op.set_value('train_batch_range', '0')
102 |             self.op.set_value('test_batch_range', '0')
103 |             self.op.set_value('data_path', '')
104 |             
105 |     # Make sure the data provider returned data in proper format
106 |     def parse_batch_data(self, batch_data, train=True):
107 |         if max(d.dtype != n.single for d in batch_data[2]):
108 |             raise DataProviderException("All matrices returned by data provider must consist of single-precision floats.")
109 |         return batch_data
110 | 
111 |     def start_batch(self, batch_data, train=True):
112 |         data = batch_data[2]
113 |         if self.check_grads:
114 |             self.libmodel.checkGradients(data)
115 |         elif not train and self.multiview_test:
116 |             self.libmodel.startMultiviewTest(data, self.train_data_provider.num_views, self.logreg_idx)
117 |         else:
118 |             self.libmodel.startBatch(data, not train)
119 |         
120 |     def print_iteration(self):
121 |         print "%d.%d..." % (self.epoch, self.batchnum),
122 |         
123 |     def print_train_time(self, compute_time_py):
124 |         print "(%.3f sec)" % (compute_time_py)
125 |         
126 |     def print_costs(self, cost_outputs):
127 |         costs, num_cases = cost_outputs[0], cost_outputs[1]
128 |         for errname in costs.keys():
129 |             costs[errname] = [(v/num_cases) for v in costs[errname]]
130 |             print "%s: " % errname,
131 |             print ", ".join("%6f" % v for v in costs[errname]),
132 |             if sum(m.isnan(v) for v in costs[errname]) > 0 or sum(m.isinf(v) for v in costs[errname]):
133 |                 print "^ got nan or inf!"
134 |                 sys.exit(1)
135 |         
136 |     def print_train_results(self):
137 |         self.print_costs(self.train_outputs[-1])
138 |         
139 |     def print_test_status(self):
140 |         pass
141 |         
142 |     def print_test_results(self):
143 |         print ""
144 |         print "======================Test output======================"
145 |         self.print_costs(self.test_outputs[-1])
146 |         print ""
147 |         print "-------------------------------------------------------", 
148 |         for i,l in enumerate(self.layers): # This is kind of hacky but will do for now.
149 |             if 'weights' in l:
150 |                 if type(l['weights']) == n.ndarray:
151 |                     print "%sLayer '%s' weights: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['weights'])), n.mean(n.abs(l['weightsInc']))),
152 |                 elif type(l['weights']) == list:
153 |                     print ""
154 |                     print NL.join("Layer '%s' weights[%d]: %e [%e]" % (l['name'], i, n.mean(n.abs(w)), n.mean(n.abs(wi))) for i,(w,wi) in enumerate(zip(l['weights'],l['weightsInc']))),
155 |                 print "%sLayer '%s' biases: %e [%e]" % (NL, l['name'], n.mean(n.abs(l['biases'])), n.mean(n.abs(l['biasesInc']))),
156 |         print ""
157 |         
158 |     def conditional_save(self):
159 |         self.save_state()
160 |         print "-------------------------------------------------------"
161 |         print "Saved checkpoint to %s" % os.path.join(self.save_path, self.save_file)
162 |         print "=======================================================",
163 |         
164 |     def aggregate_test_outputs(self, test_outputs):
165 |         num_cases = sum(t[1] for t in test_outputs)
166 |         for i in xrange(1 ,len(test_outputs)):
167 |             for k,v in test_outputs[i][0].items():
168 |                 for j in xrange(len(v)):
169 |                     test_outputs[0][0][k][j] += test_outputs[i][0][k][j]
170 |         return (test_outputs[0][0], num_cases)
171 |     
172 |     @classmethod
173 |     def get_options_parser(cls):
174 |         op = IGPUModel.get_options_parser()
175 |         op.add_option("mini", "minibatch_size", IntegerOptionParser, "Minibatch size", default=128)
176 |         op.add_option("layer-def", "layer_def", StringOptionParser, "Layer definition file", set_once=True)
177 |         op.add_option("layer-params", "layer_params", StringOptionParser, "Layer parameter file")
178 |         op.add_option("check-grads", "check_grads", BooleanOptionParser, "Check gradients and quit?", default=0, excuses=['data_path','save_path','train_batch_range','test_batch_range'])
179 |         op.add_option("multiview-test", "multiview_test", BooleanOptionParser, "Cropped DP: test on multiple patches?", default=0, requires=['logreg_name'])
180 |         op.add_option("crop-border", "crop_border", IntegerOptionParser, "Cropped DP: crop border size", default=4, set_once=True)
181 |         op.add_option("logreg-name", "logreg_name", StringOptionParser, "Cropped DP: logreg layer name (for --multiview-test)", default="")
182 |         op.add_option("conv-to-local", "conv_to_local", ListOptionParser(StringOptionParser), "Convert given conv layers to unshared local", default=[])
183 |         op.add_option("unshare-weights", "unshare_weights", ListOptionParser(StringOptionParser), "Unshare weight matrices in given layers", default=[])
184 |         op.add_option("conserve-mem", "conserve_mem", BooleanOptionParser, "Conserve GPU memory (slower)?", default=0)
185 |                 
186 |         op.delete_option('max_test_err')
187 |         op.options["max_filesize_mb"].default = 0
188 |         op.options["testing_freq"].default = 50
189 |         op.options["num_epochs"].default = 50000
190 |         op.options['dp_type'].default = None
191 |         
192 |         DataProvider.register_data_provider('cifar', 'CIFAR', CIFARDataProvider)
193 |         DataProvider.register_data_provider('dummy-cn-n', 'Dummy ConvNet', DummyConvNetDataProvider)
194 |         DataProvider.register_data_provider('cifar-cropped', 'Cropped CIFAR', CroppedCIFARDataProvider)
195 |         
196 |         return op
197 |     
if __name__ == "__main__":
    # Script entry point: parse the command line with the convnet option
    # parser, build the model from the result and start it.
    #nr.seed(5)
    op, load_dic = IGPUModel.parse_options(ConvNet.get_options_parser())
    model = ConvNet(op, load_dic)
    model.start()
205 | 


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | # 
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import numpy as n
 26 | from numpy.random import randn, rand, random_integers
 27 | import os
 28 | from util import *
 29 | 
 30 | BATCH_META_FILE = "batches.meta"
 31 | 
 32 | class DataProvider:
 33 |     BATCH_REGEX = re.compile('^data_batch_(\d+)(\.\d+)?$')
 34 |     def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params={}, test=False):
 35 |         if batch_range == None:
 36 |             batch_range = DataProvider.get_batch_nums(data_dir)
 37 |         if init_batchnum is None or init_batchnum not in batch_range:
 38 |             init_batchnum = batch_range[0]
 39 | 
 40 |         self.data_dir = data_dir
 41 |         self.batch_range = batch_range
 42 |         self.curr_epoch = init_epoch
 43 |         self.curr_batchnum = init_batchnum
 44 |         self.dp_params = dp_params
 45 |         self.batch_meta = self.get_batch_meta(data_dir)
 46 |         self.data_dic = None
 47 |         self.test = test
 48 |         self.batch_idx = batch_range.index(init_batchnum)
 49 | 
 50 |     def get_next_batch(self):
 51 |         if self.data_dic is None or len(self.batch_range) > 1:
 52 |             self.data_dic = self.get_batch(self.curr_batchnum)
 53 |         epoch, batchnum = self.curr_epoch, self.curr_batchnum
 54 |         self.advance_batch()
 55 | 
 56 |         return epoch, batchnum, self.data_dic
 57 |     
 58 |     def __add_subbatch(self, batch_num, sub_batchnum, batch_dic):
 59 |         subbatch_path = "%s.%d" % (os.path.join(self.data_dir, self.get_data_file_name(batch_num)), sub_batchnum)
 60 |         if os.path.exists(subbatch_path):
 61 |             sub_dic = unpickle(subbatch_path)
 62 |             self._join_batches(batch_dic, sub_dic)
 63 |         else:
 64 |             raise IndexError("Sub-batch %d.%d does not exist in %s" % (batch_num,sub_batchnum, self.data_dir))
 65 |         
 66 |     def _join_batches(self, main_batch, sub_batch):
 67 |         main_batch['data'] = n.r_[main_batch['data'], sub_batch['data']]
 68 |         
 69 |     def get_batch(self, batch_num):
 70 |         if os.path.exists(self.get_data_file_name(batch_num) + '.1'): # batch in sub-batches
 71 |             dic = unpickle(self.get_data_file_name(batch_num) + '.1')
 72 |             sb_idx = 2
 73 |             while True:
 74 |                 try:
 75 |                     self.__add_subbatch(batch_num, sb_idx, dic)
 76 |                     sb_idx += 1
 77 |                 except IndexError:
 78 |                     break
 79 |         else:
 80 |             dic = unpickle(self.get_data_file_name(batch_num))
 81 |         return dic
 82 |     
 83 |     def get_data_dims(self):
 84 |         return self.batch_meta['num_vis']
 85 |     
 86 |     def advance_batch(self):
 87 |         self.batch_idx = self.get_next_batch_idx()
 88 |         self.curr_batchnum = self.batch_range[self.batch_idx]
 89 |         if self.batch_idx == 0: # we wrapped
 90 |             self.curr_epoch += 1
 91 |             
 92 |     def get_next_batch_idx(self):
 93 |         return (self.batch_idx + 1) % len(self.batch_range)
 94 |     
 95 |     def get_next_batch_num(self):
 96 |         return self.batch_range[self.get_next_batch_idx()]
 97 |     
 98 |     # get filename of current batch
 99 |     def get_data_file_name(self, batchnum=None):
100 |         if batchnum is None:
101 |             batchnum = self.curr_batchnum
102 |         return os.path.join(self.data_dir, 'data_batch_%d' % batchnum)
103 |     
104 |     @classmethod
105 |     def get_instance(cls, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, type="default", dp_params={}, test=False):
106 |         # why the fuck can't i reference DataProvider in the original definition?
107 |         #cls.dp_classes['default'] = DataProvider
108 |         type = type or DataProvider.get_batch_meta(data_dir)['dp_type'] # allow data to decide data provider
109 |         if type.startswith("dummy-"):
110 |             name = "-".join(type.split('-')[:-1]) + "-n"
111 |             if name not in dp_types:
112 |                 raise DataProviderException("No such data provider: %s" % type)
113 |             _class = dp_classes[name]
114 |             dims = int(type.split('-')[-1])
115 |             return _class(dims)
116 |         elif type in dp_types:
117 |             _class = dp_classes[type]
118 |             return _class(data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
119 |         
120 |         raise DataProviderException("No such data provider: %s" % type)
121 |     
122 |     @classmethod
123 |     def register_data_provider(cls, name, desc, _class):
124 |         if name in dp_types:
125 |             raise DataProviderException("Data provider %s already registered" % name)
126 |         dp_types[name] = desc
127 |         dp_classes[name] = _class
128 |         
129 |     @staticmethod
130 |     def get_batch_meta(data_dir):
131 |         return unpickle(os.path.join(data_dir, BATCH_META_FILE))
132 |     
133 |     @staticmethod
134 |     def get_batch_filenames(srcdir):
135 |         return sorted([f for f in os.listdir(srcdir) if DataProvider.BATCH_REGEX.match(f)], key=alphanum_key)
136 |     
137 |     @staticmethod
138 |     def get_batch_nums(srcdir):
139 |         names = DataProvider.get_batch_filenames(srcdir)
140 |         return sorted(list(set(int(DataProvider.BATCH_REGEX.match(n).group(1)) for n in names)))
141 |         
142 |     @staticmethod
143 |     def get_num_batches(srcdir):
144 |         return len(DataProvider.get_batch_nums(srcdir))
145 |     
class DummyDataProvider(DataProvider):
    """Data provider that serves random data instead of reading batch files.

    It exposes a single batch (number 1). DataProvider.__init__ is not called;
    the fields the batch-advancing code reads are set by hand.
    """
    def __init__(self, data_dim, num_cases=512):
        """
        data_dim  -- stored as batch_meta['num_vis']
        num_cases -- number of rows generated per batch
        """
        self.batch_range = [1]
        self.batch_meta = {'num_vis': data_dim, 'data_in_rows':True}
        self.num_cases = num_cases
        self.curr_epoch = 1
        self.curr_batchnum = 1
        self.batch_idx = 0

    def get_next_batch(self):
        """Return (epoch, batchnum, {'data': array}) for the current batch, then advance.

        The epoch and batch number describe the batch being returned, i.e. they
        are read BEFORE advance_batch() runs, matching MemoryDataProvider.
        (Previously the post-advance values were returned, so the first batch
        was already reported as epoch 2.)
        """
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        # get_data_dims() is inherited; presumably it is derived from batch_meta -- TODO confirm
        data = rand(self.num_cases, self.get_data_dims()).astype(n.single)
        return epoch, batchnum, {'data':data}
160 | 
161 |     
class LabeledDummyDataProvider(DummyDataProvider):
    """Dummy provider that serves random data together with random integer labels.

    Like DummyDataProvider it exposes a single batch (number 1) and does not
    call any base-class __init__.
    """
    def __init__(self, data_dim, num_classes=10, num_cases=512):
        """
        data_dim    -- stored as batch_meta['num_vis']
        num_classes -- number of distinct labels; label names are "0".."num_classes-1"
        num_cases   -- number of rows generated per batch
        """
        self.batch_range = [1]
        self.batch_meta = {'num_vis': data_dim,
                           'label_names': [str(x) for x in range(num_classes)],
                           'data_in_rows':True}
        self.num_cases = num_cases
        self.num_classes = num_classes
        self.curr_epoch = 1
        self.curr_batchnum = 1
        self.batch_idx = 0

    def get_num_classes(self):
        """Return the number of label classes given at construction."""
        return self.num_classes

    def get_next_batch(self):
        """Return (epoch, batchnum, {'data': ..., 'labels': ...}) for the current batch, then advance.

        The epoch and batch number are read BEFORE advance_batch() runs so they
        describe the batch being returned, matching LabeledMemoryDataProvider.
        """
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        # get_data_dims() is inherited; presumably it is derived from batch_meta -- TODO confirm
        data = rand(self.num_cases, self.get_data_dims()).astype(n.single)
        # randint's upper bound is exclusive, so this draws from 0..num_classes-1,
        # the same range the deprecated random_integers(0, num_classes-1, ...) covered.
        raw_labels = n.random.randint(0, self.num_classes, self.num_cases)
        # n.c_ turns the 1-D label vector into a single column
        labels = n.require(n.c_[raw_labels], requirements='C', dtype=n.single)

        return epoch, batchnum, {'data':data, 'labels':labels}
185 | 
class MemoryDataProvider(DataProvider):
    """Data provider that loads every batch in batch_range into memory at construction."""
    def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
        """Initialise the base provider, then load each batch via get_batch().

        All arguments are forwarded unchanged to DataProvider.__init__.
        """
        DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
        # data_dic[i] holds the batch whose number is self.batch_range[i]
        self.data_dic = []
        for i in self.batch_range:
            self.data_dic.append(self.get_batch(i))

    def get_next_batch(self):
        """Return (epoch, batchnum, batch) for the current batch, then advance."""
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()

        # Locate the batch by its position in batch_range. The previous
        # batchnum - batch_range[0] arithmetic was only correct for contiguous,
        # ascending ranges (e.g. it picked the wrong entry for [1, 3, 5]).
        bidx = list(self.batch_range).index(batchnum)
        return epoch, batchnum, self.data_dic[bidx]
198 | 
class LabeledDataProvider(DataProvider):
    """Data provider whose batch meta data carries a 'label_names' list."""
    def __init__(self, data_dir, batch_range=None, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
        """Forward all arguments to DataProvider.__init__.

        dp_params -- dict of extra provider parameters; a fresh empty dict is
                     used when omitted, so instances never share a default dict.
        """
        if dp_params is None:
            dp_params = {}
        DataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)

    def get_num_classes(self):
        """Return the number of entries in batch_meta['label_names']."""
        return len(self.batch_meta['label_names'])
205 |     
class LabeledMemoryDataProvider(LabeledDataProvider):
    """Labeled data provider that loads every batch into memory at construction."""
    def __init__(self, data_dir, batch_range, init_epoch=1, init_batchnum=None, dp_params=None, test=False):
        """Initialise the base provider, then unpickle every batch file.

        dp_params -- dict of extra provider parameters; a fresh empty dict is
                     used when omitted, so instances never share a default dict.
        """
        if dp_params is None:
            dp_params = {}
        LabeledDataProvider.__init__(self, data_dir, batch_range, init_epoch, init_batchnum, dp_params, test)
        # Iterate self.batch_range (as MemoryDataProvider does) so data_dic lines
        # up with the range that advance_batch()/get_next_batch() index into.
        self.data_dic = []
        for i in self.batch_range:
            batch = unpickle(self.get_data_file_name(i))
            # store the labels as a single-precision column
            batch["labels"] = n.c_[n.require(batch['labels'], dtype=n.single)]
            self.data_dic.append(batch)

    def get_next_batch(self):
        """Return (epoch, batchnum, batch) for the current batch, then advance."""
        epoch, batchnum = self.curr_epoch, self.curr_batchnum
        self.advance_batch()
        # Locate the batch by its position in batch_range. The previous
        # batchnum - batch_range[0] arithmetic was only correct for contiguous,
        # ascending ranges.
        bidx = list(self.batch_range).index(batchnum)
        return epoch, batchnum, self.data_dic[bidx]
219 |     
# Registries consulted by DataProvider.get_instance and extended by
# DataProvider.register_data_provider. Both dicts share the same keys.

# provider name -> human-readable description
dp_types = {
    "default": "The default data provider; loads one batch into memory at a time",
    "memory": "Loads the entire dataset into memory",
    "labeled": "Returns data and labels (used by classifiers)",
    "labeled-memory": "Combination labeled + memory",
    "dummy-n": "Dummy data provider for n-dimensional data",
    "dummy-labeled-n": "Labeled dummy data provider for n-dimensional data",
}

# provider name -> class implementing it
dp_classes = {
    "default": DataProvider,
    "memory": MemoryDataProvider,
    "labeled": LabeledDataProvider,
    "labeled-memory": LabeledMemoryDataProvider,
    "dummy-n": DummyDataProvider,
    "dummy-labeled-n": LabeledDummyDataProvider,
}
232 |     
class DataProviderException(Exception):
    """Raised when a data provider name is unknown or is registered twice."""
235 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-18pct.cfg:
--------------------------------------------------------------------------------
 1 | # 18% error on CIFAR-10 in 20 minutes - layer parameter file
 2 | 
 3 | # Reduce all learning rates by a factor of 10 after 120 epochs.
 4 | # Then reduce them by another factor of 10 after 10 more epochs.
 5 | 
 6 | [conv1]
 7 | epsW=0.001
 8 | epsB=0.002
 9 | momW=0.9
10 | momB=0.9
11 | wc=0.004
12 | 
13 | [conv2]
14 | epsW=0.001
15 | epsB=0.002
16 | momW=0.9
17 | momB=0.9
18 | wc=0.004
19 | 
20 | [conv3]
21 | epsW=0.001
22 | epsB=0.002
23 | momW=0.9
24 | momB=0.9
25 | wc=0.004
26 | 
27 | [fc10]
28 | epsW=0.001
29 | epsB=0.002
30 | momW=0.9
31 | momB=0.9
32 | wc=1
33 | 
34 | [logprob]
35 | coeff=1
36 | 
37 | [rnorm1]
38 | scale=0.00005
39 | pow=.75
40 | 
41 | [rnorm2]
42 | scale=0.00005
43 | pow=.75
44 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-19pct.cfg:
--------------------------------------------------------------------------------
 1 | # 19% error on CIFAR-10 in 20 minutes - layer parameter file 
 2 | # Set wc to 0 for translations -- 14.2%
 3 | 
 4 | [conv1]
 5 | epsW=0.001
 6 | epsB=0.002
 7 | momW=0.9
 8 | momB=0.9
 9 | wc=0.004
10 | 
11 | [conv2]
12 | epsW=0.001
13 | epsB=0.002
14 | momW=0.9
15 | momB=0.9
16 | wc=0.004
17 | 
18 | [conv3]
19 | epsW=0.001
20 | epsB=0.002
21 | momW=0.9
22 | momB=0.9
23 | wc=0.004
24 | 
25 | [fc10]
26 | epsW=0.001
27 | epsB=0.002
28 | momW=0.9
29 | momB=0.9
30 | wc=3
31 | 
32 | [logprob]
33 | coeff=1
34 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-80sec.cfg:
--------------------------------------------------------------------------------
 1 | # 26% error on CIFAR-10 in 80 seconds - layer parameter file 
 2 | # You should reduce the learning rate after 8 epochs by a factor of 10.
 3 | 
 4 | [conv1]
 5 | epsW=0.001
 6 | epsB=0.002
 7 | momW=0.9
 8 | momB=0.9
 9 | wc=0.004
10 | 
11 | [conv2]
12 | epsW=0.001
13 | epsB=0.002
14 | momW=0.9
15 | momB=0.9
16 | wc=0.004
17 | 
18 | [conv3]
19 | epsW=0.001
20 | epsB=0.002
21 | momW=0.9
22 | momB=0.9
23 | wc=0.004
24 | 
25 | [fc64]
26 | epsW=0.001
27 | epsB=0.002
28 | momW=0.9
29 | momB=0.9
30 | wc=.03
31 | 
32 | [fc10]
33 | epsW=0.001
34 | epsB=0.002
35 | momW=0.9
36 | momB=0.9
37 | wc=.03
38 | 
39 | [logprob]
40 | coeff=1
41 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-conv-local-11pct.cfg:
--------------------------------------------------------------------------------
 1 | # 11% error on CIFAR-10 - layer parameter file 
 2 | # Methodology:
 3 | # 1. Train on batches 1-4, use batch 5 for validation.
 4 | # 2. After about 350 epochs, the validation error stops improving.
 5 | # 3. Fold in batch 5.
 6 | # 4. Train on batches 1-5 for about 150 more epochs, until the batch 5 error is near the errors for batches 1-4. It takes forever to actually get there but after 150 epochs it's close enough.
 7 | # 5. Lower learning rates (epsW) by a factor of 10 to 0.0001, train for 10 more epochs.
 8 | # 6. Lower learning rates (epsW) by another factor of 10 to 0.00001, train for 10 more epochs.
 9 | # 7. Stop. Test on batch 6 with --test-range=6 --multiview-test=1 --logreg-name=logprob (read more about what this does here: http://code.google.com/p/cuda-convnet/wiki/TrainingNet#Training_on_image_translations )
10 | 
11 | # More details about methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
12 | 
13 | [conv1]
14 | epsW=0.001
15 | epsB=0.002
16 | momW=0.9
17 | momB=0.9
18 | wc=0.000
19 | 
20 | [conv2]
21 | epsW=0.001
22 | epsB=0.002
23 | momW=0.9
24 | momB=0.9
25 | wc=0.000
26 | 
27 | [local3]
28 | epsW=0.001
29 | epsB=0.002
30 | momW=0.9
31 | momB=0.9
32 | wc=0.004
33 | 
34 | [local4]
35 | epsW=0.001
36 | epsB=0.002
37 | momW=0.9
38 | momB=0.9
39 | wc=0.004
40 | 
41 | [fc10]
42 | epsW=0.001
43 | epsB=0.002
44 | momW=0.9
45 | momB=0.9
46 | wc=0.01
47 | 
48 | [logprob]
49 | coeff=1
50 | 
51 | [rnorm1]
52 | scale=0.001
53 | pow=0.75
54 | 
55 | [rnorm2]
56 | scale=0.001
57 | pow=0.75
58 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-conv-local-13pct.cfg:
--------------------------------------------------------------------------------
 1 | # 13% error on CIFAR-10 - layer parameter file 
 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
 3 | 
 4 | [conv1]
 5 | epsW=0.001
 6 | epsB=0.002
 7 | momW=0.9
 8 | momB=0.9
 9 | wc=0.00
10 | 
11 | [conv2]
12 | epsW=0.001
13 | epsB=0.002
14 | momW=0.9
15 | momB=0.9
16 | wc=0.00
17 | 
18 | [local3]
19 | epsW=0.001
20 | epsB=0.002
21 | momW=0.9
22 | momB=0.9
23 | wc=0.004
24 | 
25 | [local4]
26 | epsW=0.001
27 | epsB=0.002
28 | momW=0.9
29 | momB=0.9
30 | wc=0.004
31 | 
32 | [fc10]
33 | epsW=0.001
34 | epsB=0.002
35 | momW=0.9
36 | momB=0.9
37 | wc=0.004
38 | 
39 | [logprob]
40 | coeff=1
41 | 


--------------------------------------------------------------------------------
/example-layers/layer-params-example.cfg:
--------------------------------------------------------------------------------
 1 | [conv32]
 2 | epsW=0.001
 3 | epsB=0.002
 4 | momW=0.9
 5 | momB=0.9
 6 | wc=0
 7 | 
 8 | [local32]
 9 | epsW=0.001
10 | epsB=0.002
11 | momW=0.9
12 | momB=0.9
13 | wc=0
14 | 
15 | [fc1024]
16 | momW=0.9
17 | momB=0.9
18 | epsW=0.00001
19 | epsB=0.00002
20 | wc=0
21 | 
22 | [conv32-2]
23 | epsW=0.001
24 | epsB=0.002
25 | momW=0.9
26 | momB=0.9
27 | wc=0
28 | 
29 | [conv32-3]
30 | epsW=0.001
31 | epsB=0.002
32 | momW=0.9
33 | momB=0.9
34 | wc=0
35 | 
36 | [fc10]
37 | epsW=0.0001,0.001
38 | epsB=0.002
39 | momW=0.5,0.9
40 | momB=0.9
41 | wc=0,0
42 | 
43 | [logprob]
44 | coeff=1
45 | 


--------------------------------------------------------------------------------
/example-layers/layer-params.gc.cfg:
--------------------------------------------------------------------------------
 1 | [conv32]
 2 | epsW=0.001
 3 | epsB=0.002
 4 | momW=0.9
 5 | momB=0.9
 6 | wc=0
 7 | 
 8 | [local32]
 9 | epsW=0.001
10 | epsB=0.002
11 | momW=0.9
12 | momB=0.9
13 | wc=0
14 | 
15 | [fc10]
16 | wc=0,0
17 | momB=0
18 | momW=0,0
19 | epsW=0.00001,0.00001
20 | epsB=0.00002
21 | 
22 | [logprob]
23 | coeff=1
24 | 


--------------------------------------------------------------------------------
/example-layers/layers-18pct.cfg:
--------------------------------------------------------------------------------
  1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 
  2 | 
  3 | [data]
  4 | type=data
  5 | dataIdx=0
  6 | 
  7 | [labels]
  8 | type=data
  9 | dataIdx=1
 10 | 
 11 | [conv1]
 12 | type=conv
 13 | inputs=data
 14 | channels=3
 15 | filters=32
 16 | padding=2
 17 | stride=1
 18 | filterSize=5
 19 | initW=0.0001
 20 | partialSum=4
 21 | sharedBiases=1
 22 | 
 23 | [pool1]
 24 | type=pool
 25 | pool=max
 26 | inputs=conv1
 27 | start=0
 28 | sizeX=3
 29 | stride=2
 30 | outputsX=0
 31 | channels=32
 32 | neuron=relu
 33 | 
 34 | [rnorm1]
 35 | type=rnorm
 36 | inputs=pool1
 37 | channels=32
 38 | size=3
 39 | 
 40 | [conv2]
 41 | type=conv
 42 | inputs=rnorm1
 43 | filters=32
 44 | padding=2
 45 | stride=1
 46 | filterSize=5
 47 | channels=32
 48 | neuron=relu
 49 | initW=0.01
 50 | partialSum=4
 51 | sharedBiases=1
 52 | 
 53 | [pool2]
 54 | type=pool
 55 | pool=avg
 56 | inputs=conv2
 57 | start=0
 58 | sizeX=3
 59 | stride=2
 60 | outputsX=0
 61 | channels=32
 62 | 
 63 | [rnorm2]
 64 | type=rnorm
 65 | inputs=pool2
 66 | channels=32
 67 | size=3
 68 | 
 69 | [conv3]
 70 | type=conv
 71 | inputs=rnorm2
 72 | filters=64
 73 | padding=2
 74 | stride=1
 75 | filterSize=5
 76 | channels=32
 77 | neuron=relu
 78 | initW=0.01
 79 | partialSum=4
 80 | sharedBiases=1
 81 | 
 82 | [pool3]
 83 | type=pool
 84 | pool=avg
 85 | inputs=conv3
 86 | start=0
 87 | sizeX=3
 88 | stride=2
 89 | outputsX=0
 90 | channels=64
 91 | 
 92 | [fc10]
 93 | type=fc
 94 | outputs=10
 95 | inputs=pool3
 96 | initW=0.01
 97 | 
 98 | [probs]
 99 | type=softmax
100 | inputs=fc10
101 | 
102 | [logprob]
103 | type=cost.logreg
104 | inputs=labels,probs
105 | 


--------------------------------------------------------------------------------
/example-layers/layers-19pct.cfg:
--------------------------------------------------------------------------------
 1 | # 19% error on CIFAR-10 in 20 minutes - layer definition file 
 2 | 
 3 | [data]
 4 | type=data
 5 | dataIdx=0
 6 | 
 7 | [labels]
 8 | type=data
 9 | dataIdx=1
10 | 
11 | [conv1]
12 | type=conv
13 | inputs=data
14 | channels=3
15 | filters=32
16 | padding=2
17 | stride=1
18 | filterSize=5
19 | initW=0.0001
20 | partialSum=1
21 | sharedBiases=1
22 | 
23 | [pool1]
24 | type=pool
25 | pool=max
26 | inputs=conv1
27 | start=0
28 | sizeX=3
29 | stride=2
30 | outputsX=0
31 | channels=32
32 | neuron=relu
33 | 
34 | [conv2]
35 | type=conv
36 | inputs=pool1
37 | filters=32
38 | padding=2
39 | stride=1
40 | filterSize=5
41 | channels=32
42 | neuron=relu
43 | initW=0.01
44 | partialSum=1
45 | sharedBiases=1
46 | 
47 | [pool2]
48 | type=pool
49 | pool=avg
50 | inputs=conv2
51 | start=0
52 | sizeX=3
53 | stride=2
54 | outputsX=0
55 | channels=32
56 | 
57 | [conv3]
58 | type=conv
59 | inputs=pool2
60 | filters=64
61 | padding=2
62 | stride=1
63 | filterSize=5
64 | channels=32
65 | neuron=relu
66 | initW=0.01
67 | partialSum=1
68 | sharedBiases=1
69 | 
70 | [pool3]
71 | type=pool
72 | pool=avg
73 | inputs=conv3
74 | start=0
75 | sizeX=3
76 | stride=2
77 | outputsX=0
78 | channels=64
79 | 
80 | [fc10]
81 | type=fc
82 | outputs=10
83 | inputs=pool3
84 | initW=0.01
85 | 
86 | [probs]
87 | type=softmax
88 | inputs=fc10
89 | 
90 | [logprob]
91 | type=cost.logreg
92 | inputs=labels,probs
93 | 


--------------------------------------------------------------------------------
/example-layers/layers-80sec.cfg:
--------------------------------------------------------------------------------
  1 | # 26% error on CIFAR-10 in 80 seconds - layer definition file 
  2 | 
  3 | [data]
  4 | type=data
  5 | dataIdx=0
  6 | 
  7 | [labels]
  8 | type=data
  9 | dataIdx=1
 10 | 
 11 | [conv1]
 12 | type=conv
 13 | inputs=data
 14 | channels=3
 15 | filters=32
 16 | padding=2
 17 | stride=1
 18 | filterSize=5
 19 | initW=0.0001
 20 | partialSum=4
 21 | sharedBiases=1
 22 | 
 23 | [pool1]
 24 | type=pool
 25 | pool=max
 26 | inputs=conv1
 27 | start=0
 28 | sizeX=3
 29 | stride=2
 30 | outputsX=0
 31 | channels=32
 32 | neuron=relu
 33 | 
 34 | [conv2]
 35 | type=conv
 36 | inputs=pool1
 37 | filters=32
 38 | padding=2
 39 | stride=1
 40 | filterSize=5
 41 | channels=32
 42 | neuron=relu
 43 | initW=0.01
 44 | partialSum=4
 45 | sharedBiases=1
 46 | 
 47 | [pool2]
 48 | type=pool
 49 | pool=avg
 50 | inputs=conv2
 51 | start=0
 52 | sizeX=3
 53 | stride=2
 54 | outputsX=0
 55 | channels=32
 56 | 
 57 | [conv3]
 58 | type=conv
 59 | inputs=pool2
 60 | filters=64
 61 | padding=2
 62 | stride=1
 63 | filterSize=5
 64 | channels=32
 65 | neuron=relu
 66 | initW=0.01
 67 | partialSum=4
 68 | sharedBiases=1
 69 | 
 70 | [pool3]
 71 | type=pool
 72 | pool=avg
 73 | inputs=conv3
 74 | start=0
 75 | sizeX=3
 76 | stride=2
 77 | outputsX=0
 78 | channels=64
 79 | 
 80 | [fc64]
 81 | type=fc
 82 | outputs=64
 83 | inputs=pool3
 84 | initW=0.1
 85 | neuron=relu
 86 | 
 87 | [fc10]
 88 | type=fc
 89 | outputs=10
 90 | inputs=fc64
 91 | initW=0.1
 92 | 
 93 | [probs]
 94 | type=softmax
 95 | inputs=fc10
 96 | 
 97 | [logprob]
 98 | type=cost.logreg
 99 | inputs=labels,probs
100 | 


--------------------------------------------------------------------------------
/example-layers/layers-conv-local-11pct.cfg:
--------------------------------------------------------------------------------
  1 | [data]
  2 | type=data
  3 | dataIdx=0
  4 | 
  5 | [labels]
  6 | type=data
  7 | dataIdx=1
  8 | 
  9 | [conv1]
 10 | type=conv
 11 | inputs=data
 12 | channels=3
 13 | filters=64
 14 | padding=2
 15 | stride=1
 16 | filterSize=5
 17 | neuron=relu
 18 | initW=0.0001
 19 | partialSum=4
 20 | sharedBiases=1
 21 | 
 22 | [pool1]
 23 | type=pool
 24 | pool=max
 25 | inputs=conv1
 26 | start=0
 27 | sizeX=3
 28 | stride=2
 29 | outputsX=0
 30 | channels=64
 31 | 
 32 | [rnorm1]
 33 | type=cmrnorm
 34 | inputs=pool1
 35 | channels=64
 36 | size=9
 37 | 
 38 | [conv2]
 39 | type=conv
 40 | inputs=rnorm1
 41 | filters=64
 42 | padding=2
 43 | stride=1
 44 | filterSize=5
 45 | channels=64
 46 | neuron=relu
 47 | initW=0.01
 48 | partialSum=8
 49 | sharedBiases=1
 50 | 
 51 | [rnorm2]
 52 | type=cmrnorm
 53 | inputs=conv2
 54 | channels=64
 55 | size=9
 56 | 
 57 | [pool2]
 58 | type=pool
 59 | pool=max
 60 | inputs=rnorm2
 61 | start=0
 62 | sizeX=3
 63 | stride=2
 64 | outputsX=0
 65 | channels=64
 66 | 
 67 | [local3]
 68 | type=local
 69 | inputs=pool2
 70 | filters=64
 71 | padding=1
 72 | stride=1
 73 | filterSize=3
 74 | channels=64
 75 | neuron=relu
 76 | initW=0.04
 77 | 
 78 | [local4]
 79 | type=local
 80 | inputs=local3
 81 | filters=32
 82 | padding=1
 83 | stride=1
 84 | filterSize=3
 85 | channels=64
 86 | neuron=relu
 87 | initW=0.04
 88 | 
 89 | [fc10]
 90 | type=fc
 91 | outputs=10
 92 | inputs=local4
 93 | initW=0.01
 94 | 
 95 | [probs]
 96 | type=softmax
 97 | inputs=fc10
 98 | 
 99 | [logprob]
100 | type=cost.logreg
101 | inputs=labels,probs
102 | 


--------------------------------------------------------------------------------
/example-layers/layers-conv-local-13pct.cfg:
--------------------------------------------------------------------------------
 1 | # 13% error on CIFAR-10 in 20 minutes - layer definition file 
 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology
 3 | 
 4 | [data]
 5 | type=data
 6 | dataIdx=0
 7 | 
 8 | [labels]
 9 | type=data
10 | dataIdx=1
11 | 
12 | [conv1]
13 | type=conv
14 | inputs=data
15 | channels=3
16 | filters=64
17 | padding=2
18 | stride=1
19 | filterSize=5
20 | neuron=relu
21 | initW=0.0001
22 | partialSum=4
23 | sharedBiases=1
24 | 
25 | [pool1]
26 | type=pool
27 | pool=max
28 | inputs=conv1
29 | start=0
30 | sizeX=3
31 | stride=2
32 | outputsX=0
33 | channels=64
34 | 
35 | [conv2]
36 | type=conv
37 | inputs=pool1
38 | filters=64
39 | padding=2
40 | stride=1
41 | filterSize=5
42 | channels=64
43 | neuron=relu
44 | initW=0.01
45 | partialSum=8
46 | sharedBiases=1
47 | 
48 | [pool2]
49 | type=pool
50 | pool=max
51 | inputs=conv2
52 | start=0
53 | sizeX=3
54 | stride=2
55 | outputsX=0
56 | channels=64
57 | 
58 | [local3]
59 | type=local
60 | inputs=pool2
61 | filters=32
62 | padding=1
63 | stride=1
64 | filterSize=3
65 | channels=64
66 | neuron=relu
67 | initW=0.04
68 | 
69 | [local4]
70 | type=local
71 | inputs=local3
72 | filters=32
73 | padding=1
74 | stride=1
75 | filterSize=3
76 | channels=32
77 | neuron=relu
78 | initW=0.04
79 | 
80 | [fc10]
81 | type=fc
82 | outputs=10
83 | inputs=local4
84 | initW=0.01
85 | neuron=ident
86 | 
87 | [probs]
88 | type=softmax
89 | inputs=fc10
90 | 
91 | [logprob]
92 | type=cost.logreg
93 | inputs=labels,probs
94 | 


--------------------------------------------------------------------------------
/example-layers/layers-example.cfg:
--------------------------------------------------------------------------------
  1 | # This is a layer configuration file that contains all the 
  2 | # layer types supported by this code. It's not actually good for anything
  3 | # other than demonstrating how layers are specified and connected to one another.
  4 | 
  5 | # Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580.
  6 | # But there's no particular reason to run the net specified by this file. It's not actually good.
  7 | 
  8 | [data]
  9 | type=data
 10 | dataIdx=0
 11 | 
 12 | [labels]
 13 | type=data
 14 | dataIdx=1
 15 | 
 16 | [conv32]
 17 | type=conv
 18 | inputs=data
 19 | channels=3
 20 | filters=32
 21 | padding=4
 22 | stride=1
 23 | filterSize=9
 24 | neuron=logistic
 25 | initW=0.00001
 26 | partialSum=1
 27 | sharedBiases=true
 28 | 
 29 | [local32]
 30 | type=local
 31 | inputs=conv32
 32 | channels=32
 33 | filters=32
 34 | padding=4
 35 | stride=1
 36 | filterSize=9
 37 | neuron=logistic
 38 | initW=0.00001
 39 | 
 40 | [fc1024]
 41 | type=fc
 42 | outputs=1024
 43 | inputs=data
 44 | initW=0.001
 45 | neuron=relu
 46 | 
 47 | [maxpool]
 48 | type=pool
 49 | pool=max
 50 | inputs=local32
 51 | start=0
 52 | sizeX=4
 53 | stride=2
 54 | outputsX=0
 55 | channels=32
 56 | 
 57 | [rnorm1]
 58 | type=rnorm
 59 | inputs=maxpool
 60 | channels=32
 61 | sizeX=5
 62 | scale=0.0000125
 63 | pow=0.75
 64 | 
 65 | [cnorm1]
 66 | type=cnorm
 67 | inputs=rnorm1
 68 | channels=32
 69 | sizeX=7
 70 | scale=0.001
 71 | pow=0.5
 72 | 
 73 | [conv32-2]
 74 | type=conv
 75 | inputs=cnorm1
 76 | groups=4
 77 | channels=32
 78 | filters=32
 79 | padding=2
 80 | stride=1
 81 | filterSize=5
 82 | neuron=relu
 83 | initW=0.0001
 84 | partialSum=1
 85 | sharedBiases=false
 86 | 
 87 | [conv32-3]
 88 | type=conv
 89 | inputs=conv32-2
 90 | groups=4
 91 | channels=128
 92 | filters=32
 93 | padding=2
 94 | stride=2
 95 | filterSize=5
 96 | neuron=relu
 97 | initW=0.0001
 98 | partialSum=1
 99 | randSparse=true
100 | filterChannels=64
101 | 
102 | [fc10]
103 | type=fc
104 | outputs=10
105 | inputs=conv32-3,fc1024
106 | initW=0.0001,0.0001
107 | neuron=ident
108 | 
109 | [probs]
110 | type=softmax
111 | inputs=fc10
112 | 
113 | [logprob]
114 | type=cost.logreg
115 | inputs=labels,probs
116 | 


--------------------------------------------------------------------------------
/example-layers/layers.gc.cfg:
--------------------------------------------------------------------------------
 1 | [data]
 2 | type=data
 3 | dataIdx=0
 4 | 
 5 | [labels]
 6 | type=data
 7 | dataIdx=1
 8 | 
 9 | [conv32]
10 | type=conv
11 | inputs=data
12 | filters=16
13 | padding=0
14 | stride=1
15 | filterSize=3
16 | channels=3
17 | neuron=linear[3,2.2]
18 | initW=0.8
19 | partialSum=1
20 | sharedBiases=true
21 | 
22 | [avgpool]
23 | type=pool
24 | pool=avg
25 | inputs=conv32
26 | start=-2
27 | sizeX=4
28 | stride=4
29 | outputsX=0
30 | channels=16
31 | 
32 | [local32]
33 | type=local
34 | inputs=avgpool
35 | filters=32
36 | padding=2
37 | stride=3
38 | filterSize=5
39 | channels=16
40 | neuron=tanh[1.79,-0.66]
41 | initW=0.4
42 | #partialSum=1
43 | #sharedBiases=true
44 | groups=2
45 | randSparse=true
46 | 
47 | [fc10]
48 | type=fc
49 | outputs=10
50 | inputs=local32,conv32
51 | initW=0.8,0.008
52 | 
53 | [probs]
54 | type=softmax
55 | inputs=fc10
56 | 
57 | [logprob]
58 | type=cost.logreg
59 | inputs=labels,probs
60 | 


--------------------------------------------------------------------------------
/gpumodel.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | # 
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import numpy as n
 26 | import os
 27 | from time import time, asctime, localtime, strftime
 28 | from numpy.random import randn, rand
 29 | from numpy import s_, dot, tile, zeros, ones, zeros_like, array, ones_like
 30 | from util import *
 31 | from data import *
 32 | from options import *
 33 | from math import ceil, floor, sqrt
 34 | from data import DataProvider, dp_types
 35 | import sys
 36 | import shutil
 37 | import platform
 38 | from os import linesep as NL
 39 | 
 40 | class ModelStateException(Exception):
 41 |     pass
 42 | 
 43 | # GPU Model interface
 44 | class IGPUModel:
 45 |     def __init__(self, model_name, op, load_dic, filename_options=None, dp_params={}):
 46 |         # these are input parameters
 47 |         self.model_name = model_name
 48 |         self.op = op
 49 |         self.options = op.options
 50 |         self.load_dic = load_dic
 51 |         self.filename_options = filename_options
 52 |         self.dp_params = dp_params
 53 |         self.get_gpus()
 54 |         self.fill_excused_options()
 55 |         #assert self.op.all_values_given()
 56 |         
 57 |         for o in op.get_options_list():
 58 |             setattr(self, o.name, o.value)
 59 | 
 60 |         # these are things that the model must remember but they're not input parameters
 61 |         if load_dic:
 62 |             self.model_state = load_dic["model_state"]
 63 |             self.save_file = self.options["load_file"].value
 64 |             if not os.path.isdir(self.save_file):
 65 |                 self.save_file = os.path.dirname(self.save_file)
 66 |         else:
 67 |             self.model_state = {}
 68 |             if filename_options is not None:
 69 |                 self.save_file = model_name + "_" + '_'.join(['%s_%s' % (char, self.options[opt].get_str_value()) for opt, char in filename_options]) + '_' + strftime('%Y-%m-%d_%H.%M.%S')
 70 |             self.model_state["train_outputs"] = []
 71 |             self.model_state["test_outputs"] = []
 72 |             self.model_state["epoch"] = 1
 73 |             self.model_state["batchnum"] = self.train_batch_range[0]
 74 | 
 75 |         self.init_data_providers()
 76 |         if load_dic: 
 77 |             self.train_data_provider.advance_batch()
 78 |             
 79 |         # model state often requries knowledge of data provider, so it's initialized after
 80 |         try:
 81 |             self.init_model_state()
 82 |         except ModelStateException, e:
 83 |             print e
 84 |             sys.exit(1)
 85 |         for var, val in self.model_state.iteritems():
 86 |             setattr(self, var, val)
 87 |             
 88 |         self.import_model()
 89 |         self.init_model_lib()
 90 |         
 91 |     def import_model(self):
 92 |         print "========================="
 93 |         print "Importing %s C++ module" % ('_' + self.model_name)
 94 |         self.libmodel = __import__('_' + self.model_name) 
 95 |                    
 96 |     def fill_excused_options(self):
 97 |         pass
 98 |     
 99 |     def init_data_providers(self):
100 |         self.dp_params['convnet'] = self
101 |         try:
102 |             self.test_data_provider = DataProvider.get_instance(self.data_path, self.test_batch_range,
103 |                                                                 type=self.dp_type, dp_params=self.dp_params, test=True)
104 |             self.train_data_provider = DataProvider.get_instance(self.data_path, self.train_batch_range,
105 |                                                                      self.model_state["epoch"], self.model_state["batchnum"],
106 |                                                                      type=self.dp_type, dp_params=self.dp_params, test=False)
107 |         except DataProviderException, e:
108 |             print "Unable to create data provider: %s" % e
109 |             self.print_data_providers()
110 |             sys.exit()
111 |         
112 |     def init_model_state(self):
113 |         pass
114 |        
115 |     def init_model_lib(self):
116 |         pass
117 |     
118 |     def start(self):
119 |         if self.test_only:
120 |             self.test_outputs += [self.get_test_error()]
121 |             self.print_test_results()
122 |             sys.exit(0)
123 |         self.train()
124 |     
125 |     def train(self):
126 |         print "========================="
127 |         print "Training %s" % self.model_name
128 |         self.op.print_values()
129 |         print "========================="
130 |         self.print_model_state()
131 |         print "Running on CUDA device(s) %s" % ", ".join("%d" % d for d in self.device_ids)
132 |         print "Current time: %s" % asctime(localtime())
133 |         print "Saving checkpoints to %s" % os.path.join(self.save_path, self.save_file)
134 |         print "========================="
135 |         next_data = self.get_next_batch()
136 |         while self.epoch <= self.num_epochs:
137 |             data = next_data
138 |             self.epoch, self.batchnum = data[0], data[1]
139 |             self.print_iteration()
140 |             sys.stdout.flush()
141 |             
142 |             compute_time_py = time()
143 |             self.start_batch(data)
144 |             
145 |             # load the next batch while the current one is computing
146 |             next_data = self.get_next_batch()
147 |             
148 |             batch_output = self.finish_batch()
149 |             self.train_outputs += [batch_output]
150 |             self.print_train_results()
151 | 
152 |             if self.get_num_batches_done() % self.testing_freq == 0:
153 |                 self.sync_with_host()
154 |                 self.test_outputs += [self.get_test_error()]
155 |                 self.print_test_results()
156 |                 self.print_test_status()
157 |                 self.conditional_save()
158 |             
159 |             self.print_train_time(time() - compute_time_py)
160 |         self.cleanup()
161 |     
162 |     def cleanup(self):
163 |         sys.exit(0)
164 |         
165 |     def sync_with_host(self):
166 |         self.libmodel.syncWithHost()
167 |             
168 |     def print_model_state(self):
169 |         pass
170 |     
171 |     def get_num_batches_done(self):
172 |         return len(self.train_batch_range) * (self.epoch - 1) + self.batchnum - self.train_batch_range[0] + 1
173 |     
174 |     def get_next_batch(self, train=True):
175 |         dp = self.train_data_provider
176 |         if not train:
177 |             dp = self.test_data_provider
178 |         return self.parse_batch_data(dp.get_next_batch(), train=train)
179 |     
180 |     def parse_batch_data(self, batch_data, train=True):
181 |         return batch_data[0], batch_data[1], batch_data[2]['data']
182 |     
183 |     def start_batch(self, batch_data, train=True):
184 |         self.libmodel.startBatch(batch_data[2], not train)
185 |     
186 |     def finish_batch(self):
187 |         return self.libmodel.finishBatch()
188 |     
189 |     def print_iteration(self):
190 |         print "\t%d.%d..." % (self.epoch, self.batchnum),
191 |     
192 |     def print_train_time(self, compute_time_py):
193 |         print "(%.3f sec)" % (compute_time_py)
194 |     
195 |     def print_train_results(self):
196 |         batch_error = self.train_outputs[-1][0]
197 |         if not (batch_error > 0 and batch_error < 2e20):
198 |             print "Crazy train error: %.6f" % batch_error
199 |             self.cleanup()
200 | 
201 |         print "Train error: %.6f " % (batch_error),
202 | 
203 |     def print_test_results(self):
204 |         batch_error = self.test_outputs[-1][0]
205 |         print "%s\t\tTest error: %.6f" % (NL, batch_error),
206 | 
207 |     def print_test_status(self):
208 |         status = (len(self.test_outputs) == 1 or self.test_outputs[-1][0] < self.test_outputs[-2][0]) and "ok" or "WORSE"
209 |         print status,
210 |         
211 |     def conditional_save(self):
212 |         batch_error = self.test_outputs[-1][0]
213 |         if batch_error > 0 and batch_error < self.max_test_err:
214 |             self.save_state()
215 |         else:
216 |             print "\tTest error > %g, not saving." % self.max_test_err,
217 |     
218 |     def aggregate_test_outputs(self, test_outputs):
219 |         test_error = tuple([sum(t[r] for t in test_outputs) / (1 if self.test_one else len(self.test_batch_range)) for r in range(len(test_outputs[-1]))])
220 |         return test_error
221 |     
222 |     def get_test_error(self):
223 |         next_data = self.get_next_batch(train=False)
224 |         test_outputs = []
225 |         while True:
226 |             data = next_data
227 |             self.start_batch(data, train=False)
228 |             load_next = not self.test_one and data[1] < self.test_batch_range[-1]
229 |             if load_next: # load next batch
230 |                 next_data = self.get_next_batch(train=False)
231 |             test_outputs += [self.finish_batch()]
232 |             if self.test_only: # Print the individual batch results for safety
233 |                 print "batch %d: %s" % (data[1], str(test_outputs[-1]))
234 |             if not load_next:
235 |                 break
236 |             sys.stdout.flush()
237 |             
238 |         return self.aggregate_test_outputs(test_outputs)
239 |     
240 |     def set_var(self, var_name, var_val):
241 |         setattr(self, var_name, var_val)
242 |         self.model_state[var_name] = var_val
243 |         return var_val
244 |         
245 |     def get_var(self, var_name):
246 |         return self.model_state[var_name]
247 |         
248 |     def has_var(self, var_name):
249 |         return var_name in self.model_state
250 |         
251 |     def save_state(self):
252 |         for att in self.model_state:
253 |             if hasattr(self, att):
254 |                 self.model_state[att] = getattr(self, att)
255 |         
256 |         dic = {"model_state": self.model_state,
257 |                "op": self.op}
258 |             
259 |         checkpoint_dir = os.path.join(self.save_path, self.save_file)
260 |         checkpoint_file = "%d.%d" % (self.epoch, self.batchnum)
261 |         checkpoint_file_full_path = os.path.join(checkpoint_dir, checkpoint_file)
262 |         if not os.path.exists(checkpoint_dir):
263 |             os.makedirs(checkpoint_dir)
264 |     
265 |         pickle(checkpoint_file_full_path, dic,compress=self.zip_save)
266 |         
267 |         for f in sorted(os.listdir(checkpoint_dir), key=alphanum_key):
268 |             if sum(os.path.getsize(os.path.join(checkpoint_dir, f2)) for f2 in os.listdir(checkpoint_dir)) > self.max_filesize_mb*1024*1024 and f != checkpoint_file:
269 |                 os.remove(os.path.join(checkpoint_dir, f))
270 |             else:
271 |                 break
272 |             
273 |     @staticmethod
274 |     def load_checkpoint(load_dir):
275 |         if os.path.isdir(load_dir):
276 |             return unpickle(os.path.join(load_dir, sorted(os.listdir(load_dir), key=alphanum_key)[-1]))
277 |         return unpickle(load_dir)
278 | 
279 |     @staticmethod
280 |     def get_options_parser():
281 |         op = OptionsParser()
282 |         op.add_option("f", "load_file", StringOptionParser, "Load file", default="", excuses=OptionsParser.EXCLUDE_ALL)
283 |         op.add_option("train-range", "train_batch_range", RangeOptionParser, "Data batch range: training")
284 |         op.add_option("test-range", "test_batch_range", RangeOptionParser, "Data batch range: testing")
285 |         op.add_option("data-provider", "dp_type", StringOptionParser, "Data provider", default="default")
286 |         op.add_option("test-freq", "testing_freq", IntegerOptionParser, "Testing frequency", default=25)
287 |         op.add_option("epochs", "num_epochs", IntegerOptionParser, "Number of epochs", default=500)
288 |         op.add_option("data-path", "data_path", StringOptionParser, "Data path")
289 |         op.add_option("save-path", "save_path", StringOptionParser, "Save path")
290 |         op.add_option("max-filesize", "max_filesize_mb", IntegerOptionParser, "Maximum save file size (MB)", default=5000)
291 |         op.add_option("max-test-err", "max_test_err", FloatOptionParser, "Maximum test error for saving")
292 |         op.add_option("num-gpus", "num_gpus", IntegerOptionParser, "Number of GPUs", default=1)
293 |         op.add_option("test-only", "test_only", BooleanOptionParser, "Test and quit?", default=0)
294 |         op.add_option("zip-save", "zip_save", BooleanOptionParser, "Compress checkpoints?", default=0)
295 |         op.add_option("test-one", "test_one", BooleanOptionParser, "Test on one batch at a time?", default=1)
296 |         op.add_option("gpu", "gpu", ListOptionParser(IntegerOptionParser), "GPU override", default=OptionExpression("[-1] * num_gpus"))
297 |         return op
298 | 
299 |     @staticmethod
300 |     def print_data_providers():
301 |         print "Available data providers:"
302 |         for dp, desc in dp_types.iteritems():
303 |             print "    %s: %s" % (dp, desc)
304 |             
305 |     def get_gpus(self):
306 |         self.device_ids = [get_gpu_lock(g) for g in self.op.get_value('gpu')]
307 |         if GPU_LOCK_NO_LOCK in self.device_ids:
308 |             print "Not enough free GPUs!"
309 |             sys.exit()
310 |         
311 |     @staticmethod
312 |     def parse_options(op):
313 |         try:
314 |             load_dic = None
315 |             options = op.parse()
316 |             if options["load_file"].value_given:
317 |                 load_dic = IGPUModel.load_checkpoint(options["load_file"].value)
318 |                 old_op = load_dic["op"]
319 |                 old_op.merge_from(op)
320 |                 op = old_op
321 |             op.eval_expr_defaults()
322 |             return op, load_dic
323 |         except OptionMissingException, e:
324 |             print e
325 |             op.print_usage()
326 |         except OptionException, e:
327 |             print e
328 |         except UnpickleError, e:
329 |             print "Error loading checkpoint:"
330 |             print e
331 |         sys.exit()
332 |         
333 | 


--------------------------------------------------------------------------------
/include/common/matrix.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef MATRIX_H_
 28 | #define MATRIX_H_
 29 | 
 30 | #include <matrix_funcs.h>
 31 | #ifdef NUMPY_INTERFACE
 32 | #include <Python.h>
 33 | #include <arrayobject.h>
 34 | #endif
 35 | #include <limits>
 36 | #include <assert.h>
 37 | #include <stdio.h>
 38 | #include <string.h>
 39 | #include <math.h>
 40 | 
 41 | #if defined(_WIN64) || defined(_WIN32) // Windows-only compatibility shims
 42 | #include <float.h>
 43 | #define isnan(_X) (_isnan(_X)) // map isnan onto the MSVC runtime spelling
 44 | #define isinf(_X) (!_finite(_X)) // NOTE(review): "not finite" is presumably also true for NaN - confirm callers of isinf() tolerate that
 45 | #define uint unsigned int
 46 | double sqrt(int _X); // int overloads are only declared here; no definition is visible in this header
 47 | double log(int _X);
 48 | #endif
 49 | 
 50 | #ifdef USE_MKL // build against Intel MKL; otherwise fall back to a plain <cblas.h> (see #else below)
 51 | #include <mkl.h>
 52 | #include <mkl_cblas.h>
 53 | #include <mkl_vsl.h>
 54 | #include <mkl_vml.h>
 55 | 
 56 | #define IS_MKL true
 57 | 
 58 | #ifdef DOUBLE_PRECISION // pick the double ("vd"/"D") or single ("vs"/"S") flavour of each MKL routine
 59 | #define MKL_UNIFORM vdRngUniform
 60 | #define MKL_NORMAL vdRngGaussian
 61 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_DUNIFORM_STD_ACCURATE
 62 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_DGAUSSIAN_BOXMULLER
 63 | #define MKL_EXP vdExp
 64 | #define MKL_RECIP vdInv
 65 | #define MKL_SQUARE vdSqr
 66 | #define MKL_TANH vdTanh
 67 | #define MKL_LOG vdLn
 68 | #define MKL_VECMUL vdMul
 69 | #define MKL_VECDIV vdDiv
 70 | #else
 71 | #define MKL_UNIFORM vsRngUniform
 72 | #define MKL_NORMAL vsRngGaussian
 73 | #define MKL_UNIFORM_RND_METHOD VSL_METHOD_SUNIFORM_STD_ACCURATE
 74 | #define MKL_GAUSSIAN_RND_METHOD VSL_METHOD_SGAUSSIAN_BOXMULLER
 75 | #define MKL_EXP vsExp
 76 | #define MKL_RECIP vsInv
 77 | #define MKL_SQUARE vsSqr
 78 | #define MKL_TANH vsTanh
 79 | #define MKL_LOG vsLn
 80 | #define MKL_VECMUL vsMul
 81 | #define MKL_VECDIV vsDiv
 82 | #endif /* DOUBLE_PRECISION */
 83 | 
 84 | #else
 85 | extern "C" { // cblas.h is included with C linkage
 86 | #include <cblas.h>
 87 | }
 88 | #define IS_MKL false
 89 | #endif /* USE_MKL */
 90 | 
 91 | #ifdef DOUBLE_PRECISION // CBLAS_* resolve to the d- (double) or s- (single) prefixed CBLAS routines
 92 | #define CBLAS_GEMM cblas_dgemm
 93 | #define CBLAS_SCAL cblas_dscal
 94 | #define CBLAS_AXPY cblas_daxpy
 95 | #else
 96 | #define CBLAS_GEMM cblas_sgemm
 97 | #define CBLAS_SCAL cblas_sscal
 98 | #define CBLAS_AXPY cblas_saxpy
 99 | #endif /* DOUBLE_PRECISION */
100 | 
101 | /* Largest finite MTYPE value. Qualified with std:: so the macro also expands
102 |  * correctly in files that do not have "using namespace std" in effect. */
103 | #define MTYPE_MAX std::numeric_limits<MTYPE>::max()
102 | 
103 | class Matrix { // 2-D matrix of MTYPE held in one flat buffer; _trans selects how (i, j) maps into it (see getCell)
104 | private:
105 |     MTYPE* _data; // flat element buffer indexed by getCell()
106 |     bool _ownsData; // false means this object is a view (see isView)
107 |     long int _numRows, _numCols;
108 |     long int _numElements;
109 |     CBLAS_TRANSPOSE _trans; // CblasTrans or CblasNoTrans
110 | 
111 |     void _init(MTYPE* data, long int numRows, long int numCols, bool transpose, bool ownsData);
112 |     void _tileTo2(Matrix& target) const;
113 |     void _copyAllTo(Matrix& target) const;
114 |     MTYPE _sum_column(long int col) const;
115 |     MTYPE _sum_row(long int row) const;
116 |     MTYPE _aggregate(MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;
117 |     void _aggregate(long int axis, Matrix& target, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;
118 |     MTYPE _aggregateRow(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const;
119 |     MTYPE _aggregateCol(long int row, MTYPE(*agg_func)(MTYPE, MTYPE), MTYPE initialValue) const; // NOTE(review): parameter is named "row"; presumably a column index - confirm in matrix.cpp
120 |     void _updateDims(long int numRows, long int numCols);
121 |     void _applyLoop(MTYPE(*func)(MTYPE));
122 |     void _applyLoop(MTYPE (*func)(MTYPE), Matrix& target);
123 |     void _applyLoop2(const Matrix& a, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const;
124 |     void _applyLoop2(const Matrix& a, MTYPE (*func)(MTYPE,MTYPE, MTYPE), MTYPE scalar, Matrix& target) const;
125 |     void _applyLoopScalar(const MTYPE scalar, MTYPE(*func)(MTYPE, MTYPE), Matrix& target) const;
126 |     void _checkBounds(long int startRow, long int endRow, long int startCol, long int endCol) const;
127 |     void _divideByVector(const Matrix& vec, Matrix& target);
128 |     inline long int _getNumColsBackEnd() const { // column count of the underlying storage: swaps to _numRows when transposed
129 |         return _trans == CblasNoTrans ? _numCols : _numRows;
130 |     }
131 | public:
132 |     enum FUNCTION { // selectors accepted by apply()
133 |         TANH, RECIPROCAL, SQUARE, ABS, EXP, LOG, ZERO, ONE, LOGISTIC1, LOGISTIC2, SIGN
134 |     };
135 |     Matrix();
136 |     Matrix(long int numRows, long int numCols);
137 | #ifdef NUMPY_INTERFACE
138 |     Matrix(const PyArrayObject *src);
139 | #endif
140 |     Matrix(const Matrix &like);
141 |     Matrix(MTYPE* data, long int numRows, long int numCols);
142 |     Matrix(MTYPE* data, long int numRows, long int numCols, bool transpose);
143 |     ~Matrix();
144 | 
145 |     inline MTYPE& getCell(long int i, long int j) const { // mutable reference to element (i, j); bounds are assert-checked only
146 |         assert(i >= 0 && i < _numRows);
147 |         assert(j >= 0 && j < _numCols);
148 |         if (_trans == CblasTrans) {
149 |             return _data[j * _numRows + i]; // transposed storage: consecutive i are adjacent
150 |         }
151 |         return _data[i * _numCols + j]; // plain storage: consecutive j are adjacent
152 |     }
153 | 
154 |     MTYPE& operator()(long int i, long int j) const { // shorthand for getCell(i, j)
155 |         return getCell(i, j);
156 |     }
157 | 
158 |     inline MTYPE* getData() const {
159 |         return _data;
160 |     }
161 | 
162 |     inline bool isView() const { // true when this matrix does not own its buffer
163 |         return !_ownsData;
164 |     }
165 | 
166 |     inline long int getNumRows() const {
167 |         return _numRows;
168 |     }
169 | 
170 |     inline long int getNumCols() const {
171 |         return _numCols;
172 |     }
173 | 
174 |     inline long int getNumDataBytes() const { // element count times sizeof(MTYPE)
175 |         return _numElements * sizeof(MTYPE);
176 |     }
177 | 
178 |     inline long int getNumElements() const {
179 |         return _numElements;
180 |     }
181 | 
182 |     inline long int getLeadingDim() const { // _numRows when transposed, otherwise _numCols
183 |         return _trans == CblasTrans ? _numRows : _numCols;
184 |     }
185 | 
186 |     inline long int getFollowingDim() const { // the other dimension: _numCols when transposed, otherwise _numRows
187 |         return _trans == CblasTrans ? _numCols : _numRows;
188 |     }
189 | 
190 |     inline CBLAS_TRANSPOSE getBLASTrans() const {
191 |         return _trans;
192 |     }
193 | 
194 |     inline bool isSameDims(const Matrix& a) const { // compares row and column counts only
195 |         return a.getNumRows() == getNumRows() && a.getNumCols() == getNumCols();
196 |     }
197 | 
198 |     inline bool isTrans() const {
199 |         return _trans == CblasTrans;
200 |     }
201 | 
202 |     /*
203 |      * Only use if you know what you're doing!
204 |      * Does not update any dimensions. Just flips the _trans flag.
205 |      *
206 |      * Use transpose() if you want to get the transpose of this matrix.
207 |      */
208 |     inline void setTrans(bool trans) {
209 |         assert(isTrans() == trans || !isView()); // changing the flag is only allowed on a matrix that owns its data
210 |         _trans = trans ? CblasTrans : CblasNoTrans;
211 |     }
212 | 
213 |     void apply(FUNCTION f);
214 |     void apply(Matrix::FUNCTION f, Matrix& target);
215 |     void subtractFromScalar(MTYPE scalar);
216 |     void subtractFromScalar(MTYPE scalar, Matrix &target) const;
217 |     void biggerThanScalar(MTYPE scalar);
218 |     void smallerThanScalar(MTYPE scalar);
219 |     void equalsScalar(MTYPE scalar);
220 |     void biggerThanScalar(MTYPE scalar, Matrix& target) const;
221 |     void smallerThanScalar(MTYPE scalar, Matrix& target) const;
222 |     void equalsScalar(MTYPE scalar, Matrix& target) const;
223 |     void biggerThan(Matrix& a);
224 |     void biggerThan(Matrix& a, Matrix& target) const;
225 |     void smallerThan(Matrix& a);
226 |     void smallerThan(Matrix& a, Matrix& target) const;
227 |     void minWith(Matrix &a);
228 |     void minWith(Matrix &a, Matrix &target) const;
229 |     void maxWith(Matrix &a);
230 |     void maxWith(Matrix &a, Matrix &target) const;
231 |     void equals(Matrix& a);
232 |     void equals(Matrix& a, Matrix& target) const;
233 |     void notEquals(Matrix& a) ;
234 |     void notEquals(Matrix& a, Matrix& target) const;
235 |     void add(const Matrix &m);
236 |     void add(const Matrix &m, MTYPE scale);
237 |     void add(const Matrix &m, Matrix& target);
238 |     void add(const Matrix &m, MTYPE scale, Matrix& target);
239 |     void subtract(const Matrix &m);
240 |     void subtract(const Matrix &m, Matrix& target);
241 |     void subtract(const Matrix &m, MTYPE scale);
242 |     void subtract(const Matrix &m, MTYPE scale, Matrix& target);
243 |     void addVector(const Matrix& vec, MTYPE scale);
244 |     void addVector(const Matrix& vec, MTYPE scale, Matrix& target);
245 |     void addVector(const Matrix& vec);
246 |     void addVector(const Matrix& vec, Matrix& target);
247 |     void addScalar(MTYPE scalar);
248 |     void addScalar(MTYPE scalar, Matrix& target) const;
249 |     void maxWithScalar(MTYPE scalar);
250 |     void maxWithScalar(MTYPE scalar, Matrix &target) const;
251 |     void minWithScalar(MTYPE scalar);
252 |     void minWithScalar(MTYPE scalar, Matrix &target) const;
253 |     void eltWiseMultByVector(const Matrix& vec);
254 |     void eltWiseMultByVector(const Matrix& vec, Matrix& target);
255 |     void eltWiseDivideByVector(const Matrix& vec);
256 |     void eltWiseDivideByVector(const Matrix& vec, Matrix& target);
257 |     void resize(long int newNumRows, long int newNumCols);
258 |     void resize(const Matrix& like);
259 |     Matrix& slice(long int startRow, long int endRow, long int startCol, long int endCol) const; // NOTE(review): returns a reference; ownership of the returned Matrix is not visible in this header - confirm in matrix.cpp (same for the other Matrix&-returning members)
260 |     void slice(long int startRow, long int endRow, long int startCol, long int endCol, Matrix &target) const;
261 |     Matrix& sliceRows(long int startRow, long int endRow) const;
262 |     void sliceRows(long int startRow, long int endRow, Matrix& target) const;
263 |     Matrix& sliceCols(long int startCol, long int endCol) const;
264 |     void sliceCols(long int startCol, long int endCol, Matrix& target) const;
265 |     void rightMult(const Matrix &b, MTYPE scale);
266 |     void rightMult(const Matrix &b, Matrix &target) const;
267 |     void rightMult(const Matrix &b);
268 |     void rightMult(const Matrix &b, MTYPE scaleAB, Matrix &target) const;
269 |     void addProduct(const Matrix &a, const Matrix &b, MTYPE scaleAB, MTYPE scaleThis);
270 |     void addProduct(const Matrix& a, const Matrix& b);
271 |     void eltWiseMult(const Matrix& a);
272 |     void eltWiseMult(const Matrix& a, Matrix& target) const;
273 |     void eltWiseDivide(const Matrix& a);
274 |     void eltWiseDivide(const Matrix& a, Matrix &target) const;
275 |     Matrix& transpose() const;
276 |     Matrix& transpose(bool hard) const;
277 |     Matrix& tile(long int timesY, long int timesX) const;
278 |     void tile(long int timesY, long int timesX, Matrix& target) const;
279 |     void copy(Matrix &dest, long int srcStartRow, long int srcEndRow, long int srcStartCol, long int srcEndCol, long int destStartRow, long int destStartCol) const;
280 |     Matrix& copy() const;
281 |     void copy(Matrix& target) const;
282 |     Matrix& sum(long int axis) const;
283 |     void sum(long int axis, Matrix &target) const;
284 |     MTYPE sum() const;
285 |     MTYPE max() const;
286 |     Matrix& max(long int axis) const;
287 |     void max(long int axis, Matrix& target) const;
288 |     MTYPE min() const;
289 |     Matrix& min(long int axis) const;
290 |     void min(long int axis, Matrix& target) const;
291 |     MTYPE norm() const;
292 |     MTYPE norm2() const;
293 |     void scale(MTYPE scale);
294 |     void scale(MTYPE alpha, Matrix& target);
295 |     void reshape(long int numRows, long int numCols);
296 |     Matrix& reshaped(long int numRows, long int numCols);
297 |     void printShape(const char* name) const;
298 |     bool hasNan() const;
299 |     bool hasInf() const;
300 | #ifdef USE_MKL // the MKL variants take an explicit VSL random stream
301 |     void randomizeNormal(VSLStreamStatePtr stream, MTYPE mean, MTYPE stdev);
302 |     void randomizeUniform(VSLStreamStatePtr stream);
303 |     void randomizeNormal(VSLStreamStatePtr stream);
304 | #else
305 |     void randomizeNormal(MTYPE mean, MTYPE stdev);
306 |     void randomizeUniform();
307 |     void randomizeNormal();
308 | #endif
309 |     void print() const;
310 |     void print(long int startRow,long int rows, long int startCol,long int cols) const;
311 |     void print(long int rows, long int cols) const;
312 | };
313 | 
314 | #endif /* MATRIX_H_ */
315 | 


--------------------------------------------------------------------------------
/include/common/matrix_funcs.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef MATRIX_FUNCS_H_
 28 | #define MATRIX_FUNCS_H_
 29 | 
 30 | #include <stdlib.h>
 31 | #include <math.h>
 32 | #include <algorithm>
 33 | 
 34 | #ifdef DOUBLE_PRECISION // element type used by every helper in this header
 35 | #define MTYPE double
 36 | #else
 37 | #define MTYPE float
 38 | #endif
 39 | 
 40 | #define MYRAND ((double)rand() / ((double)RAND_MAX + 1)) // rand() scaled into [0, 1): the divisor is RAND_MAX + 1
 41 | 
 42 | /* Constant function: ignores its argument and yields 0. */
 43 | inline MTYPE _zero(MTYPE) {
 44 |     return 0;
 45 | }
 45 | 
 46 | /* Constant function: ignores its argument and yields 1. */
 47 | inline MTYPE _one(MTYPE) {
 48 |     return 1;
 49 | }
 49 | 
 50 | /* Absolute value: v itself when v > 0, otherwise its negation. */
 51 | inline MTYPE _abs(MTYPE v) {
 52 |     if (v > 0) {
 53 |         return v;
 54 |     }
 55 |     return -v;
 56 | }
 53 | 
 54 | /* v multiplied by itself. */
 55 | inline MTYPE _square(MTYPE v) {
 56 |     return v * v;
 57 | }
 57 | 
 58 | /* Logistic function expressed through tanh: (tanh(v / 2) + 1) / 2. */
 59 | inline MTYPE _sigma1(MTYPE v) {
 60 |     return (tanh(v / 2) + 1) / 2;
 61 | }
 61 | 
 62 | /* Logistic function in its exponential form: 1 / (1 + exp(-v)). */
 63 | inline MTYPE _sigma2(MTYPE v) {
 64 |     return 1 / (1 + exp(-v));
 65 | }
 65 | 
 66 | /* Reciprocal 1 / v; no special handling of v == 0. */
 67 | inline MTYPE _recip(MTYPE v) {
 68 |     return 1 / v;
 69 | }
 69 | 
 70 | /* Thin wrapper so exp() can be passed around with the MTYPE(MTYPE) signature. */
 71 | inline MTYPE _exp(MTYPE v) {
 72 |     return exp(v);
 73 | }
 73 | 
 74 | /* Thin wrapper so log() can be passed around with the MTYPE(MTYPE) signature. */
 75 | inline MTYPE _log(MTYPE v) {
 76 |     return log(v);
 77 | }
 77 | 
 78 | /* Thin wrapper so tanh() can be passed around with the MTYPE(MTYPE) signature. */
 79 | inline MTYPE _tanh(MTYPE v) {
 80 |     return tanh(v);
 81 | }
 81 | 
 82 | /* Sign as +1 / -1. Anything that is not > 0 (including 0) maps to -1. */
 83 | inline MTYPE _sign(MTYPE v) {
 84 |     if (v > 0) {
 85 |         return 1;
 86 |     }
 87 |     return -1;
 88 | }
 85 | 
 86 | /* Ignores its argument and yields the next MYRAND value (rand()-based, in [0, 1)). */
 87 | inline MTYPE _rand(MTYPE) {
 88 |     return MYRAND;
 89 | }
 89 | 
 90 | /* Quotient a / b. */
 91 | inline MTYPE _divide(MTYPE a, MTYPE b) {
 92 |     return a / b;
 93 | }
 93 | 
 94 | /* Product a * b. */
 95 | inline MTYPE _mult(MTYPE a, MTYPE b) {
 96 |     return a * b;
 97 | }
 97 | 
 98 | /* Sum a + b. */
 99 | inline MTYPE _add(MTYPE a, MTYPE b) {
100 |     return a + b;
101 | }
101 | 
102 | /* Adds the square of the first argument to the second: a*a + b. */
103 | inline MTYPE _addSquare(MTYPE a, MTYPE b) {
104 |     return a*a + b;
105 | }
105 | 
106 | /* Adds a scaled second argument to the first: a + scale*b. */
107 | inline MTYPE _addWithScale(MTYPE a, MTYPE b, MTYPE scale) {
108 |     return a + scale*b;
109 | }
109 | 
110 | /* Larger of a and b, spelled out exactly as std::max defines it:
111 |  * b when a < b, otherwise a. */
112 | inline MTYPE _max(MTYPE a, MTYPE b) {
113 |     return (a < b) ? b : a;
114 | }
113 | 
114 | /* Smaller of a and b, spelled out exactly as std::min defines it:
115 |  * b when b < a, otherwise a. */
116 | inline MTYPE _min(MTYPE a, MTYPE b) {
117 |     return (b < a) ? b : a;
118 | }
117 | 
118 | /* Comparison as a number: 1 when a > b, otherwise 0. */
119 | inline MTYPE _bigger(MTYPE a, MTYPE b) {
120 |     return (a > b) ? 1 : 0;
121 | }
121 | 
122 | /* Comparison as a number: 1 when a < b, otherwise 0. */
123 | inline MTYPE _smaller(MTYPE a, MTYPE b) {
124 |     return (a < b) ? 1 : 0;
125 | }
125 | 
126 | /* Exact equality as a number: 1 when a == b, otherwise 0. */
127 | inline MTYPE _equal(MTYPE a, MTYPE b) {
128 |     return (a == b) ? 1 : 0;
129 | }
129 | 
130 | /* Exact inequality as a number: 1 when a != b, otherwise 0. */
131 | inline MTYPE _notEqual(MTYPE a, MTYPE b) {
132 |     return (a != b) ? 1 : 0;
133 | }
133 | 
134 | #endif /* MATRIX_FUNCS_H_ */
135 | 


--------------------------------------------------------------------------------
/include/common/queue.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef QUEUE_H_
 28 | #define QUEUE_H_
 29 | #include <pthread.h>
 30 | #include <stdlib.h>
 31 | 
 32 | /*
 33 |  * A thread-safe circular queue that automatically grows but never shrinks.
 34 |  */
 35 | template <class T>
 36 | class Queue {
 37 | private:
 38 |     T *_elements;                  // circular buffer with _maxSize slots
 39 |     int _numElements;              // slots currently in use
 40 |     int _head, _tail;              // next slot to read / next slot to write
 41 |     int _maxSize;
 42 |     pthread_mutex_t *_queueMutex;  // guards every field above
 43 |     pthread_cond_t *_queueCV;      // signalled by enqueue()
 44 | 
 45 |     /* Allocates the buffer (at least one slot) and creates the mutex and condition variable. */
 46 |     void _init(int initialSize) {
 47 |         // expand() grows by doubling, so a capacity below 1 could never grow.
 48 |         if (initialSize < 1) {
 49 |             initialSize = 1;
 50 |         }
 51 |         _numElements = 0;
 52 |         _head = 0;
 53 |         _tail = 0;
 54 |         _maxSize = initialSize;
 55 |         _elements = new T[initialSize];
 56 |         _queueCV = (pthread_cond_t*)(malloc(sizeof (pthread_cond_t)));
 57 |         _queueMutex = (pthread_mutex_t*)(malloc(sizeof (pthread_mutex_t)));
 58 |         pthread_mutex_init(_queueMutex, NULL);
 59 |         pthread_cond_init(_queueCV, NULL);
 60 |     }
 61 | 
 62 |     /*
 63 |      * Doubles the capacity. Called by enqueue() with the mutex held.
 64 |      * Elements are moved in queue order so that the head ends up in slot 0.
 65 |      */
 66 |     void expand() {
 67 |         T *newStorage = new T[_maxSize * 2];
 68 |         // Element-wise assignment rather than memcpy: valid for any assignable T
 69 |         // and does not depend on <string.h> being included by someone else.
 70 |         for (int i = 0; i < _numElements; i++) {
 71 |             newStorage[i] = _elements[(_head + i) % _maxSize];
 72 |         }
 73 |         delete[] _elements;
 74 |         _elements = newStorage;
 75 |         _head = 0;
 76 |         _tail = _numElements;
 77 |         _maxSize *= 2;
 78 |     }
 79 | public:
 80 |     /* Creates a queue with room for initialSize elements (values below 1 are treated as 1). */
 81 |     Queue(int initialSize) {
 82 |         _init(initialSize);
 83 |     }
 84 | 
 85 |     Queue()  {
 86 |         _init(1);
 87 |     }
 88 | 
 89 |     ~Queue() {
 90 |         pthread_mutex_destroy(_queueMutex);
 91 |         pthread_cond_destroy(_queueCV);
 92 |         delete[] _elements;
 93 |         free(_queueMutex);
 94 |         free(_queueCV);
 95 |     }
 96 | 
 97 |     /* Appends el, growing the buffer when it is full, and wakes one waiting consumer. */
 98 |     void enqueue(T el) {
 99 |         pthread_mutex_lock(_queueMutex);
100 |         if(_numElements == _maxSize) {
101 |             expand();
102 |         }
103 |         _elements[_tail] = el;
104 |         _tail = (_tail + 1) % _maxSize;
105 |         _numElements++;
106 | 
107 |         pthread_cond_signal(_queueCV);
108 |         pthread_mutex_unlock(_queueMutex);
109 |     }
110 | 
111 |     /*
112 |      * Blocks until not empty, then removes and returns the oldest element.
113 |      */
114 |     T dequeue() {
115 |         pthread_mutex_lock(_queueMutex);
116 |         // Must be a loop: pthread_cond_wait may return spuriously, and with
117 |         // several consumers another thread may already have taken the element.
118 |         while(_numElements == 0) {
119 |             pthread_cond_wait(_queueCV, _queueMutex);
120 |         }
121 |         T el = _elements[_head];
122 |         _head = (_head + 1) % _maxSize;
123 |         _numElements--;
124 |         pthread_mutex_unlock(_queueMutex);
125 |         return el;
126 |     }
127 | 
128 |     /*
129 |      * Obviously this number can change by the time you actually look at it.
130 |      * It is read without taking the mutex.
131 |      */
132 |     inline int getNumElements() const {
133 |         return _numElements;
134 |     }
135 | };
119 | 
120 | #endif /* QUEUE_H_ */
121 | 


--------------------------------------------------------------------------------
/include/common/thread.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef THREAD_H_
28 | #define THREAD_H_
29 | #include <pthread.h>
30 | #include <stdio.h>
31 | #include <errno.h>
32 | #include <assert.h>
33 | 
/*
 * Abstract thread class built on a single pthread.
 * The only thing the implementer has to fill in is the run method.
 *
 * The constructor argument selects whether the thread is created joinable or
 * detached; join() may only be called on a joinable thread. A Thread object
 * can be started successfully at most once and cannot be copied.
 */
class Thread {
private:
    pthread_attr_t _pthread_attr;
    pthread_t _threadID;
    bool _joinable, _startable;

    // Declared but never defined: a copy would share the same pthread_attr_t
    // and destroy it a second time in its own destructor.
    Thread(const Thread&);
    Thread& operator=(const Thread&);

    // Entry point handed to pthread_create; obj is the Thread being started.
    // Returning from the start routine ends the thread with the returned
    // value as its exit status, so no explicit pthread_exit call is needed.
    static void* start_pthread_func(void *obj) {
        return static_cast<Thread*>(obj)->run();
    }
protected:
    // Body of the thread. The returned pointer is what join(void**) reports.
    virtual void* run() = 0;
public:
    // joinable: true to create the thread joinable, false to create it detached.
    Thread(bool joinable) : _threadID(), _joinable(joinable), _startable(true) {
        pthread_attr_init(&_pthread_attr);
        pthread_attr_setdetachstate(&_pthread_attr, joinable ? PTHREAD_CREATE_JOINABLE : PTHREAD_CREATE_DETACHED);
    }

    // Releases the attribute object initialised in the constructor.
    // Does not join or cancel a running thread.
    virtual ~Thread() {
        pthread_attr_destroy(&_pthread_attr);
    }

    /*
     * Creates the pthread, which then executes run().
     * Must not be called again after a successful start (asserted).
     * On failure the error is reported with perror, the stored thread ID is
     * reset to a value-initialised pthread_t and the object stays startable.
     * Returns the stored thread ID.
     */
    pthread_t start() {
        assert(_startable);
        _startable = false;
        const int err = pthread_create(&_threadID, &_pthread_attr, &Thread::start_pthread_func, (void*)this);
        if (err != 0) {
            errno = err;
            perror("pthread_create error");
            // pthread_create leaves the ID unspecified on failure; do not expose it.
            _threadID = pthread_t();
            _startable = true;
        }
        return _threadID;
    }

    /*
     * Waits for the thread to finish. Only valid for joinable threads (asserted).
     * If status is non-NULL it receives the pointer returned by run().
     * A pthread_join failure is reported with perror.
     */
    void join(void **status) {
        assert(_joinable);
        const int err = pthread_join(_threadID, status);
        if (err != 0) {
            errno = err;
            perror("pthread_join error");
        }
    }

    // Waits for the thread to finish and discards its exit status.
    void join() {
        join(NULL);
    }

    // Returns the ID stored by the most recent start().
    pthread_t getThreadID() const {
        return _threadID;
    }
};
88 | 
89 | #endif /* THREAD_H_ */
90 | 


--------------------------------------------------------------------------------
/include/convnet.cuh:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef CONVNET3
28 | #define	CONVNET3
29 | 
30 | #include <vector>
31 | #include <string>
32 | #include <cutil_inline.h>
33 | #include <time.h>
34 | #include <queue.h>
35 | #include <thread.h>
36 | #include <math.h>
37 | 
38 | #include "layer.cuh"
39 | #include "data.cuh"
40 | #include "worker.cuh"
41 | #include "weights.cuh"
42 | 
43 | class Worker;
44 | class WorkResult;
45 | class Layer;
46 | class DataLayer;
47 | class CostLayer;
48 | 
49 | class ConvNet : public Thread {
50 | protected:
51 |     std::vector<Layer*> _layers;
52 |     std::vector<DataLayer*> _dataLayers;
53 |     std::vector<CostLayer*> _costs;
54 |     GPUData* _data;
55 | 
56 |     DataProvider* _dp;
57 |     int _deviceID;
58 |     
59 |     Queue<Worker*> _workerQueue;
60 |     Queue<WorkResult*> _resultQueue;
61 |     
62 |     // For gradient checking
63 |     int _numFailures;
64 |     int _numTests;
65 |     double _baseErr;
66 |     
67 |     virtual Layer* initLayer(string& layerType, PyObject* paramsDict);
68 |     void initCuda();
69 |     void* run();
70 | public:
71 |     ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID);
72 |     
73 |     Queue<Worker*>& getWorkerQueue();
74 |     Queue<WorkResult*>& getResultQueue();
75 |     DataProvider& getDataProvider();
76 |     
77 |     Layer& operator[](int idx);
78 |     Layer& getLayer(int idx);
79 |     void copyToCPU();
80 |     void copyToGPU();
81 |     void updateWeights();
82 |     void reset();
83 |     int getNumLayers();
84 |     
85 |     void bprop(PASS_TYPE passType);
86 |     void fprop(PASS_TYPE passType);
87 |     void fprop(int miniIdx, PASS_TYPE passType);
88 |     void fprop(GPUData& data, PASS_TYPE passType);
89 | 
90 |     bool checkGradient(const std::string& name, float eps, Weights& weights); 
91 |     void checkGradients();
92 |     Cost& getCost();
93 |     Cost& getCost(Cost& cost);
94 |     double getCostValue();
95 | };
96 | 
97 | #endif	/* CONVNET3 */
98 | 
99 | 


--------------------------------------------------------------------------------
/include/cost.cuh:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef COST_CUH
28 | #define	COST_CUH
29 | 
30 | #include <vector>
31 | #include <map>
32 | #include <cutil_inline.h>
33 | 
34 | #include "layer.cuh"
35 | #include "util.cuh"
36 | 
37 | class CostLayer;
38 | 
39 | /*
40 |  * Wrapper for dictionary mapping cost name to vector of returned values.
41 |  */
42 | class Cost {
43 | private:
44 |     int _numCases;
45 |     CostMap _costMap;
46 |     CostCoeffMap _costCoeffMap;
47 | public:
48 |     Cost(int numCases);
49 |     Cost(int numCases, std::vector<CostLayer*>& costs);
50 |     doublev& operator [](const std::string s);
51 |     CostMap& getCostMap();
52 |     CostCoeffMap& getCostCoeffMap();
53 |     int getNumCases();
54 |     /*
55 |      * Returns sum of first values returned by all the costs, weighted by the cost coefficients.
56 |      */
57 |     double getValue();
58 |     Cost& operator += (Cost& er);
59 |     Cost& operator /= (const double v);
60 |     virtual ~Cost();
61 | };
62 | 
63 | 
64 | #endif	/* COST_CUH */
65 | 
66 | 


--------------------------------------------------------------------------------
/include/cudaconv2/cudaconv2.cuh:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef COMMON_CUH
 28 | #define	COMMON_CUH
 29 | 
 30 | #include <cutil_inline.h>
 31 | #include <nvmatrix.cuh>
 32 | #include "conv_util.cuh"
 33 | 
 34 | enum FILTER_OUTPUT_ORDER {MODULE_FILTER_IMAGE, FILTER_MODULE_IMAGE};
 35 | 
 36 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 37 |                     int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 38 |                     int numImgColors, int numGroups);
 39 | void convFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 40 |                    int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 41 |                    int numImgColors, int numGroups,
 42 |                    float scaleTargets, float scaleOutput);
 43 | 
 44 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 45 |                      int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 46 |                      int numImgColors, int numGroups);
 47 | void localFilterActs(NVMatrix& images, NVMatrix& filters, NVMatrix& targets,
 48 |                      int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 49 |                      int numImgColors, int numGroups,
 50 |                      float scaleTargets, float scaleOutput);
 51 | 
 52 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 53 |                  int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups);
 54 | void convImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 55 |                  int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups,
 56 |                  float scaleTargets, float scaleOutput);
 57 | 
 58 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 59 |                   int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups);
 60 | void localImgActs(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets,
 61 |                   int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numGroups,
 62 |                   float scaleTargets, float scaleOutput);
 63 | 
 64 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 65 |                     int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart,
 66 |                     int moduleStride, int numImgColors, int numGroups, int partialSum);
 67 | void convWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 68 |                     int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
 69 |                     int numImgColors, int numGroups, int partialSum,
 70 |                     float scaleTargets, float scaleOutput);
 71 | 
 72 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 73 |                      int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart,
 74 |                      int moduleStride, int numImgColors, int numGroups);
 75 | 
 76 | void localWeightActs(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets,
 77 |                      int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
 78 |                      int numImgColors, int numGroups, float scaleTargets, float scaleOutput);
 79 | 
 80 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 81 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 82 |                           int numImgColors, int numFilterColors, int numGroups);
 83 | void convFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 84 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 85 |                           int numImgColors, int numFilterColors, int numGroups,
 86 |                           float scaleTargets, float scaleOutput);
 87 | 
 88 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 89 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 90 |                           int numImgColors, int numFilterColors, int numGroups,
 91 |                           float scaleTargets, float scaleOutput);
 92 | void localFilterActsSparse(NVMatrix& images, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
 93 |                           int imgSizeY, int numModulesY, int numModulesX, int paddingStart, int moduleStride,
 94 |                           int numImgColors, int numFilterColors, int numGroups);
 95 | 
 96 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
 97 |                          int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
 98 |                          int numImgColors, int numFilterColors, int numGroups);
 99 | void convWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
100 |                         int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors,
101 |                         int numGroups, int partialSum, float scaleTargets, float scaleOutput);
102 | 
103 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
104 |                          int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride,
105 |                          int numImgColors, int numFilterColors, int numGroups);
106 | void localWeightActsSparse(NVMatrix& images, NVMatrix& hidActs, NVMatrix& targets, int* dColorIndices,
107 |                         int imgSizeY, int numModulesY, int numModulesX, int filterSize, int paddingStart, int moduleStride, int numImgColors, int numFilterColors,
108 |                         int numGroups, float scaleTargets, float scaleOutput);
109 | 
110 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
111 |                        int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups);
112 | void convImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
113 |                        int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups,
114 |                        float scaleTargets, float scaleOutput);
115 | 
116 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
117 |                         int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups);
118 | void localImgActsSparse(NVMatrix& hidActs, NVMatrix& filters, NVMatrix& targets, int* dColorIndices,
119 |                        int imgSizeY, int imgSizeX, int numModulesY, int paddingStart, int moduleStride, int numImgColors, int numFilterColors, int numGroups,
120 |                        float scaleTargets, float scaleOutput);
121 | 
122 | 
123 | #endif	/* COMMON_CUH */
124 | 
125 | 


--------------------------------------------------------------------------------
/include/data.cuh:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef DATA_CUH
28 | #define	DATA_CUH
29 | 
30 | #include <vector>
31 | #include <algorithm>
32 | #include "util.cuh"
33 | 
/*
 * Owning wrapper around a vector of heap-allocated T objects.
 *
 * Ownership: the destructor deletes every element AND the vector itself, so
 * the vector passed to the constructor must have been allocated with new and
 * must not be used by the caller once this object is destroyed.
 *
 * T must provide getLeadingDim(); all elements must report the same, positive
 * leading dimension (asserted in the constructor).
 *
 * Instances cannot be copied: a copy would delete the same vector twice.
 */
template <class T>
class Data {
private:
    // Declared but never defined (see class comment).
    Data(const Data&);
    Data& operator=(const Data&);
protected:
    std::vector<T*>* _data;
public:
    typedef typename std::vector<T*>::iterator T_iter;

    // Takes ownership of `data` and its elements. `data` must be non-empty.
    Data(std::vector<T*>& data) : _data(&data) {
        assert(_data->size() > 0);
        // Index with the vector's own size type to avoid a signed/unsigned comparison.
        for (typename std::vector<T*>::size_type i = 1; i < data.size(); i++) {
            assert(data[i-1]->getLeadingDim() == data[i]->getLeadingDim());
        }
        assert(data[0]->getLeadingDim() > 0);
    }

    // Deletes every element, then the vector that held them.
    ~Data() {
        for (T_iter it = _data->begin(); it != _data->end(); ++it) {
            delete *it;
        }
        delete _data;
    }

    // Bounds-checked element access (std::vector::at throws on a bad index).
    T& operator [](int idx) {
        return *_data->at(idx);
    }

    // Number of elements held.
    int getSize() {
        return _data->size();
    }

    // The underlying vector; it remains owned by this object.
    std::vector<T*>& getData() {
        return *_data;
    }

    // Leading dimension of the first element, which all elements share.
    int getNumCases() {
        return _data->at(0)->getLeadingDim();
    }
};
72 | 
73 | typedef Data<NVMatrix> GPUData;
74 | typedef Data<Matrix> CPUData;
75 | 
76 | class DataProvider {
77 | protected:
78 |     CPUData* _hData;
79 |     NVMatrixV _data;
80 |     int _minibatchSize;
81 |     long int _dataSize;
82 | public:
83 |     DataProvider(int minibatchSize);
84 |     GPUData& operator[](int idx);
85 |     void setData(CPUData&);
86 |     void clearData();
87 |     GPUData& getMinibatch(int idx);
88 |     GPUData& getDataSlice(int startCase, int endCase);
89 |     int getNumMinibatches();
90 |     int getMinibatchSize();
91 |     int getNumCases();
92 |     int getNumCasesInMinibatch(int idx);
93 | };
94 | 
95 | #endif	/* DATA_CUH */
96 | 
97 | 


--------------------------------------------------------------------------------
/include/layer.cuh:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef LAYER_CUH
 28 | #define	LAYER_CUH
 29 | 
 30 | #include <string>
 31 | #include <vector>
 32 | #include <map>
 33 | #include <assert.h>
 34 | #include <nvmatrix.cuh>
 35 | 
 36 | #include "convnet.cuh"
 37 | #include "cost.cuh"
 38 | #include "weights.cuh"
 39 | #include "neuron.cuh"
 40 | 
 41 | class Cost;
 42 | class ConvNet;
 43 | class CostLayer;
 44 | class DataLayer;
 45 | 
 46 | /*
 47 |  * Abstract layer.
 48 |  */
 49 | class Layer {
 50 | protected:
 51 |     ConvNet* _convNet;
 52 |     std::vector<Layer*> _prev, _next;
 53 |     int _rcvdFInputs, _rcvdBInputs;
 54 |     
 55 |     NVMatrixV _inputs;
 56 |     NVMatrix *_outputs; // TODO: make this a pointer so you can reuse previous layers' matrices
 57 |     NVMatrix *_actsGrad; // Layer activity gradients
 58 |     bool _gradConsumer, _foundGradConsumers, _trans;
 59 |     bool _conserveMem;
 60 |     int _numGradProducersNext;
 61 |     int _actsTarget, _actsGradTarget;
 62 |     std::string _name, _type;
 63 |     void fpropNext(PASS_TYPE passType);
 64 |     virtual void truncBwdActs(); 
 65 |     virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType) = 0;
 66 |     
 67 |     virtual void bpropCommon(NVMatrix& v, PASS_TYPE passType) {
 68 |         // Do nothing by default
 69 |     }
 70 |     virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType) {
 71 |         assert(!isGradProducer()); // Only do nothing if not grad producer
 72 |     }
 73 | public:    
 74 |     static bool _saveActsGrad, _saveActs;
 75 |     
 76 |     Layer(ConvNet* convNet, PyObject* paramsDict, bool trans);
 77 |     
 78 |     virtual void fprop(PASS_TYPE passType);
 79 |     void fprop(NVMatrix& v, PASS_TYPE passType);
 80 |     virtual void fprop(NVMatrixV& v, PASS_TYPE passType);
 81 |     virtual void bprop(PASS_TYPE passType);
 82 |     void bprop(NVMatrix& v, PASS_TYPE passType);
 83 |     virtual void reset();
 84 |     int incRcvdBInputs();
 85 |     int getRcvdFInputs();
 86 |     int getRcvdBInputs();
 87 |     bool isGradConsumer();
 88 |     virtual bool isGradProducer();
 89 |     std::string& getName();
 90 |     std::string& getType();
 91 |     void addNext(Layer* l);
 92 |     void addPrev(Layer* l);
 93 |     std::vector<Layer*>& getPrev();
 94 |     std::vector<Layer*>& getNext();
 95 |     virtual NVMatrix& getActs();
 96 |     virtual NVMatrix& getActsGrad();
 97 |     virtual void postInit();
 98 |     
 99 |     // Do nothing if this layer has no weights
100 |     virtual void updateWeights() {
101 |     }
102 |     virtual void checkGradients() {
103 |     }
104 |     virtual void copyToCPU() {
105 |     }
106 |     virtual void copyToGPU()  {
107 |     }
108 | };
109 | 
110 | class NeuronLayer : public Layer {
111 | protected:
112 |     Neuron* _neuron;
113 |     
114 |     virtual void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
115 |     virtual void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
116 | public:
117 |     NeuronLayer(ConvNet* convNet, PyObject* paramsDict);
118 | };
119 | 
120 | class WeightLayer : public Layer {
121 | protected:
122 |     WeightList _weights;
123 |     Weights *_biases;
124 |     float _wStep, _bStep;
125 |     
126 |     void bpropCommon(NVMatrix& v, PASS_TYPE passType);
127 |     virtual void bpropBiases(NVMatrix& v, PASS_TYPE passType) = 0;
128 |     virtual void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType) = 0;
129 | public:
130 |     WeightLayer(ConvNet* convNet, PyObject* paramsDict, bool trans, bool useGrad);
131 |     virtual void updateWeights();
132 |     virtual void copyToCPU();
133 |     virtual void copyToGPU();
134 |     void checkGradients();
135 |     Weights& getWeights(int idx);
136 | };
137 | 
138 | class FCLayer : public WeightLayer {
139 | protected:
140 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
141 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
142 |     void bpropBiases(NVMatrix& v, PASS_TYPE passType);
143 |     void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
144 | public:
145 |     FCLayer(ConvNet* convNet, PyObject* paramsDict);
146 | };
147 | 
148 | class SoftmaxLayer : public Layer {
149 | protected:
150 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
151 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
152 | public:
153 |     SoftmaxLayer(ConvNet* convNet, PyObject* paramsDict);
154 | };
155 | 
156 | class EltwiseSumLayer : public Layer {
157 | protected:
158 |     vector<float>* _coeffs;
159 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
160 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
161 | public:
162 |     EltwiseSumLayer(ConvNet* convNet, PyObject* paramsDict);
163 | };
164 | 
165 | class EltwiseMaxLayer : public Layer {
166 | protected:
167 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
168 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
169 | public:
170 |     EltwiseMaxLayer(ConvNet* convNet, PyObject* paramsDict);
171 | };
172 | 
173 | class DataLayer : public Layer {
174 | private:
175 |     int _dataIdx;
176 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
177 | public:
178 |     DataLayer(ConvNet* convNet, PyObject* paramsDict);
179 |     
180 |     bool isGradProducer();
181 |     void fprop(PASS_TYPE passType);
182 |     void fprop(NVMatrixV& data, PASS_TYPE passType);
183 | };
184 | 
185 | class LocalLayer : public WeightLayer {
186 | protected:
187 |     struct FilterConns {
188 |         int* hFilterConns;
189 |         int* dFilterConns;
190 |     };
191 |     vector<FilterConns>* _filterConns;
192 |     
193 |     intv* _padding, *_stride, *_filterSize, *_channels, *_imgSize, *_groups;
194 |     intv* _imgPixels, *_filterPixels, *_filterChannels, *_overSample, *_randSparse;
195 |     int _modulesX, _modules, _numFilters;
196 | 
197 |     void copyToGPU();
198 |     
199 | public:
200 |     LocalLayer(ConvNet* convNet, PyObject* paramsDict, bool useGrad);
201 | };
202 | 
203 | class ConvLayer : public LocalLayer {
204 | protected:
205 |     int _partialSum;
206 |     bool _sharedBiases;
207 |     
208 |     NVMatrix _weightGradTmp, _actGradTmp;
209 | 
210 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
211 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
212 |     void bpropBiases(NVMatrix& v, PASS_TYPE passType);
213 |     void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
214 |     void truncBwdActs();
215 | 
216 | public:
217 |     ConvLayer(ConvNet* convNet, PyObject* paramsDict);
218 | }; 
219 | 
220 | class LocalUnsharedLayer : public LocalLayer {
221 | protected:
222 |     NVMatrix _sexMask;
223 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
224 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
225 |     void bpropBiases(NVMatrix& v, PASS_TYPE passType);
226 |     void bpropWeights(NVMatrix& v, int inpIdx, PASS_TYPE passType);
227 | public:
228 |     LocalUnsharedLayer(ConvNet* convNet, PyObject* paramsDict);
229 | }; 
230 | 
231 | class PoolLayer : public Layer {
232 | protected:
233 |     int _channels, _sizeX, _start, _stride, _outputsX;
234 |     int _imgSize;
235 |     string _pool;
236 | public:
237 |     PoolLayer(ConvNet* convNet, PyObject* paramsDict, bool trans);
238 |     
239 |     static PoolLayer& makePoolLayer(ConvNet* convNet, PyObject* paramsDict);
240 | }; 
241 | 
242 | class AvgPoolLayer : public PoolLayer {
243 | protected:
244 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
245 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
246 | public:
247 |     AvgPoolLayer(ConvNet* convNet, PyObject* paramsDict);
248 | }; 
249 | 
250 | class MaxPoolLayer : public PoolLayer {
251 | protected:
252 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType);
253 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType);
254 | public:
255 |     MaxPoolLayer(ConvNet* convNet, PyObject* paramsDict);
256 | };
257 | // Layer subclass with channel/start/stride/output-size fields; its fpropActs/bpropActs are declared public.
258 | class NailbedLayer : public Layer {
259 | protected:
260 |     int _channels, _start, _stride, _outputsX; // presumably filled from paramsDict in the constructor -- confirm
261 |     int _imgSize;
262 | public:
263 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
264 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
265 |     
266 |     NailbedLayer(ConvNet* convNet, PyObject* paramsDict);
267 | };
268 | // Layer subclass that keeps a filter as a Matrix pointer and as an NVMatrix, plus a scratch gradient matrix.
269 | class GaussianBlurLayer : public Layer {
270 | protected:
271 |     int _channels;
272 |     Matrix* _hFilter; // presumably the host-side copy of the filter -- confirm ownership in the implementation
273 |     NVMatrix _filter; // presumably the device-side filter populated by copyToGPU()
274 |     NVMatrix _actGradsTmp; // NOTE(review): looks like temporary storage used during bpropActs -- confirm
275 | public:
276 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
277 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
278 |     void copyToGPU();
279 |     
280 |     GaussianBlurLayer(ConvNet* convNet, PyObject* paramsDict);
281 | };
282 | // Layer subclass carrying a scale factor together with a source and a target image size.
283 | class ResizeLayer : public Layer {
284 | protected:
285 |     int _channels;
286 |     float _scale; // NOTE(review): relation between _scale, _imgSize and _tgtSize is set in the constructor -- confirm
287 |     int _imgSize, _tgtSize;
288 | public:
289 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
290 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
291 | 
292 |     ResizeLayer(ConvNet* convNet, PyObject* paramsDict);
293 | };
294 | // Stateless Layer subclass (no data members declared here); RGB to YUV conversion, going by the name.
295 | class RGBToYUVLayer : public Layer {
296 | public:
297 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
298 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
299 | 
300 |     RGBToYUVLayer(ConvNet* convNet, PyObject* paramsDict);
301 | };
302 | // Layer subclass with a single boolean option; RGB to LAB conversion, going by the name.
303 | class RGBToLABLayer : public Layer {
304 | protected:
305 |     bool _center; // NOTE(review): meaning of "center" is defined by the implementation -- confirm
306 | public:
307 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
308 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
309 | 
310 |     RGBToLABLayer(ConvNet* convNet, PyObject* paramsDict);
311 | };
312 | // Base of CrossMapResponseNormLayer and ContrastNormLayer: size/scale/pow parameters plus a _denoms matrix.
313 | class ResponseNormLayer : public Layer {
314 | protected:
315 |     int _channels, _size;
316 |     float _scale, _pow;
317 |     NVMatrix _denoms; // presumably the normalization denominators kept from fprop for use in bprop -- confirm
318 | 
319 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
320 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
321 |     void truncBwdActs(); // NOTE(review): looks like it releases buffers held for the backward pass -- confirm
322 | public:
323 |     ResponseNormLayer(ConvNet* convNet, PyObject* paramsDict);
324 | }; 
325 | // ResponseNormLayer subclass that adds a _blocked flag and declares its own fpropActs/bpropActs.
326 | class CrossMapResponseNormLayer : public ResponseNormLayer {
327 | protected:
328 |     bool _blocked; // NOTE(review): semantics of "blocked" are defined by the implementation -- confirm
329 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
330 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
331 | public:
332 |     CrossMapResponseNormLayer(ConvNet* convNet, PyObject* paramsDict);
333 | }; 
334 | // ResponseNormLayer subclass that adds an image size and a _meanDiffs matrix and redeclares all three hooks.
335 | class ContrastNormLayer : public ResponseNormLayer {
336 | protected:
337 |     int _imgSize;
338 |     NVMatrix _meanDiffs; // presumably the input-minus-local-mean values kept between fprop and bprop -- confirm
339 |     
340 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
341 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
342 |     void truncBwdActs(); // NOTE(review): looks like it releases buffers held for the backward pass -- confirm
343 | public:
344 |     ContrastNormLayer(ConvNet* convNet, PyObject* paramsDict);
345 | };
346 | // Base of LogregCostLayer and SumOfSquaresCostLayer: stores a coefficient and a vector of cost values.
347 | class CostLayer : public Layer {
348 | protected:
349 |     float _coeff; // returned by getCoeff() (presumably); weighting semantics live in the implementation
350 |     doublev _costv; // vector of doubles; presumably what getCost() returns a reference to
351 | public:
352 |     CostLayer(ConvNet* convNet, PyObject* paramsDict, bool trans);
353 |     void bprop(PASS_TYPE passType); 
354 |     virtual doublev& getCost(); // the only member declared virtual in this class
355 |     float getCoeff();
356 |     bool isGradProducer();
357 |     
358 |     static CostLayer& makeCostLayer(ConvNet* convNet, string& type, PyObject* paramsDict); // factory keyed on `type`
359 | };
360 | // CostLayer subclass for the logistic-regression cost (going by the name); its two inputs are listed below.
361 | /*
362 |  * Input 0: labels
363 |  * Input 1: softmax outputs
364 |  */
365 | class LogregCostLayer : public CostLayer {
366 | protected:
367 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
368 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
369 | public:
370 |     LogregCostLayer(ConvNet* convNet, PyObject* paramsDict);
371 | };
372 | // CostLayer subclass for a sum-of-squares cost (going by the name); declares its own fpropActs/bpropActs.
373 | class SumOfSquaresCostLayer : public CostLayer {
374 | protected:
375 |     void fpropActs(int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably the forward pass for input inpIdx
376 |     void bpropActs(NVMatrix& v, int inpIdx, float scaleTargets, PASS_TYPE passType); // presumably backprop of v to input inpIdx
377 | public:
378 |     SumOfSquaresCostLayer(ConvNet* convNet, PyObject* paramsDict);
379 | };
380 | 
381 | #endif	/* LAYER_CUH */
382 | 
383 | 


--------------------------------------------------------------------------------
/include/layer_kernels.cuh:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef LAYER_KERNELS_CUH
28 | #define	LAYER_KERNELS_CUH
29 | 
30 | #include <cutil_inline.h>
31 | #include <nvmatrix.cuh>
32 | // Thread-count constants; presumably the block dimensions of the logreg gradient and error kernels -- confirm in layer_kernels.cu.
33 | #define LOGREG_GRAD_THREADS_X      32
34 | #define LOGREG_GRAD_THREADS_Y      4
35 | 
36 | #define LOGREG_ERR_THREADS_X        128
37 | #define LOGREG_ERR_THREADS_Y        1
38 | // Host-callable routines; results are presumably written to the *_out / target arguments -- confirm in layer_kernels.cu.
39 | void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out);
40 | void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
41 | void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add);
42 | 
43 | // Numerical stability optimization: this routine combines computeLogregGrad with computeSoftmaxGrad
44 | // to avoid dividing and then multiplying by quantities that may be near zero.
45 | void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff);
46 | void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add);
47 | 
48 | #endif	/* LAYER_KERNELS_CUH */
49 | 
50 | 


--------------------------------------------------------------------------------
/include/nvmatrix/nvmatrix_operators.cuh:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef NVMATRIX_OPERATORS_CUH
 28 | #define	NVMATRIX_OPERATORS_CUH
 29 | 
 30 | #include <cutil_inline.h>
 31 | // Unary element-wise functors: each nested class maps one float to one float in device code.
 32 | class NVMatrixOps {
 33 | public:
 34 |     class Exp { // e^a through the fast-math intrinsic __expf
 35 |     public:
 36 |         __device__ inline float operator()(const float a) const {
 37 |             return __expf(a);
 38 |         }
 39 |     };
 40 | 
 41 |     class Logistic { // 1 / (1 + e^-a), built from the fast-math intrinsics
 42 |     public:
 43 |         __device__ inline float operator()(const float a) const {
 44 |             return __fdividef(1.0f, 1.0f + __expf(-a));
 45 |         }
 46 |     };
 47 | 
 48 |     class Log { // natural logarithm through the fast-math intrinsic __logf
 49 |     public:
 50 |         __device__ inline float operator()(const float a) const {
 51 |             return __logf(a);
 52 |         }
 53 |     };
 54 | 
 55 |     class Square { // a^2
 56 |     public:
 57 |         __device__ inline float operator()(const float a) const {
 58 |             return a * a;
 59 |         }
 60 |     };
 61 | 
 62 |     class Sqrt { // square root
 63 |     public:
 64 |         __device__ inline float operator()(const float a) const {
 65 |             return sqrtf(a);
 66 |         }
 67 |     };
 68 | 
 69 |     class Reciprocal { // 1 / a, no guard against a == 0
 70 |     public:
 71 |         __device__ inline float operator()(const float a) const {
 72 |             return 1.0f / a;
 73 |         }
 74 |     };
 75 | 
 76 |     class Abs { // |a|
 77 |     public:
 78 |         __device__ inline float operator()(const float a) const {
 79 |             return fabsf(a); // the former `a > 0 ? a : -a` turned +0.0f into -0.0f; fabsf always clears the sign
 80 |         }
 81 |     };
 82 | 
 83 |     class Sign { // -1, 0 or 1 according to the sign of a
 84 |     public:
 85 |         __device__ inline float operator()(const float a) const {
 86 |             return (a > 0) - (a < 0);
 87 |         }
 88 |     };
 89 |     
 90 |     class Identity { // returns the input unchanged
 91 |     public:
 92 |         __device__ inline float operator()(const float a) const {
 93 |             return a;
 94 |         }
 95 |     };
 96 | 
 97 |     class Zero { // constant 0, the argument is ignored
 98 |     public:
 99 |         __device__ inline float operator()(const float a) const {
100 |             return 0;
101 |         }
102 |     };
103 | 
104 |     class One { // constant 1, the argument is ignored
105 |     public:
106 |         __device__ inline float operator()(const float a) const {
107 |             return 1;
108 |         }
109 |     };
110 |     
111 |     class SmallerThanScalar { // 1 if a < scalar, else 0
112 |     private:
113 |         const float scalar;
114 |     public:
115 |         SmallerThanScalar(const float _scalar) : scalar(_scalar) {
116 |         }
117 |         __device__ inline float operator()(const float a) const {
118 |             return a < scalar;
119 |         }
120 |     };
121 | 
122 |     class BiggerThanScalar { // 1 if a > scalar, else 0
123 |     private:
124 |         const float scalar;
125 |     public:
126 |         BiggerThanScalar(const float _scalar) : scalar(_scalar) {
127 |         }
128 |         __device__ inline float operator()(const float a) const {
129 |             return a > scalar;
130 |         }
131 |     };
132 | 
133 |     class AddScalar { // a + scalar
134 |     private:
135 |         const float scalar;
136 |     public:
137 |         AddScalar(const float _scalar) : scalar(_scalar) {
138 |         }
139 |         __device__ inline float operator()(const float a) const {
140 |             return a + scalar;
141 |         }
142 |     };
143 | 
144 |     class WeightedAddScalar { // weight * a + scalar
145 |     private:
146 |         const float weight, scalar;
147 |     public:
148 |         WeightedAddScalar(const float _weight, const float _scalar) : weight(_weight), scalar(_scalar) {
149 |         }
150 |         __device__ inline float operator()(const float a) const {
151 |             return weight * a + scalar;
152 |         }
153 |     };
154 | 
155 |     class MultByScalar { // a * scalar
156 |     private:
157 |         const float scalar;
158 |     public:
159 |         MultByScalar(const float _scalar) : scalar(_scalar) {
160 |         }
161 |         __device__ inline float operator()(const float a) const {
162 |             return a * scalar;
163 |         }
164 |     };
165 | 
166 |     class Pow { // a^p through the fast-math intrinsic __powf
167 |     private:
168 |         const float p;
169 |     public:
170 |         Pow(const float _p) : p(_p) {
171 |         }
172 |         __device__ inline float operator()(const float a) const {
173 |             return __powf(a, p);
174 |         }
175 |     };
176 | 
177 |     template <bool exclusive> // true: open interval (lower, upper); false: closed interval [lower, upper]
178 |     class InRange { // 1 if a lies in the interval, else 0
179 |     private:
180 |         const float lower, upper;
181 |     public:
182 |         InRange(const float _lower, const float _upper) : lower(_lower), upper(_upper) {
183 |         }
184 |         __device__ inline float operator()(const float a) const {
185 |             return exclusive ? a > lower && a < upper : a >= lower && a <= upper;
186 |         }
187 |     };
188 | 
189 |     class MinWithScalar { // min(a, scalar)
190 |     private:
191 |         const float scalar;
192 |     public:
193 |         MinWithScalar(const float _scalar) : scalar(_scalar) {
194 |         }
195 |         __device__ inline float operator()(const float a) const {
196 |             return a > scalar ? scalar : a;
197 |         }
198 |     };
199 | 
200 |     class MaxWithScalar { // max(a, scalar)
201 |     private:
202 |         const float scalar;
203 |     public:
204 |         MaxWithScalar(const float _scalar) : scalar(_scalar) {
205 |         }
206 |         __device__ inline float operator()(const float a) const {
207 |             return a > scalar ? a : scalar;
208 |         }
209 |     };
210 | };
211 | // Binary element-wise functors: each nested class combines two floats into one in device code.
212 | class NVMatrixBinaryOps {
213 | public:
214 |     class Equals { // 1 when both operands compare equal, otherwise 0
215 |     public:
216 |         __device__ inline float operator()(const float a, const float b) const {
217 |             return a == b ? 1.0f : 0.0f;
218 |         }
219 |     };
220 | 
221 |     class BiggerThan { // 1 when a is strictly greater than b, otherwise 0
222 |     public:
223 |         __device__ inline float operator()(const float a, const float b) const {
224 |             return a > b ? 1.0f : 0.0f;
225 |         }
226 |     };
227 | 
228 |     class Divide { // a / b through the fast-math intrinsic __fdividef
229 |     public:
230 |         __device__ inline float operator()(const float a, const float b) const  {
231 |             return __fdividef(a, b);
232 |         }
233 |     };
234 | 
235 |     class Multiply { // product of the operands
236 |     public:
237 |         __device__ inline float operator()(const float a, const float b) const {
238 |             return a * b;
239 |         }
240 |     };
241 | 
242 |     class SquaredDiff { // square of the difference a - b
243 |     public:
244 |         __device__ inline float operator()(const float a, const float b) const {
245 |             return (a - b) * (a - b);
246 |         }
247 |     };
248 | 
249 |     class WeightedAdd { // linear combination of the operands with fixed coefficients
250 |     private:
251 |         const float scaleA, scaleB;
252 |     public:
253 |         WeightedAdd(const float coeffA, const float coeffB) : scaleA(coeffA), scaleB(coeffB) {
254 |         }
255 |         __device__ inline float operator()(const float a, const float b) const {
256 |             return a * scaleA + b * scaleB;
257 |         }
258 |     };
259 | 
260 |     class Add { // sum of the operands
261 |     public:
262 |         __device__ inline float operator()(const float a, const float b) const {
263 |             return a + b;
264 |         }
265 |     };
266 |     
267 |     class First { // selects the left operand
268 |     public:
269 |         __device__ inline float operator()(const float a, const float b) const {
270 |             return a;
271 |         }
272 |     };
273 |     
274 |     class Second { // selects the right operand
275 |     public:
276 |         __device__ inline float operator()(const float a, const float b) const {
277 |             return b;
278 |         }
279 |     };
280 |     
281 |     class SecondScaled { // right operand times a fixed factor; the left operand is ignored
282 |     private:
283 |         const float scale;
284 |     public:
285 |         SecondScaled(const float factor) : scale(factor) {
286 |         }
287 |         __device__ inline float operator()(const float a, const float b) const {
288 |             return scale * b;
289 |         }
290 |     };
291 | };
292 | // Binary reduction functors; getBaseValue() is presumably the seed a reduction starts from -- confirm in the kernels.
293 | class NVMatrixAggs {
294 | public:
295 |     class Sum { // running sum, seeded with 0
296 |     public:
297 |         __device__ inline float operator()(const float a, const float b) const {
298 |             return a + b;
299 |         }
300 |         __device__ inline float getBaseValue() {
301 |             return 0;
302 |         }
303 |     };
304 | 
305 |     class Max { // running maximum
306 |     public:
307 |         __device__ inline float operator()(const float a, const float b) const {
308 |             return a > b ? a : b;
309 |         }
310 |         __device__ inline float getBaseValue() {
311 |             return -3.402823466e+38f; // -FLT_MAX; the former -2e38 seed was larger than some finite floats
312 |         }
313 |     };
314 | 
315 |     class Min { // running minimum
316 |     public:
317 |         __device__ inline float operator()(const float a, const float b) const {
318 |             return a > b ? b : a;
319 |         }
320 |         __device__ inline float getBaseValue() {
321 |             return 3.402823466e+38f; // FLT_MAX; the former 2e38 seed was smaller than some finite floats
322 |         }
323 |     };
324 | 
325 |     template<class UnaryOperator> // must be callable on a float and provide getArgMin()
326 |     class ArgMax { // keeps whichever operand has the larger u(.); b wins ties
327 |     private:
328 |        UnaryOperator u;
329 |     public:
330 |        ArgMax(UnaryOperator _u) : u(_u) {
331 |        }
332 |        __device__ inline float operator()(const float a, const float b) const {
333 |            return u(a) > u(b) ? a : b;
334 |        }
335 |        __device__ inline float getBaseValue() {
336 |            return u.getArgMin(); // seed is delegated to the wrapped operator
337 |        }
338 |     };
339 | };
340 | // Ternary element-wise functors: each nested class combines three floats into one in device code.
341 | class NVMatrixTernaryOps {
342 | public:
343 |     class Add { // sum of all three operands, accumulated left to right
344 |     public:
345 |         __device__ inline float operator()(const float a, const float b, const float c) const {
346 |             return (a + b) + c;
347 |         }
348 |     };
349 | };
350 | 
351 | #endif	/* NVMATRIX_OPERATORS_CUH */
352 | 
353 | 


--------------------------------------------------------------------------------
/include/pyconvnet.cuh:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef PYCONVNET3_CUH
28 | #define	PYCONVNET3_CUH
29 | // Two-step stringification: QUOTEME(x) lets x be macro-expanded first, then _QUOTEME turns the result into a string literal.
30 | #define _QUOTEME(x) #x
31 | #define QUOTEME(x) _QUOTEME(x)
32 | // Entry point with C linkage; INITNAME is not defined in this header (presumably a build-time macro -- confirm in the Makefile).
33 | extern "C" void INITNAME();
34 | // Functions taking (self, args) and returning PyObject*; presumably the methods exported to Python -- confirm in pyconvnet.cu.
35 | PyObject* initModel(PyObject *self, PyObject *args);
36 | PyObject* startBatch(PyObject *self, PyObject *args);
37 | PyObject* finishBatch(PyObject *self, PyObject *args);
38 | PyObject* checkGradients(PyObject *self, PyObject *args);
39 | PyObject* syncWithHost(PyObject *self, PyObject *args);
40 | PyObject* startMultiviewTest(PyObject *self, PyObject *args);
41 | PyObject* startFeatureWriter(PyObject *self, PyObject *args);
42 | 
43 | #endif	/* PYCONVNET3_CUH */
44 | 
45 | 


--------------------------------------------------------------------------------
/include/util.cuh:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #ifndef UTIL_H
28 | #define	UTIL_H
29 | 
30 | #include <vector>
31 | #include <map>
32 | #include <string>
33 | #include <sstream>
34 | #include <string>
35 | #include <Python.h>
36 | #include <nvmatrix.cuh>
37 | #include <matrix.h>
38 | 
39 | /*
40 |  * The types of passes that the convnet supports. Used in the fprop and bprop functions in
41 |  * ConvNet class. Most of the layers ignore the pass type, but some make use of it.
42 |  */
43 | enum PASS_TYPE {PASS_TRAIN, PASS_TEST, PASS_GC};
44 | 
45 | // For gradient checking (GC_REL_ERR_THRESH is presumably the largest accepted relative error -- confirm at its use site)
46 | #define GC_SUPPRESS_PASSES          true
47 | #define GC_REL_ERR_THRESH           0.02
48 | 
49 | /*
50 |  * Store entire data matrix on GPU if its size does not exceed this many MB.
51 |  * Otherwise store only one minibatch at a time.
52 |  */ 
53 | #define MAX_DATA_ON_GPU             200 
54 | // Shorthand aliases for standard containers of matrices, costs and scalars.
55 | typedef std::vector<Matrix*> MatrixV;
56 | typedef std::vector<NVMatrix*> NVMatrixV;
57 | typedef std::map<std::string,std::vector<double>*> CostMap;
58 | typedef std::map<std::string,double> CostCoeffMap;
59 | typedef std::vector<double> doublev;
60 | typedef std::vector<float> floatv;
61 | typedef std::vector<int> intv;
62 | // Converters from a Python list object; the pointer results are presumably heap-allocated and owned by the caller -- confirm in util.cu.
63 | floatv* getFloatV(PyObject* pyList);
64 | intv* getIntV(PyObject* pyList);
65 | MatrixV* getMatrixV(PyObject* pyList);
66 | int* getIntA(PyObject* pyList);
67 | // Typed lookups of `key` in a Python dict; behavior for a missing key is not visible here -- confirm in util.cu.
68 | int pyDictGetInt(PyObject* dict, const char* key);
69 | intv* pyDictGetIntV(PyObject* dict, const char* key);
70 | std::string pyDictGetString(PyObject* dict, const char* key);
71 | float pyDictGetFloat(PyObject* dict, const char* key);
72 | floatv* pyDictGetFloatV(PyObject* dict, const char* key);
73 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key);
74 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key);
75 | int* pyDictGetIntA(PyObject* dict, const char* key);
76 | // Renders n as text by streaming it into a std::ostringstream with default formatting.
77 | template<typename T>
78 | std::string tostr(T n) {
79 |     std::ostringstream buffer; // T only needs an operator<< for output streams
80 |     buffer << n;
81 |     return buffer.str();
82 | }
83 | 
84 | #endif	/* UTIL_H */
85 | 
86 | 


--------------------------------------------------------------------------------
/include/weights.cuh:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef WEIGHTS_CUH
 28 | #define	WEIGHTS_CUH
 29 | 
 30 | #include <string>
 31 | #include <vector>
 32 | #include <iostream>
 33 | #include <cutil_inline.h>
 34 | #include <assert.h>
 35 | #include <nvmatrix.cuh>
 36 | #include <matrix.h>
 37 | #include "util.cuh"
 38 | 
 39 | using namespace std;
 40 | // A weight matrix kept on host and device together with its update hyper-parameters; may alias another Weights.
 41 | class Weights {
 42 | private:
 43 |     Matrix* _hWeights, *_hWeightsInc; // host-side weights and weight increments
 44 |     NVMatrix* _weights, *_weightsInc, *_weightsGrad; // device-side counterparts; _weightsGrad is allocated only if _useGrad
 45 |     
 46 |     float _epsW, _wc, _mom; // presumably learning rate, weight-decay coefficient and momentum -- see update()
 47 |     bool _onGPU, _useGrad;
 48 |     int _numUpdates;
 49 |     static bool _autoCopyToGPU; // when true, both constructors call copyToGPU() right away
 50 |     
 51 |     // Non-NULL if these weights are really shared from some other layer
 52 |     Weights* _srcWeights;
 53 |  
 54 | public:
 55 |     NVMatrix& operator*() { // shorthand for getW()
 56 |         return getW();
 57 |     }
 58 |     // Aliases the matrices of srcWeights; momentum and useGrad are copied from it, weight decay is set to 0.
 59 |     Weights(Weights& srcWeights, float epsW) : _weights(NULL), _weightsInc(NULL), _weightsGrad(NULL), _epsW(epsW), _wc(0),
 60 |                                                _onGPU(false), _numUpdates(0), _srcWeights(&srcWeights) {
 61 |         _hWeights = &srcWeights.getCPUW();
 62 |         _hWeightsInc = &srcWeights.getCPUWInc();
 63 |         _mom = srcWeights.getMom();
 64 |         _useGrad = srcWeights.isUseGrad();   
 65 |         if (_autoCopyToGPU) {
 66 |             copyToGPU();
 67 |         }
 68 |     }
 69 |     // Takes ownership of hWeights and hWeightsInc: the destructor deletes both. Initializers follow declaration order.
 70 |     Weights(Matrix& hWeights, Matrix& hWeightsInc, float epsW, float wc, float mom, bool useGrad)
 71 |         : _hWeights(&hWeights), _hWeightsInc(&hWeightsInc), _weights(NULL), _weightsInc(NULL),
 72 |           _weightsGrad(NULL), _epsW(epsW), _wc(wc), _mom(mom), _onGPU(false), _useGrad(useGrad),
 73 |           _numUpdates(0), _srcWeights(NULL) {
 74 |         if (_autoCopyToGPU) {
 75 |             copyToGPU();
 76 |         }
 77 |     }
 78 |     // Frees only what this object owns: an aliasing instance borrows every matrix from _srcWeights.
 79 |     ~Weights() {
 80 |         if (_srcWeights == NULL) {
 81 |             delete _hWeights; // used to be deleted unconditionally, i.e. twice when weights were shared
 82 |             delete _hWeightsInc;
 83 |             delete _weights;
 84 |             delete _weightsInc;
 85 |             delete _weightsGrad;
 86 |         }
 87 |     }
 88 | 
 89 |     static void setAutoCopyToGPU(bool autoCopyToGPU) {
 90 |         _autoCopyToGPU = autoCopyToGPU;
 91 |     }
 92 |     // Device-side accessors below assert that copyToGPU() has run.
 93 |     NVMatrix& getW() {
 94 |         assert(_onGPU);
 95 |         return *_weights;
 96 |     }
 97 |     
 98 |     NVMatrix& getInc() {
 99 |         assert(_onGPU);
100 |         return *_weightsInc;
101 |     }
102 |     // Returns the separate gradient matrix when _useGrad is set, otherwise the increment matrix itself.
103 |     NVMatrix& getGrad() {
104 |         assert(_onGPU);
105 |         return _useGrad ? *_weightsGrad : *_weightsInc;
106 |     }
107 |     
108 |     Matrix& getCPUW() {
109 |         return *_hWeights;
110 |     }
111 |     
112 |     Matrix& getCPUWInc() {
113 |         return *_hWeightsInc;
114 |     }
115 |     
116 |     int getNumRows() const {
117 |         return _hWeights->getNumRows();
118 |     }
119 |     
120 |     int getNumCols() const {
121 |         return _hWeights->getNumCols();
122 |     }
123 |     // Copies device weights and increments back to the host matrices; a no-op for aliasing instances.
124 |     void copyToCPU() {
125 |         if (_srcWeights == NULL) {
126 |             assert(_onGPU);
127 |             _weights->copyToHost(*_hWeights);
128 |             _weightsInc->copyToHost(*_hWeightsInc);
129 |         }
130 |     }
131 |     
132 |     // This function is assumed to be called in the order in which the layers
133 |     // were defined
134 |     void copyToGPU() {
135 |         if (_srcWeights == NULL) {
136 |             _weights = new NVMatrix();
137 |             _weightsInc = new NVMatrix();
138 |             _weights->copyFromHost(*_hWeights, true);
139 |             _weightsInc->copyFromHost(*_hWeightsInc, true);
140 |             _weightsGrad = _useGrad ? new NVMatrix() : NULL;
141 |         } else {
142 |             _weights = _srcWeights->_weights; // still NULL if the source has not been copied yet, hence the ordering note
143 |             _weightsInc = _srcWeights->_weightsInc;
144 |             _weightsGrad = _srcWeights->_weightsGrad;
145 |         }
146 |         _onGPU = true;
147 |     }
148 |     
149 |     // Scale your gradient by epsW / numCases!
150 |     void update() {
151 |         // Only true owner of weights updates
152 |         if (_srcWeights == NULL && _epsW > 0) {
153 |             assert(_onGPU);
154 |             if (_useGrad) {
155 |                 _weightsInc->add(*_weightsGrad, _mom, 1); // presumably inc = _mom * inc + grad -- confirm NVMatrix::add
156 |             }
157 |             if (_wc > 0) {
158 |                 _weightsInc->add(*_weights, -_wc * _epsW); // weight-decay term scaled by the learning rate
159 |             }
160 |             _weights->add(*_weightsInc);
161 |             _numUpdates = 0;
162 |         }
163 |     }
164 |     // Bumps the owner's counter and returns the value it had before the increment.
165 |     int incNumUpdates() {
166 |         if (_srcWeights != NULL) {
167 |             return _srcWeights->incNumUpdates();
168 |         }
169 |         return _numUpdates++;
170 |     }
171 |     
172 |     // Returns the number of times a gradient has been computed for this
173 |     // weight matrix during the current pass (interval between two calls of update())
174 |     // through the net. This number will only be greater than 1 if this weight matrix
175 |     // is *shared* by multiple layers in the net.
176 |     int getNumUpdates() const {
177 |         if (_srcWeights != NULL) {
178 |             return _srcWeights->getNumUpdates();
179 |         }
180 |         return _numUpdates;
181 |     }
182 |     
183 |     float getEps() const {
184 |         return _epsW;
185 |     }
186 |     
187 |     float getMom() const {
188 |         return _mom;
189 |     }
190 |     
191 |     float getWC() const {
192 |         return _wc;
193 |     }
194 |     
195 |     bool isUseGrad() const { // is good grammar
196 |         return _useGrad;
197 |     }
198 | };
199 | // Owning list of Weights pointers: every element added through addWeights() is deleted by the destructor.
200 | class WeightList {
201 | private:
202 |     std::vector<Weights*> _weightList;
203 | 
204 | public:
205 |     Weights& operator[](const int idx) const { // unchecked access
206 |         return *_weightList[idx];
207 |     }
208 |     
209 |     ~WeightList() {
210 |         for (size_t i = 0; i < _weightList.size(); i++) { // size_t index avoids a signed/unsigned comparison
211 |             delete _weightList[i];
212 |         }
213 |     }
214 |     
215 | //    WeightList(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) : _initialized(false) {
216 | //        initialize(hWeights, hWeightsInc, epsW, wc, mom, useGrads);
217 | //    }
218 |     
219 |     WeightList() {
220 |     }
221 |     
222 | //    void initialize(MatrixV& hWeights, MatrixV& hWeightsInc, floatv& epsW, floatv& wc, floatv& mom, bool useGrads) {
223 | //        for (int i = 0; i < hWeights.size(); i++) {
224 | //            _weightList.push_back(new Weights(*hWeights[i], *hWeightsInc[i], epsW[i], wc[i], mom[i], useGrads));
225 | //        }
226 | //        _initialized = true;
227 | //        delete &hWeights;
228 | //        delete &hWeightsInc;
229 | //        delete &epsW;
230 | //        delete &wc;
231 | //        delete &mom;
232 | //    }
233 |     // Stores the address of w; w must be heap-allocated because the destructor deletes it.
234 |     void addWeights(Weights& w) {
235 |         _weightList.push_back(&w);
236 |     }
237 |     
238 | //    void addWeights(WeightList& wl) {
239 | //        for (int i = 0; i < wl.getSize(); i++) {
240 | //            addWeights(wl[i]);
241 | //        }
242 | //    }
243 |     // The three methods below forward the call to every element, in insertion order.
244 |     void update() {
245 |         for (int i = 0; i < getSize(); i++) {
246 |             _weightList[i]->update();
247 |         }
248 |     }
249 | 
250 |     void copyToCPU() {
251 |         for (int i = 0; i < getSize(); i++) {
252 |             _weightList[i]->copyToCPU();
253 |         }
254 |     }
255 | 
256 |     void copyToGPU() {
257 |         for (int i = 0; i < getSize(); i++) {
258 |             _weightList[i]->copyToGPU();
259 |         }
260 |     }
261 |     // Now const, so a const WeightList can query its size as well as index into it.
262 |     int getSize() const {
263 |         return static_cast<int>(_weightList.size());
264 |     }
265 | };
266 | 
267 | #endif	/* WEIGHTS_CUH */


--------------------------------------------------------------------------------
/include/worker.cuh:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #ifndef WORKER_CUH
 28 | #define	WORKER_CUH
 29 | 
 30 | #include "convnet.cuh"
 31 | #include "cost.cuh"
 32 | #include "data.cuh"
 33 | 
 34 | class ConvNet;
 35 | class Cost;
 36 | 
 37 | class WorkResult { // presumably what a Worker posts on ConvNet's result queue when it finishes -- confirm in worker.cu
 38 | public:
 39 |     enum RESULTS {BATCH_DONE, SYNC_DONE}; // kind of work that finished
 40 | protected:
 41 |     WorkResult::RESULTS _resultType;
 42 |     Cost* _results; // NOTE(review): ownership is not visible here; check ~WorkResult() in worker.cu
 43 | public:
 44 |     WorkResult(WorkResult::RESULTS resultType, Cost& results);
 45 |     WorkResult(WorkResult::RESULTS resultType); // result without a Cost; presumably leaves _results NULL -- confirm
 46 |     virtual ~WorkResult();
 47 |     Cost& getResults() const;
 48 |     WorkResult::RESULTS getResultType() const;
 49 | };
 50 | 
 51 | class Worker { // abstract unit of work; ConvNet::run() dequeues, runs and then deletes each one
 52 | protected:
 53 |     ConvNet* _convNet;
 54 | public:
 55 |     Worker(ConvNet& convNet);
 56 |     virtual ~Worker() {} // workers are deleted through a Worker*, so derived destructors must be reachable
 57 |     virtual void run() = 0; // performs the work; implemented by every concrete worker
 58 | };
 59 | class DataWorker : public Worker { // base class for workers that are handed a CPU-side data batch
 60 | protected:
 61 |     CPUData* _data;
 62 |     DataProvider* _dp; // presumably the ConvNet's provider (ConvNet::getDataProvider) -- confirm in worker.cu
 63 | public:
 64 |     DataWorker(ConvNet& convNet, CPUData& data);
 65 |     virtual ~DataWorker();
 66 | };
 67 | 
 68 | class TrainingWorker : public DataWorker { // presumably runs the net over the batch -- see worker.cu
 69 | protected:
 70 |     bool _test; // NOTE(review): presumably selects evaluation-only mode (no weight update) -- confirm
 71 | public:
 72 |     TrainingWorker(ConvNet& convNet, CPUData& data, bool test);
 73 |     void run();
 74 | };
 75 | 
 76 | class SyncWorker : public Worker { // needs no data batch; presumably synchronises net state with the host -- confirm
 77 | public:
 78 |     SyncWorker(ConvNet& convNet);
 79 |     void run();
 80 | };
 81 | 
 82 | class GradCheckWorker : public DataWorker { // presumably drives ConvNet::checkGradients() on the batch -- confirm
 83 | public:
 84 |     GradCheckWorker(ConvNet& convNet, CPUData& data);
 85 |     void run();
 86 | };
 87 | 
 88 | class MultiviewTestWorker : public DataWorker { // NOTE(review): semantics live in worker.cu
 89 | protected:
 90 |     int _numViews, _logregIdx; // presumably views per case and the index of the logreg cost layer -- confirm
 91 | public:
 92 |     MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx);
 93 |     void run();
 94 | };
 95 | 
 96 | class FeatureWorker : public DataWorker { // presumably writes one layer's activations into a host matrix -- confirm
 97 | protected:
 98 |     Matrix* _ftrs; // NOTE(review): ownership not visible here; check ~FeatureWorker() in worker.cu
 99 |     int _layerIdx;
100 | public:
101 |     FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx);
102 |     ~FeatureWorker(); // implicitly virtual because ~DataWorker() is declared virtual
103 |     void run();
104 | };
105 | 
106 | #endif	/* WORKER_CUH */
107 | 
108 | 


--------------------------------------------------------------------------------
/ordereddict.py:
--------------------------------------------------------------------------------
  1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
  2 | # Passes Python2.7's test suite and incorporates all the latest updates.
  3 | 
  4 | try:
  5 |     from thread import get_ident as _get_ident
  6 | except ImportError:
  7 |     from dummy_thread import get_ident as _get_ident
  8 | 
  9 | try:
 10 |     from _abcoll import KeysView, ValuesView, ItemsView
 11 | except ImportError:
 12 |     pass
 13 | 
 14 | 
 15 | class OrderedDict(dict):
 16 |     'Dictionary that remembers insertion order'
 17 |     # An inherited dict maps keys to values.
 18 |     # The inherited dict provides __getitem__, __len__, __contains__, and get.
 19 |     # The remaining methods are order-aware.
 20 |     # Big-O running times for all methods are the same as for regular dictionaries.
 21 | 
 22 |     # The internal self.__map dictionary maps keys to links in a doubly linked list.
 23 |     # The circular doubly linked list starts and ends with a sentinel element.
 24 |     # The sentinel element never gets deleted (this simplifies the algorithm).
 25 |     # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
 26 | 
 27 |     def __init__(self, *args, **kwds):
 28 |         '''Initialize an ordered dictionary.  Signature is the same as for
 29 |         regular dictionaries, but keyword arguments are not recommended
 30 |         because their insertion order is arbitrary.
 31 | 
 32 |         '''
 33 |         if len(args) > 1:
 34 |             raise TypeError('expected at most 1 arguments, got %d' % len(args))
 35 |         try:
 36 |             self.__root                                 # probe: only missing on the first initialisation
 37 |         except AttributeError:
 38 |             self.__root = root = []                     # sentinel node
 39 |             root[:] = [root, root, None]                # empty list: the sentinel links to itself
 40 |             self.__map = {}
 41 |         self.__update(*args, **kwds)
 42 | 
 43 |     def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
 44 |         'od.__setitem__(i, y) <==> od[i]=y'
 45 |         # Setting a new item creates a new link which goes at the end of the linked
 46 |         # list, and the inherited dictionary is updated with the new key/value pair.
 47 |         if key not in self:
 48 |             root = self.__root
 49 |             last = root[0]
 50 |             last[1] = root[0] = self.__map[key] = [last, root, key]  # splice in just before the sentinel
 51 |         dict_setitem(self, key, value)
 52 | 
 53 |     def __delitem__(self, key, dict_delitem=dict.__delitem__):
 54 |         'od.__delitem__(y) <==> del od[y]'
 55 |         # Deleting an existing item uses self.__map to find the link which is
 56 |         # then removed by updating the links in the predecessor and successor nodes.
 57 |         dict_delitem(self, key)  # raises KeyError first, before any link is touched
 58 |         link_prev, link_next, key = self.__map.pop(key)
 59 |         link_prev[1] = link_next
 60 |         link_next[0] = link_prev
 61 | 
 62 |     def __iter__(self):
 63 |         'od.__iter__() <==> iter(od)'
 64 |         root = self.__root
 65 |         curr = root[1]  # first real link (NEXT of the sentinel)
 66 |         while curr is not root:
 67 |             yield curr[2]
 68 |             curr = curr[1]
 69 | 
 70 |     def __reversed__(self):
 71 |         'od.__reversed__() <==> reversed(od)'
 72 |         root = self.__root
 73 |         curr = root[0]  # last real link (PREV of the sentinel)
 74 |         while curr is not root:
 75 |             yield curr[2]
 76 |             curr = curr[0]
 77 | 
 78 |     def clear(self):
 79 |         'od.clear() -> None.  Remove all items from od.'
 80 |         try:
 81 |             for node in self.__map.itervalues():
 82 |                 del node[:]  # empty each link so the links stop referencing one another
 83 |             root = self.__root
 84 |             root[:] = [root, root, None]
 85 |             self.__map.clear()
 86 |         except AttributeError:
 87 |             pass  # the linked list was never set up; only the inherited dict needs clearing
 88 |         dict.clear(self)
 89 | 
 90 |     def popitem(self, last=True):
 91 |         '''od.popitem() -> (k, v), return and remove a (key, value) pair.
 92 |         Pairs are returned in LIFO order if last is true or FIFO order if false.
 93 | 
 94 |         '''
 95 |         if not self:
 96 |             raise KeyError('dictionary is empty')
 97 |         root = self.__root
 98 |         if last:
 99 |             link = root[0]
100 |             link_prev = link[0]
101 |             link_prev[1] = root
102 |             root[0] = link_prev
103 |         else:
104 |             link = root[1]
105 |             link_next = link[1]
106 |             root[1] = link_next
107 |             link_next[0] = root
108 |         key = link[2]
109 |         del self.__map[key]
110 |         value = dict.pop(self, key)  # plain dict.pop: the link has already been unhooked above
111 |         return key, value
112 | 
113 |     # -- the following methods do not depend on the internal structure --
114 | 
115 |     def keys(self):
116 |         'od.keys() -> list of keys in od'
117 |         return list(self)
118 | 
119 |     def values(self):
120 |         'od.values() -> list of values in od'
121 |         return [self[key] for key in self]
122 | 
123 |     def items(self):
124 |         'od.items() -> list of (key, value) pairs in od'
125 |         return [(key, self[key]) for key in self]
126 | 
127 |     def iterkeys(self):
128 |         'od.iterkeys() -> an iterator over the keys in od'
129 |         return iter(self)
130 | 
131 |     def itervalues(self):
132 |         'od.itervalues() -> an iterator over the values in od'
133 |         for k in self:
134 |             yield self[k]
135 | 
136 |     def iteritems(self):
137 |         'od.iteritems() -> an iterator over the (key, value) items in od'
138 |         for k in self:
139 |             yield (k, self[k])
140 | 
141 |     def update(*args, **kwds):  # no named self parameter, so 'self' stays usable as a keyword key
142 |         '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
143 | 
144 |         If E is a dict instance, does:           for k in E: od[k] = E[k]
145 |         If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
146 |         Or if E is an iterable of items, does:   for k, v in E: od[k] = v
147 |         In either case, this is followed by:     for k, v in F.items(): od[k] = v
148 | 
149 |         '''
150 |         if len(args) > 2:
151 |             raise TypeError('update() takes at most 2 positional '
152 |                             'arguments (%d given)' % (len(args),))
153 |         elif not args:
154 |             raise TypeError('update() takes at least 1 argument (0 given)')
155 |         self = args[0]
156 |         # Make progressively weaker assumptions about "other"
157 |         other = ()
158 |         if len(args) == 2:
159 |             other = args[1]
160 |         if isinstance(other, dict):
161 |             for key in other:
162 |                 self[key] = other[key]
163 |         elif hasattr(other, 'keys'):
164 |             for key in other.keys():
165 |                 self[key] = other[key]
166 |         else:
167 |             for key, value in other:
168 |                 self[key] = value
169 |         for key, value in kwds.items():
170 |             self[key] = value
171 | 
172 |     __update = update  # let subclasses override update without breaking __init__
173 | 
174 |     __marker = object()  # unique sentinel: tells "no default given" apart from default=None in pop()
175 | 
176 |     def pop(self, key, default=__marker):
177 |         '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
178 |         If key is not found, d is returned if given, otherwise KeyError is raised.
179 | 
180 |         '''
181 |         if key in self:
182 |             result = self[key]
183 |             del self[key]
184 |             return result
185 |         if default is self.__marker:
186 |             raise KeyError(key)
187 |         return default
188 | 
189 |     def setdefault(self, key, default=None):
190 |         'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
191 |         if key in self:
192 |             return self[key]
193 |         self[key] = default
194 |         return default
195 | 
196 |     def __repr__(self, _repr_running={}):  # the mutable default is shared on purpose: it records reprs in progress
197 |         'od.__repr__() <==> repr(od)'
198 |         call_key = id(self), _get_ident()
199 |         if call_key in _repr_running:
200 |             return '...'  # this object is already being printed by this thread: stop the recursion
201 |         _repr_running[call_key] = 1
202 |         try:
203 |             if not self:
204 |                 return '%s()' % (self.__class__.__name__,)
205 |             return '%s(%r)' % (self.__class__.__name__, self.items())
206 |         finally:
207 |             del _repr_running[call_key]
208 | 
209 |     def __reduce__(self):
210 |         'Return state information for pickling'
211 |         items = [[k, self[k]] for k in self]
212 |         inst_dict = vars(self).copy()
213 |         for k in vars(OrderedDict()):  # attributes a fresh instance already has (the internal bookkeeping)
214 |             inst_dict.pop(k, None)
215 |         if inst_dict:
216 |             return (self.__class__, (items,), inst_dict)
217 |         return self.__class__, (items,)
218 | 
219 |     def copy(self):
220 |         'od.copy() -> a shallow copy of od'
221 |         return self.__class__(self)
222 | 
223 |     @classmethod
224 |     def fromkeys(cls, iterable, value=None):
225 |         '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
226 |         and values equal to v (which defaults to None).
227 | 
228 |         '''
229 |         d = cls()
230 |         for key in iterable:
231 |             d[key] = value
232 |         return d
233 | 
234 |     def __eq__(self, other):
235 |         '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
236 |         while comparison to a regular mapping is order-insensitive.
237 | 
238 |         '''
239 |         if isinstance(other, OrderedDict):
240 |             return len(self)==len(other) and self.items() == other.items()
241 |         return dict.__eq__(self, other)
242 | 
243 |     def __ne__(self, other):
244 |         return not self == other  # defined explicitly so that != stays consistent with __eq__ above
245 | 
246 |     # -- the following methods are only used in Python 2.7 --
247 | 
248 |     def viewkeys(self):
249 |         "od.viewkeys() -> a set-like object providing a view on od's keys"
250 |         return KeysView(self)  # NameError if the optional _abcoll import at the top of the file failed
251 | 
252 |     def viewvalues(self):
253 |         "od.viewvalues() -> an object providing a view on od's values"
254 |         return ValuesView(self)  # NameError if the optional _abcoll import at the top of the file failed
255 | 
256 |     def viewitems(self):
257 |         "od.viewitems() -> a set-like object providing a view on od's items"
258 |         return ItemsView(self)  # NameError if the optional _abcoll import at the top of the file failed
259 | 


--------------------------------------------------------------------------------
/src/convnet.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <vector>
 28 | #include <iostream> 
 29 | #include <string>
 30 | 
 31 | #include <nvmatrix.cuh>
 32 | #include <nvmatrix_operators.cuh>
 33 | #include <matrix.h>
 34 | #include <convnet.cuh>
 35 | #include <util.cuh>
 36 | 
 37 | using namespace std;
 38 | 
 39 | /* 
 40 |  * =======================
 41 |  * ConvNet
 42 |  * =======================
 43 |  */
 44 | ConvNet::ConvNet(PyListObject* layerParams, int minibatchSize, int deviceID) : Thread(false),  _deviceID(deviceID), _data(NULL) {
 45 |     try {
 46 |         const int layerCount = PyList_GET_SIZE(layerParams);
 47 |     
 48 |         for (int layerIdx = 0; layerIdx < layerCount; layerIdx++) {
 49 |             PyObject* layerDict = PyList_GET_ITEM(layerParams, layerIdx);
 50 |             string typeName = pyDictGetString(layerDict, "type");
 51 |             
 52 |             Layer* created = initLayer(typeName, layerDict);
 53 |             // Backward links: attach the new layer to every layer listed under "inputs"
 54 |             intv* inputIdxs = pyDictGetIntV(layerDict, "inputs");
 55 |             if (inputIdxs != NULL) {
 56 |                 for (int k = 0; k < inputIdxs->size(); k++) {
 57 |                     created->addPrev(&getLayer(inputIdxs->at(k)));
 58 |                 }
 59 |             }
 60 |             delete inputIdxs;
 61 |         }
 62 | 
 63 |         // Forward links: every layer registers itself as a successor of each of its inputs
 64 |         for (int dst = 0; dst < _layers.size(); dst++) {
 65 |             vector<Layer*>& sources = _layers[dst]->getPrev();
 66 |             for (int src = 0; src < sources.size(); src++) {
 67 |                 sources[src]->addNext(_layers[dst]);
 68 |             }
 69 |         }
 70 |          
 71 |         // Once the whole graph is wired, give each layer its post-initialization hook
 72 |         for (int layerIdx = 0; layerIdx < _layers.size(); layerIdx++) {
 73 |             _layers[layerIdx]->postInit();
 74 |         }
 75 |         
 76 |         _dp = new DataProvider(minibatchSize);
 77 |     } catch (string& msg) {  // layer construction reports problems by throwing a string
 78 |         cout << "Error creating ConvNet: " << msg << endl;
 79 |         exit(1);
 80 |     }
 81 | }
 82 | 
 83 | /*
 84 |  * Creates the layer named by layerType, appends it to _layers (data and cost layers are also recorded in _dataLayers / _costs) and returns it. Throws a string for an unknown type. Override this in derived classes.
 85 |  */
 86 | Layer* ConvNet::initLayer(string& layerType, PyObject* paramsDict) {
 87 |     if (layerType == "fc") {
 88 |         _layers.push_back(new FCLayer(this, paramsDict));
 89 |     } else if (layerType == "conv") {
 90 |         _layers.push_back(new ConvLayer(this, paramsDict));
 91 |     } else if (layerType == "local") {
 92 |         _layers.push_back(new LocalUnsharedLayer(this, paramsDict));
 93 |     } else if (layerType == "pool") {
 94 |         _layers.push_back(&PoolLayer::makePoolLayer(this, paramsDict));
 95 |     } else if (layerType == "rnorm") {
 96 |         _layers.push_back(new ResponseNormLayer(this, paramsDict));
 97 |     } else if (layerType == "cmrnorm") {
 98 |         _layers.push_back(new CrossMapResponseNormLayer(this, paramsDict));
 99 |     } else if (layerType == "cnorm") {
100 |         _layers.push_back(new ContrastNormLayer(this, paramsDict));
101 |     } else if (layerType == "softmax") {
102 |         _layers.push_back(new SoftmaxLayer(this, paramsDict));
103 |     } else if (layerType == "eltsum") {
104 |         _layers.push_back(new EltwiseSumLayer(this, paramsDict));
105 |     } else if (layerType == "eltmax") {
106 |         _layers.push_back(new EltwiseMaxLayer(this, paramsDict));
107 |     } else if (layerType == "neuron") {
108 |         _layers.push_back(new NeuronLayer(this, paramsDict));
109 |     } else if (layerType == "nailbed") {
110 |         _layers.push_back(new NailbedLayer(this, paramsDict));
111 |     } else if (layerType == "blur") {
112 |         _layers.push_back(new GaussianBlurLayer(this, paramsDict));
113 |     } else if (layerType == "resize") {
114 |         _layers.push_back(new ResizeLayer(this, paramsDict));
115 |     } else if (layerType == "rgb2yuv") {
116 |         _layers.push_back(new RGBToYUVLayer(this, paramsDict));
117 |     } else if (layerType == "rgb2lab") {
118 |         _layers.push_back(new RGBToLABLayer(this, paramsDict));
119 |     } else if (layerType == "data") {
120 |         DataLayer *d = new DataLayer(this, paramsDict);
121 |         _layers.push_back(d);
122 |         _dataLayers.push_back(d);
123 |     } else if (layerType.compare(0, 5, "cost.") == 0) {  // any type starting with "cost."; no <cstring> needed
124 |         CostLayer *c = &CostLayer::makeCostLayer(this, layerType, paramsDict);
125 |         _layers.push_back(c);
126 |         _costs.push_back(c);
127 |     } else {
128 |         throw string("Unknown layer type ") + layerType;
129 |     }
130 | 
131 |     return _layers.back();
132 | }
133 | 
134 | /*
135 |  * This executes in a new CPU thread so it's OK to initialize CUDA stuff here. NOTE(review): no return code is checked.
136 |  */
137 | void ConvNet::initCuda() { 
138 |     cudaSetDevice(_deviceID < 0 ? cutGetMaxGflopsDeviceId() : _deviceID); // negative ID: use the device cutGetMaxGflopsDeviceId() picks
139 |     cudaDeviceSetCacheConfig(cudaFuncCachePreferShared);
140 |     cublasInit();
141 |     NVMatrix::initRandom(time(0)); // seeded from the wall clock, so runs are not reproducible
142 |     copyToGPU(); // asks every layer to upload its state
143 | }
144 | 
145 | void* ConvNet::run() { // thread body: initialise CUDA, then serve the worker queue forever
146 |     initCuda();
147 | 
148 |     while (true) {
149 |         Worker* worker = _workerQueue.dequeue(); // presumably blocks until a worker is queued -- confirm in queue.h
150 |         worker->run();
151 |         delete worker; // the net owns dequeued workers; deleted through the base-class pointer
152 |     }
153 |     return NULL; // never reached: the loop above has no exit
154 | }
155 | 
156 | Queue<Worker*>& ConvNet::getWorkerQueue() { // the queue that run() dequeues and executes workers from
157 |     return _workerQueue;
158 | }
159 | 
160 | Queue<WorkResult*>& ConvNet::getResultQueue() { // presumably where workers post their WorkResult -- confirm in worker.cu
161 |     return _resultQueue;
162 | }
163 | 
164 | DataProvider& ConvNet::getDataProvider() { // the provider created in the constructor with the minibatch size
165 |     return *_dp;
166 | }
167 | 
168 | Layer& ConvNet::operator[](int idx) { // unchecked access to layer idx; same as getLayer(idx)
169 |     return *_layers[idx];
170 | }
171 | 
172 | Layer& ConvNet::getLayer(int idx) { // unchecked access: idx must be a valid position in _layers
173 |     return *_layers[idx];
174 | }
175 | 
176 | void ConvNet::copyToCPU() {  // forwards copyToCPU() to every layer, in creation order
177 |     for (vector<Layer*>::iterator layer = _layers.begin(); layer != _layers.end(); ++layer) {
178 |         (*layer)->copyToCPU();
179 |     }
180 | }
181 | 
182 | void ConvNet::copyToGPU() {  // forwards copyToGPU() to every layer, in creation order
183 |     for (vector<Layer*>::iterator layer = _layers.begin(); layer != _layers.end(); ++layer) {
184 |         (*layer)->copyToGPU();
185 |     }
186 | }
187 | 
188 | void ConvNet::updateWeights() {  // forwards updateWeights() to every layer, in creation order
189 |     for (vector<Layer*>::iterator layer = _layers.begin(); layer != _layers.end(); ++layer) {
190 |         (*layer)->updateWeights();
191 |     }
192 | }
193 | 
194 | void ConvNet::reset() {  // forwards reset() to every layer; called before fprop and after bprop
195 |     for (vector<Layer*>::iterator layer = _layers.begin(); layer != _layers.end(); ++layer) {
196 |         (*layer)->reset();
197 |     }
198 | }
199 | 
200 | int ConvNet::getNumLayers() { // number of layers created so far (size of _layers)
201 |     return _layers.size();
202 | }
203 | 
204 | void ConvNet::bprop(PASS_TYPE passType) {  // backward pass: started from every cost layer
205 |     for (vector<CostLayer*>::iterator cost = _costs.begin(); cost != _costs.end(); ++cost) {
206 |         (*cost)->bprop(passType);
207 |     }
208 |     reset();  // layers are reset once the backward pass has been issued
209 | }
210 | 
211 | void ConvNet::fprop(PASS_TYPE passType) { // forward pass over the minibatch currently held in _data
212 |     assert(_data != NULL);
213 |     reset();
214 |     for (int i = 0; i < _dataLayers.size(); i++) {
215 |         _dataLayers[i]->fprop(_data->getData(), passType); // the pass is started from every data layer
216 |     }
217 | }
218 | 
219 | void ConvNet::fprop(GPUData& data, PASS_TYPE passType) { // forward pass on data; the net takes ownership of it
220 |     if (&data != _data) {
221 |         delete _data; // drop the previous minibatch unless the caller passed the same object again
222 |     }
223 |     _data = &data;
224 |     fprop(passType);
225 | }
226 | 
227 | void ConvNet::fprop(int miniIdx, PASS_TYPE passType) { // forward pass on minibatch miniIdx of the data provider
228 |     delete _data; // the previous minibatch is always released here
229 |     _data = &_dp->getMinibatch(miniIdx); // presumably a fresh heap object each call, since it is deleted above -- confirm
230 |     fprop(passType);
231 | }
232 | 
233 | Cost& ConvNet::getCost() { // returns a heap-allocated Cost built from all cost layers; the caller must delete it
234 |     return *new Cost(_data->getNumCases(), _costs); // requires a current minibatch: _data is dereferenced unchecked
235 | }
236 | 
237 | // Same as getCost() but accumulates the new results into the given cost (via Cost::operator+=) and returns it
238 | Cost& ConvNet::getCost(Cost& cost) {
239 |     Cost& newCost = getCost();
240 |     cost += newCost;
241 |     delete &newCost; // the temporary from getCost() is heap-allocated and owned here
242 |     return cost;
243 | }
244 | 
245 | double ConvNet::getCostValue() { // scalar cost of the current minibatch, as computed by Cost::getValue()
246 |     Cost& cost = getCost();
247 |     double val = cost.getValue();
248 |     delete &cost; // the temporary from getCost() is heap-allocated and owned here
249 |     return val;
250 | }
251 | 
252 | /*
253 |  * Gradient checking: runs one fprop/bprop on minibatch 0, lets every layer check its gradients, then prints a summary.
254 |  */
255 | void ConvNet::checkGradients() {
256 |     _numFailures = 0;
257 |     _numTests = 0;
258 |     fprop(0, PASS_GC);
259 |     _baseErr = getCostValue(); // reference cost that checkGradient() differences against
260 |     bprop(PASS_GC);
261 |     
262 |     for (vector<Layer*>::iterator it = _layers.begin(); it != _layers.end(); ++it) {
263 |         (*it)->checkGradients(); // presumably calls back into checkGradient() per weight matrix -- confirm in layer.cu
264 |     }
265 |     
266 |     cout << "------------------------" << endl;
267 |     if (_numFailures > 0) {
268 |         cout << _numFailures << "/" << _numTests << " TESTS FAILED" << endl;
269 |     } else {
270 |         cout << "ALL " << _numTests << " TESTS PASSED" << endl;
271 |     }
272 | }
273 | 
274 | /*
275 |  * Finite-difference check of one weight matrix. name: weight matrix name (used in the report); eps: finite difference step.
276 |  * Returns true when the check FAILED (relative error >= GC_REL_ERR_THRESH); also updates _numTests and _numFailures.
277 |  */
278 | bool ConvNet::checkGradient(const string& name, float eps, Weights& weights) {
279 |     Matrix numGrad(weights.getNumRows(), weights.getNumCols());
280 |     Matrix diff(numGrad);
281 |     numGrad.apply(Matrix::ZERO);
282 |     Matrix weightsCPU;
283 | 
284 |     weights.getW().copyToHost(weightsCPU, true);
285 | 
286 |     for(int i = 0; i < weights.getNumRows(); i++) {
287 |         for (int j = 0; j < weights.getNumCols(); j++) {
288 |             float v = weightsCPU(i,j);
289 |             weightsCPU(i,j) += eps;
290 |             weights.getW().copyFromHost(weightsCPU); // upload the weights with entry (i,j) perturbed by eps
291 |             weightsCPU(i,j) = v; // host copy is restored at once; the device copy is restored after the pass
292 |             fprop(PASS_GC);
293 |             double err = getCostValue();
294 |             numGrad(i,j) = (err - _baseErr) / (_data->getNumCases() * eps); // one-sided difference, per case
295 |             if (isnan(numGrad(i,j)) || isinf(numGrad(i,j))) {
296 |                 cout << "Numerical computation produced nan or inf when checking '" << name << "': " << numGrad(i,j) << endl;
297 |                 cout << "Consider reducing the sizes of the weights or finite difference steps." << endl;
298 |                 cout << "Exiting." << endl;
299 |                 exit(1);
300 |             }
301 |             weights.getW().copyFromHost(weightsCPU); // put the unperturbed weights back on the device
302 |         }
303 |     }
304 | 
305 |     Matrix gradCPU;
306 |     weights.getGrad().copyToHost(gradCPU, true);
307 |     gradCPU.scale(-1.0 / _data->getNumCases()); // presumably the stored gradient is negated and summed over cases -- confirm
308 |     float analNorm = gradCPU.norm();
309 |     float numNorm = numGrad.norm();
310 |     numGrad.subtract(gradCPU, diff);
311 |     float relErr = diff.norm() / analNorm; // NOTE(review): NaN or inf when the analytic gradient is all zeros
312 |     bool fail = relErr >= GC_REL_ERR_THRESH; // a NaN relErr compares false and is therefore counted as a pass
313 |     if (fail || !GC_SUPPRESS_PASSES) {
314 |         cout << "========================" << endl;
315 |         printf("(%s) %s GRADIENT CHECK\n", fail ? "****FAIL****" : "PASS", name.c_str());
316 |         cout << "========================" << endl;
317 |         cout << "Analytic:" << endl;
318 |         gradCPU.print(6,4);
319 |         cout << "Numeric:" << endl;
320 |         numGrad.print(6,4);
321 |         printf("Analytic norm: %e\n", analNorm);
322 |         printf("Numeric norm:  %e\n", numNorm);
323 |         printf("Relative error: %e\n", relErr);
324 |     }
325 |     _numTests++;
326 |     _numFailures += fail;
327 |     return fail;
328 | }
329 | 


--------------------------------------------------------------------------------
/src/cost.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <iostream>
 28 | #include <cost.cuh>
 29 | 
 30 | using namespace std;
 31 | 
 32 | /* 
 33 |  * =====================
 34 |  * Cost
 35 |  * =====================
 36 |  */
 37 | 
 38 | Cost::Cost(int numCases) : _numCases(numCases) { // starts with empty maps; operator+= adds entries as needed
 39 | }
 40 | 
 41 | Cost::Cost(int numCases, vector<CostLayer*>& costs) : _numCases(numCases) { // one entry per cost layer, keyed by layer name
 42 |     for (vector<CostLayer*>::iterator it = costs.begin(); it != costs.end(); ++it) {
 43 |         _costMap[(*it)->getName()] = &(*it)->getCost(); // ~Cost() deletes this, so getCost() presumably returns a fresh heap object -- confirm
 44 |         _costCoeffMap[(*it)->getName()] = (*it)->getCoeff();
 45 |     }
 46 | }
 47 | 
 48 | int Cost::getNumCases() { // case count given at construction plus the counts of every Cost added with +=
 49 |     return _numCases;
 50 | }
 51 | 
 52 | doublev& Cost::operator [](const string s) { // values stored under cost name s
 53 |     return *_costMap[s]; // NOTE(review): an unknown name inserts a NULL entry, which is then dereferenced
 54 | }
 55 | 
 56 | CostMap& Cost::getCostMap() { // cost name -> pointer to its value vector (the vectors are owned by this Cost)
 57 |     return _costMap;
 58 | }
 59 | 
 60 | CostCoeffMap& Cost::getCostCoeffMap() { // cost name -> coefficient used as its weight in getValue()
 61 |     return _costCoeffMap;
 62 | }
 63 | 
 64 | double Cost::getValue() { // coefficient-weighted sum of the FIRST value of every cost
 65 |     double val = 0;
 66 |     for (CostMap::iterator it = _costMap.begin(); it != _costMap.end(); ++it) {
 67 |         val += _costCoeffMap[it->first] * it->second->at(0); // at(0) throws std::out_of_range for an empty value vector
 68 |     }
 69 |     return val;
 70 | }
 71 | 
 72 | Cost& Cost::operator += (Cost& er) { // element-wise accumulation of er's values, plus its case count
 73 |     CostMap& otherMap = er.getCostMap();
 74 |     CostCoeffMap& otherCoeffMap = er.getCostCoeffMap();
 75 |     for (CostMap::const_iterator it = otherMap.begin(); it != otherMap.end(); ++it) {
 76 |         if (_costMap.count(it->first) == 0) { // a cost seen for the first time: adopt its coefficient
 77 |             _costMap[it->first] = new doublev();
 78 |             _costCoeffMap[it->first] = otherCoeffMap[it->first];
 79 |         }
 80 |         
 81 |         vector<double>& myVec = *_costMap[it->first];
 82 |         vector<double>& otherVec = *otherMap[it->first];
 83 |         for (int i = 0; i < otherVec.size(); i++) {
 84 |             if (myVec.size() <= i) {
 85 |                 myVec.push_back(0); // grow with zeros so both vectors line up
 86 |             }
 87 |             myVec[i] += otherVec[i];
 88 |         }
 89 |     }
 90 |     _numCases += er.getNumCases();
 91 |     return *this;
 92 | }
 93 | 
 94 | Cost& Cost::operator /= (const double v) {
 95 |     for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) {
 96 |         for (doublev::iterator it2 = it->second->begin(); it2 != it->second->end(); ++it2) {
 97 |             *it2 /= v;
 98 |         }
 99 |     }
100 |     return *this;
101 | }
102 | 
103 | Cost::~Cost() {
104 |     for (CostMap::const_iterator it = _costMap.begin(); it != _costMap.end(); ++it) {
105 |         delete it->second;
106 |     }
107 | }


--------------------------------------------------------------------------------
/src/data.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <algorithm>
 28 | #include <data.cuh>
 29 | 
 30 | using namespace std;
 31 | 
 32 | DataProvider::DataProvider(int minibatchSize) : 
 33 |     _minibatchSize(minibatchSize), _hData(NULL) {
 34 | 
 35 | }
 36 | 
 37 | GPUData& DataProvider::operator[](int idx) {
 38 |     return getMinibatch(idx);
 39 | }
 40 | 
 41 | void DataProvider::clearData() {
 42 |     delete _hData;
 43 |     _hData = NULL;
 44 |     _dataSize = 0;
 45 | }
 46 | 
 47 | void DataProvider::setData(CPUData& hData) {
 48 |     // This is now deleted by the DataWorker's destructor
 49 | //    delete _hData; // Delete old CPU matrices
 50 | 
 51 |     _hData = &hData;
 52 |     _dataSize = 0;
 53 |     for (int i = 0; i < hData.getSize(); i++) {
 54 |         _dataSize += hData[i].getNumDataBytes();
 55 |     }
 56 |     _dataSize /= 1024 * 1024;
 57 |     if (_dataSize < MAX_DATA_ON_GPU) {
 58 |         for (int i = 0; i < hData.getSize(); i++) {
 59 |             if (i >= _data.size()) {
 60 |                 _data.push_back(new NVMatrix());
 61 |             }
 62 |             _data[i]->copyFromHost(hData[i], true);
 63 |         }
 64 |     }
 65 | }
 66 | 
 67 | GPUData& DataProvider::getMinibatch(int idx) {
 68 |     assert(idx >= 0 && idx < getNumMinibatches());
 69 |     return getDataSlice(idx * _minibatchSize, (idx + 1) * _minibatchSize);
 70 | }
 71 | 
 72 | GPUData& DataProvider::getDataSlice(int startCase, int endCase) {
 73 |     assert(_hData != NULL);
 74 |     assert(_hData->getNumCases() > 0);
 75 |     
 76 |     NVMatrixV& miniData = *new NVMatrixV();
 77 |     
 78 |     for (int i = 0; i < _hData->getData().size(); i++) {
 79 |         miniData.push_back(new NVMatrix());
 80 |         if (_dataSize < MAX_DATA_ON_GPU) {
 81 |             if (_data[i]->isTrans()) {
 82 |                 _data[i]->sliceRows(startCase, min(_hData->getNumCases(), endCase), *miniData[i]);
 83 |             } else {
 84 |                 _data[i]->sliceCols(startCase, min(_hData->getNumCases(), endCase), *miniData[i]);
 85 |             }
 86 |         } else {
 87 |             Matrix tmp;
 88 |             if ((*_hData)[i].isTrans()) {
 89 |                 (*_hData)[i].sliceRows(startCase, min(_hData->getNumCases(), endCase), tmp);
 90 |             } else {
 91 |                 (*_hData)[i].sliceCols(startCase, min(_hData->getNumCases(), endCase), tmp);
 92 |             }
 93 |             miniData.back()->copyFromHost(tmp, true);
 94 |         }
 95 |     }
 96 | 
 97 |     return *new GPUData(miniData);
 98 | }
 99 | 
100 | int DataProvider::getNumMinibatches() {
101 |     assert(_hData != NULL);
102 |     assert(_hData->getNumCases() > 0);
103 |     return DIVUP(_hData->getNumCases(), _minibatchSize);
104 | }
105 | 
106 | int DataProvider::getMinibatchSize() {
107 |     return _minibatchSize;
108 | }
109 | 
110 | int DataProvider::getNumCases() {
111 |     assert(_hData != NULL);
112 |     assert(_hData->getNumCases() > 0);
113 |     return _hData->getNumCases();
114 | }
115 | 
116 | int DataProvider::getNumCasesInMinibatch(int idx) {
117 |     assert(_hData != NULL);
118 |     assert(_hData->getNumCases() > 0);
119 |     assert(idx >= 0 && idx < getNumMinibatches());
120 |     return min(_minibatchSize, max(0, _hData->getNumCases() - idx * _minibatchSize));
121 | }


--------------------------------------------------------------------------------
/src/layer_kernels.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <assert.h>
 28 | 
 29 | #include <layer_kernels.cuh>
 30 | 
 31 | /*
 32 |  * E = -log(y_t)
 33 |  * probs:           (numOut, numCases)
 34 |  * labels:          (1, numCases)
 35 |  * maxProbs:        (1, numCases)
 36 |  * labelLogProbs:   (1, numCases)   (*out)
 37 |  * correctProbs:    (1, numCases)   (*out)
 38 |  * 
 39 |  * target:          (1, numCases)
 40 |  */
 41 | __global__ void kLogregCost(float* probs, float* labels, float* maxProbs, float* labelLogProbs, float* correctProbs,
 42 |                             const int numCases, const int numOut) {
 43 |     const int tx = blockIdx.x * LOGREG_ERR_THREADS_X + threadIdx.x;
 44 | 
 45 |     if (tx < numCases) {
 46 |         const int label = int(labels[tx]);
 47 |         const float maxp = maxProbs[tx];
 48 |         const float labelp = probs[label * numCases + tx];
 49 |         
 50 |         labelLogProbs[tx] = __logf(labelp);
 51 |         
 52 |         /*
 53 |          * Compute the probability of guessing the correct case if you take the most-probable label.
 54 |          * 
 55 |          * This is done like this:
 56 |          * 
 57 |          * - If the most probable label is not equal to the true label, then the probability is zero.
 58 |          * - Otherwise, the probability is 1 / (number of labels whose probability is equal to the maximum).
 59 |          * 
 60 |          * This is certainly overkill -- in practice, it's just about impossible for two labels to get assigned
 61 |          * maximum probability. But it's a safety measure to prevent over-estimating your accuracy.
 62 |          * Though it could never happen in reality. Well it could. But it wouldn't. Cool?
 63 |          */
 64 |         if (labelp != maxp) {
 65 |             correctProbs[tx] = 0;
 66 |         } else {
 67 |             int numMax = 0;
 68 |             for (int i = 0; i < numOut; i++) {
 69 |                 numMax += probs[i * numCases + tx] == maxp;
 70 |             }
 71 |             correctProbs[tx] = 1.0f / float(numMax);
 72 |         }
 73 |     }
 74 | }
 75 | 
 76 | /*
 77 |  * E = -log(y_t)
 78 |  * y_l:     (numOut, numCases)
 79 |  * labels:  (1, numCases)
 80 |  * 
 81 |  * dE_dy_l: (numOut, numCases)
 82 |  */
 83 | template <bool add>
 84 | __global__ void kLogregCostGrad(float* y_l, float* labels, float* dE_dy_l, const int numCases,
 85 |                                  const int numOut, const float gradCoeff) {
 86 |     const int tx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x;
 87 |     const int ty = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y;
 88 |     const int tidx = ty * numCases + tx;
 89 |     
 90 |     if (ty < numOut && tx < numCases) {
 91 |         const int label = int(labels[tx]);
 92 |         float v = gradCoeff * (label == ty);
 93 |         v = __fdividef(v, y_l[tidx]);
 94 |         if (add) {
 95 |             dE_dy_l[tidx] += v;
 96 |         } else {
 97 |             dE_dy_l[tidx] = v;
 98 |         }
 99 |     }
100 | }
101 | 
/*
 * dE_dy_l: (numOut, numCases)
 * y_l:     (numOut, numCases)
 *
 * dE_dx_l: (numOut, numCases)
 *
 * One thread per (output i, case) element. Computes
 *   y_i * sum_j dE_dy_j * ([j == i] - y_j)
 * and stores (add == false) or accumulates (add == true) it into dE_dx_l.
 */
template <bool add>
__global__ void kSoftmaxGrad(float* dE_dy_l, float* y_l, float* dE_dx_l, const int numCases, const int numOut) {
    const int caseIdx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x;
    const int outIdx = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y;

    if (outIdx >= numOut || caseIdx >= numCases) {
        return;
    }

    const int elemIdx = outIdx * numCases + caseIdx;

    float v = 0;
    int j = 0;
    while (j < numOut) {
        v += dE_dy_l[j * numCases + caseIdx] * ((j == outIdx) - y_l[j * numCases + caseIdx]);
        j++;
    }
    v *= y_l[elemIdx];

    if (add) {
        dE_dx_l[elemIdx] += v;
    } else {
        dE_dx_l[elemIdx] = v;
    }
}
128 | 
/*
 * E = -log(y_t)
 * y_l:     (numOut, numCases)
 * labels:  (1, numCases)
 *
 * dE_dx_l: (numOut, numCases)
 *
 * One thread per (output, case) element. The value written is
 * gradCoeff * ([output == label] - y_l), either stored (add == false) or
 * accumulated (add == true) into dE_dx_l.
 */
template <bool add>
__global__ void kLogregSoftmaxGrad(float* y_l, float* labels, float* dE_dx_l, const int numCases,
                                 const int numOut, const float gradCoeff) {
    const int caseIdx = blockIdx.x * LOGREG_GRAD_THREADS_X + threadIdx.x;
    const int outIdx = blockIdx.y * LOGREG_GRAD_THREADS_Y + threadIdx.y;

    if (outIdx >= numOut || caseIdx >= numCases) {
        return;
    }

    const int elemIdx = outIdx * numCases + caseIdx;
    const int label = int(labels[caseIdx]);
    float v = gradCoeff * ((label == outIdx) - y_l[elemIdx]);

    if (add) {
        dE_dx_l[elemIdx] += v;
    } else {
        dE_dx_l[elemIdx] = v;
    }
}
153 | 
/*
 * Gradient of an element-wise max with respect to one of its inputs.
 *
 * For every element i < numElements the gradient actGrad[i] is passed through
 * where output[i] == input[i] and is zero elsewhere; the result is stored
 * (add == false) or accumulated (add == true) into target[i].
 *
 * Each thread starts at B_X * blockIdx.x + threadIdx.x and advances by
 * B_X * gridDim.x, so any grid size covers all elements.
 */
template <int B_X, bool add>
__global__ void kEltwiseMaxGrad(float* actGrad, float* input, float* output, float* target,
                                const int numElements) {
    int i = B_X * blockIdx.x + threadIdx.x;
    while (i < numElements) {
        if (add) {
            target[i] += actGrad[i] * (output[i] == input[i]);
        } else {
            target[i] = actGrad[i] * (output[i] == input[i]);
        }
        i += B_X * gridDim.x;
    }
}
165 | 
/*
 * Launches kEltwiseMaxGrad: routes actGrad to the positions where
 * output == input and writes (add == false) or accumulates (add == true)
 * the result into target.
 *
 * actGrad, input and output must be contiguous and have identical dimensions.
 * With add == true, target must already have those dimensions; otherwise it
 * is resized to match actGrad. target is indexed linearly by the kernel, so
 * it must be contiguous as well.
 */
void computeEltwiseMaxGrad(NVMatrix& actGrad, NVMatrix& input, NVMatrix& output, NVMatrix& target, bool add) {
    assert(actGrad.isContiguous());
    assert(output.isContiguous());
    assert(input.isContiguous());
    assert(actGrad.isSameDims(input));
    assert(actGrad.isSameDims(output));

    /*
     * The kernel strides over the elements by (threads per block * gridDim.x),
     * so the grid does not need one block per 128 elements. Capping it keeps
     * the launch valid on devices whose grid x-dimension is limited to 65535
     * blocks; an uncapped DIVUP exceeds that limit from 8388608 elements on.
     */
    const int maxBlocks = 65535;
    int numBlocks = DIVUP(actGrad.getNumElements(), 128);
    if (numBlocks > maxBlocks) {
        numBlocks = maxBlocks;
    }

    dim3 blocks(numBlocks);
    dim3 threads(128);
    if (add) {
        assert(actGrad.isSameDims(target));
        assert(target.isContiguous());
        cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, true>, cudaFuncCachePreferL1);
        kEltwiseMaxGrad<128, true><<<blocks, threads>>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements());
    } else {
        target.resize(actGrad);
        assert(target.isContiguous());
        cudaFuncSetCacheConfig(kEltwiseMaxGrad<128, false>, cudaFuncCachePreferL1);
        kEltwiseMaxGrad<128, false><<<blocks, threads>>>(actGrad.getDevData(), input.getDevData(), output.getDevData(), target.getDevData(), actGrad.getNumElements());
    }
    
    cutilCheckMsg("computeEltwiseMaxGrad: Kernel execution failed");
}
187 | 
/*
 * E = -log(y_t)
 * probs:              (numOut, numCases)
 * labels:             (1, numCases)
 * labelLogProbs_out:  (1, numCases)   (*out)
 * correctProbs_out:   (1, numCases)   (*out)
 *
 * Host-side launcher for kLogregCost. probs and labels must be contiguous and
 * not transposed. Both outputs are resized to (1, numCases). The temporary
 * matrix returned by probs.max(0) is deleted before returning.
 */
void computeLogregCost(NVMatrix& labels, NVMatrix& probs, NVMatrix& labelLogProbs_out, NVMatrix& correctProbs_out) {
    const int numCases = probs.getNumCols();
    const int numOut = probs.getNumRows();

    assert(labels.getNumElements() == numCases);
    assert(!labels.isTrans());
    assert(!probs.isTrans());
    assert(labels.isContiguous());
    assert(probs.isContiguous());

    NVMatrix* maxProbs = &probs.max(0);

    labelLogProbs_out.resize(1, numCases);
    correctProbs_out.resize(1, numCases);

    /* One thread per case. */
    dim3 threads(LOGREG_ERR_THREADS_X, 1);
    dim3 blocks(DIVUP(numCases, LOGREG_ERR_THREADS_X), 1);
    cudaFuncSetCacheConfig(kLogregCost, cudaFuncCachePreferL1);
    kLogregCost<<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), maxProbs->getDevData(),
                                     labelLogProbs_out.getDevData(), correctProbs_out.getDevData(),
                                     numCases, numOut);
    cutilCheckMsg("computeLogregCost: Kernel execution failed");

    delete maxProbs;
}
222 | 
/*
 * Host-side launcher for kLogregCostGrad: gradient of the logistic-regression
 * cost with respect to the probabilities, scaled by coeff.
 *
 * numCases is taken from probs.getLeadingDim() and numOut from
 * probs.getFollowingDim(). probs, labels and target must be contiguous;
 * probs and labels must not be transposed. With add == false, target is
 * resized to match probs and overwritten; with add == true the gradient is
 * accumulated into target as it is.
 */
void computeLogregGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) {
    const int numCases = probs.getLeadingDim();
    const int numOut = probs.getFollowingDim();

    assert(labels.getNumElements() == numCases);
    assert(probs.isContiguous());
    assert(target.isContiguous());
    assert(labels.isContiguous());
    assert(!labels.isTrans());
    assert(!probs.isTrans());

    /* One thread per (output, case) element. */
    dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y);
    dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y));

    if (add) {
        kLogregCostGrad<true><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
                                                     numCases, numOut, coeff);
    } else {
        target.resize(probs);
        kLogregCostGrad<false><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
                                                     numCases, numOut, coeff);
    }

    cutilCheckMsg("computeLogregGrad: Kernel execution failed");
}
246 | 
/*
 * Host-side launcher for kSoftmaxGrad: turns the gradient with respect to the
 * softmax outputs (actsGrad) into the gradient with respect to its inputs.
 *
 * numCases is taken from acts.getLeadingDim() and numOut from
 * acts.getFollowingDim(). acts and actsGrad must have the same dimensions and
 * be contiguous and transposed; target must be contiguous. With add == false,
 * target is resized to match acts and overwritten; with add == true the
 * result is accumulated into target as it is.
 */
void computeSoftmaxGrad(NVMatrix& acts, NVMatrix& actsGrad, NVMatrix& target, bool add) {
    const int numCases = acts.getLeadingDim();
    const int numOut = acts.getFollowingDim();

    assert(acts.isSameDims(actsGrad));
    assert(acts.isContiguous());
    assert(actsGrad.isContiguous());
    assert(target.isContiguous());
    assert(acts.isTrans());
    assert(actsGrad.isTrans());

    /* One thread per (output, case) element. */
    dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y);
    dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y));

    if (add) {
        kSoftmaxGrad<true><<<blocks, threads>>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut);
    } else {
        target.resize(acts);
        kSoftmaxGrad<false><<<blocks, threads>>>(actsGrad.getDevData(), acts.getDevData(), target.getDevData(), numCases, numOut);
    }
    cutilCheckMsg("computeSoftmaxGrad: Kernel execution failed");
}
268 | 
/*
 * Host-side launcher for kLogregSoftmaxGrad: gradient of the
 * logistic-regression cost taken directly with respect to the softmax inputs,
 * scaled by coeff.
 *
 * numCases is taken from probs.getLeadingDim() and numOut from
 * probs.getFollowingDim(). probs, labels and target must be contiguous and
 * probs must be transposed. With add == false, target is resized to match
 * probs and overwritten; with add == true the gradient is accumulated into
 * target as it is.
 */
void computeLogregSoftmaxGrad(NVMatrix& labels, NVMatrix& probs, NVMatrix& target, bool add, float coeff) {
    const int numCases = probs.getLeadingDim();
    const int numOut = probs.getFollowingDim();

    assert(labels.getNumElements() == numCases);
    assert(probs.isContiguous());
    assert(target.isContiguous());
    assert(labels.isContiguous());
    assert(probs.isTrans());

    /* One thread per (output, case) element. */
    dim3 threads(LOGREG_GRAD_THREADS_X, LOGREG_GRAD_THREADS_Y);
    dim3 blocks(DIVUP(numCases, LOGREG_GRAD_THREADS_X), DIVUP(numOut, LOGREG_GRAD_THREADS_Y));

    if (add) {
        kLogregSoftmaxGrad<true><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
                                                     numCases, numOut, coeff);
    } else {
        target.resize(probs);
        kLogregSoftmaxGrad<false><<<blocks, threads>>>(probs.getDevData(), labels.getDevData(), target.getDevData(),
                                                     numCases, numOut, coeff);
    }

    cutilCheckMsg("computeLogregSoftmaxGrad: Kernel execution failed");
}


--------------------------------------------------------------------------------
/src/neuron.cu:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #include <neuron.cuh>
28 | #include <util.cuh>
29 | 
30 | using namespace std;
31 | 
32 | Neuron& Neuron::makeNeuron(PyObject* neuronDict) {
33 |     string type = pyDictGetString(neuronDict, "type");
34 |     PyObject* neuronParamsDict = PyDict_GetItemString(neuronDict, "params");
35 |     
36 |     if (type == "relu") {
37 |         return *new ReluNeuron();
38 |     }
39 |     
40 |     if (type == "softrelu") {
41 |         return *new SoftReluNeuron();
42 |     }
43 |     
44 |     if (type == "brelu") {
45 |         float a = pyDictGetFloat(neuronParamsDict, "a");
46 |         return *new BoundedReluNeuron(a);
47 |     }
48 | 
49 |     if (type == "abs") {
50 |         return *new AbsNeuron();
51 |     }
52 | 
53 |     if (type == "logistic") {
54 |         return *new LogisticNeuron();
55 |     }
56 |     
57 |     if (type == "tanh") {
58 |         float a = pyDictGetFloat(neuronParamsDict, "a");
59 |         float b = pyDictGetFloat(neuronParamsDict, "b");
60 |         
61 |         return *new TanhNeuron(a, b);
62 |     }
63 |     
64 |     if (type == "square") {
65 |         return *new SquareNeuron();
66 |     }
67 |     
68 |     if (type == "sqrt") {
69 |         return *new SqrtNeuron();
70 |     }
71 |     
72 |     if (type == "linear") {
73 |         float a = pyDictGetFloat(neuronParamsDict, "a");
74 |         float b = pyDictGetFloat(neuronParamsDict, "b");
75 |         return *new LinearNeuron(a, b);
76 |     }
77 | 
78 |     if (type == "ident") {
79 |         return *new Neuron();
80 |     }
81 |     
82 |     throw string("Unknown neuron type: ") + type;
83 | }
84 | 


--------------------------------------------------------------------------------
/src/nvmatrix/nvmatrix_kernels.cu:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #include <stdio.h>
28 | #include <cuda_runtime.h>
29 | #include <nvmatrix_kernels.cuh>
30 | 
/*
 * Fills tgt (tgtHeight x tgtWidth, row-major) by repeating src
 * (srcHeight x srcWidth, row-major): target element (y, x) receives source
 * element (y mod srcHeight, x mod srcWidth).
 *
 * Each thread starts at its global index and advances by the total number of
 * launched threads until all target elements are written.
 */
__global__ void kTile(const float* src, float* tgt, const uint srcWidth, const uint srcHeight, const uint tgtWidth, const uint tgtHeight) {
    const int firstIdx = blockIdx.x * blockDim.x + threadIdx.x;
    const int stride = blockDim.x * gridDim.x;

    uint i = firstIdx;
    while (i < tgtWidth * tgtHeight) {
        const uint tgtRow = i / tgtWidth;
        const uint tgtCol = i % tgtWidth;
        tgt[i] = src[(tgtRow % srcHeight) * srcWidth + (tgtCol % srcWidth)];
        i += stride;
    }
}
43 | 
/*
 * First stage of a dot product a . b: every block writes one partial sum to
 * target[blockIdx.x]; the caller still has to add up the per-block results.
 *
 * a, b:         inputs, numElements floats each
 * target:       one float per block   (*out)
 * numCols:      only threads with a global index below numCols take part;
 *               each of them visits its index plus multiples of numCols
 * numElements:  total number of elements to multiply and sum
 *
 * NOTE(review): the reduction offsets below start at 256, which implies
 * DP_BLOCKSIZE (and the launch's block size) is expected to be 512 -- confirm.
 */
__global__ void kDotProduct_r(float* a, float* b, float* target, const uint numCols, const uint numElements) {
    // One partial sum per thread of the block.
    __shared__ float shmem[DP_BLOCKSIZE];

    uint eidx = DP_BLOCKSIZE * blockIdx.x + threadIdx.x;
    shmem[threadIdx.x] = 0;
    if (eidx < numCols) {
        // Accumulate a[e] * b[e] for e = eidx, eidx + numCols, eidx + 2 * numCols, ...
        for (; eidx < numElements; eidx += numCols) {
            shmem[threadIdx.x] += a[eidx] * b[eidx];
        }
    }
    __syncthreads();
    // Pairwise reduction in shared memory, halving the number of active
    // threads each step; a barrier separates consecutive steps.
    if (threadIdx.x < 256) {
        shmem[threadIdx.x] += shmem[threadIdx.x + 256];
    }
    __syncthreads();
    if (threadIdx.x < 128) {
        shmem[threadIdx.x] += shmem[threadIdx.x + 128];
    }
    __syncthreads();
    if (threadIdx.x < 64) {
        shmem[threadIdx.x] += shmem[threadIdx.x + 64];
    }
    __syncthreads();
    if (threadIdx.x < 32) {
        // The last steps run without barriers and go through a volatile pointer
        // so every read and write hits shared memory.
        // NOTE(review): this presumably relies on these 32 threads forming one
        // warp that executes in lockstep -- confirm for the target architectures.
        volatile float* mysh = &shmem[threadIdx.x];
        *mysh += mysh[32];
        *mysh += mysh[16];
        *mysh += mysh[8];
        *mysh += mysh[4];
        *mysh += mysh[2];
        *mysh += mysh[1];
        if (threadIdx.x == 0) {
            // Thread 0 now holds the block's total.
            target[blockIdx.x] = *mysh;
        }
    }
}
80 | 
/*
 * Initializes one curand state per thread. Every thread uses the same seed,
 * its own global index as the sequence number, and no offset.
 *
 * There is no bounds check: state must hold at least as many entries as
 * threads are launched.
 */
__global__ void kSetupCurand(curandState *state, unsigned long long seed) {
    const uint stateIdx = NUM_RND_THREADS_PER_BLOCK * blockIdx.x + threadIdx.x;
    curandState* myState = &state[stateIdx];
    curand_init(seed, stateIdx, 0, myState);
}
87 | 
88 | 


--------------------------------------------------------------------------------
/src/pyconvnet.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <Python.h>
 28 | #include <arrayobject.h>
 29 | #include <assert.h>
 30 | #include <cutil_inline.h>
 31 | #include <cublas.h>
 32 | #include <time.h>
 33 | #include <vector>
 34 | 
 35 | #include <matrix.h>
 36 | #include <queue.h>
 37 | #include <worker.cuh>
 38 | #include <util.cuh>
 39 | #include <cost.cuh>
 40 | 
 41 | #include <pyconvnet.cuh>
 42 | #include <convnet.cuh>
 43 | 
 44 | using namespace std;
/* The single network instance of this module; created by initModel(). */
static ConvNet* model = NULL;

/*
 * Functions exposed to Python: { Python name, C function, calling convention }.
 * All entries take positional arguments (METH_VARARGS). The { NULL, NULL }
 * entry terminates the table.
 */
static PyMethodDef _ConvNetMethods[] = {  { "initModel",          initModel,          METH_VARARGS },
                                              { "startBatch",         startBatch,         METH_VARARGS },
                                              { "finishBatch",        finishBatch,        METH_VARARGS },
                                              { "checkGradients",     checkGradients,     METH_VARARGS },
                                              { "startMultiviewTest", startMultiviewTest, METH_VARARGS },
                                              { "startFeatureWriter",  startFeatureWriter,         METH_VARARGS },
                                              { "syncWithHost",       syncWithHost,       METH_VARARGS },
                                              { NULL, NULL }
};

/*
 * Module initialization entry point: registers the method table and
 * initializes the NumPy C API (import_array).
 *
 * On Windows the module name is fixed to "pyconvnet" and the function is
 * exported from the DLL. Elsewhere the function name and module name come
 * from the INITNAME and MODELNAME macros (presumably supplied by the build --
 * they are not defined in this file).
 */
#if defined(_WIN64) || defined(_WIN32)
extern "C" __declspec(dllexport) void initpyconvnet() {
    (void) Py_InitModule("pyconvnet", _ConvNetMethods);
    import_array();
}
#else
void INITNAME() {
    (void) Py_InitModule(QUOTEME(MODELNAME), _ConvNetMethods);
    import_array();
}
#endif
 68 | 
 69 | PyObject* initModel(PyObject *self, PyObject *args) {
 70 |     assert(model == NULL);
 71 | 
 72 |     PyListObject* pyLayerParams;
 73 |     int pyMinibatchSize;
 74 |     int pyDeviceID;
 75 | 
 76 |     if (!PyArg_ParseTuple(args, "O!ii",
 77 |                           &PyList_Type, &pyLayerParams,
 78 |                           &pyMinibatchSize,
 79 |                           &pyDeviceID)) {
 80 |         return NULL;
 81 |     }
 82 |     model = new ConvNet(pyLayerParams,
 83 |                         pyMinibatchSize,
 84 |                         pyDeviceID);
 85 | 
 86 |     model->start();
 87 |     return Py_BuildValue("i", 0);
 88 | }
 89 | 
 90 | /*
 91 |  * Starts training/testing on the given batch (asynchronous -- returns immediately).
 92 |  */
 93 | PyObject* startBatch(PyObject *self, PyObject *args) {
 94 |     assert(model != NULL);
 95 |     PyListObject* data;
 96 |     int test = 0;
 97 |     if (!PyArg_ParseTuple(args, "O!|i",
 98 |         &PyList_Type, &data,
 99 |         &test)) {
100 |         return NULL;
101 |     }
102 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
103 |     
104 |     TrainingWorker* wr = new TrainingWorker(*model, *new CPUData(mvec), test);
105 |     model->getWorkerQueue().enqueue(wr);
106 |     return Py_BuildValue("i", 0);
107 | }
108 | 
109 | /*
110 |  * Starts testing on the given batch (asynchronous -- returns immediately).
111 |  */
112 | PyObject* startMultiviewTest(PyObject *self, PyObject *args) {
113 |     assert(model != NULL);
114 |     PyListObject* data;
115 |     int numViews, logregIdx;
116 |     if (!PyArg_ParseTuple(args, "O!ii",
117 |         &PyList_Type, &data,
118 |         &numViews,
119 |         &logregIdx)) {
120 |         return NULL;
121 |     }
122 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
123 |     
124 |     MultiviewTestWorker* wr = new MultiviewTestWorker(*model, *new CPUData(mvec), numViews, logregIdx);
125 |     model->getWorkerQueue().enqueue(wr);
126 |     return Py_BuildValue("i", 0);
127 | }
128 | 
129 | PyObject* startFeatureWriter(PyObject *self, PyObject *args) {
130 |     assert(model != NULL);
131 |     PyListObject* data;
132 |     int layerIdx;
133 |     if (!PyArg_ParseTuple(args, "O!i",
134 |         &PyList_Type, &data,
135 |         &layerIdx)) {
136 |         return NULL;
137 |     }
138 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
139 |     Matrix& ftrs = *mvec.back();
140 |     mvec.pop_back();
141 |     
142 |     FeatureWorker* wr = new FeatureWorker(*model, *new CPUData(mvec), ftrs, layerIdx);
143 |     model->getWorkerQueue().enqueue(wr);
144 |     return Py_BuildValue("i", 0);
145 | }
146 | 
147 | /*
148 |  * Waits for the trainer to finish training on the batch given to startBatch.
149 |  */
150 | PyObject* finishBatch(PyObject *self, PyObject *args) {
151 |     assert(model != NULL);
152 |     WorkResult* res = model->getResultQueue().dequeue();
153 |     assert(res != NULL);
154 |     assert(res->getResultType() == WorkResult::BATCH_DONE);
155 |     
156 |     Cost& cost = res->getResults();
157 |     PyObject* dict = PyDict_New();
158 |     CostMap& costMap = cost.getCostMap();
159 |     for (CostMap::const_iterator it = costMap.begin(); it != costMap.end(); ++it) {
160 |         PyObject* v = PyList_New(0);
161 |         for (vector<double>::const_iterator iv = it->second->begin(); iv != it->second->end(); ++iv) {
162 |             PyObject* f = PyFloat_FromDouble(*iv);
163 |             PyList_Append(v, f);
164 |         }
165 |         PyDict_SetItemString(dict, it->first.c_str(), v);
166 |     }
167 |     
168 |     PyObject* retVal = Py_BuildValue("Ni", dict, cost.getNumCases());
169 |     delete res; // Deletes cost too
170 |     return retVal;
171 | }
172 | 
173 | PyObject* checkGradients(PyObject *self, PyObject *args) {
174 |     assert(model != NULL);
175 |     PyListObject* data;
176 |     if (!PyArg_ParseTuple(args, "O!",
177 |         &PyList_Type, &data)) {
178 |         return NULL;
179 |     }
180 |     MatrixV& mvec = *getMatrixV((PyObject*)data);
181 |     
182 |     GradCheckWorker* wr = new GradCheckWorker(*model, *new CPUData(mvec));
183 |     model->getWorkerQueue().enqueue(wr);
184 |     WorkResult* res = model->getResultQueue().dequeue();
185 |     assert(res != NULL);
186 |     assert(res->getResultType() == WorkResult::BATCH_DONE);
187 |     delete res;
188 |     return Py_BuildValue("i", 0);
189 | }
190 | 
191 | /*
192 |  * Copies weight matrices from GPU to system memory.
193 |  */
194 | PyObject* syncWithHost(PyObject *self, PyObject *args) {
195 |     assert(model != NULL);
196 |     SyncWorker* wr = new SyncWorker(*model);
197 |     model->getWorkerQueue().enqueue(wr);
198 |     WorkResult* res = model->getResultQueue().dequeue();
199 |     assert(res != NULL);
200 |     assert(res->getResultType() == WorkResult::SYNC_DONE);
201 |     
202 |     delete res;
203 |     return Py_BuildValue("i", 0);
204 | }
205 | 
206 | 


--------------------------------------------------------------------------------
/src/util.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <util.cuh>
 28 | 
 29 | using namespace std;
 30 | 
 31 | floatv* getFloatV(PyObject* pyList) {
 32 |     if (pyList == NULL) {
 33 |         return NULL;
 34 |     }
 35 |     floatv* vec = new floatv(); 
 36 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 37 |         vec->push_back(PyFloat_AS_DOUBLE(PyList_GET_ITEM(pyList, i)));
 38 |     }
 39 |     return vec;
 40 | }
 41 | 
 42 | intv* getIntV(PyObject* pyList) {
 43 |     if (pyList == NULL) {
 44 |         return NULL;
 45 |     }
 46 |     intv* vec = new intv(); 
 47 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 48 |         vec->push_back(PyInt_AS_LONG(PyList_GET_ITEM(pyList, i)));
 49 |     }
 50 |     return vec;
 51 | }
 52 | 
 53 | int* getIntA(PyObject* pyList) {
 54 |     if (pyList == NULL) {
 55 |         return NULL;
 56 |     }
 57 |     int* arr = new int[PyList_GET_SIZE(pyList)];
 58 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 59 |         arr[i] = PyInt_AS_LONG(PyList_GET_ITEM(pyList, i));
 60 |     }
 61 |     return arr;
 62 | }
 63 | MatrixV* getMatrixV(PyObject* pyList) {
 64 |     if (pyList == NULL) {
 65 |         return NULL;
 66 |     }
 67 |     MatrixV* vec = new MatrixV(); 
 68 |     for (int i = 0; i < PyList_GET_SIZE(pyList); i++) {
 69 |         vec->push_back(new Matrix((PyArrayObject*)PyList_GET_ITEM(pyList, i)));
 70 |     }
 71 |     return vec;
 72 | }
 73 | 
 74 | int pyDictGetInt(PyObject* dict, const char* key) {
 75 |     return PyInt_AS_LONG(PyDict_GetItemString(dict, key));
 76 | }
 77 | 
 78 | intv* pyDictGetIntV(PyObject* dict, const char* key) {
 79 |     return getIntV(PyDict_GetItemString(dict, key));
 80 | }
 81 | 
 82 | int* pyDictGetIntA(PyObject* dict, const char* key) {
 83 |     return getIntA(PyDict_GetItemString(dict, key));
 84 | }
 85 | 
 86 | string pyDictGetString(PyObject* dict, const char* key) {
 87 |     return string(PyString_AS_STRING(PyDict_GetItemString(dict, key)));
 88 | }
 89 | 
 90 | float pyDictGetFloat(PyObject* dict, const char* key) {
 91 |     return PyFloat_AS_DOUBLE(PyDict_GetItemString(dict, key));
 92 | }
 93 | 
 94 | floatv* pyDictGetFloatV(PyObject* dict, const char* key) {
 95 |     return getFloatV(PyDict_GetItemString(dict, key));
 96 | }
 97 | 
 98 | Matrix* pyDictGetMatrix(PyObject* dict, const char* key) {
 99 |     return new Matrix((PyArrayObject*)PyDict_GetItemString(dict, key));
100 | }
101 | 
102 | MatrixV* pyDictGetMatrixV(PyObject* dict, const char* key) {
103 |     return getMatrixV(PyDict_GetItemString(dict, key));
104 | }


--------------------------------------------------------------------------------
/src/weights.cu:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * - Redistributions of source code must retain the above copyright notice,
 9 |  *   this list of conditions and the following disclaimer.
10 |  * 
11 |  * - Redistributions in binary form must reproduce the above copyright notice,
12 |  *   this list of conditions and the following disclaimer in the documentation
13 |  *   and/or other materials provided with the distribution.
14 |  *
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | 
27 | #include <weights.cuh>
28 | 
// Definition of the static member Weights::_autoCopyToGPU; its initial value is false.
bool Weights::_autoCopyToGPU = false;


--------------------------------------------------------------------------------
/src/worker.cu:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * - Redistributions of source code must retain the above copyright notice,
  9 |  *   this list of conditions and the following disclaimer.
 10 |  * 
 11 |  * - Redistributions in binary form must reproduce the above copyright notice,
 12 |  *   this list of conditions and the following disclaimer in the documentation
 13 |  *   and/or other materials provided with the distribution.
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 19 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 23 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  */
 26 | 
 27 | #include <algorithm>
 28 | #include <util.cuh>
 29 | #include <worker.cuh>
 30 | 
 31 | using namespace std;
 32 | 
 33 | /* 
 34 |  * ====================
 35 |  * WorkResult
 36 |  * ====================
 37 |  */
 38 | WorkResult::WorkResult(WorkResult::RESULTS resultType, Cost& results) : _resultType(resultType), _results(&results) {
 39 | }
 40 | 
 41 | WorkResult::WorkResult(WorkResult::RESULTS resultType) : _resultType(resultType), _results(NULL) {
 42 | }
 43 | 
 44 | WorkResult::~WorkResult() {
 45 |     delete _results; // delete NULL is ok
 46 | }
 47 | 
 48 | Cost& WorkResult::getResults() const {
 49 |     return *_results;
 50 | }
 51 | 
 52 | WorkResult::RESULTS WorkResult::getResultType() const {
 53 |     return _resultType;
 54 | }
 55 | 
 56 | /* 
 57 |  * ====================
 58 |  * Worker
 59 |  * ====================
 60 |  */
 61 | Worker::Worker(ConvNet& convNet) : _convNet(&convNet) {
 62 | }
 63 | 
 64 | /* 
 65 |  * ====================
 66 |  * DataWorker
 67 |  * ====================
 68 |  */
 69 | DataWorker::DataWorker(ConvNet& convNet, CPUData& data) : Worker(convNet), _data(&data) {
 70 |     _dp = &convNet.getDataProvider();
 71 | }
 72 | 
 73 | DataWorker::~DataWorker() {
 74 |     _dp->clearData();
 75 | }
 76 | 
 77 | /* 
 78 |  * ====================
 79 |  * TrainingWorker
 80 |  * ====================
 81 |  */
 82 | TrainingWorker::TrainingWorker(ConvNet& convNet, CPUData& data, bool test) 
 83 |     : DataWorker(convNet, data), _test(test) {
 84 | }
 85 | 
 86 | // Need to setData here (as opposed to the constructor) because the constructor executes in
 87 | // the original CPU thread, which is not the one with GPU access.
 88 | void TrainingWorker::run() {
 89 |     _dp->setData(*_data);
 90 |     Cost& batchCost = *new Cost(0);
 91 |     for (int i = 0; i < _dp->getNumMinibatches(); i++) {
 92 |         _convNet->fprop(i, _test ? PASS_TEST : PASS_TRAIN);
 93 |         _convNet->getCost(batchCost);
 94 |         
 95 |         if (!_test) {
 96 |             _convNet->bprop(PASS_TRAIN);
 97 |             _convNet->updateWeights();
 98 |         }
 99 |     }
100 |     cudaThreadSynchronize();
101 |     _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost));
102 | }
103 | 
104 | /*
105 |  * ====================
106 |  * SyncWorker
107 |  * ====================
108 |  */
109 | SyncWorker::SyncWorker(ConvNet& convNet) : Worker(convNet) {
110 | }
111 | 
112 | void SyncWorker::run() {
113 |     _convNet->copyToCPU();
114 |     _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::SYNC_DONE));
115 | }
116 | 
117 | /* 
118 |  * ====================
119 |  * GradCheckWorker
120 |  * ====================
121 |  */
122 | GradCheckWorker::GradCheckWorker(ConvNet& convNet, CPUData& data) 
123 |     : DataWorker(convNet, data) {
124 | }
125 | 
126 | void GradCheckWorker::run() {
127 |     _dp->setData(*_data);
128 |     _convNet->checkGradients();
129 |     exit(0);
130 | }
131 | 
132 | /* 
133 |  * ====================
134 |  * MultiviewTestWorker
135 |  * ====================
136 |  */
137 | MultiviewTestWorker::MultiviewTestWorker(ConvNet& convNet, CPUData& data, int numViews, int logregIdx) 
138 |     : DataWorker(convNet, data), _numViews(numViews), _logregIdx(logregIdx) {
139 |     assert(_data->getNumCases() % _numViews == 0);
140 | }
141 | 
142 | void MultiviewTestWorker::run() {
143 |     _dp->setData(*_data);
144 |     Layer& logregLayer = _convNet->getLayer(_logregIdx);
145 | 
146 |     int numCasesReal = _dp->getNumCases() / _numViews;
147 |     int numMiniReal = DIVUP(numCasesReal, _dp->getMinibatchSize());
148 |     
149 |     Cost& batchCost = *new Cost(0);
150 |     for (int i = 0; i < numMiniReal; i++) {
151 |         NVMatrix softmaxActs;
152 |         for (int v = 0; v < _numViews; v++) {
153 |             GPUData& mini = _dp->getDataSlice(v * numCasesReal + i * _dp->getMinibatchSize(),
154 |                                               min((v + 1) * numCasesReal, v * numCasesReal + (i + 1) * _dp->getMinibatchSize()));
155 |             _convNet->fprop(mini, PASS_TEST);
156 |             if (v == 0) {
157 |                 logregLayer.getPrev()[1]->getActs().copy(softmaxActs);
158 |             } else {
159 |                 softmaxActs.add(logregLayer.getPrev()[1]->getActs());
160 |             }
161 |         }
162 |         softmaxActs.scale(1.0 / _numViews);
163 |         NVMatrixV logregInput;
164 |         logregInput.push_back(&logregLayer.getPrev()[0]->getActs());
165 |         logregInput.push_back(&softmaxActs);
166 |         
167 |         logregLayer.fprop(logregInput, PASS_TEST);
168 |         
169 |         _convNet->getCost(batchCost);
170 |     }
171 |     cudaThreadSynchronize();
172 | 
173 |     _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost));
174 | }
175 | 
176 | /* 
177 |  * ====================
178 |  * FeatureWorker
179 |  * ====================
180 |  */
181 | FeatureWorker::FeatureWorker(ConvNet& convNet, CPUData& data, Matrix& ftrs, int layerIdx)
182 |     : DataWorker(convNet, data), _ftrs(&ftrs), _layerIdx(layerIdx) {
183 |     assert(ftrs.getNumRows() == data.getNumCases());
184 |     assert(!ftrs.isTrans());
185 | }
186 | 
187 | FeatureWorker::~FeatureWorker() {
188 |     delete _ftrs;
189 | }
190 | 
191 | void FeatureWorker::run() {
192 |     _dp->setData(*_data);
193 |     Layer& ftrLayer = _convNet->getLayer(_layerIdx);
194 |     Cost& batchCost = *new Cost(0);
195 |     for (int i = 0; i < _dp->getNumMinibatches(); i++) {
196 |         _convNet->fprop(i, PASS_TEST);
197 |         _convNet->getCost(batchCost);
198 |         Matrix& miniFtrs = _ftrs->sliceRows(i * _dp->getMinibatchSize(),
199 |                                             min(_dp->getNumCases(), (i + 1) * _dp->getMinibatchSize()));
200 |         NVMatrix& acts = ftrLayer.getActs();
201 |         NVMatrix acts_T;
202 |         if (acts.isTrans()) {
203 |             NVMatrix& soft_T = acts.getTranspose();
204 |             soft_T.transpose(acts_T);
205 |             delete &soft_T;
206 |         } else {
207 |             acts.transpose(acts_T);
208 |         }
209 |         acts_T.copyToHost(miniFtrs);
210 |         delete &miniFtrs;
211 |     }
212 |     cudaThreadSynchronize();
213 |     _convNet->getResultQueue().enqueue(new WorkResult(WorkResult::BATCH_DONE, batchCost));
214 | }


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com)
  2 | # All rights reserved.
  3 | #
  4 | # Redistribution and use in source and binary forms, with or without modification,
  5 | # are permitted provided that the following conditions are met:
  6 | #
  7 | # - Redistributions of source code must retain the above copyright notice,
  8 | #   this list of conditions and the following disclaimer.
  9 | # 
 10 | # - Redistributions in binary form must reproduce the above copyright notice,
 11 | #   this list of conditions and the following disclaimer in the documentation
 12 | #   and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 18 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 20 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 21 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 22 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 23 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | 
 25 | import re
 26 | import cPickle
 27 | import os
 28 | import numpy as n
 29 | from math import sqrt
 30 | 
 31 | import gzip
 32 | import zipfile
 33 | 
 34 | class UnpickleError(Exception):
 35 |     pass
 36 | 
 37 | VENDOR_ID_REGEX = re.compile('^vendor_id\s+: (\S+)')
 38 | GPU_LOCK_NO_SCRIPT = -2
 39 | GPU_LOCK_NO_LOCK = -1
 40 | 
 41 | try:
 42 |     import magic
 43 |     ms = magic.open(magic.MAGIC_NONE)
 44 |     ms.load()
 45 | except ImportError: # no magic module
 46 |     ms = None
 47 | 
 48 | def get_gpu_lock(id=-1):
 49 |     import imp
 50 |     lock_script_path = '/u/tang/bin/gpu_lock2.py'
 51 |     if os.path.exists(lock_script_path):
 52 |         locker = imp.load_source("", lock_script_path)
 53 |         if id == -1:
 54 |             return locker.obtain_lock_id()
 55 |         print id
 56 |         got_id = locker._obtain_lock(id)
 57 |         return id if got_id else GPU_LOCK_NO_LOCK
 58 |     return GPU_LOCK_NO_SCRIPT if id < 0 else id
 59 | 
 60 | def pickle(filename, data, compress=False):
 61 |     if compress:
 62 |         fo = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
 63 |         fo.writestr('data', cPickle.dumps(data, -1))
 64 |     else:
 65 |         fo = open(filename, "wb")
 66 |         cPickle.dump(data, fo, protocol=cPickle.HIGHEST_PROTOCOL)
 67 |     fo.close()
 68 |     
 69 | def unpickle(filename):
 70 |     if not os.path.exists(filename):
 71 |         raise UnpickleError("Path '%s' does not exist." % filename)
 72 |     if ms is not None and ms.file(filename).startswith('gzip'):
 73 |         fo = gzip.open(filename, 'rb')
 74 |         dict = cPickle.load(fo)
 75 |     elif ms is not None and ms.file(filename).startswith('Zip'):
 76 |         fo = zipfile.ZipFile(filename, 'r', zipfile.ZIP_DEFLATED)
 77 |         dict = cPickle.loads(fo.read('data'))
 78 |     else:
 79 |         fo = open(filename, 'rb')
 80 |         dict = cPickle.load(fo)
 81 |     
 82 |     fo.close()
 83 |     return dict
 84 | 
 85 | def tryint(s):
 86 |     try:
 87 |         return int(s)
 88 |     except:
 89 |         return s
 90 | 
 91 | def alphanum_key(s):
 92 |     return [tryint(c) for c in re.split('([0-9]+)', s)]
 93 | 
 94 | def is_intel_machine():
 95 |     f = open('/proc/cpuinfo')
 96 |     for line in f:
 97 |         m = VENDOR_ID_REGEX.match(line)
 98 |         if m:
 99 |             f.close()
100 |             return m.group(1) == 'GenuineIntel'
101 |     f.close()
102 |     return False
103 | 
def get_cpu():
    """Return 'intel' when is_intel_machine() is true, otherwise 'amd'."""
    return 'intel' if is_intel_machine() else 'amd'
108 | 
def is_windows_machine():
    """Return True when os.name is 'nt'."""
    platform_name = os.name
    return platform_name == 'nt'
111 | 


--------------------------------------------------------------------------------