├── CONTRIBUTING.md ├── LICENSE ├── Makefile-distrib ├── README.md ├── SdkMasterLog.csv ├── avg-test.py ├── avg-valid.py ├── build.sh ├── convdata.py ├── convdata_cifar.py ├── convdata_flickr.py ├── convdata_jpeg.py ├── convnet.py ├── deviceQuery.txt ├── example-layers ├── layer-params-18pct.cfg ├── layer-params-19pct.cfg ├── layer-params-80sec.cfg ├── layer-params-conv-local-12pct.cfg ├── layer-params-conv-local-13pct.cfg ├── layer-params-example.cfg ├── layer-params.gc.cfg ├── layers-18pct.cfg ├── layers-19pct.cfg ├── layers-80sec.cfg ├── layers-conv-local-12pct.cfg ├── layers-conv-local-13pct.cfg ├── layers-example.cfg └── layers.gc.cfg ├── findsimilar.py ├── fix-big-imgnet.py ├── fix-flickr.py ├── gen-py-interface.py ├── include ├── convnet.cuh ├── cost.cuh ├── cpuCNN.cuh ├── data.cuh ├── hostmem.cuh ├── layer.cuh ├── layer_kernels.cuh ├── lr.cuh ├── messages.cuh ├── multisoftmax.h ├── neuron.cuh ├── pipedispenser.cuh ├── pyconvnet.cuh ├── quantizer.cuh ├── softmaxtree.cuh ├── util.cuh ├── weights.cuh └── worker.cuh ├── initw.py ├── layer.py ├── layers-cifar ├── layer-params-18pct-noisylr.cfg ├── layer-params-conv-local-13pct-noisylr.cfg ├── layers-18pct.cfg └── layers-conv-local-13pct.cfg ├── layers ├── layer-params-100.cfg ├── layer-params-106.cfg ├── layer-params-107.cfg ├── layer-params-109.cfg ├── layer-params-110.cfg ├── layer-params-111.cfg ├── layer-params-112.cfg ├── layer-params-113.cfg ├── layer-params-114.cfg ├── layer-params-115-jpeg.cfg ├── layer-params-116.cfg ├── layer-params-117.cfg ├── layer-params-118.cfg ├── layer-params-120-2012-full.cfg ├── layer-params-120-2012.cfg ├── layer-params-120-4gpu-auto2.cfg ├── layer-params-120-4gpu-auto3.cfg ├── layer-params-120-4gpu-auto4.cfg ├── layer-params-120-4gpu-auto5.cfg ├── layer-params-120-4gpu-auto6.cfg ├── layer-params-120-4gpu.cfg ├── layer-params-120.cfg ├── layer-params-121.cfg ├── layer-params-126-2012-full.cfg ├── layer-params-127.cfg ├── layer-params-128.cfg ├── layer-params-129.cfg ├── 
layer-params-130.cfg ├── layer-params-131-2009.cfg ├── layer-params-131.cfg ├── layer-params-132.cfg ├── layer-params-133.cfg ├── layer-params-134.cfg ├── layer-params-135-2009-2012.cfg ├── layer-params-135-2009.cfg ├── layer-params-135.cfg ├── layer-params-136.cfg ├── layer-params-137-tree.cfg ├── layer-params-137.cfg ├── layer-params-139.cfg ├── layer-params-141-2009-half.cfg ├── layer-params-141-2009.cfg ├── layer-params-141.cfg ├── layer-params-145-2010.cfg ├── layer-params-145-half.cfg ├── layer-params-145.cfg ├── layer-params-146-2009-tree.cfg ├── layer-params-146-2009.cfg ├── layer-params-146-2011.cfg ├── layer-params-146-2012-2009.cfg ├── layer-params-146-2012-2011.cfg ├── layer-params-147.cfg ├── layer-params-147.cfg.save ├── layer-params-148.cfg ├── layer-params-149.cfg ├── layer-params-150.cfg ├── layer-params-153.cfg ├── layer-params-154.cfg ├── layer-params-155.cfg ├── layer-params-156.cfg ├── layer-params-157.cfg ├── layer-params-158.cfg ├── layer-params-160.cfg ├── layer-params-161.cfg ├── layer-params-162.cfg ├── layer-params-163.cfg ├── layer-params-165.cfg ├── layer-params-166.cfg ├── layer-params-167.cfg ├── layer-params-169.cfg ├── layer-params-170-256-0.015.cfg ├── layer-params-170-256-double.cfg ├── layer-params-170-256.cfg ├── layer-params-170-4gpu-exp.cfg ├── layer-params-170-4gpu.cfg ├── layer-params-170-quant.cfg ├── layer-params-170.cfg ├── layer-params-171.cfg ├── layer-params-172.cfg ├── layer-params-174.cfg ├── layer-params-175.cfg ├── layer-params-177.cfg ├── layer-params-178.cfg ├── layer-params-180.cfg ├── layer-params-183-4gpu-26epc.cfg ├── layer-params-183-4gpu-exp.cfg ├── layer-params-183-4gpu.cfg ├── layer-params-184-4gpu-26epc.cfg ├── layer-params-184-4gpu.cfg ├── layer-params-2009-101.cfg ├── layer-params-96-16k.cfg ├── layer-params-98-16kinit.cfg ├── layer-params-99.cfg ├── layer-params-flickr-102-inet-init.cfg ├── layer-params-flickr-102.cfg ├── layer-params-flickr-103.cfg ├── layer-params-flickr-105.cfg ├── 
layer-params-inet-5layer-conv94-2gpu.cfg ├── layers-100.cfg ├── layers-106.cfg ├── layers-109.cfg ├── layers-110.cfg ├── layers-111.cfg ├── layers-112.cfg ├── layers-113.cfg ├── layers-114.cfg ├── layers-115-jpeg.cfg ├── layers-116.cfg ├── layers-117.cfg ├── layers-118.cfg ├── layers-120-4gpu.cfg ├── layers-120.cfg ├── layers-121.cfg ├── layers-126.cfg ├── layers-127.cfg ├── layers-128.cfg ├── layers-129.cfg ├── layers-130.cfg ├── layers-131-2009.cfg ├── layers-131.cfg ├── layers-132.cfg ├── layers-133.cfg ├── layers-134.cfg ├── layers-135-2009-2012.cfg ├── layers-135-2009.cfg ├── layers-135.cfg ├── layers-137-tree.cfg ├── layers-137.cfg ├── layers-141-2009-2010.cfg ├── layers-141-2009-2012.cfg ├── layers-141-2009-half.cfg ├── layers-141-2009.cfg ├── layers-141.cfg ├── layers-145-half.cfg ├── layers-145.cfg ├── layers-146-2009-tree.cfg ├── layers-146-2009.cfg ├── layers-146-2011.cfg ├── layers-146-2012-2009.cfg ├── layers-146-2012-2011.cfg ├── layers-147.cfg ├── layers-148.cfg ├── layers-149.cfg ├── layers-150.cfg ├── layers-153-4gpu.cfg ├── layers-153.cfg ├── layers-166.cfg ├── layers-167.cfg ├── layers-177.cfg ├── layers-178.cfg ├── layers-183-4gpu.cfg ├── layers-184-4gpu.cfg ├── layers-2009-101.cfg ├── layers-96-16k.cfg ├── layers-98-16kinit.cfg ├── layers-99.cfg ├── layers-flickr-102-inet-init.cfg ├── layers-flickr-102.cfg ├── layers-flickr-103.cfg ├── layers-flickr-105.cfg └── layers-inet-5layer-conv94-2gpu.cfg ├── multisoft-normed.py ├── multisoft.py ├── package.sh ├── pyInterface.cutemp ├── readme.html ├── run4.sh ├── shownet.py ├── src ├── convnet.cu ├── cost.cu ├── cpuCNN.cu ├── data.cu ├── hostmem.cu ├── layer.cu ├── layer_kernels.cu ├── lr.cu ├── multisoftmax.cpp ├── neuron.cu ├── pyconvnet.cu ├── quantizer.cu ├── softmaxtree.cu ├── test.cu ├── util.cu ├── weights.cu └── worker.cu ├── test.py ├── test.sh ├── tm.sh ├── txt-preds.py └── verify-test-preds.py /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 
| External contributions are not accepted, sorry! 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Google LLC. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
10 | -------------------------------------------------------------------------------- /Makefile-distrib: -------------------------------------------------------------------------------- 1 | MODELNAME := _ConvNet 2 | 3 | INCLUDES := -I$(PYTHON_INCLUDE_PATH) -I$(NUMPY_INCLUDE_PATH) -I./include -I./include/common -I./include/cudaconv2 -I./include/nvmatrix 4 | LIB := -lpthread -L$(ATLAS_LIB_PATH) -L$(CUDA_INSTALL_PATH)/lib64 -lcblas 5 | 6 | USECUBLAS := 1 7 | 8 | PYTHON_VERSION=$(shell python -V 2>&1 | cut -d ' ' -f 2 | cut -d '.' -f 1,2) 9 | LIB += -lpython$(PYTHON_VERSION) 10 | 11 | GENCODE_ARCH := -gencode=arch=compute_20,code=\"sm_20,compute_20\" 12 | COMMONFLAGS := -DNUMPY_INTERFACE -DMODELNAME=$(MODELNAME) -DINITNAME=init$(MODELNAME) 13 | 14 | EXECUTABLE := $(MODELNAME).so 15 | 16 | CUFILES := $(shell echo src/*.cu src/cudaconv2/*.cu src/nvmatrix/*.cu) 17 | CU_DEPS := $(shell echo include/*.cuh include/cudaconv2/*.cuh include/nvmatrix/*.cuh) 18 | CCFILES := $(shell echo src/common/*.cpp) 19 | C_DEPS := $(shell echo include/common/*.h) 20 | 21 | include common-gcc-cuda-4.0.mk 22 | 23 | makedirectories: 24 | $(VERBOSE)mkdir -p $(LIBDIR) 25 | $(VERBOSE)mkdir -p $(OBJDIR)/src/cudaconv2 26 | $(VERBOSE)mkdir -p $(OBJDIR)/src/nvmatrix 27 | $(VERBOSE)mkdir -p $(OBJDIR)/src/common 28 | $(VERBOSE)mkdir -p $(TARGETDIR) 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AlexNet Source Code 2 | 3 | 4 | 5 | This package contains the original AlexNet source code as it was in 2012, when it won the ImageNet competition. Geoffrey Hinton, Ilya Sutskever, and Alex Krizhevsky formed DNNResearch soon afterwards and sold the company, and the AlexNet source code along with it, to Google, which would continue work on it. This package also includes the parameter files trained on the ImageNet dataset. 
6 | 7 | Previously available as open source was Krizhevsky’s precursor to AlexNet, [cuda-convnet](https://code.google.com/archive/p/cuda-convnet/), which was trained on the smaller CIFAR-10 dataset. While there are other existing repositories of code named "AlexNet" on the web, they are not the original code, but rather reimplementations based on the paper Krizhevsky, Sutskever, and Hinton published: 8 | 9 | [Krizhevsky, A., Sutskever, I. & Hinton, G. E. (2012). ImageNet Classification with Deep Convolutional Neural Networks](https://proceedings.neurips.cc/paper_files/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf). In F. Pereira, C. J. C. Burges, L. Bottou & K. Q. Weinberger (ed.), Advances in Neural Information Processing Systems 25 (pp. 1097--1105). Curran Associates, Inc. 10 | 11 | In 2020, Alex Krizhevsky connected CHM to Geoff Hinton, who directed us to a team at Google. CHM worked with Google to identify the original 2012 version of the code and negotiate the terms of the public release as open source. CHM thanks Krizhevsky and Hinton for their support and David Bieber of Google DeepMind for his help in securing the release. 12 | 13 | CHM is proud to present the source code to the 2012 version of Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton’s AlexNet, which transformed the field of artificial intelligence. 
def do_avg(paths, tgtpath, coeffs):
    """Write coefficient-weighted sums of per-batch prediction files.

    For every entry of ``paths[0]`` whose name starts with
    ``test_preds_<N>``, the file of the same name is loaded from each
    directory in ``paths``, the ``'data'`` entries are combined as
    ``sum(coeff * data)`` and the result is stored in ``tgtpath`` as
    ``test_preds_<N>`` in a dict with the single key ``'data'``.

    paths   -- list of directories; the batch files listed in paths[0]
               are looked up under the same name in every directory.
    tgtpath -- directory the combined files are written to (the script's
               ``__main__`` section creates it before calling this).
    coeffs  -- sequence of weights, paired positionally with ``paths``.
               ``zip`` stops at the shorter sequence, so a surplus
               path or coefficient is silently ignored.

    ``unpickle`` and ``pickle`` are the helpers pulled in by
    ``from util import *``; they are not defined in this file.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence on
    # newer interpreters. Compiled once instead of once per directory entry.
    name_re = re.compile(r'test_preds_(\d+)')
    for fname in sorted(os.listdir(paths[0])):
        m = name_re.match(fname)
        if m is None:
            # Not a prediction batch (e.g. a metadata file); previously this
            # crashed with AttributeError on ``None.group``.
            continue
        b = int(m.group(1))
        dics = [unpickle(os.path.join(p, fname)) for p in paths]
        preds = sum(c * d['data'] for c, d in zip(coeffs, dics))
        pickle(os.path.join(tgtpath, 'test_preds_%d' % b), {'data': preds})
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Wrote batch %d" % b)
5 | 6 | rm -rf build 7 | rm *.so 8 | mkdir -p build 9 | 10 | cp -r src build/ 11 | cp -r include build/ 12 | cp $NVMATRIX/src/nvmatrix.cu $NVMATRIX/src/nvmatrix_kernels.cu $NVMATRIX/src/gpu_locking.cpp build/src 13 | cp $NVMATRIX/include/nvmatrix.cuh $NVMATRIX/include/nvmatrix_kernels.cuh $NVMATRIX/include/nvmatrix_operators.cuh $NVMATRIX/include/gpu_locking.h build/include 14 | cp $CUDACONV/src/conv_util.cu $CUDACONV/src/filter_acts.cu $CUDACONV/src/weight_acts.cu $CUDACONV/src/img_acts.cu build/src 15 | cp $CUDACONV/include/conv_util.cuh $CUDACONV/include/cudaconv2.cuh build/include 16 | cp Makefile-all build/Makefile 17 | 18 | cd build && make -j kepler=1 $* && cd .. 19 | ln -fs build/*.so ./ 20 | -------------------------------------------------------------------------------- /example-layers/layer-params-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | # Reduce all learning rates by factor of 10 after 120 epochs. 4 | # Then another factor of 10 after 10 more epochs. 
5 | 6 | [conv1] 7 | epsW=0.001 8 | epsB=0.002 9 | momW=0.9 10 | momB=0.9 11 | wc=0.004 12 | 13 | [conv2] 14 | epsW=0.001 15 | epsB=0.002 16 | momW=0.9 17 | momB=0.9 18 | wc=0.004 19 | 20 | [conv3] 21 | epsW=0.001 22 | epsB=0.002 23 | momW=0.9 24 | momB=0.9 25 | wc=0.004 26 | 27 | [fc10] 28 | epsW=0.001 29 | epsB=0.002 30 | momW=0.9 31 | momB=0.9 32 | wc=1 33 | 34 | [logprob] 35 | coeff=1 36 | -------------------------------------------------------------------------------- /example-layers/layer-params-19pct.cfg: -------------------------------------------------------------------------------- 1 | # 19% error on CIFAR-10 in 20 minutes - layer parameter file 2 | # Set wc to 0 for translations -- 14.2% 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.004 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.004 17 | 18 | [conv3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [fc10] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=3 31 | 32 | [logprob] 33 | coeff=1 34 | -------------------------------------------------------------------------------- /example-layers/layer-params-80sec.cfg: -------------------------------------------------------------------------------- 1 | # 26% error on CIFAR-10 in 80 seconds - layer parameter file 2 | 3 | [conv1] 4 | epsW=0.001 5 | epsB=0.002 6 | momW=0.9 7 | momB=0.9 8 | wc=0.004 9 | 10 | [conv2] 11 | epsW=0.001 12 | epsB=0.002 13 | momW=0.9 14 | momB=0.9 15 | wc=0.004 16 | 17 | [conv3] 18 | epsW=0.001 19 | epsB=0.002 20 | momW=0.9 21 | momB=0.9 22 | wc=0.004 23 | 24 | [fc64] 25 | epsW=0.001 26 | epsB=0.002 27 | momW=0.9 28 | momB=0.9 29 | wc=.03 30 | 31 | [fc10] 32 | epsW=0.001 33 | epsB=0.002 34 | momW=0.9 35 | momB=0.9 36 | wc=.03 37 | 38 | [logprob] 39 | coeff=1 40 | -------------------------------------------------------------------------------- /example-layers/layer-params-conv-local-12pct.cfg: 
-------------------------------------------------------------------------------- 1 | # 12% error on CIFAR-10 - layer parameter file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [conv1] 5 | epsW=0.00001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.00 10 | 11 | [conv2] 12 | epsW=0.00001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.00 17 | 18 | [local3] 19 | epsW=0.00001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [local4] 26 | epsW=0.00001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=0.004 31 | 32 | [fc10] 33 | epsW=0.00001 34 | epsB=0.002 35 | momW=0.9 36 | momB=0.9 37 | wc=0.004 38 | 39 | [logprob] 40 | coeff=1 41 | -------------------------------------------------------------------------------- /example-layers/layer-params-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 - layer parameter file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [conv1] 5 | epsW=0.001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.00 10 | 11 | [conv2] 12 | epsW=0.001 13 | epsB=0.002 14 | momW=0.9 15 | momB=0.9 16 | wc=0.00 17 | 18 | [local3] 19 | epsW=0.001 20 | epsB=0.002 21 | momW=0.9 22 | momB=0.9 23 | wc=0.004 24 | 25 | [local4] 26 | epsW=0.001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=0.004 31 | 32 | [fc10] 33 | epsW=0.001 34 | epsB=0.002 35 | momW=0.9 36 | momB=0.9 37 | wc=0.004 38 | 39 | [logprob] 40 | coeff=1 41 | -------------------------------------------------------------------------------- /example-layers/layer-params-example.cfg: -------------------------------------------------------------------------------- 1 | [conv32] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0 7 | 8 | [local32] 9 | epsW=0.001 10 | epsB=0.002 11 | momW=0.9 12 | momB=0.9 13 | wc=0 14 | 15 | [fc1024] 16 | momW=0.9 17 | momB=0.9 18 | epsW=0.00001 19 | epsB=0.00002 20 | 
wc=0 21 | 22 | [conv32-2] 23 | epsW=0.001 24 | epsB=0.002 25 | momW=0.9 26 | momB=0.9 27 | wc=0 28 | 29 | [conv32-3] 30 | epsW=0.001 31 | epsB=0.002 32 | momW=0.9 33 | momB=0.9 34 | wc=0 35 | 36 | [fc10] 37 | epsW=0.0001,0.001 38 | epsB=0.002 39 | momW=0.5,0.9 40 | momB=0.9 41 | wc=0,0 42 | 43 | [logprob] 44 | coeff=1 45 | -------------------------------------------------------------------------------- /example-layers/layer-params.gc.cfg: -------------------------------------------------------------------------------- 1 | [conv32a] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0 7 | 8 | [conv32b] 9 | epsW=0.001 10 | epsB=0.002 11 | momW=0.9 12 | momB=0.9 13 | wc=0 14 | 15 | [conv32c] 16 | epsW=0.001 17 | epsB=0.002 18 | momW=0.9 19 | momB=0.9 20 | wc=0 21 | 22 | [fc10] 23 | wc=0 24 | momB=0 25 | momW=0 26 | epsW=0.00001 27 | epsB=0.00002 28 | 29 | [fc16a] 30 | wc=0,0,0 31 | momB=0 32 | momW=0,0,0 33 | epsW=0.00001,0.1,0.1 34 | epsB=0.00002 35 | 36 | [fc16b] 37 | wc=0,0,0 38 | momB=0 39 | momW=0,0,0 40 | epsW=0.00001,0.1,0.1 41 | epsB=0.00002 42 | 43 | [fc16c] 44 | wc=0,0,0 45 | momB=0 46 | momW=0,0,0 47 | epsW=0.00001,0.1,0.1 48 | epsB=0.00002 49 | 50 | [logreg] 51 | coeff=1 52 | 53 | [rnorm1a] 54 | scale=0.0001 55 | pow=0.75 56 | minDiv=0.25 57 | 58 | [rnorm1b] 59 | scale=0.0001 60 | pow=0.75 61 | minDiv=0.25 62 | 63 | [rnorm1c] 64 | scale=0.0001 65 | pow=0.75 66 | minDiv=0.25 67 | -------------------------------------------------------------------------------- /example-layers/layers-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | neuron=relu 20 | initW=0.0001 21 | partialSum=4 22 | sharedBiases=1 23 | 24 | [pool1] 25 | 
type=pool 26 | pool=max 27 | inputs=conv1 28 | start=0 29 | sizeX=3 30 | stride=2 31 | outputsX=0 32 | channels=32 33 | 34 | [rnorm1] 35 | type=rnorm 36 | inputs=pool1 37 | channels=32 38 | sizeX=3 39 | scale=0.00005 40 | pow=.75 41 | 42 | [conv2] 43 | type=conv 44 | inputs=rnorm1 45 | filters=32 46 | padding=2 47 | stride=1 48 | filterSize=5 49 | channels=32 50 | neuron=relu 51 | initW=0.01 52 | partialSum=4 53 | sharedBiases=1 54 | 55 | [pool2] 56 | type=pool 57 | pool=avg 58 | inputs=conv2 59 | start=0 60 | sizeX=3 61 | stride=2 62 | outputsX=0 63 | channels=32 64 | 65 | [rnorm2] 66 | type=rnorm 67 | inputs=pool2 68 | channels=32 69 | sizeX=3 70 | scale=0.00005 71 | pow=.75 72 | 73 | [conv3] 74 | type=conv 75 | inputs=rnorm2 76 | filters=64 77 | padding=2 78 | stride=1 79 | filterSize=5 80 | channels=32 81 | neuron=relu 82 | initW=0.01 83 | partialSum=4 84 | sharedBiases=1 85 | 86 | [pool3] 87 | type=pool 88 | pool=avg 89 | inputs=conv3 90 | start=0 91 | sizeX=3 92 | stride=2 93 | outputsX=0 94 | channels=64 95 | 96 | [fc10] 97 | type=fc 98 | outputs=10 99 | inputs=pool3 100 | initW=0.01 101 | neuron=ident 102 | 103 | [probs] 104 | type=softmax 105 | inputs=fc10 106 | 107 | [logprob] 108 | type=cost.logreg 109 | inputs=labels,probs 110 | -------------------------------------------------------------------------------- /example-layers/layers-19pct.cfg: -------------------------------------------------------------------------------- 1 | # 19% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | neuron=relu 20 | initW=0.0001 21 | partialSum=1 22 | sharedBiases=1 23 | 24 | [pool1] 25 | type=pool 26 | pool=max 27 | inputs=conv1 28 | start=0 29 | sizeX=3 30 | stride=2 31 | outputsX=0 32 | channels=32 33 | 34 | [conv2] 35 | type=conv 36 | 
inputs=pool1 37 | filters=32 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=32 42 | neuron=relu 43 | initW=0.01 44 | partialSum=1 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=avg 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [conv3] 58 | type=conv 59 | inputs=pool2 60 | filters=64 61 | padding=2 62 | stride=1 63 | filterSize=5 64 | channels=32 65 | neuron=relu 66 | initW=0.01 67 | partialSum=1 68 | sharedBiases=1 69 | 70 | [pool3] 71 | type=pool 72 | pool=avg 73 | inputs=conv3 74 | start=0 75 | sizeX=3 76 | stride=2 77 | outputsX=0 78 | channels=64 79 | 80 | [fc10] 81 | type=fc 82 | outputs=10 83 | inputs=pool3 84 | initW=0.01 85 | neuron=ident 86 | 87 | [probs] 88 | type=softmax 89 | inputs=fc10 90 | 91 | [logprob] 92 | type=cost.logreg 93 | inputs=labels,probs 94 | -------------------------------------------------------------------------------- /example-layers/layers-80sec.cfg: -------------------------------------------------------------------------------- 1 | # 26% error on CIFAR-10 in 80 seconds - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | neuron=relu 20 | initW=0.0001 21 | partialSum=4 22 | sharedBiases=1 23 | 24 | [pool1] 25 | type=pool 26 | pool=max 27 | inputs=conv1 28 | start=0 29 | sizeX=3 30 | stride=2 31 | outputsX=0 32 | channels=32 33 | 34 | [conv2] 35 | type=conv 36 | inputs=pool1 37 | filters=32 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=32 42 | neuron=relu 43 | initW=0.01 44 | partialSum=4 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=avg 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [conv3] 58 | type=conv 59 | inputs=pool2 60 | filters=64 61 | padding=2 62 | stride=1 63 | 
filterSize=5 64 | channels=32 65 | neuron=relu 66 | initW=0.01 67 | partialSum=4 68 | sharedBiases=1 69 | 70 | [pool3] 71 | type=pool 72 | pool=avg 73 | inputs=conv3 74 | start=0 75 | sizeX=3 76 | stride=2 77 | outputsX=0 78 | channels=64 79 | 80 | [fc64] 81 | type=fc 82 | outputs=64 83 | inputs=pool3 84 | initW=0.1 85 | neuron=relu 86 | 87 | [fc10] 88 | type=fc 89 | outputs=10 90 | inputs=fc64 91 | initW=0.1 92 | neuron=ident 93 | 94 | [probs] 95 | type=softmax 96 | inputs=fc10 97 | 98 | [logprob] 99 | type=cost.logreg 100 | inputs=labels,probs 101 | -------------------------------------------------------------------------------- /example-layers/layers-conv-local-12pct.cfg: -------------------------------------------------------------------------------- 1 | # 12% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=64 16 | padding=2 17 | stride=1 18 | filterSize=5 19 | neuron=relu 20 | initW=0.0001 21 | partialSum=4 22 | sharedBiases=1 23 | 24 | [pool1] 25 | type=pool 26 | pool=max 27 | inputs=conv1 28 | start=0 29 | sizeX=3 30 | stride=2 31 | outputsX=0 32 | channels=64 33 | 34 | [conv2] 35 | type=conv 36 | inputs=pool1 37 | filters=64 38 | padding=2 39 | stride=1 40 | filterSize=5 41 | channels=64 42 | neuron=relu 43 | initW=0.01 44 | partialSum=8 45 | sharedBiases=1 46 | 47 | [pool2] 48 | type=pool 49 | pool=max 50 | inputs=conv2 51 | start=0 52 | sizeX=3 53 | stride=2 54 | outputsX=0 55 | channels=64 56 | 57 | [local3] 58 | type=local 59 | inputs=pool2 60 | filters=32 61 | padding=1 62 | stride=1 63 | filterSize=3 64 | channels=64 65 | neuron=relu 66 | initW=0.04 67 | 68 | [local4] 69 | type=local 70 | inputs=local3 71 | filters=32 72 | padding=1 73 | stride=1 74 | filterSize=3 75 | channels=32 76 | neuron=relu 77 | initW=0.04 78 | 79 | [fc10] 80 | type=fc 81 | outputs=10 82 | inputs=local4 83 
| initW=0.01 84 | neuron=ident 85 | 86 | [probs] 87 | type=softmax 88 | inputs=fc10 89 | 90 | [logprob] 91 | type=cost.logreg 92 | inputs=labels,probs 93 | -------------------------------------------------------------------------------- /example-layers/layers-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 in 20 minutes - layer definition file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [data] 5 | type=data 6 | dataIdx=0 7 | 8 | [labels] 9 | type=data 10 | dataIdx=1 11 | 12 | [conv1] 13 | type=conv 14 | inputs=data 15 | channels=3 16 | filters=64 17 | padding=2 18 | stride=1 19 | filterSize=5 20 | neuron=relu 21 | initW=0.0001 22 | partialSum=4 23 | sharedBiases=1 24 | 25 | [pool1] 26 | type=pool 27 | pool=max 28 | inputs=conv1 29 | start=0 30 | sizeX=3 31 | stride=2 32 | outputsX=0 33 | channels=64 34 | 35 | [conv2] 36 | type=conv 37 | inputs=pool1 38 | filters=64 39 | padding=2 40 | stride=1 41 | filterSize=5 42 | channels=64 43 | neuron=relu 44 | initW=0.01 45 | partialSum=8 46 | sharedBiases=1 47 | 48 | [pool2] 49 | type=pool 50 | pool=max 51 | inputs=conv2 52 | start=0 53 | sizeX=3 54 | stride=2 55 | outputsX=0 56 | channels=64 57 | 58 | [local3] 59 | type=local 60 | inputs=pool2 61 | filters=32 62 | padding=1 63 | stride=1 64 | filterSize=3 65 | channels=64 66 | neuron=relu 67 | initW=0.04 68 | 69 | [local4] 70 | type=local 71 | inputs=local3 72 | filters=32 73 | padding=1 74 | stride=1 75 | filterSize=3 76 | channels=32 77 | neuron=relu 78 | initW=0.04 79 | 80 | [fc10] 81 | type=fc 82 | outputs=10 83 | inputs=local4 84 | initW=0.01 85 | neuron=ident 86 | 87 | [probs] 88 | type=softmax 89 | inputs=fc10 90 | 91 | [logprob] 92 | type=cost.logreg 93 | inputs=labels,probs 94 | -------------------------------------------------------------------------------- /example-layers/layers-example.cfg: 
-------------------------------------------------------------------------------- 1 | # This is a layer configuration file that contains all the 2 | # layer types supported by this code. It's not actually good for anything 3 | # other than demonstrating how layers are specified and connected to one another. 4 | 5 | # Note: this file has gotten so big that the resultant net will not run on anything short of a 3GB GTX 580. 6 | # But there's no particular reason to run the net specified by this file. It's not actually good. 7 | 8 | [data] 9 | type=data 10 | dataIdx=0 11 | 12 | [labels] 13 | type=data 14 | dataIdx=1 15 | 16 | [conv32] 17 | type=conv 18 | inputs=data 19 | channels=3 20 | filters=32 21 | padding=4 22 | stride=1 23 | filterSize=9 24 | neuron=logistic 25 | initW=0.00001 26 | partialSum=1 27 | sharedBiases=true 28 | 29 | [local32] 30 | type=local 31 | inputs=conv32 32 | channels=32 33 | filters=32 34 | padding=4 35 | stride=1 36 | filterSize=9 37 | neuron=logistic 38 | initW=0.00001 39 | 40 | [fc1024] 41 | type=fc 42 | outputs=1024 43 | inputs=data 44 | initW=0.001 45 | neuron=relu 46 | 47 | [maxpool] 48 | type=pool 49 | pool=max 50 | inputs=local32 51 | start=0 52 | sizeX=4 53 | stride=2 54 | outputsX=0 55 | channels=32 56 | 57 | [rnorm1] 58 | type=rnorm 59 | inputs=maxpool 60 | channels=32 61 | sizeX=5 62 | scale=0.0000125 63 | pow=0.75 64 | 65 | [cnorm1] 66 | type=cnorm 67 | inputs=rnorm1 68 | channels=32 69 | sizeX=7 70 | scale=0.001 71 | pow=0.5 72 | 73 | [conv32-2] 74 | type=conv 75 | inputs=cnorm1 76 | groups=4 77 | channels=32 78 | filters=32 79 | padding=2 80 | stride=1 81 | filterSize=5 82 | neuron=relu 83 | initW=0.0001 84 | partialSum=1 85 | sharedBiases=false 86 | 87 | [conv32-3] 88 | type=conv 89 | inputs=conv32-2 90 | groups=4 91 | channels=128 92 | filters=32 93 | padding=2 94 | stride=2 95 | filterSize=5 96 | neuron=relu 97 | initW=0.0001 98 | partialSum=1 99 | randSparse=true 100 | filterChannels=64 101 | 102 | [fc10] 103 | type=fc 104 | 
outputs=10 105 | inputs=conv32-3,fc1024 106 | initW=0.0001,0.0001 107 | neuron=ident 108 | 109 | [probs] 110 | type=softmax 111 | inputs=fc10 112 | 113 | [logprob] 114 | type=cost.logreg 115 | inputs=labels,probs 116 | -------------------------------------------------------------------------------- /example-layers/layers.gc.cfg: -------------------------------------------------------------------------------- 1 | [data] 2 | type=data 3 | dataIdx=0 4 | 5 | [labels] 6 | type=data 7 | dataIdx=1 8 | 9 | [conv32a] 10 | type=conv 11 | inputs=data 12 | filters=16 13 | padding=0 14 | stride=1 15 | filterSize=3 16 | channels=3 17 | neuron=relu 18 | initW=0.3 19 | initB=1 20 | partialSum=1 21 | sharedBiases=true 22 | gpu=0 23 | 24 | [conv32b] 25 | type=conv 26 | inputs=data 27 | filters=16 28 | padding=0 29 | stride=1 30 | filterSize=3 31 | channels=3 32 | neuron=relu 33 | initW=0.3 34 | initB=1 35 | partialSum=1 36 | sharedBiases=true 37 | gpu=1 38 | 39 | [conv32c] 40 | type=conv 41 | inputs=data 42 | filters=16 43 | padding=0 44 | stride=1 45 | filterSize=3 46 | channels=3 47 | neuron=relu 48 | initW=0.3 49 | initB=1 50 | partialSum=1 51 | sharedBiases=true 52 | gpu=2 53 | 54 | [rnorm1a] 55 | type=cmrnorm 56 | inputs=conv32a 57 | channels=16 58 | size=5 59 | 60 | [rnorm1b] 61 | type=cmrnorm 62 | inputs=conv32b 63 | channels=16 64 | size=5 65 | 66 | [rnorm1c] 67 | type=cmrnorm 68 | inputs=conv32c 69 | channels=16 70 | size=5 71 | 72 | [fc16a] 73 | type=fc 74 | outputs=16 75 | inputs=rnorm1a,rnorm1b,rnorm1c 76 | initW=0.1,0.1,0.1 77 | gpu=0 78 | 79 | [fc16b] 80 | type=fc 81 | outputs=16 82 | inputs=rnorm1b,rnorm1c,rnorm1a 83 | initW=0.1,0.1,0.1 84 | gpu=1 85 | 86 | [fc16c] 87 | type=fc 88 | outputs=16 89 | inputs=rnorm1c,rnorm1a,rnorm1a 90 | initW=0.1,0.1,0.1 91 | gpu=2 92 | 93 | [concat] 94 | type=concat 95 | inputs=fc16a,fc16c,fc16b 96 | 97 | [fc10] 98 | type=fc 99 | inputs=concat 100 | outputs=10 101 | initW=0.08 102 | gpu=0 103 | 104 | [probs] 105 | type=softmax 106 | 
def draw_fig(test_imgs, tops):
    """Render the query images next to their current nearest matches.

    One pylab figure is drawn per group of IMGS_PER_FIGURE query images
    (TEST_IMGS // IMGS_PER_FIGURE figures, numbered from 1). In each figure
    every row holds one query image in the leftmost tile followed by
    TOP_IMGS tiles for its matches; a match whose 'img' entry is None
    leaves its tile black.

    test_imgs -- array indexed as test_imgs[:, k]; each column is reshaped
                 to (3, IMG_SIZE, IMG_SIZE).
    tops      -- tops[k][j] is a dict whose 'img' value is either None or
                 an array reshaped the same way.
    """
    # Tiles are laid out on a pitch of IMG_SIZE + 1, leaving one zero-valued
    # pixel between neighbouring tiles.
    pitch = IMG_SIZE + 1
    canvas_shape = (3, pitch * IMGS_PER_FIGURE - 1, pitch * (1 + TOP_IMGS) + 3)
    tile_shape = (3, IMG_SIZE, IMG_SIZE)

    # Integer operands, so // matches the floor division the script relies on.
    for fig_no in xrange(TEST_IMGS // IMGS_PER_FIGURE):
        pl.figure(fig_no + 1, figsize=(15, 15))
        pl.clf()
        canvas = n.zeros(canvas_shape, dtype=n.single)

        for row in xrange(IMGS_PER_FIGURE):
            query = fig_no * IMGS_PER_FIGURE + row
            top_px = pitch * row
            rows = slice(top_px, top_px + IMG_SIZE)

            # Leftmost tile: the query image itself.
            canvas[:, rows, :IMG_SIZE] = test_imgs[:, query].reshape(*tile_shape)

            for col in xrange(TOP_IMGS):
                match_img = tops[query][col]['img']
                if match_img is None:
                    continue
                # Match tiles start 4 pixels to the right of the query tile.
                left_px = IMG_SIZE + 4 + col * pitch
                canvas[:, rows, left_px:left_px + IMG_SIZE] = match_img.reshape(*tile_shape)

        # Presumably rescales 0..255 pixel values into 0..1 for imshow -- confirm
        # against the data files.
        canvas /= 255
        # (channel, y, x) -> (y, x, channel), the layout imshow is given here.
        pl.imshow(canvas.transpose(1, 2, 0), interpolation='lanczos')
0, 'idx': 0, 'img': None} for i in xrange(TOP_IMGS)] for j in xrange(TEST_IMGS)] 49 | 50 | pl.ion() 51 | for b in xrange(1, 1335): 52 | dic = unpickle(os.path.join(ftr_dir, 'data_batch_%d' % b)) 53 | dicimgs = unpickle(os.path.join(imnet_dir, 'data_batch_%d' % b)) 54 | t = time() 55 | dists = [n.sum((data[i,:] - dic['data'])**2, axis=1) for i in xrange(TEST_IMGS)] 56 | minidx = [d.argmin() for d in dists] 57 | print dists[0].shape 58 | for i, dist, midx, top in zip(xrange(TEST_IMGS), dists, minidx, tops): 59 | k = TOP_IMGS 60 | while k > 0 and dist[midx] < top[k - 1]['dist']: 61 | k -= 1 62 | if k < TOP_IMGS: 63 | top.insert(k, {'dist': dist[midx], 'batch': b, 'idx': midx, 'img': dicimgs['data'][:,midx].copy()}) 64 | top.pop() 65 | #print top 66 | del dic 67 | del dicimgs 68 | del dists 69 | del minidx 70 | gc.collect() 71 | #print tops 72 | print "Finished training batch %d (%f sec)" % (b, time() - t) 73 | if b % 50 == 0: 74 | draw_fig(test_imgs, tops) 75 | pl.draw() 76 | pl.ioff() 77 | draw_fig(test_imgs, tops) 78 | pl.show() 79 | -------------------------------------------------------------------------------- /fix-big-imgnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from PIL import Image 4 | from StringIO import StringIO 5 | from util import * 6 | 7 | src = '/ais/gobi3/u/ilya/jpg_valid_2010_85/' 8 | dst = '/ais/gobi3/u/kriz/lsvrc-2010-jpg/' 9 | 10 | BATCH_SIZE = 1024 11 | 12 | def save_batch(c_strings, c_labels, c_wnids, out_b): 13 | pickle(os.path.join(dst, 'data_batch_%d' % out_b), (c_strings, c_labels, c_wnids)) 14 | 15 | return out_b + 1 16 | if __name__ == "__main__": 17 | c_strings = [] 18 | c_labels = [] 19 | c_wnids = [] 20 | out_b = 2000 21 | for b in xrange(49): 22 | failed = 0 23 | strings, sizes, labels = unpickle(os.path.join(src, '%s' % b)) 24 | for s,l in zip(strings, labels): 25 | try: 26 | im = Image.open(StringIO(s)).convert('RGB') 27 | c_strings += [s] 28 | c_labels += 
# Input directory of source batches (files named 0, 1, 2, ...) and output
# directory for the re-batched data.
src = '/ais/gobi3/u/ilya/flickr_85/'
dst = '/ais/gobi3/u/kriz/flickr-85-1024/'

# Number of images written to each output batch.
BATCH_SIZE = 2048


def save_batch(c_strings, c_sizes, c_labels, out_b):
    """Write one output batch and return the index for the next one.

    The three lists are pickled together as a tuple to
    dst/data_batch_<out_b>.
    """
    pickle(os.path.join(dst, 'data_batch_%d' % out_b), (c_strings, c_sizes, c_labels))
    return out_b + 1


if __name__ == "__main__":
    # Accumulators for the output batch currently being filled.
    c_strings = []
    c_sizes = []
    c_labels = []
    out_b = 1
    for b in xrange(977):
        failed = 0
        strings, sizes, labels = unpickle(os.path.join(src, '%s' % b))
        for s, z, l in zip(strings, sizes, labels):
            # Decode only to check that the image data is readable; the
            # decoded image itself is discarded. Only the decode is guarded:
            # an IOError raised while *writing* a batch must not be counted as
            # a bad image, because swallowing it would leave the accumulators
            # past BATCH_SIZE and the equality test below would never fire
            # again.
            try:
                Image.open(StringIO(s)).convert('RGB')
            except IOError:
                failed += 1
                continue

            c_strings.append(s)
            c_sizes.append(z)
            c_labels.append(l)

            if len(c_strings) == BATCH_SIZE:
                out_b = save_batch(c_strings, c_sizes, c_labels, out_b)
                c_strings = []
                c_sizes = []
                c_labels = []
        print("Batch %d failed: %d" % (b, failed))

    # Flush whatever is left over as a final, shorter batch.
    if c_strings:
        save_batch(c_strings, c_sizes, c_labels, out_b)
# Type object emitted for each supported C++ parameter type. An empty string
# means the parameter is parsed directly through its argstring_mappings format
# unit; a non-empty value is the type object placed after an "O!" format unit.
pytype_mappings = {"float": "",
                   "int": "",
                   "bool": "",
                   "PyListObject": "PyList_Type"}
# Format unit appended to ${ARG_STRING} for directly parsed parameters.
argstring_mappings = {"float": "d",
                      "bool": "i",
                      "int": "i"}
# C type of the local variable declared in ${INIT_VARS} for each parameter.
init_type_mappings = {"float": "double",
                      "int": "int",
                      "bool": "int",
                      "PyListObject": "PyListObject*"}

if __name__ == "__main__":
    # Split "Name::Name(type a, type b, ...)" into the class name and the raw
    # parameter list.
    m = re.match(r"^(\w+)::\w+\((.*)\)$", MODEL_CONSTRUCTOR, re.MULTILINE | re.DOTALL)
    if m is None:
        sys.stderr.write("Could not parse MODEL_CONSTRUCTOR: %r\n" % MODEL_CONSTRUCTOR)
        sys.exit(1)
    model_name = m.group(1)
    # Skip empty entries so a constructor without parameters is accepted.
    model_params = [p for p in m.group(2).split(',') if p.strip()]

    with open('./pyInterface.cutemp', 'r') as f:
        template = f.read()
    template = template.replace("${MODEL_NAME}", model_name)
    template = template.replace("${MODEL_NAME_LOWER}", model_name.lower())

    init_vars = ""
    init_parse = ""
    arg_string = ""
    model_preamble = ""
    model_start = " model = new %s(" % model_name
    # Continuation lines of the constructor call are padded to this width so
    # the arguments line up under the first one.
    space_padding = len(model_start)
    last = len(model_params) - 1
    for i, p in enumerate(model_params):
        # Split on any run of whitespace (the declaration may contain newlines
        # or repeated spaces): [type, name].
        param = p.split()
        ptype = re.match(r"^([\w<>\*]+)", param[0]).group(1).strip('*')
        pname = param[1].strip('*')
        # fooBar -> pyFooBar
        pname = "py" + pname[0].upper() + pname[1:]
        if ptype not in pytype_mappings:
            # stdout carries the generated source, so report on stderr.
            sys.stderr.write("Unknown type: %s\n" % ptype)
            sys.exit(1)
        mapping = pytype_mappings[ptype]
        if mapping == "":
            arg_string += argstring_mappings[ptype]
            init_parse += " &%s" % pname
        else:
            # "O!" takes a type object followed by the destination pointer.
            arg_string += "O!"
            init_parse += " &%s, &%s" % (mapping, pname)

        # No padding for the first argument; it follows the opening paren.
        model_start += "%*s%s" % (space_padding * (i > 0), "", pname)

        if i < last:
            init_parse += ",\n"
            model_start += ",\n"
        init_vars += " %s %s;\n" % (init_type_mappings[ptype], pname)
    model_start += ");\n"
    template = template.replace("${INIT_VARS}", init_vars)
    template = template.replace("${INIT_PARSE}", init_parse)
    template = template.replace("${ARG_STRING}", arg_string)
    template = template.replace("${MODEL_START}", model_preamble + model_start)

    print(template)
/*
 * Wrapper for dictionary mapping cost name to vector of returned values.
 *
 * Besides the value map, the class holds a second map of type CostCoeffMap
 * and a case count. Only the declaration is given here; the member
 * definitions are in the implementation file.
 */
class Cost {
private:
    // Presumably the numCases value passed to the constructor -- confirm in the implementation.
    int _numCases;
    // Cost name -> vector of returned values (see the class comment).
    CostMap _costMap;
    // Presumably cost name -> coefficient used by getValue() -- confirm in the implementation.
    CostCoeffMap _costCoeffMap;
public:
    Cost(int numCases);
    // NOTE(review): "std::vector" has no template argument here; it looks as if
    // the angle-bracketed part was lost. Given the CostLayer forward declaration
    // above, it was presumably a vector of CostLayer pointers -- confirm.
    Cost(int numCases, std::vector& costs);
    // Indexed by cost name; returns a reference to a value vector (doublev).
    doublev& operator [](const std::string s);
    CostMap& getCostMap();
    CostCoeffMap& getCostCoeffMap();
    int getNumCases();
    /*
     * Returns sum of first values returned by all the costs, weighted by the cost coefficients.
     */
    double getValue();
    // NOTE(review): the exact semantics of +=, |= and /= are defined in the
    // implementation and cannot be stated from this header alone.
    Cost& operator += (Cost& er);
    Cost& operator |= (Cost& er);
    Cost& operator /= (const double v);
    virtual ~Cost();
};
10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef HOSTMEM_CUH 28 | #define HOSTMEM_CUH 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | /* 35 | * A utility class for transferring untyped memory from CPU to GPU and vice versa. 
/*
 * The maximum learning rate is _baseRate.
 * The minimum learning rate is _baseRate / _tgtFactor.
 *
 * These classes define annealing schedules that interpolate between these
 * two extrema.
 *
 * NOTE(review): no member named _tgtFactor is declared in this header; the
 * subclasses receive a tgtFactor constructor argument instead.
 */
class LearningRateSchedule {
protected:
    // _randnSpare/_haveRandnSpare presumably cache a spare sample for randn() -- confirm in the implementation.
    double _baseRate, _noiseStdev, _randnSpare;
    bool _haveRandnSpare;
    // Schedule-specific hook; every subclass below redeclares it.
    virtual double _getRate(double progress);
    double randn();
    double rand() const;
    double abs(double x) const;
public:
    LearningRateSchedule(double base);
    LearningRateSchedule(double base, double noiseStdev);
    // Public, non-virtual entry point. Presumably delegates to _getRate() and
    // applies the noise configured by noiseStdev -- confirm in the implementation.
    double getRate(double progress);
    double getBaseRate() const;
    virtual ~LearningRateSchedule();

    // Factory taking a Python object and the base rate; returns a reference.
    // NOTE(review): which keys of lrsDict are read, and who owns the returned
    // object, is not visible from this header.
    static LearningRateSchedule& make(PyObject* lrsDict, double base);
};

/*
 * Schedule that stores a final rate. Presumably interpolates linearly from
 * the base rate to it, as the name suggests -- confirm in the implementation.
 */
class LinearLRS : public LearningRateSchedule {
protected:
    double _finalRate;
public:
    LinearLRS(double base, double tgtFactor, double noiseStdev);
    virtual double _getRate(double progress);
};

/*
 * Schedule parameterised by a single exponent-like value (_pow). Presumably
 * an exponential decay -- confirm in the implementation.
 */
class ExpLRS : public LearningRateSchedule {
protected:
    double _pow;
public:
    ExpLRS(double baseRate, double tgtFactor, double noiseStdev);
    virtual double _getRate(double progress);
};

/*
 * Schedule parameterised by two coefficients (_alpha, _beta). Presumably a
 * tanh-shaped curve, going by the class name -- confirm in the implementation.
 */
class TanhLRS : public LearningRateSchedule {
protected:
    double _alpha, _beta;
public:
    TanhLRS(double baseRate, double tgtFactor, double noiseStdev);
    virtual double _getRate(double progress);
};

/*
 * Schedule that stores a list of rates and takes a step count (numSteps).
 */
class DiscreteExpLRS : public LearningRateSchedule {
protected:
    // NOTE(review): the element type of this vector is missing here; it looks
    // as if the template argument was lost -- confirm against the original header.
    std::vector _rates;
public:
    DiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps);
    virtual double _getRate(double progress);
};

/*
 * Variant of DiscreteExpLRS that keeps the inherited rate list but replaces
 * the _getRate() hook.
 */
class JumpyDiscreteExpLRS : public DiscreteExpLRS {
public:
    JumpyDiscreteExpLRS(double baseRate, double tgtFactor, double noiseStdev, int numSteps);
    virtual double _getRate(double progress);
};
(akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Base class for quantizing an NVMatrix and restoring it again.
 *
 * The public quantize()/dequantize() methods are non-virtual; subclasses
 * customise behaviour through the protected virtual hooks _quantize() and
 * _dequantize(), as HalfQuantizer does below.
 */
class Quantizer {
protected:
    // Presumably the buffer holding the quantized data between quantize() and
    // dequantize() -- confirm in the implementation.
    NVMatrix* _quantized;
    // Presumably the shape and transposed-ness of the last source matrix -- confirm.
    int _numRows, _numCols;
    bool _trans;
    virtual void _quantize(NVMatrix& src, NVMatrix& tgt);
    virtual void _dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput);
public:
    Quantizer();
    virtual ~Quantizer();
    void quantize(NVMatrix& src, NVMatrix& tgt);
    // Two overloads: without scale factors, and with explicit ones.
    // NOTE(review): the defaults used by the one-argument form, and how
    // scaleTarget/scaleOutput are combined, are not visible from this header.
    void dequantize(NVMatrix& tgt);
    void dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput);

    // Factory taking a Python object; returns a reference.
    // NOTE(review): the keys read from qDict and the ownership of the returned
    // object are not visible from this header.
    static Quantizer& make(PyObject* qDict);
};

/*
 * Quantizer that overrides both hooks. Presumably converts to and from
 * half-precision floats, going by the name -- confirm in the implementation.
 */
class HalfQuantizer : public Quantizer {
protected:
    void _quantize(NVMatrix& src, NVMatrix& tgt);
    void _dequantize(NVMatrix& tgt, float scaleTarget, float scaleOutput);
public:
    HalfQuantizer();
};
def get_src():
    """Load the source checkpoint and return its layer dictionary.

    The checkpoint is re-read on every call.
    """
    checkpoint = IGPUModel.load_checkpoint('/nobackup/kriz/tmp/ConvNet__2012-09-19_23.29.04')
    model_state = checkpoint['model_state']
    return model_state['layers']

def makew(name, idx, shapes, params):
    """Return the initial weight matrix number `idx` for layer `name`.

    The matrix is taken from the checkpoint layer named by params[0], except
    for weight matrix 2 of 'localcombine', which gets fresh Gaussian noise
    (standard deviation 0.01) of shape (shapes[0], shapes[1]) in single
    precision, C order.
    """
    layers = get_src()
    layer_key = params[0]
    use_checkpoint = not (name == 'localcombine' and idx == 2)
    if use_checkpoint:
        return layers[layer_key]['weights'][idx]
    noise = 0.01 * nr.randn(shapes[0], shapes[1])
    return n.array(noise, dtype=n.single, order='C')

def makeb(name, shapes, params):
    """Return the biases of the checkpoint layer named by params[0]."""
    layers = get_src()
    layer_key = params[0]
    return layers[layer_key]['biases']

def makec(name, idx, shapes, params):
    """Return entry `idx` of 'filterConns' from the checkpoint layer named by params[0]."""
    layers = get_src()
    layer_key = params[0]
    return layers[layer_key]['filterConns'][idx]
5 | 6 | [conv1] 7 | epsW=0.001 8 | epsB=0.002 9 | momW=0.9 10 | momB=0.9 11 | wc=0.004 12 | schedW=linear[1,1] 13 | 14 | [conv2] 15 | epsW=0.001 16 | epsB=0.002 17 | momW=0.9 18 | momB=0.9 19 | wc=0.004 20 | schedW=linear[1,1] 21 | 22 | [conv3] 23 | epsW=0.001 24 | epsB=0.002 25 | momW=0.9 26 | momB=0.9 27 | wc=0.004 28 | schedW=linear[1,1] 29 | 30 | [fc10] 31 | epsW=0.001 32 | epsB=0.002 33 | momW=0.9 34 | momB=0.9 35 | wc=1 36 | schedW=linear[1,1] 37 | 38 | [logprob] 39 | coeff=1 40 | 41 | [rnorm1] 42 | scale=0.00005 43 | pow=.75 44 | 45 | [rnorm2] 46 | scale=0.00005 47 | pow=.75 48 | -------------------------------------------------------------------------------- /layers-cifar/layer-params-conv-local-13pct-noisylr.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 - layer parameter file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [conv1] 5 | epsW=0.00001 6 | epsB=0.002 7 | momW=0.9 8 | momB=0.9 9 | wc=0.00 10 | schedW=linear[1,1] 11 | 12 | [conv2] 13 | epsW=0.00001 14 | epsB=0.002 15 | momW=0.9 16 | momB=0.9 17 | wc=0.00 18 | schedW=linear[1,1] 19 | 20 | [local3] 21 | epsW=0.00001 22 | epsB=0.002 23 | momW=0.9 24 | momB=0.9 25 | wc=0.004 26 | schedW=linear[1,1] 27 | 28 | [local4] 29 | epsW=0.00001 30 | epsB=0.002 31 | momW=0.9 32 | momB=0.9 33 | wc=0.004 34 | schedW=linear[1,1] 35 | 36 | [fc10] 37 | epsW=0.00001 38 | epsB=0.002 39 | momW=0.9 40 | momB=0.9 41 | wc=0.004 42 | schedW=linear[1,1] 43 | 44 | [logprob] 45 | coeff=1 46 | -------------------------------------------------------------------------------- /layers-cifar/layers-18pct.cfg: -------------------------------------------------------------------------------- 1 | # 18% error on CIFAR-10 in 20 minutes - layer definition file 2 | 3 | [data] 4 | type=data 5 | dataIdx=0 6 | 7 | [labels] 8 | type=data 9 | dataIdx=1 10 | 11 | [conv1] 12 | type=conv 13 | inputs=data 14 | channels=3 15 | filters=32 16 | 
padding=2 17 | stride=1 18 | filterSize=5 19 | initW=0.0001 20 | partialSum=4 21 | sharedBiases=1 22 | gpu=0 23 | 24 | [pool1] 25 | type=pool 26 | pool=max 27 | inputs=conv1 28 | start=0 29 | sizeX=3 30 | stride=2 31 | outputsX=0 32 | channels=32 33 | neuron=relu 34 | 35 | [rnorm1] 36 | type=rnorm 37 | inputs=pool1 38 | channels=32 39 | size=3 40 | 41 | [conv2] 42 | type=conv 43 | inputs=rnorm1 44 | filters=32 45 | padding=2 46 | stride=1 47 | filterSize=5 48 | channels=32 49 | neuron=relu 50 | initW=0.01 51 | partialSum=4 52 | sharedBiases=1 53 | 54 | [pool2] 55 | type=pool 56 | pool=avg 57 | inputs=conv2 58 | start=0 59 | sizeX=3 60 | stride=2 61 | outputsX=0 62 | channels=32 63 | 64 | [rnorm2] 65 | type=rnorm 66 | inputs=pool2 67 | channels=32 68 | size=3 69 | 70 | [conv3] 71 | type=conv 72 | inputs=rnorm2 73 | filters=64 74 | padding=2 75 | stride=1 76 | filterSize=5 77 | channels=32 78 | neuron=relu 79 | initW=0.01 80 | partialSum=4 81 | sharedBiases=1 82 | 83 | [pool3] 84 | type=pool 85 | pool=avg 86 | inputs=conv3 87 | start=0 88 | sizeX=3 89 | stride=2 90 | outputsX=0 91 | channels=64 92 | 93 | [fc10] 94 | type=fc 95 | outputs=10 96 | inputs=pool3 97 | initW=0.01 98 | 99 | [probs] 100 | type=softmax 101 | inputs=fc10 102 | 103 | [logprob] 104 | type=cost.logreg 105 | inputs=labels,probs 106 | gpu=0 107 | -------------------------------------------------------------------------------- /layers-cifar/layers-conv-local-13pct.cfg: -------------------------------------------------------------------------------- 1 | # 13% error on CIFAR-10 in 20 minutes - layer definition file 2 | # See methodology: http://code.google.com/p/cuda-convnet/wiki/Methodology 3 | 4 | [data] 5 | type=data 6 | dataIdx=0 7 | 8 | [labels] 9 | type=data 10 | dataIdx=1 11 | 12 | [conv1] 13 | type=conv 14 | inputs=data 15 | channels=3 16 | filters=64 17 | padding=2 18 | stride=1 19 | filterSize=5 20 | neuron=relu 21 | initW=0.0001 22 | partialSum=4 23 | sharedBiases=1 24 | gpu=0 25 | 26 | 
[pool1] 27 | type=pool 28 | pool=max 29 | inputs=conv1 30 | start=0 31 | sizeX=3 32 | stride=2 33 | outputsX=0 34 | channels=64 35 | 36 | [conv2] 37 | type=conv 38 | inputs=pool1 39 | filters=64 40 | padding=2 41 | stride=1 42 | filterSize=5 43 | channels=64 44 | neuron=relu 45 | initW=0.01 46 | partialSum=8 47 | sharedBiases=1 48 | 49 | [pool2] 50 | type=pool 51 | pool=max 52 | inputs=conv2 53 | start=0 54 | sizeX=3 55 | stride=2 56 | outputsX=0 57 | channels=64 58 | 59 | [local3] 60 | type=local 61 | inputs=pool2 62 | filters=32 63 | padding=1 64 | stride=1 65 | filterSize=3 66 | channels=64 67 | neuron=relu 68 | initW=0.04 69 | 70 | [local4] 71 | type=local 72 | inputs=local3 73 | filters=32 74 | padding=1 75 | stride=1 76 | filterSize=3 77 | channels=32 78 | neuron=relu 79 | initW=0.04 80 | 81 | [fc10] 82 | type=fc 83 | outputs=10 84 | inputs=local4 85 | initW=0.01 86 | neuron=ident 87 | 88 | [probs] 89 | type=softmax 90 | inputs=fc10 91 | 92 | [logprob] 93 | type=cost.logreg 94 | inputs=labels,probs 95 | gpu=0 96 | -------------------------------------------------------------------------------- /layers/layer-params-100.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0 16 | 17 | [conv2a] 18 | epsW=0.001,0.001,0.001 19 | epsB=0.002 20 | momW=0.9,0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005,0.0005 23 | wball=0,0,0 24 | 25 | [conv2b] 26 | epsW=0.001,0.001,0.001 27 | epsB=0.002 28 | momW=0.9,0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005,0.0005 31 | wball=0,0,0 32 | 33 | [conv3a] 34 | epsW=0.001,0.001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.001,0.001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | 
epsW=0.001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.001,0.001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.001,0.001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.001,0.001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.001,0.001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.001,0.001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.001 139 | pow=0.25 140 | 141 | [rnorm1b] 142 | scale=0.001 143 | pow=0.25 144 | 145 | [rnorm2a] 146 | scale=0.001 147 | pow=0.25 148 | 149 | [rnorm2b] 150 | scale=0.001 151 | pow=0.25 152 | 153 | # on guppy7 154 | # this is like #97 (on gpu) but with different rnorm coeffs 155 | # /nobackup/kriz/tmp/ConvNet__2012-06-27_14.03.18 156 | # epoch 15: set epsw to 0.001 from 0.01 157 | # epoch 43: killed, seems slightly worse than using my old rnorm coeffs 158 | -------------------------------------------------------------------------------- /layers/layer-params-107.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.002 4 | momW=0.9 
5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | wcnorm=0.00 9 | wcNormMin=0.001 10 | wcNormMax=0.002 11 | 12 | [conv1b] 13 | epsW=0.01 14 | epsB=0.002 15 | momW=0.9 16 | momB=0.9 17 | wc=0.0005 18 | wball=0.00 19 | wcnorm=0.00 20 | wcNormMin=0.001 21 | wcNormMax=0.002 22 | 23 | [conv2a] 24 | epsW=0.01,0.01 25 | epsB=0.002 26 | momW=0.9,0.9 27 | momB=0.9 28 | wc=0.0005,0.0005 29 | wball=0.00,0.00 30 | wcNormMin=0.001,0 31 | wcNormMax=0.002,0 32 | 33 | [conv2b] 34 | epsW=0.01,0.01 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0.00,0.00 40 | wcNormMin=0.001,0 41 | wcNormMax=0.002,0 42 | 43 | [conv3a] 44 | epsW=0.01,0.01 45 | epsB=0.002 46 | momW=0.9,0.9 47 | momB=0.9 48 | wc=0.0005,0.0005 49 | wball=0,0 50 | 51 | [conv3b] 52 | epsW=0.01,0.01 53 | epsB=0.002 54 | momW=0.9,0.9 55 | momB=0.9 56 | wc=0.0005,0.0005 57 | wball=0,0 58 | 59 | [conv4a] 60 | epsW=0.01 61 | epsB=0.002 62 | momW=0.9 63 | momB=0.9 64 | wc=0.0005 65 | wball=0 66 | 67 | [conv4b] 68 | epsW=0.01 69 | epsB=0.002 70 | momW=0.9 71 | momB=0.9 72 | wc=0.0005 73 | wball=0 74 | 75 | [conv5a] 76 | epsW=0.01 77 | epsB=0.002 78 | momW=0.9 79 | momB=0.9 80 | wc=0.0005 81 | wball=0 82 | 83 | [conv5b] 84 | epsW=0.01 85 | epsB=0.002 86 | momW=0.9 87 | momB=0.9 88 | wc=0.0005 89 | wball=0 90 | 91 | [fc2048a] 92 | epsW=0.01,0.01 93 | epsB=0.002 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | 99 | [fc2048b] 100 | epsW=0.01,0.01 101 | epsB=0.002 102 | momW=0.9,0.9 103 | momB=0.9 104 | wc=0.0005,0.0005 105 | wball=0,0 106 | 107 | [fc2048ba] 108 | epsW=0.01,0.01 109 | epsB=0.002 110 | momW=0.9,0.9 111 | momB=0.9 112 | wc=0.0005,0.0005 113 | wball=0,0 114 | 115 | [fc2048bb] 116 | epsW=0.01,0.01 117 | epsB=0.002 118 | momW=0.9,0.9 119 | momB=0.9 120 | wc=0.0005,0.0005 121 | wball=0,0 122 | 123 | [fc1000] 124 | epsW=0.01,0.01 125 | epsB=0.002 126 | momW=0.9,0.9 127 | momB=0.9 128 | wc=0.0005,0.0005 129 | wball=0,0 130 | 131 | [logprob] 132 | coeff=1 133 | topk=5 134 
| 135 | [hs1a] 136 | enable=true 137 | 138 | [hs2a] 139 | enable=true 140 | 141 | [hs1b] 142 | enable=true 143 | 144 | [hs2b] 145 | enable=true 146 | 147 | [rnorm1a] 148 | scale=0.0001 149 | pow=0.75 150 | 151 | [rnorm1b] 152 | scale=0.0001 153 | pow=0.75 154 | 155 | [rnorm2a] 156 | scale=0.0001 157 | pow=0.75 158 | 159 | [rnorm2b] 160 | scale=0.0001 161 | pow=0.75 162 | 163 | # this is like #101 but uses wcnorm in conv1/conv2. so it uses def file #101. 164 | # it's also like #104, but #104 only does wcnorm on conv2 165 | # on guppy7 166 | # logs/layers-107.log 167 | # /nobackup/kriz/tmp/ConvNet__2012-07-09_19.20.14 168 | -------------------------------------------------------------------------------- /layers/layer-params-109.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | wcnorm=0.00 9 | 10 | [conv1b] 11 | epsW=0.001 12 | epsB=0.002 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | wcnorm=0.00 18 | 19 | [conv2a] 20 | epsW=0.001,0.001 21 | epsB=0.002 22 | momW=0.9,0.9 23 | momB=0.9 24 | wc=0.0005,0.0005 25 | wball=0.00,0.00 26 | wcNormMin=0.001,0 27 | wcNormMax=0.002,0 28 | 29 | [conv2b] 30 | epsW=0.001,0.001 31 | epsB=0.002 32 | momW=0.9,0.9 33 | momB=0.9 34 | wc=0.0005,0.0005 35 | wball=0.00,0.00 36 | wcNormMin=0.001,0 37 | wcNormMax=0.002,0 38 | 39 | [conv3a] 40 | epsW=0.001,0.001 41 | epsB=0.002 42 | momW=0.9,0.9 43 | momB=0.9 44 | wc=0.0005,0.0005 45 | wball=0,0 46 | 47 | [conv3b] 48 | epsW=0.001,0.001 49 | epsB=0.002 50 | momW=0.9,0.9 51 | momB=0.9 52 | wc=0.0005,0.0005 53 | wball=0,0 54 | 55 | [conv4a] 56 | epsW=0.001 57 | epsB=0.002 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | 63 | [conv4b] 64 | epsW=0.001 65 | epsB=0.002 66 | momW=0.9 67 | momB=0.9 68 | wc=0.0005 69 | wball=0 70 | 71 | [conv5a] 72 | epsW=0.001 73 | epsB=0.002 74 | momW=0.9 75 | momB=0.9 76 | wc=0.0005 77 | wball=0 78 | 79 
| [conv5b] 80 | epsW=0.001 81 | epsB=0.002 82 | momW=0.9 83 | momB=0.9 84 | wc=0.0005 85 | wball=0 86 | 87 | [fc1536a] 88 | epsW=0.001,0.001 89 | epsB=0.002 90 | momW=0.9,0.9 91 | momB=0.9 92 | wc=0.0005,0.0005 93 | wball=0,0 94 | 95 | [fc1536b] 96 | epsW=0.001,0.001 97 | epsB=0.002 98 | momW=0.9,0.9 99 | momB=0.9 100 | wc=0.0005,0.0005 101 | wball=0,0 102 | 103 | [fc1536ba] 104 | epsW=0.001,0.001 105 | epsB=0.002 106 | momW=0.9,0.9 107 | momB=0.9 108 | wc=0.0005,0.0005 109 | wball=0,0 110 | 111 | [fc1536bb] 112 | epsW=0.001,0.001 113 | epsB=0.002 114 | momW=0.9,0.9 115 | momB=0.9 116 | wc=0.0005,0.0005 117 | wball=0,0 118 | 119 | [fc1536ca] 120 | epsW=0.001,0.001 121 | epsB=0.002 122 | momW=0.9,0.9 123 | momB=0.9 124 | wc=0.0005,0.0005 125 | wball=0,0 126 | 127 | [fc1536cb] 128 | epsW=0.001,0.001 129 | epsB=0.002 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | 135 | [fc1000] 136 | epsW=0.001,0.001 137 | epsB=0.002 138 | momW=0.9,0.9 139 | momB=0.9 140 | wc=0.0005,0.0005 141 | wball=0,0 142 | 143 | [logprob] 144 | coeff=1 145 | topk=5 146 | 147 | [hs1a] 148 | enable=true 149 | 150 | [hs2a] 151 | enable=true 152 | 153 | [hs3a] 154 | enable=true 155 | 156 | [hs1b] 157 | enable=true 158 | 159 | [hs2b] 160 | enable=true 161 | 162 | [hs3b] 163 | enable=true 164 | 165 | [rnorm1a] 166 | scale=0.0001 167 | pow=0.75 168 | 169 | [rnorm1b] 170 | scale=0.0001 171 | pow=0.75 172 | 173 | [rnorm2a] 174 | scale=0.0001 175 | pow=0.75 176 | 177 | [rnorm2b] 178 | scale=0.0001 179 | pow=0.75 180 | 181 | # this is like #101 but uses wcnorm in conv2 and also has 3 fc layers. 
182 | # on guppy9 183 | # logs/layers-109.log 184 | # /nobackup/kriz/tmp/ConvNet__2012-07-10_00.46.52 185 | # epoch 17: set epsw to 0.001 from 0.01 186 | # epoch 26: enabled dropout on hs3a,hs3b 187 | # epoch 27: killed -- overfitting as feared 188 | -------------------------------------------------------------------------------- /layers/layer-params-110.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.00001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.00001 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.00001,0.00001 19 | epsB=0.002 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.00001,0.00001 27 | epsB=0.002 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.0001,0.0001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.0001,0.0001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.0001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.0001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.0001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.0001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.0001,0.0001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.0001,0.0001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.0001,0.0001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | 
wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.0001,0.0001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.0001,0.0001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | # this is like #101 but without rnorm. it's about time i found out how helpful it is to modern nets 138 | # on guppy7 139 | # logs/layers-110.log 140 | # /nobackup/kriz/tmp/ConvNet__2012-07-11_00.26.55 141 | # epoch 19: set epsw to 0.001 from 0.01 142 | # epoch 46: set epsw to 0.0001 from 0.001 143 | # epoch 67: set epsw to 0.00001 from 0.0001 on conv1,conv2 144 | # set color noise to 0 from 0.1 145 | # epoch 66: set epsw to 0 from 0.00001 on conv1,conv2 146 | # epoch 75: killed, it looks to be about 1% worse than #101 147 | -------------------------------------------------------------------------------- /layers/layer-params-111.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | wcnorm=0.00 9 | 10 | [conv1b] 11 | epsW=0.001 12 | epsB=0.002 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | wcnorm=0.00 18 | 19 | [conv2a] 20 | epsW=0.001,0.001 21 | epsB=0.002 22 | momW=0.9,0.9 23 | momB=0.9 24 | wc=0.0005,0.0005 25 | wball=0.00,0.00 26 | wcNormMin=0.001,0 27 | wcNormMax=0.002,0 28 | 29 | [conv2b] 30 | epsW=0.001,0.001 31 | epsB=0.002 32 | momW=0.9,0.9 33 | momB=0.9 34 | wc=0.0005,0.0005 35 | wball=0.00,0.00 36 | wcNormMin=0.001,0 37 | wcNormMax=0.002,0 38 | 39 | [conv3a] 40 | epsW=0.001,0.001 41 | epsB=0.002 42 | momW=0.9,0.9 43 | momB=0.9 44 | wc=0.0005,0.0005 45 | wball=0,0 46 | 47 | [conv3b] 48 | epsW=0.001,0.001 49 | epsB=0.002 50 | 
momW=0.9,0.9 51 | momB=0.9 52 | wc=0.0005,0.0005 53 | wball=0,0 54 | 55 | [conv4a] 56 | epsW=0.001 57 | epsB=0.002 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | 63 | [conv4b] 64 | epsW=0.001 65 | epsB=0.002 66 | momW=0.9 67 | momB=0.9 68 | wc=0.0005 69 | wball=0 70 | 71 | [conv5a] 72 | epsW=0.001 73 | epsB=0.002 74 | momW=0.9 75 | momB=0.9 76 | wc=0.0005 77 | wball=0 78 | 79 | [conv5b] 80 | epsW=0.001 81 | epsB=0.002 82 | momW=0.9 83 | momB=0.9 84 | wc=0.0005 85 | wball=0 86 | 87 | [fc2048a] 88 | epsW=0.001,0.001 89 | epsB=0.002 90 | momW=0.9,0.9 91 | momB=0.9 92 | wc=0.0005,0.0005 93 | wball=0,0 94 | 95 | [fc2048b] 96 | epsW=0.001,0.001 97 | epsB=0.002 98 | momW=0.9,0.9 99 | momB=0.9 100 | wc=0.0005,0.0005 101 | wball=0,0 102 | 103 | [fc2048ba] 104 | epsW=0.001,0.001 105 | epsB=0.002 106 | momW=0.9,0.9 107 | momB=0.9 108 | wc=0.0005,0.0005 109 | wball=0,0 110 | 111 | [fc2048bb] 112 | epsW=0.001,0.001 113 | epsB=0.002 114 | momW=0.9,0.9 115 | momB=0.9 116 | wc=0.0005,0.0005 117 | wball=0,0 118 | 119 | [fc2048ca] 120 | epsW=0.001,0.001 121 | epsB=0.002 122 | momW=0.9,0.9 123 | momB=0.9 124 | wc=0.0005,0.0005 125 | wball=0,0 126 | 127 | [fc2048cb] 128 | epsW=0.001,0.001 129 | epsB=0.002 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | 135 | [fc1000] 136 | epsW=0.001,0.001 137 | epsB=0.002 138 | momW=0.9,0.9 139 | momB=0.9 140 | wc=0.0005,0.0005 141 | wball=0,0 142 | 143 | [logprob] 144 | coeff=1 145 | topk=5 146 | 147 | [hs1a] 148 | enable=true 149 | 150 | [hs2a] 151 | enable=true 152 | 153 | [hs3a] 154 | enable=true 155 | 156 | [hs1b] 157 | enable=true 158 | 159 | [hs2b] 160 | enable=true 161 | 162 | [hs3b] 163 | enable=true 164 | 165 | [rnorm1a] 166 | scale=0.0001 167 | pow=0.75 168 | 169 | [rnorm1b] 170 | scale=0.0001 171 | pow=0.75 172 | 173 | [rnorm2a] 174 | scale=0.0001 175 | pow=0.75 176 | 177 | [rnorm2b] 178 | scale=0.0001 179 | pow=0.75 180 | 181 | # this is like #101 but uses wcnorm in conv2 and also has 3 fc 
layers. 182 | # it's also like #109 but uses wider fc layers with dropout in all because 109 overfit 183 | # on guppy9 184 | # logs/layers-111.log 185 | # /nobackup/kriz/tmp/ConvNet__2012-07-12_23.59.48 186 | # epoch 19: set epsw to 0.001 from 0.01 187 | # epoch 42: this is quite a bit worse, and in an underfitting way, so i'm starting #114, which will be like this but the fc layers will be 3072 each instead of 2048 188 | -------------------------------------------------------------------------------- /layers/layer-params-112.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.00 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | 
momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy7 154 | # this is like #101 but with rnorm region of size 5 instead of 9 155 | # logs/layers-112.log 156 | # epoch 22: set epsw to 0.001 from 0.01 157 | # epoch 46: set epsw to 0.0001 from 0.001 158 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 159 | # set color noise to 0 from 0.1 160 | # epoch 71: set epsw to 0 from 0.00001 on conv1,conv2 161 | # epoch 79: set epsw to 0.00001 from 0.0001 162 | # epoch 90: killed 163 | # [1.6064990917001289, 0.37237829837731168, 0.16815557540767209] 164 | -------------------------------------------------------------------------------- /layers/layer-params-113.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.01 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | 
wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01,0.01,0.01 35 | epsB=0.02 36 | momW=0.9,0.9,0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005,0.0005,0.0005 39 | wball=0,0,0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01,0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9,0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005,0.0005,0.0005 47 | wball=0,0,0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.01,0.01 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # this is like #101 but with conv3 taking both conv2 and conv1 as input, and conv2 just taking the low res img as input 154 | # on guppy9 155 | 
-------------------------------------------------------------------------------- /layers/layer-params-114.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | wcnorm=0.00 9 | 10 | [conv1b] 11 | epsW=0.001 12 | epsB=0.002 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | wcnorm=0.00 18 | 19 | [conv2a] 20 | epsW=0.001,0.001 21 | epsB=0.002 22 | momW=0.9,0.9 23 | momB=0.9 24 | wc=0.0005,0.0005 25 | wball=0.00,0.00 26 | wcNormMin=0.001,0 27 | wcNormMax=0.002,0 28 | 29 | [conv2b] 30 | epsW=0.001,0.001 31 | epsB=0.002 32 | momW=0.9,0.9 33 | momB=0.9 34 | wc=0.0005,0.0005 35 | wball=0.00,0.00 36 | wcNormMin=0.001,0 37 | wcNormMax=0.002,0 38 | 39 | [conv3a] 40 | epsW=0.001,0.001 41 | epsB=0.002 42 | momW=0.9,0.9 43 | momB=0.9 44 | wc=0.0005,0.0005 45 | wball=0,0 46 | 47 | [conv3b] 48 | epsW=0.001,0.001 49 | epsB=0.002 50 | momW=0.9,0.9 51 | momB=0.9 52 | wc=0.0005,0.0005 53 | wball=0,0 54 | 55 | [conv4a] 56 | epsW=0.001 57 | epsB=0.002 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | 63 | [conv4b] 64 | epsW=0.001 65 | epsB=0.002 66 | momW=0.9 67 | momB=0.9 68 | wc=0.0005 69 | wball=0 70 | 71 | [conv5a] 72 | epsW=0.001 73 | epsB=0.002 74 | momW=0.9 75 | momB=0.9 76 | wc=0.0005 77 | wball=0 78 | 79 | [conv5b] 80 | epsW=0.001 81 | epsB=0.002 82 | momW=0.9 83 | momB=0.9 84 | wc=0.0005 85 | wball=0 86 | 87 | [fc3072a] 88 | epsW=0.001,0.001 89 | epsB=0.002 90 | momW=0.9,0.9 91 | momB=0.9 92 | wc=0.0005,0.0005 93 | wball=0,0 94 | 95 | [fc3072b] 96 | epsW=0.001,0.001 97 | epsB=0.002 98 | momW=0.9,0.9 99 | momB=0.9 100 | wc=0.0005,0.0005 101 | wball=0,0 102 | 103 | [fc3072ba] 104 | epsW=0.001,0.001 105 | epsB=0.002 106 | momW=0.9,0.9 107 | momB=0.9 108 | wc=0.0005,0.0005 109 | wball=0,0 110 | 111 | [fc3072bb] 112 | epsW=0.001,0.001 113 | epsB=0.002 114 | momW=0.9,0.9 115 | momB=0.9 116 | wc=0.0005,0.0005 117 | wball=0,0 118 
| 119 | [fc3072ca] 120 | epsW=0.001,0.001 121 | epsB=0.002 122 | momW=0.9,0.9 123 | momB=0.9 124 | wc=0.0005,0.0005 125 | wball=0,0 126 | 127 | [fc3072cb] 128 | epsW=0.001,0.001 129 | epsB=0.002 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | 135 | [fc1000] 136 | epsW=0.001,0.001 137 | epsB=0.002 138 | momW=0.9,0.9 139 | momB=0.9 140 | wc=0.0005,0.0005 141 | wball=0,0 142 | 143 | [logprob] 144 | coeff=1 145 | topk=5 146 | 147 | [hs1a] 148 | enable=true 149 | 150 | [hs2a] 151 | enable=true 152 | 153 | [hs3a] 154 | enable=true 155 | 156 | [hs1b] 157 | enable=true 158 | 159 | [hs2b] 160 | enable=true 161 | 162 | [hs3b] 163 | enable=true 164 | 165 | [rnorm1a] 166 | scale=0.0001 167 | pow=0.75 168 | 169 | [rnorm1b] 170 | scale=0.0001 171 | pow=0.75 172 | 173 | [rnorm2a] 174 | scale=0.0001 175 | pow=0.75 176 | 177 | [rnorm2b] 178 | scale=0.0001 179 | pow=0.75 180 | 181 | # this is like #101 but uses wcnorm in conv2 and also has 3 fc layers, with width 6144. 182 | # on guppy9 183 | # logs/layers-114.log 184 | # 140523240 params (incl biases) 185 | # /nobackup/kriz/tmp/ConvNet__2012-07-15_14.56.24 186 | # epoch 20: set epsw to 0.001 from 0.01 187 | # epoch 40: killed, doing worse than 115 which is the same but has only 2 fc layers 188 | -------------------------------------------------------------------------------- /layers/layer-params-115-jpeg.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | wcnorm=0.00 9 | 10 | [conv1b] 11 | epsW=0.0001 12 | epsB=0.002 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | wcnorm=0.00 18 | 19 | [conv2a] 20 | epsW=0.0001,0.0001 21 | epsB=0.002 22 | momW=0.9,0.9 23 | momB=0.9 24 | wc=0.0005,0.0005 25 | wball=0.00,0.00 26 | wcNormMin=0.001,0 27 | wcNormMax=0.002,0 28 | 29 | [conv2b] 30 | epsW=0.0001,0.0001 31 | epsB=0.002 32 | momW=0.9,0.9 33 | momB=0.9 34 | 
wc=0.0005,0.0005 35 | wball=0.00,0.00 36 | wcNormMin=0.001,0 37 | wcNormMax=0.002,0 38 | 39 | [conv3a] 40 | epsW=0.0001,0.0001 41 | epsB=0.002 42 | momW=0.9,0.9 43 | momB=0.9 44 | wc=0.0005,0.0005 45 | wball=0,0 46 | 47 | [conv3b] 48 | epsW=0.0001,0.0001 49 | epsB=0.002 50 | momW=0.9,0.9 51 | momB=0.9 52 | wc=0.0005,0.0005 53 | wball=0,0 54 | 55 | [conv4a] 56 | epsW=0.0001 57 | epsB=0.002 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | 63 | [conv4b] 64 | epsW=0.0001 65 | epsB=0.002 66 | momW=0.9 67 | momB=0.9 68 | wc=0.0005 69 | wball=0 70 | 71 | [conv5a] 72 | epsW=0.0001 73 | epsB=0.002 74 | momW=0.9 75 | momB=0.9 76 | wc=0.0005 77 | wball=0 78 | 79 | [conv5b] 80 | epsW=0.0001 81 | epsB=0.002 82 | momW=0.9 83 | momB=0.9 84 | wc=0.0005 85 | wball=0 86 | 87 | [fc3072a] 88 | epsW=0.0001,0.0001 89 | epsB=0.002 90 | momW=0.9,0.9 91 | momB=0.9 92 | wc=0.0005,0.0005 93 | wball=0,0 94 | 95 | [fc3072b] 96 | epsW=0.0001,0.0001 97 | epsB=0.002 98 | momW=0.9,0.9 99 | momB=0.9 100 | wc=0.0005,0.0005 101 | wball=0,0 102 | 103 | [fc3072ba] 104 | epsW=0.0001,0.0001 105 | epsB=0.002 106 | momW=0.9,0.9 107 | momB=0.9 108 | wc=0.0005,0.0005 109 | wball=0,0 110 | 111 | [fc3072bb] 112 | epsW=0.0001,0.0001 113 | epsB=0.002 114 | momW=0.9,0.9 115 | momB=0.9 116 | wc=0.0005,0.0005 117 | wball=0,0 118 | 119 | [fc3072ca] 120 | epsW=0.0001,0.0001 121 | epsB=0.002 122 | momW=0.9,0.9 123 | momB=0.9 124 | wc=0.0005,0.0005 125 | wball=0,0 126 | 127 | [fc3072cb] 128 | epsW=0.0001,0.0001 129 | epsB=0.002 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | 135 | [fc1000] 136 | epsW=0.0001,0.0001 137 | epsB=0.002 138 | momW=0.9,0.9 139 | momB=0.9 140 | wc=0.0005,0.0005 141 | wball=0,0 142 | 143 | [logprob] 144 | coeff=1 145 | topk=5 146 | 147 | [hs1a] 148 | enable=true 149 | 150 | [hs1b] 151 | enable=true 152 | 153 | [hs2a] 154 | enable=true 155 | 156 | [hs2b] 157 | enable=true 158 | 159 | [rnorm1a] 160 | scale=0.0001 161 | pow=0.75 162 | 163 | [rnorm1b] 164 
| scale=0.0001 165 | pow=0.75 166 | 167 | [rnorm2a] 168 | scale=0.0001 169 | pow=0.75 170 | 171 | [rnorm2b] 172 | scale=0.0001 173 | pow=0.75 174 | 175 | # this is like 115 (on gpu) but trained on ilya's new imgnet-2010 jpeg 176 | # on guppy7 177 | # logs/layers-115-jpeg.log 178 | # /nobackup/kriz/tmp/ConvNet__2012-07-18_20.56.13 179 | # epoch 22: set epsw to 0.001 from 0.01 180 | # epoch 48: set epsw to 0.0001 from 0.001 181 | # epoch 58: killed, since this was a duplicate (jpeg) of a suboptimal net anyway 182 | -------------------------------------------------------------------------------- /layers/layer-params-118.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | wcNormMin=0.001,0 25 | wcNormMax=0.002,0 26 | 27 | [conv2b] 28 | epsW=0.0000,0.0000 29 | epsB=0.00 30 | momW=0.9,0.9 31 | momB=0.9 32 | wc=0.0005,0.0005 33 | wball=0.00,0.00 34 | wcNormMin=0.001,0 35 | wcNormMax=0.002,0 36 | 37 | [conv3a] 38 | epsW=0.00001,0.00001 39 | epsB=0.002 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | 45 | [conv3b] 46 | epsW=0.00001,0.00001 47 | epsB=0.002 48 | momW=0.9,0.9 49 | momB=0.9 50 | wc=0.0005,0.0005 51 | wball=0,0 52 | 53 | [conv4a] 54 | epsW=0.00001 55 | epsB=0.002 56 | momW=0.9 57 | momB=0.9 58 | wc=0.0005 59 | wball=0 60 | 61 | [conv4b] 62 | epsW=0.00001 63 | epsB=0.002 64 | momW=0.9 65 | momB=0.9 66 | wc=0.0005 67 | wball=0 68 | 69 | [conv5a] 70 | epsW=0.00001 71 | epsB=0.002 72 | momW=0.9 73 | momB=0.9 74 | wc=0.0005 75 | wball=0 76 | 77 | [conv5b] 78 | epsW=0.00001 79 | epsB=0.002 80 | momW=0.9 81 | momB=0.9 82 | wc=0.0005 83 | wball=0 84 | 85 | [fc2048a] 86 | 
epsW=0.00001,0.00001 87 | epsB=0.002 88 | momW=0.9,0.9 89 | momB=0.9 90 | wc=0.0005,0.0005 91 | wball=0,0 92 | 93 | [fc2048b] 94 | epsW=0.00001,0.00001 95 | epsB=0.002 96 | momW=0.9,0.9 97 | momB=0.9 98 | wc=0.0005,0.0005 99 | wball=0,0 100 | 101 | [fc2048ba] 102 | epsW=0.00001,0.00001 103 | epsB=0.002 104 | momW=0.9,0.9 105 | momB=0.9 106 | wc=0.0005,0.0005 107 | wball=0,0 108 | 109 | [fc2048bb] 110 | epsW=0.00001,0.00001 111 | epsB=0.002 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | 117 | [fc1000] 118 | epsW=0.00001,0.00001 119 | epsB=0.002 120 | momW=0.9,0.9 121 | momB=0.9 122 | wc=0.0005,0.0005 123 | wball=0,0 124 | 125 | [logprob] 126 | coeff=1 127 | topk=5 128 | 129 | [hs1a] 130 | enable=true 131 | 132 | [hs2a] 133 | enable=true 134 | 135 | [hs1b] 136 | enable=true 137 | 138 | [hs2b] 139 | enable=true 140 | 141 | [rnorm1a] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm1b] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2a] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [rnorm2b] 154 | scale=0.0001 155 | pow=0.75 156 | 157 | # on guppy7 158 | # this is like #112 but with wcnorm on conv2, and also trained on jpeg 159 | # logs/layers-118.log 160 | # /nobackup/kriz/tmp/ConvNet__2012-07-19_18.35.31 161 | # epoch 23: set epsw to 0.001 from 0.01 162 | # epoch 46: set epsw to 0.0001 from 0.001 163 | # epoch 65: set epsw to 0.00001 from 0.0001 on conv1,conv2 164 | # set color noise to 0 from 0.1 165 | # epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 166 | # epoch 84: set epsw to 0.00001 from 0.0001 167 | # epoch 98: killed 168 | # [1.640873252105713, 0.37831333333333333, 0.17355999999999999] 169 | -------------------------------------------------------------------------------- /layers/layer-params-120-2012-full.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | 
epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.00 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 
142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is like #120 (so uses def file #120) but trained on lsvrc-2012 (full) 162 | # on gpu 163 | # /storage/tmp/ConvNet__2012-07-26_04.06.44 164 | # logs/layers-120-2012-full.log 165 | # epoch 23: set epsw to 0.001 from 0.01 166 | # epoch 38: moved to guppy9 167 | # /nobackup/kriz/tmp/ConvNet__2012-07-26_04.06.44 168 | # epoch 49: set epsw to 0.0001 from 0.001 169 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 170 | # set color noise to 0 from 0.1 171 | # epoch 73: set epsw to 0 from 0.00001 on conv1,conv2 172 | # epoch 87: set epsw to 0.00001 from 0.0001 173 | # epoch 94: killed 174 | # 175 | -------------------------------------------------------------------------------- /layers/layer-params-120-2012.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.00 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 
62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is like #120 (so uses def file #120) but trained on lsvrc-2012 (non-full) 162 | # on guppy9 163 | # /nobackup/kriz/tmp/ConvNet__2012-07-24_23.16.15 164 | # epoch 22: set epsw to 0.001 from 0.01 165 | # epoch 49: set epsw to 0.0001 from 0.001 166 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 167 | # set color noise to 0 from 0.1 168 | # epoch 73: set epsw to 0 from 0.00001 on conv1,conv2 169 | # epoch 81: set epsw to 0.00001 from 0.0001 170 | # epoch 95: killed 171 | # validation multiview error: 172 | # logprob: 
1.765247, 0.410440, 0.187140 173 | 174 | -------------------------------------------------------------------------------- /layers/layer-params-120.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.0 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.0 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.0 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | 
momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is like #106 but with rnorm of size 5, also train on jpegs 162 | # on gpu 163 | # logs/layers-120.log 164 | # /storage/tmp/ConvNet__2012-07-22_04.40.34 165 | # moving to guppy7 166 | # /nobackup/kriz/tmp/ConvNet__2012-07-22_04.40.34/ 167 | # epoch 26: set epsw to 0.001 from 0.01 168 | # epoch 47: set epsw to 0.0001 from 0.001 169 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 170 | # set color noise to 0 from 0.1 171 | # epoch 72: set epsw to 0 from 0.00001 on conv1,conv2 172 | # epoch 82: set epsw to 0.00001 from 0.0001 173 | # epoch 106: killed 174 | # logprob: 1.634692, 0.378533, 0.172360 175 | -------------------------------------------------------------------------------- /layers/layer-params-121.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.0 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.0 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.0 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | 
wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm1a] 162 | scale=0.001 163 | pow=0.75 164 | 165 | [cnorm1b] 166 | scale=0.001 167 | pow=0.75 168 | 169 | # this is like #120 but with cnorm over conv1 as well 170 | # on guppy8 171 | # 
logs/layers-121.log 172 | # /nobackup/kriz/tmp/ConvNet__2012-07-22_15.59.00 173 | # epoch 25: set epsw to 0.001 from 0.01 174 | # epoch 51: set epsw to 0.0001 from 0.001 175 | # epoch 63: set epsw to 0.00001 from 0.0001 on conv1,conv2 176 | # set color noise to 0 from 0.1 177 | # epoch 76: set epsw to 0 from 0.00001 on conv1,conv2 178 | # epoch 90: set epsw to 0.00001 from 0.0001 179 | # worse than 120 180 | -------------------------------------------------------------------------------- /layers/layer-params-126-2012-full.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01,0.01 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.01,0.01 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | 
[fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.01,0.01 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is like #120-2012-full but also has horiz reflection for gpu2 162 | # on guppy8 163 | # logs/layers-126.log 164 | # /nobackup/kriz/tmp/ConvNet__2012-07-31_22.55.59 165 | # killed after 19 epochs..seems no good, and also full sucks we now know 166 | -------------------------------------------------------------------------------- /layers/layer-params-127.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.001,0.001 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.001,0.001 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.001 35 | epsB=0.02 36 | momW=0.9 37 | momB=0.9 38 | wc=0.0005 39 | wball=0 40 | 41 | [conv3b] 42 | epsW=0.001 43 | epsB=0.02 44 
| momW=0.9 45 | momB=0.9 46 | wc=0.0005 47 | wball=0 48 | 49 | [conv4a] 50 | epsW=0.001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.001,0.001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.001,0.001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.001,0.001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.001,0.001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.001,0.001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # trained on lsvrc-2012 (full), like #120 but 162 | # this examines whether communication is necessary at conv3 163 | # .. meaning it has no communication at conv3 164 | # on gpu 165 | # /storage/tmp/ConvNet__2012-08-01_02.35.01 166 | # logs/layers-127.log 167 | # killed, since we know now that full sucks. 
168 | 169 | # trained on lsvrc-2012 (non-full). like #120 but now also make conv3,conv4 wider to compensate for lost connections 170 | # on guppy8 171 | # logs/layers-127a.log 172 | # /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-02_00.18.36 173 | # epoch 21: set epsw to 0.001 from 0.01 174 | # epoch 36: killed, significantly worse than 120 175 | -------------------------------------------------------------------------------- /layers/layer-params-128.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.001,0.001 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.001,0.001 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.001,0.001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.001,0.001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.001,0.001 51 | epsB=0.02 52 | momW=0.9,0.9 53 | momB=0.9 54 | wc=0.0005,0.0005 55 | wball=0,0 56 | 57 | [conv4b] 58 | epsW=0.001,0.001 59 | epsB=0.02 60 | momW=0.9,0.9 61 | momB=0.9 62 | wc=0.0005,0.0005 63 | wball=0,0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.001,0.001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.001,0.001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | 
epsW=0.001,0.001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.001,0.001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.001,0.001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is like #120 162 | # and has narrower columns which communicate more. i'm running this because #127 suggests 163 | # that communication is good 164 | # on guppy9 165 | # logs/layers-128.log 166 | # epoch 25: set epsw to 0.001 from 0.01 167 | # on hold 168 | -------------------------------------------------------------------------------- /layers/layer-params-131-2009.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01,0.01 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.01,0.01 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 
43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.01,0.01 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # on guppy9 162 | # logs/layers-131-2009.log 163 | # /nobackup/kriz/tmp/ConvNet__2012-08-18_15.41.20 164 | # epoch 7: set epsw to 0.001 from 0.01 165 | # epoch 14: set epsw to 0.0001 from 0.001 166 | # epoch 20: set epsw to 0.00001 from 0.0001 on conv1,conv2 167 | # set color noise to 0 from 0.1 168 | # epoch 
24: set epsw to 0 from 0.00001 on conv1,conv2 169 | # epoch 31: set epsw to 0.00001 from 0.0001 170 | # epoch 36: killed 171 | # logprob: 3.466260, 0.694209, 0.437308 172 | # a bit worse than previous 2009 thing! 173 | -------------------------------------------------------------------------------- /layers/layer-params-131.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.0 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.0 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.0 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 
| epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is like #120, but puts rnorm1 right over conv1 (trained on 2012-nonfull) 162 | # on gpu 163 | # /storage/tmp/ConvNet__2012-08-09_12.33.33 164 | # logs/layers-131.log 165 | # moved to guppy7 166 | # /nobackup/kriz/tmp/ConvNet__2012-08-09_12.33.33/ 167 | # epoch 22: set epsw to 0.001 from 0.01 168 | # epoch 46: set epsw to 0.0001 from 0.001 169 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 170 | # set color noise to 0 from 0.1 171 | # epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 172 | # epoch 81: set epsw to 0.00001 from 0.0001 173 | # epoch 100: killed 174 | # validation multiview error: 175 | # logprob: 1.755725, 0.409340, 0.185740 176 | -------------------------------------------------------------------------------- /layers/layer-params-132.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 
24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.00 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001 35 | epsB=0.02 36 | momW=0.9 37 | momB=0.9 38 | wc=0.0005 39 | wball=0 40 | 41 | [conv3b] 42 | epsW=0.00001 43 | epsB=0.02 44 | momW=0.9 45 | momB=0.9 46 | wc=0.0005 47 | wball=0 48 | 49 | [conv4a] 50 | epsW=0.00001,0.00001 51 | epsB=0.02 52 | momW=0.9,0.9 53 | momB=0.9 54 | wc=0.0005,0.0005 55 | wball=0,0 56 | 57 | [conv4b] 58 | epsW=0.00001,0.00001 59 | epsB=0.02 60 | momW=0.9,0.9 61 | momB=0.9 62 | wc=0.0005,0.0005 63 | wball=0,0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | 
scale=0.001 159 | pow=0.75 160 | 161 | # this is like 120 but with communication in conv4 instead of conv3 162 | # on gpu 163 | # logs/layers-132.log 164 | # /storage/tmp/ConvNet__2012-08-11_02.23.36 165 | # epoch 20: set epsw to 0.001 from 0.01 166 | # epoch 44: set epsw to 0.0001 from 0.001 167 | # moved to guppy9 168 | # @#$%&!, killed, i accidentally trained this on full 169 | 170 | # restart: 171 | # /nobackup/kriz/tmp/ConvNet__2012-08-13_16.47.07 172 | # logs/layers-132a.log 173 | # epoch 23: set epsw to 0.001 from 0.01 174 | # epoch 4x: set epsw to 0.0001 from 0.001 175 | # epoch 65: set epsw to 0.00001 from 0.0001 on conv1,conv2 176 | # set color noise to 0 from 0.1 177 | # epoch 71: set epsw to 0 from 0.00001 on conv1,conv2 178 | # epoch 88: killed, worse than 131 179 | 180 | -------------------------------------------------------------------------------- /layers/layer-params-133.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0001,0.0001 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0001,0.0001 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.0001 35 | epsB=0.02 36 | momW=0.9 37 | momB=0.9 38 | wc=0.0005 39 | wball=0 40 | 41 | [conv3b] 42 | epsW=0.0001 43 | epsB=0.02 44 | momW=0.9 45 | momB=0.9 46 | wc=0.0005 47 | wball=0 48 | 49 | [conv4a] 50 | epsW=0.0001,0.0001 51 | epsB=0.02 52 | momW=0.9,0.9 53 | momB=0.9 54 | wc=0.0005,0.0005 55 | wball=0,0 56 | 57 | [conv4b] 58 | epsW=0.0001,0.0001 59 | epsB=0.02 60 | momW=0.9,0.9 61 | momB=0.9 62 | wc=0.0005,0.0005 63 | wball=0,0 64 | 65 | [conv5a] 66 | epsW=0.0001 67 | epsB=0.02 68 | momW=0.9 69 | 
momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.0001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.0001,0.0001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.0001,0.0001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.0001,0.0001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.0001,0.0001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.0001,0.0001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | [cnorm2a] 154 | scale=0.001 155 | pow=0.75 156 | 157 | [cnorm2b] 158 | scale=0.001 159 | pow=0.75 160 | 161 | # this is a hybrid of 131 and 132: so it's like 120, but has communication in conv4 instead of conv3, and it also puts rnorm1 directly over conv1 162 | # on guppy7 163 | # logs/layers-133.log 164 | # /nobackup/kriz/tmp/ConvNet__2012-08-15_16.08.23 165 | # epoch 21: set epsw to 0.001 from 0.01 166 | # epoch 48: set epsw to 0.0001 from 0.001 167 | # epoch 50: killed, worse than 131 168 | -------------------------------------------------------------------------------- /layers/layer-params-134.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | 
wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01,0.01 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.01,0.01 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.01,0.01 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=0.25 141 | 142 | [rnorm1b] 143 | 
scale=0.0001 144 | pow=0.75 145 | minDiv=0.25 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=0.25 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=0.25 156 | 157 | [cnorm2a] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | # this is like #131, but with minDiv of 0.25 on rnorms 166 | # on guppy9 167 | # /nobackup/kriz/tmp/ConvNet__2012-08-20_23.26.41 168 | # logs/layers-134.log 169 | # epoch 13: on hold 170 | -------------------------------------------------------------------------------- /layers/layer-params-135.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.00 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 
| 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=2 141 | 142 | [rnorm1b] 143 | scale=0.0001 144 | pow=0.75 145 | minDiv=2 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=2 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [cnorm2a] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | # this is like #131, but with minDiv of 2 on rnorms 166 | # on guppy8 167 | # /nobackup/kriz/tmp/ConvNet__2012-08-21_01.49.23 168 | # logs/layers-135.log 169 | # epoch 20: set epsw to 0.001 from 0.01 170 | # epoch 47: set epsw to 0.0001 from 0.001 171 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1,conv2 172 | # set color noise to 0 from 0.1 173 | # epoch 75: set epsw to 0 from 0.00001 on conv1,conv2 174 | # epoch 81: set epsw to 0.00001 from 0.0001 175 | # epoch 96: killed 176 | # validation multiview: 177 | # logprob: 1.757653, 0.410700, 0.184160 178 | -------------------------------------------------------------------------------- /layers/layer-params-136.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | 
wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01,0.01 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.01,0.01 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.01,0.01 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=2 141 | 142 | [rnorm1b] 143 | scale=0.0001 144 | 
pow=0.75 145 | minDiv=2 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=2 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [cnorm2a] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | # this is like #135 (so uses def file 135), but subtracts scalar mean 166 | # on guppy7 167 | # logs/layers-136.log 168 | # /nobackup/kriz/tmp/ConvNet__2012-08-23_04.38.51 169 | # epoch 15: eh, this is no better, and has no reason to be better. screw it. 170 | -------------------------------------------------------------------------------- /layers/layer-params-139.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0001,0.0001 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | wcNormMin=0.001,0 25 | wcNormMax=0.002,0 26 | 27 | [conv2b] 28 | epsW=0.0001,0.0001 29 | epsB=0.02 30 | momW=0.9,0.9 31 | momB=0.9 32 | wc=0.0005,0.0005 33 | wball=0.00,0.00 34 | wcNormMin=0.001,0 35 | wcNormMax=0.002,0 36 | 37 | [conv3a] 38 | epsW=0.0001,0.0001 39 | epsB=0.02 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | 45 | [conv3b] 46 | epsW=0.0001,0.0001 47 | epsB=0.02 48 | momW=0.9,0.9 49 | momB=0.9 50 | wc=0.0005,0.0005 51 | wball=0,0 52 | 53 | [conv4a] 54 | epsW=0.0001 55 | epsB=0.02 56 | momW=0.9 57 | momB=0.9 58 | wc=0.0005 59 | wball=0 60 | 61 | [conv4b] 62 | epsW=0.0001 63 | epsB=0.02 64 | momW=0.9 65 | momB=0.9 66 | wc=0.0005 67 | wball=0 68 | 69 | [conv5a] 70 | epsW=0.0001 71 | epsB=0.02 72 | momW=0.9 73 | momB=0.9 74 | wc=0.0005 75 | wball=0 76 | 77 | [conv5b] 78 | epsW=0.0001 79 | epsB=0.02 80 | momW=0.9 81 | momB=0.9 82 | wc=0.0005 83 | wball=0 84 | 85 | 
[fc2048a] 86 | epsW=0.0001,0.0001 87 | epsB=0.02 88 | momW=0.9,0.9 89 | momB=0.9 90 | wc=0.0005,0.0005 91 | wball=0,0 92 | 93 | [fc2048b] 94 | epsW=0.0001,0.0001 95 | epsB=0.02 96 | momW=0.9,0.9 97 | momB=0.9 98 | wc=0.0005,0.0005 99 | wball=0,0 100 | 101 | [fc2048ba] 102 | epsW=0.0001,0.0001 103 | epsB=0.02 104 | momW=0.9,0.9 105 | momB=0.9 106 | wc=0.0005,0.0005 107 | wball=0,0 108 | 109 | [fc2048bb] 110 | epsW=0.0001,0.0001 111 | epsB=0.02 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | 117 | [fc1000] 118 | epsW=0.0001,0.0001 119 | epsB=0.02 120 | momW=0.9,0.9 121 | momB=0.9 122 | wc=0.0005,0.0005 123 | wball=0,0 124 | 125 | [logprob] 126 | coeff=1 127 | topk=5 128 | 129 | [hs1a] 130 | enable=true 131 | 132 | [hs2a] 133 | enable=true 134 | 135 | [hs1b] 136 | enable=true 137 | 138 | [hs2b] 139 | enable=true 140 | 141 | [rnorm1a] 142 | scale=0.0001 143 | pow=0.75 144 | minDiv=2 145 | 146 | [rnorm1b] 147 | scale=0.0001 148 | pow=0.75 149 | minDiv=2 150 | 151 | [rnorm2a] 152 | scale=0.0001 153 | pow=0.75 154 | minDiv=2 155 | 156 | [rnorm2b] 157 | scale=0.0001 158 | pow=0.75 159 | minDiv=2 160 | 161 | [cnorm2a] 162 | scale=0.001 163 | pow=0.75 164 | 165 | [cnorm2b] 166 | scale=0.001 167 | pow=0.75 168 | 169 | # this is like #137 (hence uses same file) but has wcnorm on conv2[0] 170 | # epoch 19: set epsw to 0.001 from 0.01 171 | # epoch 49: set epsw to 0.0001 from 0.001 172 | # epoch 62: killed, about 0.01 nat worse than 137 (which is pretty significant at this stage) 173 | -------------------------------------------------------------------------------- /layers/layer-params-141.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0001,0.0001 19 | epsB=0.02 20 | 
momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0001,0.0001 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.0001,0.0001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.0001,0.0001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.0001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.0001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.0001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.0001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [conv6a] 82 | epsW=0.0001,0.0001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [conv6b] 90 | epsW=0.0001,0.0001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048a] 98 | epsW=0.0001,0.0001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048b] 106 | epsW=0.0001,0.0001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc2048ba] 114 | epsW=0.0001,0.0001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [fc2048bb] 122 | epsW=0.0001,0.0001 123 | epsB=0.02 124 | momW=0.9,0.9 125 | momB=0.9 126 | wc=0.0005,0.0005 127 | wball=0,0 128 | 129 | [fc1000] 130 | epsW=0.0001,0.0001 131 | epsB=0.02 132 | momW=0.9,0.9 133 | momB=0.9 134 | wc=0.0005,0.0005 135 | wball=0,0 136 | 137 | [logprob] 138 | coeff=1 139 | topk=5 140 | 141 | [hs1a] 142 | enable=true 143 | 144 | [hs2a] 145 | enable=true 146 | 147 | [hs1b] 148 | enable=true 149 | 150 | [hs2b] 151 
| enable=true 152 | 153 | [rnorm1a] 154 | scale=0.0001 155 | pow=0.75 156 | minDiv=2 157 | 158 | [rnorm1b] 159 | scale=0.0001 160 | pow=0.75 161 | minDiv=2 162 | 163 | [rnorm2a] 164 | scale=0.0001 165 | pow=0.75 166 | minDiv=2 167 | 168 | [rnorm2b] 169 | scale=0.0001 170 | pow=0.75 171 | minDiv=2 172 | 173 | [cnorm2a] 174 | scale=0.001 175 | pow=0.75 176 | 177 | [cnorm2b] 178 | scale=0.001 179 | pow=0.75 180 | 181 | # this is like #137 but with conv6, also communication in conv6 182 | # /nobackup/kriz/tmp/ConvNet__2012-09-03_16.27.48 183 | # logs/layers-141.log 184 | # epoch 23: set epsw to 0.001 from 0.01 185 | # epoch 48: set epsw to 0.0001 from 0.001 186 | # epoch 60: this seems overfitty....killing 187 | # but will use these weights to initialize a net on 2009... why the hell not? 188 | -------------------------------------------------------------------------------- /layers/layer-params-145-half.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.00001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv2a] 10 | epsW=0.00001 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv3a] 18 | epsW=0.00001 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0 24 | 25 | [conv4a] 26 | epsW=0.00001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0 32 | 33 | [conv5a] 34 | epsW=0.00001 35 | epsB=0.02 36 | momW=0.9 37 | momB=0.9 38 | wc=0.0005 39 | wball=0 40 | 41 | 42 | [fc4096a] 43 | epsW=0.00001 44 | epsB=0.02 45 | momW=0.9 46 | momB=0.9 47 | wc=0.0005 48 | wball=0 49 | 50 | [fc4096ba] 51 | epsW=0.00001 52 | epsB=0.02 53 | momW=0.9 54 | momB=0.9 55 | wc=0.0005 56 | wball=0 57 | 58 | [fc1000] 59 | epsW=0.00001 60 | epsB=0.02 61 | momW=0.9 62 | momB=0.9 63 | wc=0.0005 64 | wball=0 65 | 66 | [logprob] 67 | coeff=1 68 | topk=5 69 | 70 | [hs1a] 71 | enable=true 72 | 73 | [hs2a] 74 | enable=true 75 | 76 | [rnorm1a] 
77 | scale=0.0001 78 | pow=0.75 79 | minDiv=2 80 | 81 | [rnorm2a] 82 | scale=0.0001 83 | pow=0.75 84 | minDiv=2 85 | 86 | [cnorm2a] 87 | scale=0.001 88 | pow=0.75 89 | 90 | # this is #145 but only one column, although conv5 is as wide as 2 columns because otherwise this net would have about half as many parameters as the 2-column net, which wouldn't make for a fair comparison. 91 | # on guppy9 92 | # trained on 2010 93 | # logs/layers-145-half.log 94 | # /nobackup/kriz/tmp/ConvNet__2012-11-03_01.00.35 95 | # epoch 20: set epsw to 0.001 from 0.01 96 | # epoch 48: set epsw to 0.0001 from 0.001 97 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1, set color noise to 0 from 0.1 98 | # epoch 72: set epsw to 0 from 0.00001 on conv1 99 | # epoch 96: killed 100 | # test multiview logprob: 1.702802, 0.391680, 0.182287 101 | -------------------------------------------------------------------------------- /layers/layer-params-146-2009-tree.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.001 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.001,0.001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.001,0.001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 
| wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [conv6a] 82 | epsW=0.001,0.001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [conv6b] 90 | epsW=0.001,0.001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048a] 98 | epsW=0.001,0.001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048b] 106 | epsW=0.001,0.001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc2048ba] 114 | epsW=0.001,0.001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [fc2048bb] 122 | epsW=0.001,0.001 123 | epsB=0.02 124 | momW=0.9,0.9 125 | momB=0.9 126 | wc=0.0005,0.0005 127 | wball=0,0 128 | 129 | [fc1000] 130 | epsW=0.001 131 | epsB=0.02 132 | momW=0.9 133 | momB=0.9 134 | wc=0.0005 135 | wball=0 136 | 137 | [logprob] 138 | coeff=1 139 | topk=5 140 | 141 | [hs1a] 142 | enable=true 143 | 144 | [hs2a] 145 | enable=true 146 | 147 | [hs1b] 148 | enable=true 149 | 150 | [hs2b] 151 | enable=true 152 | 153 | [rnorm1a] 154 | scale=0.0001 155 | pow=0.75 156 | minDiv=2 157 | 158 | [rnorm1b] 159 | scale=0.0001 160 | pow=0.75 161 | minDiv=2 162 | 163 | [rnorm2a] 164 | scale=0.0001 165 | pow=0.75 166 | minDiv=2 167 | 168 | [rnorm2b] 169 | scale=0.0001 170 | pow=0.75 171 | minDiv=2 172 | 173 | [cnorm2a] 174 | scale=0.001 175 | pow=0.75 176 | 177 | [cnorm2b] 178 | scale=0.001 179 | pow=0.75 180 | 181 | # this is 146-2009 but with tree 182 | # epoch 9: set epsw to 0.001 from 0.01 183 | -------------------------------------------------------------------------------- /layers/layer-params-146-2009.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 
| wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.001 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.001,0.001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.001,0.001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [conv6a] 82 | epsW=0.001,0.001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [conv6b] 90 | epsW=0.001,0.001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048a] 98 | epsW=0.001,0.001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048b] 106 | epsW=0.001,0.001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc2048ba] 114 | epsW=0.001,0.001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [fc2048bb] 122 | epsW=0.001,0.001 123 | epsB=0.02 124 | momW=0.9,0.9 125 | momB=0.9 126 | wc=0.0005,0.0005 127 | wball=0,0 128 | 129 | [fc1000] 130 | epsW=0.001,0.001 131 | epsB=0.02 132 | momW=0.9,0.9 133 | momB=0.9 134 | wc=0.0005,0.0005 135 | wball=0,0 136 | 137 | [logprob] 138 | coeff=1 139 | topk=5 140 | 141 | [hs1a] 
142 | enable=true 143 | 144 | [hs2a] 145 | enable=true 146 | 147 | [hs1b] 148 | enable=true 149 | 150 | [hs2b] 151 | enable=true 152 | 153 | [rnorm1a] 154 | scale=0.0001 155 | pow=0.75 156 | minDiv=2 157 | 158 | [rnorm1b] 159 | scale=0.0001 160 | pow=0.75 161 | minDiv=2 162 | 163 | [rnorm2a] 164 | scale=0.0001 165 | pow=0.75 166 | minDiv=2 167 | 168 | [rnorm2b] 169 | scale=0.0001 170 | pow=0.75 171 | minDiv=2 172 | 173 | [cnorm2a] 174 | scale=0.001 175 | pow=0.75 176 | 177 | [cnorm2b] 178 | scale=0.001 179 | pow=0.75 180 | 181 | # this is 145, and initialized from 145 (up to conv5) but with also conv6 and trained on 2009-10k 182 | # on guppy8 183 | # /nobackup/kriz/tmp/ConvNet__2012-09-14_22.48.00 184 | # initialized from /nobackup/kriz/tmp/ConvNet__2012-09-13_03.43.56.bak 185 | # init epsw 0.001 on conv1-5, 0.01 on fc, conv6 186 | # logs/layers-146-2009.log 187 | # epoch 2.1600: set epsw to 0.001 from 0.01 on fc, conv6 188 | # epoch 6.6491: killed 189 | -------------------------------------------------------------------------------- /layers/layer-params-147.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.00001 19 | epsB=0.002 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.00001 27 | epsB=0.002 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 
| epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=2 141 | 142 | [rnorm1b] 143 | scale=0.0001 144 | pow=0.75 145 | minDiv=2 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=2 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [cnorm2a] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | # this is like #145 but with non-overlapping pooling 166 | # on guppy9 167 | # logs/layers-147.log 168 | # /nobackup/kriz/tmp/ConvNet__2012-10-07_23.42.30 169 | # epoch 23: set epsw to 0.001 from 0.01 170 | # epoch 51: set epsw to 0.0001 from 0.001 171 | # epoch 79: set epsw to 0 on conv1, 0.00001 elsewhere 172 | # epoch 90: killed 173 | # validation multiview: 174 | # logprob: 1.757644, 0.410580, 0.185100 175 | 
-------------------------------------------------------------------------------- /layers/layer-params-147.cfg.save: -------------------------------------------------------------------------------- 1 | ano[conv1a] 2 | epsW=0.001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.001 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.001,0.001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.001,0.001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.001,0.001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.001,0.001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.001,0.001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.001,0.001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.001,0.001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 
| 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=2 141 | 142 | [rnorm1b] 143 | scale=0.0001 144 | pow=0.75 145 | minDiv=2 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=2 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [cnorm2a] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | # this is like #145 but with non-overlapping pooling 166 | # on guppy9 167 | # logs/layers-147.log 168 | # /nobackup/kriz/tmp/ConvNet__2012-10-07_23.42.30 169 | # epoch 23: set epsw to 0.001 from 0.01 170 | -------------------------------------------------------------------------------- /layers/layer-params-148.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.00001 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.00001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 
| epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | # this is like #145 but without normalization layers 138 | # on guppy9 139 | # logs/layers-148.log 140 | # /nobackup/kriz/tmp/ConvNet__2012-11-02_23.33.30 141 | # epoch 22: set epsw to 0.001 from 0.01 142 | # epoch 44: set epsw to 0.0001 from 0.001 143 | # epoch 69: set epsw to 0.00001 from 0.0001 on conv1, set color noise to 0 from 0.1 144 | # epoch 73: set epsw to 0 from 0.00001 on conv1 145 | # epoch 86: set epsw to 0.00001 from 0.0001 146 | # epoch 97: killed 147 | # validation multiview: 148 | # logprob: 1.822358, 0.420340, 0.193620 (1.4% top-1 worse than 145) 149 | -------------------------------------------------------------------------------- /layers/layer-params-149.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.00001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.00001 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0001 19 | epsB=0.02 20 | momW=0.9 21 | 
momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.0001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.0001,0.0001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.0001,0.0001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.0001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.0001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.0001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.0001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.0001,0.0001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.0001,0.0001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.0001,0.0001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.0001,0.0001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.0001,0.0001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=2 141 | 142 | [rnorm1b] 143 | scale=0.0001 144 | pow=0.75 145 | minDiv=2 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=2 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [cnorm2a] 158 
| scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | [rnorm5a] 166 | scale=0.0001 167 | pow=0.75 168 | minDiv=2 169 | 170 | [rnorm5b] 171 | scale=0.0001 172 | pow=0.75 173 | minDiv=2 174 | 175 | # this is like #145 but with rnorm over conv5 176 | # on guppy 177 | # /nobackup_a/kriz/tmp/ConvNet__2012-11-13_23.21.47 178 | # logs/layers-149.log 179 | # epoch 21: set epsw to 0.001 from 0.01 180 | # epoch 54: set epsw to 0.0001 from 0.001 181 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1 182 | # set color noise to 0 from 0.1 183 | -------------------------------------------------------------------------------- /layers/layer-params-150.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.00001 19 | epsB=0.02 20 | momW=0.9 21 | momB=0.9 22 | wc=0.0005 23 | wball=0.00 24 | 25 | [conv2b] 26 | epsW=0.00001 27 | epsB=0.02 28 | momW=0.9 29 | momB=0.9 30 | wc=0.0005 31 | wball=0.00 32 | 33 | [conv3a] 34 | epsW=0.00001 35 | epsB=0.02 36 | momW=0.9 37 | momB=0.9 38 | wc=0.0005 39 | wball=0.00 40 | 41 | [conv3b] 42 | epsW=0.00001 43 | epsB=0.02 44 | momW=0.9 45 | momB=0.9 46 | wc=0.0005 47 | wball=0.00 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc4096a] 82 | epsW=0.00001 83 | epsB=0.02 84 | momW=0.9 85 | momB=0.9 86 | wc=0.0005 87 | wball=0 88 | 89 | [fc4096b] 90 | 
epsW=0.00001 91 | epsB=0.02 92 | momW=0.9 93 | momB=0.9 94 | wc=0.0005 95 | wball=0 96 | 97 | [fc2048ba] 98 | epsW=0.00001 99 | epsB=0.02 100 | momW=0.9 101 | momB=0.9 102 | wc=0.0005 103 | wball=0 104 | 105 | [fc2048bb] 106 | epsW=0.00001 107 | epsB=0.02 108 | momW=0.9 109 | momB=0.9 110 | wc=0.0005 111 | wball=0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | minDiv=2 141 | 142 | [rnorm1b] 143 | scale=0.0001 144 | pow=0.75 145 | minDiv=2 146 | 147 | [rnorm2a] 148 | scale=0.0001 149 | pow=0.75 150 | minDiv=2 151 | 152 | [rnorm2b] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [cnorm2a] 158 | scale=0.001 159 | pow=0.75 160 | 161 | [cnorm2b] 162 | scale=0.001 163 | pow=0.75 164 | 165 | # this is like #145 but with no column communication, trained on 2012 166 | # guppy9 167 | # /nobackup/kriz/tmp/ConvNet__2012-11-20_01.25.08 168 | # logs/layers-150.log 169 | # moved to gpu 170 | # epoch 25: set epsw to 0.001 from 0.01 171 | # epoch 49: set epsw to 0.0001 from 0.001 172 | # epoch 50: move back to guppy9 173 | # epoch 66: set epsw to 0.00001 from 0.0001 on conv1 174 | # set color noise to 0 from 0.1 175 | # epoch 72: set epsw to 0 on conv1 176 | # epoch 79: set epsw to 0.00001 from 0.0001 177 | # epoch 92: killed 178 | # validation multiview: 179 | # logprob: 1.811173, 0.418280, 0.193300 180 | 181 | -------------------------------------------------------------------------------- /layers/layer-params-153.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=linear[1000] 9 | 10 | 
[conv1b] 11 | epsW=0.01 12 | epsB=0.02 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=linear[1000] 18 | 19 | [conv2a] 20 | epsW=0.01 21 | epsB=0.02 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=linear[1000] 27 | 28 | [conv2b] 29 | epsW=0.01 30 | epsB=0.02 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=linear[1000] 36 | 37 | [conv3a] 38 | epsW=0.01,0.01 39 | epsB=0.02 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=linear[1000] 45 | 46 | [conv3b] 47 | epsW=0.01,0.01 48 | epsB=0.02 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=linear[1000] 54 | 55 | [conv4a] 56 | epsW=0.01 57 | epsB=0.02 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=linear[1000] 63 | 64 | [conv4b] 65 | epsW=0.01 66 | epsB=0.02 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=linear[1000] 72 | 73 | [conv5a] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=linear[1000] 81 | 82 | [conv5b] 83 | epsW=0.01 84 | epsB=0.02 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=linear[1000] 90 | 91 | [fc2048a] 92 | epsW=0.01,0.01 93 | epsB=0.02 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=linear[1000] 99 | 100 | [fc2048b] 101 | epsW=0.01,0.01 102 | epsB=0.02 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=linear[1000] 108 | 109 | [fc2048ba] 110 | epsW=0.01,0.01 111 | epsB=0.02 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=linear[1000] 117 | 118 | [fc2048bb] 119 | epsW=0.01,0.01 120 | epsB=0.02 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=linear[1000] 126 | 127 | [fc1000] 128 | epsW=0.01,0.01 129 | epsB=0.02 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=linear[1000] 135 | 136 | [logprob] 137 | 
coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #145 but with linear learning rate schedule 181 | # on guppy9 182 | # /nobackup/kriz/tmp/ConvNet__2012-12-18_22.39.10 183 | # logs/layers-153.log 184 | # epoch 18: killed. i realized linear learning rate schedule is completely mental. 185 | -------------------------------------------------------------------------------- /layers/layer-params-155.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=exp[4000] 9 | 10 | [conv1b] 11 | epsW=0.0 12 | epsB=0.0 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=exp[4000] 18 | 19 | [conv2a] 20 | epsW=0.02 21 | epsB=0.04 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=exp[4000] 27 | 28 | [conv2b] 29 | epsW=0.02 30 | epsB=0.04 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=exp[4000] 36 | 37 | [conv3a] 38 | epsW=0.02,0.02 39 | epsB=0.04 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=exp[4000] 45 | 46 | [conv3b] 47 | epsW=0.02,0.02 48 | epsB=0.04 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=exp[4000] 54 | 55 | [conv4a] 56 | epsW=0.02 57 | epsB=0.04 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=exp[4000] 63 | 64 | [conv4b] 65 | epsW=0.02 66 | epsB=0.04 67 | momW=0.9 
68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=exp[4000] 72 | 73 | [conv5a] 74 | epsW=0.02 75 | epsB=0.04 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=exp[4000] 81 | 82 | [conv5b] 83 | epsW=0.02 84 | epsB=0.04 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=exp[4000] 90 | 91 | [fc2048a] 92 | epsW=0.02,0.02 93 | epsB=0.04 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=exp[4000] 99 | 100 | [fc2048b] 101 | epsW=0.02,0.02 102 | epsB=0.04 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=exp[4000] 108 | 109 | [fc2048ba] 110 | epsW=0.02,0.02 111 | epsB=0.04 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=exp[4000] 117 | 118 | [fc2048bb] 119 | epsW=0.02,0.02 120 | epsB=0.04 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=exp[4000] 126 | 127 | [fc1000] 128 | epsW=0.02,0.02 129 | epsB=0.04 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=exp[4000] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with exp learning rate schedule 181 | # its also like #154, but with learning rates in the range 0.02 to 0.000005 182 | # on guppy9 183 | # /nobackup/kriz/tmp/ConvNet__2012-12-19_22.13.12 184 | # logs/layers-155.log 185 | # epoch 61: set color noise to 0 from 0.1 186 | # 
epoch 74: set epsw to 0 from 0.02 on conv1 187 | # validation: 188 | # logprob: 1.861853, 0.426530, 0.199652 189 | # validation multiview: 190 | # logprob: 1.750063, 0.407440, 0.185240 191 | -------------------------------------------------------------------------------- /layers/layer-params-156.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=exp[2000] 9 | 10 | [conv1b] 11 | epsW=0.0 12 | epsB=0.0 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=exp[2000] 18 | 19 | [conv2a] 20 | epsW=0.02 21 | epsB=0.04 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=exp[2000] 27 | 28 | [conv2b] 29 | epsW=0.02 30 | epsB=0.04 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=exp[2000] 36 | 37 | [conv3a] 38 | epsW=0.02,0.02 39 | epsB=0.04 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=exp[2000] 45 | 46 | [conv3b] 47 | epsW=0.02,0.02 48 | epsB=0.04 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=exp[2000] 54 | 55 | [conv4a] 56 | epsW=0.02 57 | epsB=0.04 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=exp[2000] 63 | 64 | [conv4b] 65 | epsW=0.02 66 | epsB=0.04 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=exp[2000] 72 | 73 | [conv5a] 74 | epsW=0.02 75 | epsB=0.04 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=exp[2000] 81 | 82 | [conv5b] 83 | epsW=0.02 84 | epsB=0.04 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=exp[2000] 90 | 91 | [fc2048a] 92 | epsW=0.02,0.02 93 | epsB=0.04 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=exp[2000] 99 | 100 | [fc2048b] 101 | epsW=0.02,0.02 102 | epsB=0.04 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=exp[2000] 108 | 109 | [fc2048ba] 110 | 
epsW=0.02,0.02 111 | epsB=0.04 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=exp[2000] 117 | 118 | [fc2048bb] 119 | epsW=0.02,0.02 120 | epsB=0.04 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=exp[2000] 126 | 127 | [fc1000] 128 | epsW=0.02,0.02 129 | epsB=0.04 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=exp[2000] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with exp learning rate schedule 181 | # its also like #154 and #155, but with learning rates in the range 0.02 to 0.00001 182 | # on guppy9 183 | # /nobackup/kriz/tmp/ConvNet__2012-12-20_01.29.32 184 | # logs/layers-156.log 185 | # epoch 61: set color noise to 0 from 0.1 186 | # epoch 72: set epsw to 0 from 0.02 on conv1 187 | # validation: 188 | # logprob: 1.870253, 0.428933, 0.198336 189 | # validation multiview: 190 | # logprob: 1.751178, 0.407640, 0.183500 191 | -------------------------------------------------------------------------------- /layers/layer-params-157.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=exp[8000] 9 | 10 | [conv1b] 11 | epsW=0.0 12 | epsB=0.0 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=exp[8000] 18 | 19 | 
[conv2a] 20 | epsW=0.04 21 | epsB=0.08 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=exp[8000] 27 | 28 | [conv2b] 29 | epsW=0.04 30 | epsB=0.08 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=exp[8000] 36 | 37 | [conv3a] 38 | epsW=0.04,0.04 39 | epsB=0.08 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=exp[8000] 45 | 46 | [conv3b] 47 | epsW=0.04,0.04 48 | epsB=0.08 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=exp[8000] 54 | 55 | [conv4a] 56 | epsW=0.04 57 | epsB=0.08 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=exp[8000] 63 | 64 | [conv4b] 65 | epsW=0.04 66 | epsB=0.08 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=exp[8000] 72 | 73 | [conv5a] 74 | epsW=0.04 75 | epsB=0.08 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=exp[8000] 81 | 82 | [conv5b] 83 | epsW=0.04 84 | epsB=0.08 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=exp[8000] 90 | 91 | [fc2048a] 92 | epsW=0.04,0.04 93 | epsB=0.08 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=exp[8000] 99 | 100 | [fc2048b] 101 | epsW=0.04,0.04 102 | epsB=0.08 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=exp[8000] 108 | 109 | [fc2048ba] 110 | epsW=0.04,0.04 111 | epsB=0.08 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=exp[8000] 117 | 118 | [fc2048bb] 119 | epsW=0.04,0.04 120 | epsB=0.08 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=exp[8000] 126 | 127 | [fc1000] 128 | epsW=0.04,0.04 129 | epsB=0.08 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=exp[8000] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 
151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with exp learning rate schedule 181 | # its also like #155, but with learning rates in the range 0.04 to 0.000005 182 | # on guppy8 183 | # logs/layers-157.log 184 | # /nobackup/kriz/tmp/ConvNet__2012-12-23_02.12.31 185 | # epoch 62: set color noise to 0 from 0.1 186 | # epoch 73: set conv1 epsw to 0 from 0.04 187 | # valid: logprob: 1.880485, 0.431177, 0.203271 188 | # multiview valid: logprob: 1.767696, 0.411140, 0.187040 189 | -------------------------------------------------------------------------------- /layers/layer-params-158.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=exp[20000] 9 | 10 | [conv1b] 11 | epsW=0.0 12 | epsB=0.0 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=exp[20000] 18 | 19 | [conv2a] 20 | epsW=0.04 21 | epsB=0.08 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=exp[20000] 27 | 28 | [conv2b] 29 | epsW=0.04 30 | epsB=0.08 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=exp[20000] 36 | 37 | [conv3a] 38 | epsW=0.04,0.04 39 | epsB=0.08 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=exp[20000] 45 | 46 | [conv3b] 47 | epsW=0.04,0.04 48 | epsB=0.08 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=exp[20000] 54 | 55 | [conv4a] 56 | epsW=0.04 57 | epsB=0.08 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=exp[20000] 63 | 
64 | [conv4b] 65 | epsW=0.04 66 | epsB=0.08 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=exp[20000] 72 | 73 | [conv5a] 74 | epsW=0.04 75 | epsB=0.08 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=exp[20000] 81 | 82 | [conv5b] 83 | epsW=0.04 84 | epsB=0.08 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=exp[20000] 90 | 91 | [fc2048a] 92 | epsW=0.04,0.04 93 | epsB=0.08 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=exp[20000] 99 | 100 | [fc2048b] 101 | epsW=0.04,0.04 102 | epsB=0.08 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=exp[20000] 108 | 109 | [fc2048ba] 110 | epsW=0.04,0.04 111 | epsB=0.08 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=exp[20000] 117 | 118 | [fc2048bb] 119 | epsW=0.04,0.04 120 | epsB=0.08 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=exp[20000] 126 | 127 | [fc1000] 128 | epsW=0.04,0.04 129 | epsB=0.08 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=exp[20000] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with exp learning rate schedule 181 | # its also like #155, but with learning rates in the range 0.04 to 0.000002 182 | # on guppy8 183 | # logs/layers-158.log 184 | # 
/nobackup/kriz/tmp/ConvNet__2012-12-23_17.34.48 185 | # epoch 63: set color noise to 0 from 0.1 186 | # epoch 77: set epsw to 0 from 0.04 on conv1 187 | # validation: logprob: 1.862656, 0.428884, 0.199910 188 | # validation multiview: logprob: 1.757155, 0.410260, 0.185380 189 | 190 | -------------------------------------------------------------------------------- /layers/layer-params-160.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=dexp[2000,4] 9 | 10 | [conv1b] 11 | epsW=0.0 12 | epsB=0.0 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=dexp[2000,4] 18 | 19 | [conv2a] 20 | epsW=0.02 21 | epsB=0.04 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=dexp[2000,4] 27 | 28 | [conv2b] 29 | epsW=0.02 30 | epsB=0.04 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=dexp[2000,4] 36 | 37 | [conv3a] 38 | epsW=0.02,0.02 39 | epsB=0.04 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=dexp[2000,4] 45 | 46 | [conv3b] 47 | epsW=0.02,0.02 48 | epsB=0.04 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=dexp[2000,4] 54 | 55 | [conv4a] 56 | epsW=0.02 57 | epsB=0.04 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=dexp[2000,4] 63 | 64 | [conv4b] 65 | epsW=0.02 66 | epsB=0.04 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=dexp[2000,4] 72 | 73 | [conv5a] 74 | epsW=0.02 75 | epsB=0.04 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=dexp[2000,4] 81 | 82 | [conv5b] 83 | epsW=0.02 84 | epsB=0.04 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=dexp[2000,4] 90 | 91 | [fc2048a] 92 | epsW=0.02,0.02 93 | epsB=0.04 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=dexp[2000,4] 99 | 100 | [fc2048b] 101 | epsW=0.02,0.02 102 | 
epsB=0.04 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=dexp[2000,4] 108 | 109 | [fc2048ba] 110 | epsW=0.02,0.02 111 | epsB=0.04 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=dexp[2000,4] 117 | 118 | [fc2048bb] 119 | epsW=0.02,0.02 120 | epsB=0.04 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=dexp[2000,4] 126 | 127 | [fc1000] 128 | epsW=0.02,0.02 129 | epsB=0.04 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=dexp[2000,4] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with dexp learning rate schedule 181 | # its also like 159 but with rates in the range 0.02 to 0.00001 182 | # on guppy7 183 | # logs/layers-160.log 184 | # /nobackup/kriz/tmp/ConvNet__2012-12-24_17.07.46 185 | # epoch 61: set color noise to 0 from 0.1 186 | # validation: 1.884187, 0.433855, 0.205452 187 | # validation multiview: : 1.789202, 0.413740, 0.190360 188 | -------------------------------------------------------------------------------- /layers/layer-params-161.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=jdexp[1000,4] 9 | 10 | [conv1b] 11 | epsW=0.01 12 | epsB=0.02 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 
16 | wball=0.00 17 | sched=jdexp[1000,4] 18 | 19 | [conv2a] 20 | epsW=0.01 21 | epsB=0.02 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=jdexp[1000,4] 27 | 28 | [conv2b] 29 | epsW=0.01 30 | epsB=0.02 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=jdexp[1000,4] 36 | 37 | [conv3a] 38 | epsW=0.01,0.01 39 | epsB=0.02 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=jdexp[1000,4] 45 | 46 | [conv3b] 47 | epsW=0.01,0.01 48 | epsB=0.02 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=jdexp[1000,4] 54 | 55 | [conv4a] 56 | epsW=0.01 57 | epsB=0.02 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=jdexp[1000,4] 63 | 64 | [conv4b] 65 | epsW=0.01 66 | epsB=0.02 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=jdexp[1000,4] 72 | 73 | [conv5a] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=jdexp[1000,4] 81 | 82 | [conv5b] 83 | epsW=0.01 84 | epsB=0.02 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=jdexp[1000,4] 90 | 91 | [fc2048a] 92 | epsW=0.01,0.01 93 | epsB=0.02 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=jdexp[1000,4] 99 | 100 | [fc2048b] 101 | epsW=0.01,0.01 102 | epsB=0.02 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=jdexp[1000,4] 108 | 109 | [fc2048ba] 110 | epsW=0.01,0.01 111 | epsB=0.02 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=jdexp[1000,4] 117 | 118 | [fc2048bb] 119 | epsW=0.01,0.01 120 | epsB=0.02 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=jdexp[1000,4] 126 | 127 | [fc1000] 128 | epsW=0.01,0.01 129 | epsB=0.02 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=jdexp[1000,4] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 
| [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with jumpy dexp learning rate schedule 181 | # on guppy7 182 | # logs/layers-161.log 183 | # (guppy7 is dead for now so doing nothing) 184 | -------------------------------------------------------------------------------- /layers/layer-params-162.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=dexp[1000,3] 9 | 10 | [conv1b] 11 | epsW=0.01 12 | epsB=0.02 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=dexp[1000,3] 18 | 19 | [conv2a] 20 | epsW=0.01 21 | epsB=0.02 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=dexp[1000,3] 27 | 28 | [conv2b] 29 | epsW=0.01 30 | epsB=0.02 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=dexp[1000,3] 36 | 37 | [conv3a] 38 | epsW=0.01,0.01 39 | epsB=0.02 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=dexp[1000,3] 45 | 46 | [conv3b] 47 | epsW=0.01,0.01 48 | epsB=0.02 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=dexp[1000,3] 54 | 55 | [conv4a] 56 | epsW=0.01 57 | epsB=0.02 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=dexp[1000,3] 63 | 64 | [conv4b] 65 | epsW=0.01 66 | epsB=0.02 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=dexp[1000,3] 72 | 73 | [conv5a] 74 | epsW=0.01 75 | epsB=0.02 
76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=dexp[1000,3] 81 | 82 | [conv5b] 83 | epsW=0.01 84 | epsB=0.02 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=dexp[1000,3] 90 | 91 | [fc2048a] 92 | epsW=0.01,0.01 93 | epsB=0.02 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.0005,0.0005 97 | wball=0,0 98 | sched=dexp[1000,3] 99 | 100 | [fc2048b] 101 | epsW=0.01,0.01 102 | epsB=0.02 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.0005,0.0005 106 | wball=0,0 107 | sched=dexp[1000,3] 108 | 109 | [fc2048ba] 110 | epsW=0.01,0.01 111 | epsB=0.02 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.0005,0.0005 115 | wball=0,0 116 | sched=dexp[1000,3] 117 | 118 | [fc2048bb] 119 | epsW=0.01,0.01 120 | epsB=0.02 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.0005,0.0005 124 | wball=0,0 125 | sched=dexp[1000,3] 126 | 127 | [fc1000] 128 | epsW=0.01,0.01 129 | epsB=0.02 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.0005,0.0005 133 | wball=0,0 134 | sched=dexp[1000,3] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with dexp learning rate schedule 181 | # its also like 159 but with 3 levels of learning rates in the range 0.01 to 0.00001 182 | # on guppy9 183 | # logs/layers-162.log 184 | # /nobackup/kriz/tmp/ConvNet__2012-12-25_22.41.00 185 | # epoch 61: set color noise to 0 from 0.1 186 | # validation: logprob: 1.894451, 0.438533, 0.207935 187 | # validation multiview: ah 
screw it, it'll suck 188 | -------------------------------------------------------------------------------- /layers/layer-params-163.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | sched=dexp[1000,4] 9 | 10 | [conv1b] 11 | epsW=0.0 12 | epsB=0.0 13 | momW=0.9 14 | momB=0.9 15 | wc=0.0005 16 | wball=0.00 17 | sched=dexp[1000,4] 18 | 19 | [conv2a] 20 | epsW=0.01 21 | epsB=0.02 22 | momW=0.9 23 | momB=0.9 24 | wc=0.0005 25 | wball=0.00 26 | sched=dexp[1000,4] 27 | 28 | [conv2b] 29 | epsW=0.01 30 | epsB=0.02 31 | momW=0.9 32 | momB=0.9 33 | wc=0.0005 34 | wball=0.00 35 | sched=dexp[1000,4] 36 | 37 | [conv3a] 38 | epsW=0.01,0.01 39 | epsB=0.02 40 | momW=0.9,0.9 41 | momB=0.9 42 | wc=0.0005,0.0005 43 | wball=0,0 44 | sched=dexp[1000,4] 45 | 46 | [conv3b] 47 | epsW=0.01,0.01 48 | epsB=0.02 49 | momW=0.9,0.9 50 | momB=0.9 51 | wc=0.0005,0.0005 52 | wball=0,0 53 | sched=dexp[1000,4] 54 | 55 | [conv4a] 56 | epsW=0.01 57 | epsB=0.02 58 | momW=0.9 59 | momB=0.9 60 | wc=0.0005 61 | wball=0 62 | sched=dexp[1000,4] 63 | 64 | [conv4b] 65 | epsW=0.01 66 | epsB=0.02 67 | momW=0.9 68 | momB=0.9 69 | wc=0.0005 70 | wball=0 71 | sched=dexp[1000,4] 72 | 73 | [conv5a] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | sched=dexp[1000,4] 81 | 82 | [conv5b] 83 | epsW=0.01 84 | epsB=0.02 85 | momW=0.9 86 | momB=0.9 87 | wc=0.0005 88 | wball=0 89 | sched=dexp[1000,4] 90 | 91 | [fc2048a] 92 | epsW=0.01,0.01 93 | epsB=0.02 94 | momW=0.9,0.9 95 | momB=0.9 96 | wc=0.001,0.001 97 | wball=0,0 98 | sched=dexp[1000,4] 99 | 100 | [fc2048b] 101 | epsW=0.01,0.01 102 | epsB=0.02 103 | momW=0.9,0.9 104 | momB=0.9 105 | wc=0.001,0.001 106 | wball=0,0 107 | sched=dexp[1000,4] 108 | 109 | [fc2048ba] 110 | epsW=0.01,0.01 111 | epsB=0.02 112 | momW=0.9,0.9 113 | momB=0.9 114 | wc=0.001,0.001 115 | wball=0,0 116 | sched=dexp[1000,4] 117 
| 118 | [fc2048bb] 119 | epsW=0.01,0.01 120 | epsB=0.02 121 | momW=0.9,0.9 122 | momB=0.9 123 | wc=0.001,0.001 124 | wball=0,0 125 | sched=dexp[1000,4] 126 | 127 | [fc1000] 128 | epsW=0.01,0.01 129 | epsB=0.02 130 | momW=0.9,0.9 131 | momB=0.9 132 | wc=0.001,0.001 133 | wball=0,0 134 | sched=dexp[1000,4] 135 | 136 | [logprob] 137 | coeff=1 138 | topk=5 139 | 140 | [hs1a] 141 | enable=true 142 | 143 | [hs2a] 144 | enable=true 145 | 146 | [hs1b] 147 | enable=true 148 | 149 | [hs2b] 150 | enable=true 151 | 152 | [rnorm1a] 153 | scale=0.0001 154 | pow=0.75 155 | minDiv=2 156 | 157 | [rnorm1b] 158 | scale=0.0001 159 | pow=0.75 160 | minDiv=2 161 | 162 | [rnorm2a] 163 | scale=0.0001 164 | pow=0.75 165 | minDiv=2 166 | 167 | [rnorm2b] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [cnorm2a] 173 | scale=0.001 174 | pow=0.75 175 | 176 | [cnorm2b] 177 | scale=0.001 178 | pow=0.75 179 | 180 | # this is like #153 (so uses same file) but with dexp learning rate schedule 181 | # its also like 159 but with 2x the wc on fc layers 182 | # on guppy9 183 | # logs/layers-163.log 184 | # /nobackup/kriz/tmp/ConvNet__2012-12-26_01.15.38 185 | # epoch 61: set color noise to 0 from 0.1 186 | # epoch 73: set conv1 epsw to 0 from 0.01 187 | # validation: logprob: 1.849131, 0.429085, 0.199072 188 | # validation multiview: ah screw it, it'll suck 189 | -------------------------------------------------------------------------------- /layers/layer-params-167.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | schedW=dexp[1000,4] 9 | schedB=dexp[10,2] 10 | 11 | [conv1b] 12 | epsW=0.0 13 | epsB=0.0 14 | momW=0.9 15 | momB=0.9 16 | wc=0.0005 17 | wball=0.00 18 | schedW=dexp[1000,4] 19 | schedB=dexp[10,2] 20 | 21 | [conv2a] 22 | epsW=0.01 23 | epsB=0.02 24 | momW=0.9 25 | momB=0.9 26 | wc=0.0005 27 | wball=0.00 28 | schedW=dexp[1000,4] 29 | 
schedB=dexp[10,2] 30 | 31 | [conv2b] 32 | epsW=0.01 33 | epsB=0.02 34 | momW=0.9 35 | momB=0.9 36 | wc=0.0005 37 | wball=0.00 38 | schedW=dexp[1000,4] 39 | schedB=dexp[10,2] 40 | 41 | [conv3a] 42 | epsW=0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | schedW=dexp[1000,4] 49 | schedB=dexp[10,2] 50 | 51 | [conv3b] 52 | epsW=0.01,0.01 53 | epsB=0.02 54 | momW=0.9,0.9 55 | momB=0.9 56 | wc=0.0005,0.0005 57 | wball=0,0 58 | schedW=dexp[1000,4] 59 | schedB=dexp[10,2] 60 | 61 | [conv4a] 62 | epsW=0.01 63 | epsB=0.02 64 | momW=0.9 65 | momB=0.9 66 | wc=0.0005 67 | wball=0 68 | schedW=dexp[1000,4] 69 | schedB=dexp[10,2] 70 | 71 | [conv4b] 72 | epsW=0.01 73 | epsB=0.02 74 | momW=0.9 75 | momB=0.9 76 | wc=0.0005 77 | wball=0 78 | schedW=dexp[1000,4] 79 | schedB=dexp[10,2] 80 | 81 | [conv5a] 82 | epsW=0.01 83 | epsB=0.02 84 | momW=0.9 85 | momB=0.9 86 | wc=0.0005 87 | wball=0 88 | schedW=dexp[1000,4] 89 | schedB=dexp[10,2] 90 | 91 | [conv5b] 92 | epsW=0.01 93 | epsB=0.02 94 | momW=0.9 95 | momB=0.9 96 | wc=0.0005 97 | wball=0 98 | schedW=dexp[1000,4] 99 | schedB=dexp[10,2] 100 | 101 | [fc2048a] 102 | epsW=0.01,0.01 103 | epsB=0.02 104 | momW=0.9,0.9 105 | momB=0.9 106 | wc=0.0005,0.0005 107 | wball=0,0 108 | schedW=dexp[1000,4] 109 | schedB=dexp[10,2] 110 | 111 | [fc2048b] 112 | epsW=0.01,0.01 113 | epsB=0.02 114 | momW=0.9,0.9 115 | momB=0.9 116 | wc=0.0005,0.0005 117 | wball=0,0 118 | schedW=dexp[1000,4] 119 | schedB=dexp[10,2] 120 | 121 | [fc2048ba] 122 | epsW=0.01,0.01 123 | epsB=0.02 124 | momW=0.9,0.9 125 | momB=0.9 126 | wc=0.0005,0.0005 127 | wball=0,0 128 | schedW=dexp[1000,4] 129 | schedB=dexp[10,2] 130 | 131 | [fc2048bb] 132 | epsW=0.01,0.01 133 | epsB=0.02 134 | momW=0.9,0.9 135 | momB=0.9 136 | wc=0.0005,0.0005 137 | wball=0,0 138 | schedW=dexp[1000,4] 139 | schedB=dexp[10,2] 140 | 141 | [fc1000] 142 | epsW=0.01,0.01 143 | epsB=0.02 144 | momW=0.9,0.9 145 | momB=0.9 146 | wc=0.0005,0.0005 147 | wball=0,0 148 | 
schedW=dexp[1000,4] 149 | schedB=dexp[10,2] 150 | 151 | [logprob] 152 | coeff=1 153 | topk=5 154 | 155 | [hs1a] 156 | enable=true 157 | 158 | [hs2a] 159 | enable=true 160 | 161 | [hs1b] 162 | enable=true 163 | 164 | [hs2b] 165 | enable=true 166 | 167 | [rnorm1a] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [rnorm1b] 173 | scale=0.0001 174 | pow=0.75 175 | minDiv=2 176 | 177 | # this is like #153 but with dexp learning rate schedule 178 | # also deletes rnorm over conv2 179 | # on guppy8 180 | # logs/layers-167.log 181 | # /nobackup/kriz/tmp/ConvNet__2013-01-03_23.39.35 182 | # epoch 63: set color noise to 0 from 0.1 183 | # epoch 73: set conv1 epsw to 0 from 0.01 184 | # logprob: 1.851845, 0.426772, 0.197590 185 | # multiview logprob: 1.738715, 0.404880, 0.181180 186 | 187 | 188 | -------------------------------------------------------------------------------- /layers/layer-params-170-256.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | schedW=dexp[250,0,4] 9 | schedB=dexp[10,0,2] 10 | 11 | [conv1b] 12 | epsW=0.01 13 | epsB=0.02 14 | momW=0.9 15 | momB=0.9 16 | wc=0.0005 17 | wball=0.00 18 | schedW=dexp[250,0,4] 19 | schedB=dexp[10,0,2] 20 | 21 | [conv2a] 22 | epsW=0.01 23 | epsB=0.02 24 | momW=0.9 25 | momB=0.9 26 | wc=0.0005 27 | wball=0.00 28 | schedW=dexp[250,0,4] 29 | schedB=dexp[10,0,2] 30 | 31 | [conv2b] 32 | epsW=0.01 33 | epsB=0.02 34 | momW=0.9 35 | momB=0.9 36 | wc=0.0005 37 | wball=0.00 38 | schedW=dexp[250,0,4] 39 | schedB=dexp[10,0,2] 40 | 41 | [conv3a] 42 | epsW=0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | schedW=dexp[250,0,4] 49 | schedB=dexp[10,0,2] 50 | 51 | [conv3b] 52 | epsW=0.01,0.01 53 | epsB=0.02 54 | momW=0.9,0.9 55 | momB=0.9 56 | wc=0.0005,0.0005 57 | wball=0,0 58 | schedW=dexp[250,0,4] 59 | schedB=dexp[10,0,2] 60 | 61 | [conv4a] 62 | 
epsW=0.01 63 | epsB=0.02 64 | momW=0.9 65 | momB=0.9 66 | wc=0.0005 67 | wball=0 68 | schedW=dexp[250,0,4] 69 | schedB=dexp[10,0,2] 70 | 71 | [conv4b] 72 | epsW=0.01 73 | epsB=0.02 74 | momW=0.9 75 | momB=0.9 76 | wc=0.0005 77 | wball=0 78 | schedW=dexp[250,0,4] 79 | schedB=dexp[10,0,2] 80 | 81 | [conv5a] 82 | epsW=0.01 83 | epsB=0.02 84 | momW=0.9 85 | momB=0.9 86 | wc=0.0005 87 | wball=0 88 | schedW=dexp[250,0,4] 89 | schedB=dexp[10,0,2] 90 | 91 | [conv5b] 92 | epsW=0.01 93 | epsB=0.02 94 | momW=0.9 95 | momB=0.9 96 | wc=0.0005 97 | wball=0 98 | schedW=dexp[250,0,4] 99 | schedB=dexp[10,0,2] 100 | 101 | [fc2048a] 102 | epsW=0.01,0.01 103 | epsB=0.02 104 | momW=0.9,0.9 105 | momB=0.9 106 | wc=0.0005,0.0005 107 | wball=0,0 108 | schedW=dexp[250,0,4] 109 | schedB=dexp[10,0,2] 110 | 111 | [fc2048b] 112 | epsW=0.01,0.01 113 | epsB=0.02 114 | momW=0.9,0.9 115 | momB=0.9 116 | wc=0.0005,0.0005 117 | wball=0,0 118 | schedW=dexp[250,0,4] 119 | schedB=dexp[10,0,2] 120 | 121 | [fc2048ba] 122 | epsW=0.01,0.01 123 | epsB=0.02 124 | momW=0.9,0.9 125 | momB=0.9 126 | wc=0.0005,0.0005 127 | wball=0,0 128 | schedW=dexp[250,0,4] 129 | schedB=dexp[10,0,2] 130 | 131 | [fc2048bb] 132 | epsW=0.01,0.01 133 | epsB=0.02 134 | momW=0.9,0.9 135 | momB=0.9 136 | wc=0.0005,0.0005 137 | wball=0,0 138 | schedW=dexp[250,0,4] 139 | schedB=dexp[10,0,2] 140 | 141 | [fc1000] 142 | epsW=0.01,0.01 143 | epsB=0.02 144 | momW=0.9,0.9 145 | momB=0.9 146 | wc=0.0005,0.0005 147 | wball=0,0 148 | schedW=dexp[250,0,4] 149 | schedB=dexp[10,0,2] 150 | 151 | [logprob] 152 | coeff=1 153 | topk=5 154 | 155 | [hs1a] 156 | enable=true 157 | 158 | [hs2a] 159 | enable=true 160 | 161 | [hs1b] 162 | enable=true 163 | 164 | [hs2b] 165 | enable=true 166 | 167 | [rnorm1a] 168 | scale=0.0001 169 | pow=0.75 170 | minDiv=2 171 | 172 | [rnorm1b] 173 | scale=0.0001 174 | pow=0.75 175 | minDiv=2 176 | 177 | [rnorm2a] 178 | scale=0.0001 179 | pow=0.75 180 | minDiv=2 181 | 182 | [rnorm2b] 183 | scale=0.0001 184 | pow=0.75 185 | 
minDiv=2 186 | 187 | [cnorm2a] 188 | scale=0.001 189 | pow=0.75 190 | 191 | [cnorm2b] 192 | scale=0.001 193 | pow=0.75 194 | 195 | # this is 170 but running with minibatch 256 on krunch: 196 | # use def file 153 197 | # not doing fading 198 | # /nobackup/kriz/tmp/ConvNet__2013-02-05_12.50.10 199 | # logs/layers-170-256.log 200 | -------------------------------------------------------------------------------- /layers/layer-params-2009-101.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01,0.01 19 | epsB=0.002 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.01,0.01 27 | epsB=0.002 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.002 100 | momW=0.9,0.9 101 | 
momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc10184] 114 | epsW=0.01,0.01 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy9 154 | # this is like #101 (on gpu) but this is trained on imgnet-2009 155 | # epoch 6: set epsw to 0.001 from 0.01 156 | # epoch 14: set epsw to 0.0001 from 0.001 157 | # epoch 19: set epsw to 0.00001 from 0.0001 on conv1,conv2 158 | # set color noise to 0 from 0.1 159 | # epoch 27: set epsw to 0 from 0.00001 on conv1,conv2 160 | # epoch 30: set epsw to 0.00001 from 0.0001 161 | # epoch 33: killed 162 | # [3.4620055494832287, 0.69382157140195966, 0.43646610858041701] 163 | -------------------------------------------------------------------------------- /layers/layer-params-96-16k.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.001 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.001 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.001,0.001 19 | epsB=0.002 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.001,0.001 27 | epsB=0.002 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.001,0.001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | 
wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.001,0.001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.001,0.001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.001,0.001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.001,0.001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.001,0.001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.001,0.001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | 124 | [hs1a] 125 | enable=true 126 | 127 | [hs2a] 128 | enable=true 129 | 130 | [hs1b] 131 | enable=true 132 | 133 | [hs2b] 134 | enable=true 135 | 136 | [rnorm1a] 137 | scale=0.0001 138 | pow=0.75 139 | 140 | [rnorm1b] 141 | scale=0.0001 142 | pow=0.75 143 | 144 | [rnorm2a] 145 | scale=0.0001 146 | pow=0.75 147 | 148 | [rnorm2b] 149 | scale=0.0001 150 | pow=0.75 151 | 152 | # on guppy7 153 | # logs/layers-96-16k.log 154 | # /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-24_02.01.57 155 | # epoch 5: set epsw to 0.001 from 0.01 156 | # epoch 6: enabled dropout 157 | -------------------------------------------------------------------------------- /layers/layer-params-98-16kinit.cfg: 
-------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.00 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.00 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.00 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.00 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | 
enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy9 154 | # logs/layers-98-16kinit.log 155 | # weights initialized from net trained on 16k imgnet for a few epochs: /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-24_02.01.57 156 | # /nobackup/kriz/tmp/ConvNet__2012-06-25_17.55.06 157 | # logs/layers-98-16kinit.log 158 | # epoch 30: set epsw to 0.0001 from 0.001 159 | # epoch 44: set epsw to 0.00001 from 0.0001 on conv1,conv2 160 | # set color noise to 0 from 0.1 161 | # epoch 51: set epsw to 0 from 0.00001 on conv1,conv2 162 | # epoch 64: set epsw to 0.00001 from 0.0001 163 | # epoch 71: killed 164 | # (294, 0.37132068707483007, 0.1679778095238095) 165 | -------------------------------------------------------------------------------- /layers/layer-params-99.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.002 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.005 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.002 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.005 16 | 17 | [conv2a] 18 | epsW=0.01,0.01,0.01 19 | epsB=0.002 20 | momW=0.9,0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005,0.0005 23 | wball=0.003,0.003,0.003 24 | 25 | [conv2b] 26 | epsW=0.01,0.01,0.01 27 | epsB=0.002 28 | momW=0.9,0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005,0.0005 31 | wball=0.003,0.003,0.003 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.002 52 | momW=0.9 53 | 
momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.01,0.01 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy7 154 | # this is like #97 (on gpu) but with different rnorm2 155 | # logs/layers-99.log 156 | # /nobackup/kriz/tmp/ConvNet__2012-06-26_20.35.00 157 | # diff rnorm2 doesnt seem to stop conv2 filters from dying 158 | # now trying wball on conv1, conv2 159 | # logs/layers-99a.log 160 | # /nobackup/kriz/tmp/ConvNet__2012-06-26_23.41.56 161 | # /nobackup/kriz/tmp/ConvNet__2012-06-27_03.57.56 162 | # lot of filters seem to remain random on conv2 163 | -------------------------------------------------------------------------------- 
/layers/layer-params-flickr-102-inet-init.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0001,0.0001,0.001 19 | epsB=0.02 20 | momW=0.9,0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005,0.0005 23 | wball=0.00,0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0001,0.0001,0.001 27 | epsB=0.02 28 | momW=0.9,0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005,0.0005 31 | wball=0.00,0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.0001,0.0001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.0001,0.0001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.0001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.0001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.0001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.0001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.0001,0.0001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.0001,0.0001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.0001,0.0001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.0001,0.0001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc10003] 114 | epsW=0.0001,0.0001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | 
[crossent] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy7 154 | # this is like #97, but on flickr 155 | # also initialized from #97 on imgnet 156 | # /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-30_23.41.20 157 | # epoch 59: set epsw to 0.0001 from 0.001 158 | # epoch 78: killed because i realized its not really fair 159 | -------------------------------------------------------------------------------- /layers/layer-params-flickr-102.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.0 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.0 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000,0.0000 19 | epsB=0.0 20 | momW=0.9,0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005,0.0005 23 | wball=0.00,0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000,0.0000 27 | epsB=0.0 28 | momW=0.9,0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005,0.0005 31 | wball=0.00,0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | 
[conv5b] 74 | epsW=0.00001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc10003] 114 | epsW=0.00001,0.00001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [crossent] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy7 154 | # this is like #97, but on flickr 155 | # /ais/gobi3/u/kriz/tmp/ConvNet__2012-06-30_17.00.32 156 | # epoch 85: set epsw to 0.0001 from 0.001 157 | # epoch 108: set epsw to 0.00001 from 0.0001 on conv1,conv2 158 | # set color noise to 0 from 0.1 159 | # epoch 120: set epsw to 0 from 0.00001 on conv1,conv2 160 | # epoch 136: set epsw to 0.00001 from 0.0001 161 | # epoch 162: killed 162 | -------------------------------------------------------------------------------- /layers/layer-params-flickr-103.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.01 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.00005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.01 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.00005 
15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.01,0.01 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.00005,0.00005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.01,0.01 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.00005,0.00005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.01,0.01 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.00005,0.00005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.01,0.01 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.00005,0.00005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.01 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.00005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.01 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.00005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.01 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.00005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.01 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.00005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.01,0.01 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.00005,0.00005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.01,0.01 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.00005,0.00005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.01,0.01 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.00005,0.00005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.01,0.01 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.00005,0.00005 111 | wball=0,0 112 | 113 | [fc10003] 114 | epsW=0.01,0.01 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.00005,0.00005 119 | wball=0,0 120 | 121 | [rcost] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | 
scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy9 154 | # this is like #101, but on flickr, and with robust flickr cost 155 | # 156 | -------------------------------------------------------------------------------- /layers/layer-params-flickr-105.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0001 3 | epsB=0.02 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0001 11 | epsB=0.02 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0001,0.0001 19 | epsB=0.02 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0001,0.0001 27 | epsB=0.02 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.0001,0.0001 35 | epsB=0.02 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.0001,0.0001 43 | epsB=0.02 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.0001 51 | epsB=0.02 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.0001 59 | epsB=0.02 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | wball=0 64 | 65 | [conv5a] 66 | epsW=0.0001 67 | epsB=0.02 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.0001 75 | epsB=0.02 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.0001,0.0001 83 | epsB=0.02 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.0001,0.0001 91 | epsB=0.02 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.0001,0.0001 99 | epsB=0.02 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.0001,0.0001 107 | epsB=0.02 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 
111 | wball=0,0 112 | 113 | [fc10003] 114 | epsW=0.0001,0.0001 115 | epsB=0.02 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [sqdiff] 122 | coeff=1 123 | 124 | [hs1a] 125 | enable=true 126 | 127 | [hs2a] 128 | enable=true 129 | 130 | [hs1b] 131 | enable=true 132 | 133 | [hs2b] 134 | enable=true 135 | 136 | [rnorm1a] 137 | scale=0.0001 138 | pow=0.75 139 | 140 | [rnorm1b] 141 | scale=0.0001 142 | pow=0.75 143 | 144 | [rnorm2a] 145 | scale=0.0001 146 | pow=0.75 147 | 148 | [rnorm2b] 149 | scale=0.0001 150 | pow=0.75 151 | 152 | # on guppy9 153 | # this is like #101, but on flickr, and with sqdiff objective 154 | # /ais/gobi3/u/kriz/tmp/ConvNet__2012-07-04_23.30.19 155 | # epoch 15: set wc to 0.0005 from 0.00005, set epsw to 0.001 from 0.01 156 | # epoch 93: set epsw to 0.0001 from 0.001 157 | -------------------------------------------------------------------------------- /layers/layer-params-inet-5layer-conv94-2gpu.cfg: -------------------------------------------------------------------------------- 1 | [conv1a] 2 | epsW=0.0000 3 | epsB=0.000 4 | momW=0.9 5 | momB=0.9 6 | wc=0.0005 7 | wball=0.00 8 | 9 | [conv1b] 10 | epsW=0.0000 11 | epsB=0.000 12 | momW=0.9 13 | momB=0.9 14 | wc=0.0005 15 | wball=0.00 16 | 17 | [conv2a] 18 | epsW=0.0000,0.0000 19 | epsB=0.000 20 | momW=0.9,0.9 21 | momB=0.9 22 | wc=0.0005,0.0005 23 | wball=0.00,0.00 24 | 25 | [conv2b] 26 | epsW=0.0000,0.0000 27 | epsB=0.000 28 | momW=0.9,0.9 29 | momB=0.9 30 | wc=0.0005,0.0005 31 | wball=0.00,0.00 32 | 33 | [conv3a] 34 | epsW=0.00001,0.00001 35 | epsB=0.002 36 | momW=0.9,0.9 37 | momB=0.9 38 | wc=0.0005,0.0005 39 | wball=0,0 40 | 41 | [conv3b] 42 | epsW=0.00001,0.00001 43 | epsB=0.002 44 | momW=0.9,0.9 45 | momB=0.9 46 | wc=0.0005,0.0005 47 | wball=0,0 48 | 49 | [conv4a] 50 | epsW=0.00001 51 | epsB=0.002 52 | momW=0.9 53 | momB=0.9 54 | wc=0.0005 55 | wball=0 56 | 57 | [conv4b] 58 | epsW=0.00001 59 | epsB=0.002 60 | momW=0.9 61 | momB=0.9 62 | wc=0.0005 63 | 
wball=0 64 | 65 | [conv5a] 66 | epsW=0.00001 67 | epsB=0.002 68 | momW=0.9 69 | momB=0.9 70 | wc=0.0005 71 | wball=0 72 | 73 | [conv5b] 74 | epsW=0.00001 75 | epsB=0.002 76 | momW=0.9 77 | momB=0.9 78 | wc=0.0005 79 | wball=0 80 | 81 | [fc2048a] 82 | epsW=0.00001,0.00001 83 | epsB=0.002 84 | momW=0.9,0.9 85 | momB=0.9 86 | wc=0.0005,0.0005 87 | wball=0,0 88 | 89 | [fc2048b] 90 | epsW=0.00001,0.00001 91 | epsB=0.002 92 | momW=0.9,0.9 93 | momB=0.9 94 | wc=0.0005,0.0005 95 | wball=0,0 96 | 97 | [fc2048ba] 98 | epsW=0.00001,0.00001 99 | epsB=0.002 100 | momW=0.9,0.9 101 | momB=0.9 102 | wc=0.0005,0.0005 103 | wball=0,0 104 | 105 | [fc2048bb] 106 | epsW=0.00001,0.00001 107 | epsB=0.002 108 | momW=0.9,0.9 109 | momB=0.9 110 | wc=0.0005,0.0005 111 | wball=0,0 112 | 113 | [fc1000] 114 | epsW=0.00001,0.00001 115 | epsB=0.002 116 | momW=0.9,0.9 117 | momB=0.9 118 | wc=0.0005,0.0005 119 | wball=0,0 120 | 121 | [logprob] 122 | coeff=1 123 | topk=5 124 | 125 | [hs1a] 126 | enable=true 127 | 128 | [hs2a] 129 | enable=true 130 | 131 | [hs1b] 132 | enable=true 133 | 134 | [hs2b] 135 | enable=true 136 | 137 | [rnorm1a] 138 | scale=0.0001 139 | pow=0.75 140 | 141 | [rnorm1b] 142 | scale=0.0001 143 | pow=0.75 144 | 145 | [rnorm2a] 146 | scale=0.0001 147 | pow=0.75 148 | 149 | [rnorm2b] 150 | scale=0.0001 151 | pow=0.75 152 | 153 | # on guppy9 154 | # logs/layers-inet-5layer-conv94-2gpu.log 155 | # /nobackup/kriz/tmp/ConvNet__2012-06-18_18.34.17 156 | # logs/layers-inet-5layer-conv94-2gpu.log 157 | # epoch 13: set epsw to 0.001 from 0.01 158 | # epoch 46: set epsw to 0.0001 from 0.001 159 | # epoch 55: set epsw to 0.00001 from 0.0001 on conv1,conv2 160 | # set color noise to 0 from 0.1 161 | # epoch 62: set epsw to 0 from 0.00001 on conv1,conv2 162 | # epoch 84: set epsw to 0.00001 from 0.0001 163 | # epoch 90: killed 164 | # 0.38107167346938753, 0.17608947619047613 165 | -------------------------------------------------------------------------------- /layers/layers-145-half.cfg: 
-------------------------------------------------------------------------------- 1 | [data] 2 | type=data 3 | dataIdx=0 4 | 5 | [labels] 6 | type=data 7 | dataIdx=1 8 | 9 | [conv1a] 10 | type=conv 11 | inputs=data 12 | channels=3 13 | filters=48 14 | padding=0 15 | stride=4 16 | filterSize=11 17 | initW=0.01 18 | partialSum=5 19 | sharedBiases=1 20 | gpu=0 21 | 22 | [rnorm1a] 23 | type=cmrnorm 24 | inputs=conv1a 25 | channels=48 26 | size=5 27 | 28 | [pool1a] 29 | type=pool 30 | pool=max 31 | inputs=rnorm1a 32 | sizeX=3 33 | stride=2 34 | channels=48 35 | neuron=relu 36 | 37 | [conv2a] 38 | type=conv 39 | inputs=pool1a 40 | filters=128 41 | padding=2 42 | stride=1 43 | filterSize=5 44 | channels=48 45 | initW=0.01 46 | initB=1 47 | partialSum=3 48 | sharedBiases=1 49 | neuron=relu 50 | gpu=0 51 | 52 | [rnorm2a] 53 | type=cmrnorm 54 | inputs=conv2a 55 | channels=128 56 | size=5 57 | 58 | [cnorm2a] 59 | type=rnorm 60 | inputs=rnorm2a 61 | channels=128 62 | size=5 63 | 64 | [pool2a] 65 | type=pool 66 | pool=max 67 | inputs=cnorm2a 68 | sizeX=3 69 | stride=2 70 | channels=128 71 | 72 | [conv3a] 73 | type=conv 74 | inputs=pool2a 75 | filters=192 76 | padding=1 77 | stride=1 78 | filterSize=3 79 | channels=128 80 | initW=0.03 81 | partialSum=13 82 | sharedBiases=1 83 | neuron=relu 84 | gpu=0 85 | 86 | [conv4a] 87 | type=conv 88 | inputs=conv3a 89 | filters=192 90 | padding=1 91 | stride=1 92 | filterSize=3 93 | channels=192 94 | neuron=relu 95 | initW=0.03 96 | initB=1 97 | partialSum=13 98 | sharedBiases=1 99 | 100 | [conv5a] 101 | type=conv 102 | inputs=conv4a 103 | filters=256 104 | padding=1 105 | stride=1 106 | filterSize=3 107 | channels=192 108 | initW=0.03 109 | initB=1 110 | partialSum=13 111 | groups=1 112 | randSparse=0 113 | 114 | [pool3a] 115 | type=pool 116 | pool=max 117 | inputs=conv5a 118 | sizeX=3 119 | stride=2 120 | channels=256 121 | neuron=relu 122 | 123 | [fc4096a] 124 | type=fc 125 | inputs=pool3a 126 | outputs=4096 127 | initW=0.01 128 | initB=1 
129 | neuron=relu 130 | gpu=0 131 | 132 | [hs1a] 133 | type=hs 134 | keep=0.5 135 | inputs=fc4096a 136 | 137 | [fc4096ba] 138 | type=fc 139 | inputs=hs1a 140 | outputs=4096 141 | initW=0.01 142 | initB=1 143 | neuron=relu 144 | gpu=0 145 | 146 | [hs2a] 147 | type=hs 148 | keep=0.5 149 | inputs=fc4096ba 150 | 151 | [fc1000] 152 | type=fc 153 | outputs=1000 154 | inputs=hs2a 155 | initW=0.01 156 | gpu=0 157 | 158 | [probs] 159 | type=softmax 160 | inputs=fc1000 161 | 162 | [logprob] 163 | type=cost.logreg 164 | inputs=labels,probs 165 | gpu=0 166 | -------------------------------------------------------------------------------- /package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DEST=./cuda-convnet/trunk 4 | PYTHON_MODULES=/home/spoon/dev/python_modules 5 | 6 | mkdir -p $DEST/src/common 7 | mkdir -p $DEST/src/cudaconv2 8 | mkdir -p $DEST/src/nvmatrix 9 | mkdir -p $DEST/include/common 10 | mkdir -p $DEST/include/cudaconv2 11 | mkdir -p $DEST/include/nvmatrix 12 | mkdir -p $DEST/example-layers 13 | 14 | cp src/*.cu $DEST/src 15 | cp include/*.cuh $DEST/include 16 | 17 | cp ABOUT convdata.py convnet.py layer.py shownet.py $DEST/ 18 | 19 | cp $NVMATRIX_INCLUDE/*.cuh $DEST/include/nvmatrix 20 | cp $NVMATRIX_INCLUDE/../src/nvmatrix*.cu $DEST/src/nvmatrix 21 | cp $PYTHON_MODULES/util.py $PYTHON_MODULES/options.py $PYTHON_MODULES/ordereddict.py $PYTHON_MODULES/gpumodel.py $PYTHON_MODULES/data.py $DEST/ 22 | cp $MYCPP_LIBS_INCLUDE/matrix.h $MYCPP_LIBS_INCLUDE/matrix_funcs.h $MYCPP_LIBS_INCLUDE/queue.h $MYCPP_LIBS_INCLUDE/thread.h $DEST/include/common 23 | cp $MYCPP_LIBS_INCLUDE/matrix.cpp $DEST/src/common 24 | cp $NVCONV2_INCLUDE/conv_util.cuh $NVCONV2_INCLUDE/cudaconv2.cuh $DEST/include/cudaconv2 25 | cp $NVCONV2_INCLUDE/../src/conv_util.cu $NVCONV2_INCLUDE/../src/filter_acts.cu $NVCONV2_INCLUDE/../src/img_acts.cu $NVCONV2_INCLUDE/../src/weight_acts.cu $DEST/src/cudaconv2 26 | 27 | cp ./example-layers/*.cfg 
$DEST/example-layers 28 | cp common-gcc-cuda-4.0.mk build.sh readme.html $DEST 29 | cp Makefile-distrib $DEST/Makefile 30 | 31 | -------------------------------------------------------------------------------- /readme.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /run4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --epochs=22 >> logs/layers-120-4gpu.log 4 | python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto2.cfg --epochs=49 >> logs/layers-120-4gpu.log 5 | python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto3.cfg --epochs=66 >> logs/layers-120-4gpu.log 6 | python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto4.cfg --color-noise=0 --epochs=73 >> logs/layers-120-4gpu.log 7 | python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto5.cfg --epochs=81 >> logs/layers-120-4gpu.log 8 | python convnet.py -f /ais/gobi3/u/kriz/tmp/ConvNet__2012-08-03_14.28.23 --layer-params=./layers/layer-params-120-4gpu-auto6.cfg --epochs=95 >> logs/layers-120-4gpu.log 9 | -------------------------------------------------------------------------------- /src/cpuCNN.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #include "softmaxtree.cuh" 5 | /* 6 | * weights: (numNodes, numFeatures) 7 | * targets: (numNodes, numFeatures) 8 | * 9 | */ 10 | void cpuSoftmaxTreeFwd(float* weights, float* targets, const int numFeatures, SoftmaxTree& tree) { 11 | for (int d = 0; d <= tree.getDepth(); ++d) { 12 | for 
(SoftmaxNodeV::iterator it = tree.getNodesAtDepth(d).begin(); it!= tree.getNodesAtDepth(d).end(); ++it) { 13 | SoftmaxNode& node = **it; 14 | SoftmaxNode* parent = node.getParent(); 15 | for (int f = 0; f < numFeatures; ++f) { 16 | targets[node.getLabel() * numFeatures + f] = weights[node.getLabel() * numFeatures + f] 17 | + (parent == NULL ? 0 : targets[parent->getLabel() * numFeatures + f]); 18 | } 19 | } 20 | } 21 | } 22 | 23 | /* 24 | * grads: (numNodes, numFeatures) 25 | * 26 | */ 27 | void cpuSoftmaxTreeBwd(float* grads, const int numFeatures, SoftmaxTree& tree) { 28 | for (int h = 1; h <= tree.getHeight(); ++h) { 29 | for (SoftmaxNodeV::iterator it = tree.getNodesAtHeight(h).begin(); it!= tree.getNodesAtHeight(h).end(); ++it) { 30 | SoftmaxNode& node = **it; 31 | for (int f = 0; f < numFeatures; ++f) { 32 | grads[node.getLabel() * numFeatures + f] = 0; 33 | } 34 | for (SoftmaxNodeV::iterator itc = node.getChildren().begin(); itc!= node.getChildren().end(); ++itc) { 35 | SoftmaxNode& child = **itc; 36 | for (int f = 0; f < numFeatures; ++f) { 37 | grads[node.getLabel() * numFeatures + f] += grads[child.getLabel() * numFeatures + f]; 38 | } 39 | } 40 | } 41 | } 42 | } 43 | 44 | /* 45 | * weights: (numNodes, numFeatures) 46 | * weightsInc: (numNodes, numFeatures) 47 | * weightsGrad: (numNodes, numFeatures) 48 | * nodeSizes: numNodes-array whose ith element gives number of leaves under 49 | * node with label i. 
50 | */ 51 | void cpuSoftmaxTreeUpdateWeights(float* weights, float* weightsInc, float* weightsGrad, 52 | const int numFeatures, float eps, const float mom, float wc, SoftmaxTree& tree) { 53 | for (int d = 0; d <= tree.getDepth(); d++) { 54 | for (SoftmaxNodeV::iterator it = tree.getNodesAtDepth(d).begin(); it!= tree.getNodesAtDepth(d).end(); ++it) { 55 | SoftmaxNode& node = **it; 56 | float w = wc / node.getSize(); 57 | float e = eps;// * sqrt(node.getSize()); 58 | for (int f = 0; f < numFeatures; ++f) { 59 | weightsInc[node.getLabel() * numFeatures + f] = mom * weightsInc[node.getLabel() * numFeatures + f] 60 | + e * (weightsGrad[node.getLabel() * numFeatures + f] - w * weights[node.getLabel() * numFeatures + f]); 61 | weights[node.getLabel() * numFeatures + f] += weightsInc[node.getLabel() * numFeatures + f]; 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/hostmem.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | PinnedHostMem::PinnedHostMem() : _numBytes(0), _data(NULL) { 4 | 5 | } 6 | 7 | PinnedHostMem::~PinnedHostMem() { 8 | if (_numBytes > 0) { 9 | checkCudaErrors(cudaFreeHost(_data)); 10 | } 11 | } 12 | 13 | void PinnedHostMem::resize(uint bytes) { 14 | if (_numBytes != bytes) { 15 | if (_numBytes > 0) { 16 | checkCudaErrors(cudaFreeHost(_data)); 17 | } 18 | checkCudaErrors(cudaHostAlloc(&_data, bytes, cudaHostAllocPortable)); 19 | _numBytes = bytes; 20 | } 21 | } 22 | 23 | void PinnedHostMem::copyFrom(void* src, uint bytes) { 24 | resize(bytes); 25 | checkCudaErrors(cudaMemcpy(_data, src, bytes, cudaMemcpyDefault)); 26 | } 27 | 28 | void PinnedHostMem::copyTo(void* dst) { 29 | checkCudaErrors(cudaMemcpy(dst, _data, _numBytes, cudaMemcpyDefault)); 30 | } 31 | 32 | void* PinnedHostMem::getData() { 33 | return _data; 34 | } 35 | -------------------------------------------------------------------------------- /src/neuron.cu: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Alex Krizhevsky (akrizhevsky@gmail.com) 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * - Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * - Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 19 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include 29 | 30 | using namespace std; 31 | 32 | Neuron& Neuron::makeNeuron(PyObject* neuronDict) { 33 | string type = pyDictGetString(neuronDict, "type"); 34 | PyObject* neuronParamsDict = PyDict_GetItemString(neuronDict, "params"); 35 | 36 | if (type == "relu") { 37 | return *new ReluNeuron(); 38 | } 39 | 40 | if (type == "nrelu") { 41 | return *new NoisyReluNeuron(); 42 | } 43 | 44 | if (type == "drelu") { 45 | return *new DoubleReluNeuron(pyDictGetFloat(neuronParamsDict, "a")); 46 | } 47 | 48 | if (type == "softrelu") { 49 | return *new SoftReluNeuron(); 50 | } 51 | 52 | if (type == "brelu") { 53 | return *new BoundedReluNeuron(pyDictGetFloat(neuronParamsDict, "a")); 54 | } 55 | 56 | if (type == "abs") { 57 | return *new AbsNeuron(); 58 | } 59 | 60 | if (type == "logistic") { 61 | return *new LogisticNeuron(); 62 | } 63 | 64 | if (type == "tanh") { 65 | return *new TanhNeuron(pyDictGetFloat(neuronParamsDict, "a"), pyDictGetFloat(neuronParamsDict, "b")); 66 | } 67 | 68 | if (type == "square") { 69 | return *new SquareNeuron(); 70 | } 71 | 72 | if (type == "sqrt") { 73 | return *new SqrtNeuron(); 74 | } 75 | 76 | if (type == "linear") { 77 | return *new LinearNeuron(pyDictGetFloat(neuronParamsDict, "a"), pyDictGetFloat(neuronParamsDict, "b")); 78 | } 79 | 80 | if (type == "ident") { 81 | return *new Neuron(); 82 | } 83 | 84 | throw string("Unknown neuron type: ") + type; 85 | } 86 | -------------------------------------------------------------------------------- /src/quantizer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace std; 4 | 5 | /*================= 6 | * Quantizer 7 | * ================ 8 | */ 9 | 10 | Quantizer& Quantizer::make(PyObject* lrsDict) { 11 | string type = pyDictGetString(lrsDict, "type"); 12 | if (type == "default") { 13 | return *new Quantizer(); 14 | } else if (type == "half") { 15 | return *new HalfQuantizer(); 16 | } 17 | throw 
import re
import sys


def binary_sum(arr):
    """Add up a list of numeric rows column by column using pairwise recursion.

    The list is split in half, each half is summed recursively and the two
    partial results are added element-wise. Presumably this serves as a
    cross-check on the plain running sum kept by ``test`` (different
    floating-point accumulation order) -- confirm with the author.

    Args:
        arr: list of rows, each a sequence of numbers. Rows are combined with
            ``zip``, so the result is as long as the shortest row.

    Returns:
        A list of column totals. A one-row input returns that row object
        itself (no copy); an empty input returns ``[]``.
    """
    if not arr:
        # Without this guard an empty list recursed without bound.
        return []
    if len(arr) == 1:
        return arr[0]
    # Floor division keeps the split point an int under true division too.
    mid = len(arr) // 2
    left = binary_sum(arr[:mid])
    right = binary_sum(arr[mid:])
    return [a + b for a, b in zip(left, right)]


def test(path):
    """Average the per-batch value lists recorded in a log file.

    Only lines shaped like ``batch <N>: ... [v1, v2, ...]}, <count>)`` are
    used; every other line is ignored. The bracketed values are accumulated
    per column and each ``<count>`` is added to the running case total.

    Args:
        path: path of the log file to read.

    Returns:
        A tuple ``(errs, errs2, ncases)`` where ``errs`` holds the column
        sums divided by ``ncases`` using a sequential running sum, ``errs2``
        holds the same quantity computed through ``binary_sum``, and
        ``ncases`` is the total of all counts. A file without matching lines
        yields ``([], [], 0)``.

    Raises:
        ZeroDivisionError: if matching lines exist but their counts sum to 0.
    """
    pattern = re.compile(r'^batch \d+:.*\[((?:[\d\.]+(?:, )?)+)\]}, (\d+)\)\s*$')
    sums = []
    rows = []
    ncases = 0
    with open(path) as f:
        for line in f:
            m = pattern.match(line)
            if m is None:
                continue
            row = [float(v) for v in m.group(1).split(',')]
            if not sums:
                sums = [0] * len(row)
            sums = [s + v for s, v in zip(sums, row)]
            rows.append(row)
            ncases += int(m.group(2))
    return ([s / ncases for s in sums],
            [s / ncases for s in binary_sum(rows)],
            ncases)


# Despite its name this is a log-averaging helper, not a unit test; keep
# name-based test collectors (nose, pytest) from trying to run it.
test.__test__ = False


if __name__ == "__main__":
    errs, errs2, ncases = test(sys.argv[1])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(errs)
    print(errs2)
    print("--- %d cases" % ncases)
def print_top5(preds, lnames):
    """Print a prediction list followed by the label name of each entry.

    The raw ``preds`` object is printed first, then one line
    ``Label <position>: <name>`` per entry, in order.

    Args:
        preds: sequence of integer indices into ``lnames``.
        lnames: sequence mapping a label index to its name.
    """
    print(preds)
    for rank, label_idx in enumerate(preds):
        print("Label %d: %s" % (rank, lnames[label_idx]))


if __name__ == "__main__":
    # ``os`` was previously reachable only through ``from util import *``.
    import os
    # The stored image is raw bytes; io.BytesIO handles that on Python 2 and 3.
    from io import BytesIO

    # NOTE(review): these look like the first test batch number, the number
    # of test batches and the number of images per batch -- confirm against
    # the dataset layout.
    first_batch = 3000
    num_batches = 98
    batch_size = 1024

    pred_path = sys.argv[1]
    data_path = sys.argv[2]

    # Pick a random batch and load its image blobs (element 0 of the pickle).
    batch = nr.randint(num_batches) + first_batch
    data = unpickle(os.path.join(data_path, 'data_batch_%d' % batch))[0]

    # One line per image: space-separated 1-based label ids, converted here
    # to 0-based indices. The file is closed as soon as it has been read.
    with open(pred_path) as pred_file:
        preds = [n.array([int(x) - 1 for x in l.split(' ')]) for l in pred_file]

    img_idx = nr.randint(len(data))
    meta = unpickle(os.path.join(data_path, 'batches.meta'))
    lnames = meta['label_names']
    print("Batch: %d, img idx: %d" % (batch, img_idx))

    img = n.asarray(Image.open(BytesIO(data[img_idx])).convert('RGB'))

    # Prediction lines are assumed to follow batch order, batch_size per batch.
    print_top5(preds[(batch - first_batch) * batch_size + img_idx], lnames)

    pl.imshow(img)
    pl.show()