├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── caffe-mean.cpp ├── caffe.cpp ├── eval-caffe-fcn.py ├── eval-caffe.py ├── eval-plot.py ├── fp16.h ├── movidius.cpp ├── mxnet.cpp ├── picpac-stream-lmdb.cpp ├── predict.cpp ├── python.cpp ├── templates ├── fcn │ ├── model │ │ ├── blobs │ │ └── caffe.model.tmpl │ ├── solver.prototxt.tmpl │ ├── train.prototxt.tmpl │ └── train.sh └── googlenet │ ├── model │ ├── blobs │ └── caffe.model.tmpl │ ├── quick_solver.prototxt.tmpl │ ├── readme.md │ ├── solver.prototxt │ ├── train.prototxt.tmpl │ └── train.sh ├── test_python.cpp ├── torch.cpp ├── train-caffe-fcn.py ├── visualize.cpp ├── xnn-roc.cpp ├── xnn.cpp ├── xnn.h └── xnn_train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Boost.NumPy"] 2 | path = Boost.NumPy 3 | url = https://github.com/ndarray/Boost.NumPy.git 4 | [submodule "json11"] 5 | path = json11 6 | url = https://github.com/dropbox/json11 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Wei Dong 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all install 2 | CC=g++ 3 | CXX=g++ 4 | CFLAGS += -O3 -g 5 | CXXFLAGS += -Ijson11 -DUSE_CAFFE=1 -std=c++11 -O3 -fopenmp -g -I/usr/include/python2.7 -I/usr/local/cuda/include -DCPU_ONLY=1 6 | #CXXFLAGS += -DUSE_PYTHON=1 7 | LDFLAGS += -fopenmp -L/usr/lib64 8 | # add -lmxnet for mxnet 9 | # add -lpython2.7 for python 10 | # add those for Torch: -lTH -lluaT -lluajit -llapack -lopenblas· 11 | LDLIBS = libxnn.a -lcaffe -lpicpac $(shell pkg-config --libs opencv) \ 12 | -lboost_timer -lboost_chrono -lboost_thread -lboost_filesystem -lboost_system -lboost_program_options -lprotoc -lprotobuf -lglog #-lpython2.7 13 | 14 | COMMON = libxnn.a json11.o 15 | PROGS = predict xnn-roc #test_python # visualize predict #caffex-extract caffex-predict batch-resize import-images 16 | 17 | all: $(COMMON) $(PROGS) 18 | 19 | libxnn.a: xnn.o caffe.o # python.o # mxnet.o python.o 20 | ar rvs $@ $^ 21 | 22 | json11.o: json11/json11.cpp 23 | $(CXX) $(CXXFLAGS) -o $@ -c $^ 24 | 25 | 26 | $(PROGS): %: %.o $(COMMON) 27 | 28 | clean: 29 | rm $(PROGS) *.o 30 | 31 | install: libxnn.a 32 | cp libxnn.a /usr/local/lib 33 | cp xnn.h /usr/local/include 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | XNN: A C++ Prediction API that Wraps Caffe, MXNET and Python 2 | ============================================================= 3 | 4 | Author: Wei Dong (wdong@wdong.org) 5 | 6 | # Usage 7 | ``` 8 | #include 9 | 10 | ... 11 | 12 | xnn::set_mode(1); // 0 for CPU, 1 for GPU, doesn't affect theano 13 | // use theanorc or env variable to tweak the 14 | // behavior of theano. 
15 | int batch = 1; 16 | xnn::Model *model = xnn::Model::create("model dir", batch); 17 | 18 | cv::Mat image = cv::imread("some.jpg", -1); 19 | vector<float> out; 20 | 21 | model->apply(image, &out); 22 | model->apply(vector<cv::Mat>{image}, &out); 23 | ``` 24 | 25 | At most `batch` images can be passed in each invocation of Model::apply. 26 | If fewer than `batch` images are passed, the library internally pads the 27 | input up to a whole batch for prediction, so the cost will be as if 28 | `batch` images were predicted. 29 | The returned vector will have the size of `(#category * batch)` or 30 | `(out image size * #category * batch)`, 31 | depending on whether the model does classification or 32 | segmentation. 33 | 34 | 35 | The library is still under development and doesn't yet coherently handle 36 | classification and segmentation with different backends, and the deployment 37 | method is subject to change. 38 | 39 | # Building and Installation 40 | 41 | The library requires that Caffe, MXNet, Theano/Lasagne and other Python 42 | libraries be properly installed. Use [these scripts](https://github.com/aaalgo/centos7-deep) 43 | to install everything on a fresh CentOS 7 installation. 44 | 45 | # Model Deployment 46 | ## Caffe 47 | The model directory should contain the following files: 48 | - caffe.model: copy the deploy.prototxt file. 49 | - caffe.params: copy one of the ".caffemodel" files. 50 | - blobs: text file containing the blob names to extract, usually "prob". If multiple blob names are given, one on each line, all the blobs will be extracted and concatenated. 51 | - caffe.mean [optional]: copy the xxx_mean.binaryproto file, or a text file containing 1-3 numbers providing the mean values of RGB channels. 52 | 53 | If the input blob has an image size of (1, 1), then a segmentation 54 | network is assumed: input images are not resized and the output has 55 | the same size as the input except for the number of channels. 
56 | 57 | Many pre-trained models can be found [here](https://github.com/BVLC/caffe/wiki/Model-Zoo). 58 | 59 | ## MXNet 60 | The model directory should contain the following files: 61 | - mxnet.symbol: the JSON model file. 62 | - mxnet.params: model parameters. 63 | - mxnet.meta: a JSON file specifying shape and mean information like the one below. 64 | ``` 65 | {"shape": 224, "mean": [123.68, 116.779, 103.939], "channels":3} 66 | ``` 67 | 68 | Many pre-trained models can be found [here](https://github.com/dmlc/mxnet-model-gallery). 69 | 70 | ## Python 71 | The model directory is essentially a python module, which should contain the following files: 72 | - `__init__.py`: typically empty. 73 | - model.py: the python file containing the model. 74 | 75 | The module model.py should provide two functions: 76 | 77 | - `model.shape()`: returns (-1, channel, rows, cols) 78 | - `model.load(shape)`: given shape, loads parameters and returns a function 79 | that makes predictions. 80 | 81 | The following minimal script should be used to test that 82 | a python model directory is properly deployed: 83 | 84 | ``` 85 | import numpy as np 86 | import model 87 | 88 | shape = model.shape() 89 | 90 | batch = 16 91 | shape = (batch, shape[1], shape[2], shape[3]) 92 | 93 | pred_fn = model.load(shape) 94 | 95 | input = np.zeros(shape, dtype=float) 96 | output = pred_fn(input) 97 | ``` 98 | -------------------------------------------------------------------------------- /caffe-mean.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "xnn.h" 4 | 5 | using namespace std; 6 | using namespace caffe; 7 | 8 | int main (int argc, char *argv[]) { 9 | if (argc < 2) return 0; 10 | string mean_file(argv[1]); 11 | 12 | BlobProto blob_proto; 13 | ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); 14 | 15 | /* Convert from BlobProto to Blob */ 16 | Blob meanblob; 17 | meanblob.FromProto(blob_proto); 18 | /* The format 
of the mean file is planar 32-bit float BGR or grayscale. */ 19 | vector channels; 20 | float* data = meanblob.mutable_cpu_data(); 21 | for (int i = 0; i < meanblob.channels(); ++i) { 22 | /* Extract an individual channel. */ 23 | cv::Mat channel(meanblob.height(), meanblob.width(), CV_32FC1, data); 24 | channels.push_back(channel); 25 | data += meanblob.height() * meanblob.width(); 26 | } 27 | 28 | /* Merge the separate channels into a single image. */ 29 | cv::Mat merged; 30 | cv::merge(channels, merged); 31 | cv::Scalar channel_mean = cv::mean(merged); 32 | std::cerr << channel_mean[0] << ' ' << channel_mean[1] << ' ' << channel_mean[2] << std::endl; 33 | } 34 | -------------------------------------------------------------------------------- /caffe.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "xnn.h" 5 | 6 | namespace xnn { 7 | 8 | using namespace caffe; 9 | using std::vector; 10 | using boost::shared_ptr; 11 | 12 | class CaffeSetMode { 13 | public: 14 | CaffeSetMode (int mode) { 15 | if (mode == 0) { 16 | Caffe::set_mode(Caffe::CPU); 17 | } 18 | else { 19 | Caffe::set_mode(Caffe::GPU); 20 | } 21 | } 22 | }; 23 | 24 | class CaffeModel: public Model, CaffeSetMode { 25 | protected: 26 | Net net; 27 | Blob *input_blob; 28 | vector>> output_blobs; 29 | public: 30 | CaffeModel (fs::path const& dir, int batch) 31 | : CaffeSetMode(mode), 32 | net((dir/"caffe.model").native(), TEST) 33 | { 34 | BOOST_VERIFY(batch >= 1); 35 | //CHECK_EQ(net.num_inputs(), 1) << "Network should have exactly one input: " << net.num_inputs(); 36 | input_blob = net.input_blobs()[0]; 37 | shape[0] = batch; 38 | shape[1] = input_blob->shape(1); 39 | CHECK(shape[1] == 3 || shape[1] == 1) 40 | << "Input layer should have 1 or 3 channels." 
<< shape[1]; 41 | net.CopyTrainedLayersFrom((dir/"caffe.params").native()); 42 | // resize to required batch size 43 | shape[2] = input_blob->shape(2); 44 | shape[3] = input_blob->shape(3); 45 | input_blob->Reshape(shape[0], shape[1], shape[2], shape[3]); 46 | net.Reshape(); 47 | // set mean file 48 | means[0] = means[1] = means[2] = 0; 49 | fs::path mean_file = dir / "caffe.mean"; 50 | fs::ifstream test(mean_file); 51 | if (test) { 52 | BlobProto blob_proto; 53 | // check old format 54 | if (ReadProtoFromBinaryFile(mean_file.native(), &blob_proto)) { 55 | /* Convert from BlobProto to Blob */ 56 | Blob meanblob; 57 | meanblob.FromProto(blob_proto); 58 | CHECK_EQ(meanblob.channels(), channels()) 59 | << "Number of channels of mean file doesn't match input layer."; 60 | /* The format of the mean file is planar 32-bit float BGR or grayscale. */ 61 | vector mats; 62 | float* data = meanblob.mutable_cpu_data(); 63 | for (int i = 0; i < channels(); ++i) { 64 | /* Extract an individual channel. */ 65 | cv::Mat channel(meanblob.height(), meanblob.width(), CV_32FC1, data); 66 | mats.push_back(channel); 67 | data += meanblob.height() * meanblob.width(); 68 | } 69 | /* Merge the separate channels into a single image. 
*/ 70 | cv::Mat merged; 71 | cv::merge(mats, merged); 72 | cv::Scalar channel_mean = cv::mean(merged); 73 | //mean = cv::Mat(input_height, input_width, merged.type(), channel_mean); 74 | means[0] = means[1] = means[2] = channel_mean[0]; 75 | if (channels() > 1) { 76 | means[1] = channel_mean[1]; 77 | means[2] = channel_mean[2]; 78 | } 79 | } 80 | // if not proto format, then the mean file is just a bunch of textual numbers 81 | else { 82 | test >> means[0]; 83 | means[1] = means[2] = means[0]; 84 | test >> means[1]; 85 | test >> means[2]; 86 | } 87 | } 88 | { 89 | fs::ifstream is(dir/"blobs"); 90 | string blob; 91 | CHECK(is) << "cannot open blobs file."; 92 | while (is >> blob) { 93 | output_blobs.push_back(net.blob_by_name(blob)); 94 | } 95 | } 96 | } 97 | 98 | virtual void apply (vector const &images, vector *ft) { 99 | int batch = shape[0]; 100 | CHECK(!images.empty()) << "must input >= 1 images"; 101 | CHECK(images.size() <= batch) << "Too many input images."; 102 | if (fcn()) { // for FCN, we need to resize network according to image size 103 | cv::Size sz = images[0].size(); 104 | for (unsigned i = 1; i < images.size(); ++i) { 105 | CHECK(images[i].size() == sz) << "all images must be the same size"; 106 | } 107 | int input_height = input_blob->shape(2); 108 | int input_width = input_blob->shape(3); 109 | if ((input_width != sz.width) 110 | || (input_height != sz.height)) { 111 | input_blob->Reshape(shape[0], shape[1], sz.height, sz.width); 112 | net.Reshape(); 113 | } 114 | } 115 | float *input_data = input_blob->mutable_cpu_data(); 116 | float *e = preprocess(images, input_data); 117 | CHECK(e -input_data <= input_blob->count()); 118 | net.ForwardPrefilled(); 119 | 120 | // compute output dimension 121 | int dim = 0; 122 | for (auto const &b: output_blobs) { 123 | int d = b->count() / batch; 124 | LOG(INFO) << "output: " << b->shape_string(); 125 | dim += d; 126 | } 127 | LOG(INFO) << "output size " << images.size() << " x " << dim; 128 | 
ft->resize(images.size() * dim); // total output size 129 | int off = 0; 130 | for (auto const &b: output_blobs) { 131 | int blob_dim = b->count() / batch; 132 | float const *from_begin = b->cpu_data(); 133 | for (int i = 0; i < images.size(); ++i) { 134 | float const *from_end = from_begin + blob_dim; 135 | std::copy(from_begin, from_end, &ft->at(i * dim + off)); 136 | from_begin = from_end; 137 | } 138 | off += blob_dim; 139 | } 140 | CHECK(off == dim); 141 | } 142 | }; 143 | 144 | Model *Model::create_caffe (fs::path const &dir, int batch) { 145 | return new CaffeModel(dir, batch); 146 | } 147 | 148 | class CaffeColorizeModel: public CaffeModel { 149 | public: 150 | CaffeColorizeModel (fs::path const& dir, int batch) 151 | : CaffeModel(dir, batch) 152 | { 153 | Blob *trecip = net.input_blobs()[1]; 154 | float *data = trecip->mutable_cpu_data(); 155 | float v = 6 / std::log(10.0); 156 | std::fill(data, data+trecip->count(), v); 157 | } 158 | }; 159 | 160 | Model *Model::create_colorize_caffe (fs::path const &dir, int batch) { 161 | return new CaffeColorizeModel(dir, batch); 162 | } 163 | 164 | } 165 | 166 | 167 | -------------------------------------------------------------------------------- /eval-caffe-fcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | import logging 5 | import argparse 6 | import xnn_train 7 | 8 | logging.basicConfig(level=logging.DEBUG) 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("ws", nargs=1) # workspace, must not exist 12 | 13 | args = parser.parse_args() 14 | ws = args.ws[0] 15 | 16 | if not os.path.exists(ws): 17 | logging.error("%s does not exists" % ws) 18 | sys.exit(1) 19 | 20 | os.chdir(ws) 21 | xnn_train.caffe_eval_fcn() 22 | 23 | -------------------------------------------------------------------------------- /eval-caffe.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | import sys 3 | import os 4 | import re 5 | import glob 6 | import shutil 7 | import logging 8 | import argparse 9 | import subprocess 10 | import simplejson as json 11 | from jinja2 import Environment, FileSystemLoader 12 | 13 | base_dir = os.path.abspath(os.path.dirname(__file__)) 14 | logging.basicConfig(level=logging.DEBUG) 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("ws", nargs=1) # workspace, must not exist 18 | parser.add_argument("--channels", default=3, type=int) 19 | parser.add_argument("--split", default=10, type=int) 20 | parser.add_argument("--fold", default=0, type=int) 21 | parser.add_argument("--annotate", default="none") 22 | parser.add_argument("--batch", default=16, type=int) 23 | args = parser.parse_args() 24 | 25 | #db = os.path.abspath(args.db[0]) 26 | ws = args.ws[0] 27 | 28 | assert os.path.isdir(ws) 29 | os.chdir(ws) 30 | 31 | # evaluation 32 | shots = [] 33 | # find all saved snapshots 34 | for x in glob.glob('snapshots/*.caffemodel'): 35 | it = int(x.split('_')[-1].split('.')[0]) 36 | shots.append((it, x)) 37 | pass 38 | 39 | # sort by iteration 40 | shots = sorted(shots, key = lambda x: x[0]) 41 | 42 | if not os.path.exists("eval"): 43 | os.mkdir("eval") 44 | pass 45 | 46 | hist = [] 47 | best = None 48 | best_score = 100 49 | best_path = None 50 | for it, path in shots: 51 | #print it, path 52 | sys.stdout.write('%d\t' % it) 53 | sys.stdout.flush() 54 | out = os.path.join('eval', str(it)) 55 | if os.path.exists(out): 56 | #print "%d already done, skipping..." 
% it 57 | subprocess.check_call('cat %s' % out, shell=True) 58 | else: 59 | if os.path.islink('model/caffe.params'): 60 | os.remove('model/caffe.params') 61 | os.symlink(os.path.abspath(path), 'model/caffe.params') 62 | cmd = '%s model db --batch %s --mode 1 --split %d --split_fold %d --annotate %s | tee %s' % (os.path.join(base_dir, 'xnn-roc'), args.batch, args.split, args.fold, args.annotate, out) 63 | #print cmd 64 | subprocess.check_call(cmd, shell=True) 65 | os.remove('model/caffe.params') 66 | continue 67 | cc = [] 68 | with open(out, 'r') as f: 69 | for l in f: 70 | l = l.strip().split('\t') 71 | if (len(l) != 2): 72 | continue 73 | x, y = l 74 | cc.append((float(x), float(y))) 75 | #if float(y) > 0.5: 76 | # hist.append((it, float(x))) 77 | # break 78 | #pass 79 | pass 80 | for x, y in cc: 81 | if y > 0.5: 82 | hist.append((it, x)) 83 | if x <= best_score: 84 | best = it 85 | best_score = x 86 | best_path = path 87 | break 88 | pass 89 | print hist[-1] 90 | pass 91 | 92 | sys.exit(0) 93 | 94 | print "Best iteration is %s, with score %g" % (best, best_score) 95 | shutil.copy(best_path, 'model/caffe.params') 96 | 97 | -------------------------------------------------------------------------------- /eval-plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import glob 3 | import os 4 | import sys 5 | import subprocess 6 | import matplotlib.pyplot as plt 7 | import pickle 8 | 9 | def load (dir): 10 | all = [] 11 | for x in glob.glob('%s/*' % dir): 12 | it = int(os.path.basename(x)) 13 | all.append((it, x)) 14 | pass 15 | all = sorted(all, key = lambda x: x[0]) 16 | 17 | hist = [] 18 | for it, out in all: 19 | cc = [] 20 | with open(out, 'r') as f: 21 | for l in f: 22 | l = l.strip().split('\t') 23 | if (len(l) != 2): 24 | continue 25 | x, y = l 26 | cc.append((float(x), float(y))) 27 | #if float(y) > 0.5: 28 | # hist.append((it, float(x))) 29 | # break 30 | #pass 31 | pass 32 | for x, y in cc: 
33 | if y > 0.5: 34 | hist.append((it, x)) 35 | break 36 | pass 37 | print hist[-1] 38 | pass 39 | return hist 40 | 41 | fig, ax = plt.subplots(nrows=1, ncols=1) 42 | 43 | l_handles = [] 44 | l_labels = [] 45 | for name in sys.argv[1:]: 46 | hist = load(os.path.join(name, 'eval')) 47 | x, y = zip(*hist) 48 | l, = ax.plot(x, y) 49 | l_handles.append(l) 50 | l_labels.append(name) 51 | pass 52 | 53 | fig.legend(l_handles, l_labels) 54 | fig.savefig('eval.png') 55 | plt.close(fig) 56 | 57 | -------------------------------------------------------------------------------- /fp16.h: -------------------------------------------------------------------------------- 1 | // Copied from Numpy 2 | 3 | 4 | static unsigned half2float(unsigned short h); 5 | unsigned short float2half(unsigned f); 6 | void floattofp16(unsigned char *dst, float *src, unsigned nelem); 7 | void fp16tofloat(float *dst, unsigned char *src, unsigned nelem); 8 | 9 | 10 | -------------------------------------------------------------------------------- /movidius.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define USE_MOVIDIUS 1 5 | #include "xnn.h" 6 | #include "fp16.h" 7 | 8 | namespace xnn { 9 | 10 | #define NAME_SIZE 100 11 | 12 | using std::vector; 13 | typedef unsigned short half; 14 | 15 | class MovidiusModel: public Model { 16 | void *LoadFile(const char *path, unsigned int *length) 17 | { 18 | FILE *fp; 19 | char *buf; 20 | 21 | fp = fopen(path, "rb"); 22 | if(fp == NULL) 23 | return 0; 24 | fseek(fp, 0, SEEK_END); 25 | *length = ftell(fp); 26 | rewind(fp); 27 | if(!(buf = (char*) malloc(*length))) 28 | { 29 | fclose(fp); 30 | return 0; 31 | } 32 | if(fread(buf, 1, *length, fp) != *length) 33 | { 34 | fclose(fp); 35 | free(buf); 36 | return 0; 37 | } 38 | fclose(fp); 39 | return buf; 40 | } 41 | protected: 42 | void *deviceHandle; 43 | void* graphHandle; 44 | vector imagebuf; 45 | vector halfbuf; 46 | static const int 
networkDim = 224; 47 | public: 48 | MovidiusModel (fs::path const& dir) 49 | { 50 | shape[0] = 1; 51 | shape[1] = 3; 52 | shape[2] = networkDim; 53 | shape[3] = networkDim; 54 | mvncStatus retCode; 55 | char devName[NAME_SIZE]; 56 | retCode = mvncGetDeviceName(0, devName, NAME_SIZE); 57 | CHECK(retCode == MVNC_OK); 58 | 59 | // Try to open the NCS device via the device name 60 | retCode = mvncOpenDevice(devName, &deviceHandle); 61 | CHECK(retCode == MVNC_OK); 62 | 63 | // Now read in a graph file 64 | unsigned int graphFileLen; 65 | void* graphFileBuf = LoadFile((dir/"graph").native().c_str(), &graphFileLen); 66 | 67 | // allocate the graph 68 | retCode = mvncAllocateGraph(deviceHandle, &graphHandle, graphFileBuf, graphFileLen); 69 | CHECK(retCode == MVNC_OK); 70 | free(graphFileBuf); 71 | 72 | means[0] = means[1] = means[2] = 0; 73 | 74 | fs::path mean_file = dir / "mean"; 75 | fs::ifstream test(mean_file); 76 | if (test) { 77 | test >> means[0]; 78 | means[1] = means[2] = means[0]; 79 | test >> means[1]; 80 | test >> means[2]; 81 | } 82 | 83 | imagebuf.resize(networkDim * networkDim * 3); 84 | halfbuf.resize(imagebuf.size()); 85 | } 86 | 87 | ~MovidiusModel () { 88 | mvncDeallocateGraph(graphHandle); 89 | mvncCloseDevice(deviceHandle); 90 | } 91 | 92 | virtual void apply (vector const &images, vector *ft) { 93 | mvncStatus retCode; 94 | int batch = shape[0]; 95 | CHECK(!images.empty()) << "must input >= 1 images"; 96 | int off = 0; 97 | for (int i = 0; i < images.size(); ++i) { 98 | CHECK(0) << "Need to update preprocess to work with NHWC"; 99 | float *e = preprocess(images[i], &imagebuf[0]); 100 | floattofp16((unsigned char *)&halfbuf[0], &imagebuf[0], imagebuf.size()); 101 | retCode = mvncLoadTensor(graphHandle, &halfbuf[0], sizeof(halfbuf[0])*halfbuf.size(), NULL); 102 | CHECK(retCode == MVNC_OK); 103 | void* resultData16; 104 | void* userParam; 105 | unsigned int lenResultData; 106 | retCode = mvncGetResult(graphHandle, &resultData16, &lenResultData, 
&userParam); 107 | CHECK(retCode == MVNC_OK); 108 | int numResults = lenResultData / sizeof(half); 109 | if (i == 0) { 110 | ft->resize(images.size() * numResults); 111 | } 112 | fp16tofloat(&ft->at(off), (unsigned char*)resultData16, numResults); 113 | off += numResults; 114 | } 115 | } 116 | }; 117 | 118 | Model *Model::create_movidius (fs::path const &dir, int batch) { 119 | return new MovidiusModel(dir); 120 | } 121 | 122 | } 123 | 124 | 125 | -------------------------------------------------------------------------------- /mxnet.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "xnn.h" 7 | 8 | namespace xnn { 9 | 10 | using namespace json11; 11 | using std::string; 12 | using std::vector; 13 | 14 | static void readall (fs::path const &path, string *content) { 15 | fs::ifstream ifs(path, std::ios::in | std::ios::binary); 16 | CHECK(ifs); 17 | ifs.seekg(0, std::ios::end); 18 | size_t length = ifs.tellg(); 19 | CHECK(ifs); 20 | content->resize(length); 21 | ifs.seekg(0, std::ios::beg); 22 | ifs.read(&content->at(0), content->size()); 23 | } 24 | 25 | class MXNetModel: public Model { 26 | PredictorHandle out; 27 | vector image_data; 28 | public: 29 | MXNetModel (fs::path const& dir, int batch) 30 | { 31 | BOOST_VERIFY(batch >= 1); 32 | string symbol; 33 | string params; 34 | string meta; 35 | readall(dir/"mxnet.symbol", &symbol); 36 | readall(dir/"mxnet.params", ¶ms); 37 | readall(dir/"mxnet.meta", &meta); 38 | string json_err; 39 | 40 | Json json = Json::parse(meta, json_err); 41 | CHECK(json_err.empty()); 42 | 43 | int dev_type = mode == 0 ? 1 : 2; // 1: cpu, 2: gpu 44 | int dev_id = 0; // arbitrary. 
45 | mx_uint num_input_nodes = 1; // 1 for feedforward 46 | const char* input_key[1] = {"data"}; 47 | const char** input_keys = input_key; 48 | 49 | int width = json["shape"].number_value(); 50 | int height = width; 51 | int channels = json["channels"].number_value(); 52 | if (json["mean"].is_array()) { 53 | int x = 0; 54 | for (auto v: json["mean"].array_items()) { 55 | means[x++] = v.number_value(); 56 | } 57 | CHECK(x == 3); 58 | } 59 | else { 60 | means[0] = means[1] = means[2] = json["mean"].number_value(); 61 | } 62 | 63 | shape[0] = batch; 64 | shape[1] = channels; 65 | shape[2] = height; 66 | shape[3] = width; 67 | 68 | const mx_uint input_shape_indptr[2] = { 0, 4 }; 69 | const mx_uint input_shape_data[4] = {batch, 70 | static_cast(channels), 71 | static_cast(width), 72 | static_cast(height) }; 73 | // ( trained_width, trained_height, channel, num) 74 | MXPredCreate(symbol.c_str(), 75 | ¶ms[0], 76 | params.size(), 77 | dev_type, 78 | dev_id, 79 | num_input_nodes, 80 | input_keys, 81 | input_shape_indptr, 82 | input_shape_data, 83 | &out); 84 | image_data.resize(shape[0] * shape[1] * shape[2] * shape[3]); 85 | } 86 | 87 | ~MXNetModel () { 88 | MXPredFree(out); 89 | } 90 | 91 | virtual void apply (vector const &images, vector *ft) { 92 | // Just a big enough memory 1000x1000x3 93 | int batch = shape[0]; 94 | CHECK(!images.empty()) << "must input >= 1 images"; 95 | CHECK(images.size() <= batch) << "Too many input images."; 96 | if (fcn()) { // for FCN, we need to resize network according to image size 97 | cv::Size sz = images[0].size(); 98 | for (unsigned i = 1; i < images.size(); ++i) { 99 | CHECK(images[i].size() == sz) << "all images must be the same size"; 100 | } 101 | int bufsz = image_buffer_size(images[0]); 102 | image_data.resize(bufsz * images.size()); 103 | } 104 | float *e = preprocess(images, &image_data[0], true); 105 | CHECK(e - &image_data[0] == image_data.size()); 106 | //-- Set Input Image 107 | MXPredSetInput(out, "data", image_data.data(), 
image_data.size()); 108 | //-- Do Predict Forward 109 | MXPredForward(out); 110 | 111 | mx_uint output_index = 0; 112 | mx_uint *shape = 0; 113 | mx_uint shape_len; 114 | 115 | //-- Get Output Result 116 | MXPredGetOutputShape(out, output_index, &shape, &shape_len); 117 | 118 | size_t size = 1; 119 | for (mx_uint i = 0; i < shape_len; ++i) size *= shape[i]; 120 | 121 | ft->resize(size); 122 | MXPredGetOutput(out, output_index, &ft->at(0), size); 123 | } 124 | }; 125 | 126 | Model *Model::create_mxnet (fs::path const &dir, int batch) { 127 | return new MXNetModel(dir, batch); 128 | } 129 | 130 | } 131 | 132 | 133 | -------------------------------------------------------------------------------- /picpac-stream-lmdb.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "picpac-cv.h" 9 | 10 | using namespace std; 11 | using namespace picpac; 12 | using boost::scoped_ptr; 13 | 14 | string backend("lmdb"); 15 | 16 | int main(int argc, char const* argv[]) { 17 | BatchImageStream::Config config; 18 | unsigned max; 19 | fs::path input_path; 20 | fs::path output_path; 21 | 22 | namespace po = boost::program_options; 23 | po::options_description desc("Allowed options"); 24 | desc.add_options() 25 | ("help,h", "produce help message.") 26 | ("max", po::value(&max)->default_value(100), "") 27 | ("input", po::value(&input_path), "") 28 | ("output", po::value(&output_path), "") 29 | ; 30 | #define PICPAC_CONFIG_UPDATE(C,p) desc.add_options()(#p, po::value(&C.p)->default_value(C.p), "") 31 | PICPAC_CONFIG_UPDATE_ALL(config); 32 | #undef PICPAC_CONFIG_UPDATE 33 | 34 | po::positional_options_description p; 35 | p.add("input", 1); 36 | p.add("output", 1); 37 | 38 | po::variables_map vm; 39 | po::store(po::command_line_parser(argc, argv). 
40 | options(desc).positional(p).run(), vm); 41 | po::notify(vm); 42 | 43 | if (vm.count("help") || input_path.empty() || output_path.empty()) { 44 | cout << "Usage:" << endl; 45 | cout << "\tpicpac-stat ... " << endl; 46 | cout << desc; 47 | cout << endl; 48 | return 0; 49 | } 50 | ImageStream db(input_path, config); 51 | scoped_ptr image_db(caffe::db::GetDB(backend)); 52 | image_db->Open(output_path.native(), caffe::db::NEW); 53 | scoped_ptr image_txn(image_db->NewTransaction()); 54 | int c = 0; 55 | for (unsigned i = 0; i < max; ++i) { 56 | try { 57 | ImageStream::Value v(db.next()); 58 | CHECK(v.image.total() > 0); 59 | caffe::Datum datum; 60 | caffe::CVMatToDatum(v.image, &datum); 61 | datum.set_label(v.label); 62 | string key = lexical_cast(c) , value; 63 | CHECK(datum.SerializeToString(&value)); 64 | image_txn->Put(key, value); 65 | ++c; 66 | } 67 | catch (EoS const &) { 68 | break; 69 | } 70 | } 71 | image_txn->Commit(); 72 | 73 | return 0; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /predict.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "xnn.h" 4 | 5 | using namespace std; 6 | using namespace boost; 7 | using namespace xnn; 8 | 9 | int main(int argc, char **argv) { 10 | namespace po = boost::program_options; 11 | fs::path model_dir; 12 | fs::path image_path; 13 | int mode; 14 | 15 | po::options_description desc("Allowed options"); 16 | desc.add_options() 17 | ("help,h", "produce help message.") 18 | ("model", po::value(&model_dir), "") 19 | ("path", po::value(&image_path), "") 20 | ("mode", po::value(&mode)->default_value(0), "") 21 | ; 22 | 23 | po::positional_options_description p; 24 | p.add("model", 1); 25 | p.add("path", 1); 26 | 27 | po::variables_map vm; 28 | po::store(po::command_line_parser(argc, argv). 
29 | options(desc).positional(p).run(), vm); 30 | po::notify(vm); 31 | 32 | if (vm.count("help") || model_dir.empty() || image_path.empty()) { 33 | cerr << desc; 34 | return 1; 35 | } 36 | Model::set_mode(mode); 37 | unique_ptr model(Model::create(model_dir)); 38 | cv::Mat image = cv::imread(image_path.native(), -1); 39 | vector ft; 40 | model->apply(image, &ft); 41 | for (unsigned i = 0; i < ft.size(); ++i){ 42 | cout << i << '\t' << ft[i] << endl; 43 | } 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /python.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "xnn.h" 7 | 8 | namespace xnn { 9 | 10 | using std::string; 11 | using std::vector; 12 | 13 | static bool python_initialized = false; 14 | 15 | void check_import_array () { 16 | import_array(); 17 | } 18 | 19 | PyObject *module_call (PyObject *module, char const *name, PyObject *args) { 20 | PyObject *pFunc = PyObject_GetAttrString(module, name); 21 | CHECK(pFunc); 22 | //cerr << "Loaded: " << pFunc << endl; 23 | CHECK(PyCallable_Check(pFunc)); 24 | /* pFunc is a new reference */ 25 | PyObject *ret = PyObject_CallObject(pFunc, args); 26 | Py_DECREF(pFunc); 27 | Py_DECREF(args); 28 | return ret; 29 | } 30 | 31 | class PythonModel: public Model { 32 | PyObject *module; 33 | PyObject *predict; 34 | PyObject *input; 35 | vector image_data; 36 | public: 37 | PythonModel (fs::path const& dir, int batch) 38 | { 39 | CHECK(!python_initialized); 40 | python_initialized = true; 41 | // initialize python 42 | Py_Initialize(); 43 | check_import_array(); 44 | { // add search path 45 | PyObject* sysPath = PySys_GetObject((char*)"path"); 46 | BOOST_VERIFY(sysPath); 47 | PyObject* cwd = PyString_FromString(dir.native().c_str()); 48 | BOOST_VERIFY(cwd); 49 | PyList_Append(sysPath, cwd); 50 | Py_DECREF(cwd); 51 | } 52 | BOOST_VERIFY(batch >= 1); 53 | 
54 | module = PyImport_ImportModule("model"); 55 | CHECK(module); 56 | PyObject *accept_shape = module_call(module, "shape", Py_BuildValue("()")); 57 | CHECK(accept_shape); 58 | PyArg_ParseTuple(accept_shape, "iiii", &shape[0], &shape[1], &shape[2], &shape[3]); 59 | Py_DECREF(accept_shape); 60 | shape[0] = batch; 61 | predict = module_call(module, "load", 62 | Py_BuildValue("((iiii))", shape[0], shape[1], shape[2], shape[3])); 63 | CHECK(predict); 64 | CHECK(PyCallable_Check(predict)); 65 | npy_intp dims[] = {shape[0], shape[1], shape[2], shape[3]}; 66 | input = PyArray_SimpleNew(4, dims, NPY_FLOAT); 67 | CHECK(input); 68 | CHECK(!fcn()); 69 | } 70 | 71 | ~PythonModel () { 72 | Py_DECREF(input); 73 | Py_DECREF(predict); 74 | Py_DECREF(module); 75 | Py_Finalize(); 76 | } 77 | 78 | virtual void apply (vector const &images, vector *ft) { 79 | // Just a big enough memory 1000x1000x3 80 | int batch = shape[0]; 81 | CHECK(!images.empty()) << "must input >= 1 images"; 82 | CHECK(images.size() <= batch) << "Too many input images."; 83 | #if 0 84 | if (fcn()) { // for FCN, we need to resize network according to image size 85 | cv::Size sz = images[0].size(); 86 | for (unsigned i = 1; i < images.size(); ++i) { 87 | CHECK(images[i].size() == sz) << "all images must be the same size"; 88 | } 89 | int bufsz = image_buffer_size(images[0]); 90 | image_data.resize(bufsz * images.size()); 91 | } 92 | #endif 93 | float *e = preprocess(images, reinterpret_cast(PyArray_DATA(input))); 94 | PyObject *tuple = Py_BuildValue("(O)", input); 95 | PyArrayObject *output = (PyArrayObject *)PyObject_CallObject(predict, tuple); 96 | CHECK(output); 97 | Py_DECREF(tuple); 98 | CHECK(PyArray_ITEMSIZE(output) == sizeof(float)); 99 | float const *from = reinterpret_cast(PyArray_DATA(output)); 100 | int sz = PyArray_SIZE(output); 101 | ft->resize(sz); 102 | std::copy(from, from + sz, ft->begin()); 103 | Py_DECREF(output); 104 | } 105 | }; 106 | 107 | Model *Model::create_python (fs::path const &dir, 
int batch) { 108 | return new PythonModel(dir, batch); 109 | } 110 | 111 | } 112 | 113 | 114 | -------------------------------------------------------------------------------- /templates/fcn/model/blobs: -------------------------------------------------------------------------------- 1 | prob 2 | -------------------------------------------------------------------------------- /templates/fcn/model/caffe.model.tmpl: -------------------------------------------------------------------------------- 1 | name: "FCN" 2 | force_backward: true 3 | input: "data" 4 | # We will manipulate the input_dim fields below in Python during testing. They appear here only for syntactic reasons. 5 | input_dim: 1 6 | input_dim: {{channels | default(3, true)}} 7 | input_dim: 1 8 | input_dim: 1 9 | layer { 10 | name: "conv1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1" 14 | param { 15 | lr_mult: 1 16 | decay_mult: 1 17 | } 18 | param { 19 | lr_mult: 2 20 | decay_mult: 0 21 | } 22 | convolution_param { 23 | num_output: 100 24 | pad: 50 25 | kernel_size: 5 26 | group: 1 27 | stride: 2 28 | weight_filler { 29 | type: "gaussian" 30 | mean: 0.0 31 | std: 0.01 32 | } 33 | bias_filler { 34 | type: "constant" 35 | value: 0.1 36 | } 37 | } 38 | } 39 | layer { 40 | name: "relu1" 41 | type: "ReLU" 42 | bottom: "conv1" 43 | top: "conv1" 44 | } 45 | layer { 46 | name: "pool1" 47 | type: "Pooling" 48 | bottom: "conv1" 49 | top: "pool1" 50 | pooling_param { 51 | pool: MAX 52 | kernel_size: 2 53 | stride: 2 54 | } 55 | } 56 | layer { 57 | name: "conv2" 58 | type: "Convolution" 59 | bottom: "pool1" 60 | top: "conv2" 61 | param { 62 | lr_mult: 1 63 | decay_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 200 71 | pad: 0 72 | kernel_size: 5 73 | group: 1 74 | stride: 2 75 | weight_filler { 76 | type: "gaussian" 77 | mean: 0.0 78 | std: 0.01 79 | } 80 | bias_filler { 81 | type: "constant" 82 | value: 0.1 83 | } 84 | } 85 | } 86 | 
layer { 87 | name: "relu2" 88 | type: "ReLU" 89 | bottom: "conv2" 90 | top: "conv2" 91 | } 92 | layer { 93 | name: "pool2" 94 | type: "Pooling" 95 | bottom: "conv2" 96 | top: "pool2" 97 | pooling_param { 98 | pool: MAX 99 | kernel_size: 2 100 | stride: 2 101 | } 102 | } 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 1 110 | decay_mult: 1 111 | } 112 | param { 113 | lr_mult: 2 114 | decay_mult: 0 115 | } 116 | convolution_param { 117 | num_output: 300 118 | pad: 0 119 | kernel_size: 3 120 | group: 1 121 | stride: 1 122 | weight_filler { 123 | type: "gaussian" 124 | mean: 0.0 125 | std: 0.01 126 | } 127 | bias_filler { 128 | type: "constant" 129 | value: 0.1 130 | } 131 | } 132 | } 133 | layer { 134 | name: "relu3" 135 | type: "ReLU" 136 | bottom: "conv3" 137 | top: "conv3" 138 | } 139 | layer { 140 | name: "conv4" 141 | type: "Convolution" 142 | bottom: "conv3" 143 | top: "conv4" 144 | param { 145 | lr_mult: 1 146 | decay_mult: 1 147 | } 148 | param { 149 | lr_mult: 2 150 | decay_mult: 0 151 | } 152 | convolution_param { 153 | num_output: 300 154 | pad: 0 155 | kernel_size: 3 156 | group: 1 157 | stride: 1 158 | weight_filler { 159 | type: "gaussian" 160 | mean: 0.0 161 | std: 0.01 162 | } 163 | bias_filler { 164 | type: "constant" 165 | value: 0.1 166 | } 167 | } 168 | } 169 | layer { 170 | name: "relu4" 171 | type: "ReLU" 172 | bottom: "conv4" 173 | top: "conv4" 174 | } 175 | layer { 176 | name: "drop" 177 | type: "Dropout" 178 | bottom: "conv4" 179 | top: "conv4" 180 | dropout_param { 181 | dropout_ratio: 0.1 182 | } 183 | } 184 | layer { 185 | name: "score_classes" 186 | type: "Convolution" 187 | bottom: "conv4" 188 | top: "score_classes" 189 | param { 190 | lr_mult: 1 191 | decay_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | decay_mult: 0 196 | } 197 | convolution_param { 198 | num_output: 2 199 | pad: 0 200 | kernel_size: 1 201 | group: 1 202 | stride: 1 203 | 
weight_filler { 204 | type: "gaussian" 205 | mean: 0.0 206 | std: 0.01 207 | } 208 | bias_filler { 209 | type: "constant" 210 | value: 0.1 211 | } 212 | } 213 | } 214 | layer { 215 | name: "upscore" 216 | type: "Deconvolution" 217 | bottom: "score_classes" 218 | top: "upscore" 219 | param { 220 | lr_mult: 1 221 | decay_mult: 1 222 | } 223 | param { 224 | lr_mult: 2 225 | decay_mult: 0 226 | } 227 | convolution_param { 228 | num_output: 2 229 | bias_term: true 230 | kernel_size: 31 231 | pad: 8 232 | stride: 16 233 | weight_filler { type: "bilinear" } 234 | bias_filler { type: "constant" value: 0.1 } 235 | } 236 | } 237 | layer { 238 | name: "score" 239 | type: "Crop" 240 | bottom: "upscore" 241 | bottom: "data" 242 | top: "score" 243 | } 244 | layer { 245 | name: "prob" 246 | type: "Softmax" 247 | bottom: "score" 248 | top: "prob" 249 | } 250 | -------------------------------------------------------------------------------- /templates/fcn/solver.prototxt.tmpl: -------------------------------------------------------------------------------- 1 | # The train/test net protocol buffers definition 2 | train_net: "train.prototxt" 3 | # display interval 4 | display: {{display_interval |default(1000, true)}} 5 | average_loss: 200 6 | 7 | # The learning rate policy 8 | lr_policy: "multistep" 9 | stepvalue: 10000 10 | gamma: 0.1 11 | 12 | # The base learning rate, momentum and the weight decay of the network. 
13 | base_lr: 0.01 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | # The maximum number of iterations 18 | max_iter: {{max_iter |default(1000000, true)}} 19 | 20 | # snapshot intervals to disk 21 | snapshot: {{snapshot_interval |default(1000, true)}} 22 | snapshot_prefix: "./snapshots/x" 23 | 24 | # misc settings 25 | test_initialization: true 26 | random_seed: 5 27 | #solver_type: NESTEROV 28 | solver_mode: {{device |default("GPU", true)}} 29 | -------------------------------------------------------------------------------- /templates/fcn/train.prototxt.tmpl: -------------------------------------------------------------------------------- 1 | name: "FCN" 2 | force_backward: true 3 | layer { 4 | name: "data1" 5 | type: "PicPac" 6 | top: "data" 7 | top: "label" 8 | picpac_param { 9 | path: "{{db_path}}" 10 | batch: 1 11 | channels: {{channels | default(3,true) }} 12 | split: {{split | default(10,true) }} 13 | split_fold: {{split_fold | default(0,true) }} 14 | {% if mixin %} 15 | mixin: "{{mixin}}" 16 | mixin_group_delta: {{mixin_group_delta|default(0,true)}} 17 | {% endif %} 18 | annotate: "{{annotate | default("json",true)}}" 19 | anno_color1: 1 20 | {% if anno_min_ratio %} 21 | anno_min_ratio: {{anno_min_ratio}} 22 | {% endif %} 23 | threads: 4 24 | perturb: true 25 | pert_color1: 10 26 | pert_color2: 10 27 | pert_color3: 10 28 | pert_angle: 20 29 | pert_min_scale: 0.8 30 | pert_max_scale: 1.2 31 | } 32 | } 33 | layer { 34 | name: "conv1" 35 | type: "Convolution" 36 | bottom: "data" 37 | top: "conv1" 38 | param { 39 | lr_mult: 1 40 | decay_mult: 1 41 | } 42 | param { 43 | lr_mult: 2 44 | decay_mult: 0 45 | } 46 | convolution_param { 47 | num_output: 100 48 | pad: 50 49 | kernel_size: 5 50 | group: 1 51 | stride: 2 52 | weight_filler { 53 | type: "gaussian" 54 | mean: 0.0 55 | std: 0.01 56 | } 57 | bias_filler { 58 | type: "constant" 59 | value: 0.1 60 | } 61 | } 62 | } 63 | layer { 64 | name: "relu1" 65 | type: "ReLU" 66 | bottom: "conv1" 67 | top: "conv1" 68 |
} 69 | layer { 70 | name: "pool1" 71 | type: "Pooling" 72 | bottom: "conv1" 73 | top: "pool1" 74 | pooling_param { 75 | pool: MAX 76 | kernel_size: 2 77 | stride: 2 78 | } 79 | } 80 | layer { 81 | name: "conv2" 82 | type: "Convolution" 83 | bottom: "pool1" 84 | top: "conv2" 85 | param { 86 | lr_mult: 1 87 | decay_mult: 1 88 | } 89 | param { 90 | lr_mult: 2 91 | decay_mult: 0 92 | } 93 | convolution_param { 94 | num_output: 200 95 | pad: 0 96 | kernel_size: 5 97 | group: 1 98 | stride: 2 99 | weight_filler { 100 | type: "gaussian" 101 | mean: 0.0 102 | std: 0.01 103 | } 104 | bias_filler { 105 | type: "constant" 106 | value: 0.1 107 | } 108 | } 109 | } 110 | layer { 111 | name: "relu2" 112 | type: "ReLU" 113 | bottom: "conv2" 114 | top: "conv2" 115 | } 116 | layer { 117 | name: "pool2" 118 | type: "Pooling" 119 | bottom: "conv2" 120 | top: "pool2" 121 | pooling_param { 122 | pool: MAX 123 | kernel_size: 2 124 | stride: 2 125 | } 126 | } 127 | layer { 128 | name: "conv3" 129 | type: "Convolution" 130 | bottom: "pool2" 131 | top: "conv3" 132 | param { 133 | lr_mult: 1 134 | decay_mult: 1 135 | } 136 | param { 137 | lr_mult: 2 138 | decay_mult: 0 139 | } 140 | convolution_param { 141 | num_output: 300 142 | pad: 0 143 | kernel_size: 3 144 | group: 1 145 | stride: 1 146 | weight_filler { 147 | type: "gaussian" 148 | mean: 0.0 149 | std: 0.01 150 | } 151 | bias_filler { 152 | type: "constant" 153 | value: 0.1 154 | } 155 | } 156 | } 157 | layer { 158 | name: "relu3" 159 | type: "ReLU" 160 | bottom: "conv3" 161 | top: "conv3" 162 | } 163 | layer { 164 | name: "conv4" 165 | type: "Convolution" 166 | bottom: "conv3" 167 | top: "conv4" 168 | param { 169 | lr_mult: 1 170 | decay_mult: 1 171 | } 172 | param { 173 | lr_mult: 2 174 | decay_mult: 0 175 | } 176 | convolution_param { 177 | num_output: 300 178 | pad: 0 179 | kernel_size: 3 180 | group: 1 181 | stride: 1 182 | weight_filler { 183 | type: "gaussian" 184 | mean: 0.0 185 | std: 0.01 186 | } 187 | bias_filler { 188 | 
type: "constant" 189 | value: 0.1 190 | } 191 | } 192 | } 193 | layer { 194 | name: "relu4" 195 | type: "ReLU" 196 | bottom: "conv4" 197 | top: "conv4" 198 | } 199 | layer { 200 | name: "drop" 201 | type: "Dropout" 202 | bottom: "conv4" 203 | top: "conv4" 204 | dropout_param { 205 | dropout_ratio: 0.1 206 | } 207 | } 208 | layer { 209 | name: "score_classes" 210 | type: "Convolution" 211 | bottom: "conv4" 212 | top: "score_classes" 213 | param { 214 | lr_mult: 1 215 | decay_mult: 1 216 | } 217 | param { 218 | lr_mult: 2 219 | decay_mult: 0 220 | } 221 | convolution_param { 222 | num_output: 2 223 | pad: 0 224 | kernel_size: 1 225 | group: 1 226 | stride: 1 227 | weight_filler { 228 | type: "gaussian" 229 | mean: 0.0 230 | std: 0.01 231 | } 232 | bias_filler { 233 | type: "constant" 234 | value: 0.1 235 | } 236 | } 237 | } 238 | layer { 239 | name: "upscore" 240 | type: "Deconvolution" 241 | bottom: "score_classes" 242 | top: "upscore" 243 | param { 244 | lr_mult: 1 245 | decay_mult: 1 246 | } 247 | param { 248 | lr_mult: 2 249 | decay_mult: 0 250 | } 251 | convolution_param { 252 | num_output: {{num_output}} 253 | bias_term: true 254 | kernel_size: 31 255 | pad: 8 256 | stride: 16 257 | weight_filler { type: "bilinear" } 258 | bias_filler { type: "constant" value: 0.1 } 259 | } 260 | } 261 | layer { 262 | name: "score" 263 | type: "Crop" 264 | bottom: "upscore" 265 | bottom: "data" 266 | top: "score" 267 | } 268 | layer { 269 | name: "loss" 270 | type: "SoftmaxWithLoss" 271 | bottom: "score" 272 | bottom: "label" 273 | top: "loss" 274 | loss_param { 275 | normalize: true 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /templates/fcn/train.sh: --------------------------------------------------------------------------------
#!/usr/bin/env sh
# Launch Caffe training with solver.prototxt from the current directory.
#
# Usage: train.sh [SNAPSHOT [caffe arguments...]]
#   SNAPSHOT  solver state to resume from; omit it, or pass "" as a
#             placeholder, to start from scratch.
#   Any further arguments are forwarded to "caffe train".

export GLOG_log_dir=log
export GLOG_logtostderr=1

CAFFE=caffe

mkdir -p log snapshots

SNAP=$1
# Always consume the snapshot slot so that an explicit "" placeholder is not
# forwarded to caffe as an empty argument.
if [ "$#" -gt 0 ]
then
    shift
fi

# "$@" rather than an unquoted $* keeps every forwarded argument intact, even
# when it contains whitespace.
if [ -z "$SNAP" ]
then
    $CAFFE train --solver solver.prototxt "$@"
else
    $CAFFE train --solver solver.prototxt --snapshot "$SNAP" "$@"
fi

-------------------------------------------------------------------------------- /templates/googlenet/model/blobs: -------------------------------------------------------------------------------- 1 | prob 2 | -------------------------------------------------------------------------------- /templates/googlenet/model/caffe.model.tmpl: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_shape { 4 | dim: 10 5 | dim: {{channels|default(3,true)}} 6 | dim: 224 7 | dim: 224 8 | } 9 | layer { 10 | name: "conv1/7x7_s2" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1/7x7_s2" 14 | param { 15 | lr_mult: 1 16 | decay_mult: 1 17 | } 18 | param { 19 | lr_mult: 2 20 | decay_mult: 0 21 | } 22 | convolution_param { 23 | num_output: 64 24 | pad: 3 25 | kernel_size: 7 26 | stride: 2 27 | weight_filler { 28 | type: "xavier" 29 | std: 0.1 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0.2 34 | } 35 | } 36 | } 37 | layer { 38 | name: "conv1/relu_7x7" 39 | type: "ReLU" 40 | bottom: "conv1/7x7_s2" 41 | top: "conv1/7x7_s2" 42 | } 43 | layer { 44 | name: "pool1/3x3_s2" 45 | type: "Pooling" 46 | bottom: "conv1/7x7_s2" 47 | top: "pool1/3x3_s2" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 3 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "pool1/norm1" 56 | type: "LRN" 57 | bottom: "pool1/3x3_s2" 58 | top: "pool1/norm1" 59 | lrn_param { 60 | local_size: 5 61 | alpha: 0.0001 62 | beta: 0.75 63 | } 64 | } 65 | layer { 66 | name: "conv2/3x3_reduce" 67 | type: "Convolution" 68 | bottom: "pool1/norm1" 69 | top: "conv2/3x3_reduce" 70 | param { 71 | lr_mult: 1 72 | decay_mult: 1 73 | } 74 | param { 75 | lr_mult: 2 76 | decay_mult: 0 77 | } 78 | convolution_param { 79 | num_output: 64 80 | kernel_size: 1 81 | weight_filler { 82 | type: "xavier" 83
| std: 0.1 84 | } 85 | bias_filler { 86 | type: "constant" 87 | value: 0.2 88 | } 89 | } 90 | } 91 | layer { 92 | name: "conv2/relu_3x3_reduce" 93 | type: "ReLU" 94 | bottom: "conv2/3x3_reduce" 95 | top: "conv2/3x3_reduce" 96 | } 97 | layer { 98 | name: "conv2/3x3" 99 | type: "Convolution" 100 | bottom: "conv2/3x3_reduce" 101 | top: "conv2/3x3" 102 | param { 103 | lr_mult: 1 104 | decay_mult: 1 105 | } 106 | param { 107 | lr_mult: 2 108 | decay_mult: 0 109 | } 110 | convolution_param { 111 | num_output: 192 112 | pad: 1 113 | kernel_size: 3 114 | weight_filler { 115 | type: "xavier" 116 | std: 0.03 117 | } 118 | bias_filler { 119 | type: "constant" 120 | value: 0.2 121 | } 122 | } 123 | } 124 | layer { 125 | name: "conv2/relu_3x3" 126 | type: "ReLU" 127 | bottom: "conv2/3x3" 128 | top: "conv2/3x3" 129 | } 130 | layer { 131 | name: "conv2/norm2" 132 | type: "LRN" 133 | bottom: "conv2/3x3" 134 | top: "conv2/norm2" 135 | lrn_param { 136 | local_size: 5 137 | alpha: 0.0001 138 | beta: 0.75 139 | } 140 | } 141 | layer { 142 | name: "pool2/3x3_s2" 143 | type: "Pooling" 144 | bottom: "conv2/norm2" 145 | top: "pool2/3x3_s2" 146 | pooling_param { 147 | pool: MAX 148 | kernel_size: 3 149 | stride: 2 150 | } 151 | } 152 | layer { 153 | name: "inception_3a/1x1" 154 | type: "Convolution" 155 | bottom: "pool2/3x3_s2" 156 | top: "inception_3a/1x1" 157 | param { 158 | lr_mult: 1 159 | decay_mult: 1 160 | } 161 | param { 162 | lr_mult: 2 163 | decay_mult: 0 164 | } 165 | convolution_param { 166 | num_output: 64 167 | kernel_size: 1 168 | weight_filler { 169 | type: "xavier" 170 | std: 0.03 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 0.2 175 | } 176 | } 177 | } 178 | layer { 179 | name: "inception_3a/relu_1x1" 180 | type: "ReLU" 181 | bottom: "inception_3a/1x1" 182 | top: "inception_3a/1x1" 183 | } 184 | layer { 185 | name: "inception_3a/3x3_reduce" 186 | type: "Convolution" 187 | bottom: "pool2/3x3_s2" 188 | top: "inception_3a/3x3_reduce" 189 | param { 190 | 
lr_mult: 1 191 | decay_mult: 1 192 | } 193 | param { 194 | lr_mult: 2 195 | decay_mult: 0 196 | } 197 | convolution_param { 198 | num_output: 96 199 | kernel_size: 1 200 | weight_filler { 201 | type: "xavier" 202 | std: 0.09 203 | } 204 | bias_filler { 205 | type: "constant" 206 | value: 0.2 207 | } 208 | } 209 | } 210 | layer { 211 | name: "inception_3a/relu_3x3_reduce" 212 | type: "ReLU" 213 | bottom: "inception_3a/3x3_reduce" 214 | top: "inception_3a/3x3_reduce" 215 | } 216 | layer { 217 | name: "inception_3a/3x3" 218 | type: "Convolution" 219 | bottom: "inception_3a/3x3_reduce" 220 | top: "inception_3a/3x3" 221 | param { 222 | lr_mult: 1 223 | decay_mult: 1 224 | } 225 | param { 226 | lr_mult: 2 227 | decay_mult: 0 228 | } 229 | convolution_param { 230 | num_output: 128 231 | pad: 1 232 | kernel_size: 3 233 | weight_filler { 234 | type: "xavier" 235 | std: 0.03 236 | } 237 | bias_filler { 238 | type: "constant" 239 | value: 0.2 240 | } 241 | } 242 | } 243 | layer { 244 | name: "inception_3a/relu_3x3" 245 | type: "ReLU" 246 | bottom: "inception_3a/3x3" 247 | top: "inception_3a/3x3" 248 | } 249 | layer { 250 | name: "inception_3a/5x5_reduce" 251 | type: "Convolution" 252 | bottom: "pool2/3x3_s2" 253 | top: "inception_3a/5x5_reduce" 254 | param { 255 | lr_mult: 1 256 | decay_mult: 1 257 | } 258 | param { 259 | lr_mult: 2 260 | decay_mult: 0 261 | } 262 | convolution_param { 263 | num_output: 16 264 | kernel_size: 1 265 | weight_filler { 266 | type: "xavier" 267 | std: 0.2 268 | } 269 | bias_filler { 270 | type: "constant" 271 | value: 0.2 272 | } 273 | } 274 | } 275 | layer { 276 | name: "inception_3a/relu_5x5_reduce" 277 | type: "ReLU" 278 | bottom: "inception_3a/5x5_reduce" 279 | top: "inception_3a/5x5_reduce" 280 | } 281 | layer { 282 | name: "inception_3a/5x5" 283 | type: "Convolution" 284 | bottom: "inception_3a/5x5_reduce" 285 | top: "inception_3a/5x5" 286 | param { 287 | lr_mult: 1 288 | decay_mult: 1 289 | } 290 | param { 291 | lr_mult: 2 292 | decay_mult: 
0 293 | } 294 | convolution_param { 295 | num_output: 32 296 | pad: 2 297 | kernel_size: 5 298 | weight_filler { 299 | type: "xavier" 300 | std: 0.03 301 | } 302 | bias_filler { 303 | type: "constant" 304 | value: 0.2 305 | } 306 | } 307 | } 308 | layer { 309 | name: "inception_3a/relu_5x5" 310 | type: "ReLU" 311 | bottom: "inception_3a/5x5" 312 | top: "inception_3a/5x5" 313 | } 314 | layer { 315 | name: "inception_3a/pool" 316 | type: "Pooling" 317 | bottom: "pool2/3x3_s2" 318 | top: "inception_3a/pool" 319 | pooling_param { 320 | pool: MAX 321 | kernel_size: 3 322 | stride: 1 323 | pad: 1 324 | } 325 | } 326 | layer { 327 | name: "inception_3a/pool_proj" 328 | type: "Convolution" 329 | bottom: "inception_3a/pool" 330 | top: "inception_3a/pool_proj" 331 | param { 332 | lr_mult: 1 333 | decay_mult: 1 334 | } 335 | param { 336 | lr_mult: 2 337 | decay_mult: 0 338 | } 339 | convolution_param { 340 | num_output: 32 341 | kernel_size: 1 342 | weight_filler { 343 | type: "xavier" 344 | std: 0.1 345 | } 346 | bias_filler { 347 | type: "constant" 348 | value: 0.2 349 | } 350 | } 351 | } 352 | layer { 353 | name: "inception_3a/relu_pool_proj" 354 | type: "ReLU" 355 | bottom: "inception_3a/pool_proj" 356 | top: "inception_3a/pool_proj" 357 | } 358 | layer { 359 | name: "inception_3a/output" 360 | type: "Concat" 361 | bottom: "inception_3a/1x1" 362 | bottom: "inception_3a/3x3" 363 | bottom: "inception_3a/5x5" 364 | bottom: "inception_3a/pool_proj" 365 | top: "inception_3a/output" 366 | } 367 | layer { 368 | name: "inception_3b/1x1" 369 | type: "Convolution" 370 | bottom: "inception_3a/output" 371 | top: "inception_3b/1x1" 372 | param { 373 | lr_mult: 1 374 | decay_mult: 1 375 | } 376 | param { 377 | lr_mult: 2 378 | decay_mult: 0 379 | } 380 | convolution_param { 381 | num_output: 128 382 | kernel_size: 1 383 | weight_filler { 384 | type: "xavier" 385 | std: 0.03 386 | } 387 | bias_filler { 388 | type: "constant" 389 | value: 0.2 390 | } 391 | } 392 | } 393 | layer { 394 | 
name: "inception_3b/relu_1x1" 395 | type: "ReLU" 396 | bottom: "inception_3b/1x1" 397 | top: "inception_3b/1x1" 398 | } 399 | layer { 400 | name: "inception_3b/3x3_reduce" 401 | type: "Convolution" 402 | bottom: "inception_3a/output" 403 | top: "inception_3b/3x3_reduce" 404 | param { 405 | lr_mult: 1 406 | decay_mult: 1 407 | } 408 | param { 409 | lr_mult: 2 410 | decay_mult: 0 411 | } 412 | convolution_param { 413 | num_output: 128 414 | kernel_size: 1 415 | weight_filler { 416 | type: "xavier" 417 | std: 0.09 418 | } 419 | bias_filler { 420 | type: "constant" 421 | value: 0.2 422 | } 423 | } 424 | } 425 | layer { 426 | name: "inception_3b/relu_3x3_reduce" 427 | type: "ReLU" 428 | bottom: "inception_3b/3x3_reduce" 429 | top: "inception_3b/3x3_reduce" 430 | } 431 | layer { 432 | name: "inception_3b/3x3" 433 | type: "Convolution" 434 | bottom: "inception_3b/3x3_reduce" 435 | top: "inception_3b/3x3" 436 | param { 437 | lr_mult: 1 438 | decay_mult: 1 439 | } 440 | param { 441 | lr_mult: 2 442 | decay_mult: 0 443 | } 444 | convolution_param { 445 | num_output: 192 446 | pad: 1 447 | kernel_size: 3 448 | weight_filler { 449 | type: "xavier" 450 | std: 0.03 451 | } 452 | bias_filler { 453 | type: "constant" 454 | value: 0.2 455 | } 456 | } 457 | } 458 | layer { 459 | name: "inception_3b/relu_3x3" 460 | type: "ReLU" 461 | bottom: "inception_3b/3x3" 462 | top: "inception_3b/3x3" 463 | } 464 | layer { 465 | name: "inception_3b/5x5_reduce" 466 | type: "Convolution" 467 | bottom: "inception_3a/output" 468 | top: "inception_3b/5x5_reduce" 469 | param { 470 | lr_mult: 1 471 | decay_mult: 1 472 | } 473 | param { 474 | lr_mult: 2 475 | decay_mult: 0 476 | } 477 | convolution_param { 478 | num_output: 32 479 | kernel_size: 1 480 | weight_filler { 481 | type: "xavier" 482 | std: 0.2 483 | } 484 | bias_filler { 485 | type: "constant" 486 | value: 0.2 487 | } 488 | } 489 | } 490 | layer { 491 | name: "inception_3b/relu_5x5_reduce" 492 | type: "ReLU" 493 | bottom: 
"inception_3b/5x5_reduce" 494 | top: "inception_3b/5x5_reduce" 495 | } 496 | layer { 497 | name: "inception_3b/5x5" 498 | type: "Convolution" 499 | bottom: "inception_3b/5x5_reduce" 500 | top: "inception_3b/5x5" 501 | param { 502 | lr_mult: 1 503 | decay_mult: 1 504 | } 505 | param { 506 | lr_mult: 2 507 | decay_mult: 0 508 | } 509 | convolution_param { 510 | num_output: 96 511 | pad: 2 512 | kernel_size: 5 513 | weight_filler { 514 | type: "xavier" 515 | std: 0.03 516 | } 517 | bias_filler { 518 | type: "constant" 519 | value: 0.2 520 | } 521 | } 522 | } 523 | layer { 524 | name: "inception_3b/relu_5x5" 525 | type: "ReLU" 526 | bottom: "inception_3b/5x5" 527 | top: "inception_3b/5x5" 528 | } 529 | layer { 530 | name: "inception_3b/pool" 531 | type: "Pooling" 532 | bottom: "inception_3a/output" 533 | top: "inception_3b/pool" 534 | pooling_param { 535 | pool: MAX 536 | kernel_size: 3 537 | stride: 1 538 | pad: 1 539 | } 540 | } 541 | layer { 542 | name: "inception_3b/pool_proj" 543 | type: "Convolution" 544 | bottom: "inception_3b/pool" 545 | top: "inception_3b/pool_proj" 546 | param { 547 | lr_mult: 1 548 | decay_mult: 1 549 | } 550 | param { 551 | lr_mult: 2 552 | decay_mult: 0 553 | } 554 | convolution_param { 555 | num_output: 64 556 | kernel_size: 1 557 | weight_filler { 558 | type: "xavier" 559 | std: 0.1 560 | } 561 | bias_filler { 562 | type: "constant" 563 | value: 0.2 564 | } 565 | } 566 | } 567 | layer { 568 | name: "inception_3b/relu_pool_proj" 569 | type: "ReLU" 570 | bottom: "inception_3b/pool_proj" 571 | top: "inception_3b/pool_proj" 572 | } 573 | layer { 574 | name: "inception_3b/output" 575 | type: "Concat" 576 | bottom: "inception_3b/1x1" 577 | bottom: "inception_3b/3x3" 578 | bottom: "inception_3b/5x5" 579 | bottom: "inception_3b/pool_proj" 580 | top: "inception_3b/output" 581 | } 582 | layer { 583 | name: "pool3/3x3_s2" 584 | type: "Pooling" 585 | bottom: "inception_3b/output" 586 | top: "pool3/3x3_s2" 587 | pooling_param { 588 | pool: MAX 589 | 
kernel_size: 3 590 | stride: 2 591 | } 592 | } 593 | layer { 594 | name: "inception_4a/1x1" 595 | type: "Convolution" 596 | bottom: "pool3/3x3_s2" 597 | top: "inception_4a/1x1" 598 | param { 599 | lr_mult: 1 600 | decay_mult: 1 601 | } 602 | param { 603 | lr_mult: 2 604 | decay_mult: 0 605 | } 606 | convolution_param { 607 | num_output: 192 608 | kernel_size: 1 609 | weight_filler { 610 | type: "xavier" 611 | std: 0.03 612 | } 613 | bias_filler { 614 | type: "constant" 615 | value: 0.2 616 | } 617 | } 618 | } 619 | layer { 620 | name: "inception_4a/relu_1x1" 621 | type: "ReLU" 622 | bottom: "inception_4a/1x1" 623 | top: "inception_4a/1x1" 624 | } 625 | layer { 626 | name: "inception_4a/3x3_reduce" 627 | type: "Convolution" 628 | bottom: "pool3/3x3_s2" 629 | top: "inception_4a/3x3_reduce" 630 | param { 631 | lr_mult: 1 632 | decay_mult: 1 633 | } 634 | param { 635 | lr_mult: 2 636 | decay_mult: 0 637 | } 638 | convolution_param { 639 | num_output: 96 640 | kernel_size: 1 641 | weight_filler { 642 | type: "xavier" 643 | std: 0.09 644 | } 645 | bias_filler { 646 | type: "constant" 647 | value: 0.2 648 | } 649 | } 650 | } 651 | layer { 652 | name: "inception_4a/relu_3x3_reduce" 653 | type: "ReLU" 654 | bottom: "inception_4a/3x3_reduce" 655 | top: "inception_4a/3x3_reduce" 656 | } 657 | layer { 658 | name: "inception_4a/3x3" 659 | type: "Convolution" 660 | bottom: "inception_4a/3x3_reduce" 661 | top: "inception_4a/3x3" 662 | param { 663 | lr_mult: 1 664 | decay_mult: 1 665 | } 666 | param { 667 | lr_mult: 2 668 | decay_mult: 0 669 | } 670 | convolution_param { 671 | num_output: 208 672 | pad: 1 673 | kernel_size: 3 674 | weight_filler { 675 | type: "xavier" 676 | std: 0.03 677 | } 678 | bias_filler { 679 | type: "constant" 680 | value: 0.2 681 | } 682 | } 683 | } 684 | layer { 685 | name: "inception_4a/relu_3x3" 686 | type: "ReLU" 687 | bottom: "inception_4a/3x3" 688 | top: "inception_4a/3x3" 689 | } 690 | layer { 691 | name: "inception_4a/5x5_reduce" 692 | type: 
"Convolution" 693 | bottom: "pool3/3x3_s2" 694 | top: "inception_4a/5x5_reduce" 695 | param { 696 | lr_mult: 1 697 | decay_mult: 1 698 | } 699 | param { 700 | lr_mult: 2 701 | decay_mult: 0 702 | } 703 | convolution_param { 704 | num_output: 16 705 | kernel_size: 1 706 | weight_filler { 707 | type: "xavier" 708 | std: 0.2 709 | } 710 | bias_filler { 711 | type: "constant" 712 | value: 0.2 713 | } 714 | } 715 | } 716 | layer { 717 | name: "inception_4a/relu_5x5_reduce" 718 | type: "ReLU" 719 | bottom: "inception_4a/5x5_reduce" 720 | top: "inception_4a/5x5_reduce" 721 | } 722 | layer { 723 | name: "inception_4a/5x5" 724 | type: "Convolution" 725 | bottom: "inception_4a/5x5_reduce" 726 | top: "inception_4a/5x5" 727 | param { 728 | lr_mult: 1 729 | decay_mult: 1 730 | } 731 | param { 732 | lr_mult: 2 733 | decay_mult: 0 734 | } 735 | convolution_param { 736 | num_output: 48 737 | pad: 2 738 | kernel_size: 5 739 | weight_filler { 740 | type: "xavier" 741 | std: 0.03 742 | } 743 | bias_filler { 744 | type: "constant" 745 | value: 0.2 746 | } 747 | } 748 | } 749 | layer { 750 | name: "inception_4a/relu_5x5" 751 | type: "ReLU" 752 | bottom: "inception_4a/5x5" 753 | top: "inception_4a/5x5" 754 | } 755 | layer { 756 | name: "inception_4a/pool" 757 | type: "Pooling" 758 | bottom: "pool3/3x3_s2" 759 | top: "inception_4a/pool" 760 | pooling_param { 761 | pool: MAX 762 | kernel_size: 3 763 | stride: 1 764 | pad: 1 765 | } 766 | } 767 | layer { 768 | name: "inception_4a/pool_proj" 769 | type: "Convolution" 770 | bottom: "inception_4a/pool" 771 | top: "inception_4a/pool_proj" 772 | param { 773 | lr_mult: 1 774 | decay_mult: 1 775 | } 776 | param { 777 | lr_mult: 2 778 | decay_mult: 0 779 | } 780 | convolution_param { 781 | num_output: 64 782 | kernel_size: 1 783 | weight_filler { 784 | type: "xavier" 785 | std: 0.1 786 | } 787 | bias_filler { 788 | type: "constant" 789 | value: 0.2 790 | } 791 | } 792 | } 793 | layer { 794 | name: "inception_4a/relu_pool_proj" 795 | type: "ReLU" 
796 | bottom: "inception_4a/pool_proj" 797 | top: "inception_4a/pool_proj" 798 | } 799 | layer { 800 | name: "inception_4a/output" 801 | type: "Concat" 802 | bottom: "inception_4a/1x1" 803 | bottom: "inception_4a/3x3" 804 | bottom: "inception_4a/5x5" 805 | bottom: "inception_4a/pool_proj" 806 | top: "inception_4a/output" 807 | } 808 | layer { 809 | name: "inception_4b/1x1" 810 | type: "Convolution" 811 | bottom: "inception_4a/output" 812 | top: "inception_4b/1x1" 813 | param { 814 | lr_mult: 1 815 | decay_mult: 1 816 | } 817 | param { 818 | lr_mult: 2 819 | decay_mult: 0 820 | } 821 | convolution_param { 822 | num_output: 160 823 | kernel_size: 1 824 | weight_filler { 825 | type: "xavier" 826 | std: 0.03 827 | } 828 | bias_filler { 829 | type: "constant" 830 | value: 0.2 831 | } 832 | } 833 | } 834 | layer { 835 | name: "inception_4b/relu_1x1" 836 | type: "ReLU" 837 | bottom: "inception_4b/1x1" 838 | top: "inception_4b/1x1" 839 | } 840 | layer { 841 | name: "inception_4b/3x3_reduce" 842 | type: "Convolution" 843 | bottom: "inception_4a/output" 844 | top: "inception_4b/3x3_reduce" 845 | param { 846 | lr_mult: 1 847 | decay_mult: 1 848 | } 849 | param { 850 | lr_mult: 2 851 | decay_mult: 0 852 | } 853 | convolution_param { 854 | num_output: 112 855 | kernel_size: 1 856 | weight_filler { 857 | type: "xavier" 858 | std: 0.09 859 | } 860 | bias_filler { 861 | type: "constant" 862 | value: 0.2 863 | } 864 | } 865 | } 866 | layer { 867 | name: "inception_4b/relu_3x3_reduce" 868 | type: "ReLU" 869 | bottom: "inception_4b/3x3_reduce" 870 | top: "inception_4b/3x3_reduce" 871 | } 872 | layer { 873 | name: "inception_4b/3x3" 874 | type: "Convolution" 875 | bottom: "inception_4b/3x3_reduce" 876 | top: "inception_4b/3x3" 877 | param { 878 | lr_mult: 1 879 | decay_mult: 1 880 | } 881 | param { 882 | lr_mult: 2 883 | decay_mult: 0 884 | } 885 | convolution_param { 886 | num_output: 224 887 | pad: 1 888 | kernel_size: 3 889 | weight_filler { 890 | type: "xavier" 891 | std: 0.03 892 
| } 893 | bias_filler { 894 | type: "constant" 895 | value: 0.2 896 | } 897 | } 898 | } 899 | layer { 900 | name: "inception_4b/relu_3x3" 901 | type: "ReLU" 902 | bottom: "inception_4b/3x3" 903 | top: "inception_4b/3x3" 904 | } 905 | layer { 906 | name: "inception_4b/5x5_reduce" 907 | type: "Convolution" 908 | bottom: "inception_4a/output" 909 | top: "inception_4b/5x5_reduce" 910 | param { 911 | lr_mult: 1 912 | decay_mult: 1 913 | } 914 | param { 915 | lr_mult: 2 916 | decay_mult: 0 917 | } 918 | convolution_param { 919 | num_output: 24 920 | kernel_size: 1 921 | weight_filler { 922 | type: "xavier" 923 | std: 0.2 924 | } 925 | bias_filler { 926 | type: "constant" 927 | value: 0.2 928 | } 929 | } 930 | } 931 | layer { 932 | name: "inception_4b/relu_5x5_reduce" 933 | type: "ReLU" 934 | bottom: "inception_4b/5x5_reduce" 935 | top: "inception_4b/5x5_reduce" 936 | } 937 | layer { 938 | name: "inception_4b/5x5" 939 | type: "Convolution" 940 | bottom: "inception_4b/5x5_reduce" 941 | top: "inception_4b/5x5" 942 | param { 943 | lr_mult: 1 944 | decay_mult: 1 945 | } 946 | param { 947 | lr_mult: 2 948 | decay_mult: 0 949 | } 950 | convolution_param { 951 | num_output: 64 952 | pad: 2 953 | kernel_size: 5 954 | weight_filler { 955 | type: "xavier" 956 | std: 0.03 957 | } 958 | bias_filler { 959 | type: "constant" 960 | value: 0.2 961 | } 962 | } 963 | } 964 | layer { 965 | name: "inception_4b/relu_5x5" 966 | type: "ReLU" 967 | bottom: "inception_4b/5x5" 968 | top: "inception_4b/5x5" 969 | } 970 | layer { 971 | name: "inception_4b/pool" 972 | type: "Pooling" 973 | bottom: "inception_4a/output" 974 | top: "inception_4b/pool" 975 | pooling_param { 976 | pool: MAX 977 | kernel_size: 3 978 | stride: 1 979 | pad: 1 980 | } 981 | } 982 | layer { 983 | name: "inception_4b/pool_proj" 984 | type: "Convolution" 985 | bottom: "inception_4b/pool" 986 | top: "inception_4b/pool_proj" 987 | param { 988 | lr_mult: 1 989 | decay_mult: 1 990 | } 991 | param { 992 | lr_mult: 2 993 | 
decay_mult: 0 994 | } 995 | convolution_param { 996 | num_output: 64 997 | kernel_size: 1 998 | weight_filler { 999 | type: "xavier" 1000 | std: 0.1 1001 | } 1002 | bias_filler { 1003 | type: "constant" 1004 | value: 0.2 1005 | } 1006 | } 1007 | } 1008 | layer { 1009 | name: "inception_4b/relu_pool_proj" 1010 | type: "ReLU" 1011 | bottom: "inception_4b/pool_proj" 1012 | top: "inception_4b/pool_proj" 1013 | } 1014 | layer { 1015 | name: "inception_4b/output" 1016 | type: "Concat" 1017 | bottom: "inception_4b/1x1" 1018 | bottom: "inception_4b/3x3" 1019 | bottom: "inception_4b/5x5" 1020 | bottom: "inception_4b/pool_proj" 1021 | top: "inception_4b/output" 1022 | } 1023 | layer { 1024 | name: "inception_4c/1x1" 1025 | type: "Convolution" 1026 | bottom: "inception_4b/output" 1027 | top: "inception_4c/1x1" 1028 | param { 1029 | lr_mult: 1 1030 | decay_mult: 1 1031 | } 1032 | param { 1033 | lr_mult: 2 1034 | decay_mult: 0 1035 | } 1036 | convolution_param { 1037 | num_output: 128 1038 | kernel_size: 1 1039 | weight_filler { 1040 | type: "xavier" 1041 | std: 0.03 1042 | } 1043 | bias_filler { 1044 | type: "constant" 1045 | value: 0.2 1046 | } 1047 | } 1048 | } 1049 | layer { 1050 | name: "inception_4c/relu_1x1" 1051 | type: "ReLU" 1052 | bottom: "inception_4c/1x1" 1053 | top: "inception_4c/1x1" 1054 | } 1055 | layer { 1056 | name: "inception_4c/3x3_reduce" 1057 | type: "Convolution" 1058 | bottom: "inception_4b/output" 1059 | top: "inception_4c/3x3_reduce" 1060 | param { 1061 | lr_mult: 1 1062 | decay_mult: 1 1063 | } 1064 | param { 1065 | lr_mult: 2 1066 | decay_mult: 0 1067 | } 1068 | convolution_param { 1069 | num_output: 128 1070 | kernel_size: 1 1071 | weight_filler { 1072 | type: "xavier" 1073 | std: 0.09 1074 | } 1075 | bias_filler { 1076 | type: "constant" 1077 | value: 0.2 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "inception_4c/relu_3x3_reduce" 1083 | type: "ReLU" 1084 | bottom: "inception_4c/3x3_reduce" 1085 | top: "inception_4c/3x3_reduce" 1086 | } 
1087 | layer { 1088 | name: "inception_4c/3x3" 1089 | type: "Convolution" 1090 | bottom: "inception_4c/3x3_reduce" 1091 | top: "inception_4c/3x3" 1092 | param { 1093 | lr_mult: 1 1094 | decay_mult: 1 1095 | } 1096 | param { 1097 | lr_mult: 2 1098 | decay_mult: 0 1099 | } 1100 | convolution_param { 1101 | num_output: 256 1102 | pad: 1 1103 | kernel_size: 3 1104 | weight_filler { 1105 | type: "xavier" 1106 | std: 0.03 1107 | } 1108 | bias_filler { 1109 | type: "constant" 1110 | value: 0.2 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "inception_4c/relu_3x3" 1116 | type: "ReLU" 1117 | bottom: "inception_4c/3x3" 1118 | top: "inception_4c/3x3" 1119 | } 1120 | layer { 1121 | name: "inception_4c/5x5_reduce" 1122 | type: "Convolution" 1123 | bottom: "inception_4b/output" 1124 | top: "inception_4c/5x5_reduce" 1125 | param { 1126 | lr_mult: 1 1127 | decay_mult: 1 1128 | } 1129 | param { 1130 | lr_mult: 2 1131 | decay_mult: 0 1132 | } 1133 | convolution_param { 1134 | num_output: 24 1135 | kernel_size: 1 1136 | weight_filler { 1137 | type: "xavier" 1138 | std: 0.2 1139 | } 1140 | bias_filler { 1141 | type: "constant" 1142 | value: 0.2 1143 | } 1144 | } 1145 | } 1146 | layer { 1147 | name: "inception_4c/relu_5x5_reduce" 1148 | type: "ReLU" 1149 | bottom: "inception_4c/5x5_reduce" 1150 | top: "inception_4c/5x5_reduce" 1151 | } 1152 | layer { 1153 | name: "inception_4c/5x5" 1154 | type: "Convolution" 1155 | bottom: "inception_4c/5x5_reduce" 1156 | top: "inception_4c/5x5" 1157 | param { 1158 | lr_mult: 1 1159 | decay_mult: 1 1160 | } 1161 | param { 1162 | lr_mult: 2 1163 | decay_mult: 0 1164 | } 1165 | convolution_param { 1166 | num_output: 64 1167 | pad: 2 1168 | kernel_size: 5 1169 | weight_filler { 1170 | type: "xavier" 1171 | std: 0.03 1172 | } 1173 | bias_filler { 1174 | type: "constant" 1175 | value: 0.2 1176 | } 1177 | } 1178 | } 1179 | layer { 1180 | name: "inception_4c/relu_5x5" 1181 | type: "ReLU" 1182 | bottom: "inception_4c/5x5" 1183 | top: "inception_4c/5x5" 
1184 | } 1185 | layer { 1186 | name: "inception_4c/pool" 1187 | type: "Pooling" 1188 | bottom: "inception_4b/output" 1189 | top: "inception_4c/pool" 1190 | pooling_param { 1191 | pool: MAX 1192 | kernel_size: 3 1193 | stride: 1 1194 | pad: 1 1195 | } 1196 | } 1197 | layer { 1198 | name: "inception_4c/pool_proj" 1199 | type: "Convolution" 1200 | bottom: "inception_4c/pool" 1201 | top: "inception_4c/pool_proj" 1202 | param { 1203 | lr_mult: 1 1204 | decay_mult: 1 1205 | } 1206 | param { 1207 | lr_mult: 2 1208 | decay_mult: 0 1209 | } 1210 | convolution_param { 1211 | num_output: 64 1212 | kernel_size: 1 1213 | weight_filler { 1214 | type: "xavier" 1215 | std: 0.1 1216 | } 1217 | bias_filler { 1218 | type: "constant" 1219 | value: 0.2 1220 | } 1221 | } 1222 | } 1223 | layer { 1224 | name: "inception_4c/relu_pool_proj" 1225 | type: "ReLU" 1226 | bottom: "inception_4c/pool_proj" 1227 | top: "inception_4c/pool_proj" 1228 | } 1229 | layer { 1230 | name: "inception_4c/output" 1231 | type: "Concat" 1232 | bottom: "inception_4c/1x1" 1233 | bottom: "inception_4c/3x3" 1234 | bottom: "inception_4c/5x5" 1235 | bottom: "inception_4c/pool_proj" 1236 | top: "inception_4c/output" 1237 | } 1238 | layer { 1239 | name: "inception_4d/1x1" 1240 | type: "Convolution" 1241 | bottom: "inception_4c/output" 1242 | top: "inception_4d/1x1" 1243 | param { 1244 | lr_mult: 1 1245 | decay_mult: 1 1246 | } 1247 | param { 1248 | lr_mult: 2 1249 | decay_mult: 0 1250 | } 1251 | convolution_param { 1252 | num_output: 112 1253 | kernel_size: 1 1254 | weight_filler { 1255 | type: "xavier" 1256 | std: 0.03 1257 | } 1258 | bias_filler { 1259 | type: "constant" 1260 | value: 0.2 1261 | } 1262 | } 1263 | } 1264 | layer { 1265 | name: "inception_4d/relu_1x1" 1266 | type: "ReLU" 1267 | bottom: "inception_4d/1x1" 1268 | top: "inception_4d/1x1" 1269 | } 1270 | layer { 1271 | name: "inception_4d/3x3_reduce" 1272 | type: "Convolution" 1273 | bottom: "inception_4c/output" 1274 | top: "inception_4d/3x3_reduce" 1275 | 
param { 1276 | lr_mult: 1 1277 | decay_mult: 1 1278 | } 1279 | param { 1280 | lr_mult: 2 1281 | decay_mult: 0 1282 | } 1283 | convolution_param { 1284 | num_output: 144 1285 | kernel_size: 1 1286 | weight_filler { 1287 | type: "xavier" 1288 | std: 0.09 1289 | } 1290 | bias_filler { 1291 | type: "constant" 1292 | value: 0.2 1293 | } 1294 | } 1295 | } 1296 | layer { 1297 | name: "inception_4d/relu_3x3_reduce" 1298 | type: "ReLU" 1299 | bottom: "inception_4d/3x3_reduce" 1300 | top: "inception_4d/3x3_reduce" 1301 | } 1302 | layer { 1303 | name: "inception_4d/3x3" 1304 | type: "Convolution" 1305 | bottom: "inception_4d/3x3_reduce" 1306 | top: "inception_4d/3x3" 1307 | param { 1308 | lr_mult: 1 1309 | decay_mult: 1 1310 | } 1311 | param { 1312 | lr_mult: 2 1313 | decay_mult: 0 1314 | } 1315 | convolution_param { 1316 | num_output: 288 1317 | pad: 1 1318 | kernel_size: 3 1319 | weight_filler { 1320 | type: "xavier" 1321 | std: 0.03 1322 | } 1323 | bias_filler { 1324 | type: "constant" 1325 | value: 0.2 1326 | } 1327 | } 1328 | } 1329 | layer { 1330 | name: "inception_4d/relu_3x3" 1331 | type: "ReLU" 1332 | bottom: "inception_4d/3x3" 1333 | top: "inception_4d/3x3" 1334 | } 1335 | layer { 1336 | name: "inception_4d/5x5_reduce" 1337 | type: "Convolution" 1338 | bottom: "inception_4c/output" 1339 | top: "inception_4d/5x5_reduce" 1340 | param { 1341 | lr_mult: 1 1342 | decay_mult: 1 1343 | } 1344 | param { 1345 | lr_mult: 2 1346 | decay_mult: 0 1347 | } 1348 | convolution_param { 1349 | num_output: 32 1350 | kernel_size: 1 1351 | weight_filler { 1352 | type: "xavier" 1353 | std: 0.2 1354 | } 1355 | bias_filler { 1356 | type: "constant" 1357 | value: 0.2 1358 | } 1359 | } 1360 | } 1361 | layer { 1362 | name: "inception_4d/relu_5x5_reduce" 1363 | type: "ReLU" 1364 | bottom: "inception_4d/5x5_reduce" 1365 | top: "inception_4d/5x5_reduce" 1366 | } 1367 | layer { 1368 | name: "inception_4d/5x5" 1369 | type: "Convolution" 1370 | bottom: "inception_4d/5x5_reduce" 1371 | top: 
"inception_4d/5x5" 1372 | param { 1373 | lr_mult: 1 1374 | decay_mult: 1 1375 | } 1376 | param { 1377 | lr_mult: 2 1378 | decay_mult: 0 1379 | } 1380 | convolution_param { 1381 | num_output: 64 1382 | pad: 2 1383 | kernel_size: 5 1384 | weight_filler { 1385 | type: "xavier" 1386 | std: 0.03 1387 | } 1388 | bias_filler { 1389 | type: "constant" 1390 | value: 0.2 1391 | } 1392 | } 1393 | } 1394 | layer { 1395 | name: "inception_4d/relu_5x5" 1396 | type: "ReLU" 1397 | bottom: "inception_4d/5x5" 1398 | top: "inception_4d/5x5" 1399 | } 1400 | layer { 1401 | name: "inception_4d/pool" 1402 | type: "Pooling" 1403 | bottom: "inception_4c/output" 1404 | top: "inception_4d/pool" 1405 | pooling_param { 1406 | pool: MAX 1407 | kernel_size: 3 1408 | stride: 1 1409 | pad: 1 1410 | } 1411 | } 1412 | layer { 1413 | name: "inception_4d/pool_proj" 1414 | type: "Convolution" 1415 | bottom: "inception_4d/pool" 1416 | top: "inception_4d/pool_proj" 1417 | param { 1418 | lr_mult: 1 1419 | decay_mult: 1 1420 | } 1421 | param { 1422 | lr_mult: 2 1423 | decay_mult: 0 1424 | } 1425 | convolution_param { 1426 | num_output: 64 1427 | kernel_size: 1 1428 | weight_filler { 1429 | type: "xavier" 1430 | std: 0.1 1431 | } 1432 | bias_filler { 1433 | type: "constant" 1434 | value: 0.2 1435 | } 1436 | } 1437 | } 1438 | layer { 1439 | name: "inception_4d/relu_pool_proj" 1440 | type: "ReLU" 1441 | bottom: "inception_4d/pool_proj" 1442 | top: "inception_4d/pool_proj" 1443 | } 1444 | layer { 1445 | name: "inception_4d/output" 1446 | type: "Concat" 1447 | bottom: "inception_4d/1x1" 1448 | bottom: "inception_4d/3x3" 1449 | bottom: "inception_4d/5x5" 1450 | bottom: "inception_4d/pool_proj" 1451 | top: "inception_4d/output" 1452 | } 1453 | layer { 1454 | name: "inception_4e/1x1" 1455 | type: "Convolution" 1456 | bottom: "inception_4d/output" 1457 | top: "inception_4e/1x1" 1458 | param { 1459 | lr_mult: 1 1460 | decay_mult: 1 1461 | } 1462 | param { 1463 | lr_mult: 2 1464 | decay_mult: 0 1465 | } 1466 | 
convolution_param { 1467 | num_output: 256 1468 | kernel_size: 1 1469 | weight_filler { 1470 | type: "xavier" 1471 | std: 0.03 1472 | } 1473 | bias_filler { 1474 | type: "constant" 1475 | value: 0.2 1476 | } 1477 | } 1478 | } 1479 | layer { 1480 | name: "inception_4e/relu_1x1" 1481 | type: "ReLU" 1482 | bottom: "inception_4e/1x1" 1483 | top: "inception_4e/1x1" 1484 | } 1485 | layer { 1486 | name: "inception_4e/3x3_reduce" 1487 | type: "Convolution" 1488 | bottom: "inception_4d/output" 1489 | top: "inception_4e/3x3_reduce" 1490 | param { 1491 | lr_mult: 1 1492 | decay_mult: 1 1493 | } 1494 | param { 1495 | lr_mult: 2 1496 | decay_mult: 0 1497 | } 1498 | convolution_param { 1499 | num_output: 160 1500 | kernel_size: 1 1501 | weight_filler { 1502 | type: "xavier" 1503 | std: 0.09 1504 | } 1505 | bias_filler { 1506 | type: "constant" 1507 | value: 0.2 1508 | } 1509 | } 1510 | } 1511 | layer { 1512 | name: "inception_4e/relu_3x3_reduce" 1513 | type: "ReLU" 1514 | bottom: "inception_4e/3x3_reduce" 1515 | top: "inception_4e/3x3_reduce" 1516 | } 1517 | layer { 1518 | name: "inception_4e/3x3" 1519 | type: "Convolution" 1520 | bottom: "inception_4e/3x3_reduce" 1521 | top: "inception_4e/3x3" 1522 | param { 1523 | lr_mult: 1 1524 | decay_mult: 1 1525 | } 1526 | param { 1527 | lr_mult: 2 1528 | decay_mult: 0 1529 | } 1530 | convolution_param { 1531 | num_output: 320 1532 | pad: 1 1533 | kernel_size: 3 1534 | weight_filler { 1535 | type: "xavier" 1536 | std: 0.03 1537 | } 1538 | bias_filler { 1539 | type: "constant" 1540 | value: 0.2 1541 | } 1542 | } 1543 | } 1544 | layer { 1545 | name: "inception_4e/relu_3x3" 1546 | type: "ReLU" 1547 | bottom: "inception_4e/3x3" 1548 | top: "inception_4e/3x3" 1549 | } 1550 | layer { 1551 | name: "inception_4e/5x5_reduce" 1552 | type: "Convolution" 1553 | bottom: "inception_4d/output" 1554 | top: "inception_4e/5x5_reduce" 1555 | param { 1556 | lr_mult: 1 1557 | decay_mult: 1 1558 | } 1559 | param { 1560 | lr_mult: 2 1561 | decay_mult: 0 1562 | 
} 1563 | convolution_param { 1564 | num_output: 32 1565 | kernel_size: 1 1566 | weight_filler { 1567 | type: "xavier" 1568 | std: 0.2 1569 | } 1570 | bias_filler { 1571 | type: "constant" 1572 | value: 0.2 1573 | } 1574 | } 1575 | } 1576 | layer { 1577 | name: "inception_4e/relu_5x5_reduce" 1578 | type: "ReLU" 1579 | bottom: "inception_4e/5x5_reduce" 1580 | top: "inception_4e/5x5_reduce" 1581 | } 1582 | layer { 1583 | name: "inception_4e/5x5" 1584 | type: "Convolution" 1585 | bottom: "inception_4e/5x5_reduce" 1586 | top: "inception_4e/5x5" 1587 | param { 1588 | lr_mult: 1 1589 | decay_mult: 1 1590 | } 1591 | param { 1592 | lr_mult: 2 1593 | decay_mult: 0 1594 | } 1595 | convolution_param { 1596 | num_output: 128 1597 | pad: 2 1598 | kernel_size: 5 1599 | weight_filler { 1600 | type: "xavier" 1601 | std: 0.03 1602 | } 1603 | bias_filler { 1604 | type: "constant" 1605 | value: 0.2 1606 | } 1607 | } 1608 | } 1609 | layer { 1610 | name: "inception_4e/relu_5x5" 1611 | type: "ReLU" 1612 | bottom: "inception_4e/5x5" 1613 | top: "inception_4e/5x5" 1614 | } 1615 | layer { 1616 | name: "inception_4e/pool" 1617 | type: "Pooling" 1618 | bottom: "inception_4d/output" 1619 | top: "inception_4e/pool" 1620 | pooling_param { 1621 | pool: MAX 1622 | kernel_size: 3 1623 | stride: 1 1624 | pad: 1 1625 | } 1626 | } 1627 | layer { 1628 | name: "inception_4e/pool_proj" 1629 | type: "Convolution" 1630 | bottom: "inception_4e/pool" 1631 | top: "inception_4e/pool_proj" 1632 | param { 1633 | lr_mult: 1 1634 | decay_mult: 1 1635 | } 1636 | param { 1637 | lr_mult: 2 1638 | decay_mult: 0 1639 | } 1640 | convolution_param { 1641 | num_output: 128 1642 | kernel_size: 1 1643 | weight_filler { 1644 | type: "xavier" 1645 | std: 0.1 1646 | } 1647 | bias_filler { 1648 | type: "constant" 1649 | value: 0.2 1650 | } 1651 | } 1652 | } 1653 | layer { 1654 | name: "inception_4e/relu_pool_proj" 1655 | type: "ReLU" 1656 | bottom: "inception_4e/pool_proj" 1657 | top: "inception_4e/pool_proj" 1658 | } 1659 | 
layer { 1660 | name: "inception_4e/output" 1661 | type: "Concat" 1662 | bottom: "inception_4e/1x1" 1663 | bottom: "inception_4e/3x3" 1664 | bottom: "inception_4e/5x5" 1665 | bottom: "inception_4e/pool_proj" 1666 | top: "inception_4e/output" 1667 | } 1668 | layer { 1669 | name: "pool4/3x3_s2" 1670 | type: "Pooling" 1671 | bottom: "inception_4e/output" 1672 | top: "pool4/3x3_s2" 1673 | pooling_param { 1674 | pool: MAX 1675 | kernel_size: 3 1676 | stride: 2 1677 | } 1678 | } 1679 | layer { 1680 | name: "inception_5a/1x1" 1681 | type: "Convolution" 1682 | bottom: "pool4/3x3_s2" 1683 | top: "inception_5a/1x1" 1684 | param { 1685 | lr_mult: 1 1686 | decay_mult: 1 1687 | } 1688 | param { 1689 | lr_mult: 2 1690 | decay_mult: 0 1691 | } 1692 | convolution_param { 1693 | num_output: 256 1694 | kernel_size: 1 1695 | weight_filler { 1696 | type: "xavier" 1697 | std: 0.03 1698 | } 1699 | bias_filler { 1700 | type: "constant" 1701 | value: 0.2 1702 | } 1703 | } 1704 | } 1705 | layer { 1706 | name: "inception_5a/relu_1x1" 1707 | type: "ReLU" 1708 | bottom: "inception_5a/1x1" 1709 | top: "inception_5a/1x1" 1710 | } 1711 | layer { 1712 | name: "inception_5a/3x3_reduce" 1713 | type: "Convolution" 1714 | bottom: "pool4/3x3_s2" 1715 | top: "inception_5a/3x3_reduce" 1716 | param { 1717 | lr_mult: 1 1718 | decay_mult: 1 1719 | } 1720 | param { 1721 | lr_mult: 2 1722 | decay_mult: 0 1723 | } 1724 | convolution_param { 1725 | num_output: 160 1726 | kernel_size: 1 1727 | weight_filler { 1728 | type: "xavier" 1729 | std: 0.09 1730 | } 1731 | bias_filler { 1732 | type: "constant" 1733 | value: 0.2 1734 | } 1735 | } 1736 | } 1737 | layer { 1738 | name: "inception_5a/relu_3x3_reduce" 1739 | type: "ReLU" 1740 | bottom: "inception_5a/3x3_reduce" 1741 | top: "inception_5a/3x3_reduce" 1742 | } 1743 | layer { 1744 | name: "inception_5a/3x3" 1745 | type: "Convolution" 1746 | bottom: "inception_5a/3x3_reduce" 1747 | top: "inception_5a/3x3" 1748 | param { 1749 | lr_mult: 1 1750 | decay_mult: 1 1751 | 
} 1752 | param { 1753 | lr_mult: 2 1754 | decay_mult: 0 1755 | } 1756 | convolution_param { 1757 | num_output: 320 1758 | pad: 1 1759 | kernel_size: 3 1760 | weight_filler { 1761 | type: "xavier" 1762 | std: 0.03 1763 | } 1764 | bias_filler { 1765 | type: "constant" 1766 | value: 0.2 1767 | } 1768 | } 1769 | } 1770 | layer { 1771 | name: "inception_5a/relu_3x3" 1772 | type: "ReLU" 1773 | bottom: "inception_5a/3x3" 1774 | top: "inception_5a/3x3" 1775 | } 1776 | layer { 1777 | name: "inception_5a/5x5_reduce" 1778 | type: "Convolution" 1779 | bottom: "pool4/3x3_s2" 1780 | top: "inception_5a/5x5_reduce" 1781 | param { 1782 | lr_mult: 1 1783 | decay_mult: 1 1784 | } 1785 | param { 1786 | lr_mult: 2 1787 | decay_mult: 0 1788 | } 1789 | convolution_param { 1790 | num_output: 32 1791 | kernel_size: 1 1792 | weight_filler { 1793 | type: "xavier" 1794 | std: 0.2 1795 | } 1796 | bias_filler { 1797 | type: "constant" 1798 | value: 0.2 1799 | } 1800 | } 1801 | } 1802 | layer { 1803 | name: "inception_5a/relu_5x5_reduce" 1804 | type: "ReLU" 1805 | bottom: "inception_5a/5x5_reduce" 1806 | top: "inception_5a/5x5_reduce" 1807 | } 1808 | layer { 1809 | name: "inception_5a/5x5" 1810 | type: "Convolution" 1811 | bottom: "inception_5a/5x5_reduce" 1812 | top: "inception_5a/5x5" 1813 | param { 1814 | lr_mult: 1 1815 | decay_mult: 1 1816 | } 1817 | param { 1818 | lr_mult: 2 1819 | decay_mult: 0 1820 | } 1821 | convolution_param { 1822 | num_output: 128 1823 | pad: 2 1824 | kernel_size: 5 1825 | weight_filler { 1826 | type: "xavier" 1827 | std: 0.03 1828 | } 1829 | bias_filler { 1830 | type: "constant" 1831 | value: 0.2 1832 | } 1833 | } 1834 | } 1835 | layer { 1836 | name: "inception_5a/relu_5x5" 1837 | type: "ReLU" 1838 | bottom: "inception_5a/5x5" 1839 | top: "inception_5a/5x5" 1840 | } 1841 | layer { 1842 | name: "inception_5a/pool" 1843 | type: "Pooling" 1844 | bottom: "pool4/3x3_s2" 1845 | top: "inception_5a/pool" 1846 | pooling_param { 1847 | pool: MAX 1848 | kernel_size: 3 1849 | 
stride: 1 1850 | pad: 1 1851 | } 1852 | } 1853 | layer { 1854 | name: "inception_5a/pool_proj" 1855 | type: "Convolution" 1856 | bottom: "inception_5a/pool" 1857 | top: "inception_5a/pool_proj" 1858 | param { 1859 | lr_mult: 1 1860 | decay_mult: 1 1861 | } 1862 | param { 1863 | lr_mult: 2 1864 | decay_mult: 0 1865 | } 1866 | convolution_param { 1867 | num_output: 128 1868 | kernel_size: 1 1869 | weight_filler { 1870 | type: "xavier" 1871 | std: 0.1 1872 | } 1873 | bias_filler { 1874 | type: "constant" 1875 | value: 0.2 1876 | } 1877 | } 1878 | } 1879 | layer { 1880 | name: "inception_5a/relu_pool_proj" 1881 | type: "ReLU" 1882 | bottom: "inception_5a/pool_proj" 1883 | top: "inception_5a/pool_proj" 1884 | } 1885 | layer { 1886 | name: "inception_5a/output" 1887 | type: "Concat" 1888 | bottom: "inception_5a/1x1" 1889 | bottom: "inception_5a/3x3" 1890 | bottom: "inception_5a/5x5" 1891 | bottom: "inception_5a/pool_proj" 1892 | top: "inception_5a/output" 1893 | } 1894 | layer { 1895 | name: "inception_5b/1x1" 1896 | type: "Convolution" 1897 | bottom: "inception_5a/output" 1898 | top: "inception_5b/1x1" 1899 | param { 1900 | lr_mult: 1 1901 | decay_mult: 1 1902 | } 1903 | param { 1904 | lr_mult: 2 1905 | decay_mult: 0 1906 | } 1907 | convolution_param { 1908 | num_output: 384 1909 | kernel_size: 1 1910 | weight_filler { 1911 | type: "xavier" 1912 | std: 0.03 1913 | } 1914 | bias_filler { 1915 | type: "constant" 1916 | value: 0.2 1917 | } 1918 | } 1919 | } 1920 | layer { 1921 | name: "inception_5b/relu_1x1" 1922 | type: "ReLU" 1923 | bottom: "inception_5b/1x1" 1924 | top: "inception_5b/1x1" 1925 | } 1926 | layer { 1927 | name: "inception_5b/3x3_reduce" 1928 | type: "Convolution" 1929 | bottom: "inception_5a/output" 1930 | top: "inception_5b/3x3_reduce" 1931 | param { 1932 | lr_mult: 1 1933 | decay_mult: 1 1934 | } 1935 | param { 1936 | lr_mult: 2 1937 | decay_mult: 0 1938 | } 1939 | convolution_param { 1940 | num_output: 192 1941 | kernel_size: 1 1942 | weight_filler { 
1943 | type: "xavier" 1944 | std: 0.09 1945 | } 1946 | bias_filler { 1947 | type: "constant" 1948 | value: 0.2 1949 | } 1950 | } 1951 | } 1952 | layer { 1953 | name: "inception_5b/relu_3x3_reduce" 1954 | type: "ReLU" 1955 | bottom: "inception_5b/3x3_reduce" 1956 | top: "inception_5b/3x3_reduce" 1957 | } 1958 | layer { 1959 | name: "inception_5b/3x3" 1960 | type: "Convolution" 1961 | bottom: "inception_5b/3x3_reduce" 1962 | top: "inception_5b/3x3" 1963 | param { 1964 | lr_mult: 1 1965 | decay_mult: 1 1966 | } 1967 | param { 1968 | lr_mult: 2 1969 | decay_mult: 0 1970 | } 1971 | convolution_param { 1972 | num_output: 384 1973 | pad: 1 1974 | kernel_size: 3 1975 | weight_filler { 1976 | type: "xavier" 1977 | std: 0.03 1978 | } 1979 | bias_filler { 1980 | type: "constant" 1981 | value: 0.2 1982 | } 1983 | } 1984 | } 1985 | layer { 1986 | name: "inception_5b/relu_3x3" 1987 | type: "ReLU" 1988 | bottom: "inception_5b/3x3" 1989 | top: "inception_5b/3x3" 1990 | } 1991 | layer { 1992 | name: "inception_5b/5x5_reduce" 1993 | type: "Convolution" 1994 | bottom: "inception_5a/output" 1995 | top: "inception_5b/5x5_reduce" 1996 | param { 1997 | lr_mult: 1 1998 | decay_mult: 1 1999 | } 2000 | param { 2001 | lr_mult: 2 2002 | decay_mult: 0 2003 | } 2004 | convolution_param { 2005 | num_output: 48 2006 | kernel_size: 1 2007 | weight_filler { 2008 | type: "xavier" 2009 | std: 0.2 2010 | } 2011 | bias_filler { 2012 | type: "constant" 2013 | value: 0.2 2014 | } 2015 | } 2016 | } 2017 | layer { 2018 | name: "inception_5b/relu_5x5_reduce" 2019 | type: "ReLU" 2020 | bottom: "inception_5b/5x5_reduce" 2021 | top: "inception_5b/5x5_reduce" 2022 | } 2023 | layer { 2024 | name: "inception_5b/5x5" 2025 | type: "Convolution" 2026 | bottom: "inception_5b/5x5_reduce" 2027 | top: "inception_5b/5x5" 2028 | param { 2029 | lr_mult: 1 2030 | decay_mult: 1 2031 | } 2032 | param { 2033 | lr_mult: 2 2034 | decay_mult: 0 2035 | } 2036 | convolution_param { 2037 | num_output: 128 2038 | pad: 2 2039 | 
kernel_size: 5 2040 | weight_filler { 2041 | type: "xavier" 2042 | std: 0.03 2043 | } 2044 | bias_filler { 2045 | type: "constant" 2046 | value: 0.2 2047 | } 2048 | } 2049 | } 2050 | layer { 2051 | name: "inception_5b/relu_5x5" 2052 | type: "ReLU" 2053 | bottom: "inception_5b/5x5" 2054 | top: "inception_5b/5x5" 2055 | } 2056 | layer { 2057 | name: "inception_5b/pool" 2058 | type: "Pooling" 2059 | bottom: "inception_5a/output" 2060 | top: "inception_5b/pool" 2061 | pooling_param { 2062 | pool: MAX 2063 | kernel_size: 3 2064 | stride: 1 2065 | pad: 1 2066 | } 2067 | } 2068 | layer { 2069 | name: "inception_5b/pool_proj" 2070 | type: "Convolution" 2071 | bottom: "inception_5b/pool" 2072 | top: "inception_5b/pool_proj" 2073 | param { 2074 | lr_mult: 1 2075 | decay_mult: 1 2076 | } 2077 | param { 2078 | lr_mult: 2 2079 | decay_mult: 0 2080 | } 2081 | convolution_param { 2082 | num_output: 128 2083 | kernel_size: 1 2084 | weight_filler { 2085 | type: "xavier" 2086 | std: 0.1 2087 | } 2088 | bias_filler { 2089 | type: "constant" 2090 | value: 0.2 2091 | } 2092 | } 2093 | } 2094 | layer { 2095 | name: "inception_5b/relu_pool_proj" 2096 | type: "ReLU" 2097 | bottom: "inception_5b/pool_proj" 2098 | top: "inception_5b/pool_proj" 2099 | } 2100 | layer { 2101 | name: "inception_5b/output" 2102 | type: "Concat" 2103 | bottom: "inception_5b/1x1" 2104 | bottom: "inception_5b/3x3" 2105 | bottom: "inception_5b/5x5" 2106 | bottom: "inception_5b/pool_proj" 2107 | top: "inception_5b/output" 2108 | } 2109 | layer { 2110 | name: "pool5/7x7_s1" 2111 | type: "Pooling" 2112 | bottom: "inception_5b/output" 2113 | top: "pool5/7x7_s1" 2114 | pooling_param { 2115 | pool: AVE 2116 | kernel_size: 7 2117 | stride: 1 2118 | } 2119 | } 2120 | layer { 2121 | name: "pool5/drop_7x7_s1" 2122 | type: "Dropout" 2123 | bottom: "pool5/7x7_s1" 2124 | top: "pool5/7x7_s1" 2125 | dropout_param { 2126 | dropout_ratio: 0.4 2127 | } 2128 | } 2129 | layer { 2130 | name: "loss3/classifier" 2131 | type: 
"InnerProduct" 2132 | bottom: "pool5/7x7_s1" 2133 | top: "loss3/classifier" 2134 | param { 2135 | lr_mult: 1 2136 | decay_mult: 1 2137 | } 2138 | param { 2139 | lr_mult: 2 2140 | decay_mult: 0 2141 | } 2142 | inner_product_param { 2143 | num_output: {{num_output |default(2, true)}} 2144 | weight_filler { 2145 | type: "xavier" 2146 | } 2147 | bias_filler { 2148 | type: "constant" 2149 | value: 0 2150 | } 2151 | } 2152 | } 2153 | layer { 2154 | name: "prob" 2155 | type: "Softmax" 2156 | bottom: "loss3/classifier" 2157 | top: "prob" 2158 | } 2159 | -------------------------------------------------------------------------------- /templates/googlenet/quick_solver.prototxt.tmpl: -------------------------------------------------------------------------------- 1 | net: "train.prototxt" 2 | display: {{display_interval | default(1000,true)}} 3 | average_loss: 40 4 | base_lr: 0.01 5 | lr_policy: "poly" 6 | power: 0.5 7 | max_iter: {{max_iter | default(100000,true)}} 8 | momentum: 0.9 9 | weight_decay: 0.0002 10 | snapshot: {{snapshot_interval | default(1000,true)}} 11 | snapshot_prefix: "./snapshots/x" 12 | solver_mode: {{device | default(GPU,true)}} 13 | -------------------------------------------------------------------------------- /templates/googlenet/readme.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: BVLC GoogleNet Model 3 | caffemodel: bvlc_googlenet.caffemodel 4 | caffemodel_url: http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel 5 | license: unrestricted 6 | sha1: 405fc5acd08a3bb12de8ee5e23a96bec22f08204 7 | caffe_commit: bc614d1bd91896e3faceaf40b23b72dab47d44f5 8 | --- 9 | 10 | This model is a replication of the model described in the [GoogleNet](http://arxiv.org/abs/1409.4842) publication. We would like to thank Christian Szegedy for all his help in the replication of GoogleNet model. 
11 | 12 | Differences: 13 | - not training with the relighting data-augmentation; 14 | - not training with the scale or aspect-ratio data-augmentation; 15 | - uses "xavier" to initialize the weights instead of "gaussian"; 16 | - quick_solver.prototxt uses a different learning rate decay policy than the original solver.prototxt, which allows much faster training (60 epochs vs 250 epochs). 17 | 18 | The bundled model is the iteration 2,400,000 snapshot (60 epochs) trained using quick_solver.prototxt. 19 | 20 | This bundled model obtains a top-1 accuracy of 68.7% (31.3% error) and a top-5 accuracy of 88.9% (11.1% error) on the validation set, using just the center crop. 21 | (Using the average of 10 crops, (4 corners + 1 center) * 2 mirrors, should yield slightly higher accuracy.) 22 | 23 | Timings for bvlc_googlenet with cuDNN using batch_size:128 on a K40c: 24 | - Average Forward pass: 562.841 ms. 25 | - Average Backward pass: 1123.84 ms. 26 | - Average Forward-Backward: 1688.8 ms. 27 | 28 | This model was trained by Sergio Guadarrama @sguada 29 | 30 | ## License 31 | 32 | This model is released for unrestricted use.
33 | -------------------------------------------------------------------------------- /templates/googlenet/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "train.prototxt" 2 | display: {{display_interval}} 3 | test_iter: {{test_iter}} 4 | test_interval: {{test_interval}} 5 | test_initialization: false 6 | average_loss: 40 7 | base_lr: 0.01 8 | lr_policy: "step" 9 | stepsize: 320000 10 | gamma: 0.96 11 | max_iter: {{max_iter}} 12 | momentum: 0.9 13 | weight_decay: 0.0002 14 | snapshot: {{snapshot_interval}} 15 | snapshot_prefix: "./snapshots/caffe" 16 | solver_mode: {{device}} 17 | -------------------------------------------------------------------------------- /templates/googlenet/train.prototxt.tmpl: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | layer { 3 | name: "data" 4 | type: "PicPac" 5 | top: "data" 6 | top: "label" 7 | picpac_param { 8 | path: "{{db_path}}" 9 | batch: {{batch | default(16,true)}} 10 | channels: {{channels | default(3,true)}} 11 | split: {{split | default(10,true) }} 12 | split_fold: {{split_fold | default(0,true)}} 13 | resize_width: 224 14 | resize_height: 224 15 | {% if mixin %} 16 | mixin: "{{mixin}}" 17 | mixin_group_delta: {{mixin_group_delta|default(0,true)}} 18 | {% endif %} 19 | threads: 4 20 | perturb: true 21 | pert_color1: 10 22 | pert_color2: 10 23 | pert_color3: 10 24 | pert_angle: 20 25 | pert_min_scale: 0.8 26 | pert_max_scale: 1.2 27 | } 28 | } 29 | layer { 30 | name: "conv1/7x7_s2" 31 | type: "Convolution" 32 | bottom: "data" 33 | top: "conv1/7x7_s2" 34 | param { 35 | lr_mult: 1 36 | decay_mult: 1 37 | } 38 | param { 39 | lr_mult: 2 40 | decay_mult: 0 41 | } 42 | convolution_param { 43 | num_output: 64 44 | pad: 3 45 | kernel_size: 7 46 | stride: 2 47 | weight_filler { 48 | type: "xavier" 49 | } 50 | bias_filler { 51 | type: "constant" 52 | value: 0.2 53 | } 54 | } 55 | } 56 | layer { 57 | name: 
"conv1/relu_7x7" 58 | type: "ReLU" 59 | bottom: "conv1/7x7_s2" 60 | top: "conv1/7x7_s2" 61 | } 62 | layer { 63 | name: "pool1/3x3_s2" 64 | type: "Pooling" 65 | bottom: "conv1/7x7_s2" 66 | top: "pool1/3x3_s2" 67 | pooling_param { 68 | pool: MAX 69 | kernel_size: 3 70 | stride: 2 71 | } 72 | } 73 | layer { 74 | name: "pool1/norm1" 75 | type: "LRN" 76 | bottom: "pool1/3x3_s2" 77 | top: "pool1/norm1" 78 | lrn_param { 79 | local_size: 5 80 | alpha: 0.0001 81 | beta: 0.75 82 | } 83 | } 84 | layer { 85 | name: "conv2/3x3_reduce" 86 | type: "Convolution" 87 | bottom: "pool1/norm1" 88 | top: "conv2/3x3_reduce" 89 | param { 90 | lr_mult: 1 91 | decay_mult: 1 92 | } 93 | param { 94 | lr_mult: 2 95 | decay_mult: 0 96 | } 97 | convolution_param { 98 | num_output: 64 99 | kernel_size: 1 100 | weight_filler { 101 | type: "xavier" 102 | } 103 | bias_filler { 104 | type: "constant" 105 | value: 0.2 106 | } 107 | } 108 | } 109 | layer { 110 | name: "conv2/relu_3x3_reduce" 111 | type: "ReLU" 112 | bottom: "conv2/3x3_reduce" 113 | top: "conv2/3x3_reduce" 114 | } 115 | layer { 116 | name: "conv2/3x3" 117 | type: "Convolution" 118 | bottom: "conv2/3x3_reduce" 119 | top: "conv2/3x3" 120 | param { 121 | lr_mult: 1 122 | decay_mult: 1 123 | } 124 | param { 125 | lr_mult: 2 126 | decay_mult: 0 127 | } 128 | convolution_param { 129 | num_output: 192 130 | pad: 1 131 | kernel_size: 3 132 | weight_filler { 133 | type: "xavier" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.2 138 | } 139 | } 140 | } 141 | layer { 142 | name: "conv2/relu_3x3" 143 | type: "ReLU" 144 | bottom: "conv2/3x3" 145 | top: "conv2/3x3" 146 | } 147 | layer { 148 | name: "conv2/norm2" 149 | type: "LRN" 150 | bottom: "conv2/3x3" 151 | top: "conv2/norm2" 152 | lrn_param { 153 | local_size: 5 154 | alpha: 0.0001 155 | beta: 0.75 156 | } 157 | } 158 | layer { 159 | name: "pool2/3x3_s2" 160 | type: "Pooling" 161 | bottom: "conv2/norm2" 162 | top: "pool2/3x3_s2" 163 | pooling_param { 164 | pool: MAX 165 | 
kernel_size: 3 166 | stride: 2 167 | } 168 | } 169 | layer { 170 | name: "inception_3a/1x1" 171 | type: "Convolution" 172 | bottom: "pool2/3x3_s2" 173 | top: "inception_3a/1x1" 174 | param { 175 | lr_mult: 1 176 | decay_mult: 1 177 | } 178 | param { 179 | lr_mult: 2 180 | decay_mult: 0 181 | } 182 | convolution_param { 183 | num_output: 64 184 | kernel_size: 1 185 | weight_filler { 186 | type: "xavier" 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0.2 191 | } 192 | } 193 | } 194 | layer { 195 | name: "inception_3a/relu_1x1" 196 | type: "ReLU" 197 | bottom: "inception_3a/1x1" 198 | top: "inception_3a/1x1" 199 | } 200 | layer { 201 | name: "inception_3a/3x3_reduce" 202 | type: "Convolution" 203 | bottom: "pool2/3x3_s2" 204 | top: "inception_3a/3x3_reduce" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 2 211 | decay_mult: 0 212 | } 213 | convolution_param { 214 | num_output: 96 215 | kernel_size: 1 216 | weight_filler { 217 | type: "xavier" 218 | } 219 | bias_filler { 220 | type: "constant" 221 | value: 0.2 222 | } 223 | } 224 | } 225 | layer { 226 | name: "inception_3a/relu_3x3_reduce" 227 | type: "ReLU" 228 | bottom: "inception_3a/3x3_reduce" 229 | top: "inception_3a/3x3_reduce" 230 | } 231 | layer { 232 | name: "inception_3a/3x3" 233 | type: "Convolution" 234 | bottom: "inception_3a/3x3_reduce" 235 | top: "inception_3a/3x3" 236 | param { 237 | lr_mult: 1 238 | decay_mult: 1 239 | } 240 | param { 241 | lr_mult: 2 242 | decay_mult: 0 243 | } 244 | convolution_param { 245 | num_output: 128 246 | pad: 1 247 | kernel_size: 3 248 | weight_filler { 249 | type: "xavier" 250 | } 251 | bias_filler { 252 | type: "constant" 253 | value: 0.2 254 | } 255 | } 256 | } 257 | layer { 258 | name: "inception_3a/relu_3x3" 259 | type: "ReLU" 260 | bottom: "inception_3a/3x3" 261 | top: "inception_3a/3x3" 262 | } 263 | layer { 264 | name: "inception_3a/5x5_reduce" 265 | type: "Convolution" 266 | bottom: "pool2/3x3_s2" 267 | top: 
"inception_3a/5x5_reduce" 268 | param { 269 | lr_mult: 1 270 | decay_mult: 1 271 | } 272 | param { 273 | lr_mult: 2 274 | decay_mult: 0 275 | } 276 | convolution_param { 277 | num_output: 16 278 | kernel_size: 1 279 | weight_filler { 280 | type: "xavier" 281 | } 282 | bias_filler { 283 | type: "constant" 284 | value: 0.2 285 | } 286 | } 287 | } 288 | layer { 289 | name: "inception_3a/relu_5x5_reduce" 290 | type: "ReLU" 291 | bottom: "inception_3a/5x5_reduce" 292 | top: "inception_3a/5x5_reduce" 293 | } 294 | layer { 295 | name: "inception_3a/5x5" 296 | type: "Convolution" 297 | bottom: "inception_3a/5x5_reduce" 298 | top: "inception_3a/5x5" 299 | param { 300 | lr_mult: 1 301 | decay_mult: 1 302 | } 303 | param { 304 | lr_mult: 2 305 | decay_mult: 0 306 | } 307 | convolution_param { 308 | num_output: 32 309 | pad: 2 310 | kernel_size: 5 311 | weight_filler { 312 | type: "xavier" 313 | } 314 | bias_filler { 315 | type: "constant" 316 | value: 0.2 317 | } 318 | } 319 | } 320 | layer { 321 | name: "inception_3a/relu_5x5" 322 | type: "ReLU" 323 | bottom: "inception_3a/5x5" 324 | top: "inception_3a/5x5" 325 | } 326 | layer { 327 | name: "inception_3a/pool" 328 | type: "Pooling" 329 | bottom: "pool2/3x3_s2" 330 | top: "inception_3a/pool" 331 | pooling_param { 332 | pool: MAX 333 | kernel_size: 3 334 | stride: 1 335 | pad: 1 336 | } 337 | } 338 | layer { 339 | name: "inception_3a/pool_proj" 340 | type: "Convolution" 341 | bottom: "inception_3a/pool" 342 | top: "inception_3a/pool_proj" 343 | param { 344 | lr_mult: 1 345 | decay_mult: 1 346 | } 347 | param { 348 | lr_mult: 2 349 | decay_mult: 0 350 | } 351 | convolution_param { 352 | num_output: 32 353 | kernel_size: 1 354 | weight_filler { 355 | type: "xavier" 356 | } 357 | bias_filler { 358 | type: "constant" 359 | value: 0.2 360 | } 361 | } 362 | } 363 | layer { 364 | name: "inception_3a/relu_pool_proj" 365 | type: "ReLU" 366 | bottom: "inception_3a/pool_proj" 367 | top: "inception_3a/pool_proj" 368 | } 369 | layer { 370 
| name: "inception_3a/output" 371 | type: "Concat" 372 | bottom: "inception_3a/1x1" 373 | bottom: "inception_3a/3x3" 374 | bottom: "inception_3a/5x5" 375 | bottom: "inception_3a/pool_proj" 376 | top: "inception_3a/output" 377 | } 378 | layer { 379 | name: "inception_3b/1x1" 380 | type: "Convolution" 381 | bottom: "inception_3a/output" 382 | top: "inception_3b/1x1" 383 | param { 384 | lr_mult: 1 385 | decay_mult: 1 386 | } 387 | param { 388 | lr_mult: 2 389 | decay_mult: 0 390 | } 391 | convolution_param { 392 | num_output: 128 393 | kernel_size: 1 394 | weight_filler { 395 | type: "xavier" 396 | } 397 | bias_filler { 398 | type: "constant" 399 | value: 0.2 400 | } 401 | } 402 | } 403 | layer { 404 | name: "inception_3b/relu_1x1" 405 | type: "ReLU" 406 | bottom: "inception_3b/1x1" 407 | top: "inception_3b/1x1" 408 | } 409 | layer { 410 | name: "inception_3b/3x3_reduce" 411 | type: "Convolution" 412 | bottom: "inception_3a/output" 413 | top: "inception_3b/3x3_reduce" 414 | param { 415 | lr_mult: 1 416 | decay_mult: 1 417 | } 418 | param { 419 | lr_mult: 2 420 | decay_mult: 0 421 | } 422 | convolution_param { 423 | num_output: 128 424 | kernel_size: 1 425 | weight_filler { 426 | type: "xavier" 427 | } 428 | bias_filler { 429 | type: "constant" 430 | value: 0.2 431 | } 432 | } 433 | } 434 | layer { 435 | name: "inception_3b/relu_3x3_reduce" 436 | type: "ReLU" 437 | bottom: "inception_3b/3x3_reduce" 438 | top: "inception_3b/3x3_reduce" 439 | } 440 | layer { 441 | name: "inception_3b/3x3" 442 | type: "Convolution" 443 | bottom: "inception_3b/3x3_reduce" 444 | top: "inception_3b/3x3" 445 | param { 446 | lr_mult: 1 447 | decay_mult: 1 448 | } 449 | param { 450 | lr_mult: 2 451 | decay_mult: 0 452 | } 453 | convolution_param { 454 | num_output: 192 455 | pad: 1 456 | kernel_size: 3 457 | weight_filler { 458 | type: "xavier" 459 | } 460 | bias_filler { 461 | type: "constant" 462 | value: 0.2 463 | } 464 | } 465 | } 466 | layer { 467 | name: "inception_3b/relu_3x3" 468 | 
type: "ReLU" 469 | bottom: "inception_3b/3x3" 470 | top: "inception_3b/3x3" 471 | } 472 | layer { 473 | name: "inception_3b/5x5_reduce" 474 | type: "Convolution" 475 | bottom: "inception_3a/output" 476 | top: "inception_3b/5x5_reduce" 477 | param { 478 | lr_mult: 1 479 | decay_mult: 1 480 | } 481 | param { 482 | lr_mult: 2 483 | decay_mult: 0 484 | } 485 | convolution_param { 486 | num_output: 32 487 | kernel_size: 1 488 | weight_filler { 489 | type: "xavier" 490 | } 491 | bias_filler { 492 | type: "constant" 493 | value: 0.2 494 | } 495 | } 496 | } 497 | layer { 498 | name: "inception_3b/relu_5x5_reduce" 499 | type: "ReLU" 500 | bottom: "inception_3b/5x5_reduce" 501 | top: "inception_3b/5x5_reduce" 502 | } 503 | layer { 504 | name: "inception_3b/5x5" 505 | type: "Convolution" 506 | bottom: "inception_3b/5x5_reduce" 507 | top: "inception_3b/5x5" 508 | param { 509 | lr_mult: 1 510 | decay_mult: 1 511 | } 512 | param { 513 | lr_mult: 2 514 | decay_mult: 0 515 | } 516 | convolution_param { 517 | num_output: 96 518 | pad: 2 519 | kernel_size: 5 520 | weight_filler { 521 | type: "xavier" 522 | } 523 | bias_filler { 524 | type: "constant" 525 | value: 0.2 526 | } 527 | } 528 | } 529 | layer { 530 | name: "inception_3b/relu_5x5" 531 | type: "ReLU" 532 | bottom: "inception_3b/5x5" 533 | top: "inception_3b/5x5" 534 | } 535 | layer { 536 | name: "inception_3b/pool" 537 | type: "Pooling" 538 | bottom: "inception_3a/output" 539 | top: "inception_3b/pool" 540 | pooling_param { 541 | pool: MAX 542 | kernel_size: 3 543 | stride: 1 544 | pad: 1 545 | } 546 | } 547 | layer { 548 | name: "inception_3b/pool_proj" 549 | type: "Convolution" 550 | bottom: "inception_3b/pool" 551 | top: "inception_3b/pool_proj" 552 | param { 553 | lr_mult: 1 554 | decay_mult: 1 555 | } 556 | param { 557 | lr_mult: 2 558 | decay_mult: 0 559 | } 560 | convolution_param { 561 | num_output: 64 562 | kernel_size: 1 563 | weight_filler { 564 | type: "xavier" 565 | } 566 | bias_filler { 567 | type: "constant" 
568 | value: 0.2 569 | } 570 | } 571 | } 572 | layer { 573 | name: "inception_3b/relu_pool_proj" 574 | type: "ReLU" 575 | bottom: "inception_3b/pool_proj" 576 | top: "inception_3b/pool_proj" 577 | } 578 | layer { 579 | name: "inception_3b/output" 580 | type: "Concat" 581 | bottom: "inception_3b/1x1" 582 | bottom: "inception_3b/3x3" 583 | bottom: "inception_3b/5x5" 584 | bottom: "inception_3b/pool_proj" 585 | top: "inception_3b/output" 586 | } 587 | layer { 588 | name: "pool3/3x3_s2" 589 | type: "Pooling" 590 | bottom: "inception_3b/output" 591 | top: "pool3/3x3_s2" 592 | pooling_param { 593 | pool: MAX 594 | kernel_size: 3 595 | stride: 2 596 | } 597 | } 598 | layer { 599 | name: "inception_4a/1x1" 600 | type: "Convolution" 601 | bottom: "pool3/3x3_s2" 602 | top: "inception_4a/1x1" 603 | param { 604 | lr_mult: 1 605 | decay_mult: 1 606 | } 607 | param { 608 | lr_mult: 2 609 | decay_mult: 0 610 | } 611 | convolution_param { 612 | num_output: 192 613 | kernel_size: 1 614 | weight_filler { 615 | type: "xavier" 616 | } 617 | bias_filler { 618 | type: "constant" 619 | value: 0.2 620 | } 621 | } 622 | } 623 | layer { 624 | name: "inception_4a/relu_1x1" 625 | type: "ReLU" 626 | bottom: "inception_4a/1x1" 627 | top: "inception_4a/1x1" 628 | } 629 | layer { 630 | name: "inception_4a/3x3_reduce" 631 | type: "Convolution" 632 | bottom: "pool3/3x3_s2" 633 | top: "inception_4a/3x3_reduce" 634 | param { 635 | lr_mult: 1 636 | decay_mult: 1 637 | } 638 | param { 639 | lr_mult: 2 640 | decay_mult: 0 641 | } 642 | convolution_param { 643 | num_output: 96 644 | kernel_size: 1 645 | weight_filler { 646 | type: "xavier" 647 | } 648 | bias_filler { 649 | type: "constant" 650 | value: 0.2 651 | } 652 | } 653 | } 654 | layer { 655 | name: "inception_4a/relu_3x3_reduce" 656 | type: "ReLU" 657 | bottom: "inception_4a/3x3_reduce" 658 | top: "inception_4a/3x3_reduce" 659 | } 660 | layer { 661 | name: "inception_4a/3x3" 662 | type: "Convolution" 663 | bottom: "inception_4a/3x3_reduce" 664 | 
top: "inception_4a/3x3" 665 | param { 666 | lr_mult: 1 667 | decay_mult: 1 668 | } 669 | param { 670 | lr_mult: 2 671 | decay_mult: 0 672 | } 673 | convolution_param { 674 | num_output: 208 675 | pad: 1 676 | kernel_size: 3 677 | weight_filler { 678 | type: "xavier" 679 | } 680 | bias_filler { 681 | type: "constant" 682 | value: 0.2 683 | } 684 | } 685 | } 686 | layer { 687 | name: "inception_4a/relu_3x3" 688 | type: "ReLU" 689 | bottom: "inception_4a/3x3" 690 | top: "inception_4a/3x3" 691 | } 692 | layer { 693 | name: "inception_4a/5x5_reduce" 694 | type: "Convolution" 695 | bottom: "pool3/3x3_s2" 696 | top: "inception_4a/5x5_reduce" 697 | param { 698 | lr_mult: 1 699 | decay_mult: 1 700 | } 701 | param { 702 | lr_mult: 2 703 | decay_mult: 0 704 | } 705 | convolution_param { 706 | num_output: 16 707 | kernel_size: 1 708 | weight_filler { 709 | type: "xavier" 710 | } 711 | bias_filler { 712 | type: "constant" 713 | value: 0.2 714 | } 715 | } 716 | } 717 | layer { 718 | name: "inception_4a/relu_5x5_reduce" 719 | type: "ReLU" 720 | bottom: "inception_4a/5x5_reduce" 721 | top: "inception_4a/5x5_reduce" 722 | } 723 | layer { 724 | name: "inception_4a/5x5" 725 | type: "Convolution" 726 | bottom: "inception_4a/5x5_reduce" 727 | top: "inception_4a/5x5" 728 | param { 729 | lr_mult: 1 730 | decay_mult: 1 731 | } 732 | param { 733 | lr_mult: 2 734 | decay_mult: 0 735 | } 736 | convolution_param { 737 | num_output: 48 738 | pad: 2 739 | kernel_size: 5 740 | weight_filler { 741 | type: "xavier" 742 | } 743 | bias_filler { 744 | type: "constant" 745 | value: 0.2 746 | } 747 | } 748 | } 749 | layer { 750 | name: "inception_4a/relu_5x5" 751 | type: "ReLU" 752 | bottom: "inception_4a/5x5" 753 | top: "inception_4a/5x5" 754 | } 755 | layer { 756 | name: "inception_4a/pool" 757 | type: "Pooling" 758 | bottom: "pool3/3x3_s2" 759 | top: "inception_4a/pool" 760 | pooling_param { 761 | pool: MAX 762 | kernel_size: 3 763 | stride: 1 764 | pad: 1 765 | } 766 | } 767 | layer { 768 | name: 
"inception_4a/pool_proj" 769 | type: "Convolution" 770 | bottom: "inception_4a/pool" 771 | top: "inception_4a/pool_proj" 772 | param { 773 | lr_mult: 1 774 | decay_mult: 1 775 | } 776 | param { 777 | lr_mult: 2 778 | decay_mult: 0 779 | } 780 | convolution_param { 781 | num_output: 64 782 | kernel_size: 1 783 | weight_filler { 784 | type: "xavier" 785 | } 786 | bias_filler { 787 | type: "constant" 788 | value: 0.2 789 | } 790 | } 791 | } 792 | layer { 793 | name: "inception_4a/relu_pool_proj" 794 | type: "ReLU" 795 | bottom: "inception_4a/pool_proj" 796 | top: "inception_4a/pool_proj" 797 | } 798 | layer { 799 | name: "inception_4a/output" 800 | type: "Concat" 801 | bottom: "inception_4a/1x1" 802 | bottom: "inception_4a/3x3" 803 | bottom: "inception_4a/5x5" 804 | bottom: "inception_4a/pool_proj" 805 | top: "inception_4a/output" 806 | } 807 | layer { 808 | name: "loss1/ave_pool" 809 | type: "Pooling" 810 | bottom: "inception_4a/output" 811 | top: "loss1/ave_pool" 812 | pooling_param { 813 | pool: AVE 814 | kernel_size: 5 815 | stride: 3 816 | } 817 | } 818 | layer { 819 | name: "loss1/conv" 820 | type: "Convolution" 821 | bottom: "loss1/ave_pool" 822 | top: "loss1/conv" 823 | param { 824 | lr_mult: 1 825 | decay_mult: 1 826 | } 827 | param { 828 | lr_mult: 2 829 | decay_mult: 0 830 | } 831 | convolution_param { 832 | num_output: 128 833 | kernel_size: 1 834 | weight_filler { 835 | type: "xavier" 836 | } 837 | bias_filler { 838 | type: "constant" 839 | value: 0.2 840 | } 841 | } 842 | } 843 | layer { 844 | name: "loss1/relu_conv" 845 | type: "ReLU" 846 | bottom: "loss1/conv" 847 | top: "loss1/conv" 848 | } 849 | layer { 850 | name: "loss1/fc" 851 | type: "InnerProduct" 852 | bottom: "loss1/conv" 853 | top: "loss1/fc" 854 | param { 855 | lr_mult: 1 856 | decay_mult: 1 857 | } 858 | param { 859 | lr_mult: 2 860 | decay_mult: 0 861 | } 862 | inner_product_param { 863 | num_output: 1024 864 | weight_filler { 865 | type: "xavier" 866 | } 867 | bias_filler { 868 | type: 
"constant" 869 | value: 0.2 870 | } 871 | } 872 | } 873 | layer { 874 | name: "loss1/relu_fc" 875 | type: "ReLU" 876 | bottom: "loss1/fc" 877 | top: "loss1/fc" 878 | } 879 | layer { 880 | name: "loss1/drop_fc" 881 | type: "Dropout" 882 | bottom: "loss1/fc" 883 | top: "loss1/fc" 884 | dropout_param { 885 | dropout_ratio: 0.7 886 | } 887 | } 888 | layer { 889 | name: "loss1/classifier" 890 | type: "InnerProduct" 891 | bottom: "loss1/fc" 892 | top: "loss1/classifier" 893 | param { 894 | lr_mult: 1 895 | decay_mult: 1 896 | } 897 | param { 898 | lr_mult: 2 899 | decay_mult: 0 900 | } 901 | inner_product_param { 902 | num_output: {{num_output}} 903 | weight_filler { 904 | type: "xavier" 905 | } 906 | bias_filler { 907 | type: "constant" 908 | value: 0 909 | } 910 | } 911 | } 912 | layer { 913 | name: "loss1/loss" 914 | type: "SoftmaxWithLoss" 915 | bottom: "loss1/classifier" 916 | bottom: "label" 917 | top: "loss1/loss1" 918 | loss_weight: 0.3 919 | } 920 | layer { 921 | name: "loss1/accuracy" 922 | type: "Accuracy" 923 | bottom: "loss1/classifier" 924 | bottom: "label" 925 | top: "loss1/accuracy" 926 | include { 927 | phase: TEST 928 | } 929 | } 930 | layer { 931 | name: "inception_4b/1x1" 932 | type: "Convolution" 933 | bottom: "inception_4a/output" 934 | top: "inception_4b/1x1" 935 | param { 936 | lr_mult: 1 937 | decay_mult: 1 938 | } 939 | param { 940 | lr_mult: 2 941 | decay_mult: 0 942 | } 943 | convolution_param { 944 | num_output: 160 945 | kernel_size: 1 946 | weight_filler { 947 | type: "xavier" 948 | } 949 | bias_filler { 950 | type: "constant" 951 | value: 0.2 952 | } 953 | } 954 | } 955 | layer { 956 | name: "inception_4b/relu_1x1" 957 | type: "ReLU" 958 | bottom: "inception_4b/1x1" 959 | top: "inception_4b/1x1" 960 | } 961 | layer { 962 | name: "inception_4b/3x3_reduce" 963 | type: "Convolution" 964 | bottom: "inception_4a/output" 965 | top: "inception_4b/3x3_reduce" 966 | param { 967 | lr_mult: 1 968 | decay_mult: 1 969 | } 970 | param { 971 | lr_mult: 2 
972 | decay_mult: 0 973 | } 974 | convolution_param { 975 | num_output: 112 976 | kernel_size: 1 977 | weight_filler { 978 | type: "xavier" 979 | } 980 | bias_filler { 981 | type: "constant" 982 | value: 0.2 983 | } 984 | } 985 | } 986 | layer { 987 | name: "inception_4b/relu_3x3_reduce" 988 | type: "ReLU" 989 | bottom: "inception_4b/3x3_reduce" 990 | top: "inception_4b/3x3_reduce" 991 | } 992 | layer { 993 | name: "inception_4b/3x3" 994 | type: "Convolution" 995 | bottom: "inception_4b/3x3_reduce" 996 | top: "inception_4b/3x3" 997 | param { 998 | lr_mult: 1 999 | decay_mult: 1 1000 | } 1001 | param { 1002 | lr_mult: 2 1003 | decay_mult: 0 1004 | } 1005 | convolution_param { 1006 | num_output: 224 1007 | pad: 1 1008 | kernel_size: 3 1009 | weight_filler { 1010 | type: "xavier" 1011 | } 1012 | bias_filler { 1013 | type: "constant" 1014 | value: 0.2 1015 | } 1016 | } 1017 | } 1018 | layer { 1019 | name: "inception_4b/relu_3x3" 1020 | type: "ReLU" 1021 | bottom: "inception_4b/3x3" 1022 | top: "inception_4b/3x3" 1023 | } 1024 | layer { 1025 | name: "inception_4b/5x5_reduce" 1026 | type: "Convolution" 1027 | bottom: "inception_4a/output" 1028 | top: "inception_4b/5x5_reduce" 1029 | param { 1030 | lr_mult: 1 1031 | decay_mult: 1 1032 | } 1033 | param { 1034 | lr_mult: 2 1035 | decay_mult: 0 1036 | } 1037 | convolution_param { 1038 | num_output: 24 1039 | kernel_size: 1 1040 | weight_filler { 1041 | type: "xavier" 1042 | } 1043 | bias_filler { 1044 | type: "constant" 1045 | value: 0.2 1046 | } 1047 | } 1048 | } 1049 | layer { 1050 | name: "inception_4b/relu_5x5_reduce" 1051 | type: "ReLU" 1052 | bottom: "inception_4b/5x5_reduce" 1053 | top: "inception_4b/5x5_reduce" 1054 | } 1055 | layer { 1056 | name: "inception_4b/5x5" 1057 | type: "Convolution" 1058 | bottom: "inception_4b/5x5_reduce" 1059 | top: "inception_4b/5x5" 1060 | param { 1061 | lr_mult: 1 1062 | decay_mult: 1 1063 | } 1064 | param { 1065 | lr_mult: 2 1066 | decay_mult: 0 1067 | } 1068 | convolution_param { 
1069 | num_output: 64 1070 | pad: 2 1071 | kernel_size: 5 1072 | weight_filler { 1073 | type: "xavier" 1074 | } 1075 | bias_filler { 1076 | type: "constant" 1077 | value: 0.2 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "inception_4b/relu_5x5" 1083 | type: "ReLU" 1084 | bottom: "inception_4b/5x5" 1085 | top: "inception_4b/5x5" 1086 | } 1087 | layer { 1088 | name: "inception_4b/pool" 1089 | type: "Pooling" 1090 | bottom: "inception_4a/output" 1091 | top: "inception_4b/pool" 1092 | pooling_param { 1093 | pool: MAX 1094 | kernel_size: 3 1095 | stride: 1 1096 | pad: 1 1097 | } 1098 | } 1099 | layer { 1100 | name: "inception_4b/pool_proj" 1101 | type: "Convolution" 1102 | bottom: "inception_4b/pool" 1103 | top: "inception_4b/pool_proj" 1104 | param { 1105 | lr_mult: 1 1106 | decay_mult: 1 1107 | } 1108 | param { 1109 | lr_mult: 2 1110 | decay_mult: 0 1111 | } 1112 | convolution_param { 1113 | num_output: 64 1114 | kernel_size: 1 1115 | weight_filler { 1116 | type: "xavier" 1117 | } 1118 | bias_filler { 1119 | type: "constant" 1120 | value: 0.2 1121 | } 1122 | } 1123 | } 1124 | layer { 1125 | name: "inception_4b/relu_pool_proj" 1126 | type: "ReLU" 1127 | bottom: "inception_4b/pool_proj" 1128 | top: "inception_4b/pool_proj" 1129 | } 1130 | layer { 1131 | name: "inception_4b/output" 1132 | type: "Concat" 1133 | bottom: "inception_4b/1x1" 1134 | bottom: "inception_4b/3x3" 1135 | bottom: "inception_4b/5x5" 1136 | bottom: "inception_4b/pool_proj" 1137 | top: "inception_4b/output" 1138 | } 1139 | layer { 1140 | name: "inception_4c/1x1" 1141 | type: "Convolution" 1142 | bottom: "inception_4b/output" 1143 | top: "inception_4c/1x1" 1144 | param { 1145 | lr_mult: 1 1146 | decay_mult: 1 1147 | } 1148 | param { 1149 | lr_mult: 2 1150 | decay_mult: 0 1151 | } 1152 | convolution_param { 1153 | num_output: 128 1154 | kernel_size: 1 1155 | weight_filler { 1156 | type: "xavier" 1157 | } 1158 | bias_filler { 1159 | type: "constant" 1160 | value: 0.2 1161 | } 1162 | } 1163 | } 
1164 | layer { 1165 | name: "inception_4c/relu_1x1" 1166 | type: "ReLU" 1167 | bottom: "inception_4c/1x1" 1168 | top: "inception_4c/1x1" 1169 | } 1170 | layer { 1171 | name: "inception_4c/3x3_reduce" 1172 | type: "Convolution" 1173 | bottom: "inception_4b/output" 1174 | top: "inception_4c/3x3_reduce" 1175 | param { 1176 | lr_mult: 1 1177 | decay_mult: 1 1178 | } 1179 | param { 1180 | lr_mult: 2 1181 | decay_mult: 0 1182 | } 1183 | convolution_param { 1184 | num_output: 128 1185 | kernel_size: 1 1186 | weight_filler { 1187 | type: "xavier" 1188 | } 1189 | bias_filler { 1190 | type: "constant" 1191 | value: 0.2 1192 | } 1193 | } 1194 | } 1195 | layer { 1196 | name: "inception_4c/relu_3x3_reduce" 1197 | type: "ReLU" 1198 | bottom: "inception_4c/3x3_reduce" 1199 | top: "inception_4c/3x3_reduce" 1200 | } 1201 | layer { 1202 | name: "inception_4c/3x3" 1203 | type: "Convolution" 1204 | bottom: "inception_4c/3x3_reduce" 1205 | top: "inception_4c/3x3" 1206 | param { 1207 | lr_mult: 1 1208 | decay_mult: 1 1209 | } 1210 | param { 1211 | lr_mult: 2 1212 | decay_mult: 0 1213 | } 1214 | convolution_param { 1215 | num_output: 256 1216 | pad: 1 1217 | kernel_size: 3 1218 | weight_filler { 1219 | type: "xavier" 1220 | } 1221 | bias_filler { 1222 | type: "constant" 1223 | value: 0.2 1224 | } 1225 | } 1226 | } 1227 | layer { 1228 | name: "inception_4c/relu_3x3" 1229 | type: "ReLU" 1230 | bottom: "inception_4c/3x3" 1231 | top: "inception_4c/3x3" 1232 | } 1233 | layer { 1234 | name: "inception_4c/5x5_reduce" 1235 | type: "Convolution" 1236 | bottom: "inception_4b/output" 1237 | top: "inception_4c/5x5_reduce" 1238 | param { 1239 | lr_mult: 1 1240 | decay_mult: 1 1241 | } 1242 | param { 1243 | lr_mult: 2 1244 | decay_mult: 0 1245 | } 1246 | convolution_param { 1247 | num_output: 24 1248 | kernel_size: 1 1249 | weight_filler { 1250 | type: "xavier" 1251 | } 1252 | bias_filler { 1253 | type: "constant" 1254 | value: 0.2 1255 | } 1256 | } 1257 | } 1258 | layer { 1259 | name: 
"inception_4c/relu_5x5_reduce" 1260 | type: "ReLU" 1261 | bottom: "inception_4c/5x5_reduce" 1262 | top: "inception_4c/5x5_reduce" 1263 | } 1264 | layer { 1265 | name: "inception_4c/5x5" 1266 | type: "Convolution" 1267 | bottom: "inception_4c/5x5_reduce" 1268 | top: "inception_4c/5x5" 1269 | param { 1270 | lr_mult: 1 1271 | decay_mult: 1 1272 | } 1273 | param { 1274 | lr_mult: 2 1275 | decay_mult: 0 1276 | } 1277 | convolution_param { 1278 | num_output: 64 1279 | pad: 2 1280 | kernel_size: 5 1281 | weight_filler { 1282 | type: "xavier" 1283 | } 1284 | bias_filler { 1285 | type: "constant" 1286 | value: 0.2 1287 | } 1288 | } 1289 | } 1290 | layer { 1291 | name: "inception_4c/relu_5x5" 1292 | type: "ReLU" 1293 | bottom: "inception_4c/5x5" 1294 | top: "inception_4c/5x5" 1295 | } 1296 | layer { 1297 | name: "inception_4c/pool" 1298 | type: "Pooling" 1299 | bottom: "inception_4b/output" 1300 | top: "inception_4c/pool" 1301 | pooling_param { 1302 | pool: MAX 1303 | kernel_size: 3 1304 | stride: 1 1305 | pad: 1 1306 | } 1307 | } 1308 | layer { 1309 | name: "inception_4c/pool_proj" 1310 | type: "Convolution" 1311 | bottom: "inception_4c/pool" 1312 | top: "inception_4c/pool_proj" 1313 | param { 1314 | lr_mult: 1 1315 | decay_mult: 1 1316 | } 1317 | param { 1318 | lr_mult: 2 1319 | decay_mult: 0 1320 | } 1321 | convolution_param { 1322 | num_output: 64 1323 | kernel_size: 1 1324 | weight_filler { 1325 | type: "xavier" 1326 | } 1327 | bias_filler { 1328 | type: "constant" 1329 | value: 0.2 1330 | } 1331 | } 1332 | } 1333 | layer { 1334 | name: "inception_4c/relu_pool_proj" 1335 | type: "ReLU" 1336 | bottom: "inception_4c/pool_proj" 1337 | top: "inception_4c/pool_proj" 1338 | } 1339 | layer { 1340 | name: "inception_4c/output" 1341 | type: "Concat" 1342 | bottom: "inception_4c/1x1" 1343 | bottom: "inception_4c/3x3" 1344 | bottom: "inception_4c/5x5" 1345 | bottom: "inception_4c/pool_proj" 1346 | top: "inception_4c/output" 1347 | } 1348 | layer { 1349 | name: "inception_4d/1x1" 
1350 | type: "Convolution" 1351 | bottom: "inception_4c/output" 1352 | top: "inception_4d/1x1" 1353 | param { 1354 | lr_mult: 1 1355 | decay_mult: 1 1356 | } 1357 | param { 1358 | lr_mult: 2 1359 | decay_mult: 0 1360 | } 1361 | convolution_param { 1362 | num_output: 112 1363 | kernel_size: 1 1364 | weight_filler { 1365 | type: "xavier" 1366 | } 1367 | bias_filler { 1368 | type: "constant" 1369 | value: 0.2 1370 | } 1371 | } 1372 | } 1373 | layer { 1374 | name: "inception_4d/relu_1x1" 1375 | type: "ReLU" 1376 | bottom: "inception_4d/1x1" 1377 | top: "inception_4d/1x1" 1378 | } 1379 | layer { 1380 | name: "inception_4d/3x3_reduce" 1381 | type: "Convolution" 1382 | bottom: "inception_4c/output" 1383 | top: "inception_4d/3x3_reduce" 1384 | param { 1385 | lr_mult: 1 1386 | decay_mult: 1 1387 | } 1388 | param { 1389 | lr_mult: 2 1390 | decay_mult: 0 1391 | } 1392 | convolution_param { 1393 | num_output: 144 1394 | kernel_size: 1 1395 | weight_filler { 1396 | type: "xavier" 1397 | } 1398 | bias_filler { 1399 | type: "constant" 1400 | value: 0.2 1401 | } 1402 | } 1403 | } 1404 | layer { 1405 | name: "inception_4d/relu_3x3_reduce" 1406 | type: "ReLU" 1407 | bottom: "inception_4d/3x3_reduce" 1408 | top: "inception_4d/3x3_reduce" 1409 | } 1410 | layer { 1411 | name: "inception_4d/3x3" 1412 | type: "Convolution" 1413 | bottom: "inception_4d/3x3_reduce" 1414 | top: "inception_4d/3x3" 1415 | param { 1416 | lr_mult: 1 1417 | decay_mult: 1 1418 | } 1419 | param { 1420 | lr_mult: 2 1421 | decay_mult: 0 1422 | } 1423 | convolution_param { 1424 | num_output: 288 1425 | pad: 1 1426 | kernel_size: 3 1427 | weight_filler { 1428 | type: "xavier" 1429 | } 1430 | bias_filler { 1431 | type: "constant" 1432 | value: 0.2 1433 | } 1434 | } 1435 | } 1436 | layer { 1437 | name: "inception_4d/relu_3x3" 1438 | type: "ReLU" 1439 | bottom: "inception_4d/3x3" 1440 | top: "inception_4d/3x3" 1441 | } 1442 | layer { 1443 | name: "inception_4d/5x5_reduce" 1444 | type: "Convolution" 1445 | bottom: 
"inception_4c/output" 1446 | top: "inception_4d/5x5_reduce" 1447 | param { 1448 | lr_mult: 1 1449 | decay_mult: 1 1450 | } 1451 | param { 1452 | lr_mult: 2 1453 | decay_mult: 0 1454 | } 1455 | convolution_param { 1456 | num_output: 32 1457 | kernel_size: 1 1458 | weight_filler { 1459 | type: "xavier" 1460 | } 1461 | bias_filler { 1462 | type: "constant" 1463 | value: 0.2 1464 | } 1465 | } 1466 | } 1467 | layer { 1468 | name: "inception_4d/relu_5x5_reduce" 1469 | type: "ReLU" 1470 | bottom: "inception_4d/5x5_reduce" 1471 | top: "inception_4d/5x5_reduce" 1472 | } 1473 | layer { 1474 | name: "inception_4d/5x5" 1475 | type: "Convolution" 1476 | bottom: "inception_4d/5x5_reduce" 1477 | top: "inception_4d/5x5" 1478 | param { 1479 | lr_mult: 1 1480 | decay_mult: 1 1481 | } 1482 | param { 1483 | lr_mult: 2 1484 | decay_mult: 0 1485 | } 1486 | convolution_param { 1487 | num_output: 64 1488 | pad: 2 1489 | kernel_size: 5 1490 | weight_filler { 1491 | type: "xavier" 1492 | } 1493 | bias_filler { 1494 | type: "constant" 1495 | value: 0.2 1496 | } 1497 | } 1498 | } 1499 | layer { 1500 | name: "inception_4d/relu_5x5" 1501 | type: "ReLU" 1502 | bottom: "inception_4d/5x5" 1503 | top: "inception_4d/5x5" 1504 | } 1505 | layer { 1506 | name: "inception_4d/pool" 1507 | type: "Pooling" 1508 | bottom: "inception_4c/output" 1509 | top: "inception_4d/pool" 1510 | pooling_param { 1511 | pool: MAX 1512 | kernel_size: 3 1513 | stride: 1 1514 | pad: 1 1515 | } 1516 | } 1517 | layer { 1518 | name: "inception_4d/pool_proj" 1519 | type: "Convolution" 1520 | bottom: "inception_4d/pool" 1521 | top: "inception_4d/pool_proj" 1522 | param { 1523 | lr_mult: 1 1524 | decay_mult: 1 1525 | } 1526 | param { 1527 | lr_mult: 2 1528 | decay_mult: 0 1529 | } 1530 | convolution_param { 1531 | num_output: 64 1532 | kernel_size: 1 1533 | weight_filler { 1534 | type: "xavier" 1535 | } 1536 | bias_filler { 1537 | type: "constant" 1538 | value: 0.2 1539 | } 1540 | } 1541 | } 1542 | layer { 1543 | name: 
"inception_4d/relu_pool_proj" 1544 | type: "ReLU" 1545 | bottom: "inception_4d/pool_proj" 1546 | top: "inception_4d/pool_proj" 1547 | } 1548 | layer { 1549 | name: "inception_4d/output" 1550 | type: "Concat" 1551 | bottom: "inception_4d/1x1" 1552 | bottom: "inception_4d/3x3" 1553 | bottom: "inception_4d/5x5" 1554 | bottom: "inception_4d/pool_proj" 1555 | top: "inception_4d/output" 1556 | } 1557 | layer { 1558 | name: "loss2/ave_pool" 1559 | type: "Pooling" 1560 | bottom: "inception_4d/output" 1561 | top: "loss2/ave_pool" 1562 | pooling_param { 1563 | pool: AVE 1564 | kernel_size: 5 1565 | stride: 3 1566 | } 1567 | } 1568 | layer { 1569 | name: "loss2/conv" 1570 | type: "Convolution" 1571 | bottom: "loss2/ave_pool" 1572 | top: "loss2/conv" 1573 | param { 1574 | lr_mult: 1 1575 | decay_mult: 1 1576 | } 1577 | param { 1578 | lr_mult: 2 1579 | decay_mult: 0 1580 | } 1581 | convolution_param { 1582 | num_output: 128 1583 | kernel_size: 1 1584 | weight_filler { 1585 | type: "xavier" 1586 | } 1587 | bias_filler { 1588 | type: "constant" 1589 | value: 0.2 1590 | } 1591 | } 1592 | } 1593 | layer { 1594 | name: "loss2/relu_conv" 1595 | type: "ReLU" 1596 | bottom: "loss2/conv" 1597 | top: "loss2/conv" 1598 | } 1599 | layer { 1600 | name: "loss2/fc" 1601 | type: "InnerProduct" 1602 | bottom: "loss2/conv" 1603 | top: "loss2/fc" 1604 | param { 1605 | lr_mult: 1 1606 | decay_mult: 1 1607 | } 1608 | param { 1609 | lr_mult: 2 1610 | decay_mult: 0 1611 | } 1612 | inner_product_param { 1613 | num_output: 1024 1614 | weight_filler { 1615 | type: "xavier" 1616 | } 1617 | bias_filler { 1618 | type: "constant" 1619 | value: 0.2 1620 | } 1621 | } 1622 | } 1623 | layer { 1624 | name: "loss2/relu_fc" 1625 | type: "ReLU" 1626 | bottom: "loss2/fc" 1627 | top: "loss2/fc" 1628 | } 1629 | layer { 1630 | name: "loss2/drop_fc" 1631 | type: "Dropout" 1632 | bottom: "loss2/fc" 1633 | top: "loss2/fc" 1634 | dropout_param { 1635 | dropout_ratio: 0.7 1636 | } 1637 | } 1638 | layer { 1639 | name: 
"loss2/classifier" 1640 | type: "InnerProduct" 1641 | bottom: "loss2/fc" 1642 | top: "loss2/classifier" 1643 | param { 1644 | lr_mult: 1 1645 | decay_mult: 1 1646 | } 1647 | param { 1648 | lr_mult: 2 1649 | decay_mult: 0 1650 | } 1651 | inner_product_param { 1652 | num_output: {{num_output}} 1653 | weight_filler { 1654 | type: "xavier" 1655 | } 1656 | bias_filler { 1657 | type: "constant" 1658 | value: 0 1659 | } 1660 | } 1661 | } 1662 | layer { 1663 | name: "loss2/loss" 1664 | type: "SoftmaxWithLoss" 1665 | bottom: "loss2/classifier" 1666 | bottom: "label" 1667 | top: "loss2/loss1" 1668 | loss_weight: 0.3 1669 | } 1670 | layer { 1671 | name: "loss2/accuracy" 1672 | type: "Accuracy" 1673 | bottom: "loss2/classifier" 1674 | bottom: "label" 1675 | top: "loss2/accuracy" 1676 | include { 1677 | phase: TEST 1678 | } 1679 | } 1680 | layer { 1681 | name: "inception_4e/1x1" 1682 | type: "Convolution" 1683 | bottom: "inception_4d/output" 1684 | top: "inception_4e/1x1" 1685 | param { 1686 | lr_mult: 1 1687 | decay_mult: 1 1688 | } 1689 | param { 1690 | lr_mult: 2 1691 | decay_mult: 0 1692 | } 1693 | convolution_param { 1694 | num_output: 256 1695 | kernel_size: 1 1696 | weight_filler { 1697 | type: "xavier" 1698 | } 1699 | bias_filler { 1700 | type: "constant" 1701 | value: 0.2 1702 | } 1703 | } 1704 | } 1705 | layer { 1706 | name: "inception_4e/relu_1x1" 1707 | type: "ReLU" 1708 | bottom: "inception_4e/1x1" 1709 | top: "inception_4e/1x1" 1710 | } 1711 | layer { 1712 | name: "inception_4e/3x3_reduce" 1713 | type: "Convolution" 1714 | bottom: "inception_4d/output" 1715 | top: "inception_4e/3x3_reduce" 1716 | param { 1717 | lr_mult: 1 1718 | decay_mult: 1 1719 | } 1720 | param { 1721 | lr_mult: 2 1722 | decay_mult: 0 1723 | } 1724 | convolution_param { 1725 | num_output: 160 1726 | kernel_size: 1 1727 | weight_filler { 1728 | type: "xavier" 1729 | } 1730 | bias_filler { 1731 | type: "constant" 1732 | value: 0.2 1733 | } 1734 | } 1735 | } 1736 | layer { 1737 | name: 
"inception_4e/relu_3x3_reduce" 1738 | type: "ReLU" 1739 | bottom: "inception_4e/3x3_reduce" 1740 | top: "inception_4e/3x3_reduce" 1741 | } 1742 | layer { 1743 | name: "inception_4e/3x3" 1744 | type: "Convolution" 1745 | bottom: "inception_4e/3x3_reduce" 1746 | top: "inception_4e/3x3" 1747 | param { 1748 | lr_mult: 1 1749 | decay_mult: 1 1750 | } 1751 | param { 1752 | lr_mult: 2 1753 | decay_mult: 0 1754 | } 1755 | convolution_param { 1756 | num_output: 320 1757 | pad: 1 1758 | kernel_size: 3 1759 | weight_filler { 1760 | type: "xavier" 1761 | } 1762 | bias_filler { 1763 | type: "constant" 1764 | value: 0.2 1765 | } 1766 | } 1767 | } 1768 | layer { 1769 | name: "inception_4e/relu_3x3" 1770 | type: "ReLU" 1771 | bottom: "inception_4e/3x3" 1772 | top: "inception_4e/3x3" 1773 | } 1774 | layer { 1775 | name: "inception_4e/5x5_reduce" 1776 | type: "Convolution" 1777 | bottom: "inception_4d/output" 1778 | top: "inception_4e/5x5_reduce" 1779 | param { 1780 | lr_mult: 1 1781 | decay_mult: 1 1782 | } 1783 | param { 1784 | lr_mult: 2 1785 | decay_mult: 0 1786 | } 1787 | convolution_param { 1788 | num_output: 32 1789 | kernel_size: 1 1790 | weight_filler { 1791 | type: "xavier" 1792 | } 1793 | bias_filler { 1794 | type: "constant" 1795 | value: 0.2 1796 | } 1797 | } 1798 | } 1799 | layer { 1800 | name: "inception_4e/relu_5x5_reduce" 1801 | type: "ReLU" 1802 | bottom: "inception_4e/5x5_reduce" 1803 | top: "inception_4e/5x5_reduce" 1804 | } 1805 | layer { 1806 | name: "inception_4e/5x5" 1807 | type: "Convolution" 1808 | bottom: "inception_4e/5x5_reduce" 1809 | top: "inception_4e/5x5" 1810 | param { 1811 | lr_mult: 1 1812 | decay_mult: 1 1813 | } 1814 | param { 1815 | lr_mult: 2 1816 | decay_mult: 0 1817 | } 1818 | convolution_param { 1819 | num_output: 128 1820 | pad: 2 1821 | kernel_size: 5 1822 | weight_filler { 1823 | type: "xavier" 1824 | } 1825 | bias_filler { 1826 | type: "constant" 1827 | value: 0.2 1828 | } 1829 | } 1830 | } 1831 | layer { 1832 | name: 
"inception_4e/relu_5x5" 1833 | type: "ReLU" 1834 | bottom: "inception_4e/5x5" 1835 | top: "inception_4e/5x5" 1836 | } 1837 | layer { 1838 | name: "inception_4e/pool" 1839 | type: "Pooling" 1840 | bottom: "inception_4d/output" 1841 | top: "inception_4e/pool" 1842 | pooling_param { 1843 | pool: MAX 1844 | kernel_size: 3 1845 | stride: 1 1846 | pad: 1 1847 | } 1848 | } 1849 | layer { 1850 | name: "inception_4e/pool_proj" 1851 | type: "Convolution" 1852 | bottom: "inception_4e/pool" 1853 | top: "inception_4e/pool_proj" 1854 | param { 1855 | lr_mult: 1 1856 | decay_mult: 1 1857 | } 1858 | param { 1859 | lr_mult: 2 1860 | decay_mult: 0 1861 | } 1862 | convolution_param { 1863 | num_output: 128 1864 | kernel_size: 1 1865 | weight_filler { 1866 | type: "xavier" 1867 | } 1868 | bias_filler { 1869 | type: "constant" 1870 | value: 0.2 1871 | } 1872 | } 1873 | } 1874 | layer { 1875 | name: "inception_4e/relu_pool_proj" 1876 | type: "ReLU" 1877 | bottom: "inception_4e/pool_proj" 1878 | top: "inception_4e/pool_proj" 1879 | } 1880 | layer { 1881 | name: "inception_4e/output" 1882 | type: "Concat" 1883 | bottom: "inception_4e/1x1" 1884 | bottom: "inception_4e/3x3" 1885 | bottom: "inception_4e/5x5" 1886 | bottom: "inception_4e/pool_proj" 1887 | top: "inception_4e/output" 1888 | } 1889 | layer { 1890 | name: "pool4/3x3_s2" 1891 | type: "Pooling" 1892 | bottom: "inception_4e/output" 1893 | top: "pool4/3x3_s2" 1894 | pooling_param { 1895 | pool: MAX 1896 | kernel_size: 3 1897 | stride: 2 1898 | } 1899 | } 1900 | layer { 1901 | name: "inception_5a/1x1" 1902 | type: "Convolution" 1903 | bottom: "pool4/3x3_s2" 1904 | top: "inception_5a/1x1" 1905 | param { 1906 | lr_mult: 1 1907 | decay_mult: 1 1908 | } 1909 | param { 1910 | lr_mult: 2 1911 | decay_mult: 0 1912 | } 1913 | convolution_param { 1914 | num_output: 256 1915 | kernel_size: 1 1916 | weight_filler { 1917 | type: "xavier" 1918 | } 1919 | bias_filler { 1920 | type: "constant" 1921 | value: 0.2 1922 | } 1923 | } 1924 | } 1925 | 
layer { 1926 | name: "inception_5a/relu_1x1" 1927 | type: "ReLU" 1928 | bottom: "inception_5a/1x1" 1929 | top: "inception_5a/1x1" 1930 | } 1931 | layer { 1932 | name: "inception_5a/3x3_reduce" 1933 | type: "Convolution" 1934 | bottom: "pool4/3x3_s2" 1935 | top: "inception_5a/3x3_reduce" 1936 | param { 1937 | lr_mult: 1 1938 | decay_mult: 1 1939 | } 1940 | param { 1941 | lr_mult: 2 1942 | decay_mult: 0 1943 | } 1944 | convolution_param { 1945 | num_output: 160 1946 | kernel_size: 1 1947 | weight_filler { 1948 | type: "xavier" 1949 | } 1950 | bias_filler { 1951 | type: "constant" 1952 | value: 0.2 1953 | } 1954 | } 1955 | } 1956 | layer { 1957 | name: "inception_5a/relu_3x3_reduce" 1958 | type: "ReLU" 1959 | bottom: "inception_5a/3x3_reduce" 1960 | top: "inception_5a/3x3_reduce" 1961 | } 1962 | layer { 1963 | name: "inception_5a/3x3" 1964 | type: "Convolution" 1965 | bottom: "inception_5a/3x3_reduce" 1966 | top: "inception_5a/3x3" 1967 | param { 1968 | lr_mult: 1 1969 | decay_mult: 1 1970 | } 1971 | param { 1972 | lr_mult: 2 1973 | decay_mult: 0 1974 | } 1975 | convolution_param { 1976 | num_output: 320 1977 | pad: 1 1978 | kernel_size: 3 1979 | weight_filler { 1980 | type: "xavier" 1981 | } 1982 | bias_filler { 1983 | type: "constant" 1984 | value: 0.2 1985 | } 1986 | } 1987 | } 1988 | layer { 1989 | name: "inception_5a/relu_3x3" 1990 | type: "ReLU" 1991 | bottom: "inception_5a/3x3" 1992 | top: "inception_5a/3x3" 1993 | } 1994 | layer { 1995 | name: "inception_5a/5x5_reduce" 1996 | type: "Convolution" 1997 | bottom: "pool4/3x3_s2" 1998 | top: "inception_5a/5x5_reduce" 1999 | param { 2000 | lr_mult: 1 2001 | decay_mult: 1 2002 | } 2003 | param { 2004 | lr_mult: 2 2005 | decay_mult: 0 2006 | } 2007 | convolution_param { 2008 | num_output: 32 2009 | kernel_size: 1 2010 | weight_filler { 2011 | type: "xavier" 2012 | } 2013 | bias_filler { 2014 | type: "constant" 2015 | value: 0.2 2016 | } 2017 | } 2018 | } 2019 | layer { 2020 | name: "inception_5a/relu_5x5_reduce" 2021 
| type: "ReLU" 2022 | bottom: "inception_5a/5x5_reduce" 2023 | top: "inception_5a/5x5_reduce" 2024 | } 2025 | layer { 2026 | name: "inception_5a/5x5" 2027 | type: "Convolution" 2028 | bottom: "inception_5a/5x5_reduce" 2029 | top: "inception_5a/5x5" 2030 | param { 2031 | lr_mult: 1 2032 | decay_mult: 1 2033 | } 2034 | param { 2035 | lr_mult: 2 2036 | decay_mult: 0 2037 | } 2038 | convolution_param { 2039 | num_output: 128 2040 | pad: 2 2041 | kernel_size: 5 2042 | weight_filler { 2043 | type: "xavier" 2044 | } 2045 | bias_filler { 2046 | type: "constant" 2047 | value: 0.2 2048 | } 2049 | } 2050 | } 2051 | layer { 2052 | name: "inception_5a/relu_5x5" 2053 | type: "ReLU" 2054 | bottom: "inception_5a/5x5" 2055 | top: "inception_5a/5x5" 2056 | } 2057 | layer { 2058 | name: "inception_5a/pool" 2059 | type: "Pooling" 2060 | bottom: "pool4/3x3_s2" 2061 | top: "inception_5a/pool" 2062 | pooling_param { 2063 | pool: MAX 2064 | kernel_size: 3 2065 | stride: 1 2066 | pad: 1 2067 | } 2068 | } 2069 | layer { 2070 | name: "inception_5a/pool_proj" 2071 | type: "Convolution" 2072 | bottom: "inception_5a/pool" 2073 | top: "inception_5a/pool_proj" 2074 | param { 2075 | lr_mult: 1 2076 | decay_mult: 1 2077 | } 2078 | param { 2079 | lr_mult: 2 2080 | decay_mult: 0 2081 | } 2082 | convolution_param { 2083 | num_output: 128 2084 | kernel_size: 1 2085 | weight_filler { 2086 | type: "xavier" 2087 | } 2088 | bias_filler { 2089 | type: "constant" 2090 | value: 0.2 2091 | } 2092 | } 2093 | } 2094 | layer { 2095 | name: "inception_5a/relu_pool_proj" 2096 | type: "ReLU" 2097 | bottom: "inception_5a/pool_proj" 2098 | top: "inception_5a/pool_proj" 2099 | } 2100 | layer { 2101 | name: "inception_5a/output" 2102 | type: "Concat" 2103 | bottom: "inception_5a/1x1" 2104 | bottom: "inception_5a/3x3" 2105 | bottom: "inception_5a/5x5" 2106 | bottom: "inception_5a/pool_proj" 2107 | top: "inception_5a/output" 2108 | } 2109 | layer { 2110 | name: "inception_5b/1x1" 2111 | type: "Convolution" 2112 | bottom: 
"inception_5a/output" 2113 | top: "inception_5b/1x1" 2114 | param { 2115 | lr_mult: 1 2116 | decay_mult: 1 2117 | } 2118 | param { 2119 | lr_mult: 2 2120 | decay_mult: 0 2121 | } 2122 | convolution_param { 2123 | num_output: 384 2124 | kernel_size: 1 2125 | weight_filler { 2126 | type: "xavier" 2127 | } 2128 | bias_filler { 2129 | type: "constant" 2130 | value: 0.2 2131 | } 2132 | } 2133 | } 2134 | layer { 2135 | name: "inception_5b/relu_1x1" 2136 | type: "ReLU" 2137 | bottom: "inception_5b/1x1" 2138 | top: "inception_5b/1x1" 2139 | } 2140 | layer { 2141 | name: "inception_5b/3x3_reduce" 2142 | type: "Convolution" 2143 | bottom: "inception_5a/output" 2144 | top: "inception_5b/3x3_reduce" 2145 | param { 2146 | lr_mult: 1 2147 | decay_mult: 1 2148 | } 2149 | param { 2150 | lr_mult: 2 2151 | decay_mult: 0 2152 | } 2153 | convolution_param { 2154 | num_output: 192 2155 | kernel_size: 1 2156 | weight_filler { 2157 | type: "xavier" 2158 | } 2159 | bias_filler { 2160 | type: "constant" 2161 | value: 0.2 2162 | } 2163 | } 2164 | } 2165 | layer { 2166 | name: "inception_5b/relu_3x3_reduce" 2167 | type: "ReLU" 2168 | bottom: "inception_5b/3x3_reduce" 2169 | top: "inception_5b/3x3_reduce" 2170 | } 2171 | layer { 2172 | name: "inception_5b/3x3" 2173 | type: "Convolution" 2174 | bottom: "inception_5b/3x3_reduce" 2175 | top: "inception_5b/3x3" 2176 | param { 2177 | lr_mult: 1 2178 | decay_mult: 1 2179 | } 2180 | param { 2181 | lr_mult: 2 2182 | decay_mult: 0 2183 | } 2184 | convolution_param { 2185 | num_output: 384 2186 | pad: 1 2187 | kernel_size: 3 2188 | weight_filler { 2189 | type: "xavier" 2190 | } 2191 | bias_filler { 2192 | type: "constant" 2193 | value: 0.2 2194 | } 2195 | } 2196 | } 2197 | layer { 2198 | name: "inception_5b/relu_3x3" 2199 | type: "ReLU" 2200 | bottom: "inception_5b/3x3" 2201 | top: "inception_5b/3x3" 2202 | } 2203 | layer { 2204 | name: "inception_5b/5x5_reduce" 2205 | type: "Convolution" 2206 | bottom: "inception_5a/output" 2207 | top: 
"inception_5b/5x5_reduce" 2208 | param { 2209 | lr_mult: 1 2210 | decay_mult: 1 2211 | } 2212 | param { 2213 | lr_mult: 2 2214 | decay_mult: 0 2215 | } 2216 | convolution_param { 2217 | num_output: 48 2218 | kernel_size: 1 2219 | weight_filler { 2220 | type: "xavier" 2221 | } 2222 | bias_filler { 2223 | type: "constant" 2224 | value: 0.2 2225 | } 2226 | } 2227 | } 2228 | layer { 2229 | name: "inception_5b/relu_5x5_reduce" 2230 | type: "ReLU" 2231 | bottom: "inception_5b/5x5_reduce" 2232 | top: "inception_5b/5x5_reduce" 2233 | } 2234 | layer { 2235 | name: "inception_5b/5x5" 2236 | type: "Convolution" 2237 | bottom: "inception_5b/5x5_reduce" 2238 | top: "inception_5b/5x5" 2239 | param { 2240 | lr_mult: 1 2241 | decay_mult: 1 2242 | } 2243 | param { 2244 | lr_mult: 2 2245 | decay_mult: 0 2246 | } 2247 | convolution_param { 2248 | num_output: 128 2249 | pad: 2 2250 | kernel_size: 5 2251 | weight_filler { 2252 | type: "xavier" 2253 | } 2254 | bias_filler { 2255 | type: "constant" 2256 | value: 0.2 2257 | } 2258 | } 2259 | } 2260 | layer { 2261 | name: "inception_5b/relu_5x5" 2262 | type: "ReLU" 2263 | bottom: "inception_5b/5x5" 2264 | top: "inception_5b/5x5" 2265 | } 2266 | layer { 2267 | name: "inception_5b/pool" 2268 | type: "Pooling" 2269 | bottom: "inception_5a/output" 2270 | top: "inception_5b/pool" 2271 | pooling_param { 2272 | pool: MAX 2273 | kernel_size: 3 2274 | stride: 1 2275 | pad: 1 2276 | } 2277 | } 2278 | layer { 2279 | name: "inception_5b/pool_proj" 2280 | type: "Convolution" 2281 | bottom: "inception_5b/pool" 2282 | top: "inception_5b/pool_proj" 2283 | param { 2284 | lr_mult: 1 2285 | decay_mult: 1 2286 | } 2287 | param { 2288 | lr_mult: 2 2289 | decay_mult: 0 2290 | } 2291 | convolution_param { 2292 | num_output: 128 2293 | kernel_size: 1 2294 | weight_filler { 2295 | type: "xavier" 2296 | } 2297 | bias_filler { 2298 | type: "constant" 2299 | value: 0.2 2300 | } 2301 | } 2302 | } 2303 | layer { 2304 | name: "inception_5b/relu_pool_proj" 2305 | type: 
"ReLU" 2306 | bottom: "inception_5b/pool_proj" 2307 | top: "inception_5b/pool_proj" 2308 | } 2309 | layer { 2310 | name: "inception_5b/output" 2311 | type: "Concat" 2312 | bottom: "inception_5b/1x1" 2313 | bottom: "inception_5b/3x3" 2314 | bottom: "inception_5b/5x5" 2315 | bottom: "inception_5b/pool_proj" 2316 | top: "inception_5b/output" 2317 | } 2318 | layer { 2319 | name: "pool5/7x7_s1" 2320 | type: "Pooling" 2321 | bottom: "inception_5b/output" 2322 | top: "pool5/7x7_s1" 2323 | pooling_param { 2324 | pool: AVE 2325 | kernel_size: 7 2326 | stride: 1 2327 | } 2328 | } 2329 | layer { 2330 | name: "pool5/drop_7x7_s1" 2331 | type: "Dropout" 2332 | bottom: "pool5/7x7_s1" 2333 | top: "pool5/7x7_s1" 2334 | dropout_param { 2335 | dropout_ratio: 0.4 2336 | } 2337 | } 2338 | layer { 2339 | name: "loss3/classifier" 2340 | type: "InnerProduct" 2341 | bottom: "pool5/7x7_s1" 2342 | top: "loss3/classifier" 2343 | param { 2344 | lr_mult: 1 2345 | decay_mult: 1 2346 | } 2347 | param { 2348 | lr_mult: 2 2349 | decay_mult: 0 2350 | } 2351 | inner_product_param { 2352 | num_output: {{num_output}} 2353 | weight_filler { 2354 | type: "xavier" 2355 | } 2356 | bias_filler { 2357 | type: "constant" 2358 | value: 0 2359 | } 2360 | } 2361 | } 2362 | layer { 2363 | name: "loss3/loss3" 2364 | type: "SoftmaxWithLoss" 2365 | bottom: "loss3/classifier" 2366 | bottom: "label" 2367 | top: "loss3/loss3" 2368 | loss_weight: 1 2369 | } 2370 | layer { 2371 | name: "loss3/accuracy" 2372 | type: "Accuracy" 2373 | bottom: "loss3/classifier" 2374 | bottom: "label" 2375 | top: "loss3/accuracy" 2376 | include { 2377 | phase: TEST 2378 | } 2379 | } 2380 | -------------------------------------------------------------------------------- /templates/googlenet/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | export GLOG_log_dir=log 4 | export GLOG_logtostderr=1 5 | 6 | CAFFE=caffe 7 | 8 | mkdir -p log snapshots 9 | 10 | SNAP=$1 11 | if [ -z 
"$SNAP" ] 12 | then 13 | $CAFFE train --solver quick_solver.prototxt $* 14 | else 15 | shift 16 | $CAFFE train -solver quick_solver.prototxt -snapshot $SNAP $* 17 | fi 18 | 19 | -------------------------------------------------------------------------------- /test_python.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define timer timer_for_boost_progress_t 5 | #include 6 | #undef timer 7 | #include 8 | #include 9 | #include 10 | #include "xnn.h" 11 | 12 | using namespace std; 13 | using namespace boost; 14 | 15 | int main( int argc, char ** argv ) { 16 | namespace po = boost::program_options; 17 | static int batch; 18 | static int loop; 19 | 20 | po::options_description desc("Allowed options"); 21 | desc.add_options() 22 | ("help,h", "produce help message.") 23 | ("batch", po::value(&batch)->default_value(64), "") 24 | ("loop", po::value(&loop)->default_value(64), "") 25 | ; 26 | 27 | po::positional_options_description p; 28 | 29 | po::variables_map vm; 30 | po::store(po::command_line_parser(argc, argv). 
31 | options(desc).positional(p).run(), vm); 32 | po::notify(vm); 33 | 34 | if (vm.count("help")) { 35 | cerr << desc; 36 | return 1; 37 | } 38 | 39 | cv::Mat image = cv::imread("a.jpg", CV_LOAD_IMAGE_GRAYSCALE); 40 | cv::resize(image, image, cv::Size(256,256)); 41 | { 42 | cv::Mat tmp; 43 | image.convertTo(tmp, CV_32F); 44 | image = tmp; 45 | 46 | } 47 | CHECK(image.channels() == 1); 48 | vector output; 49 | xnn::Model *model = xnn::Model::create_python("models/wv1", batch); 50 | CHECK(model); 51 | { 52 | cerr << "Ramp up with initial batch"; 53 | { 54 | vector images(batch, image); 55 | model->apply(images, &output); 56 | } 57 | boost::timer::auto_cpu_timer t; 58 | progress_display progress(loop, cerr); 59 | struct mallinfo mbefore = mallinfo(); 60 | for (int i = 0; i < loop; ++i) { 61 | vector images(batch, image); 62 | model->apply(images, &output); 63 | ++progress; 64 | } 65 | struct mallinfo mafter = mallinfo(); 66 | cerr << "Finished processing " << batch * loop << " images." << endl; 67 | cerr << "leak: " << mafter.uordblks - mbefore.uordblks << endl; 68 | } 69 | CHECK(output.size() > image.total()); 70 | cv::Mat label(image.rows, image.cols, CV_32F, &output[0]); 71 | label *= 255; 72 | cv::imwrite("xxx.jpg", label); 73 | delete model; 74 | return 0; 75 | } 76 | 77 | -------------------------------------------------------------------------------- /torch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "xnn.h" 7 | 8 | /* How to use Torch model 9 | 10 | To build, link against -lTH -lluaT -lluajit -llapack -lopenblas 11 | The TH library is here: https://github.com/torch/TH 12 | You should get the rest when you installed torch. 13 | 14 | Prepare a "lua_config" file containing the following content; 15 | this will be called to initialize the torch environment and load 16 | the model. 
17 | 18 | require 'torch' 19 | require 'nn' 20 | require 'dpnn' 21 | 22 | io.stdout:setvbuf 'no' 23 | torch.setdefaulttensortype('torch.FloatTensor') 24 | 25 | require 'torch' 26 | require 'nn' 27 | require 'dpnn' 28 | 29 | io.stdout:setvbuf 'no' 30 | torch.setdefaulttensortype('torch.FloatTensor') 31 | 32 | torch.setnumthreads(1) -- In server we usually want to run single-threaded 33 | -- so the server itself can run multiple threads 34 | net = nil· 35 | 36 | function setup (model) -- this function will be called by C++ constructor 37 | net = torch.load(model) 38 | net:evaluate() 39 | end 40 | 41 | function apply (input) 42 | return net:forward(input) 43 | end 44 | */ 45 | 46 | namespace xnn { 47 | 48 | using std::string; 49 | using std::vector; 50 | 51 | class TorchModel: public Model { 52 | lua_State *state; 53 | public: 54 | TorchModel (fs::path const &lua_config, fs::path const& model_path, int batch, int H, int W, int C) 55 | : state(luaL_newstate()) 56 | // we are taking in input tensor shape here 57 | // will have to change code to get the shape from the lua_config 58 | { 59 | BOOST_VERIFY(batch >= 1); 60 | if (!state) throw std::runtime_error("failed to initialize Lua"); 61 | luaL_openlibs(state); 62 | int result = luaL_loadfile(state, lua_config.c_str()); 63 | if (result != 0) throw std::runtime_error("failed to load openface server"); 64 | result = lua_pcall(state, 0, LUA_MULTRET, 0); 65 | if (result != 0) throw std::runtime_error("failed to run openface server"); 66 | // call lua's setup with model path 67 | lua_getglobal(state, "setup"); 68 | lua_pushstring(state, model_path.c_str()); 69 | if (lua_pcall(state, 1, 0, 0) != 0) { 70 | throw std::runtime_error("fail to extract"); 71 | } 72 | 73 | shape[0] = batch; 74 | shape[1] = C; 75 | shape[2] = H; 76 | shape[3] = W; 77 | } 78 | 79 | ~TorchModel () { 80 | lua_close(state); 81 | } 82 | 83 | // The images will form a batch. the elements of the output tensor 84 | // will be densely added to the ft vector. 
85 | virtual void apply (vector const &images, vector *ft) { 86 | // lua will take care of memory release 87 | THFloatTensor *itensor = THFloatTensor_newWithSize4d(shape[0], shape[1], shape[2], shape[3]); 88 | if (!THFloatTensor_isContiguous(itensor)) { 89 | throw std::runtime_error("Torch tensor is not contiguous."); 90 | } 91 | float *origin = THFloatTensor_data(itensor); 92 | // pre-process images and load data to buffer 93 | float *e = preprocess(images, &origin); 94 | 95 | // call "apply" in lua 96 | lua_getglobal(state, "apply"); 97 | luaT_pushudata(state, itensor, "torch.FloatTensor"); 98 | if (lua_pcall(state, 1, 1, 0) != 0) { 99 | throw std::runtime_error("fail to extract"); 100 | } 101 | THFloatTensor const*otensor = reinterpret_cast(luaT_toudata(state, -1, "torch.FloatTensor")); 102 | if (!THFloatTensor_isContiguous(otensor)) { 103 | throw std::runtime_error("Torch output tensor is not contiguous."); 104 | } 105 | long dim = THFloatTensor_nDimension(otensor); 106 | // get output size 107 | size_t sz = 1; 108 | for (long i = 0; i < dim; ++i) { 109 | sz *= THFloatTensor_size(otensor, i); 110 | } 111 | 112 | float const *ptr = THFloatTensor_data(otensor); 113 | ft->resize(sz); 114 | for (long i = 0; i < sz; ++i) { 115 | ft->at(i) = ptr[i]; 116 | } 117 | lua_pop(state, 1); 118 | } 119 | }; 120 | 121 | Model *create_torch (fs::path const &lua_config, int batch, int H, int W, int C) { 122 | return new TorchModel(lua_config, batch, H, W, C); 123 | } 124 | 125 | } 126 | 127 | 128 | -------------------------------------------------------------------------------- /train-caffe-fcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | import re 5 | import glob 6 | import shutil 7 | import logging 8 | import subprocess 9 | import argparse 10 | import xnn_train 11 | 12 | logging.basicConfig(level=logging.DEBUG) 13 | 14 | parser = xnn_train.basic_args() 15 | 16 | args = 
logging.basicConfig(level=logging.DEBUG)

# Command line: <template> <db> <ws> plus the options defined by
# xnn_train.basic_args().
parser = xnn_train.basic_args()
args = parser.parse_args()

# Paths are made absolute up front, before prepare_ws_chdir() is asked to set
# up (and enter) the workspace.
db = os.path.abspath(args.db[0])
mixin = args.mixin
if mixin:
    mixin = os.path.abspath(mixin)
ws = args.ws[0]

# Refuse to clobber an existing workspace.
if os.path.exists(ws):
    logging.error("%s already exists" % ws)
    sys.exit(1)

# Values substituted into the *.tmpl files of the chosen template.
params = {
    "template": args.template[0],
    "channels": args.channels,
    "db_path": db,
    "split": args.split,
    "split_fold": args.fold,
    "num_output": 2,
    "display_interval": 1000,
    "snapshot_interval": args.snapshot,
    "max_iter": args.it,
    "device": "GPU",
    "annotate": "json",
    "mixin": mixin,
    "mixin_group_delta": 1,
    "anno_min_ratio": args.anno_min_ratio,
}

xnn_train.prepare_ws_chdir(ws, params)

# Without pipefail the pipeline's exit status is that of `tee`, so a failed
# training run would go unnoticed and evaluation would start anyway.
# pipefail is a bash option, hence the explicit interpreter.
subprocess.check_call("set -o pipefail; ./train.sh 2>&1 | tee train.log",
                      shell=True, executable="/bin/bash")

# evaluation
xnn_train.caffe_eval_fcn()
po::store(po::command_line_parser(argc, argv). 39 | options(desc).positional(p).run(), vm); 40 | po::notify(vm); 41 | 42 | if (vm.count("help") || ipath.empty()) { 43 | cerr << desc; 44 | return 1; 45 | } 46 | 47 | xnn::Model::set_mode(mode); 48 | unique_ptr det(xnn::Model::create(model)); 49 | CHECK(det->fcn()); 50 | cv::Mat ret; 51 | cv::Mat input = cv::imread(ipath, CV_LOAD_IMAGE_COLOR); 52 | BOOST_VERIFY(input.data); 53 | vector resp; 54 | det->apply(vector{input}, &resp); 55 | CHECK(resp.size() == input.total() * 2) << resp.size() << ' ' << input.total(); 56 | for (auto &v: resp) { 57 | v = 1.0 - v; 58 | } 59 | cv::Mat fl; 60 | input.convertTo(fl, CV_32FC3); 61 | cv::Mat prob(input.size(), CV_32F, &resp[0]); 62 | vector boxes; 63 | xnn::BBoxDetector bdet(b_th, b_keep); 64 | bdet.apply(prob, &boxes); 65 | 66 | vector chs{prob, prob, prob}; 67 | cv::Mat prob3d; 68 | cv::merge(&chs[0], 3, prob3d); 69 | cv::Mat mask = fl.mul(prob3d); 70 | prob3d *= 255; 71 | for (auto const &box: boxes) { 72 | cv::rectangle(prob3d, box.box, cv::Scalar(0, 0, 0xFF), 2); 73 | cv::rectangle(mask, box.box, cv::Scalar(0, 0, 0xFF), 2); 74 | cv::rectangle(fl, box.box, cv::Scalar(0, 0, 0xFF), 2); 75 | } 76 | cv::hconcat(mask, prob3d, mask); 77 | cv::hconcat(mask, fl, fl); 78 | cv::imwrite(opath, fl); 79 | return 0; 80 | } 81 | 82 | -------------------------------------------------------------------------------- /xnn-roc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "xnn.h" 4 | #include "picpac-cv.h" 5 | 6 | using namespace std; 7 | using namespace boost; 8 | using namespace xnn; 9 | 10 | /* 11 | * precision = tp / (tp + fp) 12 | * recall = tp / p 13 | */ 14 | 15 | // thresholding pixels by 0/N, 1/N, ...., N/N, and produce a curve 16 | // of x: (tp + fp)/total, y: tp/(all_positive) 17 | void roc (unique_ptr &model, picpac::ImageStream::Value &v, bool tile, 18 | int N, vector> *curve) { 19 | vector resp; // prob 
response 20 | model->apply(v.image, &resp); 21 | CHECK(v.annotation.type() == CV_8UC1); 22 | CHECK(resp.size() % v.image.total() == 0); 23 | CHECK(v.image.size() == v.annotation.size()); 24 | // response is N prob of bg, and then N prob of fg == (1--bg) 25 | float *p = &resp[0]; 26 | // pair P(fg), label 27 | vector> all; 28 | for (int i = 0; i < v.image.rows; ++i) { 29 | uint8_t const *row = v.annotation.ptr(i); 30 | for (int j = 0; j < v.image.cols; ++j) { 31 | all.push_back(std::make_pair(*p, row[j])); 32 | ++p; 33 | } 34 | } 35 | sort(all.begin(), all.end()); 36 | int tp = 0; 37 | int off = 0; 38 | curve->clear(); 39 | for (int i = 0; i <= N; ++i) { 40 | while ((off < all.size()) && (all[off].first * N <= i)) { 41 | if (all[off].second) { 42 | ++tp; 43 | } 44 | ++off; 45 | } 46 | curve->emplace_back(off, tp); 47 | } 48 | /* 49 | double area = 0; 50 | unsigned total = 0; 51 | for (auto const &p: all) { 52 | unsigned total2 = total + p.second; 53 | area += total + total2; 54 | total = total2; 55 | } 56 | //cout << total << '\t' << all.size() << endl; 57 | area /= 2; 58 | area /= total; 59 | area /= all.size(); 60 | */ 61 | for (auto &p: *curve) { 62 | p.first /= all.size(); 63 | p.second /= tp; 64 | } 65 | } 66 | 67 | int main(int argc, char **argv) { 68 | namespace po = boost::program_options; 69 | picpac::BatchImageStream::Config config; 70 | config.loop = false; 71 | config.shuffle = true; 72 | config.stratify = true; 73 | config.split_negate = true; 74 | config.anno_type = CV_8UC1; 75 | config.anno_color1 = 1; 76 | config.anno_thickness = -1; // fill 77 | 78 | fs::path model_dir; 79 | fs::path db_path; 80 | int mode; 81 | int batch; 82 | int max_size; 83 | unsigned N; 84 | bool tile; 85 | 86 | po::options_description desc("Allowed options"); 87 | desc.add_options() 88 | ("help,h", "produce help message.") 89 | ("model", po::value(&model_dir), "") 90 | ("db", po::value(&db_path), "") 91 | ("mode", po::value(&mode)->default_value(0), "0 for CPU or 1 for GPU") 
92 | ("max", po::value(&config.max_size)->default_value(-1), "") 93 | ("max-size", po::value(&config.max_size)->default_value(800), "") 94 | /* 95 | ("split,s", po::value(&config.split)->default_value(5), "") 96 | ("fold,f", po::value(&config.split_fold)->default_value(0), "") 97 | ("stratify", po::value(&config.stratify)->default_value(true), "") 98 | ("shuffle", po::value(&config.shuffle)->default_value(true), "") 99 | ("annotate", po::value(&config.annotate)->default_value("none"), "none for classification") 100 | ("negate", po::value(&config.split_negate)->default_value(true), "") 101 | */ 102 | ("level", po::value(&FLAGS_minloglevel)->default_value(1),"") 103 | (",N", po::value(&N)->default_value(1000), "") 104 | //("batch", po::value(&batch)->default_value(1), "") 105 | ("tile", "") 106 | ; 107 | #define PICPAC_CONFIG_UPDATE(C,p) desc.add_options()(#p, po::value(&C.p)->default_value(C.p), "") 108 | PICPAC_CONFIG_UPDATE_ALL(config); 109 | #undef PICPAC_CONFIG_UPDATE 110 | 111 | po::positional_options_description p; 112 | p.add("model", 1); 113 | p.add("db", 1); 114 | 115 | po::variables_map vm; 116 | po::store(po::command_line_parser(argc, argv). 
117 | options(desc).positional(p).run(), vm); 118 | po::notify(vm); 119 | 120 | if (vm.count("help") || model_dir.empty() || db_path.empty()) { 121 | cerr << desc; 122 | return 1; 123 | } 124 | FLAGS_logtostderr = 1; 125 | tile = vm.count("tile") > 0; 126 | google::InitGoogleLogging(argv[0]); 127 | Model::set_mode(mode); 128 | unique_ptr model(Model::create(model_dir, config.batch)); 129 | picpac::ImageStream db(db_path, config); 130 | if (config.annotate == "none") { 131 | /* 132 | int total = 0; 133 | int correct = 0; 134 | */ 135 | map> cnt; 136 | for (;;) { 137 | vector images; 138 | vector labels; 139 | for (unsigned i = 0; i < config.batch; ++i) { 140 | try { 141 | picpac::ImageStream::Value v(db.next()); 142 | images.push_back(v.image); 143 | CHECK(v.image.total() > 0); 144 | unsigned l = v.label; 145 | CHECK(l == v.label); 146 | labels.push_back(l); 147 | } 148 | catch (picpac::EoS const &) { 149 | break; 150 | } 151 | } 152 | if (images.empty()) break; 153 | vector resp; // prob response 154 | model->apply(images, &resp); 155 | float const *off = &resp[0]; 156 | CHECK(resp.size() % images.size() == 0); 157 | size_t nc = resp.size() / images.size(); 158 | for (unsigned l: labels) { 159 | CHECK(l < nc); 160 | bool ok = true; 161 | for (unsigned c = 0; c < nc; ++c) { 162 | if (off[l] < off[c]) { 163 | ok = false; 164 | break; 165 | } 166 | } 167 | auto &p = cnt[l]; 168 | ++p.second; 169 | if (ok) ++p.first; 170 | off += nc; 171 | } 172 | if (images.size() < config.batch) break; 173 | } 174 | double sum = 0; 175 | for (auto const &p: cnt) { 176 | double r = 1.0 * p.second.first / p.second.second; 177 | sum += r; 178 | std::cout << p.first << ':' << r << '\t'; 179 | } 180 | std::cout << sum / cnt.size() << std::endl; 181 | } 182 | else { 183 | int cnt = 0; 184 | vector> sum(N+1, std::make_pair(0,0)); 185 | vector> curv; 186 | for (;;) { 187 | try { 188 | picpac::ImageStream::Value v(db.next()); 189 | CHECK(v.image.total() > 0); 190 | CHECK(v.annotation.total() 
> 0); 191 | roc(model, v, tile, N, &curv); 192 | for (unsigned i = 0; i <= N; ++i) { 193 | sum[i].first += curv[i].first; 194 | sum[i].second += curv[i].second; 195 | } 196 | cnt += 1; 197 | //cout << a << endl; 198 | } 199 | catch (picpac::EoS const &) { 200 | break; 201 | } 202 | } 203 | for (auto const &p: sum) { 204 | cout << p.first/cnt << '\t' << p.second/cnt << endl; 205 | } 206 | } 207 | return 0; 208 | } 209 | 210 | -------------------------------------------------------------------------------- /xnn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "xnn.h" 5 | 6 | namespace xnn { 7 | 8 | int Model::mode = 0; 9 | 10 | void Model::set_mode (int m) { 11 | #ifndef CPU_ONLY 12 | mode = m; 13 | #endif 14 | } 15 | 16 | float *Model::preprocess (cv::Mat const &image, 17 | float *buffer) const { 18 | 19 | BOOST_VERIFY(image.data); 20 | BOOST_VERIFY(image.total()); 21 | cv::Mat tmp; 22 | // convert color space 23 | if (image.channels() != channels()) { 24 | if (image.channels() == 3 && channels() == 1) { 25 | cv::cvtColor(image, tmp, CV_BGR2GRAY); 26 | } 27 | else if (image.channels() == 4 && channels() == 1) { 28 | cv::cvtColor(image, tmp, CV_BGRA2GRAY); 29 | } 30 | else if (image.channels() == 4 && channels() == 3) { 31 | cv::cvtColor(image, tmp, CV_BGRA2BGR); 32 | } 33 | else if (image.channels() == 1 && channels() == 3) { 34 | cv::cvtColor(image, tmp, CV_GRAY2BGR); 35 | } 36 | else { 37 | throw 0; 38 | } 39 | } 40 | else { 41 | tmp = image; 42 | } 43 | 44 | // check resize 45 | if ((shape[2] > 1)) { // shape is fixed 46 | cv::Size sz(shape[3], shape[2]); 47 | if (sz != tmp.size()) { 48 | cv::resize(tmp, tmp, sz); 49 | } 50 | } 51 | 52 | int type = CV_32FC(channels()); 53 | if (tmp.type() != type) { 54 | cv::Mat x; 55 | tmp.convertTo(x, type); 56 | tmp = x; 57 | } 58 | float *ptr_b = buffer; 59 | float *ptr_g = buffer; 60 | float *ptr_r = buffer; 61 | if (rgb) { 62 | 
CHECK(channels() == 3); 63 | ptr_g += tmp.total(); 64 | ptr_b += 2 * tmp.total(); 65 | } 66 | else if (channels() == 2) { 67 | ptr_g += tmp.total(); 68 | } 69 | else if (channels() == 3) { 70 | ptr_g += tmp.total(); 71 | ptr_r += 2 * tmp.total(); 72 | } 73 | CHECK(tmp.elemSize() == channels() * sizeof(float)); 74 | int off = 0; 75 | for (int i = 0; i < tmp.rows; ++i) { 76 | float const *line = tmp.ptr(i); 77 | for (int j = 0; j < tmp.cols; ++j) { 78 | ptr_b[off] = (*line++) - means[0]; 79 | if (channels() > 1) { 80 | ptr_g[off] = (*line++) - means[1]; 81 | } 82 | if (channels() > 2) { 83 | ptr_r[off] = (*line++) - means[2]; 84 | } 85 | ++off; 86 | } 87 | } 88 | CHECK(off == tmp.total()); 89 | return buffer + channels() * tmp.total(); 90 | } 91 | 92 | Model::~Model () { 93 | } 94 | 95 | 96 | Model *Model::create (fs::path const &dir, int batch) { 97 | #ifdef USE_CAFFE 98 | if (fs::exists(dir / "caffe.model")) return create_caffe(dir, batch); 99 | #endif 100 | #ifdef USE_PYTHON 101 | if (fs::exists(dir / "model.py")) return create_python(dir, batch); 102 | #endif 103 | #ifdef USE_MXNET 104 | return create_mxnet(dir, batch); 105 | #endif 106 | return nullptr; 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /xnn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace xnn { 9 | using std::array; 10 | using std::vector; 11 | namespace fs = boost::filesystem; 12 | 13 | class Model { 14 | protected: 15 | static int mode; // 0: CPU, 1: GPU 16 | array shape; // batch shape 17 | // batch_size 18 | // channel 19 | // rows, -1 for FCN 20 | // cols, -1 for FCN 21 | array means; // pixel means, R, G, B 22 | bool rgb; 23 | // save data to buffer, return buffer + data_size 24 | // rgb = true: output is RGB, else output is BGR (input is always BGR) 25 | float *preprocess (cv::Mat const &, float 
*buffer) const; 26 | float *preprocess (vector const &images, float *buffer) const { 27 | for (auto const &image: images) { 28 | buffer = preprocess(image, buffer); 29 | } 30 | return buffer; 31 | } 32 | int image_buffer_size (cv::Mat const &image) { 33 | if (shape[2] > 1) { 34 | return shape[1] * shape[2] * shape[3]; 35 | } 36 | return shape[1] * image.total(); 37 | } 38 | public: 39 | static void set_mode (int m); 40 | bool fcn () const { return shape[2] <= 1; } 41 | void set_bgr2rgb () { 42 | rgb = true; 43 | } 44 | int batch () const { return shape[0];} 45 | int channels () const { return shape[1];} 46 | static Model *create (fs::path const &, int = 1); 47 | #ifdef USE_CAFFE 48 | static Model *create_caffe (fs::path const &, int); 49 | static Model *create_colorize_caffe (fs::path const &dir, int batch); 50 | #endif 51 | #ifdef USE_MOVIDIUS 52 | static Model *create_movidius (fs::path const &, int); 53 | #endif 54 | #ifdef USE_MXNET 55 | static Model *create_mxnet (fs::path const &, int); 56 | #endif 57 | #ifdef USE_PYTHON 58 | static Model *create_python (fs::path const &, int); 59 | #endif 60 | #ifdef USE_TORCH 61 | static Model *create_torch (fs::path const &lua_config, 62 | fs::path const &model, 63 | int batch, int H, int W, int C); 64 | #endif 65 | virtual void apply (cv::Mat const &image, vector *ft) { 66 | apply(vector{image}, ft); 67 | } 68 | virtual void apply (vector const &, vector *) = 0; 69 | Model (): rgb(false) { 70 | means[0] = means[1] = means[2] = 0; 71 | } 72 | virtual ~Model (); 73 | }; 74 | 75 | class Tiler { 76 | struct Tiling { 77 | vector tiles; 78 | cv::Size size; 79 | 80 | int add2 (cv::Size sz, int x, 81 | int top1, int top2, int bottom) { 82 | // top1 > top2 83 | tiles.emplace_back(x, 0, sz.width * top1/bottom, 84 | sz.height * top1/bottom); 85 | int rx = x + tiles.back().width; 86 | tiles.emplace_back(x, tiles.back().height, 87 | sz.width * top2/bottom, 88 | sz.height * top2/bottom); 89 | return rx; 90 | } 91 | 92 | Tiling 
logging.basicConfig(level=logging.DEBUG)

base_dir = os.path.abspath(os.path.dirname(__file__))
TMPL_ROOT = os.path.join(base_dir, "templates")


def find_templates(root):
    """Return the paths of all template files below *root*.

    Templates are named "xxx.tmpl".  Only names that END in ".tmpl" match:
    render_templates() writes its output to the path with the last extension
    removed, so a stray "x.tmpl.bak" must not be treated as a template or it
    would be rendered over "x.tmpl".
    """
    paths = []
    for base, _dirs, files in os.walk(root):
        for name in files:
            if name.endswith('.tmpl'):
                paths.append(os.path.join(base, name))
    return paths


def print_template_variables(env, paths):
    """Print the set of undeclared variables used by the templates in *paths*.

    *env* is the jinja2 Environment whose loader can resolve every path.
    """
    from jinja2 import meta
    variables = set()
    for path in paths:
        src = env.loader.get_source(env, path)[0]
        variables |= meta.find_undeclared_variables(env.parse(src))
    # Single-argument form: valid and identical on Python 2 and Python 3.
    print("Templates variables: %s" % (variables,))


def render_templates(env, paths, params):
    """Render each template in *paths* with *params*.

    The result is written next to the template, under the same name with the
    final extension stripped ("a/b.prototxt.tmpl" -> "a/b.prototxt").
    """
    for path in paths:
        template = env.get_template(path)
        fname = os.path.splitext(path)[0]
        with open(fname, 'w') as f:
            f.write(template.render(params))


def prepare_ws_chdir(ws, params):
    """Create workspace *ws* from a template directory and chdir into it.

    - copy the template directory TMPL_ROOT/params['template'] into *ws*
    - render every *.tmpl file with *params*
    - save *params* to params.pickle
    The current working directory is *ws* when this function returns.

    *ws* must not exist yet.
    """
    # tmpl dir to copy from
    tmpl_dir = os.path.join(TMPL_ROOT, params['template'])
    assert os.path.isdir(tmpl_dir), "no such template: %s" % tmpl_dir
    assert not os.path.exists(ws), "%s already exists" % ws
    os.mkdir(ws)
    os.chdir(ws)
    # Copy the directory contents.  An argument list (no shell) keeps paths
    # containing spaces or shell metacharacters intact.
    subprocess.check_call(["cp", "-r", os.path.join(tmpl_dir, "."), "."])

    env = Environment(loader=FileSystemLoader(searchpath='.'))
    tmpls = find_templates('.')

    print_template_variables(env, tmpls)
    render_templates(env, tmpls, params)
    # pickle writes bytes: the file must be opened in binary mode.
    with open('params.pickle', 'wb') as f:
        pickle.dump(params, f)


def basic_args(snapshot=4000, channels=3, iteration=400000):
    """Build the argument parser shared by the training scripts.

    The keyword arguments set the defaults of --snapshot, --channels and --it.
    Positional arguments use nargs=1, so each parses to a one-element list.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("template", nargs=1)
    parser.add_argument("db", nargs=1)  # database
    parser.add_argument("ws", nargs=1)  # workspace, must not exist
    parser.add_argument("--it", default=iteration, type=int)
    parser.add_argument("--channels", default=channels, type=int)
    parser.add_argument("--split", default=5, type=int)
    parser.add_argument("--fold", default=0, type=int)
    parser.add_argument("--snapshot", default=snapshot, type=int)
    parser.add_argument("--mixin")
    parser.add_argument("--anno_min_ratio", default=0.05, type=float)
    return parser
def caffe_scan_snapshots():
    """Return ``(iteration, path)`` pairs for the saved Caffe snapshots.

    Snapshots are looked up as ``snapshots/*.caffemodel`` relative to the
    current working directory.  The iteration number is the text between the
    last ``_`` and the next ``.`` of the file name, e.g.
    ``snapshots/fcn_iter_4000.caffemodel`` -> ``4000``.  A file whose name
    carries no such number is skipped with a warning instead of aborting the
    whole scan.

    Returns:
        A list of ``(iteration, path)`` tuples sorted by ascending iteration.
    """
    shots = []
    for path in glob.glob('snapshots/*.caffemodel'):
        tag = os.path.basename(path).split('_')[-1].split('.')[0]
        try:
            shots.append((int(tag), path))
        except ValueError:
            logging.warning("ignoring snapshot without iteration number: %s",
                            path)
    shots.sort(key=lambda shot: shot[0])
    return shots


def _caffe_unlink(path):
    """Remove *path* if present; ``lexists`` also sees dangling symlinks."""
    if os.path.lexists(path):
        os.remove(path)


def _caffe_run_roc(params, snapshot, params_path, out):
    """Run ``xnn-roc`` on one snapshot and store its stdout in *out*."""
    cmd = [os.path.join(base_dir, 'xnn-roc'), 'model', str(params['db_path']),
           '--mode', '1',
           '--anno_min_ratio', '%g' % params['anno_min_ratio'],
           '--split', '%d' % params['split'],
           '--split_fold', '%d' % params['split_fold'],
           '--annotate', 'json',
           '--channels', '%s' % params['channels']]
    partial = out + '.partial'
    # xnn-roc reads the weights through model/caffe.params, so point that
    # name at the snapshot under test for the duration of the run.
    _caffe_unlink(params_path)
    os.symlink(os.path.abspath(snapshot), params_path)
    try:
        # Write to a side file and rename on success: a failed run must not
        # leave a truncated result that later runs mistake for a cached one.
        with open(partial, 'w') as sink:
            subprocess.check_call(cmd, stdout=sink)
        os.rename(partial, out)
    finally:
        _caffe_unlink(partial)      # no-op after a successful rename
        _caffe_unlink(params_path)


def _caffe_read_roc(path):
    """Parse the two-column, tab-separated lines of an ``xnn-roc`` result."""
    points = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            if len(fields) != 2:
                continue            # anything that is not an "x<TAB>y" row
            points.append((float(fields[0]), float(fields[1])))
    return points


# must work within ws directory
def caffe_eval_fcn():
    """Score every snapshot with ``xnn-roc`` and install the best one.

    Must run with the workspace as current directory.  For each snapshot the
    result is cached in ``eval/<iteration>``; ``xnn-roc`` is only invoked
    when that file is missing.  A snapshot's score is the first column of the
    first result row whose second column exceeds 0.5; the lowest score wins
    and ties go to the later iteration.  Snapshots without such a row are
    skipped with a warning.  The winner is copied to ``model/caffe.params``.

    Raises:
        RuntimeError: no snapshot yielded a score of at most 100.
        subprocess.CalledProcessError: ``xnn-roc`` exited with an error.
    """
    if not os.path.exists("eval"):
        os.mkdir("eval")
    # NOTE: pickle is only safe on trusted data; params.pickle is expected to
    # be the file prepare_ws_chdir wrote into this workspace.
    with open('params.pickle', 'rb') as f:
        params = pickle.load(f)

    hist = []
    best = None
    best_score = 100    # scores above this initial bound are never selected
    best_path = None
    params_path = 'model/caffe.params'

    for it, path in caffe_scan_snapshots():
        # Single-argument print: identical output on Python 2 and 3.
        print("%s %s" % (it, path))
        out = os.path.join('eval', str(it))
        if not os.path.exists(out):
            _caffe_run_roc(params, path, params_path, out)
        score = next((x for x, y in _caffe_read_roc(out) if y > 0.5), None)
        if score is None:
            logging.warning("no result row with y > 0.5 for %s, skipped", path)
            continue
        hist.append((it, score))
        if score <= best_score:
            best, best_score, best_path = it, score, path
        print(hist[-1])

    if best_path is None:
        raise RuntimeError("no snapshot produced a usable score; "
                           "model/caffe.params was not updated")
    print("Best iteration is %s, with score %g" % (best, best_score))
    # Drop any leftover link first so the copy can never write through a
    # symlink into one of the snapshot files.
    _caffe_unlink(params_path)
    shutil.copy(best_path, params_path)