├── LICENSE.md
├── README.md
├── corloc.lua
├── data
│   ├── README.md
│   └── common
│       ├── Makefile
│       └── README.md
├── dataset.lua
├── detection_mAP.lua
├── example_loader.lua
├── fbnn_Optim.lua
├── model
│   ├── CMakeLists.txt
│   ├── HingeCriterion.lua
│   ├── VGGF.lua
│   ├── additive.lua
│   ├── contextlocnet-scm-1.rockspec
│   ├── contrastive_a.lua
│   ├── contrastive_s.lua
│   ├── rectangularringroipooling.cu
│   ├── rectangularringroipooling.lua
│   ├── roi_transforms.lua
│   ├── util.lua
│   └── wsddn_repro.lua
├── opts.lua
├── parallel_batch_loader.lua
├── pascal_voc.lua
├── preprocess.lua
├── test.lua
├── train.lua
└── util.lua
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Vadim Kantorov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Information & Contact
2 | If you use this code, please cite our work:
3 | > @inproceedings{kantorov2016,
4 | >   title = {ContextLocNet: Context-aware Deep Network Models for Weakly Supervised Localization},
5 | >   author = {Kantorov, Vadim and Oquab, Maxime and Cho, Minsu and Laptev, Ivan},
6 | >   booktitle = {Proc. European Conference on Computer Vision (ECCV)},
7 | >   year = {2016}
8 | > }
9 |
10 | The results are available on the [project website](http://www.di.ens.fr/willow/research/contextlocnet) and in the [paper](http://arxiv.org/pdf/1609.04331.pdf) (arXiv [page](http://arxiv.org/abs/1609.04331)). Please submit bugs and ask questions directly on [GitHub](http://github.com/vadimkantorov/contextlocnet/issues); for other inquiries, please contact [Vadim Kantorov](mailto:vadim.kantorov@gmail.com).
11 |
12 | This is a joint work of [Vadim Kantorov](http://vadimkantorov.com), [Maxime Oquab](http://github.com/qassemoquab), [Minsu Cho](http://www.di.ens.fr/~mcho), and [Ivan Laptev](http://www.di.ens.fr/~laptev).
13 |
14 | # Running the code
15 | 1. Install the dependencies: [Torch](http://github.com/torch/distro) with [cuDNN](http://developer.nvidia.com/cudnn) support; [HDF5](http://www.hdfgroup.org/HDF5/); [matio](http://github.com/tbeu/matio); [protobuf](http://github.com/google/protobuf); the Luarocks packages [rapidjson](http://github.com/xpol/lua-rapidjson), [hdf5](http://github.com/deepmind/torch-hdf5), [matio](http://github.com/soumith/matio-ffi.torch), [loadcaffe](http://github.com/szagoruyko/loadcaffe), and [xml](https://github.com/lubyk/xml); and a MATLAB or [Octave](https://www.gnu.org/software/octave/) binary in PATH (for computing detection mAP).
16 |
17 | We strongly recommend using [wigwam](http://wigwam.in/) for this (fix the paths to `nvcc` and `libcudnn.so` before running the commands):
18 |
19 | ```shell
20 | wigwam install torch hdf5 matio protobuf octave -DPATH_TO_NVCC="/path/to/cuda/bin/nvcc" -DPATH_TO_CUDNN_SO="/path/to/cudnn/lib64/libcudnn.so"
21 | wigwam install lua-rapidjson lua-hdf5 lua-matio lua-loadcaffe lua-xml
22 | wigwam in # execute this to make the installed libraries available
23 | ```
24 | 2. Clone this repository, change the current directory to `contextlocnet`, and compile the ROI pooling module:
25 |
26 | ```shell
27 | git clone https://github.com/vadimkantorov/contextlocnet
28 | cd contextlocnet
29 | (cd ./model && luarocks make)
30 | ```
31 | 3. Download the [VOC 2007](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/) dataset, Koen van de Sande's [selective search windows](http://koen.me/research/selectivesearch/) for VOC 2007, and the [VGG-F](https://gist.github.com/ksimonyan/a32c9063ec8e1118221a) model by running the first command. Optionally download [VOC 2012](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) and Ross Girshick's [selective search windows](http://people.eecs.berkeley.edu/~rbg/fast-rcnn-data/selective_search_data.tgz) by manually downloading the [VOC 2012 test data tarball](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar) to `data/common` and then running the second command:
32 |
33 | ```shell
34 | make -f data/common/Makefile download_and_extract_VOC2007 download_VGGF
35 | # make -f data/common/Makefile download_and_extract_VOC2012
36 | ```
37 | 4. Choose a dataset, preprocess it, and convert the VGG-F model to the Torch format:
38 |
39 | ```shell
40 | export DATASET=VOC2007
41 | th preprocess.lua VOC VGGF
42 | ```
43 | 5. Select a GPU and train a model (our best model is `model/contrastive_s.lua`; other choices are `model/contrastive_a.lua`, `model/additive.lua`, and `model/wsddn_repro.lua`):
44 |
45 | ```shell
46 | export CUDA_VISIBLE_DEVICES=0
47 | th train.lua model/contrastive_s.lua # will produce data/model_epoch30.h5 and data/log.json
48 | ```
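Training progress is appended to `data/log.json`; its exact schema is whatever `train.lua` writes, but since the log is plain JSON, one quick way to follow it is to pretty-print the file with Python's standard library:

```shell
python -m json.tool data/log.json | tail -n 20
```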
49 | 6. Test the trained model and compute CorLoc and mAP:
50 |
51 | ```shell
52 | SUBSET=trainval th test.lua data/model_epoch30.h5 # will produce data/scores_trainval.h5
53 | th corloc.lua data/scores_trainval.h5 # will produce data/corloc.json
54 | SUBSET=test th test.lua data/model_epoch30.h5 # will produce data/scores_test.h5
55 | th detection_mAP.lua data/scores_test.h5 # will produce data/detection_mAP.json
56 | ```
57 |
58 | # Pretrained models for VOC 2007
59 | Model | model_epoch30.h5 | log.json | corloc.json | detection_mAP.json
60 | :---|:---:|:---:|:---:|:---:
61 | contrastive_s | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_model_epoch30.h5) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_log.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_corloc.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_detection_mAP.json)
62 | wsddn_repro | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_model_epoch30.h5) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_log.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_corloc.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_detection_mAP.json)
63 |
64 | # Acknowledgements & Notes
65 | We warmly thank Hakan Bilen, Relja Arandjelović, and Soumith Chintala for fruitful discussions and help.
66 |
67 | This work would not have been possible without prior work: Hakan Bilen's [WSDDN](http://github.com/hbilen/WSDDN), Spyros Gidaris's [LocNet](http://github.com/gidariss/LocNet), Sergey Zagoruyko's [loadcaffe](http://github.com/szagoruyko/loadcaffe), and Facebook FAIR's [fbnn/Optim.lua](http://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua).
68 |
69 | The code is released under the [MIT](http://github.com/vadimkantorov/contextlocnet/blob/master/LICENSE.md) license.
70 |
--------------------------------------------------------------------------------
/corloc.lua:
--------------------------------------------------------------------------------
1 | dofile('opts.lua')
2 | dofile('util.lua')
3 | dofile('dataset.lua')
4 |
5 | opts.SCORES_FILES = #arg >= 1 and arg or {opts.PATHS.SCORES_PATTERN:format('trainval')}
6 |
7 | loaded = hdf5_load(opts.SCORES_FILES[1], {'subset', 'rois', 'labels', 'output'})
8 | outputs = {}
9 |
10 | for i = 1, #opts.SCORES_FILES do
11 | 	outputs_i = hdf5_load(opts.SCORES_FILES[i], 'outputs')
12 | 	for output_field, scores in pairs(outputs_i) do
13 | 		outputs[output_field] = outputs[output_field] or {}
14 | 		for exampleIdx = 1, #scores do
15 | 			outputs[output_field][exampleIdx] = (outputs[output_field][exampleIdx] or scores[exampleIdx]:clone():zero()):add(scores[exampleIdx]:div(#opts.SCORES_FILES))
16 | 		end
17 | 	end
18 | end
19 |
20 | res = {training_MAP = dataset_tools.meanAP(loaded.output, loaded.labels)}
21 | for output_field, scores in pairs(outputs) do
22 | 	res[output_field] = {by_class = {}, _mean = corloc(dataset[loaded.subset], {scores, loaded.rois})}
23 | 	for classLabelInd, classLabel in ipairs(classLabels) do
24 | 		res[output_field].by_class[classLabel] = corloc(dataset[loaded.subset], {scores, loaded.rois}, classLabelInd)
25 | 	end
26 | end
27 |
28 | json_save(opts.PATHS.CORLOC, res)
29 | print('result in ' ..
opts.PATHS.CORLOC) 30 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | This is a directory for produced models and results. 2 | -------------------------------------------------------------------------------- /data/common/Makefile: -------------------------------------------------------------------------------- 1 | DOWNLOADS = ./data/common 2 | 3 | download_and_extract_VOC2007: 4 | wget -P $(DOWNLOADS) \ 5 | http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar \ 6 | http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar \ 7 | http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar \ 8 | http://koen.me/research/downloads/SelectiveSearchVOC2007trainval.mat \ 9 | http://koen.me/research/downloads/SelectiveSearchVOC2007test.mat 10 | cd $(DOWNLOADS) && for f in VOCdevkit_08-Jun-2007.tar VOCtrainval_06-Nov-2007.tar VOCtest_06-Nov-2007.tar; do tar -xf $$f; done && mv VOCdevkit VOCdevkit_2007 11 | 12 | download_and_extract_VOC2012: 13 | wget -P $(DOWNLOADS) \ 14 | http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar \ 15 | http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar \ 16 | http://people.eecs.berkeley.edu/~rbg/fast-rcnn-data/selective_search_data.tgz 17 | cd $(DOWNLOADS) && for f in VOCdevkit_18-May-2011.tar VOCtrainval_11-May-2012.tar VOC2012test.tar selective_search_data.tgz; do tar -xf $$f; done && mv VOCdevkit VOCdevkit_2012 18 | 19 | download_VGGF: 20 | wget -P $(DOWNLOADS) \ 21 | http://www.robots.ox.ac.uk/~vgg/software/deep_eval/releases/bvlc/VGG_CNN_F.caffemodel \ 22 | https://gist.githubusercontent.com/ksimonyan/a32c9063ec8e1118221a/raw/6a3b8af023bae65669a4ceccd7331a5e7767aa4e/VGG_CNN_F_deploy.prototxt 23 | 24 | .PHONY: download_and_extract_VOC2007 download_and_extract_VOC2012 download_VGGF 25 | -------------------------------------------------------------------------------- /data/common/README.md: -------------------------------------------------------------------------------- 1 | This is a directory for the downloaded datasets and models. 2 | 3 | Instructions for downloading prerequisite data are at https://github.com/vadimkantorov/contextlocnet/blob/master/README.md (step 3). 4 | 5 | Links are in the [Makefile](https://github.com/vadimkantorov/contextlocnet/blob/master/data/common/Makefile). 
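For example, to fetch everything needed for the default VOC 2007 setup (the dataset, the selective search windows, and the VGG-F model), run the Makefile targets from the repository root, as in step 3 of the top-level README:

```shell
make -f data/common/Makefile download_and_extract_VOC2007 download_VGGF
```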
6 | -------------------------------------------------------------------------------- /dataset.lua: -------------------------------------------------------------------------------- 1 | if opts.DATASET == 'VOC2007' or opts.DATASET == 'VOC2012' then 2 | dataset_tools = dofile('pascal_voc.lua') 3 | classLabels = dataset_tools.classLabels 4 | numClasses = dataset_tools.numClasses 5 | end 6 | 7 | dataset = torch.load(opts.PATHS.DATASET_CACHED) 8 | 9 | dofile('parallel_batch_loader.lua') 10 | dofile('example_loader.lua') 11 | -------------------------------------------------------------------------------- /detection_mAP.lua: -------------------------------------------------------------------------------- 1 | dofile('opts.lua') 2 | dofile('util.lua') 3 | dofile('dataset.lua') 4 | threads = require 'threads' 5 | 6 | local MATLAB = assert((#sys.execute('which matlab') > 0 and 'matlab -r') or (#sys.execute('which octave') > 0 and 'octave --eval'), 'matlab or octave not found in PATH') 7 | local subset = 'test' 8 | output_field = opts.OUTPUT_FIELDS[1] 9 | 10 | opts.SCORES_FILES = #arg >= 1 and arg or {opts.PATHS.SCORES_PATTERN:format(subset)} 11 | rois = hdf5_load(opts.SCORES_FILES[1], 'rois') 12 | 13 | scores = {} 14 | for i = 1, #opts.SCORES_FILES do 15 | scores_i = hdf5_load(opts.SCORES_FILES[i], 'outputs/' .. output_field) 16 | for exampleIdx = 1, #scores_i do 17 | scores[exampleIdx] = (scores[exampleIdx] or scores_i[exampleIdx]:clone():zero()):add(scores_i[exampleIdx]:div(#opts.SCORES_FILES)) 18 | end 19 | end 20 | 21 | local detrespath = dataset_tools.package_submission(opts.PATHS.DATA, dataset, opts.DATASET, subset, 'comp4_det', rois, scores, nms_mask(rois, scores, opts.NMS_OVERLAP_THRESHOLD, opts.NMS_SCORE_THRESHOLD)) 22 | local opts = opts 23 | 24 | if dataset[subset].objectBoxes == nil then 25 | print('detection mAP cannot be computed for ' .. opts.DATASET .. '. Quitting.') 26 | print(('VOC submission saved in "%s/results-%s-%s-%s.tar.gz"'):format(opts.PATHS.DATA, opts.DATASET, 'comp4_det', subset)) 27 | os.exit(0) 28 | end 29 | 30 | res = {[output_field] = {_mean = nil, by_class = {}}} 31 | APs = torch.FloatTensor(numClasses):zero() 32 | 33 | local imgsetpath = paths.tmpname() 34 | os.execute(('sed \'s/$/ -1/\' %s > %s'):format(paths.concat(opts.PATHS.VOC_DEVKIT_VOCYEAR, 'ImageSets', 'Main', subset .. '.txt'), imgsetpath)) -- hack for octave 35 | 36 | jobQueue = threads.Threads(numClasses) 37 | for classLabelInd, classLabel in ipairs(classLabels) do 38 | jobQueue:addjob(function() 39 | os.execute(('%s "oldpwd = pwd; cd(\'%s\'); addpath(fullfile(pwd, \'VOCcode\')); VOCinit; cd(oldpwd); VOCopts.testset = \'%s\'; VOCopts.detrespath = \'%s\'; VOCopts.imgsetpath = \'%s\'; classLabel = \'%s\'; [rec, prec, ap] = VOCevaldet(VOCopts, \'comp4\', classLabel, false); dlmwrite(sprintf(VOCopts.detrespath, \'resu4\', classLabel), ap); quit;"'):format(MATLAB, paths.dirname(opts.PATHS.VOC_DEVKIT_VOCYEAR), subset, detrespath, imgsetpath, classLabel)) 40 | return tonumber(io.open(detrespath:format('resu4', classLabel)):read('*all')) 41 | end, function(ap) res[output_field].by_class[classLabel] = ap; APs[classLabelInd] = ap; end) 42 | end 43 | jobQueue:synchronize() 44 | os.execute('[ -t 1 ] && reset') 45 | 46 | res[output_field]._mean = APs:mean() 47 | 48 | json_save(opts.PATHS.DETECTION_MAP, res) 49 | print('result in ' .. 
opts.PATHS.DETECTION_MAP) 50 | -------------------------------------------------------------------------------- /example_loader.lua: -------------------------------------------------------------------------------- 1 | local ExampleLoader, parent = torch.class('ExampleLoader') 2 | 3 | function ExampleLoader:__init(dataset, normalization_params, scales, example_loader_opts) 4 | self.scales = scales 5 | self.normalization_params = normalization_params 6 | self.example_loader_opts = example_loader_opts 7 | self.dataset = dataset 8 | end 9 | 10 | local function table2d(I, J, elem_generator) 11 | local res = {} 12 | for i = 1, I do 13 | res[i] = {} 14 | for j = 1, J do 15 | res[i][j] = elem_generator(i, j) 16 | end 17 | end 18 | return res 19 | end 20 | 21 | local function subtract_mean(dst, src, normalization_params) 22 | local channel_order = assert(({rgb = {1, 2, 3}, bgr = {3, 2, 1}})[normalization_params.channel_order]) 23 | for c = 1, 3 do 24 | dst[c]:copy(src[channel_order[c]]):add(-normalization_params.rgb_mean[channel_order[c]]) 25 | if normalization_params.rgb_std then 26 | dst[c]:div(normalization_params.rgb_std[channel_order[c]]) 27 | end 28 | end 29 | end 30 | 31 | local function rescale(img, max_height, max_width) 32 | --local height_width = math.max(dhw_rgb:size(3), dhw_rgb:size(2)) 33 | --local im_scale = target_height_width / height_width 34 | local scale_factor = max_height / img:size(2) 35 | if torch.round(img:size(3) * scale_factor) > max_width then 36 | scale_factor = math.min(scale_factor, max_width / img:size(3)) 37 | end 38 | 39 | return image.scale(img, math.min(max_width, img:size(3) * scale_factor), math.min(max_height, img:size(2) * scale_factor)) 40 | end 41 | 42 | local function flip(images_j, rois_j) 43 | image.hflip(images_j, images_j) 44 | rois_j:select(2, 1):mul(-1):add(images_j:size(3)) 45 | rois_j:select(2, 3):mul(-1):add(images_j:size(3)) 46 | 47 | local tmp = rois_j:select(2, 1):clone() 48 | rois_j:select(2, 1):copy(rois_j:select(2, 3)) 49 | rois_j:select(2, 3):copy(tmp) 50 | end 51 | 52 | local function insert_dummy_dim1(...) 
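	-- in-place: prepends a singleton (batch) dimension to every tensor argument, e.g. C x H x W becomes 1 x C x H x W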
53 | for _, tensor in ipairs({...}) do 54 | tensor:resize(1, unpack(tensor:size():totable())) 55 | end 56 | end 57 | 58 | function ExampleLoader:makeBatchTable(batchSize, isTrainingPhase) 59 | local o = self:getPhaseOpts(isTrainingPhase) 60 | local num_jittered_copies = isTrainingPhase and 2 or (1 + (o.hflips and 2 or 1) * o.numScales) 61 | 62 | return table2d(batchSize, num_jittered_copies, function() return {torch.FloatTensor(), torch.FloatTensor(), torch.FloatTensor()} end) 63 | end 64 | 65 | function ExampleLoader:loadExample(exampleIdx, isTrainingPhase) 66 | local o = self:getPhaseOpts(isTrainingPhase) 67 | 68 | local labels_loaded = self.dataset[o.subset]:getLabels(exampleIdx) 69 | local rois_loaded = self.dataset[o.subset]:getProposals(exampleIdx) 70 | local jpeg_loaded = self.dataset[o.subset]:getJpegBytes(exampleIdx) 71 | local scales = o.scales or self.scales 72 | local normalization_params = self.normalization_params 73 | 74 | local scale_inds = isTrainingPhase and {0, torch.random(1, o.numScales)} or torch.range(0, o.numScales):totable() 75 | local hflips = isTrainingPhase and (o.hflips and torch.random(0, 1) or 0) or (o.hflips and 2 or 0) -- 0 is no_flip, 1 is do_flip, 2 is both 76 | local rois_perm = isTrainingPhase and torch.randperm(rois_loaded:size(1)) or torch.range(1, rois_loaded:size(1)) 77 | 78 | return function(indexInBatch, batchTable) 79 | image = image or require 'image' 80 | local img_original = image.decompressJPG(jpeg_loaded, 3, normalization_params.scale == 255 and 'byte' or 'float') 81 | local height_original, width_original = img_original:size(2), img_original:size(3) 82 | 83 | local rois_scale0 = rois_loaded:index(1, rois_perm:sub(1, math.min(rois_loaded:size(1), o.numRoisPerImage)):long()) 84 | rois_scale0[1]:copy(torch.FloatTensor{0, 0, width_original - 1, height_original - 1, 0.0}:sub(1, rois_scale0:size(2))) 85 | 86 | for j, scale_ind in ipairs(scale_inds) do 87 | local images, rois, labels = unpack(batchTable[indexInBatch][j]) 88 | 89 | local img_scaled = scale_ind == 0 and img_original:clone() or rescale(img_original, scales[scale_ind][1], scales[scale_ind][2]) 90 | local width_scaled, height_scaled = img_scaled:size(3), img_scaled:size(2) 91 | 92 | subtract_mean(images:resize(img_scaled:size()), img_scaled, normalization_params) 93 | rois:cmul(rois_scale0, torch.FloatTensor{{width_scaled / width_original, height_scaled / height_original, width_scaled / width_original, height_scaled / height_original, 1.0}}:narrow(2, 1, rois_scale0:size(2)):contiguous():expandAs(rois_scale0)) 94 | labels:resize(labels_loaded:size()):copy(labels_loaded) 95 | 96 | if hflips == 1 then 97 | flip(images, rois) 98 | elseif scale_ind ~= 0 and hflips == 2 then 99 | local jj = #batchTable[indexInBatch] - j + 2 100 | local images_flipped, rois_flipped, labels_flipped = unpack(batchTable[indexInBatch][jj]) 101 | images_flipped:resizeAs(images):copy(images) 102 | rois_flipped:resizeAs(rois):copy(rois) 103 | labels_flipped:resizeAs(labels):copy(labels) 104 | flip(images_flipped, rois_flipped) 105 | insert_dummy_dim1(images_flipped, rois_flipped, labels_flipped) 106 | end 107 | 108 | insert_dummy_dim1(images, rois, labels) 109 | end 110 | 111 | collectgarbage() 112 | end 113 | end 114 | 115 | function ExampleLoader:getNumExamples(isTrainingPhase) 116 | return self.dataset[self:getSubset(isTrainingPhase)]:getNumExamples() 117 | end 118 | 119 | function ExampleLoader:getPhaseOpts(isTrainingPhase) 120 | return isTrainingPhase and self.example_loader_opts['training'] or 
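	-- any phase other than training falls back to the evaluation options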
self.example_loader_opts['evaluate'] 121 | end 122 | 123 | function ExampleLoader:getSubset(isTrainingPhase) 124 | return self:getPhaseOpts(isTrainingPhase).subset 125 | end 126 | -------------------------------------------------------------------------------- /fbnn_Optim.lua: -------------------------------------------------------------------------------- 1 | -- This file is copied from Facebook FAIR's fbnn project: https://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua 2 | -- Copyright 2004-present Facebook. All Rights Reserved. 3 | 4 | local pl = require('pl.import_into')() 5 | 6 | -- from fblualib/fb/util/data.lua , copied here because fblualib is not rockspec ready yet. 7 | -- deepcopy routine that assumes the presence of a 'clone' method in user 8 | -- data should be used to deeply copy. This matches the behavior of Torch 9 | -- tensors. 10 | local function deepcopy(x) 11 | local typename = type(x) 12 | if typename == "userdata" then 13 | return x:clone() 14 | end 15 | if typename == "table" then 16 | local retval = { } 17 | for k,v in pairs(x) do 18 | retval[deepcopy(k)] = deepcopy(v) 19 | end 20 | return retval 21 | end 22 | return x 23 | end 24 | 25 | local Optim, parent = torch.class('nn.Optim') 26 | 27 | 28 | -- Returns weight parameters and bias parameters and associated grad parameters 29 | -- for this module. Annotates the return values with flag marking parameter set 30 | -- as bias parameters set 31 | function Optim.weight_bias_parameters(module) 32 | local weight_params, bias_params 33 | if module.weight then 34 | weight_params = {module.weight, module.gradWeight} 35 | weight_params.is_bias = false 36 | end 37 | if module.bias then 38 | bias_params = {module.bias, module.gradBias} 39 | bias_params.is_bias = true 40 | end 41 | return {weight_params, bias_params} 42 | end 43 | 44 | -- The regular `optim` package relies on `getParameters`, which is a 45 | -- beastly abomination before all. This `optim` package uses separate 46 | -- optim state for each submodule of a `nn.Module`. 47 | function Optim:__init(model, optState, checkpoint_data) 48 | assert(model) 49 | assert(checkpoint_data or optState) 50 | assert(not (checkpoint_data and optState)) 51 | 52 | self.model = model 53 | self.modulesToOptState = {} 54 | -- Keep this around so we update it in setParameters 55 | self.originalOptState = optState 56 | 57 | -- Each module has some set of parameters and grad parameters. Since 58 | -- they may be allocated discontinuously, we need separate optState for 59 | -- each parameter tensor. self.modulesToOptState maps each module to 60 | -- a lua table of optState clones. 
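    -- Fresh start: clone the optimizer state once per parameter tensor
    -- (weights and biases separately) of every module; when restoring from a
    -- checkpoint, the per-module state saved by Optim:save() is reused instead.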
61 | if not checkpoint_data then 62 | self.model:for_each(function(module) 63 | self.modulesToOptState[module] = { } 64 | local params = self.weight_bias_parameters(module) 65 | -- expects either an empty table or 2 element table, one for weights 66 | -- and one for biases 67 | assert(pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2) 68 | for i, _ in ipairs(params) do 69 | self.modulesToOptState[module][i] = deepcopy(optState) 70 | if params[i] and params[i].is_bias then 71 | -- never regularize biases 72 | self.modulesToOptState[module][i].weightDecay = 0.0 73 | end 74 | end 75 | assert(module) 76 | assert(self.modulesToOptState[module]) 77 | end) 78 | else 79 | local state = checkpoint_data.optim_state 80 | local modules = {} 81 | self.model:for_each(function(m) table.insert(modules, m) end) 82 | assert(pl.tablex.compare_no_order(modules, pl.tablex.keys(state))) 83 | self.modulesToOptState = state 84 | end 85 | end 86 | 87 | function Optim:save() 88 | return { 89 | optim_state = self.modulesToOptState 90 | } 91 | end 92 | 93 | local function _type_all(obj, t) 94 | for k, v in pairs(obj) do 95 | if type(v) == 'table' then 96 | _type_all(v, t) 97 | else 98 | local tn = torch.typename(v) 99 | if tn and tn:find('torch%..+Tensor') then 100 | obj[k] = v:type(t) 101 | end 102 | end 103 | end 104 | end 105 | 106 | function Optim:type(t) 107 | self.model:for_each(function(module) 108 | local state= self.modulesToOptState[module] 109 | assert(state) 110 | _type_all(state, t) 111 | end) 112 | end 113 | 114 | local function get_device_for_module(mod) 115 | local dev_id = nil 116 | for name, val in pairs(mod) do 117 | if torch.typename(val) == 'torch.CudaTensor' then 118 | local this_dev = val:getDevice() 119 | if this_dev ~= 0 then 120 | -- _make sure the tensors are allocated consistently 121 | assert(dev_id == nil or dev_id == this_dev) 122 | dev_id = this_dev 123 | end 124 | end 125 | end 126 | return dev_id -- _may still be zero if none are allocated. 127 | end 128 | 129 | local function on_device_for_module(mod, f) 130 | local this_dev = get_device_for_module(mod) 131 | if this_dev ~= nil then 132 | return cutorch.withDevice(this_dev, f) 133 | end 134 | return f() 135 | end 136 | 137 | function Optim:optimize(optimMethod, inputs, targets, criterion, scale) 138 | assert(optimMethod) 139 | assert(inputs) 140 | assert(targets) 141 | assert(criterion) 142 | assert(self.modulesToOptState) 143 | 144 | self.model:zeroGradParameters() 145 | local output = self.model:forward(inputs) 146 | 147 | local err = criterion:forward(output, targets) 148 | 149 | if err ~= 0 then 150 | 151 | local df_do = criterion:backward(output, targets) 152 | self.model:backward(inputs, df_do, scale) 153 | 154 | -- We'll set these in the loop that iterates over each module. Get them 155 | -- out here to be captured. 
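        -- fEvalMod ignores its argument and returns the captured (err, curGrad);
        -- curParam and curGrad are re-pointed in the loop below before each
        -- optimMethod call, so each parameter tensor is updated with its own state.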
156 | local curGrad 157 | local curParam 158 | local function fEvalMod(x) 159 | return err, curGrad 160 | end 161 | 162 | for curMod, opt in pairs(self.modulesToOptState) do 163 | on_device_for_module(curMod, function() 164 | local curModParams = self.weight_bias_parameters(curMod) 165 | -- expects either an empty table or 2 element table, one for weights 166 | -- and one for biases 167 | assert(pl.tablex.size(curModParams) == 0 or 168 | pl.tablex.size(curModParams) == 2) 169 | if curModParams then 170 | for i, tensor in ipairs(curModParams) do 171 | if curModParams[i] then 172 | -- expect param, gradParam pair 173 | curParam, curGrad = table.unpack(curModParams[i]) 174 | assert(curParam and curGrad) 175 | optimMethod(fEvalMod, curParam, opt[i]) 176 | end 177 | end 178 | end 179 | end) 180 | end 181 | 182 | end 183 | 184 | return err, output 185 | end 186 | 187 | function Optim:setParameters(newParams) 188 | assert(newParams) 189 | assert(type(newParams) == 'table') 190 | local function splice(dest, src) 191 | for k,v in pairs(src) do 192 | dest[k] = v 193 | end 194 | end 195 | 196 | splice(self.originalOptState, newParams) 197 | for _,optStates in pairs(self.modulesToOptState) do 198 | for i,optState in pairs(optStates) do 199 | assert(type(optState) == 'table') 200 | splice(optState, newParams) 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /model/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # cloned originally from https://github.com/vadimkantorov/fast-rcnn.torch/commit/3309057f05d0e36059b5e1213a180b3d616f4308 2 | 3 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR) 4 | CMAKE_POLICY(VERSION 2.8) 5 | 6 | SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}") 7 | 8 | FIND_PACKAGE(Torch REQUIRED) 9 | 10 | # Flags 11 | # When using MSVC 12 | IF(MSVC) 13 | # we want to respect the standard, and we are bored of those **** . 14 | ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) 15 | ENDIF(MSVC) 16 | 17 | # OpenMP support? 18 | SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") 19 | IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) 20 | EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) 21 | STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) 22 | MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") 23 | IF (DARWIN_VERSION GREATER 9) 24 | SET(APPLE_OPENMP_SUCKS 1) 25 | ENDIF (DARWIN_VERSION GREATER 9) 26 | EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion 27 | OUTPUT_VARIABLE GCC_VERSION) 28 | IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) 29 | MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") 30 | MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") 31 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas") 32 | SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" 
FORCE) 33 | ENDIF () 34 | ENDIF () 35 | 36 | IF (WITH_OPENMP) 37 | FIND_PACKAGE(OpenMP) 38 | IF(OPENMP_FOUND) 39 | MESSAGE(STATUS "Compiling with OpenMP support") 40 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 41 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 42 | SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 43 | ENDIF(OPENMP_FOUND) 44 | ENDIF (WITH_OPENMP) 45 | 46 | LINK_DIRECTORIES("${Torch_INSTALL_LIB}") 47 | 48 | FIND_PACKAGE(CUDA 4.0 REQUIRED) 49 | 50 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_35") 51 | 52 | INCLUDE_DIRECTORIES("${Torch_INSTALL_INCLUDE}/THC") 53 | SET(src-cuda rectangularringroipooling.cu) 54 | CUDA_ADD_LIBRARY(cucontextlocnet MODULE ${src-cuda}) 55 | TARGET_LINK_LIBRARIES(cucontextlocnet luaT THC TH) 56 | IF(APPLE) 57 | SET_TARGET_PROPERTIES(cucontextlocnet PROPERTIES 58 | LINK_FLAGS "-undefined dynamic_lookup") 59 | ENDIF() 60 | 61 | 62 | ### Torch packages supposes libraries prefix is "lib" 63 | SET_TARGET_PROPERTIES(cucontextlocnet PROPERTIES 64 | PREFIX "lib" 65 | IMPORT_PREFIX "lib") 66 | 67 | INSTALL(TARGETS cucontextlocnet 68 | RUNTIME DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}" 69 | LIBRARY DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}") 70 | -------------------------------------------------------------------------------- /model/HingeCriterion.lua: -------------------------------------------------------------------------------- 1 | local HingeCriterion, parent = torch.class('HingeCriterion', 'nn.Criterion') 2 | 3 | function HingeCriterion:__init(margin) 4 | parent.__init(self) 5 | self.sizeAverage=true 6 | 7 | self.sequence=nn.Sequential() 8 | self.sequence:add(nn.CMulTable()) 9 | self.sequence:add(nn.MulConstant(-1,true)) 10 | self.sequence:add(nn.AddConstant(margin or 1, true)) 11 | self.sequence:add(nn.ReLU(true)) 12 | 13 | self.gradient=torch.Tensor() 14 | end 15 | 16 | function HingeCriterion:setFactor(factor) 17 | self.factor = factor 18 | return self 19 | end 20 | 21 | function HingeCriterion:updateOutput(input, target) 22 | self.sequence:forward({input,target}) 23 | self.output=self.sequence.output:sum() 24 | local p = (self.sizeAverage and 1/input:size(1) or 1) * (self.factor or 1) 25 | self.output = self.output * p 26 | return self.output 27 | end 28 | 29 | 30 | function HingeCriterion:updateGradInput(input, target) 31 | local p = (self.sizeAverage and 1/input:size(1) or 1) * (self.factor or 1) 32 | 33 | self.gradient:resize(self.sequence.output:size()):fill(p) 34 | self.sequence:backward({input,target}, self.gradient) 35 | self.gradInput=self.sequence.gradInput[1] 36 | return self.gradInput 37 | end 38 | 39 | function HingeCriterion:type(type) 40 | parent.type(self, type) 41 | self.sequence:type(type) 42 | self.gradient:type(type) 43 | return self 44 | end 45 | -------------------------------------------------------------------------------- /model/VGGF.lua: -------------------------------------------------------------------------------- 1 | return function(modelPath) 2 | local vggf = torch.load(modelPath) 3 | 4 | local conv_layers = nn.Sequential() 5 | for i = 1, 14 do 6 | conv_layers:add(vggf:get(i)) 7 | end 8 | 9 | local fc_layers = nn.Sequential() 10 | for i = 17, 22 do 11 | fc_layers:add(vggf:get(i)) 12 | end 13 | 14 | return { 15 | conv_layers = conv_layers, 16 | fc_layers = fc_layers, 17 | channel_order = 'bgr', 18 | spatial_scale = 1 / 16, 19 | fc_layers_output_size = 4096, 20 | pooled_height = 6, 21 | pooled_width = 6, 22 | spp_correction_params = {offset0 = -18, offset 
= 0.0}, 23 | --spp_correction_params = {offset0 = -18.0, offset = 9.5}, 24 | fc_layers_view = function(RoiReshaper) return nn.View(-1):setNumInputDims(3) end, 25 | normalization_params = {channel_order = 'bgr', rgb_mean = {122.7717, 115.9465, 102.9801}, rgb_std = nil, scale = 255} 26 | } 27 | end 28 | -------------------------------------------------------------------------------- /model/additive.lua: -------------------------------------------------------------------------------- 1 | fc8r = nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8r') 2 | 3 | model = nn.Sequential(): 4 | add(nn.ParallelTable(): 5 | add(base_model.conv_layers): 6 | add(RoiReshaper:StoreShape()) 7 | ): 8 | add(nn.ConcatTable(): 9 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal)): 10 | add(branch_transform_rois_share_fc_layers(base_model, ContextRegion)) 11 | ): 12 | add(nn.ConcatTable(): 13 | add(nn.Sequential(): 14 | add(nn.SelectTable(1)): 15 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 16 | add(RoiReshaper:RestoreShape()): 17 | named('output_fc8c') 18 | ): 19 | add(nn.Sequential(): 20 | add(nn.ParallelTable(): 21 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('output_fc8d_orig')): 22 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('output_fc8d_context')) 23 | ): 24 | add(nn.CAddTable()): 25 | add(RoiReshaper:RestoreShape(4)): 26 | add(cudnn.SpatialSoftMax()): 27 | add(nn.Squeeze(4)): 28 | named('output_softmax') 29 | ) 30 | ): 31 | add(nn.CMulTable():named('output_prod')): 32 | add(nn.Sum(2)) 33 | 34 | criterion = HingeCriterion():setFactor(1 / numClasses) 35 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4} 36 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10} 37 | -------------------------------------------------------------------------------- /model/contextlocnet-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "contextlocnet" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "git://github.com/vadimkantorov/contextlocnet", 6 | tag = "master" 7 | } 8 | 9 | dependencies = { 10 | "torch >= 7.0", 11 | "nn", 12 | "cunn", 13 | } 14 | 15 | build = { 16 | type = "command", 17 | build_command = [[ 18 | cmake -E make_directory build; 19 | cd build; 20 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." 
-DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 21 | $(MAKE) 22 | ]], 23 | install_command = "cd build && $(MAKE) install" 24 | } 25 | -------------------------------------------------------------------------------- /model/contrastive_a.lua: -------------------------------------------------------------------------------- 1 | fc8r = nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8r') 2 | 3 | model = nn.Sequential(): 4 | add(nn.ParallelTable(): 5 | add(base_model.conv_layers): 6 | add(RoiReshaper:StoreShape()) 7 | ): 8 | add(nn.ConcatTable(): 9 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal)): 10 | add(branch_transform_rois_share_fc_layers(base_model, ContextRegion)) 11 | ): 12 | add(nn.ConcatTable(): 13 | add(nn.Sequential(): 14 | add(nn.SelectTable(1)): 15 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 16 | add(RoiReshaper:RestoreShape()): 17 | named('output_fc8c') 18 | ): 19 | add(nn.Sequential(): 20 | add(nn.ParallelTable(): 21 | add(share_weight_bias(fc8r):named('output_fc8d_orig')): 22 | add(nn.Sequential(): 23 | add(share_weight_bias(fc8r)): 24 | add(nn.MulConstant(-1)): 25 | named('output_fc8d_context') 26 | ) 27 | ): 28 | add(nn.CAddTable()): 29 | add(RoiReshaper:RestoreShape(4)): 30 | add(cudnn.SpatialSoftMax()): 31 | add(nn.Squeeze(4)): 32 | named('output_softmax') 33 | ) 34 | ): 35 | add(nn.CMulTable():named('output_prod')): 36 | add(nn.Sum(2)) 37 | 38 | criterion = HingeCriterion():setFactor(1 / numClasses) 39 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4} 40 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10} 41 | -------------------------------------------------------------------------------- /model/contrastive_s.lua: -------------------------------------------------------------------------------- 1 | fc8r = nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8r') 2 | 3 | model = nn.Sequential(): 4 | add(nn.ParallelTable(): 5 | add(base_model.conv_layers): 6 | add(RoiReshaper:StoreShape()) 7 | ): 8 | add(nn.ConcatTable(): 9 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal)): 10 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal_ring)): 11 | add(branch_transform_rois_share_fc_layers(base_model, ContextRegion)) 12 | ): 13 | add(nn.ConcatTable(): 14 | add(nn.Sequential(): 15 | add(nn.SelectTable(1)): 16 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 17 | add(RoiReshaper:RestoreShape()): 18 | named('output_fc8c') 19 | ): 20 | add(nn.Sequential(): 21 | add(nn.ConcatTable(): 22 | add(nn.Sequential(): 23 | add(nn.SelectTable(2)): 24 | add(share_weight_bias(fc8r)): 25 | named('output_fc8d_origring') 26 | ): 27 | add(nn.Sequential(): 28 | add(nn.SelectTable(3)): 29 | add(share_weight_bias(fc8r)): 30 | add(nn.MulConstant(-1)): 31 | named('output_fc8d_context') 32 | ) 33 | ): 34 | add(nn.CAddTable()): 35 | add(RoiReshaper:RestoreShape(4)): 36 | add(cudnn.SpatialSoftMax()): 37 | add(nn.Squeeze(4)): 38 | named('output_softmax') 39 | ) 40 | ): 41 | add(nn.CMulTable():named('output_prod')): 42 | add(nn.Sum(2)) 43 | 44 | --classification_criterion = nn.BCECriterion(nil, false) 45 | --classification_criterion.updateOutput = function(self, input, target) return nn.BCECriterion.updateOutput(self, input, target * 0.5 + 0.5) end 46 | --classification_criterion.updateGradInput = function(self, input, target) return nn.BCECriterion.updateGradInput(self, input, target * 0.5 + 0.5) end 47 | 
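-- (the BCE variant above remaps the {-1, 1} hinge-style labels to {0, 1} via target * 0.5 + 0.5; it is kept commented out in favor of the hinge loss below)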
--criterion = classification_criterion
48 |
49 | criterion = HingeCriterion():setFactor(1 / numClasses)
50 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4}
51 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10}
52 |
--------------------------------------------------------------------------------
/model/rectangularringroipooling.cu:
--------------------------------------------------------------------------------
1 | // copied from https://github.com/gidariss/caffe_LocNet/blob/d2ba49552068958556b98ba382610ea865add17c/src/caffe/layers/region_pooling_layer.cu
2 |
3 | #include "luaT.h"
4 | #include "THC.h"
5 |
6 | #include <stdio.h>
7 | #include "THCGeneral.h"
8 |
9 | #define CAFFE_CUDA_NUM_THREADS 1024
10 |
11 | // CUDA: various checks for different function calls.
12 | #define CUDA_CHECK(condition) \
13 |   /* Code block avoids redefinition of cudaError_t error */ \
14 |   do { \
15 |     cudaError_t error = condition; \
16 |     if(error != cudaSuccess) { printf("CUDA ERROR. %s\n", cudaGetErrorString(error)); }; \
17 |   } while (0)
18 |
19 | // CUDA: number of blocks for threads.
20 | inline int CAFFE_GET_BLOCKS(const int N) {
21 |   return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
22 | }
23 |
24 | // CUDA: grid stride looping
25 | #define CUDA_KERNEL_LOOP(i, n) \
26 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
27 |        i < (n); \
28 |        i += blockDim.x * gridDim.x)
29 |
30 | // CUDA: check for error after kernel execution and exit loudly if there is one.
31 | #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError())
32 |
33 | template <typename Dtype>
34 | __global__ void ROIPoolForward(const int nthreads, const Dtype* bottom_data,
35 |     const Dtype spatial_scale, const int channels, const int height,
36 |     const int width, const int pooled_height, const int pooled_width,
37 |     const Dtype* bottom_rois, Dtype* top_data, int* argmax_data) {
38 |   CUDA_KERNEL_LOOP(index, nthreads) {
39 |     // (n, c, ph, pw) is an element in the pooled output
40 |     int pw = index % pooled_width;
41 |     int ph = (index / pooled_width) % pooled_height;
42 |     int c = (index / pooled_width / pooled_height) % channels;
43 |     int n = index / pooled_width / pooled_height / channels;
44 |
45 |     // For each ROI R = [batch_index, x_outer_1, y_outer_1, x_outer_2, y_outer_2, x_inner_1, y_inner_1, x_inner_2, y_inner_2]:
46 |     // where R_outer = [x_outer_1, y_outer_1, x_outer_2, y_outer_2] is the outer rectangle of the region and
47 |     // R_inner = [x_inner_1, y_inner_1, x_inner_2, y_inner_2] is the inner rectangle of the region,
48 |     // max-pool over R while ignoring (treating as zero) the activations that lie inside the inner rectangle R_inner
49 |
50 |     bottom_rois += n * 9;
51 |     int roi_batch_ind = bottom_rois[0];
52 |
53 |
54 |     // outer rectangle of the region
55 |     int roi_start_w = int(bottom_rois[1] );//* spatial_scale);
56 |     int roi_start_h = int(bottom_rois[2] );//* spatial_scale);
57 |     int roi_end_w = int(bottom_rois[3] );//* spatial_scale);
58 |     int roi_end_h = int(bottom_rois[4] );//* spatial_scale);
59 |
60 |     // inner rectangle of the region
61 |     int roi_start_w_in = int(bottom_rois[5]);//* spatial_scale);
62 |     int roi_start_h_in = int(bottom_rois[6]);//* spatial_scale);
63 |     int roi_end_w_in = int(bottom_rois[7]);//* spatial_scale);
64 |     int roi_end_h_in = int(bottom_rois[8]);//* spatial_scale);
65 |
66 |     // Force malformed ROIs to be 1x1
67 |     int roi_width = max(roi_end_w - roi_start_w + 1, 1);
68 |     int roi_height = max(roi_end_h - roi_start_h + 1, 1);
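    // each pooled output cell (ph, pw) max-pools over a bin of roughly (roi_height / pooled_height) x (roi_width / pooled_width) feature-map cells, clipped to the map borders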
69 |     Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height);
70 |     Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width);
71 |
72 |     const int hstart = min(height, max(0, static_cast<int>(floor(static_cast<Dtype>(ph) * bin_size_h)) + roi_start_h));
73 |     const int hend = min(height, max(0, static_cast<int>(ceil( static_cast<Dtype>(ph+1) * bin_size_h)) + roi_start_h));
74 |     const int wstart = min(width, max(0, static_cast<int>(floor(static_cast<Dtype>(pw) * bin_size_w)) + roi_start_w));
75 |     const int wend = min(width, max(0, static_cast<int>(ceil( static_cast<Dtype>(pw+1) * bin_size_w)) + roi_start_w));
76 |
77 |     Dtype maxval = 0;
78 |
79 |     int maxidx = -1;
80 |     bottom_data += (roi_batch_ind * channels + c) * height * width;
81 |     for (int h = hstart; h < hend; ++h) {
82 |       for (int w = wstart; w < wend; ++w) {
83 |         if (!(w > roi_start_w_in && w < roi_end_w_in && h > roi_start_h_in && h < roi_end_h_in)) {
84 |           // if it is not inside the inner rectangle of the region
85 |           int bottom_index = h * width + w;
86 |           if (bottom_data[bottom_index] > maxval) {
87 |             maxval = bottom_data[bottom_index];
88 |             maxidx = bottom_index;
89 |           }
90 |         }
91 |       }
92 |     }
93 |     top_data[index] = maxval;
94 |     argmax_data[index] = maxidx;
95 |   }
96 | }
97 |
98 | template <typename Dtype>
99 | __global__ void ROIPoolBackward(const int nthreads, const Dtype* top_diff,
100 |     const int* argmax_data, const int num_rois, const Dtype spatial_scale,
101 |     const int channels, const int height, const int width,
102 |     const int pooled_height, const int pooled_width, Dtype* bottom_diff,
103 |     const Dtype* bottom_rois) {
104 |   CUDA_KERNEL_LOOP(index, nthreads) {
105 |     // (n, c, h, w) coords in bottom data
106 |     int w = index % width;
107 |     int h = (index / width) % height;
108 |     int c = (index / width / height) % channels;
109 |     int n = index / width / height / channels;
110 |
111 |     Dtype gradient = 0;
112 |     // Accumulate gradient over all ROIs that pooled this element
113 |     for (int roi_n = 0; roi_n < num_rois; ++roi_n) {
114 |       const Dtype* offset_bottom_rois = bottom_rois + roi_n * 9;
115 |       int roi_batch_ind = offset_bottom_rois[0];
116 |       // Skip if ROI's batch index doesn't match n
117 |       if (n != roi_batch_ind) {
118 |         continue;
119 |       }
120 |
121 |
122 |       // outer rectangle of the region
123 |       int roi_start_w = int(offset_bottom_rois[1]);// * spatial_scale);
124 |       int roi_start_h = int(offset_bottom_rois[2]);// * spatial_scale);
125 |       int roi_end_w = int(offset_bottom_rois[3]);// * spatial_scale);
126 |       int roi_end_h = int(offset_bottom_rois[4]);// * spatial_scale);
127 |
128 |       // inner rectangle of the region
129 |       int roi_start_w_in = int(offset_bottom_rois[5]);// * spatial_scale);
130 |       int roi_start_h_in = int(offset_bottom_rois[6]);// * spatial_scale);
131 |       int roi_end_w_in = int(offset_bottom_rois[7]);// * spatial_scale);
132 |       int roi_end_h_in = int(offset_bottom_rois[8]);// * spatial_scale);
133 |
134 |
135 |       // Skip if ROI doesn't include (h, w)
136 |       const bool in_roi = (w >= roi_start_w && w <= roi_end_w &&
137 |                            h >= roi_start_h && h <= roi_end_h) &&
138 |                           !(w > roi_start_w_in && w < roi_end_w_in &&
139 |                             h > roi_start_h_in && h < roi_end_h_in);
140 |
141 |       if (!in_roi) {
142 |         continue;
143 |       }
144 |
145 |       int top_offset = (roi_n * channels + c) * pooled_height * pooled_width;
146 |       const Dtype* offset_top_diff = top_diff + top_offset;
147 |       const int* offset_argmax_data = argmax_data + top_offset;
148 |
149 |       // Compute feasible set of pooled units that could have pooled
150 |       // this bottom unit
151 |
152 |       // Force malformed ROIs to be 1x1
153 |       int roi_width = max(roi_end_w - roi_start_w + 1, 1);
154 |       int roi_height = max(roi_end_h - roi_start_h + 1, 1);
155 |
156 |       Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height);
157 |       Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width);
158 |
159 |       int phstart = floor(static_cast<Dtype>(h - roi_start_h) / bin_size_h);
160 |       int phend = ceil(static_cast<Dtype>(h - roi_start_h + 1) / bin_size_h);
161 |       int pwstart = floor(static_cast<Dtype>(w - roi_start_w) / bin_size_w);
162 |       int pwend = ceil(static_cast<Dtype>(w - roi_start_w + 1) / bin_size_w);
163 |
164 |       phstart = min(max(phstart, 0), pooled_height);
165 |       phend = min(max(phend, 0), pooled_height);
166 |       pwstart = min(max(pwstart, 0), pooled_width);
167 |       pwend = min(max(pwend, 0), pooled_width);
168 |
169 |       for (int ph = phstart; ph < phend; ++ph) {
170 |         for (int pw = pwstart; pw < pwend; ++pw) {
171 |           if (offset_argmax_data[ph * pooled_width + pw] == (h * width + w)) {
172 |             gradient += offset_top_diff[ph * pooled_width + pw];
173 |           }
174 |         }
175 |       }
176 |     }
177 |     bottom_diff[index] = gradient;
178 |   }
179 | }
180 |
181 | THCState* getCutorchState(lua_State* L)
182 | {
183 |   lua_getglobal(L, "cutorch");
184 |   lua_getfield(L, -1, "getState");
185 |   lua_call(L, 0, 1);
186 |   THCState *state = (THCState*) lua_touserdata(L, -1);
187 |   lua_pop(L, 2);
188 |   return state;
189 | }
190 |
191 | static int updateOutput(lua_State *L)
192 | {
193 |   THCState *state = getCutorchState(L);
194 |   THCudaTensor *input = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
195 |   THCudaTensor *rois = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
196 |   THCudaTensor *output = (THCudaTensor *)luaT_getfieldcheckudata(L, 1, "output", "torch.CudaTensor");
197 |   THCudaIntTensor *argmax = (THCudaIntTensor *)luaT_getfieldcheckudata(L, 1, "argmax", "torch.CudaIntTensor");
198 |
199 |   int pooled_height_ = luaT_getfieldcheckint(L, 1, "pooled_height");
200 |   int pooled_width_ = luaT_getfieldcheckint(L, 1, "pooled_width");
201 |   THCudaTensor_resize5d(state, output, THCudaTensor_size(state, rois, 0), THCudaTensor_size(state, rois, 1), THCudaTensor_size(state, input, 1), pooled_height_, pooled_width_);
202 |   THCudaIntTensor_resize5d(state, argmax, THCudaTensor_size(state, rois, 0), THCudaTensor_size(state, rois, 1), THCudaTensor_size(state, input, 1), pooled_height_, pooled_width_);
203 |
204 |   const float* bottom_data = THCudaTensor_data(state, input);
205 |   const float* bottom_rois = THCudaTensor_data(state, rois);
206 |   float* top_data = THCudaTensor_data(state, output);
207 |   int* argmax_data = THCudaIntTensor_data(state, argmax); // int -> float
208 |
209 |   // TODO: BATCH
210 |   // BDHW 1DHW
211 |   int count = THCudaTensor_nElement(state, output); // top[0]->count();
212 |   int channels_ = THCudaTensor_size(state, input, 1);
213 |   int height_ = THCudaTensor_size(state, input, 2);
214 |   int width_ = THCudaTensor_size(state, input, 3);
215 |   float spatial_scale_ = luaT_getfieldchecknumber(L, 1, "spatial_scale");
216 |
217 |   CUDA_POST_KERNEL_CHECK;
218 |
219 |   // NOLINT_NEXT_LINE(whitespace/operators)
220 |   ROIPoolForward<float><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
221 |     count, bottom_data, spatial_scale_, channels_, height_, width_, pooled_height_,
222 |     pooled_width_, bottom_rois, top_data, argmax_data);
223 |   CUDA_POST_KERNEL_CHECK;
224 |
225 |   return 1;
226 | }
227 |
228 | static int updateGradInput(lua_State *L)
229 | {
230 |   THCState *state = getCutorchState(L);
231 |   THCudaTensor *input = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
232 |   THCudaTensor *rois = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
233 |   THCudaIntTensor *argmax = (THCudaIntTensor *)luaT_getfieldcheckudata(L, 1, "argmax", "torch.CudaIntTensor");
234 |
235 |   THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
236 |   THCudaTensor *gradInput = (THCudaTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", "torch.CudaTensor");
237 |
238 |   THCudaTensor_resizeAs(state, gradInput, input);
239 |   THCudaTensor_zero(state, gradInput);
240 |
241 |   const float* bottom_rois = THCudaTensor_data(state, rois);
242 |   const float* top_diff = THCudaTensor_data(state, gradOutput);
243 |   float* bottom_diff = THCudaTensor_data(state, gradInput);
244 |   int* argmax_data = THCudaIntTensor_data(state, argmax);
245 |
246 |   const int count = THCudaTensor_nElement(state, gradInput);
247 |   int channels_ = THCudaTensor_size(state, input, 1);
248 |   int height_ = THCudaTensor_size(state, input, 2);
249 |   int width_ = THCudaTensor_size(state, input, 3);
250 |   int pooled_height_ = luaT_getfieldcheckint(L, 1, "pooled_height");
251 |   int pooled_width_ = luaT_getfieldcheckint(L, 1, "pooled_width");
252 |   float spatial_scale_ = luaT_getfieldchecknumber(L, 1, "spatial_scale");
253 |   int num_rois = THCudaTensor_size(state, rois, 0) * THCudaTensor_size(state, rois, 1); // batchSize x numRoisPerImage
254 |
255 |   // NOLINT_NEXT_LINE(whitespace/operators)
256 |   CUDA_POST_KERNEL_CHECK;
257 |   ROIPoolBackward<float><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
258 |     count, top_diff, argmax_data, num_rois, spatial_scale_, channels_,
259 |     height_, width_, pooled_height_, pooled_width_, bottom_diff, bottom_rois);
260 |   CUDA_POST_KERNEL_CHECK;
261 |
262 |   return 1;
263 | }
264 |
265 | static const struct luaL_Reg lua_registrations [] = {
266 |   {"updateOutput", updateOutput},
267 |   {"updateGradInput", updateGradInput},
268 |   {NULL, NULL}
269 | };
270 |
271 | LUA_EXTERNC DLL_EXPORT int luaopen_libcucontextlocnet(lua_State *L)
272 | {
273 |   lua_newtable(L);
274 |
275 |   luaT_pushmetatable(L, "torch.CudaTensor");
276 |   luaT_registeratname(L, lua_registrations, "contextlocnet");
277 |   lua_pop(L,1);
278 |
279 |   return 1;
280 | }
281 |
--------------------------------------------------------------------------------
/model/rectangularringroipooling.lua:
--------------------------------------------------------------------------------
1 | require 'cunn'
2 | require 'libcucontextlocnet'
3 |
4 | local RectangularRingRoiPooling, parent = torch.class('RectangularRingRoiPooling', 'nn.Module')
5 |
6 | function RectangularRingRoiPooling:__init(pooled_height, pooled_width, spatial_scale, scale_correction_params, roi_pre_transformer)
7 | 	parent.__init(self)
8 |
9 | 	assert(pooled_height > 0, 'pooled_h must be > 0')
10 | 	assert(pooled_width > 0, 'pooled_w must be > 0')
11 |
12 | 	self.pooled_height = pooled_height
13 | 	self.pooled_width = pooled_width
14 | 	self.spatial_scale = spatial_scale or 1.0
15 |
16 | 	self.scale_correction_params = scale_correction_params
17 | 	self.roi_pre_transformer = roi_pre_transformer
18 | end
19 |
20 | function RectangularRingRoiPooling:preprocess_rois(raw_rois)
21 | 	for i = 1, raw_rois:size(1) do
22 | 		self.preprocessed_rois[i]:select(2, 1):fill(i - 1)
23 | 	end
24 | 	self.preprocessed_rois:narrow(self.preprocessed_rois:dim(), 2, 4):copy(raw_rois:narrow(raw_rois:dim(), 1, 4))
25 | 	local rois = self.preprocessed_rois:narrow(self.preprocessed_rois:dim(), 2, 8)
26 |
27 | 	if self.roi_pre_transformer then
28 | 		self.roi_pre_transformer(rois)
29 | 	end
30 |
31 | 	local offset0, offset, spatial_scale = self.scale_correction_params.offset0,
self.scale_correction_params.offset, self.spatial_scale 32 | rois:select(rois:dim(), 1):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 33 | rois:select(rois:dim(), 2):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 34 | rois:select(rois:dim(), 3):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 35 | rois:select(rois:dim(), 4):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 36 | 37 | rois:select(rois:dim(), 5):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 38 | rois:select(rois:dim(), 6):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 39 | rois:select(rois:dim(), 7):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 40 | rois:select(rois:dim(), 8):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 41 | end 42 | 43 | function RectangularRingRoiPooling:updateOutput(input) 44 | self.preprocessed_rois = (self.preprocessed_rois or torch.CudaTensor()):resize(input[2]:size(1), input[2]:size(2), 1 + 8):zero() 45 | self:preprocess_rois(input[2]) 46 | 47 | self.argmax = self.argmax or torch.CudaIntTensor() 48 | input[1].contextlocnet.updateOutput(self, input[1], self.preprocessed_rois) 49 | return self.output 50 | end 51 | 52 | function RectangularRingRoiPooling:updateGradInput(input, gradOutput) 53 | self.gradInput = type(self.gradInput) == 'table' and (self.gradInput[1] or torch.CudaTensor()) or self.gradInput 54 | 55 | input[1].contextlocnet.updateGradInput(self, input[1], self.preprocessed_rois, gradOutput) 56 | self.rois_zero_grad = (self.rois_zero_grad or input[2].new()):resizeAs(input[2]):zero() 57 | self.gradInput = {self.gradInput, self.rois_zero_grad} 58 | return self.gradInput 59 | end 60 | -------------------------------------------------------------------------------- /model/roi_transforms.lua: -------------------------------------------------------------------------------- 1 | function branch_transform_rois_share_fc_layers(base_model, transformer) 2 | return nn.Sequential(): 3 | add(RectangularRingRoiPooling(base_model.pooled_height, base_model.pooled_width, base_model.spatial_scale, base_model.spp_correction_params, transformer)): 4 | add(base_model.fc_layers_view(RoiReshaper)): 5 | add(share_weight_bias(base_model.fc_layers)) 6 | end 7 | 8 | function RectangularRing(rois, scale_inner, scale_outer) 9 | local center_x = (rois:select(rois:dim(), 1) + rois:select(rois:dim(), 3)) / 2 10 | local center_y = (rois:select(rois:dim(), 2) + rois:select(rois:dim(), 4)) / 2 11 | local w_half = (rois:select(rois:dim(), 3) - rois:select(rois:dim(), 1)) / 2 12 | local h_half = (rois:select(rois:dim(), 4) - rois:select(rois:dim(), 2)) / 2 13 | 14 | rois:select(rois:dim(), 1):copy(center_x - w_half*scale_outer) 15 | rois:select(rois:dim(), 2):copy(center_y - h_half*scale_outer) 16 | rois:select(rois:dim(), 3):copy(center_x + w_half*scale_outer) 17 | rois:select(rois:dim(), 4):copy(center_y + h_half*scale_outer) 18 | rois:select(rois:dim(), 5):copy(center_x - w_half*scale_inner) 19 | rois:select(rois:dim(), 6):copy(center_y - h_half*scale_inner) 20 | rois:select(rois:dim(), 7):copy(center_x + w_half*scale_inner) 21 | rois:select(rois:dim(), 8):copy(center_y + h_half*scale_inner) 22 | end 23 | 24 | function MakeRectangularRingTransform(scale_inner, scale_outer) 25 | return function(rois) RectangularRing(rois, scale_inner, scale_outer) end 26 | end 27 | 28 | function BoxOriginal(rois) 29 | end 30 | 31 | CentralRegion1 = MakeRectangularRingTransform(0.0, 0.5) 32 | CentralRegion2 = MakeRectangularRingTransform(0.3, 0.8) 33 | 
BorderRegion1 = MakeRectangularRingTransform(0.5, 1.0) 34 | BorderRegion2 = MakeRectangularRingTransform(0.8, 1.5) 35 | ContextRegion = MakeRectangularRingTransform(1.0, opts.ROI_FACTOR) 36 | BoxOriginal_ring = MakeRectangularRingTransform(1.0 / opts.ROI_FACTOR, 1.0) 37 | ContextRegion_overlap = MakeRectangularRingTransform(0.8, 0.8 * opts.ROI_FACTOR) 38 | ContextRegion_outer = MakeRectangularRingTransform(1.2, 1.2 * opts.ROI_FACTOR) 39 | ContextRegion_big = MakeRectangularRingTransform(1.5, 2.0) 40 | CentralRegion_big = MakeRectangularRingTransform(0.0, 2.0) 41 | BoxScaleUp = MakeRectangularRingTransform(0.0, opts.ROI_FACTOR) 42 | 43 | function BoxHalfLeft(rois) 44 | rois:select(rois:dim(), 3):add(rois:select(rois:dim(), 1)):div(2) 45 | end 46 | 47 | function BoxHalfRight(rois) 48 | rois:select(rois:dim(), 1):add(rois:select(rois:dim(), 3)):div(2) 49 | end 50 | 51 | function BoxHalfUp(rois) 52 | rois:select(rois:dim(), 4):add(rois:select(rois:dim(), 2)):div(2) 53 | end 54 | 55 | function BoxHalfBottom(rois) 56 | rois:select(rois:dim(), 2):add(rois:select(rois:dim(), 4)):div(2) 57 | end 58 | 59 | function DoubleUp(rois) 60 | rois:select(rois:dim(), 2):csub(rois:select(rois:dim(), 4) - rois:select(rois:dim(), 2)) 61 | end 62 | 63 | function DoubleDown(rois) 64 | rois:select(rois:dim(), 4):add(rois:select(rois:dim(), 4) - rois:select(rois:dim(), 2)) 65 | end 66 | 67 | function DoubleLeft(rois) 68 | rois:select(rois:dim(), 1):csub(rois:select(rois:dim(), 3) - rois:select(rois:dim(), 1)) 69 | end 70 | 71 | function DoubleRight(rois) 72 | rois:select(rois:dim(), 3):add(rois:select(rois:dim(), 3) - rois:select(rois:dim(), 1)) 73 | end 74 | 75 | function ShiftUp(rois) 76 | DoubleUp(rois) 77 | rois:select(rois:dim(), 4):add(rois:select(rois:dim(), 2)):div(2) 78 | end 79 | 80 | function ShiftDown(rois) 81 | DoubleDown(rois) 82 | rois:select(rois:dim(), 2):add(rois:select(rois:dim(), 4)):div(2) 83 | end 84 | 85 | function ShiftLeft(rois) 86 | DoubleLeft(rois) 87 | rois:select(rois:dim(), 3):add(rois:select(rois:dim(), 1)):div(2) 88 | end 89 | 90 | function ShiftRight(rois) 91 | DoubleRight(rois) 92 | rois:select(rois:dim(), 1):add(rois:select(rois:dim(), 3)):div(2) 93 | end 94 | -------------------------------------------------------------------------------- /model/util.lua: -------------------------------------------------------------------------------- 1 | require 'cudnn' 2 | 3 | dofile('model/rectangularringroipooling.lua') 4 | dofile('model/HingeCriterion.lua') 5 | dofile('model/roi_transforms.lua') 6 | 7 | local function module_typename(module) 8 | return torch.typename(module):sub(4) 9 | end 10 | 11 | function model_load(path, opts) 12 | local loaded = paths.extname(path) == 'lua' and {model_path = path} or hdf5_load(path) 13 | local opts = opts or loaded.meta.opts 14 | local model_definition = io.open(loaded.model_path or loaded.meta.model_path):read('*all') 15 | 16 | base_model = dofile(paths.concat('model', opts.BASE_MODEL .. '.lua'))(opts.PATHS.BASE_MODEL_CACHED) 17 | assert(loadstring(model_definition))() 18 | 19 | local function dfs(module, prefix) 20 | if module.weight then 21 | assert(loaded.parameters[prefix .. '_weight'] ~= nil) 22 | module.weight:copy(loaded.parameters[prefix .. '_weight']) 23 | end 24 | if module.bias then 25 | assert(loaded.parameters[prefix .. '_bias'] ~= nil) 26 | module.bias:copy(loaded.parameters[prefix .. 
'_bias']) 27 | end 28 | 29 | for i, submodule in ipairs(module.modules or {}) do 30 | dfs(submodule, (submodule.name and submodule.name[1]) or ((prefix or module_typename(module)) .. '_' .. module_typename(submodule) .. i)) 31 | end 32 | end 33 | 34 | if loaded.parameters then 35 | dfs(model) 36 | end 37 | 38 | return loaded 39 | end 40 | 41 | function model_save(path, model, meta, epoch, log) 42 | local saved = { 43 | meta = meta, 44 | epoch = epoch, 45 | log = log, 46 | parameters = {} 47 | } 48 | 49 | local function dfs(module, prefix) 50 | if module.weight then 51 | local tensor_name = prefix .. '_weight' 52 | assert(saved.parameters[tensor_name] == nil or saved.parameters[tensor_name]:isSetTo(module.weight), torch.typename(module) .. ', ' ..prefix) 53 | saved.parameters[tensor_name] = module.weight 54 | end 55 | 56 | if module.bias then 57 | local tensor_name = prefix .. '_bias' 58 | assert(saved.parameters[tensor_name] == nil or saved.parameters[tensor_name]:isSetTo(module.bias), torch.typename(module) .. ', ' ..prefix) 59 | saved.parameters[tensor_name] = module.bias 60 | end 61 | 62 | for i, submodule in ipairs(module.modules or {}) do 63 | dfs(submodule, (submodule.name and submodule.name[1]) or ((prefix or module_typename(module)) .. '_' .. module_typename(submodule) .. i)) 64 | end 65 | end 66 | 67 | dfs(model) 68 | 69 | hdf5_save(path, saved) 70 | end 71 | 72 | RoiReshaper = { 73 | inputSize = nil, 74 | 75 | StoreShape = function(this) 76 | local module = nn.Identity() 77 | function module:updateOutput(input) 78 | this.inputSize = input:size() 79 | return nn.Identity.updateOutput(self, input) 80 | end 81 | return module 82 | end, 83 | 84 | RestoreShape = function(self, singletonDimension) 85 | return singletonDimension and DynamicView(function() return {-1, assert(self.inputSize)[2], numClasses, 1} end) or DynamicView(function() return {-1, assert(self.inputSize)[2], numClasses} end) 86 | end 87 | } 88 | 89 | function DynamicView(sizeFactory) 90 | local module = nn.View(-1) 91 | module.updateOutput = function(self, input) return nn.View.updateOutput(self:resetSize(unpack(sizeFactory())), input) end 92 | return module 93 | end 94 | 95 | function flatdim2(tensor) 96 | return tensor:contiguous():view(-1, unpack(torch.LongTensor(tensor:size()):sub(3, #tensor:size()):totable())) 97 | end 98 | 99 | function meandim2(tensor, batchSize) 100 | return tensor:contiguous():view(batchSize, -1, unpack(torch.LongTensor(tensor:size()):sub(2, #tensor:size()):totable())):mean(2):squeeze(2) 101 | end 102 | 103 | function share_weight_bias(module) 104 | return module:clone('weight', 'bias', 'gradWeight', 'gradBias') 105 | end 106 | 107 | function nn.Module.named(self, name) 108 | if not self.name then 109 | self.name = name 110 | else 111 | self.name = type(self.name) == 'table' and self.name or {self.name} 112 | table.insert(self.name, name) 113 | end 114 | return self 115 | end 116 | 117 | local nn_Module_findModules = nn.Module.findModules 118 | function nn.Module.findModules(self, typename, container) 119 | for _, name in ipairs(type(self.name) == 'table' and self.name or (type(self.name) == 'string' and {self.name} or {})) do 120 | if name == typename then 121 | return {self}, {self} 122 | end 123 | end 124 | return nn_Module_findModules(self, typename, container) 125 | end 126 | 127 | function Probe(module, name, recursive) 128 | name = name or module_typename(module) 129 | if recursive and module.modules then 130 | for i = 1, #module.modules do 131 | module.modules[i] = 
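--[=[ [editor's note: illustrative sketch, not part of the original source]
Probe (being attached to every submodule here) monkey-patches updateOutput,
updateGradInput and accGradParameters with cutorch-synchronized timers and input
shape printouts, which helps find slow layers. Hypothetical debugging usage:

  model = Probe(model, 'net', true) -- recursive: wraps all submodules too
  model:forward(batch)              -- prints per-module input sizes and timings in ms

Since modules are patched in place, this is meant for one-off profiling runs rather
than for training whose model will be checkpointed.
]=]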
Probe(module.modules[i], module.modules[i].name or (name .. '->' .. i), recursive) 132 | end 133 | end 134 | 135 | local module_updateOutput, module_updateGradInput, module_accGradParameters = module.updateOutput, module.updateGradInput, module.accGradParameters 136 | local fmtSize = function(tensor) return torch.isTensor(tensor) and ('('..('%d '):rep(tensor:dim())..')'):format(unpack(torch.LongTensor(tensor:size()):totable())) or tostring(#tensor) end 137 | function module:updateOutput(input) 138 | print(name, 'updateOutput: in', '#input = ', fmtSize(input)) 139 | local elapsed = gpuTicToc(function() self.output = module_updateOutput(self, input) end) 140 | print(name, 'updateOutput: out', ('%.4f ms'):format(elapsed*1000)) 141 | return self.output 142 | end 143 | function module:updateGradInput(input, gradOutput) 144 | print(name, 'updateGradInput: in') 145 | local elapsed = gpuTicToc(function() self.gradInput = module_updateGradInput(self, input, gradOutput) end) 146 | print(name, 'updateGradInput: out', ('%.4f ms'):format(elapsed*1000)) 147 | return self.gradInput 148 | end 149 | function module:accGradParameters(input, gradOutput, scale) 150 | print(name, 'accGradParameters: in') 151 | local elapsed = gpuTicToc(function() module_accGradParameters(self, input, gradOutput, scale) end) 152 | print(name, 'accGradParameters: out', ('%.4f ms'):format(elapsed*1000)) 153 | end 154 | return module 155 | end 156 | 157 | function gpuTicToc(f) 158 | cutorch.synchronize() 159 | local tic = torch.tic() 160 | f() 161 | cutorch.synchronize() 162 | return torch.toc(tic) 163 | end 164 | 165 | 166 | collectgarbage() 167 | -------------------------------------------------------------------------------- /model/wsddn_repro.lua: -------------------------------------------------------------------------------- 1 | model = nn.Sequential(): 2 | add(nn.ParallelTable(): 3 | add(base_model.conv_layers): 4 | add(nn.Identity()) 5 | ): 6 | add(RectangularRingRoiPooling(base_model.pooled_height, base_model.pooled_width, base_model.spatial_scale, base_model.spp_correction_params)): 7 | add(RoiReshaper:StoreShape()): 8 | add(base_model.fc_layers_view(RoiReshaper)): 9 | add(base_model.fc_layers): 10 | add(nn.ConcatTable(): 11 | add(nn.Sequential(): 12 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 13 | add(RoiReshaper:RestoreShape()): 14 | named('output_fc8c') 15 | ): 16 | add(nn.Sequential(): 17 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8d')): 18 | add(RoiReshaper:RestoreShape(4)): 19 | add(cudnn.SpatialSoftMax()): 20 | add(nn.Squeeze(4)): 21 | named('output_softmax') 22 | ) 23 | ): 24 | add(nn.CMulTable():named('output_prod')): 25 | add(nn.Sum(2)) 26 | 27 | criterion = HingeCriterion():setFactor(1 / numClasses) 28 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4} 29 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10} 30 | -------------------------------------------------------------------------------- /opts.lua: -------------------------------------------------------------------------------- 1 | local DATA = os.getenv('DATA') or 'data' 2 | local DATA_COMMON = os.getenv('DATA_COMMON') or paths.concat(DATA, 'common') 3 | 4 | PATHS = 5 | { 6 | EXTERNAL = 7 | { 8 | PRETRAINED_MODEL_VGGF = 9 | { 10 | PROTOTXT = paths.concat(DATA_COMMON, 'VGG_CNN_F_deploy.prototxt'), 11 | CAFFEMODEL = paths.concat(DATA_COMMON, 'VGG_CNN_F.caffemodel'), 12 | }, 13 | 14 | SSW_VOC2007 = 15 | { 16 | trainval = 
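--[=[ [editor's note: illustrative sketch, not part of the original source]
wsddn_repro.lua above follows the WSDDN two-stream head: fc8c gives per-ROI class
scores, fc8d gives per-ROI detection scores that cudnn.SpatialSoftMax normalizes
across ROIs per class (the singleton 4th dimension added by RestoreShape(4) makes
the ROI axis the softmax axis), the two streams are multiplied elementwise, and
nn.Sum(2) pools over ROIs to one score per class per image. A numeric sketch with
3 ROIs and 2 classes:

  -- fc8c scores s (rois x classes):             {2, 0}, {1, 1}, {0, 2}
  -- fc8d weights d, softmaxed over rois:        {0.7, 0.1}, {0.2, 0.2}, {0.1, 0.7}
  -- image score per class = sum_r s[r] * d[r] = {2*0.7 + 1*0.2 + 0*0.1, 0*0.1 + 1*0.2 + 2*0.7} = {1.6, 1.6}

The HingeCriterion is then applied to these image-level scores, so training needs
only image labels (weak supervision).
]=]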
paths.concat(DATA_COMMON, 'SelectiveSearchVOC2007trainval.mat'), 17 | test = paths.concat(DATA_COMMON, 'SelectiveSearchVOC2007test.mat') 18 | }, 19 | 20 | SSW_VOC2012 = 21 | { 22 | trainval = paths.concat(DATA_COMMON, 'selective_search_data/voc_2012_trainval.mat'), 23 | test = paths.concat(DATA_COMMON, 'selective_search_data/voc_2012_test.mat') 24 | }, 25 | 26 | VOC_DEVKIT_VOCYEAR = 27 | { 28 | VOC2007 = paths.concat(DATA_COMMON, 'VOCdevkit_2007/VOC2007'), 29 | VOC2012 = paths.concat(DATA_COMMON, 'VOCdevkit_2012/VOC2012') 30 | } 31 | }, 32 | 33 | BASE_MODEL_CACHED = 34 | { 35 | VGGF = paths.concat(DATA_COMMON, 'VGG_CNN_F.t7') 36 | }, 37 | 38 | DATASET_CACHED_PATTERN = paths.concat(DATA_COMMON, '%s_%s.t7'), 39 | CHECKPOINT_PATTERN = paths.concat(DATA, 'model_epoch%02d.h5'), 40 | LOG = paths.concat(DATA, 'log.json'), 41 | SCORES_PATTERN = paths.concat(DATA, 'scores_%s.h5'), 42 | CORLOC = paths.concat(DATA, 'corloc.json'), 43 | DETECTION_MAP = paths.concat(DATA, 'detection_mAP.json'), 44 | } 45 | 46 | local DATASET = os.getenv('DATASET') or 'VOC2007' 47 | local NUM_EPOCHS = tonumber(os.getenv('NUM_EPOCHS')) or 30 48 | local SUBSET = os.getenv('SUBSET') or 'trainval' 49 | local BASE_MODEL = 'VGGF' 50 | 51 | opts = { 52 | ROI_FACTOR = 1.8, 53 | SEED = 1, 54 | 55 | NMS_OVERLAP_THRESHOLD = 0.4, 56 | NMS_SCORE_THRESHOLD = 1e-4, 57 | 58 | IMAGE_SCALES = {{608, 800}, {496, 656}, {400, 544}, {720, 960}, {864, 1152}}, --{{608, 800}, {368, 480}, {432, 576}, {528, 688}, {656, 864}, {912, 1200}} 59 | 60 | NUM_SCALES = 5, 61 | NUM_EPOCHS = NUM_EPOCHS, 62 | 63 | OUTPUT_FIELDS = {'output_prod'}, 64 | DATASET = DATASET, 65 | BASE_MODEL = BASE_MODEL, 66 | 67 | SUBSET = SUBSET, 68 | PATHS = 69 | { 70 | MODEL = arg[1], 71 | 72 | DATA = DATA, 73 | DATA_COMMON = DATA_COMMON, 74 | 75 | CHECKPOINT_PATTERN = PATHS.CHECKPOINT_PATTERN, 76 | LOG = PATHS.LOG, 77 | SCORES_PATTERN = PATHS.SCORES_PATTERN, 78 | 79 | BASE_MODEL_CACHED = PATHS.BASE_MODEL_CACHED[BASE_MODEL], 80 | BASE_MODEL_RAW = PATHS.EXTERNAL['PRETRAINED_MODEL_' .. BASE_MODEL], 81 | 82 | PROPOSALS = PATHS.EXTERNAL['SSW_' .. 
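--[=[ [editor's note: illustrative sketch, not part of the original source]
opts.lua resolves every file location from a few environment variables (DATA,
DATA_COMMON, DATASET, SUBSET, NUM_EPOCHS) with defaults baked in, so runs can be
redirected without editing code. With the defaults DATA='data' and
DATASET='VOC2007', the lookups here resolve to, e.g.:

  opts.PATHS.PROPOSALS.trainval  --> data/common/SelectiveSearchVOC2007trainval.mat
  opts.PATHS.DATASET_CACHED      --> data/common/VOC2007_SSW.t7
  opts.PATHS.CHECKPOINT_PATTERN  --> data/model_epoch%02d.h5

Switching datasets is then just a matter of exporting DATASET=VOC2012 before
launching preprocessing and training.
]=]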
DATASET],
83 | 
84 | VOC_DEVKIT_VOCYEAR = PATHS.EXTERNAL.VOC_DEVKIT_VOCYEAR[DATASET],
85 | DATASET_CACHED = PATHS.DATASET_CACHED_PATTERN:format(DATASET, 'SSW'),
86 | 
87 | CORLOC = PATHS.CORLOC,
88 | DETECTION_MAP = PATHS.DETECTION_MAP,
89 | RUN_STATS_PATTERN = PATHS.RUN_STATS_PATTERN -- note: RUN_STATS_PATTERN is not defined in the PATHS table above, so this field is nil
90 | }
91 | }
92 | 
-------------------------------------------------------------------------------- /parallel_batch_loader.lua: --------------------------------------------------------------------------------
1 | -- TODO: if nThreads == 0, do everything on the main thread
2 | 
3 | require 'nn'
4 | 
5 | local ParallelBatchLoader, parent = torch.class('ParallelBatchLoader', 'nn.Module')
6 | 
7 | function ParallelBatchLoader:__init(example_loader, nThreads)
8 | parent.__init(self)
9 | 
10 | self.example_loader = example_loader
11 | self.nThreads = nThreads or 16
12 | 
13 | self.nextBatchIdx = 1
14 | self.preloadedBatchIdx = nil
15 | 
16 | self.batchSize = {[true] = nil, [false] = nil}
17 | self.batchBuffers = nil
18 | self.currentBufferIdx = 1
19 | 
20 | local threads = require 'threads'
21 | threads.Threads.serialization('threads.sharedserialize')
22 | self.jobQueue = threads.Threads(self.nThreads)
23 | 
24 | parent.evaluate(self) -- call on the instance, not the class table, so the train flag is per-loader
25 | end
26 | 
27 | function ParallelBatchLoader:loadBatch(exampleIdxBegin)
28 | self.jobQueue:synchronize()
29 | 
30 | self.currentBufferIdx = 3 - self.currentBufferIdx -- flip between the two prefetch buffers
31 | local batchTable = self.batchBuffers[self.currentBufferIdx]
32 | local isTrainingPhase = self.train
33 | 
34 | for exampleIndexInBatch = 1, self:getBatchSize() do
35 | local exampleIdx = isTrainingPhase and torch.random(1, self:getNumExamples()) or (exampleIdxBegin - 1 + exampleIndexInBatch) -- random sampling when training, sequential when evaluating
36 | local fillBatchTable = self.example_loader:loadExample(exampleIdx, isTrainingPhase)
37 | self.jobQueue:addjob(function() fillBatchTable(exampleIndexInBatch, batchTable) end)
38 | end
39 | end
40 | 
41 | function ParallelBatchLoader:getBatch(batchIdx)
42 | batchIdx = batchIdx or 1
43 | assert(batchIdx <= self:getNumBatches())
44 | 
45 | local exampleIdxBegin = 1 + (batchIdx - 1) * self:getBatchSize()
46 | local exampleIdxEnd = 1 + math.min(batchIdx * self:getBatchSize(), self:getNumExamples()) -- exclusive; the last batch may be partial
47 | local effectiveBatchSize = exampleIdxEnd - exampleIdxBegin
48 | local oldBatchSize = self:getBatchSize()
49 | 
50 | if batchIdx ~= self.preloadedBatchIdx or effectiveBatchSize ~= self:getBatchSize() then
51 | self:setBatchSize(effectiveBatchSize)
52 | self.preloadedBatchIdx = batchIdx
53 | self:loadBatch(exampleIdxBegin)
54 | end
55 | 
56 | self.jobQueue:synchronize()
57 | local loadedBatchTable = self.batchBuffers[self.currentBufferIdx]
58 | 
59 | if self:getBatchSize() ~= oldBatchSize then
60 | self:setBatchSize(oldBatchSize)
61 | end
62 | 
63 | local nextBatchIdx = batchIdx + 1
64 | if nextBatchIdx < self:getNumBatches() then
65 | self.preloadedBatchIdx = nextBatchIdx
66 | self:loadBatch(exampleIdxBegin + self:getBatchSize()) -- prefetch the next batch while the caller consumes this one
67 | end
68 | 
69 | return loadedBatchTable
70 | end
71 | 
72 | function ParallelBatchLoader:updateOutput()
73 | assert(self:getBatchSize())
74 | assert(self.nextBatchIdx)
75 | self.output = self:getBatch(self.nextBatchIdx)
76 | self.nextBatchIdx = self.nextBatchIdx + 1
77 | return self.output
78 | end
79 | 
80 | function ParallelBatchLoader:setBatchSize(batchSize)
81 | if type(batchSize) == 'table' then
82 | self.batchSize = {[true] = batchSize.training, [false] = batchSize.evaluate}
83 | else
84 | self.batchSize[self.train] = batchSize
85 | if self.batchSize[not self.train] == nil then
86 | self.batchSize[not self.train] = batchSize
87 | 
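--[=[ [editor's note: illustrative sketch, not part of the original source]
ParallelBatchLoader overlaps data loading with GPU compute via two batch buffers:
loadBatch flips currentBufferIdx (3 - idx alternates between 1 and 2) and enqueues
fill jobs on the thread pool, while getBatch returns the previously filled buffer
and immediately schedules batch i+1. A typical consuming loop:

  local loader = ParallelBatchLoader(example_loader, 8):setBatchSize({training = 1, evaluate = 1})
  loader:training()
  for batchIdx = 1, loader:getNumBatches() do
    local batch = loader:forward() -- blocks only until the prefetched buffer is ready
  end

Because there are exactly two buffers, a returned batch stays valid only until the
next getBatch call.
]=]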
end
88 | end
89 | 
90 | self:reinitBatchBuffers()
91 | 
92 | return self
93 | end
94 | 
95 | function ParallelBatchLoader:reinitBatchBuffers()
96 | self.batchBuffers = {self.example_loader:makeBatchTable(self:getBatchSize(), self.train), self.example_loader:makeBatchTable(self:getBatchSize(), self.train)}
97 | end
98 | 
99 | function ParallelBatchLoader:getBatchSize()
100 | return self.batchSize[self.train]
101 | end
102 | 
103 | function ParallelBatchLoader:getNumBatches()
104 | return torch.ceil(self:getNumExamples() / self:getBatchSize())
105 | end
106 | 
107 | function ParallelBatchLoader:getNumExamples()
108 | return self.example_loader:getNumExamples(self.train)
109 | end
110 | 
111 | function ParallelBatchLoader:training()
112 | parent.training(self) -- call on the instance, not the class table (parent:training() would flip nn.Module's shared train flag)
113 | self.nextBatchIdx = 1
114 | self:reinitBatchBuffers()
115 | end
116 | 
117 | function ParallelBatchLoader:evaluate()
118 | parent.evaluate(self)
119 | self.nextBatchIdx = 1
120 | self:reinitBatchBuffers()
121 | end
122 | 
-------------------------------------------------------------------------------- /pascal_voc.lua: --------------------------------------------------------------------------------
1 | local classLabels = {'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'}
2 | 
3 | local function precisionrecall(scores_all, labels_all)
4 | -- adapted from VOCdevkit/VOCcode/VOCevalcls.m (VOCap.m); tested, gives equivalent results
5 | local function VOCap(rec, prec)
6 | local mrec = torch.cat(torch.cat(torch.FloatTensor({0}), rec), torch.FloatTensor({1}))
7 | local mpre = torch.cat(torch.cat(torch.FloatTensor({0}), prec), torch.FloatTensor({0}))
8 | for i=mpre:numel()-1, 1, -1 do
9 | mpre[i]=math.max(mpre[i], mpre[i+1]) -- make precision monotonically non-increasing
10 | end
11 | 
12 | local i = (mrec:sub(2, mrec:numel())):ne(mrec:sub(1, mrec:numel() - 1)):nonzero():squeeze(2) + 1
13 | local ap = (mrec:index(1, i) - mrec:index(1, i - 1)):cmul(mpre:index(1, i)):sum() -- integrate precision over the recall steps
14 | 
15 | return ap
16 | end
17 | 
18 | local function VOCevalcls(out, gt)
19 | local so, si = (-out):sort() -- sort by decreasing score; si holds the sorted example indices
20 | 
21 | local tp=gt:index(1, si):gt(0):float()
22 | local fp=gt:index(1, si):lt(0):float()
23 | 
24 | fp=fp:cumsum()
25 | tp=tp:cumsum()
26 | 
27 | local rec=tp/gt:gt(0):sum()
28 | local prec=tp:cdiv(fp+tp)
29 | 
30 | local ap=VOCap(rec,prec)
31 | return rec, prec, ap
32 | end
33 | 
34 | local prec = torch.FloatTensor(scores_all:size())
35 | local rec = torch.FloatTensor(scores_all:size())
36 | local ap = torch.FloatTensor(#classLabels)
37 | 
38 | for classLabelInd = 1, #classLabels do
39 | local r, p, a = VOCevalcls(scores_all:narrow(2, classLabelInd, 1):squeeze(), labels_all:narrow(2, classLabelInd, 1):squeeze()) -- VOCevalcls returns (rec, prec, ap); the original bound them in swapped order
40 | prec:narrow(2, classLabelInd, 1):copy(p)
41 | rec:narrow(2, classLabelInd, 1):copy(r)
42 | ap[classLabelInd] = a
43 | end
44 | 
45 | return prec, rec, ap
46 | end
47 | 
48 | return {
49 | classLabels = classLabels,
50 | numClasses = #classLabels,
51 | 
52 | load = function(VOCdevkit_VOCYEAR)
53 | local xml = require 'xml'
54 | 
55 | local filelists =
56 | {
57 | train = paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/train.txt'),
58 | val = paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/val.txt'),
59 | test = paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/test.txt'),
60 | }
61 | 
62 | local numMaxExamples = 11000
63 | local numMaxObjectsPerExample = 5
64 | 
65 | local mkDataset = function() return
66 | {
67 | filenames = torch.CharTensor(numMaxExamples, 16):zero(),
68 | labels = 
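--[=[ [editor's note: illustrative sketch, not part of the original source]
precisionrecall above reimplements VOC classification AP: per class, examples are
sorted by decreasing score, cumulative true/false positives yield the
precision/recall curve, and VOCap integrates the monotonized precision over recall.
Hypothetical call, one row per image (5011 is the VOC2007 trainval size):

  local scores = torch.FloatTensor(5011, 20) -- classifier outputs, images x 20 VOC classes
  local labels = torch.FloatTensor(5011, 20) -- +1 present, -1 absent, 0 ignored ('difficult')
  local prec, rec, ap = precisionrecall(scores, labels)
  print(ap:mean()) -- mean AP over classes, as exposed by meanAP at the end of this file
]=]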
torch.FloatTensor(numMaxExamples, #classLabels):zero(), 69 | objectBoxes = torch.FloatTensor(numMaxExamples * numMaxObjectsPerExample, 5):zero(), 70 | objectBoxesInds = torch.IntTensor(numMaxExamples, 2):zero(), 71 | jpegs = torch.ByteTensor(numMaxExamples * 3 * 50000):zero(), 72 | jpegsInds = torch.IntTensor(numMaxExamples, 2):zero(), 73 | 74 | getNumExamples = function(self) 75 | return self.numExamples 76 | end, 77 | 78 | getImageFileName = function(self, exampleIdx) 79 | return self.filenames[exampleIdx]:clone():storage():string():match('%Z+') 80 | end, 81 | 82 | getGroundTruthBoxes = function(self, exampleIdx) 83 | return self.objectBoxes:sub(self.objectBoxesInds[exampleIdx][1], self.objectBoxesInds[exampleIdx][2]) 84 | end, 85 | 86 | getJpegBytes = function(self, exampleIdx) 87 | return self.jpegs:sub(self.jpegsInds[exampleIdx][1], self.jpegsInds[exampleIdx][2]) 88 | end, 89 | 90 | getLabels = function(self, exampleIdx) 91 | return self.labels[exampleIdx] 92 | end 93 | } end 94 | 95 | local voc = { train = mkDataset(), val = mkDataset(), test = mkDataset() } 96 | 97 | for _, subset in ipairs{'train', 'val', 'test'} do 98 | local exampleIdx = 1 99 | local jpegsFirstByteInd = 1 100 | for line in io.lines(filelists[subset]) do 101 | assert(exampleIdx <= numMaxExamples) 102 | assert(#line < voc[subset].filenames:size(2)) 103 | 104 | voc[subset].filenames[exampleIdx]:sub(1, #line):copy(torch.CharTensor(torch.CharStorage():string(line))) 105 | 106 | local f = torch.DiskFile(paths.concat(VOCdevkit_VOCYEAR, 'JPEGImages', line .. '.jpg'), 'r') 107 | f:binary() 108 | f:seekEnd() 109 | local file_size_bytes = f:position() - 1 110 | f:seek(1) 111 | local bytes = torch.ByteTensor(file_size_bytes) 112 | f:readByte(bytes:storage()) 113 | voc[subset].jpegsInds[exampleIdx] = torch.IntTensor({jpegsFirstByteInd, jpegsFirstByteInd + file_size_bytes - 1}) 114 | voc[subset]:getJpegBytes(exampleIdx):copy(bytes) 115 | f:close() 116 | 117 | jpegsFirstByteInd = voc[subset].jpegsInds[exampleIdx][2] + 1 118 | exampleIdx = exampleIdx + 1 119 | end 120 | voc[subset].numExamples = exampleIdx - 1 121 | end 122 | local testHasAnnotation = VOCdevkit_VOCYEAR:find('2007') ~= nil 123 | for _, subset in ipairs(testHasAnnotation and {'train', 'val', 'test'} or {'train', 'val'}) do 124 | for classLabelInd, v in ipairs(classLabels) do 125 | local exampleIdx = 1 126 | for line in io.lines(paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/'..v..'_'..subset..'.txt')) do 127 | if string.find(line, ' -1', 1, true) then 128 | voc[subset].labels[exampleIdx][classLabelInd] = -1 129 | elseif string.find(line, ' 1', 1, true) then 130 | voc[subset].labels[exampleIdx][classLabelInd] = 1 131 | end 132 | exampleIdx = exampleIdx + 1 133 | end 134 | end 135 | 136 | local exampleIdx = 1 137 | local objectBoxIdx = 1 138 | for line in io.lines(filelists[subset]) do 139 | local anno_xml = xml.loadpath(paths.concat(VOCdevkit_VOCYEAR, 'Annotations/' .. 
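--[=[ [editor's note: illustrative sketch, not part of the original source]
The loader packs a whole VOC subset into a few flat preallocated tensors: raw JPEG
bytes end to end plus per-image (begin, end) index pairs, and similarly for object
boxes. This keeps the dataset a single torch.save-able blob that loader threads can
share cheaply. Reading image k back is a sub-range plus a decode:

  local bytes = voc[subset]:getJpegBytes(k) -- i.e. jpegs:sub(jpegsInds[k][1], jpegsInds[k][2])
  local img = require('image').decompressJPG(bytes)
]=]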
line ..'.xml')) 140 | 141 | local firstObjectBoxIdx = objectBoxIdx 142 | for i = 1, #anno_xml do 143 | if anno_xml[i].xml == 'object' then 144 | local classLabel = xml.find(anno_xml[i], 'name')[1] 145 | local xmin = xml.find(xml.find(anno_xml[i], 'bndbox'), 'xmin')[1] 146 | local xmax = xml.find(xml.find(anno_xml[i], 'bndbox'), 'xmax')[1] 147 | local ymin = xml.find(xml.find(anno_xml[i], 'bndbox'), 'ymin')[1] 148 | local ymax = xml.find(xml.find(anno_xml[i], 'bndbox'), 'ymax')[1] 149 | 150 | for classLabelInd = 1, #classLabels do 151 | if classLabels[classLabelInd] == classLabel then 152 | assert(objectBoxIdx <= voc[subset].objectBoxes:size(1)) 153 | 154 | voc[subset].objectBoxes[objectBoxIdx] = torch.FloatTensor({classLabelInd, xmin, ymin, xmax, ymax}) 155 | objectBoxIdx = objectBoxIdx + 1 156 | end 157 | end 158 | end 159 | end 160 | 161 | voc[subset].objectBoxesInds[exampleIdx] = torch.IntTensor({firstObjectBoxIdx, objectBoxIdx - 1}) 162 | exampleIdx = exampleIdx + 1 163 | end 164 | end 165 | 166 | if not testHasAnnotation then 167 | voc['test'].objectBoxesInds = nil 168 | voc['test'].objectBoxes = nil 169 | end 170 | 171 | for _, subset in ipairs{'train', 'val', 'test'} do 172 | voc[subset].filenames = voc[subset].filenames:sub(1, voc[subset].numExamples):clone() 173 | voc[subset].labels = voc[subset].labels:sub(1, voc[subset].numExamples):clone() 174 | voc[subset].jpegsInds = voc[subset].jpegsInds:sub(1, voc[subset].numExamples):clone() 175 | voc[subset].jpegs = voc[subset].jpegs:sub(1, voc[subset].jpegsInds[voc[subset].numExamples][2]):clone() 176 | 177 | if voc[subset].objectBoxes and voc[subset].objectBoxesInds then 178 | voc[subset].objectBoxesInds = voc[subset].objectBoxesInds:sub(1, voc[subset].numExamples):clone() 179 | voc[subset].objectBoxes = voc[subset].objectBoxes:sub(1, voc[subset].objectBoxesInds[voc[subset].numExamples][2]):clone() 180 | end 181 | end 182 | 183 | voc['trainval'] = { 184 | train = voc['train'], 185 | val = voc['val'], 186 | getNumExamples = function(self) 187 | return self.train:getNumExamples() + self.val:getNumExamples() 188 | end, 189 | 190 | getImageFileName = function(self, exampleIdx) 191 | return exampleIdx <= self.train:getNumExamples() and self.train:getImageFileName(exampleIdx) or self.val:getImageFileName(exampleIdx - self.train:getNumExamples()) 192 | end, 193 | 194 | getGroundTruthBoxes = function(self, exampleIdx) 195 | return exampleIdx <= self.train:getNumExamples() and self.train:getGroundTruthBoxes(exampleIdx) or self.val:getGroundTruthBoxes(exampleIdx - self.train:getNumExamples()) 196 | end, 197 | 198 | getJpegBytes = function(self, exampleIdx) 199 | return exampleIdx <= self.train:getNumExamples() and self.train:getJpegBytes(exampleIdx) or self.val:getJpegBytes(exampleIdx - self.train:getNumExamples()) 200 | end, 201 | 202 | getLabels = function(self, exampleIdx) 203 | return exampleIdx <= self.train:getNumExamples() and self.train:getLabels(exampleIdx) or self.val:getLabels(exampleIdx - self.train:getNumExamples()) 204 | end 205 | } 206 | 207 | return voc 208 | end, 209 | 210 | package_submission = function(OUT, voc, VOCYEAR, subset, task, ...) 
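--[=[ [editor's note: illustrative sketch, not part of the original source]
package_submission writes standard VOCdevkit results files under
<OUT>/results/<VOCYEAR>/Main/, one file per class, in parallel. The 'det' writer
emits one candidate detection per line in the usual VOC format,

  <image id> <confidence> <xmin> <ymin> <xmax> <ymax>

converting the internally stored 0-based boxes back to 1-based VOC coordinates
(the +1 clamped below at 1) and keeping only detections that survived the NMS mask;
the 'cls' writer emits '<image id> <confidence>' per image. Everything is finally
tarred for the VOC evaluation server.
]=]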
211 | local task_a, task_b = task:match('(.+)_(.+)') 212 | local write = { 213 | cls = function(f, classLabelInd, scores) 214 | assert(voc[subset]:getNumExamples() == scores:size(1)) 215 | 216 | for exampleIdx = 1, voc[subset]:getNumExamples() do 217 | f:write(string.format('%s %.12f\n', voc[subset]:getImageFileName(exampleIdx), scores[exampleIdx][classLabelInd])) 218 | end 219 | end, 220 | det = function(f, classLabelInd, rois, scores, mask) 221 | assert(voc[subset]:getNumExamples() == #scores and voc[subset]:getNumExamples() == #rois) 222 | 223 | for exampleIdx = 1, voc[subset]:getNumExamples() do 224 | for roiInd = 1, scores[exampleIdx]:size(scores[exampleIdx]:dim()) do 225 | if mask[exampleIdx][classLabelInd][roiInd] > 0 then 226 | f:write(string.format('%s %.12f %.12f %.12f %.12f %.12f\n', 227 | voc[subset]:getImageFileName(exampleIdx), 228 | scores[exampleIdx][classLabelInd][roiInd], 229 | math.max(1, rois[exampleIdx][roiInd][1] + 1), 230 | math.max(1, rois[exampleIdx][roiInd][2] + 1), 231 | math.max(1, rois[exampleIdx][roiInd][3] + 1), 232 | math.max(1, rois[exampleIdx][roiInd][4] + 1) 233 | )) 234 | end 235 | end 236 | end 237 | end 238 | } 239 | 240 | os.execute(string.format('rm -rf "%s/results"', OUT)) 241 | os.execute(string.format('mkdir -p "%s/results/%s/Main"', OUT, VOCYEAR)) 242 | 243 | local respath = string.format('%s/results/%s/Main/%%s_%s_%s_%%s.txt', OUT, VOCYEAR, task_b, subset) 244 | 245 | threads = require 'threads' 246 | threads.Threads.serialization('threads.sharedserialize') 247 | jobQueue = threads.Threads(#classLabels) 248 | local writer = write[task_b] 249 | for classLabelInd, classLabel in ipairs(classLabels) do 250 | jobQueue:addjob(function(...) 251 | local f = assert(io.open(respath:format(task_a, classLabel), 'w')) 252 | writer(f, classLabelInd, ...) 253 | f:close() 254 | end, function() end, ...) 255 | end 256 | jobQueue:synchronize() 257 | os.execute(string.format('cd "%s" && tar -czf "results-%s-%s-%s.tar.gz" results', OUT, VOCYEAR, task, subset)) 258 | return respath 259 | end, 260 | 261 | vis_classification_submission = function(OUT, VOCYEAR, subset, classLabel, JPEGImages_DIR, top_k) 262 | top_k = top_k or 20 263 | local res_file_path = string.format('%s/results/%s/Main/comp2_cls_%s_%s.txt', OUT, VOCYEAR, subset, classLabel) 264 | 265 | local scores = {} 266 | for line in assert(io.open(res_file_path)):lines() do 267 | scores[#scores + 1] = line:split(' ') 268 | end 269 | 270 | table.sort(scores, function(a, b) return -tonumber(a[2]) < -tonumber(b[2]) end) 271 | 272 | local image = require 'image' 273 | local top_imgs = {} 274 | print('K = ', top_k) 275 | for i = 1, top_k do 276 | top_imgs[i] = image.scale(image.load(paths.concat(JPEGImages_DIR, scores[i][1] .. 
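--[=[ [editor's note: illustrative sketch, not part of the original source]
vis_classification_submission is a small sanity-check helper: it re-reads a results
file written by package_submission, sorts its '<image id> <score>' lines by
decreasing score, and displays the top_k highest-scoring images. Hypothetical usage:

  vis_classification_submission('data', 'VOC2007', 'test', 'dog', 'data/common/VOCdevkit_2007/VOC2007/JPEGImages')

to eyeball whether the top-ranked 'dog' images actually contain dogs.
]=]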
'.jpg')), 128, 128) 277 | print(scores[i][2], scores[i][1]) 278 | end 279 | 280 | image.display(top_imgs) 281 | end, 282 | 283 | precisionrecall = precisionrecall, 284 | 285 | meanAP = function(scores_all, labels_all) 286 | return ({precisionrecall(scores_all, labels_all)})[3]:mean() 287 | end 288 | } 289 | -------------------------------------------------------------------------------- /preprocess.lua: -------------------------------------------------------------------------------- 1 | require 'cudnn' 2 | require 'loadcaffe' 3 | require 'image' 4 | 5 | matio = require 'matio' 6 | voc_tools = dofile('pascal_voc.lua') 7 | 8 | dofile('opts.lua') 9 | 10 | function VGGF() 11 | local model_converted = loadcaffe.load(opts.PATHS.BASE_MODEL_RAW.PROTOTXT, opts.PATHS.BASE_MODEL_RAW.CAFFEMODEL, 'cudnn'):float() 12 | torch.save(opts.PATHS.BASE_MODEL_CACHED, model_converted) 13 | end 14 | 15 | function VOC() 16 | local function copy_proposals_in_dataset(trainval_test_mat_paths, voc) 17 | local subset_paths = {{'train', trainval_test_mat_paths.trainval}, {'val', trainval_test_mat_paths.trainval}, {'test', trainval_test_mat_paths.test}} 18 | 19 | local m = {train = {}, val = {}, test = {}} 20 | local b = {train = nil, val = nil, test = nil} 21 | local s = {train = nil, val = nil, test = nil} 22 | for _, t in ipairs(subset_paths) do 23 | local h = matio.load(t[2]) 24 | b[t[1]] = h.boxes 25 | s[t[1]] = h.boxScores 26 | for exampleIdx = 1, #b[t[1]] do 27 | m[t[1]][h.images[exampleIdx]:storage():string()] = exampleIdx 28 | end 29 | end 30 | 31 | for _, subset in ipairs{'train', 'val', 'test'} do 32 | voc[subset].rois = {} 33 | for exampleIdx = 1, voc[subset]:getNumExamples() do 34 | local ind = m[subset][voc[subset]:getImageFileName(exampleIdx)] 35 | local box_scores = s[subset] and s[subset][ind] or torch.FloatTensor(b[subset][ind]:size(1), 1):zero() 36 | --local box_scores = torch.FloatTensor(b[subset][ind]:size(1), 1):zero() 37 | voc[subset].rois[exampleIdx] = torch.cat(b[subset][ind]:index(2, torch.LongTensor{2, 1, 4, 3}):float() - 1, box_scores) 38 | 39 | if s[subset] then 40 | voc[subset].rois[exampleIdx] = voc[subset].rois[exampleIdx]:index(1, ({box_scores:squeeze(2):sort(1, true)})[2]:sub(1, math.min(box_scores:size(1), 2048))) 41 | end 42 | end 43 | voc[subset].getProposals = function(self, exampleIdx) 44 | return self.rois[exampleIdx] 45 | end 46 | end 47 | 48 | voc['trainval'].getProposals = function(self, exampleIdx) 49 | return exampleIdx <= self.train:getNumExamples() and self.train:getProposals(exampleIdx) or self.val:getProposals(exampleIdx - self.train:getNumExamples()) 50 | end 51 | end 52 | 53 | local function filter_proposals(voc) 54 | local min_width_height = 20 55 | for _, subset in ipairs{'train', 'val', 'test'} do 56 | for exampleIdx = 1, voc[subset]:getNumExamples() do 57 | local x1, y1, x2, y2 = unpack(voc[subset].rois[exampleIdx]:split(1, 2)) 58 | local channels, height, width = unpack(image.decompressJPG(voc[subset]:getJpegBytes(exampleIdx)):size():totable()) 59 | 60 | assert(x1:ge(0):all() and x1:le(width):all()) 61 | assert(x2:ge(0):all() and x2:le(width):all()) 62 | assert(y1:ge(0):all() and y1:le(height):all()) 63 | assert(y2:ge(0):all() and y2:le(height):all()) 64 | assert(x1:le(x2):all() and y1:le(y2):all()) 65 | 66 | voc[subset].rois[exampleIdx] = voc[subset].rois[exampleIdx]:index(1, (x2 - x1):ge(min_width_height):cmul((y2 - y1):ge(min_width_height)):squeeze(2):nonzero():squeeze(2)) 67 | end 68 | end 69 | end 70 | 71 | local voc = 
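--[=[ [editor's note: illustrative sketch, not part of the original source]
copy_proposals_in_dataset above performs two quiet coordinate fixes: the .mat
proposal boxes arrive in MATLAB (y1, x1, y2, x2) order with 1-based indices, so

  b[subset][ind]:index(2, torch.LongTensor{2, 1, 4, 3}):float() - 1

reorders the columns to (x1, y1, x2, y2) and shifts to 0-based coordinates. A fifth
column carries the proposal score (zeros when the .mat file has none, as for the
VOC2007 Selective Search windows), and when scores do exist only the 2048
best-scoring proposals per image are kept. filter_proposals then drops boxes less
than 20 pixels wide or tall.
]=]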
voc_tools.load(opts.PATHS.VOC_DEVKIT_VOCYEAR) 72 | copy_proposals_in_dataset(opts.PATHS.PROPOSALS, voc) 73 | filter_proposals(voc) 74 | torch.save(opts.PATHS.DATASET_CACHED, voc) 75 | end 76 | 77 | for _, a in ipairs(arg) do 78 | print('Preprocessing', a) 79 | _G[a]() 80 | end 81 | print('Done') 82 | -------------------------------------------------------------------------------- /test.lua: -------------------------------------------------------------------------------- 1 | dofile('opts.lua') 2 | dofile('util.lua') 3 | dofile('dataset.lua') 4 | dofile('model/util.lua') 5 | 6 | assert(os.getenv('CUDA_VISIBLE_DEVICES') ~= nil and cutorch.getDeviceCount() <= 1, 'SHOULD RUN ON ONE GPU FOR NOW') 7 | 8 | loaded = model_load(opts.PATHS.MODEL, opts) 9 | 10 | meta = { 11 | opts = opts, 12 | training_meta = loaded.meta, 13 | example_loader_options = { 14 | evaluate = { 15 | numRoisPerImage = 8192, 16 | subset = opts.SUBSET, 17 | hflips = true, 18 | numScales = opts.NUM_SCALES 19 | } 20 | } 21 | } 22 | 23 | batch_loader = ParallelBatchLoader(ExampleLoader(dataset, base_model.normalization_params, opts.IMAGE_SCALES, meta.example_loader_options)):setBatchSize({evaluate = 1}) 24 | 25 | print(meta) 26 | assert(model):cuda() 27 | assert(criterion):cuda() 28 | collectgarbage() 29 | 30 | tic_start = torch.tic() 31 | 32 | batch_loader:evaluate() 33 | model:evaluate() 34 | scores, labels, rois, outputs = {}, {}, {}, {} 35 | for batchIdx = 1, batch_loader:getNumBatches() do 36 | tic = torch.tic() 37 | 38 | scale_batches = batch_loader:forward()[1] 39 | scale0_rois = scale_batches[1][2] 40 | scale_outputs, scale_scores, scale_costs = {}, {}, {} 41 | for i = 2, #scale_batches do 42 | batch_images, batch_rois, batch_labels = unpack(scale_batches[i]) 43 | batch_images_gpu = (batch_images_gpu or torch.CudaTensor()):resize(batch_images:size()):copy(batch_images) 44 | batch_labels_gpu = (batch_labels_gpu or torch.CudaTensor()):resize(batch_labels:size()):copy(batch_labels) 45 | if nn.gModule then 46 | batch_scores = model:forward({batch_images_gpu, batch_rois, scale0_rois}) 47 | else 48 | batch_scores = model:forward({batch_images_gpu, batch_rois}) 49 | end 50 | 51 | cost = criterion:forward(batch_scores, batch_labels_gpu) 52 | 53 | table.insert(scale_scores, (type(batch_scores) == 'table' and batch_scores[1] or batch_scores):float()) 54 | table.insert(scale_costs, cost) 55 | for _, output_field in ipairs(opts.OUTPUT_FIELDS) do 56 | module = model:findModules(output_field)[1] 57 | if module then 58 | scale_outputs[output_field] = scale_outputs[output_field] or {} 59 | table.insert(scale_outputs[output_field], module.output:transpose(2, 3):float()) 60 | end 61 | end 62 | end 63 | 64 | for output_field, output in pairs(scale_outputs) do 65 | outputs[output_field] = outputs[output_field] or {} 66 | table.insert(outputs[output_field], torch.cat(output, 1):mean(1):squeeze(1)) 67 | end 68 | 69 | table.insert(scores, torch.cat(scale_scores, 1):mean(1)) 70 | table.insert(labels, batch_labels:clone()) 71 | table.insert(rois, scale0_rois:narrow(scale0_rois:dim(), 1, 4):clone()[1]) 72 | 73 | collectgarbage() 74 | print('val', 'batch', batchIdx, torch.FloatTensor(scale_costs):mean(), 'img/sec', (#scale_batches - 1) / torch.toc(tic)) 75 | end 76 | 77 | subset = batch_loader.example_loader:getSubset(batch_loader.train) 78 | hdf5_save(opts.PATHS.SCORES_PATTERN:format(subset), { 79 | subset = subset, 80 | meta = meta, 81 | 82 | rois = rois, 83 | labels = torch.cat(labels, 1), 84 | output = torch.cat(scores, 1), 85 | outputs = 
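--[=[ [editor's note: illustrative sketch, not part of the original source]
This evaluation loop runs every image through several scales (and horizontal flips,
per example_loader_options), averages the per-scale score tensors with
torch.cat(scale_scores, 1):mean(1), and writes one HDF5 file per subset via
opts.PATHS.SCORES_PATTERN (e.g. data/scores_test.h5), which corloc.lua and
detection_mAP.lua consume offline. Reading the averaged scores back is one call:

  local output = hdf5_load(opts.PATHS.SCORES_PATTERN:format('test'), 'output')
]=]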
outputs, 86 | }) 87 | 88 | print('DONE:', torch.toc(tic_start), 'sec') 89 | -------------------------------------------------------------------------------- /train.lua: -------------------------------------------------------------------------------- 1 | dofile('opts.lua') 2 | dofile('util.lua') 3 | dofile('dataset.lua') 4 | dofile('model/util.lua') 5 | 6 | require 'optim' 7 | dofile('fbnn_Optim.lua') 8 | 9 | assert(os.getenv('CUDA_VISIBLE_DEVICES') ~= nil and cutorch.getDeviceCount() <= 1, 'SHOULD RUN ON ONE GPU FOR NOW') 10 | 11 | torch.manualSeed(opts.SEED) 12 | cutorch.manualSeedAll(opts.SEED) 13 | 14 | example_loader_options_preset = { 15 | training = { 16 | numRoisPerImage = 8192, 17 | subset = 'trainval', 18 | hflips = true, 19 | numScales = 5, 20 | }, 21 | evaluate = { 22 | numRoisPerImage = 8192, 23 | subset = 'trainval', 24 | hflips = true, 25 | numScales = 1, 26 | } 27 | } 28 | 29 | if paths.extname(opts.PATHS.MODEL) == 'lua' then 30 | loaded = model_load(opts.PATHS.MODEL, opts) 31 | meta = { 32 | model_path = loaded.model_path, 33 | opts = opts, 34 | example_loader_options = example_loader_options_preset 35 | } 36 | log = {{meta = meta}} 37 | else 38 | loaded = model_load(opts.PATHS.MODEL) 39 | meta = loaded.meta 40 | log = loaded.log 41 | previous_epoch = loaded.epoch 42 | end 43 | 44 | batch_loader = ParallelBatchLoader(ExampleLoader(dataset, base_model.normalization_params, opts.IMAGE_SCALES, meta.example_loader_options)):setBatchSize({training = 1, evaluate = 1}) 45 | 46 | print(meta) 47 | 48 | assert(model):cuda() 49 | assert(criterion):cuda() 50 | collectgarbage() 51 | 52 | model:apply(function (x) x.for_each = x.apply end) 53 | optimizer = nn.Optim(model, optimState) 54 | optimalg = optim.sgd 55 | 56 | for epoch = (previous_epoch or 0) + 1, opts.NUM_EPOCHS do 57 | if epoch > optimState_annealed.epoch then 58 | optimizer:setParameters(optimState_annealed) 59 | end 60 | 61 | batch_loader:training() 62 | model:training() 63 | for batchIdx = 1, batch_loader:getNumBatches() -1 do 64 | tic = torch.tic() 65 | 66 | scale_batches = batch_loader:forward()[1] 67 | scale0_rois = scale_batches[1][2] 68 | batch_images, batch_rois, batch_labels = unpack(scale_batches[2]) 69 | batch_images_gpu = (batch_images_gpu or torch.CudaTensor()):resize(batch_images:size()):copy(batch_images) 70 | batch_labels_gpu = (batch_labels_gpu or torch.CudaTensor()):resize(batch_labels:size()):copy(batch_labels) 71 | 72 | cost = optimizer:optimize(optimalg, {batch_images_gpu, batch_rois}, batch_labels_gpu, criterion) 73 | 74 | collectgarbage() 75 | print('epoch', epoch, 'batch', batchIdx, cost, 'img/sec', batch_images:size(1) / torch.toc(tic)) 76 | end 77 | 78 | if epoch % 5 == 0 or epoch == opts.NUM_EPOCHS or epoch == 1 then 79 | batch_loader:evaluate() 80 | model:evaluate() 81 | scores, labels, rois, costs, outputs, corlocs = {}, {}, {}, {}, {}, {} 82 | for batchIdx = 1, batch_loader:getNumBatches() - 1 do 83 | tic = torch.tic() 84 | 85 | scale_batches = batch_loader:forward()[1] 86 | scale0_rois = scale_batches[1][2] 87 | scale_outputs, scale_scores, scale_costs = {}, {}, {} 88 | for i = 2, #scale_batches do 89 | batch_images, batch_rois, batch_labels = unpack(scale_batches[i]) 90 | batch_images_gpu = (batch_images_gpu or torch.CudaTensor()):resize(batch_images:size()):copy(batch_images) 91 | batch_labels_gpu = (batch_labels_gpu or torch.CudaTensor()):resize(batch_labels:size()):copy(batch_labels) 92 | 93 | batch_scores = model:forward({batch_images_gpu, batch_rois}) 94 | 95 | cost = 
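--[=[ [editor's note: summary annotation, not part of the original source]
train.lua drives the fbnn Optim wrapper: optimizer:optimize(optim.sgd, input,
target, criterion) performs forward, backward and the SGD step in one call, and
once epoch exceeds optimState_annealed.epoch the learning rate is annealed (5e-3
down to 5e-4 in wsddn_repro.lua) via optimizer:setParameters(optimState_annealed).
Every fifth epoch, plus the first and last, the loop here switches to evaluate
mode and recomputes validation cost, mAP and corloc; model_save checkpoints every
fifth epoch and at the end.
]=]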
criterion:forward(batch_scores, batch_labels_gpu) 96 | 97 | table.insert(scale_scores, (type(batch_scores) == 'table' and batch_scores[1] or batch_scores):float()) 98 | table.insert(scale_costs, cost) 99 | for _, output_field in ipairs(opts.OUTPUT_FIELDS) do 100 | module = model:findModules(output_field)[1] 101 | if module then 102 | scale_outputs[output_field] = scale_outputs[output_field] or {} 103 | table.insert(scale_outputs[output_field], module.output:transpose(2, 3):float()) 104 | end 105 | end 106 | end 107 | 108 | for output_field, output in pairs(scale_outputs) do 109 | outputs[output_field] = outputs[output_field] or {} 110 | table.insert(outputs[output_field], torch.cat(output, 1):mean(1)[1]) 111 | end 112 | 113 | table.insert(costs, torch.FloatTensor(scale_costs):mean()) 114 | table.insert(scores, torch.cat(scale_scores, 1):mean(1)) 115 | table.insert(labels, batch_labels:clone()) 116 | table.insert(rois, scale0_rois:narrow(scale0_rois:dim(), 1, 4):clone()[1]) 117 | 118 | collectgarbage() 119 | print('val', 'epoch', epoch, 'batch', batchIdx, costs[#costs], 'img/sec', (#scale_batches - 1) / torch.toc(tic)) 120 | end 121 | 122 | for output_field, output in pairs(outputs) do 123 | corlocs[output_field] = corloc(dataset[batch_loader.example_loader:getSubset(batch_loader.train)], {output, rois}) 124 | end 125 | 126 | table.insert(log, { 127 | training = false, 128 | epoch = epoch, 129 | mAP = dataset_tools.meanAP(torch.cat(scores, 1), torch.cat(labels, 1)), 130 | corlocs = corlocs, 131 | valCost = torch.FloatTensor(costs):mean(), 132 | }) 133 | end 134 | 135 | if epoch % 5 == 0 or epoch == opts.NUM_EPOCHS then 136 | model:clearState() 137 | model_save(opts.PATHS.CHECKPOINT_PATTERN:format(epoch), model, meta, epoch, log) 138 | end 139 | 140 | json_save(opts.PATHS.LOG, log) 141 | io.stderr:write('log in "', opts.PATHS.LOG, '"\n') 142 | end 143 | 144 | -------------------------------------------------------------------------------- /util.lua: -------------------------------------------------------------------------------- 1 | require 'hdf5' 2 | rapidjson = require 'rapidjson' 3 | 4 | function hdf5_save(path, obj) 5 | local h = hdf5.open(path, 'w') 6 | local function r(prefix, o) 7 | for k, v in pairs(o) do 8 | local p = prefix..'/'..k 9 | if torch.isTypeOf(v, torch.CudaTensor) then 10 | h:write(p, v:float()) 11 | elseif torch.isTensor(v) then 12 | h:write(p, v) 13 | elseif type(v) == 'number' then 14 | h:write(p, torch.DoubleTensor(1):fill(v)) 15 | elseif type(v) == 'string' then 16 | h:write(p, torch.CharTensor(torch.CharStorage():string(v))) 17 | elseif type(v) == 'boolean' then 18 | h:write(p, torch.IntTensor(1):fill(v and 1 or 0)) 19 | else 20 | r(p, v) 21 | end 22 | end 23 | end 24 | r('', obj) 25 | h:close() 26 | end 27 | 28 | function hdf5_load(path, fields) 29 | local res = {} 30 | 31 | local h = hdf5.open(path, 'r') 32 | if fields then 33 | local returnValue = false 34 | if type(fields) ~= 'table' then 35 | returnValue = true 36 | fields = {fields} 37 | end 38 | for _, f in ipairs(fields) do 39 | if not pcall(function() res[f] = h:read('/'..f):all() end) then 40 | res[f] = nil 41 | end 42 | end 43 | if returnValue then 44 | res = res[fields[1]] 45 | end 46 | else 47 | res = h:all() 48 | end 49 | h:close() 50 | 51 | local function dfs(obj) 52 | for k, v in pairs(obj) do 53 | if tonumber(k) ~= nil then 54 | obj[k] = nil 55 | k = tonumber(k) 56 | obj[k] = v 57 | end 58 | 59 | if torch.isTypeOf(v, torch.CharTensor) or torch.isTypeOf(v, torch.ByteTensor) then 60 | obj[k] = 
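--[=[ [editor's note: illustrative sketch, not part of the original source]
hdf5_save/hdf5_load round-trip plain Lua tables through typed tensors: numbers
become 1-element DoubleTensors, strings become CharTensors of their bytes, booleans
become 0/1 IntTensors, and nested tables become HDF5 groups; this dfs undoes each
encoding (the branch here turns Char/Byte tensors back into strings). So a
checkpoint written as, hypothetically,

  hdf5_save('data/tmp.h5', {meta = {epoch = 3, subset = 'trainval', done = false}})

reads back via hdf5_load('data/tmp.h5') as an identical nested table, which is how
checkpoints keep their opts/meta alongside the raw parameter tensors.
]=]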
v:storage():string() 61 | elseif torch.isTypeOf(v, torch.DoubleTensor) and v:nElement() == 1 then 62 | obj[k] = v:squeeze() 63 | elseif torch.isTypeOf(v, torch.IntTensor) and v:nElement() == 1 and (v:squeeze() == 0 or v:squeeze() == 1) then 64 | obj[k] = v:squeeze() == 1 and true or false 65 | elseif type(v) == 'table' then 66 | dfs(v) 67 | end 68 | end 69 | end 70 | 71 | if type(res) == 'table' then 72 | dfs(res) 73 | end 74 | 75 | return res 76 | end 77 | 78 | json_load = rapidjson.load 79 | json_save = function(path, obj) rapidjson.dump(obj, path, {pretty = true, sort_keys = true}) end 80 | 81 | function area_1(box) 82 | return (box[3] - box[1] + 1) * (box[4] - box[2] + 1) 83 | end 84 | 85 | function overlap(box1, box2) 86 | if torch.isTensor(box2) and box2:dim() == 2 then 87 | local res = box2.new(box2:size(1)) 88 | for i = 1, res:nElement() do 89 | res[i] = overlap(box1, box2[i]) 90 | end 91 | return res 92 | end 93 | 94 | local a1 = area_1(box1) 95 | local a2 = area_1(box2) 96 | 97 | local xx1 = math.max(box1[1], box2[1]) 98 | local yy1 = math.max(box1[2], box2[2]) 99 | local xx2 = math.min(box1[3], box2[3]) 100 | local yy2 = math.min(box1[4], box2[4]) 101 | 102 | local w = math.max(0.0, xx2 - xx1 + 1) 103 | local h = math.max(0.0, yy2 - yy1 + 1) 104 | local inter = w * h 105 | 106 | local ovr = inter / (a1 + a2 - inter) 107 | return ovr 108 | end 109 | 110 | function localizeMaxBox3d(scores, rois) 111 | if torch.isTensor(scores) and torch.isTensor(rois) then 112 | assert(scores:dim() == 3) -- numSamples x numClasses x numRois 113 | assert(rois:dim() == 3) -- numSamples x numRois x 4 114 | 115 | return rois:gather(2, ({scores:max(3)})[2]:expand(scores:size(1), scores:size(2), rois:size(3))) 116 | else 117 | assert(#scores == #rois) 118 | local res = torch.FloatTensor(#scores, scores[1]:size(1), 4) 119 | for exampleIdx = 1, res:size(1) do 120 | res[exampleIdx]:copy(rois[exampleIdx]:gather(1, ({scores[exampleIdx]:max(2)})[2]:expand(scores[exampleIdx]:size(1), rois[exampleIdx]:size(rois[exampleIdx]:dim())))) 121 | end 122 | return res 123 | end 124 | end 125 | 126 | function corloc(dataset_subset, localizedBoxes, classLabelInd) 127 | return mIOU(dataset_subset, localizedBoxes, 0.5, classLabelInd) 128 | end 129 | 130 | function mIOU(dataset_subset, localizedBoxes, corlocThreshold, classLabelInd) 131 | if type(localizedBoxes) == 'table' then 132 | localizedBoxes = localizeMaxBox3d(unpack(localizedBoxes)) 133 | end 134 | assert(localizedBoxes:dim() == 3 and localizedBoxes:size(3) == 4) 135 | local beg_classLabelInd = classLabelInd == nil and 1 or classLabelInd 136 | local end_classLabelInd = classLabelInd == nil and localizedBoxes:size(2) or classLabelInd 137 | 138 | local mIOUs = {} 139 | for classLabelInd = beg_classLabelInd, end_classLabelInd do 140 | local overlaps = {} 141 | for exampleIdx = 1, localizedBoxes:size(1) do 142 | local gtBoxes_ = dataset_subset:getGroundTruthBoxes(exampleIdx) 143 | local gtInds = gtBoxes_:select(2, 1):eq(classLabelInd):nonzero() 144 | if gtInds:nElement() > 0 then 145 | local gtBoxes = gtBoxes_:index(1, gtInds:squeeze(2)):narrow(2, 2, 4) 146 | local localizedBox = localizedBoxes[exampleIdx][classLabelInd] 147 | local maxOverlap = 0 148 | for i = 1, gtBoxes:size(1) do 149 | local o = overlap(gtBoxes[i], localizedBox) 150 | if corlocThreshold then 151 | o = o > corlocThreshold and 1 or 0 152 | end 153 | maxOverlap = math.max(maxOverlap, o) 154 | end 155 | table.insert(overlaps, maxOverlap) 156 | end 157 | end 158 | 159 | table.insert(mIOUs, 
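--[=[ [editor's note: illustrative sketch, not part of the original source]
corloc is mIOU with a 0.5 threshold: for every image containing a given class, the
top-scoring box counts as correct iff its IoU with some ground-truth box of that
class exceeds 0.5, and per-class hit rates are averaged. IoU uses inclusive pixel
areas (the +1 terms in area_1/overlap above). Worked example:

  overlap(torch.FloatTensor{0, 0, 9, 9}, torch.FloatTensor{5, 0, 14, 9})
  -- areas 100 and 100, intersection 5 * 10 = 50, union 150, IoU = 1/3

so two half-overlapping 10x10 boxes score about 0.33 and would not count as a
correct localization.
]=]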
torch.FloatTensor(#overlaps == 0 and {0.0} or overlaps):mean())
160 | end
161 | return torch.FloatTensor(mIOUs):mean()
162 | end
163 | 
164 | function nms_mask(boxes, scores, overlap_threshold, score_threshold)
165 | local function nmsEx(boxes, scores, mask)
166 | -- adapted from https://raw.githubusercontent.com/fmassa/object-detection.torch/master/nms.lua
167 | local xx1, yy1, xx2, yy2, w, h, area = boxes.new(), boxes.new(), boxes.new(), boxes.new(), boxes.new(), boxes.new(), boxes.new()
168 | local pick = torch.LongTensor()
169 | for classLabelInd = 1, scores:size(1) do
170 | local x1, y1, x2, y2 = boxes:select(2, 1), boxes:select(2, 2), boxes:select(2, 3), boxes:select(2, 4)
171 | area:cmul(x2 - x1 + 1, y2 - y1 + 1)
172 | pick:resize(area:size()):zero()
173 | 
174 | local _, I = scores[classLabelInd]:sort(1) -- ascending, so the best-scoring box sits at the end of I
175 | local overTh = scores[classLabelInd]:index(1, I):ge(score_threshold)
176 | if overTh:any() then
177 | I = I[overTh]
178 | else
179 | I:resize(0)
180 | end
181 | 
182 | local count = 1
183 | while I:numel() > 0 do
184 | local last = I:size(1)
185 | local i = I[last] -- greedily pick the highest-scoring remaining box
186 | 
187 | pick[count] = i
188 | count = count + 1
189 | 
190 | if last == 1 then
191 | break
192 | end
193 | 
194 | I = I[{{1, last-1}}]
195 | 
196 | xx1:index(x1, 1, I)
197 | yy1:index(y1, 1, I)
198 | xx2:index(x2, 1, I)
199 | yy2:index(y2, 1, I)
200 | 
201 | xx1:cmax(x1[i]) -- clip the remaining boxes against the picked box to get the intersection rectangles
202 | yy1:cmax(y1[i])
203 | xx2:cmin(x2[i])
204 | yy2:cmin(y2[i])
205 | 
206 | w:add(xx2, -1, xx1):add(1):cmax(0)
207 | h:add(yy2, -1, yy1):add(1):cmax(0)
208 | 
209 | local intersection = w:cmul(h) -- in-place: intersection aliases w
210 | local IoU = h -- reuse h's storage for the IoU values
211 | 
212 | xx1:index(area, 1, I) -- reuse xx1 to gather the areas of the remaining boxes
213 | IoU:cdiv(intersection, xx1 + area[i] - intersection)
214 | 
215 | I = I[IoU:le(overlap_threshold)] -- keep only boxes that do not overlap the picked box too much
216 | end
217 | 
218 | if count >= 2 then
219 | mask[classLabelInd]:scatter(1, pick[{{1, count-1}}], 1)
220 | end
221 | end
222 | end
223 | 
224 | local mask = {}
225 | 
226 | local threads = require 'threads'
227 | threads.Threads.serialization('threads.sharedserialize')
228 | local jobQueue = threads.Threads(16)
229 | for exampleIdx = 1, #scores do
230 | mask[exampleIdx] = torch.ByteTensor(scores[exampleIdx]:size()):zero()
231 | jobQueue:addjob(nmsEx, function() end, boxes[exampleIdx], scores[exampleIdx], mask[exampleIdx])
232 | end
233 | 
234 | jobQueue:synchronize()
235 | 
236 | return mask
237 | end
238 | 
--------------------------------------------------------------------------------
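--[=[ [editor's note: illustrative sketch, not part of the original source]
nms_mask runs greedy per-class NMS (keep the best-scoring box, suppress neighbors
above the IoU threshold) for every image on a thread pool and returns one
ByteTensor per image, shaped classes x rois, marking the surviving detections; this
is the mask the 'det' writer in pascal_voc.lua consumes. Hypothetical usage with
the thresholds from opts.lua:

  local mask = nms_mask(rois, scores, opts.NMS_OVERLAP_THRESHOLD, opts.NMS_SCORE_THRESHOLD)
  -- rois: list of numRois x 4 box tensors; scores: list of numClasses x numRois score tensors
  -- mask[i][c][r] == 1 keeps roi r of image i as a detection for class c
]=]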