├── LICENSE.md
├── README.md
├── corloc.lua
├── data
│   ├── README.md
│   └── common
│       ├── Makefile
│       └── README.md
├── dataset.lua
├── detection_mAP.lua
├── example_loader.lua
├── fbnn_Optim.lua
├── model
│   ├── CMakeLists.txt
│   ├── HingeCriterion.lua
│   ├── VGGF.lua
│   ├── additive.lua
│   ├── contextlocnet-scm-1.rockspec
│   ├── contrastive_a.lua
│   ├── contrastive_s.lua
│   ├── rectangularringroipooling.cu
│   ├── rectangularringroipooling.lua
│   ├── roi_transforms.lua
│   ├── util.lua
│   └── wsddn_repro.lua
├── opts.lua
├── parallel_batch_loader.lua
├── pascal_voc.lua
├── preprocess.lua
├── test.lua
├── train.lua
└── util.lua
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Vadim Kantorov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Information & Contact
2 | If you use this code, please cite our work:
3 | > @inproceedings{kantorov2016,
4 | >   title = {ContextLocNet: Context-aware Deep Network Models for Weakly Supervised Localization},
5 | >   author = {Kantorov, Vadim and Oquab, Maxime and Cho, Minsu and Laptev, Ivan},
6 | >   booktitle = {Proc. European Conference on Computer Vision (ECCV)},
7 | >   year = {2016}
8 | > }
9 |
10 | The results are available on the [project website](http://www.di.ens.fr/willow/research/contextlocnet) and in the [paper](http://arxiv.org/pdf/1609.04331.pdf) (arXiv [page](http://arxiv.org/abs/1609.04331)). Please submit bugs and ask questions directly on [GitHub](http://github.com/vadimkantorov/contextlocnet/issues); for other inquiries, please contact [Vadim Kantorov](mailto:vadim.kantorov@gmail.com).
11 |
12 | This is a joint work of [Vadim Kantorov](http://vadimkantorov.com), [Maxime Oquab](http://github.com/qassemoquab), [Minsu Cho](http://www.di.ens.fr/~mcho), and [Ivan Laptev](http://www.di.ens.fr/~laptev).
13 |
14 | # Running the code
15 | 1. Install the dependencies: [Torch](http://github.com/torch/distro) with [cuDNN](http://developer.nvidia.com/cudnn) support; [HDF5](http://www.hdfgroup.org/HDF5/); [matio](http://github.com/tbeu/matio); [protobuf](http://github.com/google/protobuf); the Luarocks packages [rapidjson](http://github.com/xpol/lua-rapidjson), [hdf5](http://github.com/deepmind/torch-hdf5), [matio](http://github.com/soumith/matio-ffi.torch), [loadcaffe](http://github.com/szagoruyko/loadcaffe), and [xml](https://github.com/lubyk/xml); and a MATLAB or [Octave](https://www.gnu.org/software/octave/) binary in PATH (for computing detection mAP).
16 |
17 | We strongly recommend using [wigwam](http://wigwam.in/) for this (fix the paths to `nvcc` and `libcudnn.so` before running the commands):
18 |
19 | ```shell
20 | wigwam install torch hdf5 matio protobuf octave -DPATH_TO_NVCC="/path/to/cuda/bin/nvcc" -DPATH_TO_CUDNN_SO="/path/to/cudnn/lib64/libcudnn.so"
21 | wigwam install lua-rapidjson lua-hdf5 lua-matio lua-loadcaffe lua-xml
22 | wigwam in # execute this to make the installed libraries available
23 | ```
24 | 2. Clone this repository, change the current directory to `contextlocnet`, and compile the ROI pooling module:
25 |
26 | ```shell
27 | git clone https://github.com/vadimkantorov/contextlocnet
28 | cd contextlocnet
29 | (cd ./model && luarocks make)
30 | ```
31 | 3. Download the [VOC 2007](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/) dataset, Koen van de Sande's [selective search windows](http://koen.me/research/selectivesearch/) for VOC 2007, and the [VGG-F](https://gist.github.com/ksimonyan/a32c9063ec8e1118221a) model by running the first command. Optionally download [VOC 2012](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) and Ross Girshick's [selective search windows](http://people.eecs.berkeley.edu/~rbg/fast-rcnn-data/selective_search_data.tgz) by manually downloading the [VOC 2012 test data tarball](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar) to `data/common` and then running the second command:
32 |
33 | ```shell
34 | make -f data/common/Makefile download_and_extract_VOC2007 download_VGGF
35 | # make -f data/common/Makefile download_and_extract_VOC2012
36 | ```
37 | 4. Choose a dataset, preprocess it, and convert the VGG-F model to the Torch format:
38 |
39 | ```shell
40 | export DATASET=VOC2007
41 | th preprocess.lua VOC VGGF
42 | ```
43 | 5. Select a GPU and train a model (our best model is `model/contrastive_s.lua`; other choices are `model/contrastive_a.lua`, `model/additive.lua`, and `model/wsddn_repro.lua`):
44 |
45 | ```shell
46 | export CUDA_VISIBLE_DEVICES=0
47 | th train.lua model/contrastive_s.lua # will produce data/model_epoch30.h5 and data/log.json
48 | ```
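Training progress is appended to `data/log.json`; its exact schema is whatever `train.lua` writes, but since the log is plain JSON, one quick way to follow it is to pretty-print the file with Python's standard library:

```shell
python -m json.tool data/log.json | tail -n 20
```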
49 | 6. Test the trained model and compute CorLoc and mAP:
50 |
51 | ```shell
52 | SUBSET=trainval th test.lua data/model_epoch30.h5 # will produce data/scores_trainval.h5
53 | th corloc.lua data/scores_trainval.h5 # will produce data/corloc.json
54 | SUBSET=test th test.lua data/model_epoch30.h5 # will produce data/scores_test.h5
55 | th detection_mAP.lua data/scores_test.h5 # will produce data/detection_mAP.json
56 | ```
57 |
58 | # Pretrained models for VOC 2007
59 | Model | model_epoch30.h5 | log.json | corloc.json | detection_mAP.json
60 | :---|:---:|:---:|:---:|:---:
61 | contrastive_s | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_model_epoch30.h5) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_log.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_corloc.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/contrastive_s_detection_mAP.json)
62 | wsddn_repro | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_model_epoch30.h5) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_log.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_corloc.json) | [link](https://github.com/vadimkantorov/contextlocnet/releases/download/1.0/wsddn_repro_detection_mAP.json)
63 |
64 | # Acknowledgements & Notes
65 | We warmly thank Hakan Bilen, Relja Arandjelović, and Soumith Chintala for fruitful discussions and help.
66 |
67 | This work would not have been possible without prior work: Hakan Bilen's [WSDDN](http://github.com/hbilen/WSDDN), Spyros Gidaris's [LocNet](http://github.com/gidariss/LocNet), Sergey Zagoruyko's [loadcaffe](http://github.com/szagoruyko/loadcaffe), and Facebook FAIR's [fbnn/Optim.lua](http://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua).
68 |
69 | The code is released under the [MIT](http://github.com/vadimkantorov/contextlocnet/blob/master/LICENSE.md) license.
70 |
--------------------------------------------------------------------------------
/corloc.lua:
--------------------------------------------------------------------------------
1 | dofile('opts.lua')
2 | dofile('util.lua')
3 | dofile('dataset.lua')
4 |
5 | opts.SCORES_FILES = #arg >= 1 and arg or {opts.PATHS.SCORES_PATTERN:format('trainval')}
6 |
7 | loaded = hdf5_load(opts.SCORES_FILES[1], {'subset', 'rois', 'labels', 'output'})
8 | outputs = {}
9 |
10 | for i = 1, #opts.SCORES_FILES do
11 | 	outputs_i = hdf5_load(opts.SCORES_FILES[i], 'outputs')
12 | 	for output_field, scores in pairs(outputs_i) do
13 | 		outputs[output_field] = outputs[output_field] or {}
14 | 		for exampleIdx = 1, #scores do
15 | 			outputs[output_field][exampleIdx] = (outputs[output_field][exampleIdx] or scores[exampleIdx]:clone():zero()):add(scores[exampleIdx]:div(#opts.SCORES_FILES))
16 | 		end
17 | 	end
18 | end
19 |
20 | res = {training_MAP = dataset_tools.meanAP(loaded.output, loaded.labels)}
21 | for output_field, scores in pairs(outputs) do
22 | 	res[output_field] = {by_class = {}, _mean = corloc(dataset[loaded.subset], {scores, loaded.rois})}
23 | 	for classLabelInd, classLabel in ipairs(classLabels) do
24 | 		res[output_field].by_class[classLabel] = corloc(dataset[loaded.subset], {scores, loaded.rois}, classLabelInd)
25 | 	end
26 | end
27 |
28 | json_save(opts.PATHS.CORLOC, res)
29 | print('result in ' ..
opts.PATHS.CORLOC) 30 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | This is a directory for produced models and results. 2 | -------------------------------------------------------------------------------- /data/common/Makefile: -------------------------------------------------------------------------------- 1 | DOWNLOADS = ./data/common 2 | 3 | download_and_extract_VOC2007: 4 | wget -P $(DOWNLOADS) \ 5 | http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar \ 6 | http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar \ 7 | http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar \ 8 | http://koen.me/research/downloads/SelectiveSearchVOC2007trainval.mat \ 9 | http://koen.me/research/downloads/SelectiveSearchVOC2007test.mat 10 | cd $(DOWNLOADS) && for f in VOCdevkit_08-Jun-2007.tar VOCtrainval_06-Nov-2007.tar VOCtest_06-Nov-2007.tar; do tar -xf $$f; done && mv VOCdevkit VOCdevkit_2007 11 | 12 | download_and_extract_VOC2012: 13 | wget -P $(DOWNLOADS) \ 14 | http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar \ 15 | http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar \ 16 | http://people.eecs.berkeley.edu/~rbg/fast-rcnn-data/selective_search_data.tgz 17 | cd $(DOWNLOADS) && for f in VOCdevkit_18-May-2011.tar VOCtrainval_11-May-2012.tar VOC2012test.tar selective_search_data.tgz; do tar -xf $$f; done && mv VOCdevkit VOCdevkit_2012 18 | 19 | download_VGGF: 20 | wget -P $(DOWNLOADS) \ 21 | http://www.robots.ox.ac.uk/~vgg/software/deep_eval/releases/bvlc/VGG_CNN_F.caffemodel \ 22 | https://gist.githubusercontent.com/ksimonyan/a32c9063ec8e1118221a/raw/6a3b8af023bae65669a4ceccd7331a5e7767aa4e/VGG_CNN_F_deploy.prototxt 23 | 24 | .PHONY: download_and_extract_VOC2007 download_and_extract_VOC2012 download_VGGF 25 | -------------------------------------------------------------------------------- /data/common/README.md: -------------------------------------------------------------------------------- 1 | This is a directory for the downloaded datasets and models. 2 | 3 | Instructions for downloading prerequisite data are at https://github.com/vadimkantorov/contextlocnet/blob/master/README.md (step 3). 4 | 5 | Links are in the [Makefile](https://github.com/vadimkantorov/contextlocnet/blob/master/data/common/Makefile). 
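For example, to fetch everything needed for the default VOC 2007 setup (the dataset, the selective search windows, and the VGG-F model), run the Makefile targets from the repository root, as in step 3 of the top-level README:

```shell
make -f data/common/Makefile download_and_extract_VOC2007 download_VGGF
```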
6 | -------------------------------------------------------------------------------- /dataset.lua: -------------------------------------------------------------------------------- 1 | if opts.DATASET == 'VOC2007' or opts.DATASET == 'VOC2012' then 2 | dataset_tools = dofile('pascal_voc.lua') 3 | classLabels = dataset_tools.classLabels 4 | numClasses = dataset_tools.numClasses 5 | end 6 | 7 | dataset = torch.load(opts.PATHS.DATASET_CACHED) 8 | 9 | dofile('parallel_batch_loader.lua') 10 | dofile('example_loader.lua') 11 | -------------------------------------------------------------------------------- /detection_mAP.lua: -------------------------------------------------------------------------------- 1 | dofile('opts.lua') 2 | dofile('util.lua') 3 | dofile('dataset.lua') 4 | threads = require 'threads' 5 | 6 | local MATLAB = assert((#sys.execute('which matlab') > 0 and 'matlab -r') or (#sys.execute('which octave') > 0 and 'octave --eval'), 'matlab or octave not found in PATH') 7 | local subset = 'test' 8 | output_field = opts.OUTPUT_FIELDS[1] 9 | 10 | opts.SCORES_FILES = #arg >= 1 and arg or {opts.PATHS.SCORES_PATTERN:format(subset)} 11 | rois = hdf5_load(opts.SCORES_FILES[1], 'rois') 12 | 13 | scores = {} 14 | for i = 1, #opts.SCORES_FILES do 15 | scores_i = hdf5_load(opts.SCORES_FILES[i], 'outputs/' .. output_field) 16 | for exampleIdx = 1, #scores_i do 17 | scores[exampleIdx] = (scores[exampleIdx] or scores_i[exampleIdx]:clone():zero()):add(scores_i[exampleIdx]:div(#opts.SCORES_FILES)) 18 | end 19 | end 20 | 21 | local detrespath = dataset_tools.package_submission(opts.PATHS.DATA, dataset, opts.DATASET, subset, 'comp4_det', rois, scores, nms_mask(rois, scores, opts.NMS_OVERLAP_THRESHOLD, opts.NMS_SCORE_THRESHOLD)) 22 | local opts = opts 23 | 24 | if dataset[subset].objectBoxes == nil then 25 | print('detection mAP cannot be computed for ' .. opts.DATASET .. '. Quitting.') 26 | print(('VOC submission saved in "%s/results-%s-%s-%s.tar.gz"'):format(opts.PATHS.DATA, opts.DATASET, 'comp4_det', subset)) 27 | os.exit(0) 28 | end 29 | 30 | res = {[output_field] = {_mean = nil, by_class = {}}} 31 | APs = torch.FloatTensor(numClasses):zero() 32 | 33 | local imgsetpath = paths.tmpname() 34 | os.execute(('sed \'s/$/ -1/\' %s > %s'):format(paths.concat(opts.PATHS.VOC_DEVKIT_VOCYEAR, 'ImageSets', 'Main', subset .. '.txt'), imgsetpath)) -- hack for octave 35 | 36 | jobQueue = threads.Threads(numClasses) 37 | for classLabelInd, classLabel in ipairs(classLabels) do 38 | jobQueue:addjob(function() 39 | os.execute(('%s "oldpwd = pwd; cd(\'%s\'); addpath(fullfile(pwd, \'VOCcode\')); VOCinit; cd(oldpwd); VOCopts.testset = \'%s\'; VOCopts.detrespath = \'%s\'; VOCopts.imgsetpath = \'%s\'; classLabel = \'%s\'; [rec, prec, ap] = VOCevaldet(VOCopts, \'comp4\', classLabel, false); dlmwrite(sprintf(VOCopts.detrespath, \'resu4\', classLabel), ap); quit;"'):format(MATLAB, paths.dirname(opts.PATHS.VOC_DEVKIT_VOCYEAR), subset, detrespath, imgsetpath, classLabel)) 40 | return tonumber(io.open(detrespath:format('resu4', classLabel)):read('*all')) 41 | end, function(ap) res[output_field].by_class[classLabel] = ap; APs[classLabelInd] = ap; end) 42 | end 43 | jobQueue:synchronize() 44 | os.execute('[ -t 1 ] && reset') 45 | 46 | res[output_field]._mean = APs:mean() 47 | 48 | json_save(opts.PATHS.DETECTION_MAP, res) 49 | print('result in ' .. 
opts.PATHS.DETECTION_MAP) 50 | -------------------------------------------------------------------------------- /example_loader.lua: -------------------------------------------------------------------------------- 1 | local ExampleLoader, parent = torch.class('ExampleLoader') 2 | 3 | function ExampleLoader:__init(dataset, normalization_params, scales, example_loader_opts) 4 | self.scales = scales 5 | self.normalization_params = normalization_params 6 | self.example_loader_opts = example_loader_opts 7 | self.dataset = dataset 8 | end 9 | 10 | local function table2d(I, J, elem_generator) 11 | local res = {} 12 | for i = 1, I do 13 | res[i] = {} 14 | for j = 1, J do 15 | res[i][j] = elem_generator(i, j) 16 | end 17 | end 18 | return res 19 | end 20 | 21 | local function subtract_mean(dst, src, normalization_params) 22 | local channel_order = assert(({rgb = {1, 2, 3}, bgr = {3, 2, 1}})[normalization_params.channel_order]) 23 | for c = 1, 3 do 24 | dst[c]:copy(src[channel_order[c]]):add(-normalization_params.rgb_mean[channel_order[c]]) 25 | if normalization_params.rgb_std then 26 | dst[c]:div(normalization_params.rgb_std[channel_order[c]]) 27 | end 28 | end 29 | end 30 | 31 | local function rescale(img, max_height, max_width) 32 | --local height_width = math.max(dhw_rgb:size(3), dhw_rgb:size(2)) 33 | --local im_scale = target_height_width / height_width 34 | local scale_factor = max_height / img:size(2) 35 | if torch.round(img:size(3) * scale_factor) > max_width then 36 | scale_factor = math.min(scale_factor, max_width / img:size(3)) 37 | end 38 | 39 | return image.scale(img, math.min(max_width, img:size(3) * scale_factor), math.min(max_height, img:size(2) * scale_factor)) 40 | end 41 | 42 | local function flip(images_j, rois_j) 43 | image.hflip(images_j, images_j) 44 | rois_j:select(2, 1):mul(-1):add(images_j:size(3)) 45 | rois_j:select(2, 3):mul(-1):add(images_j:size(3)) 46 | 47 | local tmp = rois_j:select(2, 1):clone() 48 | rois_j:select(2, 1):copy(rois_j:select(2, 3)) 49 | rois_j:select(2, 3):copy(tmp) 50 | end 51 | 52 | local function insert_dummy_dim1(...) 
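	-- in-place: prepends a singleton (batch) dimension to every tensor argument, e.g. C x H x W becomes 1 x C x H x W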
53 | for _, tensor in ipairs({...}) do 54 | tensor:resize(1, unpack(tensor:size():totable())) 55 | end 56 | end 57 | 58 | function ExampleLoader:makeBatchTable(batchSize, isTrainingPhase) 59 | local o = self:getPhaseOpts(isTrainingPhase) 60 | local num_jittered_copies = isTrainingPhase and 2 or (1 + (o.hflips and 2 or 1) * o.numScales) 61 | 62 | return table2d(batchSize, num_jittered_copies, function() return {torch.FloatTensor(), torch.FloatTensor(), torch.FloatTensor()} end) 63 | end 64 | 65 | function ExampleLoader:loadExample(exampleIdx, isTrainingPhase) 66 | local o = self:getPhaseOpts(isTrainingPhase) 67 | 68 | local labels_loaded = self.dataset[o.subset]:getLabels(exampleIdx) 69 | local rois_loaded = self.dataset[o.subset]:getProposals(exampleIdx) 70 | local jpeg_loaded = self.dataset[o.subset]:getJpegBytes(exampleIdx) 71 | local scales = o.scales or self.scales 72 | local normalization_params = self.normalization_params 73 | 74 | local scale_inds = isTrainingPhase and {0, torch.random(1, o.numScales)} or torch.range(0, o.numScales):totable() 75 | local hflips = isTrainingPhase and (o.hflips and torch.random(0, 1) or 0) or (o.hflips and 2 or 0) -- 0 is no_flip, 1 is do_flip, 2 is both 76 | local rois_perm = isTrainingPhase and torch.randperm(rois_loaded:size(1)) or torch.range(1, rois_loaded:size(1)) 77 | 78 | return function(indexInBatch, batchTable) 79 | image = image or require 'image' 80 | local img_original = image.decompressJPG(jpeg_loaded, 3, normalization_params.scale == 255 and 'byte' or 'float') 81 | local height_original, width_original = img_original:size(2), img_original:size(3) 82 | 83 | local rois_scale0 = rois_loaded:index(1, rois_perm:sub(1, math.min(rois_loaded:size(1), o.numRoisPerImage)):long()) 84 | rois_scale0[1]:copy(torch.FloatTensor{0, 0, width_original - 1, height_original - 1, 0.0}:sub(1, rois_scale0:size(2))) 85 | 86 | for j, scale_ind in ipairs(scale_inds) do 87 | local images, rois, labels = unpack(batchTable[indexInBatch][j]) 88 | 89 | local img_scaled = scale_ind == 0 and img_original:clone() or rescale(img_original, scales[scale_ind][1], scales[scale_ind][2]) 90 | local width_scaled, height_scaled = img_scaled:size(3), img_scaled:size(2) 91 | 92 | subtract_mean(images:resize(img_scaled:size()), img_scaled, normalization_params) 93 | rois:cmul(rois_scale0, torch.FloatTensor{{width_scaled / width_original, height_scaled / height_original, width_scaled / width_original, height_scaled / height_original, 1.0}}:narrow(2, 1, rois_scale0:size(2)):contiguous():expandAs(rois_scale0)) 94 | labels:resize(labels_loaded:size()):copy(labels_loaded) 95 | 96 | if hflips == 1 then 97 | flip(images, rois) 98 | elseif scale_ind ~= 0 and hflips == 2 then 99 | local jj = #batchTable[indexInBatch] - j + 2 100 | local images_flipped, rois_flipped, labels_flipped = unpack(batchTable[indexInBatch][jj]) 101 | images_flipped:resizeAs(images):copy(images) 102 | rois_flipped:resizeAs(rois):copy(rois) 103 | labels_flipped:resizeAs(labels):copy(labels) 104 | flip(images_flipped, rois_flipped) 105 | insert_dummy_dim1(images_flipped, rois_flipped, labels_flipped) 106 | end 107 | 108 | insert_dummy_dim1(images, rois, labels) 109 | end 110 | 111 | collectgarbage() 112 | end 113 | end 114 | 115 | function ExampleLoader:getNumExamples(isTrainingPhase) 116 | return self.dataset[self:getSubset(isTrainingPhase)]:getNumExamples() 117 | end 118 | 119 | function ExampleLoader:getPhaseOpts(isTrainingPhase) 120 | return isTrainingPhase and self.example_loader_opts['training'] or 
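	-- any phase other than training falls back to the evaluation options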
self.example_loader_opts['evaluate'] 121 | end 122 | 123 | function ExampleLoader:getSubset(isTrainingPhase) 124 | return self:getPhaseOpts(isTrainingPhase).subset 125 | end 126 | -------------------------------------------------------------------------------- /fbnn_Optim.lua: -------------------------------------------------------------------------------- 1 | -- This file is copied from Facebook FAIR's fbnn project: https://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua 2 | -- Copyright 2004-present Facebook. All Rights Reserved. 3 | 4 | local pl = require('pl.import_into')() 5 | 6 | -- from fblualib/fb/util/data.lua , copied here because fblualib is not rockspec ready yet. 7 | -- deepcopy routine that assumes the presence of a 'clone' method in user 8 | -- data should be used to deeply copy. This matches the behavior of Torch 9 | -- tensors. 10 | local function deepcopy(x) 11 | local typename = type(x) 12 | if typename == "userdata" then 13 | return x:clone() 14 | end 15 | if typename == "table" then 16 | local retval = { } 17 | for k,v in pairs(x) do 18 | retval[deepcopy(k)] = deepcopy(v) 19 | end 20 | return retval 21 | end 22 | return x 23 | end 24 | 25 | local Optim, parent = torch.class('nn.Optim') 26 | 27 | 28 | -- Returns weight parameters and bias parameters and associated grad parameters 29 | -- for this module. Annotates the return values with flag marking parameter set 30 | -- as bias parameters set 31 | function Optim.weight_bias_parameters(module) 32 | local weight_params, bias_params 33 | if module.weight then 34 | weight_params = {module.weight, module.gradWeight} 35 | weight_params.is_bias = false 36 | end 37 | if module.bias then 38 | bias_params = {module.bias, module.gradBias} 39 | bias_params.is_bias = true 40 | end 41 | return {weight_params, bias_params} 42 | end 43 | 44 | -- The regular `optim` package relies on `getParameters`, which is a 45 | -- beastly abomination before all. This `optim` package uses separate 46 | -- optim state for each submodule of a `nn.Module`. 47 | function Optim:__init(model, optState, checkpoint_data) 48 | assert(model) 49 | assert(checkpoint_data or optState) 50 | assert(not (checkpoint_data and optState)) 51 | 52 | self.model = model 53 | self.modulesToOptState = {} 54 | -- Keep this around so we update it in setParameters 55 | self.originalOptState = optState 56 | 57 | -- Each module has some set of parameters and grad parameters. Since 58 | -- they may be allocated discontinuously, we need separate optState for 59 | -- each parameter tensor. self.modulesToOptState maps each module to 60 | -- a lua table of optState clones. 
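    -- Fresh start: clone the optimizer state once per parameter tensor
    -- (weights and biases separately) of every module; when restoring from a
    -- checkpoint, the per-module state saved by Optim:save() is reused instead.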
61 | if not checkpoint_data then 62 | self.model:for_each(function(module) 63 | self.modulesToOptState[module] = { } 64 | local params = self.weight_bias_parameters(module) 65 | -- expects either an empty table or 2 element table, one for weights 66 | -- and one for biases 67 | assert(pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2) 68 | for i, _ in ipairs(params) do 69 | self.modulesToOptState[module][i] = deepcopy(optState) 70 | if params[i] and params[i].is_bias then 71 | -- never regularize biases 72 | self.modulesToOptState[module][i].weightDecay = 0.0 73 | end 74 | end 75 | assert(module) 76 | assert(self.modulesToOptState[module]) 77 | end) 78 | else 79 | local state = checkpoint_data.optim_state 80 | local modules = {} 81 | self.model:for_each(function(m) table.insert(modules, m) end) 82 | assert(pl.tablex.compare_no_order(modules, pl.tablex.keys(state))) 83 | self.modulesToOptState = state 84 | end 85 | end 86 | 87 | function Optim:save() 88 | return { 89 | optim_state = self.modulesToOptState 90 | } 91 | end 92 | 93 | local function _type_all(obj, t) 94 | for k, v in pairs(obj) do 95 | if type(v) == 'table' then 96 | _type_all(v, t) 97 | else 98 | local tn = torch.typename(v) 99 | if tn and tn:find('torch%..+Tensor') then 100 | obj[k] = v:type(t) 101 | end 102 | end 103 | end 104 | end 105 | 106 | function Optim:type(t) 107 | self.model:for_each(function(module) 108 | local state= self.modulesToOptState[module] 109 | assert(state) 110 | _type_all(state, t) 111 | end) 112 | end 113 | 114 | local function get_device_for_module(mod) 115 | local dev_id = nil 116 | for name, val in pairs(mod) do 117 | if torch.typename(val) == 'torch.CudaTensor' then 118 | local this_dev = val:getDevice() 119 | if this_dev ~= 0 then 120 | -- _make sure the tensors are allocated consistently 121 | assert(dev_id == nil or dev_id == this_dev) 122 | dev_id = this_dev 123 | end 124 | end 125 | end 126 | return dev_id -- _may still be zero if none are allocated. 127 | end 128 | 129 | local function on_device_for_module(mod, f) 130 | local this_dev = get_device_for_module(mod) 131 | if this_dev ~= nil then 132 | return cutorch.withDevice(this_dev, f) 133 | end 134 | return f() 135 | end 136 | 137 | function Optim:optimize(optimMethod, inputs, targets, criterion, scale) 138 | assert(optimMethod) 139 | assert(inputs) 140 | assert(targets) 141 | assert(criterion) 142 | assert(self.modulesToOptState) 143 | 144 | self.model:zeroGradParameters() 145 | local output = self.model:forward(inputs) 146 | 147 | local err = criterion:forward(output, targets) 148 | 149 | if err ~= 0 then 150 | 151 | local df_do = criterion:backward(output, targets) 152 | self.model:backward(inputs, df_do, scale) 153 | 154 | -- We'll set these in the loop that iterates over each module. Get them 155 | -- out here to be captured. 
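        -- fEvalMod ignores its argument and returns the captured (err, curGrad);
        -- curParam and curGrad are re-pointed in the loop below before each
        -- optimMethod call, so each parameter tensor is updated with its own state.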
156 | local curGrad 157 | local curParam 158 | local function fEvalMod(x) 159 | return err, curGrad 160 | end 161 | 162 | for curMod, opt in pairs(self.modulesToOptState) do 163 | on_device_for_module(curMod, function() 164 | local curModParams = self.weight_bias_parameters(curMod) 165 | -- expects either an empty table or 2 element table, one for weights 166 | -- and one for biases 167 | assert(pl.tablex.size(curModParams) == 0 or 168 | pl.tablex.size(curModParams) == 2) 169 | if curModParams then 170 | for i, tensor in ipairs(curModParams) do 171 | if curModParams[i] then 172 | -- expect param, gradParam pair 173 | curParam, curGrad = table.unpack(curModParams[i]) 174 | assert(curParam and curGrad) 175 | optimMethod(fEvalMod, curParam, opt[i]) 176 | end 177 | end 178 | end 179 | end) 180 | end 181 | 182 | end 183 | 184 | return err, output 185 | end 186 | 187 | function Optim:setParameters(newParams) 188 | assert(newParams) 189 | assert(type(newParams) == 'table') 190 | local function splice(dest, src) 191 | for k,v in pairs(src) do 192 | dest[k] = v 193 | end 194 | end 195 | 196 | splice(self.originalOptState, newParams) 197 | for _,optStates in pairs(self.modulesToOptState) do 198 | for i,optState in pairs(optStates) do 199 | assert(type(optState) == 'table') 200 | splice(optState, newParams) 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /model/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # cloned originally from https://github.com/vadimkantorov/fast-rcnn.torch/commit/3309057f05d0e36059b5e1213a180b3d616f4308 2 | 3 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR) 4 | CMAKE_POLICY(VERSION 2.8) 5 | 6 | SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}") 7 | 8 | FIND_PACKAGE(Torch REQUIRED) 9 | 10 | # Flags 11 | # When using MSVC 12 | IF(MSVC) 13 | # we want to respect the standard, and we are bored of those **** . 14 | ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) 15 | ENDIF(MSVC) 16 | 17 | # OpenMP support? 18 | SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") 19 | IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) 20 | EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) 21 | STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) 22 | MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") 23 | IF (DARWIN_VERSION GREATER 9) 24 | SET(APPLE_OPENMP_SUCKS 1) 25 | ENDIF (DARWIN_VERSION GREATER 9) 26 | EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion 27 | OUTPUT_VARIABLE GCC_VERSION) 28 | IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) 29 | MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") 30 | MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") 31 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas") 32 | SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" 
FORCE) 33 | ENDIF () 34 | ENDIF () 35 | 36 | IF (WITH_OPENMP) 37 | FIND_PACKAGE(OpenMP) 38 | IF(OPENMP_FOUND) 39 | MESSAGE(STATUS "Compiling with OpenMP support") 40 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 41 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 42 | SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 43 | ENDIF(OPENMP_FOUND) 44 | ENDIF (WITH_OPENMP) 45 | 46 | LINK_DIRECTORIES("${Torch_INSTALL_LIB}") 47 | 48 | FIND_PACKAGE(CUDA 4.0 REQUIRED) 49 | 50 | LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_35") 51 | 52 | INCLUDE_DIRECTORIES("${Torch_INSTALL_INCLUDE}/THC") 53 | SET(src-cuda rectangularringroipooling.cu) 54 | CUDA_ADD_LIBRARY(cucontextlocnet MODULE ${src-cuda}) 55 | TARGET_LINK_LIBRARIES(cucontextlocnet luaT THC TH) 56 | IF(APPLE) 57 | SET_TARGET_PROPERTIES(cucontextlocnet PROPERTIES 58 | LINK_FLAGS "-undefined dynamic_lookup") 59 | ENDIF() 60 | 61 | 62 | ### Torch packages supposes libraries prefix is "lib" 63 | SET_TARGET_PROPERTIES(cucontextlocnet PROPERTIES 64 | PREFIX "lib" 65 | IMPORT_PREFIX "lib") 66 | 67 | INSTALL(TARGETS cucontextlocnet 68 | RUNTIME DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}" 69 | LIBRARY DESTINATION "${Torch_INSTALL_LUA_CPATH_SUBDIR}") 70 | -------------------------------------------------------------------------------- /model/HingeCriterion.lua: -------------------------------------------------------------------------------- 1 | local HingeCriterion, parent = torch.class('HingeCriterion', 'nn.Criterion') 2 | 3 | function HingeCriterion:__init(margin) 4 | parent.__init(self) 5 | self.sizeAverage=true 6 | 7 | self.sequence=nn.Sequential() 8 | self.sequence:add(nn.CMulTable()) 9 | self.sequence:add(nn.MulConstant(-1,true)) 10 | self.sequence:add(nn.AddConstant(margin or 1, true)) 11 | self.sequence:add(nn.ReLU(true)) 12 | 13 | self.gradient=torch.Tensor() 14 | end 15 | 16 | function HingeCriterion:setFactor(factor) 17 | self.factor = factor 18 | return self 19 | end 20 | 21 | function HingeCriterion:updateOutput(input, target) 22 | self.sequence:forward({input,target}) 23 | self.output=self.sequence.output:sum() 24 | local p = (self.sizeAverage and 1/input:size(1) or 1) * (self.factor or 1) 25 | self.output = self.output * p 26 | return self.output 27 | end 28 | 29 | 30 | function HingeCriterion:updateGradInput(input, target) 31 | local p = (self.sizeAverage and 1/input:size(1) or 1) * (self.factor or 1) 32 | 33 | self.gradient:resize(self.sequence.output:size()):fill(p) 34 | self.sequence:backward({input,target}, self.gradient) 35 | self.gradInput=self.sequence.gradInput[1] 36 | return self.gradInput 37 | end 38 | 39 | function HingeCriterion:type(type) 40 | parent.type(self, type) 41 | self.sequence:type(type) 42 | self.gradient:type(type) 43 | return self 44 | end 45 | -------------------------------------------------------------------------------- /model/VGGF.lua: -------------------------------------------------------------------------------- 1 | return function(modelPath) 2 | local vggf = torch.load(modelPath) 3 | 4 | local conv_layers = nn.Sequential() 5 | for i = 1, 14 do 6 | conv_layers:add(vggf:get(i)) 7 | end 8 | 9 | local fc_layers = nn.Sequential() 10 | for i = 17, 22 do 11 | fc_layers:add(vggf:get(i)) 12 | end 13 | 14 | return { 15 | conv_layers = conv_layers, 16 | fc_layers = fc_layers, 17 | channel_order = 'bgr', 18 | spatial_scale = 1 / 16, 19 | fc_layers_output_size = 4096, 20 | pooled_height = 6, 21 | pooled_width = 6, 22 | spp_correction_params = {offset0 = -18, offset 
= 0.0}, 23 | --spp_correction_params = {offset0 = -18.0, offset = 9.5}, 24 | fc_layers_view = function(RoiReshaper) return nn.View(-1):setNumInputDims(3) end, 25 | normalization_params = {channel_order = 'bgr', rgb_mean = {122.7717, 115.9465, 102.9801}, rgb_std = nil, scale = 255} 26 | } 27 | end 28 | -------------------------------------------------------------------------------- /model/additive.lua: -------------------------------------------------------------------------------- 1 | fc8r = nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8r') 2 | 3 | model = nn.Sequential(): 4 | add(nn.ParallelTable(): 5 | add(base_model.conv_layers): 6 | add(RoiReshaper:StoreShape()) 7 | ): 8 | add(nn.ConcatTable(): 9 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal)): 10 | add(branch_transform_rois_share_fc_layers(base_model, ContextRegion)) 11 | ): 12 | add(nn.ConcatTable(): 13 | add(nn.Sequential(): 14 | add(nn.SelectTable(1)): 15 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 16 | add(RoiReshaper:RestoreShape()): 17 | named('output_fc8c') 18 | ): 19 | add(nn.Sequential(): 20 | add(nn.ParallelTable(): 21 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('output_fc8d_orig')): 22 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('output_fc8d_context')) 23 | ): 24 | add(nn.CAddTable()): 25 | add(RoiReshaper:RestoreShape(4)): 26 | add(cudnn.SpatialSoftMax()): 27 | add(nn.Squeeze(4)): 28 | named('output_softmax') 29 | ) 30 | ): 31 | add(nn.CMulTable():named('output_prod')): 32 | add(nn.Sum(2)) 33 | 34 | criterion = HingeCriterion():setFactor(1 / numClasses) 35 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4} 36 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10} 37 | -------------------------------------------------------------------------------- /model/contextlocnet-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "contextlocnet" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "git://github.com/vadimkantorov/contextlocnet", 6 | tag = "master" 7 | } 8 | 9 | dependencies = { 10 | "torch >= 7.0", 11 | "nn", 12 | "cunn", 13 | } 14 | 15 | build = { 16 | type = "command", 17 | build_command = [[ 18 | cmake -E make_directory build; 19 | cd build; 20 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." 
-DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 21 | $(MAKE) 22 | ]], 23 | install_command = "cd build && $(MAKE) install" 24 | } 25 | -------------------------------------------------------------------------------- /model/contrastive_a.lua: -------------------------------------------------------------------------------- 1 | fc8r = nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8r') 2 | 3 | model = nn.Sequential(): 4 | add(nn.ParallelTable(): 5 | add(base_model.conv_layers): 6 | add(RoiReshaper:StoreShape()) 7 | ): 8 | add(nn.ConcatTable(): 9 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal)): 10 | add(branch_transform_rois_share_fc_layers(base_model, ContextRegion)) 11 | ): 12 | add(nn.ConcatTable(): 13 | add(nn.Sequential(): 14 | add(nn.SelectTable(1)): 15 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 16 | add(RoiReshaper:RestoreShape()): 17 | named('output_fc8c') 18 | ): 19 | add(nn.Sequential(): 20 | add(nn.ParallelTable(): 21 | add(share_weight_bias(fc8r):named('output_fc8d_orig')): 22 | add(nn.Sequential(): 23 | add(share_weight_bias(fc8r)): 24 | add(nn.MulConstant(-1)): 25 | named('output_fc8d_context') 26 | ) 27 | ): 28 | add(nn.CAddTable()): 29 | add(RoiReshaper:RestoreShape(4)): 30 | add(cudnn.SpatialSoftMax()): 31 | add(nn.Squeeze(4)): 32 | named('output_softmax') 33 | ) 34 | ): 35 | add(nn.CMulTable():named('output_prod')): 36 | add(nn.Sum(2)) 37 | 38 | criterion = HingeCriterion():setFactor(1 / numClasses) 39 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4} 40 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10} 41 | -------------------------------------------------------------------------------- /model/contrastive_s.lua: -------------------------------------------------------------------------------- 1 | fc8r = nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8r') 2 | 3 | model = nn.Sequential(): 4 | add(nn.ParallelTable(): 5 | add(base_model.conv_layers): 6 | add(RoiReshaper:StoreShape()) 7 | ): 8 | add(nn.ConcatTable(): 9 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal)): 10 | add(branch_transform_rois_share_fc_layers(base_model, BoxOriginal_ring)): 11 | add(branch_transform_rois_share_fc_layers(base_model, ContextRegion)) 12 | ): 13 | add(nn.ConcatTable(): 14 | add(nn.Sequential(): 15 | add(nn.SelectTable(1)): 16 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 17 | add(RoiReshaper:RestoreShape()): 18 | named('output_fc8c') 19 | ): 20 | add(nn.Sequential(): 21 | add(nn.ConcatTable(): 22 | add(nn.Sequential(): 23 | add(nn.SelectTable(2)): 24 | add(share_weight_bias(fc8r)): 25 | named('output_fc8d_origring') 26 | ): 27 | add(nn.Sequential(): 28 | add(nn.SelectTable(3)): 29 | add(share_weight_bias(fc8r)): 30 | add(nn.MulConstant(-1)): 31 | named('output_fc8d_context') 32 | ) 33 | ): 34 | add(nn.CAddTable()): 35 | add(RoiReshaper:RestoreShape(4)): 36 | add(cudnn.SpatialSoftMax()): 37 | add(nn.Squeeze(4)): 38 | named('output_softmax') 39 | ) 40 | ): 41 | add(nn.CMulTable():named('output_prod')): 42 | add(nn.Sum(2)) 43 | 44 | --classification_criterion = nn.BCECriterion(nil, false) 45 | --classification_criterion.updateOutput = function(self, input, target) return nn.BCECriterion.updateOutput(self, input, target * 0.5 + 0.5) end 46 | --classification_criterion.updateGradInput = function(self, input, target) return nn.BCECriterion.updateGradInput(self, input, target * 0.5 + 0.5) end 47 | 
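-- (the BCE variant above remaps the {-1, 1} hinge-style labels to {0, 1} via target * 0.5 + 0.5; it is kept commented out in favor of the hinge loss below)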
--criterion = classification_criterion
48 |
49 | criterion = HingeCriterion():setFactor(1 / numClasses)
50 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4}
51 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10}
52 |
--------------------------------------------------------------------------------
/model/rectangularringroipooling.cu:
--------------------------------------------------------------------------------
1 | // copied from https://github.com/gidariss/caffe_LocNet/blob/d2ba49552068958556b98ba382610ea865add17c/src/caffe/layers/region_pooling_layer.cu
2 |
3 | #include "luaT.h"
4 | #include "THC.h"
5 |
6 | #include <stdio.h>
7 | #include "THCGeneral.h"
8 |
9 | #define CAFFE_CUDA_NUM_THREADS 1024
10 |
11 | // CUDA: various checks for different function calls.
12 | #define CUDA_CHECK(condition) \
13 |   /* Code block avoids redefinition of cudaError_t error */ \
14 |   do { \
15 |     cudaError_t error = condition; \
16 |     if(error != cudaSuccess) { printf("CUDA ERROR. %s\n", cudaGetErrorString(error)); }; \
17 |   } while (0)
18 |
19 | // CUDA: number of blocks for threads.
20 | inline int CAFFE_GET_BLOCKS(const int N) {
21 |   return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
22 | }
23 |
24 | // CUDA: grid stride looping
25 | #define CUDA_KERNEL_LOOP(i, n) \
26 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
27 |        i < (n); \
28 |        i += blockDim.x * gridDim.x)
29 |
30 | // CUDA: check for error after kernel execution and exit loudly if there is one.
31 | #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError())
32 |
33 | template <typename Dtype>
34 | __global__ void ROIPoolForward(const int nthreads, const Dtype* bottom_data,
35 |     const Dtype spatial_scale, const int channels, const int height,
36 |     const int width, const int pooled_height, const int pooled_width,
37 |     const Dtype* bottom_rois, Dtype* top_data, int* argmax_data) {
38 |   CUDA_KERNEL_LOOP(index, nthreads) {
39 |     // (n, c, ph, pw) is an element in the pooled output
40 |     int pw = index % pooled_width;
41 |     int ph = (index / pooled_width) % pooled_height;
42 |     int c = (index / pooled_width / pooled_height) % channels;
43 |     int n = index / pooled_width / pooled_height / channels;
44 |
45 |     // For each ROI R = [batch_index, x_outer_1, y_outer_1, x_outer_2, y_outer_2, x_inner_1, y_inner_1, x_inner_2, y_inner_2]:
46 |     // where R_outer = [x_outer_1, y_outer_1, x_outer_2, y_outer_2] is the outer rectangle of the region and
47 |     // R_inner = [x_inner_1, y_inner_1, x_inner_2, y_inner_2] is the inner rectangle of the region,
48 |     // max-pool over R while ignoring (treating as zero) the activations that lie inside the inner rectangle R_inner
49 |
50 |     bottom_rois += n * 9;
51 |     int roi_batch_ind = bottom_rois[0];
52 |
53 |
54 |     // outer rectangle of the region
55 |     int roi_start_w = int(bottom_rois[1] );//* spatial_scale);
56 |     int roi_start_h = int(bottom_rois[2] );//* spatial_scale);
57 |     int roi_end_w = int(bottom_rois[3] );//* spatial_scale);
58 |     int roi_end_h = int(bottom_rois[4] );//* spatial_scale);
59 |
60 |     // inner rectangle of the region
61 |     int roi_start_w_in = int(bottom_rois[5]);//* spatial_scale);
62 |     int roi_start_h_in = int(bottom_rois[6]);//* spatial_scale);
63 |     int roi_end_w_in = int(bottom_rois[7]);//* spatial_scale);
64 |     int roi_end_h_in = int(bottom_rois[8]);//* spatial_scale);
65 |
66 |     // Force malformed ROIs to be 1x1
67 |     int roi_width = max(roi_end_w - roi_start_w + 1, 1);
68 |     int roi_height = max(roi_end_h - roi_start_h + 1, 1);
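    // each pooled output cell (ph, pw) max-pools over a bin of roughly (roi_height / pooled_height) x (roi_width / pooled_width) feature-map cells, clipped to the map borders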
69 |     Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height);
70 |     Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width);
71 |
72 |     const int hstart = min(height, max(0, static_cast<int>(floor(static_cast<Dtype>(ph) * bin_size_h)) + roi_start_h));
73 |     const int hend = min(height, max(0, static_cast<int>(ceil( static_cast<Dtype>(ph+1) * bin_size_h)) + roi_start_h));
74 |     const int wstart = min(width, max(0, static_cast<int>(floor(static_cast<Dtype>(pw) * bin_size_w)) + roi_start_w));
75 |     const int wend = min(width, max(0, static_cast<int>(ceil( static_cast<Dtype>(pw+1) * bin_size_w)) + roi_start_w));
76 |
77 |     Dtype maxval = 0;
78 |
79 |     int maxidx = -1;
80 |     bottom_data += (roi_batch_ind * channels + c) * height * width;
81 |     for (int h = hstart; h < hend; ++h) {
82 |       for (int w = wstart; w < wend; ++w) {
83 |         if (!(w > roi_start_w_in && w < roi_end_w_in && h > roi_start_h_in && h < roi_end_h_in)) {
84 |           // if it is not inside the inner rectangle of the region
85 |           int bottom_index = h * width + w;
86 |           if (bottom_data[bottom_index] > maxval) {
87 |             maxval = bottom_data[bottom_index];
88 |             maxidx = bottom_index;
89 |           }
90 |         }
91 |       }
92 |     }
93 |     top_data[index] = maxval;
94 |     argmax_data[index] = maxidx;
95 |   }
96 | }
97 |
98 | template <typename Dtype>
99 | __global__ void ROIPoolBackward(const int nthreads, const Dtype* top_diff,
100 |     const int* argmax_data, const int num_rois, const Dtype spatial_scale,
101 |     const int channels, const int height, const int width,
102 |     const int pooled_height, const int pooled_width, Dtype* bottom_diff,
103 |     const Dtype* bottom_rois) {
104 |   CUDA_KERNEL_LOOP(index, nthreads) {
105 |     // (n, c, h, w) coords in bottom data
106 |     int w = index % width;
107 |     int h = (index / width) % height;
108 |     int c = (index / width / height) % channels;
109 |     int n = index / width / height / channels;
110 |
111 |     Dtype gradient = 0;
112 |     // Accumulate gradient over all ROIs that pooled this element
113 |     for (int roi_n = 0; roi_n < num_rois; ++roi_n) {
114 |       const Dtype* offset_bottom_rois = bottom_rois + roi_n * 9;
115 |       int roi_batch_ind = offset_bottom_rois[0];
116 |       // Skip if ROI's batch index doesn't match n
117 |       if (n != roi_batch_ind) {
118 |         continue;
119 |       }
120 |
121 |
122 |       // outer rectangle of the region
123 |       int roi_start_w = int(offset_bottom_rois[1]);// * spatial_scale);
124 |       int roi_start_h = int(offset_bottom_rois[2]);// * spatial_scale);
125 |       int roi_end_w = int(offset_bottom_rois[3]);// * spatial_scale);
126 |       int roi_end_h = int(offset_bottom_rois[4]);// * spatial_scale);
127 |
128 |       // inner rectangle of the region
129 |       int roi_start_w_in = int(offset_bottom_rois[5]);// * spatial_scale);
130 |       int roi_start_h_in = int(offset_bottom_rois[6]);// * spatial_scale);
131 |       int roi_end_w_in = int(offset_bottom_rois[7]);// * spatial_scale);
132 |       int roi_end_h_in = int(offset_bottom_rois[8]);// * spatial_scale);
133 |
134 |
135 |       // Skip if ROI doesn't include (h, w)
136 |       const bool in_roi = (w >= roi_start_w && w <= roi_end_w &&
137 |                            h >= roi_start_h && h <= roi_end_h) &&
138 |                           !(w > roi_start_w_in && w < roi_end_w_in &&
139 |                             h > roi_start_h_in && h < roi_end_h_in);
140 |
141 |       if (!in_roi) {
142 |         continue;
143 |       }
144 |
145 |       int top_offset = (roi_n * channels + c) * pooled_height * pooled_width;
146 |       const Dtype* offset_top_diff = top_diff + top_offset;
147 |       const int* offset_argmax_data = argmax_data + top_offset;
148 |
149 |       // Compute feasible set of pooled units that could have pooled
150 |       // this bottom unit
151 |
152 |       // Force malformed ROIs to be 1x1
153 |       int roi_width = max(roi_end_w - roi_start_w + 1, 1);
154 |       int roi_height = max(roi_end_h - roi_start_h + 1, 1);
155 |
156 |       Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height);
157 |       Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width);
158 |
159 |       int phstart = floor(static_cast<Dtype>(h - roi_start_h) / bin_size_h);
160 |       int phend = ceil(static_cast<Dtype>(h - roi_start_h + 1) / bin_size_h);
161 |       int pwstart = floor(static_cast<Dtype>(w - roi_start_w) / bin_size_w);
162 |       int pwend = ceil(static_cast<Dtype>(w - roi_start_w + 1) / bin_size_w);
163 |
164 |       phstart = min(max(phstart, 0), pooled_height);
165 |       phend = min(max(phend, 0), pooled_height);
166 |       pwstart = min(max(pwstart, 0), pooled_width);
167 |       pwend = min(max(pwend, 0), pooled_width);
168 |
169 |       for (int ph = phstart; ph < phend; ++ph) {
170 |         for (int pw = pwstart; pw < pwend; ++pw) {
171 |           if (offset_argmax_data[ph * pooled_width + pw] == (h * width + w)) {
172 |             gradient += offset_top_diff[ph * pooled_width + pw];
173 |           }
174 |         }
175 |       }
176 |     }
177 |     bottom_diff[index] = gradient;
178 |   }
179 | }
180 |
181 | THCState* getCutorchState(lua_State* L)
182 | {
183 |   lua_getglobal(L, "cutorch");
184 |   lua_getfield(L, -1, "getState");
185 |   lua_call(L, 0, 1);
186 |   THCState *state = (THCState*) lua_touserdata(L, -1);
187 |   lua_pop(L, 2);
188 |   return state;
189 | }
190 |
191 | static int updateOutput(lua_State *L)
192 | {
193 |   THCState *state = getCutorchState(L);
194 |   THCudaTensor *input = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
195 |   THCudaTensor *rois = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
196 |   THCudaTensor *output = (THCudaTensor *)luaT_getfieldcheckudata(L, 1, "output", "torch.CudaTensor");
197 |   THCudaIntTensor *argmax = (THCudaIntTensor *)luaT_getfieldcheckudata(L, 1, "argmax", "torch.CudaIntTensor");
198 |
199 |   int pooled_height_ = luaT_getfieldcheckint(L, 1, "pooled_height");
200 |   int pooled_width_ = luaT_getfieldcheckint(L, 1, "pooled_width");
201 |   THCudaTensor_resize5d(state, output, THCudaTensor_size(state, rois, 0), THCudaTensor_size(state, rois, 1), THCudaTensor_size(state, input, 1), pooled_height_, pooled_width_);
202 |   THCudaIntTensor_resize5d(state, argmax, THCudaTensor_size(state, rois, 0), THCudaTensor_size(state, rois, 1), THCudaTensor_size(state, input, 1), pooled_height_, pooled_width_);
203 |
204 |   const float* bottom_data = THCudaTensor_data(state, input);
205 |   const float* bottom_rois = THCudaTensor_data(state, rois);
206 |   float* top_data = THCudaTensor_data(state, output);
207 |   int* argmax_data = THCudaIntTensor_data(state, argmax); // int -> float
208 |
209 |   // TODO: BATCH
210 |   // BDHW 1DHW
211 |   int count = THCudaTensor_nElement(state, output); // top[0]->count();
212 |   int channels_ = THCudaTensor_size(state, input, 1);
213 |   int height_ = THCudaTensor_size(state, input, 2);
214 |   int width_ = THCudaTensor_size(state, input, 3);
215 |   float spatial_scale_ = luaT_getfieldchecknumber(L, 1, "spatial_scale");
216 |
217 |   CUDA_POST_KERNEL_CHECK;
218 |
219 |   // NOLINT_NEXT_LINE(whitespace/operators)
220 |   ROIPoolForward<float><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
221 |     count, bottom_data, spatial_scale_, channels_, height_, width_, pooled_height_,
222 |     pooled_width_, bottom_rois, top_data, argmax_data);
223 |   CUDA_POST_KERNEL_CHECK;
224 |
225 |   return 1;
226 | }
227 |
228 | static int updateGradInput(lua_State *L)
229 | {
230 |   THCState *state = getCutorchState(L);
231 |   THCudaTensor *input = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
232 |   THCudaTensor *rois = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
233 |   THCudaIntTensor *argmax = (THCudaIntTensor *)luaT_getfieldcheckudata(L, 1, "argmax", "torch.CudaIntTensor");
234 |
235 |   THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
236 |   THCudaTensor *gradInput = (THCudaTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", "torch.CudaTensor");
237 |
238 |   THCudaTensor_resizeAs(state, gradInput, input);
239 |   THCudaTensor_zero(state, gradInput);
240 |
241 |   const float* bottom_rois = THCudaTensor_data(state, rois);
242 |   const float* top_diff = THCudaTensor_data(state, gradOutput);
243 |   float* bottom_diff = THCudaTensor_data(state, gradInput);
244 |   int* argmax_data = THCudaIntTensor_data(state, argmax);
245 |
246 |   const int count = THCudaTensor_nElement(state, gradInput);
247 |   int channels_ = THCudaTensor_size(state, input, 1);
248 |   int height_ = THCudaTensor_size(state, input, 2);
249 |   int width_ = THCudaTensor_size(state, input, 3);
250 |   int pooled_height_ = luaT_getfieldcheckint(L, 1, "pooled_height");
251 |   int pooled_width_ = luaT_getfieldcheckint(L, 1, "pooled_width");
252 |   float spatial_scale_ = luaT_getfieldchecknumber(L, 1, "spatial_scale");
253 |   int num_rois = THCudaTensor_size(state, rois, 0) * THCudaTensor_size(state, rois, 1); // batchSize x numRoisPerImage
254 |
255 |   // NOLINT_NEXT_LINE(whitespace/operators)
256 |   CUDA_POST_KERNEL_CHECK;
257 |   ROIPoolBackward<float><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
258 |     count, top_diff, argmax_data, num_rois, spatial_scale_, channels_,
259 |     height_, width_, pooled_height_, pooled_width_, bottom_diff, bottom_rois);
260 |   CUDA_POST_KERNEL_CHECK;
261 |
262 |   return 1;
263 | }
264 |
265 | static const struct luaL_Reg lua_registrations [] = {
266 |   {"updateOutput", updateOutput},
267 |   {"updateGradInput", updateGradInput},
268 |   {NULL, NULL}
269 | };
270 |
271 | LUA_EXTERNC DLL_EXPORT int luaopen_libcucontextlocnet(lua_State *L)
272 | {
273 |   lua_newtable(L);
274 |
275 |   luaT_pushmetatable(L, "torch.CudaTensor");
276 |   luaT_registeratname(L, lua_registrations, "contextlocnet");
277 |   lua_pop(L,1);
278 |
279 |   return 1;
280 | }
281 |
--------------------------------------------------------------------------------
/model/rectangularringroipooling.lua:
--------------------------------------------------------------------------------
1 | require 'cunn'
2 | require 'libcucontextlocnet'
3 |
4 | local RectangularRingRoiPooling, parent = torch.class('RectangularRingRoiPooling', 'nn.Module')
5 |
6 | function RectangularRingRoiPooling:__init(pooled_height, pooled_width, spatial_scale, scale_correction_params, roi_pre_transformer)
7 | 	parent.__init(self)
8 |
9 | 	assert(pooled_height > 0, 'pooled_h must be > 0')
10 | 	assert(pooled_width > 0, 'pooled_w must be > 0')
11 |
12 | 	self.pooled_height = pooled_height
13 | 	self.pooled_width = pooled_width
14 | 	self.spatial_scale = spatial_scale or 1.0
15 |
16 | 	self.scale_correction_params = scale_correction_params
17 | 	self.roi_pre_transformer = roi_pre_transformer
18 | end
19 |
20 | function RectangularRingRoiPooling:preprocess_rois(raw_rois)
21 | 	for i = 1, raw_rois:size(1) do
22 | 		self.preprocessed_rois[i]:select(2, 1):fill(i - 1)
23 | 	end
24 | 	self.preprocessed_rois:narrow(self.preprocessed_rois:dim(), 2, 4):copy(raw_rois:narrow(raw_rois:dim(), 1, 4))
25 | 	local rois = self.preprocessed_rois:narrow(self.preprocessed_rois:dim(), 2, 8)
26 |
27 | 	if self.roi_pre_transformer then
28 | 		self.roi_pre_transformer(rois)
29 | 	end
30 |
31 | 	local offset0, offset, spatial_scale = self.scale_correction_params.offset0,
self.scale_correction_params.offset, self.spatial_scale 32 | rois:select(rois:dim(), 1):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 33 | rois:select(rois:dim(), 2):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 34 | rois:select(rois:dim(), 3):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 35 | rois:select(rois:dim(), 4):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 36 | 37 | rois:select(rois:dim(), 5):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 38 | rois:select(rois:dim(), 6):add(offset0 + offset):mul(spatial_scale):add(0.5):floor() 39 | rois:select(rois:dim(), 7):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 40 | rois:select(rois:dim(), 8):add(offset0 - offset):mul(spatial_scale):add(-0.5):ceil() 41 | end 42 | 43 | function RectangularRingRoiPooling:updateOutput(input) 44 | self.preprocessed_rois = (self.preprocessed_rois or torch.CudaTensor()):resize(input[2]:size(1), input[2]:size(2), 1 + 8):zero() 45 | self:preprocess_rois(input[2]) 46 | 47 | self.argmax = self.argmax or torch.CudaIntTensor() 48 | input[1].contextlocnet.updateOutput(self, input[1], self.preprocessed_rois) 49 | return self.output 50 | end 51 | 52 | function RectangularRingRoiPooling:updateGradInput(input, gradOutput) 53 | self.gradInput = type(self.gradInput) == 'table' and (self.gradInput[1] or torch.CudaTensor()) or self.gradInput 54 | 55 | input[1].contextlocnet.updateGradInput(self, input[1], self.preprocessed_rois, gradOutput) 56 | self.rois_zero_grad = (self.rois_zero_grad or input[2].new()):resizeAs(input[2]):zero() 57 | self.gradInput = {self.gradInput, self.rois_zero_grad} 58 | return self.gradInput 59 | end 60 | -------------------------------------------------------------------------------- /model/roi_transforms.lua: -------------------------------------------------------------------------------- 1 | function branch_transform_rois_share_fc_layers(base_model, transformer) 2 | return nn.Sequential(): 3 | add(RectangularRingRoiPooling(base_model.pooled_height, base_model.pooled_width, base_model.spatial_scale, base_model.spp_correction_params, transformer)): 4 | add(base_model.fc_layers_view(RoiReshaper)): 5 | add(share_weight_bias(base_model.fc_layers)) 6 | end 7 | 8 | function RectangularRing(rois, scale_inner, scale_outer) 9 | local center_x = (rois:select(rois:dim(), 1) + rois:select(rois:dim(), 3)) / 2 10 | local center_y = (rois:select(rois:dim(), 2) + rois:select(rois:dim(), 4)) / 2 11 | local w_half = (rois:select(rois:dim(), 3) - rois:select(rois:dim(), 1)) / 2 12 | local h_half = (rois:select(rois:dim(), 4) - rois:select(rois:dim(), 2)) / 2 13 | 14 | rois:select(rois:dim(), 1):copy(center_x - w_half*scale_outer) 15 | rois:select(rois:dim(), 2):copy(center_y - h_half*scale_outer) 16 | rois:select(rois:dim(), 3):copy(center_x + w_half*scale_outer) 17 | rois:select(rois:dim(), 4):copy(center_y + h_half*scale_outer) 18 | rois:select(rois:dim(), 5):copy(center_x - w_half*scale_inner) 19 | rois:select(rois:dim(), 6):copy(center_y - h_half*scale_inner) 20 | rois:select(rois:dim(), 7):copy(center_x + w_half*scale_inner) 21 | rois:select(rois:dim(), 8):copy(center_y + h_half*scale_inner) 22 | end 23 | 24 | function MakeRectangularRingTransform(scale_inner, scale_outer) 25 | return function(rois) RectangularRing(rois, scale_inner, scale_outer) end 26 | end 27 | 28 | function BoxOriginal(rois) 29 | end 30 | 31 | CentralRegion1 = MakeRectangularRingTransform(0.0, 0.5) 32 | CentralRegion2 = MakeRectangularRingTransform(0.3, 0.8) 33 | 
BorderRegion1 = MakeRectangularRingTransform(0.5, 1.0) 34 | BorderRegion2 = MakeRectangularRingTransform(0.8, 1.5) 35 | ContextRegion = MakeRectangularRingTransform(1.0, opts.ROI_FACTOR) 36 | BoxOriginal_ring = MakeRectangularRingTransform(1.0 / opts.ROI_FACTOR, 1.0) 37 | ContextRegion_overlap = MakeRectangularRingTransform(0.8, 0.8 * opts.ROI_FACTOR) 38 | ContextRegion_outer = MakeRectangularRingTransform(1.2, 1.2 * opts.ROI_FACTOR) 39 | ContextRegion_big = MakeRectangularRingTransform(1.5, 2.0) 40 | CentralRegion_big = MakeRectangularRingTransform(0.0, 2.0) 41 | BoxScaleUp = MakeRectangularRingTransform(0.0, opts.ROI_FACTOR) 42 | 43 | function BoxHalfLeft(rois) 44 | rois:select(rois:dim(), 3):add(rois:select(rois:dim(), 1)):div(2) 45 | end 46 | 47 | function BoxHalfRight(rois) 48 | rois:select(rois:dim(), 1):add(rois:select(rois:dim(), 3)):div(2) 49 | end 50 | 51 | function BoxHalfUp(rois) 52 | rois:select(rois:dim(), 4):add(rois:select(rois:dim(), 2)):div(2) 53 | end 54 | 55 | function BoxHalfBottom(rois) 56 | rois:select(rois:dim(), 2):add(rois:select(rois:dim(), 4)):div(2) 57 | end 58 | 59 | function DoubleUp(rois) 60 | rois:select(rois:dim(), 2):csub(rois:select(rois:dim(), 4) - rois:select(rois:dim(), 2)) 61 | end 62 | 63 | function DoubleDown(rois) 64 | rois:select(rois:dim(), 4):add(rois:select(rois:dim(), 4) - rois:select(rois:dim(), 2)) 65 | end 66 | 67 | function DoubleLeft(rois) 68 | rois:select(rois:dim(), 1):csub(rois:select(rois:dim(), 3) - rois:select(rois:dim(), 1)) 69 | end 70 | 71 | function DoubleRight(rois) 72 | rois:select(rois:dim(), 3):add(rois:select(rois:dim(), 3) - rois:select(rois:dim(), 1)) 73 | end 74 | 75 | function ShiftUp(rois) 76 | DoubleUp(rois) 77 | rois:select(rois:dim(), 4):add(rois:select(rois:dim(), 2)):div(2) 78 | end 79 | 80 | function ShiftDown(rois) 81 | DoubleDown(rois) 82 | rois:select(rois:dim(), 2):add(rois:select(rois:dim(), 4)):div(2) 83 | end 84 | 85 | function ShiftLeft(rois) 86 | DoubleLeft(rois) 87 | rois:select(rois:dim(), 3):add(rois:select(rois:dim(), 1)):div(2) 88 | end 89 | 90 | function ShiftRight(rois) 91 | DoubleRight(rois) 92 | rois:select(rois:dim(), 1):add(rois:select(rois:dim(), 3)):div(2) 93 | end 94 | -------------------------------------------------------------------------------- /model/util.lua: -------------------------------------------------------------------------------- 1 | require 'cudnn' 2 | 3 | dofile('model/rectangularringroipooling.lua') 4 | dofile('model/HingeCriterion.lua') 5 | dofile('model/roi_transforms.lua') 6 | 7 | local function module_typename(module) 8 | return torch.typename(module):sub(4) 9 | end 10 | 11 | function model_load(path, opts) 12 | local loaded = paths.extname(path) == 'lua' and {model_path = path} or hdf5_load(path) 13 | local opts = opts or loaded.meta.opts 14 | local model_definition = io.open(loaded.model_path or loaded.meta.model_path):read('*all') 15 | 16 | base_model = dofile(paths.concat('model', opts.BASE_MODEL .. '.lua'))(opts.PATHS.BASE_MODEL_CACHED) 17 | assert(loadstring(model_definition))() 18 | 19 | local function dfs(module, prefix) 20 | if module.weight then 21 | assert(loaded.parameters[prefix .. '_weight'] ~= nil) 22 | module.weight:copy(loaded.parameters[prefix .. '_weight']) 23 | end 24 | if module.bias then 25 | assert(loaded.parameters[prefix .. '_bias'] ~= nil) 26 | module.bias:copy(loaded.parameters[prefix .. 
'_bias']) 27 | end 28 | 29 | for i, submodule in ipairs(module.modules or {}) do 30 | dfs(submodule, (submodule.name and submodule.name[1]) or ((prefix or module_typename(module)) .. '_' .. module_typename(submodule) .. i)) 31 | end 32 | end 33 | 34 | if loaded.parameters then 35 | dfs(model) 36 | end 37 | 38 | return loaded 39 | end 40 | 41 | function model_save(path, model, meta, epoch, log) 42 | local saved = { 43 | meta = meta, 44 | epoch = epoch, 45 | log = log, 46 | parameters = {} 47 | } 48 | 49 | local function dfs(module, prefix) 50 | if module.weight then 51 | local tensor_name = prefix .. '_weight' 52 | assert(saved.parameters[tensor_name] == nil or saved.parameters[tensor_name]:isSetTo(module.weight), torch.typename(module) .. ', ' ..prefix) 53 | saved.parameters[tensor_name] = module.weight 54 | end 55 | 56 | if module.bias then 57 | local tensor_name = prefix .. '_bias' 58 | assert(saved.parameters[tensor_name] == nil or saved.parameters[tensor_name]:isSetTo(module.bias), torch.typename(module) .. ', ' ..prefix) 59 | saved.parameters[tensor_name] = module.bias 60 | end 61 | 62 | for i, submodule in ipairs(module.modules or {}) do 63 | dfs(submodule, (submodule.name and submodule.name[1]) or ((prefix or module_typename(module)) .. '_' .. module_typename(submodule) .. i)) 64 | end 65 | end 66 | 67 | dfs(model) 68 | 69 | hdf5_save(path, saved) 70 | end 71 | 72 | RoiReshaper = { 73 | inputSize = nil, 74 | 75 | StoreShape = function(this) 76 | local module = nn.Identity() 77 | function module:updateOutput(input) 78 | this.inputSize = input:size() 79 | return nn.Identity.updateOutput(self, input) 80 | end 81 | return module 82 | end, 83 | 84 | RestoreShape = function(self, singletonDimension) 85 | return singletonDimension and DynamicView(function() return {-1, assert(self.inputSize)[2], numClasses, 1} end) or DynamicView(function() return {-1, assert(self.inputSize)[2], numClasses} end) 86 | end 87 | } 88 | 89 | function DynamicView(sizeFactory) 90 | local module = nn.View(-1) 91 | module.updateOutput = function(self, input) return nn.View.updateOutput(self:resetSize(unpack(sizeFactory())), input) end 92 | return module 93 | end 94 | 95 | function flatdim2(tensor) 96 | return tensor:contiguous():view(-1, unpack(torch.LongTensor(tensor:size()):sub(3, #tensor:size()):totable())) 97 | end 98 | 99 | function meandim2(tensor, batchSize) 100 | return tensor:contiguous():view(batchSize, -1, unpack(torch.LongTensor(tensor:size()):sub(2, #tensor:size()):totable())):mean(2):squeeze(2) 101 | end 102 | 103 | function share_weight_bias(module) 104 | return module:clone('weight', 'bias', 'gradWeight', 'gradBias') 105 | end 106 | 107 | function nn.Module.named(self, name) 108 | if not self.name then 109 | self.name = name 110 | else 111 | self.name = type(self.name) == 'table' and self.name or {self.name} 112 | table.insert(self.name, name) 113 | end 114 | return self 115 | end 116 | 117 | local nn_Module_findModules = nn.Module.findModules 118 | function nn.Module.findModules(self, typename, container) 119 | for _, name in ipairs(type(self.name) == 'table' and self.name or (type(self.name) == 'string' and {self.name} or {})) do 120 | if name == typename then 121 | return {self}, {self} 122 | end 123 | end 124 | return nn_Module_findModules(self, typename, container) 125 | end 126 | 127 | function Probe(module, name, recursive) 128 | name = name or module_typename(module) 129 | if recursive and module.modules then 130 | for i = 1, #module.modules do 131 | module.modules[i] = 
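--[=[ [editor's note: illustrative sketch, not part of the original source]
Probe (being attached to every submodule here) monkey-patches updateOutput,
updateGradInput and accGradParameters with cutorch-synchronized timers and input
shape printouts, which helps find slow layers. Hypothetical debugging usage:

  model = Probe(model, 'net', true) -- recursive: wraps all submodules too
  model:forward(batch)              -- prints per-module input sizes and timings in ms

Since modules are patched in place, this is meant for one-off profiling runs rather
than for training whose model will be checkpointed.
]=]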
Probe(module.modules[i], module.modules[i].name or (name .. '->' .. i), recursive) 132 | end 133 | end 134 | 135 | local module_updateOutput, module_updateGradInput, module_accGradParameters = module.updateOutput, module.updateGradInput, module.accGradParameters 136 | local fmtSize = function(tensor) return torch.isTensor(tensor) and ('('..('%d '):rep(tensor:dim())..')'):format(unpack(torch.LongTensor(tensor:size()):totable())) or tostring(#tensor) end 137 | function module:updateOutput(input) 138 | print(name, 'updateOutput: in', '#input = ', fmtSize(input)) 139 | local elapsed = gpuTicToc(function() self.output = module_updateOutput(self, input) end) 140 | print(name, 'updateOutput: out', ('%.4f ms'):format(elapsed*1000)) 141 | return self.output 142 | end 143 | function module:updateGradInput(input, gradOutput) 144 | print(name, 'updateGradInput: in') 145 | local elapsed = gpuTicToc(function() self.gradInput = module_updateGradInput(self, input, gradOutput) end) 146 | print(name, 'updateGradInput: out', ('%.4f ms'):format(elapsed*1000)) 147 | return self.gradInput 148 | end 149 | function module:accGradParameters(input, gradOutput, scale) 150 | print(name, 'accGradParameters: in') 151 | local elapsed = gpuTicToc(function() module_accGradParameters(self, input, gradOutput, scale) end) 152 | print(name, 'accGradParameters: out', ('%.4f ms'):format(elapsed*1000)) 153 | end 154 | return module 155 | end 156 | 157 | function gpuTicToc(f) 158 | cutorch.synchronize() 159 | local tic = torch.tic() 160 | f() 161 | cutorch.synchronize() 162 | return torch.toc(tic) 163 | end 164 | 165 | 166 | collectgarbage() 167 | -------------------------------------------------------------------------------- /model/wsddn_repro.lua: -------------------------------------------------------------------------------- 1 | model = nn.Sequential(): 2 | add(nn.ParallelTable(): 3 | add(base_model.conv_layers): 4 | add(nn.Identity()) 5 | ): 6 | add(RectangularRingRoiPooling(base_model.pooled_height, base_model.pooled_width, base_model.spatial_scale, base_model.spp_correction_params)): 7 | add(RoiReshaper:StoreShape()): 8 | add(base_model.fc_layers_view(RoiReshaper)): 9 | add(base_model.fc_layers): 10 | add(nn.ConcatTable(): 11 | add(nn.Sequential(): 12 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8c')): 13 | add(RoiReshaper:RestoreShape()): 14 | named('output_fc8c') 15 | ): 16 | add(nn.Sequential(): 17 | add(nn.Linear(base_model.fc_layers_output_size, numClasses):named('fc8d')): 18 | add(RoiReshaper:RestoreShape(4)): 19 | add(cudnn.SpatialSoftMax()): 20 | add(nn.Squeeze(4)): 21 | named('output_softmax') 22 | ) 23 | ): 24 | add(nn.CMulTable():named('output_prod')): 25 | add(nn.Sum(2)) 26 | 27 | criterion = HingeCriterion():setFactor(1 / numClasses) 28 | optimState = {learningRate = 5e-3, momentum = 0.9, weightDecay = 5e-4} 29 | optimState_annealed = {learningRate = 5e-4, momentum = 0.9, weightDecay = 5e-4, epoch = 10} 30 | -------------------------------------------------------------------------------- /opts.lua: -------------------------------------------------------------------------------- 1 | local DATA = os.getenv('DATA') or 'data' 2 | local DATA_COMMON = os.getenv('DATA_COMMON') or paths.concat(DATA, 'common') 3 | 4 | PATHS = 5 | { 6 | EXTERNAL = 7 | { 8 | PRETRAINED_MODEL_VGGF = 9 | { 10 | PROTOTXT = paths.concat(DATA_COMMON, 'VGG_CNN_F_deploy.prototxt'), 11 | CAFFEMODEL = paths.concat(DATA_COMMON, 'VGG_CNN_F.caffemodel'), 12 | }, 13 | 14 | SSW_VOC2007 = 15 | { 16 | trainval = 
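--[=[ [editor's note: illustrative sketch, not part of the original source]
wsddn_repro.lua above follows the WSDDN two-stream head: fc8c gives per-ROI class
scores, fc8d gives per-ROI detection scores that cudnn.SpatialSoftMax normalizes
across ROIs per class (the singleton 4th dimension added by RestoreShape(4) makes
the ROI axis the softmax axis), the two streams are multiplied elementwise, and
nn.Sum(2) pools over ROIs to one score per class per image. A numeric sketch with
3 ROIs and 2 classes:

  -- fc8c scores s (rois x classes):             {2, 0}, {1, 1}, {0, 2}
  -- fc8d weights d, softmaxed over rois:        {0.7, 0.1}, {0.2, 0.2}, {0.1, 0.7}
  -- image score per class = sum_r s[r] * d[r] = {2*0.7 + 1*0.2 + 0*0.1, 0*0.1 + 1*0.2 + 2*0.7} = {1.6, 1.6}

The HingeCriterion is then applied to these image-level scores, so training needs
only image labels (weak supervision).
]=]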
paths.concat(DATA_COMMON, 'SelectiveSearchVOC2007trainval.mat'), 17 | test = paths.concat(DATA_COMMON, 'SelectiveSearchVOC2007test.mat') 18 | }, 19 | 20 | SSW_VOC2012 = 21 | { 22 | trainval = paths.concat(DATA_COMMON, 'selective_search_data/voc_2012_trainval.mat'), 23 | test = paths.concat(DATA_COMMON, 'selective_search_data/voc_2012_test.mat') 24 | }, 25 | 26 | VOC_DEVKIT_VOCYEAR = 27 | { 28 | VOC2007 = paths.concat(DATA_COMMON, 'VOCdevkit_2007/VOC2007'), 29 | VOC2012 = paths.concat(DATA_COMMON, 'VOCdevkit_2012/VOC2012') 30 | } 31 | }, 32 | 33 | BASE_MODEL_CACHED = 34 | { 35 | VGGF = paths.concat(DATA_COMMON, 'VGG_CNN_F.t7') 36 | }, 37 | 38 | DATASET_CACHED_PATTERN = paths.concat(DATA_COMMON, '%s_%s.t7'), 39 | CHECKPOINT_PATTERN = paths.concat(DATA, 'model_epoch%02d.h5'), 40 | LOG = paths.concat(DATA, 'log.json'), 41 | SCORES_PATTERN = paths.concat(DATA, 'scores_%s.h5'), 42 | CORLOC = paths.concat(DATA, 'corloc.json'), 43 | DETECTION_MAP = paths.concat(DATA, 'detection_mAP.json'), 44 | } 45 | 46 | local DATASET = os.getenv('DATASET') or 'VOC2007' 47 | local NUM_EPOCHS = tonumber(os.getenv('NUM_EPOCHS')) or 30 48 | local SUBSET = os.getenv('SUBSET') or 'trainval' 49 | local BASE_MODEL = 'VGGF' 50 | 51 | opts = { 52 | ROI_FACTOR = 1.8, 53 | SEED = 1, 54 | 55 | NMS_OVERLAP_THRESHOLD = 0.4, 56 | NMS_SCORE_THRESHOLD = 1e-4, 57 | 58 | IMAGE_SCALES = {{608, 800}, {496, 656}, {400, 544}, {720, 960}, {864, 1152}}, --{{608, 800}, {368, 480}, {432, 576}, {528, 688}, {656, 864}, {912, 1200}} 59 | 60 | NUM_SCALES = 5, 61 | NUM_EPOCHS = NUM_EPOCHS, 62 | 63 | OUTPUT_FIELDS = {'output_prod'}, 64 | DATASET = DATASET, 65 | BASE_MODEL = BASE_MODEL, 66 | 67 | SUBSET = SUBSET, 68 | PATHS = 69 | { 70 | MODEL = arg[1], 71 | 72 | DATA = DATA, 73 | DATA_COMMON = DATA_COMMON, 74 | 75 | CHECKPOINT_PATTERN = PATHS.CHECKPOINT_PATTERN, 76 | LOG = PATHS.LOG, 77 | SCORES_PATTERN = PATHS.SCORES_PATTERN, 78 | 79 | BASE_MODEL_CACHED = PATHS.BASE_MODEL_CACHED[BASE_MODEL], 80 | BASE_MODEL_RAW = PATHS.EXTERNAL['PRETRAINED_MODEL_' .. BASE_MODEL], 81 | 82 | PROPOSALS = PATHS.EXTERNAL['SSW_' .. 
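--[=[ [editor's note: illustrative sketch, not part of the original source]
opts.lua resolves every file location from a few environment variables (DATA,
DATA_COMMON, DATASET, SUBSET, NUM_EPOCHS) with defaults baked in, so runs can be
redirected without editing code. With the defaults DATA='data' and
DATASET='VOC2007', the lookups here resolve to, e.g.:

  opts.PATHS.PROPOSALS.trainval  --> data/common/SelectiveSearchVOC2007trainval.mat
  opts.PATHS.DATASET_CACHED      --> data/common/VOC2007_SSW.t7
  opts.PATHS.CHECKPOINT_PATTERN  --> data/model_epoch%02d.h5

Switching datasets is then just a matter of exporting DATASET=VOC2012 before
launching preprocessing and training.
]=]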
DATASET],
83 | 
84 | VOC_DEVKIT_VOCYEAR = PATHS.EXTERNAL.VOC_DEVKIT_VOCYEAR[DATASET],
85 | DATASET_CACHED = PATHS.DATASET_CACHED_PATTERN:format(DATASET, 'SSW'),
86 | 
87 | CORLOC = PATHS.CORLOC,
88 | DETECTION_MAP = PATHS.DETECTION_MAP,
89 | RUN_STATS_PATTERN = PATHS.RUN_STATS_PATTERN -- note: RUN_STATS_PATTERN is not defined in the PATHS table above, so this field is nil
90 | }
91 | }
92 | 
-------------------------------------------------------------------------------- /parallel_batch_loader.lua: --------------------------------------------------------------------------------
1 | -- TODO: if nThreads == 0, do everything on the main thread
2 | 
3 | require 'nn'
4 | 
5 | local ParallelBatchLoader, parent = torch.class('ParallelBatchLoader', 'nn.Module')
6 | 
7 | function ParallelBatchLoader:__init(example_loader, nThreads)
8 | parent.__init(self)
9 | 
10 | self.example_loader = example_loader
11 | self.nThreads = nThreads or 16
12 | 
13 | self.nextBatchIdx = 1
14 | self.preloadedBatchIdx = nil
15 | 
16 | self.batchSize = {[true] = nil, [false] = nil}
17 | self.batchBuffers = nil
18 | self.currentBufferIdx = 1
19 | 
20 | local threads = require 'threads'
21 | threads.Threads.serialization('threads.sharedserialize')
22 | self.jobQueue = threads.Threads(self.nThreads)
23 | 
24 | parent.evaluate(self) -- call on the instance, not the class table, so the train flag is per-loader
25 | end
26 | 
27 | function ParallelBatchLoader:loadBatch(exampleIdxBegin)
28 | self.jobQueue:synchronize()
29 | 
30 | self.currentBufferIdx = 3 - self.currentBufferIdx -- flip between the two prefetch buffers
31 | local batchTable = self.batchBuffers[self.currentBufferIdx]
32 | local isTrainingPhase = self.train
33 | 
34 | for exampleIndexInBatch = 1, self:getBatchSize() do
35 | local exampleIdx = isTrainingPhase and torch.random(1, self:getNumExamples()) or (exampleIdxBegin - 1 + exampleIndexInBatch) -- random sampling when training, sequential when evaluating
36 | local fillBatchTable = self.example_loader:loadExample(exampleIdx, isTrainingPhase)
37 | self.jobQueue:addjob(function() fillBatchTable(exampleIndexInBatch, batchTable) end)
38 | end
39 | end
40 | 
41 | function ParallelBatchLoader:getBatch(batchIdx)
42 | batchIdx = batchIdx or 1
43 | assert(batchIdx <= self:getNumBatches())
44 | 
45 | local exampleIdxBegin = 1 + (batchIdx - 1) * self:getBatchSize()
46 | local exampleIdxEnd = 1 + math.min(batchIdx * self:getBatchSize(), self:getNumExamples()) -- exclusive; the last batch may be partial
47 | local effectiveBatchSize = exampleIdxEnd - exampleIdxBegin
48 | local oldBatchSize = self:getBatchSize()
49 | 
50 | if batchIdx ~= self.preloadedBatchIdx or effectiveBatchSize ~= self:getBatchSize() then
51 | self:setBatchSize(effectiveBatchSize)
52 | self.preloadedBatchIdx = batchIdx
53 | self:loadBatch(exampleIdxBegin)
54 | end
55 | 
56 | self.jobQueue:synchronize()
57 | local loadedBatchTable = self.batchBuffers[self.currentBufferIdx]
58 | 
59 | if self:getBatchSize() ~= oldBatchSize then
60 | self:setBatchSize(oldBatchSize)
61 | end
62 | 
63 | local nextBatchIdx = batchIdx + 1
64 | if nextBatchIdx < self:getNumBatches() then
65 | self.preloadedBatchIdx = nextBatchIdx
66 | self:loadBatch(exampleIdxBegin + self:getBatchSize()) -- prefetch the next batch while the caller consumes this one
67 | end
68 | 
69 | return loadedBatchTable
70 | end
71 | 
72 | function ParallelBatchLoader:updateOutput()
73 | assert(self:getBatchSize())
74 | assert(self.nextBatchIdx)
75 | self.output = self:getBatch(self.nextBatchIdx)
76 | self.nextBatchIdx = self.nextBatchIdx + 1
77 | return self.output
78 | end
79 | 
80 | function ParallelBatchLoader:setBatchSize(batchSize)
81 | if type(batchSize) == 'table' then
82 | self.batchSize = {[true] = batchSize.training, [false] = batchSize.evaluate}
83 | else
84 | self.batchSize[self.train] = batchSize
85 | if self.batchSize[not self.train] == nil then
86 | self.batchSize[not self.train] = batchSize
87 | 
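--[=[ [editor's note: illustrative sketch, not part of the original source]
ParallelBatchLoader overlaps data loading with GPU compute via two batch buffers:
loadBatch flips currentBufferIdx (3 - idx alternates between 1 and 2) and enqueues
fill jobs on the thread pool, while getBatch returns the previously filled buffer
and immediately schedules batch i+1. A typical consuming loop:

  local loader = ParallelBatchLoader(example_loader, 8):setBatchSize({training = 1, evaluate = 1})
  loader:training()
  for batchIdx = 1, loader:getNumBatches() do
    local batch = loader:forward() -- blocks only until the prefetched buffer is ready
  end

Because there are exactly two buffers, a returned batch stays valid only until the
next getBatch call.
]=]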
end
88 | end
89 | 
90 | self:reinitBatchBuffers()
91 | 
92 | return self
93 | end
94 | 
95 | function ParallelBatchLoader:reinitBatchBuffers()
96 | self.batchBuffers = {self.example_loader:makeBatchTable(self:getBatchSize(), self.train), self.example_loader:makeBatchTable(self:getBatchSize(), self.train)}
97 | end
98 | 
99 | function ParallelBatchLoader:getBatchSize()
100 | return self.batchSize[self.train]
101 | end
102 | 
103 | function ParallelBatchLoader:getNumBatches()
104 | return torch.ceil(self:getNumExamples() / self:getBatchSize())
105 | end
106 | 
107 | function ParallelBatchLoader:getNumExamples()
108 | return self.example_loader:getNumExamples(self.train)
109 | end
110 | 
111 | function ParallelBatchLoader:training()
112 | parent.training(self) -- call on the instance, not the class table (parent:training() would flip nn.Module's shared train flag)
113 | self.nextBatchIdx = 1
114 | self:reinitBatchBuffers()
115 | end
116 | 
117 | function ParallelBatchLoader:evaluate()
118 | parent.evaluate(self)
119 | self.nextBatchIdx = 1
120 | self:reinitBatchBuffers()
121 | end
122 | 
-------------------------------------------------------------------------------- /pascal_voc.lua: --------------------------------------------------------------------------------
1 | local classLabels = {'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'}
2 | 
3 | local function precisionrecall(scores_all, labels_all)
4 | -- adapted from VOCdevkit/VOCcode/VOCevalcls.m (VOCap.m); tested, gives equivalent results
5 | local function VOCap(rec, prec)
6 | local mrec = torch.cat(torch.cat(torch.FloatTensor({0}), rec), torch.FloatTensor({1}))
7 | local mpre = torch.cat(torch.cat(torch.FloatTensor({0}), prec), torch.FloatTensor({0}))
8 | for i=mpre:numel()-1, 1, -1 do
9 | mpre[i]=math.max(mpre[i], mpre[i+1]) -- make precision monotonically non-increasing
10 | end
11 | 
12 | local i = (mrec:sub(2, mrec:numel())):ne(mrec:sub(1, mrec:numel() - 1)):nonzero():squeeze(2) + 1
13 | local ap = (mrec:index(1, i) - mrec:index(1, i - 1)):cmul(mpre:index(1, i)):sum() -- integrate precision over the recall steps
14 | 
15 | return ap
16 | end
17 | 
18 | local function VOCevalcls(out, gt)
19 | local so, si = (-out):sort() -- sort by decreasing score; si holds the sorted example indices
20 | 
21 | local tp=gt:index(1, si):gt(0):float()
22 | local fp=gt:index(1, si):lt(0):float()
23 | 
24 | fp=fp:cumsum()
25 | tp=tp:cumsum()
26 | 
27 | local rec=tp/gt:gt(0):sum()
28 | local prec=tp:cdiv(fp+tp)
29 | 
30 | local ap=VOCap(rec,prec)
31 | return rec, prec, ap
32 | end
33 | 
34 | local prec = torch.FloatTensor(scores_all:size())
35 | local rec = torch.FloatTensor(scores_all:size())
36 | local ap = torch.FloatTensor(#classLabels)
37 | 
38 | for classLabelInd = 1, #classLabels do
39 | local r, p, a = VOCevalcls(scores_all:narrow(2, classLabelInd, 1):squeeze(), labels_all:narrow(2, classLabelInd, 1):squeeze()) -- VOCevalcls returns (rec, prec, ap); the original bound them in swapped order
40 | prec:narrow(2, classLabelInd, 1):copy(p)
41 | rec:narrow(2, classLabelInd, 1):copy(r)
42 | ap[classLabelInd] = a
43 | end
44 | 
45 | return prec, rec, ap
46 | end
47 | 
48 | return {
49 | classLabels = classLabels,
50 | numClasses = #classLabels,
51 | 
52 | load = function(VOCdevkit_VOCYEAR)
53 | local xml = require 'xml'
54 | 
55 | local filelists =
56 | {
57 | train = paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/train.txt'),
58 | val = paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/val.txt'),
59 | test = paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/test.txt'),
60 | }
61 | 
62 | local numMaxExamples = 11000
63 | local numMaxObjectsPerExample = 5
64 | 
65 | local mkDataset = function() return
66 | {
67 | filenames = torch.CharTensor(numMaxExamples, 16):zero(),
68 | labels = 
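--[=[ [editor's note: illustrative sketch, not part of the original source]
precisionrecall above reimplements VOC classification AP: per class, examples are
sorted by decreasing score, cumulative true/false positives yield the
precision/recall curve, and VOCap integrates the monotonized precision over recall.
Hypothetical call, one row per image (5011 is the VOC2007 trainval size):

  local scores = torch.FloatTensor(5011, 20) -- classifier outputs, images x 20 VOC classes
  local labels = torch.FloatTensor(5011, 20) -- +1 present, -1 absent, 0 ignored ('difficult')
  local prec, rec, ap = precisionrecall(scores, labels)
  print(ap:mean()) -- mean AP over classes, as exposed by meanAP at the end of this file
]=]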
torch.FloatTensor(numMaxExamples, #classLabels):zero(), 69 | objectBoxes = torch.FloatTensor(numMaxExamples * numMaxObjectsPerExample, 5):zero(), 70 | objectBoxesInds = torch.IntTensor(numMaxExamples, 2):zero(), 71 | jpegs = torch.ByteTensor(numMaxExamples * 3 * 50000):zero(), 72 | jpegsInds = torch.IntTensor(numMaxExamples, 2):zero(), 73 | 74 | getNumExamples = function(self) 75 | return self.numExamples 76 | end, 77 | 78 | getImageFileName = function(self, exampleIdx) 79 | return self.filenames[exampleIdx]:clone():storage():string():match('%Z+') 80 | end, 81 | 82 | getGroundTruthBoxes = function(self, exampleIdx) 83 | return self.objectBoxes:sub(self.objectBoxesInds[exampleIdx][1], self.objectBoxesInds[exampleIdx][2]) 84 | end, 85 | 86 | getJpegBytes = function(self, exampleIdx) 87 | return self.jpegs:sub(self.jpegsInds[exampleIdx][1], self.jpegsInds[exampleIdx][2]) 88 | end, 89 | 90 | getLabels = function(self, exampleIdx) 91 | return self.labels[exampleIdx] 92 | end 93 | } end 94 | 95 | local voc = { train = mkDataset(), val = mkDataset(), test = mkDataset() } 96 | 97 | for _, subset in ipairs{'train', 'val', 'test'} do 98 | local exampleIdx = 1 99 | local jpegsFirstByteInd = 1 100 | for line in io.lines(filelists[subset]) do 101 | assert(exampleIdx <= numMaxExamples) 102 | assert(#line < voc[subset].filenames:size(2)) 103 | 104 | voc[subset].filenames[exampleIdx]:sub(1, #line):copy(torch.CharTensor(torch.CharStorage():string(line))) 105 | 106 | local f = torch.DiskFile(paths.concat(VOCdevkit_VOCYEAR, 'JPEGImages', line .. '.jpg'), 'r') 107 | f:binary() 108 | f:seekEnd() 109 | local file_size_bytes = f:position() - 1 110 | f:seek(1) 111 | local bytes = torch.ByteTensor(file_size_bytes) 112 | f:readByte(bytes:storage()) 113 | voc[subset].jpegsInds[exampleIdx] = torch.IntTensor({jpegsFirstByteInd, jpegsFirstByteInd + file_size_bytes - 1}) 114 | voc[subset]:getJpegBytes(exampleIdx):copy(bytes) 115 | f:close() 116 | 117 | jpegsFirstByteInd = voc[subset].jpegsInds[exampleIdx][2] + 1 118 | exampleIdx = exampleIdx + 1 119 | end 120 | voc[subset].numExamples = exampleIdx - 1 121 | end 122 | local testHasAnnotation = VOCdevkit_VOCYEAR:find('2007') ~= nil 123 | for _, subset in ipairs(testHasAnnotation and {'train', 'val', 'test'} or {'train', 'val'}) do 124 | for classLabelInd, v in ipairs(classLabels) do 125 | local exampleIdx = 1 126 | for line in io.lines(paths.concat(VOCdevkit_VOCYEAR, 'ImageSets/Main/'..v..'_'..subset..'.txt')) do 127 | if string.find(line, ' -1', 1, true) then 128 | voc[subset].labels[exampleIdx][classLabelInd] = -1 129 | elseif string.find(line, ' 1', 1, true) then 130 | voc[subset].labels[exampleIdx][classLabelInd] = 1 131 | end 132 | exampleIdx = exampleIdx + 1 133 | end 134 | end 135 | 136 | local exampleIdx = 1 137 | local objectBoxIdx = 1 138 | for line in io.lines(filelists[subset]) do 139 | local anno_xml = xml.loadpath(paths.concat(VOCdevkit_VOCYEAR, 'Annotations/' .. 
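--[=[ [editor's note: illustrative sketch, not part of the original source]
The loader packs a whole VOC subset into a few flat preallocated tensors: raw JPEG
bytes end to end plus per-image (begin, end) index pairs, and similarly for object
boxes. This keeps the dataset a single torch.save-able blob that loader threads can
share cheaply. Reading image k back is a sub-range plus a decode:

  local bytes = voc[subset]:getJpegBytes(k) -- i.e. jpegs:sub(jpegsInds[k][1], jpegsInds[k][2])
  local img = require('image').decompressJPG(bytes)
]=]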
line ..'.xml')) 140 | 141 | local firstObjectBoxIdx = objectBoxIdx 142 | for i = 1, #anno_xml do 143 | if anno_xml[i].xml == 'object' then 144 | local classLabel = xml.find(anno_xml[i], 'name')[1] 145 | local xmin = xml.find(xml.find(anno_xml[i], 'bndbox'), 'xmin')[1] 146 | local xmax = xml.find(xml.find(anno_xml[i], 'bndbox'), 'xmax')[1] 147 | local ymin = xml.find(xml.find(anno_xml[i], 'bndbox'), 'ymin')[1] 148 | local ymax = xml.find(xml.find(anno_xml[i], 'bndbox'), 'ymax')[1] 149 | 150 | for classLabelInd = 1, #classLabels do 151 | if classLabels[classLabelInd] == classLabel then 152 | assert(objectBoxIdx <= voc[subset].objectBoxes:size(1)) 153 | 154 | voc[subset].objectBoxes[objectBoxIdx] = torch.FloatTensor({classLabelInd, xmin, ymin, xmax, ymax}) 155 | objectBoxIdx = objectBoxIdx + 1 156 | end 157 | end 158 | end 159 | end 160 | 161 | voc[subset].objectBoxesInds[exampleIdx] = torch.IntTensor({firstObjectBoxIdx, objectBoxIdx - 1}) 162 | exampleIdx = exampleIdx + 1 163 | end 164 | end 165 | 166 | if not testHasAnnotation then 167 | voc['test'].objectBoxesInds = nil 168 | voc['test'].objectBoxes = nil 169 | end 170 | 171 | for _, subset in ipairs{'train', 'val', 'test'} do 172 | voc[subset].filenames = voc[subset].filenames:sub(1, voc[subset].numExamples):clone() 173 | voc[subset].labels = voc[subset].labels:sub(1, voc[subset].numExamples):clone() 174 | voc[subset].jpegsInds = voc[subset].jpegsInds:sub(1, voc[subset].numExamples):clone() 175 | voc[subset].jpegs = voc[subset].jpegs:sub(1, voc[subset].jpegsInds[voc[subset].numExamples][2]):clone() 176 | 177 | if voc[subset].objectBoxes and voc[subset].objectBoxesInds then 178 | voc[subset].objectBoxesInds = voc[subset].objectBoxesInds:sub(1, voc[subset].numExamples):clone() 179 | voc[subset].objectBoxes = voc[subset].objectBoxes:sub(1, voc[subset].objectBoxesInds[voc[subset].numExamples][2]):clone() 180 | end 181 | end 182 | 183 | voc['trainval'] = { 184 | train = voc['train'], 185 | val = voc['val'], 186 | getNumExamples = function(self) 187 | return self.train:getNumExamples() + self.val:getNumExamples() 188 | end, 189 | 190 | getImageFileName = function(self, exampleIdx) 191 | return exampleIdx <= self.train:getNumExamples() and self.train:getImageFileName(exampleIdx) or self.val:getImageFileName(exampleIdx - self.train:getNumExamples()) 192 | end, 193 | 194 | getGroundTruthBoxes = function(self, exampleIdx) 195 | return exampleIdx <= self.train:getNumExamples() and self.train:getGroundTruthBoxes(exampleIdx) or self.val:getGroundTruthBoxes(exampleIdx - self.train:getNumExamples()) 196 | end, 197 | 198 | getJpegBytes = function(self, exampleIdx) 199 | return exampleIdx <= self.train:getNumExamples() and self.train:getJpegBytes(exampleIdx) or self.val:getJpegBytes(exampleIdx - self.train:getNumExamples()) 200 | end, 201 | 202 | getLabels = function(self, exampleIdx) 203 | return exampleIdx <= self.train:getNumExamples() and self.train:getLabels(exampleIdx) or self.val:getLabels(exampleIdx - self.train:getNumExamples()) 204 | end 205 | } 206 | 207 | return voc 208 | end, 209 | 210 | package_submission = function(OUT, voc, VOCYEAR, subset, task, ...) 
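--[=[ [editor's note: illustrative sketch, not part of the original source]
package_submission writes standard VOCdevkit results files under
<OUT>/results/<VOCYEAR>/Main/, one file per class, in parallel. The 'det' writer
emits one candidate detection per line in the usual VOC format,

  <image id> <confidence> <xmin> <ymin> <xmax> <ymax>

converting the internally stored 0-based boxes back to 1-based VOC coordinates
(the +1 clamped below at 1) and keeping only detections that survived the NMS mask;
the 'cls' writer emits '<image id> <confidence>' per image. Everything is finally
tarred for the VOC evaluation server.
]=]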
211 | local task_a, task_b = task:match('(.+)_(.+)') 212 | local write = { 213 | cls = function(f, classLabelInd, scores) 214 | assert(voc[subset]:getNumExamples() == scores:size(1)) 215 | 216 | for exampleIdx = 1, voc[subset]:getNumExamples() do 217 | f:write(string.format('%s %.12f\n', voc[subset]:getImageFileName(exampleIdx), scores[exampleIdx][classLabelInd])) 218 | end 219 | end, 220 | det = function(f, classLabelInd, rois, scores, mask) 221 | assert(voc[subset]:getNumExamples() == #scores and voc[subset]:getNumExamples() == #rois) 222 | 223 | for exampleIdx = 1, voc[subset]:getNumExamples() do 224 | for roiInd = 1, scores[exampleIdx]:size(scores[exampleIdx]:dim()) do 225 | if mask[exampleIdx][classLabelInd][roiInd] > 0 then 226 | f:write(string.format('%s %.12f %.12f %.12f %.12f %.12f\n', 227 | voc[subset]:getImageFileName(exampleIdx), 228 | scores[exampleIdx][classLabelInd][roiInd], 229 | math.max(1, rois[exampleIdx][roiInd][1] + 1), 230 | math.max(1, rois[exampleIdx][roiInd][2] + 1), 231 | math.max(1, rois[exampleIdx][roiInd][3] + 1), 232 | math.max(1, rois[exampleIdx][roiInd][4] + 1) 233 | )) 234 | end 235 | end 236 | end 237 | end 238 | } 239 | 240 | os.execute(string.format('rm -rf "%s/results"', OUT)) 241 | os.execute(string.format('mkdir -p "%s/results/%s/Main"', OUT, VOCYEAR)) 242 | 243 | local respath = string.format('%s/results/%s/Main/%%s_%s_%s_%%s.txt', OUT, VOCYEAR, task_b, subset) 244 | 245 | threads = require 'threads' 246 | threads.Threads.serialization('threads.sharedserialize') 247 | jobQueue = threads.Threads(#classLabels) 248 | local writer = write[task_b] 249 | for classLabelInd, classLabel in ipairs(classLabels) do 250 | jobQueue:addjob(function(...) 251 | local f = assert(io.open(respath:format(task_a, classLabel), 'w')) 252 | writer(f, classLabelInd, ...) 253 | f:close() 254 | end, function() end, ...) 255 | end 256 | jobQueue:synchronize() 257 | os.execute(string.format('cd "%s" && tar -czf "results-%s-%s-%s.tar.gz" results', OUT, VOCYEAR, task, subset)) 258 | return respath 259 | end, 260 | 261 | vis_classification_submission = function(OUT, VOCYEAR, subset, classLabel, JPEGImages_DIR, top_k) 262 | top_k = top_k or 20 263 | local res_file_path = string.format('%s/results/%s/Main/comp2_cls_%s_%s.txt', OUT, VOCYEAR, subset, classLabel) 264 | 265 | local scores = {} 266 | for line in assert(io.open(res_file_path)):lines() do 267 | scores[#scores + 1] = line:split(' ') 268 | end 269 | 270 | table.sort(scores, function(a, b) return -tonumber(a[2]) < -tonumber(b[2]) end) 271 | 272 | local image = require 'image' 273 | local top_imgs = {} 274 | print('K = ', top_k) 275 | for i = 1, top_k do 276 | top_imgs[i] = image.scale(image.load(paths.concat(JPEGImages_DIR, scores[i][1] .. 
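--[=[ [editor's note: illustrative sketch, not part of the original source]
vis_classification_submission is a small sanity-check helper: it re-reads a results
file written by package_submission, sorts its '<image id> <score>' lines by
decreasing score, and displays the top_k highest-scoring images. Hypothetical usage:

  vis_classification_submission('data', 'VOC2007', 'test', 'dog', 'data/common/VOCdevkit_2007/VOC2007/JPEGImages')

to eyeball whether the top-ranked 'dog' images actually contain dogs.
]=]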
'.jpg')), 128, 128) 277 | print(scores[i][2], scores[i][1]) 278 | end 279 | 280 | image.display(top_imgs) 281 | end, 282 | 283 | precisionrecall = precisionrecall, 284 | 285 | meanAP = function(scores_all, labels_all) 286 | return ({precisionrecall(scores_all, labels_all)})[3]:mean() 287 | end 288 | } 289 | -------------------------------------------------------------------------------- /preprocess.lua: -------------------------------------------------------------------------------- 1 | require 'cudnn' 2 | require 'loadcaffe' 3 | require 'image' 4 | 5 | matio = require 'matio' 6 | voc_tools = dofile('pascal_voc.lua') 7 | 8 | dofile('opts.lua') 9 | 10 | function VGGF() 11 | local model_converted = loadcaffe.load(opts.PATHS.BASE_MODEL_RAW.PROTOTXT, opts.PATHS.BASE_MODEL_RAW.CAFFEMODEL, 'cudnn'):float() 12 | torch.save(opts.PATHS.BASE_MODEL_CACHED, model_converted) 13 | end 14 | 15 | function VOC() 16 | local function copy_proposals_in_dataset(trainval_test_mat_paths, voc) 17 | local subset_paths = {{'train', trainval_test_mat_paths.trainval}, {'val', trainval_test_mat_paths.trainval}, {'test', trainval_test_mat_paths.test}} 18 | 19 | local m = {train = {}, val = {}, test = {}} 20 | local b = {train = nil, val = nil, test = nil} 21 | local s = {train = nil, val = nil, test = nil} 22 | for _, t in ipairs(subset_paths) do 23 | local h = matio.load(t[2]) 24 | b[t[1]] = h.boxes 25 | s[t[1]] = h.boxScores 26 | for exampleIdx = 1, #b[t[1]] do 27 | m[t[1]][h.images[exampleIdx]:storage():string()] = exampleIdx 28 | end 29 | end 30 | 31 | for _, subset in ipairs{'train', 'val', 'test'} do 32 | voc[subset].rois = {} 33 | for exampleIdx = 1, voc[subset]:getNumExamples() do 34 | local ind = m[subset][voc[subset]:getImageFileName(exampleIdx)] 35 | local box_scores = s[subset] and s[subset][ind] or torch.FloatTensor(b[subset][ind]:size(1), 1):zero() 36 | --local box_scores = torch.FloatTensor(b[subset][ind]:size(1), 1):zero() 37 | voc[subset].rois[exampleIdx] = torch.cat(b[subset][ind]:index(2, torch.LongTensor{2, 1, 4, 3}):float() - 1, box_scores) 38 | 39 | if s[subset] then 40 | voc[subset].rois[exampleIdx] = voc[subset].rois[exampleIdx]:index(1, ({box_scores:squeeze(2):sort(1, true)})[2]:sub(1, math.min(box_scores:size(1), 2048))) 41 | end 42 | end 43 | voc[subset].getProposals = function(self, exampleIdx) 44 | return self.rois[exampleIdx] 45 | end 46 | end 47 | 48 | voc['trainval'].getProposals = function(self, exampleIdx) 49 | return exampleIdx <= self.train:getNumExamples() and self.train:getProposals(exampleIdx) or self.val:getProposals(exampleIdx - self.train:getNumExamples()) 50 | end 51 | end 52 | 53 | local function filter_proposals(voc) 54 | local min_width_height = 20 55 | for _, subset in ipairs{'train', 'val', 'test'} do 56 | for exampleIdx = 1, voc[subset]:getNumExamples() do 57 | local x1, y1, x2, y2 = unpack(voc[subset].rois[exampleIdx]:split(1, 2)) 58 | local channels, height, width = unpack(image.decompressJPG(voc[subset]:getJpegBytes(exampleIdx)):size():totable()) 59 | 60 | assert(x1:ge(0):all() and x1:le(width):all()) 61 | assert(x2:ge(0):all() and x2:le(width):all()) 62 | assert(y1:ge(0):all() and y1:le(height):all()) 63 | assert(y2:ge(0):all() and y2:le(height):all()) 64 | assert(x1:le(x2):all() and y1:le(y2):all()) 65 | 66 | voc[subset].rois[exampleIdx] = voc[subset].rois[exampleIdx]:index(1, (x2 - x1):ge(min_width_height):cmul((y2 - y1):ge(min_width_height)):squeeze(2):nonzero():squeeze(2)) 67 | end 68 | end 69 | end 70 | 71 | local voc = 
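--[=[ [editor's note: illustrative sketch, not part of the original source]
copy_proposals_in_dataset above performs two quiet coordinate fixes: the .mat
proposal boxes arrive in MATLAB (y1, x1, y2, x2) order with 1-based indices, so

  b[subset][ind]:index(2, torch.LongTensor{2, 1, 4, 3}):float() - 1

reorders the columns to (x1, y1, x2, y2) and shifts to 0-based coordinates. A fifth
column carries the proposal score (zeros when the .mat file has none, as for the
VOC2007 Selective Search windows), and when scores do exist only the 2048
best-scoring proposals per image are kept. filter_proposals then drops boxes less
than 20 pixels wide or tall.
]=]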
voc_tools.load(opts.PATHS.VOC_DEVKIT_VOCYEAR) 72 | copy_proposals_in_dataset(opts.PATHS.PROPOSALS, voc) 73 | filter_proposals(voc) 74 | torch.save(opts.PATHS.DATASET_CACHED, voc) 75 | end 76 | 77 | for _, a in ipairs(arg) do 78 | print('Preprocessing', a) 79 | _G[a]() 80 | end 81 | print('Done') 82 | -------------------------------------------------------------------------------- /test.lua: -------------------------------------------------------------------------------- 1 | dofile('opts.lua') 2 | dofile('util.lua') 3 | dofile('dataset.lua') 4 | dofile('model/util.lua') 5 | 6 | assert(os.getenv('CUDA_VISIBLE_DEVICES') ~= nil and cutorch.getDeviceCount() <= 1, 'SHOULD RUN ON ONE GPU FOR NOW') 7 | 8 | loaded = model_load(opts.PATHS.MODEL, opts) 9 | 10 | meta = { 11 | opts = opts, 12 | training_meta = loaded.meta, 13 | example_loader_options = { 14 | evaluate = { 15 | numRoisPerImage = 8192, 16 | subset = opts.SUBSET, 17 | hflips = true, 18 | numScales = opts.NUM_SCALES 19 | } 20 | } 21 | } 22 | 23 | batch_loader = ParallelBatchLoader(ExampleLoader(dataset, base_model.normalization_params, opts.IMAGE_SCALES, meta.example_loader_options)):setBatchSize({evaluate = 1}) 24 | 25 | print(meta) 26 | assert(model):cuda() 27 | assert(criterion):cuda() 28 | collectgarbage() 29 | 30 | tic_start = torch.tic() 31 | 32 | batch_loader:evaluate() 33 | model:evaluate() 34 | scores, labels, rois, outputs = {}, {}, {}, {} 35 | for batchIdx = 1, batch_loader:getNumBatches() do 36 | tic = torch.tic() 37 | 38 | scale_batches = batch_loader:forward()[1] 39 | scale0_rois = scale_batches[1][2] 40 | scale_outputs, scale_scores, scale_costs = {}, {}, {} 41 | for i = 2, #scale_batches do 42 | batch_images, batch_rois, batch_labels = unpack(scale_batches[i]) 43 | batch_images_gpu = (batch_images_gpu or torch.CudaTensor()):resize(batch_images:size()):copy(batch_images) 44 | batch_labels_gpu = (batch_labels_gpu or torch.CudaTensor()):resize(batch_labels:size()):copy(batch_labels) 45 | if nn.gModule then 46 | batch_scores = model:forward({batch_images_gpu, batch_rois, scale0_rois}) 47 | else 48 | batch_scores = model:forward({batch_images_gpu, batch_rois}) 49 | end 50 | 51 | cost = criterion:forward(batch_scores, batch_labels_gpu) 52 | 53 | table.insert(scale_scores, (type(batch_scores) == 'table' and batch_scores[1] or batch_scores):float()) 54 | table.insert(scale_costs, cost) 55 | for _, output_field in ipairs(opts.OUTPUT_FIELDS) do 56 | module = model:findModules(output_field)[1] 57 | if module then 58 | scale_outputs[output_field] = scale_outputs[output_field] or {} 59 | table.insert(scale_outputs[output_field], module.output:transpose(2, 3):float()) 60 | end 61 | end 62 | end 63 | 64 | for output_field, output in pairs(scale_outputs) do 65 | outputs[output_field] = outputs[output_field] or {} 66 | table.insert(outputs[output_field], torch.cat(output, 1):mean(1):squeeze(1)) 67 | end 68 | 69 | table.insert(scores, torch.cat(scale_scores, 1):mean(1)) 70 | table.insert(labels, batch_labels:clone()) 71 | table.insert(rois, scale0_rois:narrow(scale0_rois:dim(), 1, 4):clone()[1]) 72 | 73 | collectgarbage() 74 | print('val', 'batch', batchIdx, torch.FloatTensor(scale_costs):mean(), 'img/sec', (#scale_batches - 1) / torch.toc(tic)) 75 | end 76 | 77 | subset = batch_loader.example_loader:getSubset(batch_loader.train) 78 | hdf5_save(opts.PATHS.SCORES_PATTERN:format(subset), { 79 | subset = subset, 80 | meta = meta, 81 | 82 | rois = rois, 83 | labels = torch.cat(labels, 1), 84 | output = torch.cat(scores, 1), 85 | outputs = 
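--[=[ [editor's note: illustrative sketch, not part of the original source]
This evaluation loop runs every image through several scales (and horizontal flips,
per example_loader_options), averages the per-scale score tensors with
torch.cat(scale_scores, 1):mean(1), and writes one HDF5 file per subset via
opts.PATHS.SCORES_PATTERN (e.g. data/scores_test.h5), which corloc.lua and
detection_mAP.lua consume offline. Reading the averaged scores back is one call:

  local output = hdf5_load(opts.PATHS.SCORES_PATTERN:format('test'), 'output')
]=]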
outputs, 86 | }) 87 | 88 | print('DONE:', torch.toc(tic_start), 'sec') 89 | -------------------------------------------------------------------------------- /train.lua: -------------------------------------------------------------------------------- 1 | dofile('opts.lua') 2 | dofile('util.lua') 3 | dofile('dataset.lua') 4 | dofile('model/util.lua') 5 | 6 | require 'optim' 7 | dofile('fbnn_Optim.lua') 8 | 9 | assert(os.getenv('CUDA_VISIBLE_DEVICES') ~= nil and cutorch.getDeviceCount() <= 1, 'SHOULD RUN ON ONE GPU FOR NOW') 10 | 11 | torch.manualSeed(opts.SEED) 12 | cutorch.manualSeedAll(opts.SEED) 13 | 14 | example_loader_options_preset = { 15 | training = { 16 | numRoisPerImage = 8192, 17 | subset = 'trainval', 18 | hflips = true, 19 | numScales = 5, 20 | }, 21 | evaluate = { 22 | numRoisPerImage = 8192, 23 | subset = 'trainval', 24 | hflips = true, 25 | numScales = 1, 26 | } 27 | } 28 | 29 | if paths.extname(opts.PATHS.MODEL) == 'lua' then 30 | loaded = model_load(opts.PATHS.MODEL, opts) 31 | meta = { 32 | model_path = loaded.model_path, 33 | opts = opts, 34 | example_loader_options = example_loader_options_preset 35 | } 36 | log = {{meta = meta}} 37 | else 38 | loaded = model_load(opts.PATHS.MODEL) 39 | meta = loaded.meta 40 | log = loaded.log 41 | previous_epoch = loaded.epoch 42 | end 43 | 44 | batch_loader = ParallelBatchLoader(ExampleLoader(dataset, base_model.normalization_params, opts.IMAGE_SCALES, meta.example_loader_options)):setBatchSize({training = 1, evaluate = 1}) 45 | 46 | print(meta) 47 | 48 | assert(model):cuda() 49 | assert(criterion):cuda() 50 | collectgarbage() 51 | 52 | model:apply(function (x) x.for_each = x.apply end) 53 | optimizer = nn.Optim(model, optimState) 54 | optimalg = optim.sgd 55 | 56 | for epoch = (previous_epoch or 0) + 1, opts.NUM_EPOCHS do 57 | if epoch > optimState_annealed.epoch then 58 | optimizer:setParameters(optimState_annealed) 59 | end 60 | 61 | batch_loader:training() 62 | model:training() 63 | for batchIdx = 1, batch_loader:getNumBatches() -1 do 64 | tic = torch.tic() 65 | 66 | scale_batches = batch_loader:forward()[1] 67 | scale0_rois = scale_batches[1][2] 68 | batch_images, batch_rois, batch_labels = unpack(scale_batches[2]) 69 | batch_images_gpu = (batch_images_gpu or torch.CudaTensor()):resize(batch_images:size()):copy(batch_images) 70 | batch_labels_gpu = (batch_labels_gpu or torch.CudaTensor()):resize(batch_labels:size()):copy(batch_labels) 71 | 72 | cost = optimizer:optimize(optimalg, {batch_images_gpu, batch_rois}, batch_labels_gpu, criterion) 73 | 74 | collectgarbage() 75 | print('epoch', epoch, 'batch', batchIdx, cost, 'img/sec', batch_images:size(1) / torch.toc(tic)) 76 | end 77 | 78 | if epoch % 5 == 0 or epoch == opts.NUM_EPOCHS or epoch == 1 then 79 | batch_loader:evaluate() 80 | model:evaluate() 81 | scores, labels, rois, costs, outputs, corlocs = {}, {}, {}, {}, {}, {} 82 | for batchIdx = 1, batch_loader:getNumBatches() - 1 do 83 | tic = torch.tic() 84 | 85 | scale_batches = batch_loader:forward()[1] 86 | scale0_rois = scale_batches[1][2] 87 | scale_outputs, scale_scores, scale_costs = {}, {}, {} 88 | for i = 2, #scale_batches do 89 | batch_images, batch_rois, batch_labels = unpack(scale_batches[i]) 90 | batch_images_gpu = (batch_images_gpu or torch.CudaTensor()):resize(batch_images:size()):copy(batch_images) 91 | batch_labels_gpu = (batch_labels_gpu or torch.CudaTensor()):resize(batch_labels:size()):copy(batch_labels) 92 | 93 | batch_scores = model:forward({batch_images_gpu, batch_rois}) 94 | 95 | cost = 
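--[=[ [editor's note: summary annotation, not part of the original source]
train.lua drives the fbnn Optim wrapper: optimizer:optimize(optim.sgd, input,
target, criterion) performs forward, backward and the SGD step in one call, and
once epoch exceeds optimState_annealed.epoch the learning rate is annealed (5e-3
down to 5e-4 in wsddn_repro.lua) via optimizer:setParameters(optimState_annealed).
Every fifth epoch, plus the first and last, the loop here switches to evaluate
mode and recomputes validation cost, mAP and corloc; model_save checkpoints every
fifth epoch and at the end.
]=]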
criterion:forward(batch_scores, batch_labels_gpu) 96 | 97 | table.insert(scale_scores, (type(batch_scores) == 'table' and batch_scores[1] or batch_scores):float()) 98 | table.insert(scale_costs, cost) 99 | for _, output_field in ipairs(opts.OUTPUT_FIELDS) do 100 | module = model:findModules(output_field)[1] 101 | if module then 102 | scale_outputs[output_field] = scale_outputs[output_field] or {} 103 | table.insert(scale_outputs[output_field], module.output:transpose(2, 3):float()) 104 | end 105 | end 106 | end 107 | 108 | for output_field, output in pairs(scale_outputs) do 109 | outputs[output_field] = outputs[output_field] or {} 110 | table.insert(outputs[output_field], torch.cat(output, 1):mean(1)[1]) 111 | end 112 | 113 | table.insert(costs, torch.FloatTensor(scale_costs):mean()) 114 | table.insert(scores, torch.cat(scale_scores, 1):mean(1)) 115 | table.insert(labels, batch_labels:clone()) 116 | table.insert(rois, scale0_rois:narrow(scale0_rois:dim(), 1, 4):clone()[1]) 117 | 118 | collectgarbage() 119 | print('val', 'epoch', epoch, 'batch', batchIdx, costs[#costs], 'img/sec', (#scale_batches - 1) / torch.toc(tic)) 120 | end 121 | 122 | for output_field, output in pairs(outputs) do 123 | corlocs[output_field] = corloc(dataset[batch_loader.example_loader:getSubset(batch_loader.train)], {output, rois}) 124 | end 125 | 126 | table.insert(log, { 127 | training = false, 128 | epoch = epoch, 129 | mAP = dataset_tools.meanAP(torch.cat(scores, 1), torch.cat(labels, 1)), 130 | corlocs = corlocs, 131 | valCost = torch.FloatTensor(costs):mean(), 132 | }) 133 | end 134 | 135 | if epoch % 5 == 0 or epoch == opts.NUM_EPOCHS then 136 | model:clearState() 137 | model_save(opts.PATHS.CHECKPOINT_PATTERN:format(epoch), model, meta, epoch, log) 138 | end 139 | 140 | json_save(opts.PATHS.LOG, log) 141 | io.stderr:write('log in "', opts.PATHS.LOG, '"\n') 142 | end 143 | 144 | -------------------------------------------------------------------------------- /util.lua: -------------------------------------------------------------------------------- 1 | require 'hdf5' 2 | rapidjson = require 'rapidjson' 3 | 4 | function hdf5_save(path, obj) 5 | local h = hdf5.open(path, 'w') 6 | local function r(prefix, o) 7 | for k, v in pairs(o) do 8 | local p = prefix..'/'..k 9 | if torch.isTypeOf(v, torch.CudaTensor) then 10 | h:write(p, v:float()) 11 | elseif torch.isTensor(v) then 12 | h:write(p, v) 13 | elseif type(v) == 'number' then 14 | h:write(p, torch.DoubleTensor(1):fill(v)) 15 | elseif type(v) == 'string' then 16 | h:write(p, torch.CharTensor(torch.CharStorage():string(v))) 17 | elseif type(v) == 'boolean' then 18 | h:write(p, torch.IntTensor(1):fill(v and 1 or 0)) 19 | else 20 | r(p, v) 21 | end 22 | end 23 | end 24 | r('', obj) 25 | h:close() 26 | end 27 | 28 | function hdf5_load(path, fields) 29 | local res = {} 30 | 31 | local h = hdf5.open(path, 'r') 32 | if fields then 33 | local returnValue = false 34 | if type(fields) ~= 'table' then 35 | returnValue = true 36 | fields = {fields} 37 | end 38 | for _, f in ipairs(fields) do 39 | if not pcall(function() res[f] = h:read('/'..f):all() end) then 40 | res[f] = nil 41 | end 42 | end 43 | if returnValue then 44 | res = res[fields[1]] 45 | end 46 | else 47 | res = h:all() 48 | end 49 | h:close() 50 | 51 | local function dfs(obj) 52 | for k, v in pairs(obj) do 53 | if tonumber(k) ~= nil then 54 | obj[k] = nil 55 | k = tonumber(k) 56 | obj[k] = v 57 | end 58 | 59 | if torch.isTypeOf(v, torch.CharTensor) or torch.isTypeOf(v, torch.ByteTensor) then 60 | obj[k] = 
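--[=[ [editor's note: illustrative sketch, not part of the original source]
hdf5_save/hdf5_load round-trip plain Lua tables through typed tensors: numbers
become 1-element DoubleTensors, strings become CharTensors of their bytes, booleans
become 0/1 IntTensors, and nested tables become HDF5 groups; this dfs undoes each
encoding (the branch here turns Char/Byte tensors back into strings). So a
checkpoint written as, hypothetically,

  hdf5_save('data/tmp.h5', {meta = {epoch = 3, subset = 'trainval', done = false}})

reads back via hdf5_load('data/tmp.h5') as an identical nested table, which is how
checkpoints keep their opts/meta alongside the raw parameter tensors.
]=]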
v:storage():string() 61 | elseif torch.isTypeOf(v, torch.DoubleTensor) and v:nElement() == 1 then 62 | obj[k] = v:squeeze() 63 | elseif torch.isTypeOf(v, torch.IntTensor) and v:nElement() == 1 and (v:squeeze() == 0 or v:squeeze() == 1) then 64 | obj[k] = v:squeeze() == 1 and true or false 65 | elseif type(v) == 'table' then 66 | dfs(v) 67 | end 68 | end 69 | end 70 | 71 | if type(res) == 'table' then 72 | dfs(res) 73 | end 74 | 75 | return res 76 | end 77 | 78 | json_load = rapidjson.load 79 | json_save = function(path, obj) rapidjson.dump(obj, path, {pretty = true, sort_keys = true}) end 80 | 81 | function area_1(box) 82 | return (box[3] - box[1] + 1) * (box[4] - box[2] + 1) 83 | end 84 | 85 | function overlap(box1, box2) 86 | if torch.isTensor(box2) and box2:dim() == 2 then 87 | local res = box2.new(box2:size(1)) 88 | for i = 1, res:nElement() do 89 | res[i] = overlap(box1, box2[i]) 90 | end 91 | return res 92 | end 93 | 94 | local a1 = area_1(box1) 95 | local a2 = area_1(box2) 96 | 97 | local xx1 = math.max(box1[1], box2[1]) 98 | local yy1 = math.max(box1[2], box2[2]) 99 | local xx2 = math.min(box1[3], box2[3]) 100 | local yy2 = math.min(box1[4], box2[4]) 101 | 102 | local w = math.max(0.0, xx2 - xx1 + 1) 103 | local h = math.max(0.0, yy2 - yy1 + 1) 104 | local inter = w * h 105 | 106 | local ovr = inter / (a1 + a2 - inter) 107 | return ovr 108 | end 109 | 110 | function localizeMaxBox3d(scores, rois) 111 | if torch.isTensor(scores) and torch.isTensor(rois) then 112 | assert(scores:dim() == 3) -- numSamples x numClasses x numRois 113 | assert(rois:dim() == 3) -- numSamples x numRois x 4 114 | 115 | return rois:gather(2, ({scores:max(3)})[2]:expand(scores:size(1), scores:size(2), rois:size(3))) 116 | else 117 | assert(#scores == #rois) 118 | local res = torch.FloatTensor(#scores, scores[1]:size(1), 4) 119 | for exampleIdx = 1, res:size(1) do 120 | res[exampleIdx]:copy(rois[exampleIdx]:gather(1, ({scores[exampleIdx]:max(2)})[2]:expand(scores[exampleIdx]:size(1), rois[exampleIdx]:size(rois[exampleIdx]:dim())))) 121 | end 122 | return res 123 | end 124 | end 125 | 126 | function corloc(dataset_subset, localizedBoxes, classLabelInd) 127 | return mIOU(dataset_subset, localizedBoxes, 0.5, classLabelInd) 128 | end 129 | 130 | function mIOU(dataset_subset, localizedBoxes, corlocThreshold, classLabelInd) 131 | if type(localizedBoxes) == 'table' then 132 | localizedBoxes = localizeMaxBox3d(unpack(localizedBoxes)) 133 | end 134 | assert(localizedBoxes:dim() == 3 and localizedBoxes:size(3) == 4) 135 | local beg_classLabelInd = classLabelInd == nil and 1 or classLabelInd 136 | local end_classLabelInd = classLabelInd == nil and localizedBoxes:size(2) or classLabelInd 137 | 138 | local mIOUs = {} 139 | for classLabelInd = beg_classLabelInd, end_classLabelInd do 140 | local overlaps = {} 141 | for exampleIdx = 1, localizedBoxes:size(1) do 142 | local gtBoxes_ = dataset_subset:getGroundTruthBoxes(exampleIdx) 143 | local gtInds = gtBoxes_:select(2, 1):eq(classLabelInd):nonzero() 144 | if gtInds:nElement() > 0 then 145 | local gtBoxes = gtBoxes_:index(1, gtInds:squeeze(2)):narrow(2, 2, 4) 146 | local localizedBox = localizedBoxes[exampleIdx][classLabelInd] 147 | local maxOverlap = 0 148 | for i = 1, gtBoxes:size(1) do 149 | local o = overlap(gtBoxes[i], localizedBox) 150 | if corlocThreshold then 151 | o = o > corlocThreshold and 1 or 0 152 | end 153 | maxOverlap = math.max(maxOverlap, o) 154 | end 155 | table.insert(overlaps, maxOverlap) 156 | end 157 | end 158 | 159 | table.insert(mIOUs, 
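--[=[ [editor's note: illustrative sketch, not part of the original source]
corloc is mIOU with a 0.5 threshold: for every image containing a given class, the
top-scoring box counts as correct iff its IoU with some ground-truth box of that
class exceeds 0.5, and per-class hit rates are averaged. IoU uses inclusive pixel
areas (the +1 terms in area_1/overlap above). Worked example:

  overlap(torch.FloatTensor{0, 0, 9, 9}, torch.FloatTensor{5, 0, 14, 9})
  -- areas 100 and 100, intersection 5 * 10 = 50, union 150, IoU = 1/3

so two half-overlapping 10x10 boxes score about 0.33 and would not count as a
correct localization.
]=]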
torch.FloatTensor(#overlaps == 0 and {0.0} or overlaps):mean())
160 | end
161 | return torch.FloatTensor(mIOUs):mean()
162 | end
163 | 
164 | function nms_mask(boxes, scores, overlap_threshold, score_threshold)
165 | local function nmsEx(boxes, scores, mask)
166 | -- adapted from https://raw.githubusercontent.com/fmassa/object-detection.torch/master/nms.lua
167 | local xx1, yy1, xx2, yy2, w, h, area = boxes.new(), boxes.new(), boxes.new(), boxes.new(), boxes.new(), boxes.new(), boxes.new()
168 | local pick = torch.LongTensor()
169 | for classLabelInd = 1, scores:size(1) do
170 | local x1, y1, x2, y2 = boxes:select(2, 1), boxes:select(2, 2), boxes:select(2, 3), boxes:select(2, 4)
171 | area:cmul(x2 - x1 + 1, y2 - y1 + 1)
172 | pick:resize(area:size()):zero()
173 | 
174 | local _, I = scores[classLabelInd]:sort(1) -- ascending, so the best-scoring box sits at the end of I
175 | local overTh = scores[classLabelInd]:index(1, I):ge(score_threshold)
176 | if overTh:any() then
177 | I = I[overTh]
178 | else
179 | I:resize(0)
180 | end
181 | 
182 | local count = 1
183 | while I:numel() > 0 do
184 | local last = I:size(1)
185 | local i = I[last] -- greedily pick the highest-scoring remaining box
186 | 
187 | pick[count] = i
188 | count = count + 1
189 | 
190 | if last == 1 then
191 | break
192 | end
193 | 
194 | I = I[{{1, last-1}}]
195 | 
196 | xx1:index(x1, 1, I)
197 | yy1:index(y1, 1, I)
198 | xx2:index(x2, 1, I)
199 | yy2:index(y2, 1, I)
200 | 
201 | xx1:cmax(x1[i]) -- clip the remaining boxes against the picked box to get the intersection rectangles
202 | yy1:cmax(y1[i])
203 | xx2:cmin(x2[i])
204 | yy2:cmin(y2[i])
205 | 
206 | w:add(xx2, -1, xx1):add(1):cmax(0)
207 | h:add(yy2, -1, yy1):add(1):cmax(0)
208 | 
209 | local intersection = w:cmul(h) -- in-place: intersection aliases w
210 | local IoU = h -- reuse h's storage for the IoU values
211 | 
212 | xx1:index(area, 1, I) -- reuse xx1 to gather the areas of the remaining boxes
213 | IoU:cdiv(intersection, xx1 + area[i] - intersection)
214 | 
215 | I = I[IoU:le(overlap_threshold)] -- keep only boxes that do not overlap the picked box too much
216 | end
217 | 
218 | if count >= 2 then
219 | mask[classLabelInd]:scatter(1, pick[{{1, count-1}}], 1)
220 | end
221 | end
222 | end
223 | 
224 | local mask = {}
225 | 
226 | local threads = require 'threads'
227 | threads.Threads.serialization('threads.sharedserialize')
228 | local jobQueue = threads.Threads(16)
229 | for exampleIdx = 1, #scores do
230 | mask[exampleIdx] = torch.ByteTensor(scores[exampleIdx]:size()):zero()
231 | jobQueue:addjob(nmsEx, function() end, boxes[exampleIdx], scores[exampleIdx], mask[exampleIdx])
232 | end
233 | 
234 | jobQueue:synchronize()
235 | 
236 | return mask
237 | end
238 | 
--------------------------------------------------------------------------------
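--[=[ [editor's note: illustrative sketch, not part of the original source]
nms_mask runs greedy per-class NMS (keep the best-scoring box, suppress neighbors
above the IoU threshold) for every image on a thread pool and returns one
ByteTensor per image, shaped classes x rois, marking the surviving detections; this
is the mask the 'det' writer in pascal_voc.lua consumes. Hypothetical usage with
the thresholds from opts.lua:

  local mask = nms_mask(rois, scores, opts.NMS_OVERLAP_THRESHOLD, opts.NMS_SCORE_THRESHOLD)
  -- rois: list of numRois x 4 box tensors; scores: list of numClasses x numRois score tensors
  -- mask[i][c][r] == 1 keeps roi r of image i as a detection for class c
]=]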