├── README.md
├── experiments
    ├── avg
    │   ├── classbalance_all.jpg
    │   ├── clean.sh
    │   ├── coverage_all.jpg
    │   ├── dataloader.lua
    │   ├── datapreprocess.lua
    │   ├── dataworker.lua
    │   ├── model.lua
    │   ├── opt.lua
    │   ├── process_answers.py
    │   ├── run_cross_entropy.sh
    │   ├── run_soft_cross_entropy.sh
    │   ├── train.lua
    │   └── valdataworker.lua
    └── pool
    │   ├── classbalance_all.jpg
    │   ├── clean.sh
    │   ├── coverage_all.jpg
    │   ├── dataloader.lua
    │   ├── datapreprocess.lua
    │   ├── dataworker.lua
    │   ├── model.lua
    │   ├── opt.lua
    │   ├── process_answers.py
    │   ├── run_cross_entropy.sh
    │   ├── run_soft_cross_entropy.sh
    │   ├── train.lua
    │   └── valdataworker.lua
├── image_preprocess
    ├── README.md
    ├── extract_whole_image_features_compressed.lua
    └── transforms.lua
├── loss_implementations
    ├── README.md
    ├── caffe
    │   ├── README.md
    │   └── src
    │   │   └── caffe
    │   │       └── layers
    │   │           ├── softmax_loss_layer.cpp
    │   │           └── softmax_loss_layer.cu
    ├── pytorch
    │   ├── README.md
    │   └── torch
    │   │   ├── lib
    │   │       ├── THC
    │   │       │   └── generic
    │   │       │   │   └── THCTensor.c
    │   │       ├── THCUNN
    │   │       │   ├── ClassNLLCriterion.cu
    │   │       │   ├── SoftClassNLLCriterion.cu
    │   │       │   └── generic
    │   │       │   │   ├── ClassNLLCriterion.cu
    │   │       │   │   ├── SoftClassNLLCriterion.cu
    │   │       │   │   └── THCUNN.h
    │   │       └── THNN
    │   │       │   ├── generic
    │   │       │       ├── SoftClassNLLCriterion.c
    │   │       │       └── THNN.h
    │   │       │   └── init.c
    │   │   ├── nn
    │   │       ├── _functions
    │   │       │   ├── auto.py
    │   │       │   └── thnn
    │   │       │   │   └── auto.py
    │   │       ├── functional.py
    │   │       └── modules
    │   │       │   ├── __init__.py
    │   │       │   └── loss.py
    │   │   └── utils
    │   │       └── serialization
    │   │           └── read_lua_file.py
    └── torch
    │   ├── README.md
    │   └── extra
    │       ├── cunn
    │           └── lib
    │           │   └── THCUNN
    │           │       ├── SoftClassNLLCriterion.cu
    │           │       └── generic
    │           │           ├── SoftClassNLLCriterion.cu
    │           │           └── THCUNN.h
    │       └── nn
    │           ├── SoftClassNLLCriterion.lua
    │           └── init.lua
├── utils
    ├── README.md
    ├── extract_fastText
    │   └── vocab_to_fastText.lua
    ├── extract_glove
    │   └── vocab_to_glove.lua
    ├── fastText.hash
    ├── glove.hash
    ├── logger.lua
    ├── repl.lua
    └── util.lua
└── vqa2_data
    └── README.md


/README.md:
--------------------------------------------------------------------------------
 1 | Accompanying code for "[A Simple Loss Function for Improving the Convergence and Accuracy of Visual Question Answering Models](http://bit.ly/cvpr_vqa)" CVPR 2017 VQA workshop paper.
 2 | 
 3 | The repo contains code for reproducing the paper's experiments and an efficient GPU implementation of the proposed loss function for torch, pytorch, and caffe.
 4 | 
 5 | ## Requirements
 6 | 
 7 | To run the experiments you would first need to install torch from [https://github.com/torch/distro/](https://github.com/torch/distro/). We used torch version from commit `5c1d3cfda8101123628a45e70435d545ae1bc771` but later versions probably would work too.
 8 | 
 9 | After installing torch you will need to install the following useful lua libraries:
10 | 
11 | C data structures for torch [https://github.com/torch/tds](https://github.com/torch/tds), so we can allocate data in C memory space instead of lua's and thus avoid lua's memory limit and garbage collection.
12 | 
13 | `luarocks install tds`
14 | 
15 | RNN lib for torch [https://github.com/Element-Research/rnn](https://github.com/Element-Research/rnn) for mask zero lookuptable and other useful modules.
16 | 
17 | `luarocks install rnn`
18 | 
19 | threads for lua [https://github.com/torch/threads](https://github.com/torch/threads) for multi-threaded code.
20 | 
21 | `luarocks install threads`
22 | 
23 | The following libraries are used by the code as-is, but you can modify the code to run the experiments without them. However, we recommend installing them anyway.
24 | 
25 | fb-debugger, a source-level debugger for lua.
26 | 
27 | Follow the install instructions at [https://github.com/facebook/fblualib/blob/master/fblualib/debugger/README.md](https://github.com/facebook/fblualib/blob/master/fblualib/debugger/README.md).
28 | 
29 | OptNet - Reducing memory usage in torch neural nets [https://github.com/fmassa/optimize-net](https://github.com/fmassa/optimize-net).
30 | 
31 | `luarocks install optnet`
32 | 
33 | Visdom for visualization [https://github.com/facebookresearch/visdom](https://github.com/facebookresearch/visdom).
34 | 
35 | ```
36 | pip install visdom
37 | luarocks install visdom
38 | ```
39 | 
40 | ## Installation
41 | 
42 | We provide a GPU implementation of the loss function for **torch**, **pytorch**, and **caffe**. 
43 | cd to `loss_implementations` to read further instructions on how to add the loss function to your framework's installation. 
44 | 
45 | 
46 | ## Experiments
47 | 
48 | First, head over to the `image_preprocess` folder and follow the instructions there to extract feature tensors for MS COCO images. 
49 | 
50 | After obtaining the image feature tensors, head over to `experiments/pool` or `experiments/avg` to run the experiments reported in the paper.
51 | To run the models with the proposed loss function run:
52 | 
53 | `gpu=0 ./run_soft_cross_entropy.sh`
54 | 
55 | For standard cross entropy run:
56 | 
57 | `gpu=0 ./run_cross_entropy.sh`
58 | 
59 | `gpu=0` specifies the ID of the GPU to be used, i.e., it is an alias for `CUDA_VISIBLE_DEVICES=0`.
60 | 
61 | 


--------------------------------------------------------------------------------
/experiments/avg/classbalance_all.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ili3p/vqa-soft/c28c3414673adf80620f08e713274d8aed2edfea/experiments/avg/classbalance_all.jpg


--------------------------------------------------------------------------------
/experiments/avg/clean.sh:
--------------------------------------------------------------------------------
1 | /bin/rm -r checkpoints/  # remove saved model checkpoints (opt.lua: save_dir = './checkpoints/')
2 | /bin/rm -r logs/  # remove log output (opt.lua: log_dir = './logs/')
3 | 


--------------------------------------------------------------------------------
/experiments/avg/coverage_all.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ili3p/vqa-soft/c28c3414673adf80620f08e713274d8aed2edfea/experiments/avg/coverage_all.jpg


--------------------------------------------------------------------------------
/experiments/avg/dataloader.lua:
--------------------------------------------------------------------------------
  1 | local data = require'./datapreprocess.lua'
  2 | -- data private variables
  3 | local _lookuptable
  4 | -- train_tid2img maps a question tensor index to an image filename
  5 | local train_questions, train_tid2img, train_answers
  6 | local cache, itensor, storage
  7 | local val_cache, val_itensor, val_storage
  8 | 
  9 | -- for debugging purposes
 10 | local vocab, train_tid2qid, test_tid2qid
 11 | -- test_answers only not nil when test is val set
 12 | local test_questions, test_tid2img, soft_test_answers, test_answers, test_tid2anstype
 13 | 
 14 | local threads = require'threads'
 15 | threads.Threads.serialization('threads.sharedserialize')
 16 | local valbatch
 17 | local permutation, epoch_size, val_epoch_size
 18 | local opt, logger
 19 | 
 20 | local buffer = {} -- finished train batches; slot = iter%opt.buffer_size + 1, filled by _addwork's endcallback
 21 | local in_buffer = tds.Vec() -- per-slot flag: true once a batch is stored, false after _getbatch consumes it
 22 | local running = tds.Vec() -- per-slot flag: true while a job for that slot is in flight
 23 | local last_iter = 0 -- highest train iteration passed to _addwork so far
 24 | local epoch = 0 -- incremented by _getbatch every time iter%epoch_size == 0
 25 | 
 26 | local val_buffer = {} -- same scheme as `buffer`, sized by opt.val_buffer_size
 27 | local val_in_buffer = tds.Vec() -- validation counterpart of in_buffer
 28 | local val_running = tds.Vec() -- validation counterpart of running
 29 | local val_last_iter = 0 -- highest validation iteration passed to _addvalwork so far
 30 | 
 31 | local _addwork = function(iter)
 32 |    -- Queues loading of training batch `iter` on `pool`; the result is stored in buffer slot iter%opt.buffer_size + 1.
 33 |    local work = function(iter, in_buffer, running)
 34 |       if not running[iter%opt.buffer_size + 1] and not in_buffer[iter%opt.buffer_size + 1] then
 35 |          running[iter%opt.buffer_size + 1] = true
 36 |          return worker.dowork(train_questions, train_tid2img, train_answers, cache, itensor, permutation, iter) -- `worker` is the global set in the pool's thread-init function
 37 |       else -- buffer is full, must wait
 38 |          logger.warn('Train buffer is full at iteration ' .. iter)
 39 |          return -1 -- sentinel: endcallback ignores this result, so no batch is produced for `iter`
 40 |       end
 41 |    end
 42 |    local endcallback = function(iter, ...) -- receives whatever `work` returned
 43 |       if iter ~= -1 then
 44 |          assert(not buffer[iter%opt.buffer_size + 1]) -- the slot must have been consumed before it is reused
 45 |          buffer[iter%opt.buffer_size + 1] = {...} -- everything dowork returned after `iter`
 46 |          in_buffer[iter%opt.buffer_size + 1] = true
 47 |          running[iter%opt.buffer_size + 1] = false
 48 |       end
 49 |    end
 50 |    -- Remember the highest iteration requested so far; _getbatch schedules last_iter + 1 next.
 51 |    last_iter = math.max(iter, last_iter)
 52 | 
 53 |    pool:addjob(work, endcallback, iter, in_buffer, running)
 54 | end
 55 | 
 56 | local _getbatch = function(iter)
 57 |    local t = sys.clock()
 58 |    -- Returns the contents of buffer slot iter%opt.buffer_size + 1 (what worker.dowork returned after `iter`), polling the pool until that slot is filled.
 59 |    if iter%epoch_size == 0 then -- epoch boundary: drain pending jobs, reshuffle, then refill the pipeline
 60 |       while pool:hasjob() do
 61 |          pool:dojob()
 62 |       end
 63 |       permutation = torch.randperm(permutation:size(1))
 64 |       epoch = epoch + 1
 65 |       for i=1,opt.dataworkers-1 do 
 66 |          _addwork(iter + i)
 67 |       end
 68 |    end
 69 |    if (last_iter-(epoch*epoch_size)) + 1 <= epoch_size then -- prefetch one more batch only while it still belongs to the current epoch
 70 |       _addwork(last_iter + 1)
 71 |    end
 72 |    -- Wait for the slot that belongs to the requested iteration.
 73 |    local ind = iter%opt.buffer_size + 1
 74 |    local c = 0
 75 |    while not buffer[ind] do -- wait for this specific batch
 76 |       pool:dojob()
 77 |       c = c + 1
 78 |       if c > 10 then -- NOTE(review): warns on every poll after the 10th, not just once
 79 |          logger.warn('Stuck')
 80 |       end
 81 |    end
 82 |    if c > 1 then
 83 |      logger.trace('Got batch after '..c..' tries.')
 84 |    end
 85 |    local batch = buffer[ind]
 86 |    buffer[ind] = nil -- free the slot so a later iteration can reuse it
 87 |    in_buffer[ind] = false
 88 |    logger.trace('dojob', (sys.clock()-t))
 89 |    t = sys.clock()
 90 |    logger.trace('_addwork', (sys.clock()-t)) -- NOTE(review): t was reset on the previous line, so this always traces ~0
 91 |    return table.unpack(batch)
 92 | end
 93 | 
 94 | local _addvalwork = function(iter) -- Queues loading of validation batch `iter` on `valpool`; result goes to val_buffer slot iter%opt.val_buffer_size + 1.
 95 |    local work = function(iter, val_in_buffer, val_running) 
 96 |       if not val_running[iter%opt.val_buffer_size + 1] and not val_in_buffer[iter%opt.val_buffer_size + 1] then
 97 |          val_running[iter%opt.val_buffer_size + 1] = true
 98 |          return valworker.dowork(test_questions, test_tid2img, test_answers, soft_test_answers, val_cache, val_itensor, test_tid2anstype, iter) -- `valworker` is the global set in valpool's thread-init function
 99 |       else -- slot is still running or not yet consumed
100 |          logger.warn('Val buffer is full at iteration '.. iter)
101 |          return -1 -- sentinel: endcallback ignores this result
102 |       end
103 |    end
104 |    local endcallback = function(iter, ...) -- receives whatever `work` returned
105 |       if iter ~= -1 then
106 |          assert(not val_buffer[iter%opt.val_buffer_size +1]) -- the slot must have been consumed before it is reused
107 |          val_buffer[iter%opt.val_buffer_size + 1] = {...}
108 |          val_in_buffer[iter%opt.val_buffer_size + 1] = true
109 |          val_running[iter%opt.val_buffer_size + 1] = false 
110 |       end
111 |    end
112 |    -- Remember the highest validation iteration requested so far; _getvalbatch schedules val_last_iter + 1 next.
113 |    val_last_iter = math.max(iter, val_last_iter)
114 | 
115 |    valpool:addjob(work, endcallback, iter, val_in_buffer, val_running)
116 | end
117 | 
118 | local _getvalbatch = function(iter)
119 |    local t = sys.clock()
120 |    -- Returns the contents of val_buffer slot iter%opt.val_buffer_size + 1, first scheduling one more validation batch, then polling until the slot is filled.
121 |    _addvalwork(val_last_iter + 1)
122 | 
123 |    local ind = iter%opt.val_buffer_size + 1
124 |    local c = 0
125 |    while not val_buffer[ind] do -- wait for this specific batch
126 |       valpool:dojob()
127 |       c = c + 1
128 |       if c > 10 then -- NOTE(review): warns on every poll after the 10th, not just once
129 |          logger.warn('Stuck at val')
130 |       end
131 |    end
132 |    if c > 1 then
133 |      logger.trace('Got batch after '..c..' tries.')
134 |    end
135 |    local batch = val_buffer[ind]
136 |    val_buffer[ind] = nil -- free the slot so a later iteration can reuse it
137 |    val_in_buffer[ind] = false
138 |    logger.trace('dojob', (sys.clock()-t))
139 |    t = sys.clock()
140 |    logger.trace('_addwork', (sys.clock()-t)) -- NOTE(review): t was reset on the previous line, so this always traces ~0
141 |    return table.unpack(batch)
142 | end
143 | 
144 | local _setvalworkers = function() -- Queues validation batches 1..opt.dataworkers on valpool.
145 |    for i=1, opt.dataworkers do -- NOTE(review): _init primes the val pool with opt.val_dataworkers; confirm opt.dataworkers is intended here
146 |       _addvalwork(i)
147 |    end
148 | end
149 | 
150 | local _getvocab = function() -- Returns q_data.vocab as stored by _init (nil until _init has run).
151 |    return vocab
152 | end
153 | 
154 | local _getlookup = function() -- Returns the result of data.get_lookup() stored by _init (nil until _init has run).
155 |    return _lookuptable
156 | end
157 | 
158 | local _init = function(_opt, _logger)
159 |    opt = _opt 
160 |    logger = _logger
161 |    paths.mkdir('./data/')
162 |    -- Loads the preprocessed data, starts the train/val worker pools and primes both buffers.
163 |    -- not used, better to leave it to the OS to do RAM caching 
164 |    cache = {} -- stays empty in this file, so dataworker always takes its torch.load path
165 |    val_cache = {}
166 | 
167 |    local t = sys.clock()
168 |    local q_data = data.get_qdata()
169 |    local answers, soft_answers = data.get_answers()
170 |    _lookuptable = data.get_lookup()
171 | 
172 |    data.clean()
173 | 
174 |    train_questions = q_data.train_questions
175 |    train_tid2img = q_data.train_tid2img
176 |    train_tid2qid = q_data.train_tid2qid
177 |    -- Soft criteria (name contains 'Soft') train on soft_answers, all others on answers.
178 |    train_answers = (opt.criterion:find('Soft') and soft_answers or answers)['train_answers']
179 | 
180 |    vocab = q_data.vocab
181 | 
182 |    test_questions = q_data.test_questions
183 |    test_tid2img = q_data.test_tid2img
184 |    test_tid2qid = q_data.test_tid2qid
185 |    test_tid2anstype = q_data.test_tid2anstype
186 | 
187 |    test_answers = answers['test_answers']
188 |    soft_test_answers = soft_answers['test_answers']
189 | 
190 |    logger.info('Vocab size '..#vocab)
191 |    logger.info('Train questions '..train_questions:size(1))
192 |    logger.info('Test questions ' ..test_questions:size(1))
193 | 
194 |    permutation = torch.randperm(train_questions:size(1))
195 |    epoch_size = math.floor(train_questions:size(1)/opt.batch_size) -- floor: a trailing partial train batch is dropped
196 |    val_epoch_size = math.ceil(test_questions:size(1)/opt.val_batch_size) -- ceil: the last val batch may be partial
197 |    -- Answer augmentation is only kept for soft criteria; otherwise it is forced to false.
198 |    opt.ans_aug = opt.criterion:find('Soft') and opt.ans_aug or false
199 |    -- NOTE(review): pool and valpool are assigned without `local`, i.e. they are globals.
200 |    pool = threads.Threads(opt.dataworkers, 
201 |             function(threadid) -- first init function: load the libraries each worker thread needs
202 |                require'sys' 
203 |                require'torch' 
204 |                require'xlua'
205 |                require'cunn'
206 |                require'torchzlib'
207 |                tds = require'tds'
208 |             end, 
209 |             function() -- second init function: per-thread seeding and worker module setup
210 |                torch.manualSeed(opt.rnd_seed + __threadid)
211 |                cutorch.manualSeed(opt.rnd_seed + __threadid)
212 |                torch.setnumthreads(1)
213 |                worker = paths.dofile('./dataworker.lua') -- global inside the worker thread; used by _addwork's job
214 |                worker.init(opt.batch_size, epoch_size, logger, opt.que_len, opt.img_dir, opt.criterion:find('Soft'), opt.ans_aug)
215 |             end)
216 | 
217 |    valpool = threads.Threads(opt.val_dataworkers, 
218 |             function(threadid) -- first init function: load the libraries each worker thread needs
219 |                require'sys' 
220 |                require'torch' 
221 |                require'xlua'
222 |                require'cunn'
223 |                require'torchzlib'
224 |                tds = require'tds'
225 |             end, 
226 |             function() -- second init function: per-thread seeding and worker module setup
227 |                torch.manualSeed(opt.rnd_seed + __threadid)
228 |                cutorch.manualSeed(opt.rnd_seed + __threadid)
229 |                torch.setnumthreads(1)
230 |                valworker = paths.dofile('./valdataworker.lua') -- global inside the worker thread; used by _addvalwork's job
231 |                valworker.init(opt.val_batch_size, val_epoch_size, logger, opt.que_len, opt.img_dir)
232 |             end)
233 |    -- Prime both buffers with one job per worker thread.
234 |    for iter=1, opt.dataworkers do 
235 |       _addwork(iter) 
236 |    end
237 |    for iter=1, opt.val_dataworkers do
238 |       _addvalwork(iter)
239 |    end
240 | 
241 |    logger.trace('init', (sys.clock()-t))
242 |    -- NOTE(review): overrides opt.word2vec after _lookuptable was already fetched and stores a second lookup in the global `ltbl`; confirm who reads it.
243 |    opt.word2vec = '../../utils/glove.hash'
244 |    ltbl = data.get_lookup()
245 | 
246 |    collectgarbage()
247 |    return epoch_size, val_epoch_size, test_questions:size(1) -- train iterations per epoch, val iterations per epoch, number of test questions
248 | end
249 | 
250 | return { -- public interface of the data loader module
251 |    init = _init,
252 |    getbatch = _getbatch,
253 |    getvalbatch = _getvalbatch,
254 |    setvalworkers = _setvalworkers,
255 |    getlookup = _getlookup,
256 |    getvocab = _getvocab,
257 |    test_tid2qid = test_tid2qid, -- NOTE(review): evaluated when the module loads, before _init assigns it, so this field is nil
258 |    train_tid2qid = train_tid2qid, -- NOTE(review): same as above, nil at load time
259 |    answer_data = data.answer_data,
260 | }
261 | 


--------------------------------------------------------------------------------
/experiments/avg/dataworker.lua:
--------------------------------------------------------------------------------
 1 | local bsz, epoch_size, que, ans, img, loaded, ans_aug 
 2 | local logger, img_dir
 3 | 
 4 | local _init = function(_bsz, _epoch_size, _logger, que_len, _img_dir, _soft_ans, _ans_aug) -- Stores the worker settings and preallocates the staging tensors that _dowork fills.
 5 |    bsz = _bsz
 6 |    epoch_size = _epoch_size
 7 |    ans_aug = _ans_aug
 8 |    que = torch.FloatTensor(bsz, que_len) -- one row of que_len values per question
 9 |    img = torch.FloatTensor(bsz, 2048, 14, 14) -- one 2048x14x14 feature tensor per sample
10 |    if _soft_ans then
11 |       if ans_aug then
12 |          ans = torch.FloatTensor(bsz, 1, 2) -- a single sampled answer row per sample
13 |       else
14 |          ans = torch.FloatTensor(bsz, 10, 2) -- ten 2-value answer rows per sample; presumably (answer id, weight), confirm in datapreprocess.lua
15 |       end
16 |    else
17 |       ans = torch.FloatTensor(bsz, 11) -- eleven values per sample; presumably the MC answer plus ten annotator answers, confirm in datapreprocess.lua
18 |    end
19 |    -- loaded is reset for every batch in _dowork.
20 |    loaded = {}
21 |    logger = _logger
22 |    img_dir = _img_dir
23 | end
24 | 
25 | local _dowork = function(questions, mapping, answers, cache, itensor, permutation, iter)
26 |    -- Assembles training batch `iter`. Returns iter, then clones of the question, image and answer tensors, then a tds.Hash of the image files used.
27 |    local t = sys.clock()
28 |    collectgarbage()
29 |    logger.trace('collectgarbage',(sys.clock()-t))
30 | 
31 |    assert(answers)
32 | 
33 |    loaded = {}
34 |    for i=1, bsz do
35 |       local index = permutation[(i + bsz*((iter-1)%epoch_size))] -- i-th sample of this batch within the shuffled epoch
36 |       -- Use the cached tensor when the file is in `cache`, otherwise load and decompress it from img_dir.
37 |       img[i] = cache[mapping[index]] and itensor[cache[mapping[index]]] or torch.load(img_dir .. mapping[index]):decompress()
38 | 
39 |       que[i] = questions[index]
40 |       if ans_aug then 
41 |          local k = 10 -- all 10 answer rows are usable unless a row whose first value is 0 is found below
42 |          for j=2, 10 do -- j, not i: keep the batch index of the outer loop unshadowed
43 |             if answers[index][j][1] == 0 then
44 |                k = j-1
45 |                break
46 |             end
47 |          end
48 |          -- k now counts the leading rows before the first zero row; pick one of them uniformly at random.
49 |          k = torch.random(k)
50 |          ans[i][1] = answers[index][k]
51 |       else
52 |          ans[i] = answers[index]
53 |       end
54 |       loaded[i] = mapping[index]
55 |    end
56 | 
57 |    logger.trace('_dowork',(sys.clock()-t))
58 |    return iter, que:clone(), img:clone(), ans:clone(), tds.Hash(loaded) -- clones, because the staging tensors are reused by the next job
59 | end
60 | 
61 | return { -- interface used by the loader's worker threads
62 |    init = _init, -- called once per thread from the pool's init function
63 |    dowork = _dowork, -- called once per queued training batch
64 | }
65 | 


--------------------------------------------------------------------------------
/experiments/avg/model.lua:
--------------------------------------------------------------------------------
 1 | local nninit = require 'nninit'
 2 | local M = {}
 3 | 
 4 | M.new = function(opt, ltbl)
 5 |    -- Builds the network: a question branch and an image branch in a ParallelTable, fused by CMulTable, followed by a two-layer classifier.
 6 |    local model = nn.Sequential()
 7 |          :add(nn.ParallelTable()
 8 |             :add(nn.Sequential() -- question branch: lookup table -> dropout -> activation -> GRU -> Select -> batch norm
 9 |                :add(ltbl)
10 |                :add(nn.Dropout(opt.dropout, true))
11 |                :add(cudnn[opt.activation](true)) 
12 |                -- :add(nn.SeqGRU(opt.vec_len, opt.size_rnn)
13 |                :add(cudnn.GRU(opt.vec_len, opt.size_rnn, opt.size_rnn_layer, true) -- 4th argument is presumably batchFirst; confirm against cudnn.torch
14 |                   :init('weight', nninit.uniform, -0.08, 0.08)) 
15 |                :add(nn.Select(2, -1)) -- keeps the last index along dimension 2 (presumably the last time step)
16 |                :add(cudnn.BatchNormalization(opt.size_rnn))
17 |             )
18 |             :add(nn.Sequential() -- image branch: 14x14 average pooling -> flatten -> two Linear layers, each followed by batch norm and activation
19 |                :add(cudnn.SpatialAveragePooling(14,14,1,1))
20 |                :add(nn.View(opt.size_image))
21 |                :add(cudnn.BatchNormalization(opt.size_image))
22 |                :add(nn.Dropout(opt.dropout, true))
23 |                :add(nn.Linear(opt.size_image, opt.size_rnn*2)
24 |                :init('weight', nninit.xavier, {dist='normal', gain=opt.img_activation:lower()}))
25 |                :add(cudnn.BatchNormalization(opt.size_rnn*2))
26 |                :add(cudnn[opt.img_activation](true))
27 |                :add(nn.Dropout(opt.dropout, true))
28 |                :add(nn.Linear(opt.size_rnn*2, opt.size_rnn)
29 |                :init('weight', nninit.xavier, {dist='normal', gain=opt.img_activation:lower()}))
30 |                :add(cudnn.BatchNormalization(opt.size_rnn))
31 |                :add(cudnn[opt.img_activation](true))
32 |             )
33 |          )
34 |          :add(nn.CMulTable()) -- fuses the two branch outputs (both of size opt.size_rnn) by element-wise product
35 |          :add(cudnn.BatchNormalization(opt.size_rnn))
36 |          :add(nn.Dropout(opt.dropout, true))
37 |          :add(nn.Linear(opt.size_rnn, opt.size_classifier)
38 |          :init('weight', nninit.xavier, {dist='normal', gain=opt.activation:lower()}))
39 |          :add(cudnn.BatchNormalization(opt.size_classifier))
40 |          :add(cudnn[opt.activation](true))  
41 |          :add(nn.Dropout(opt.dropout, true))
42 |          :add(nn.Linear(opt.size_classifier, opt.answer_count) -- final layer: one output per answer class, no softmax added here
43 |          :init('weight', nninit.xavier, {dist='normal', gain='linear'}))
44 | 
45 |      -- model.modules[1].modules[1].modules[4]:maskZero(1)
46 |    -- The assembled nn.Sequential is returned as-is.
47 |    return model
48 | end
49 | 
50 | return M
51 | 


--------------------------------------------------------------------------------
/experiments/avg/opt.lua:
--------------------------------------------------------------------------------
  1 | return {
  2 |    -- model loading and saving
  3 |    model          = './model.lua',
  4 |    eval           = true,
  5 |    log_loss_every = 350,
  6 |    save_dir       = './checkpoints/', 
  7 |    save_after     = 999999, -- in iterations
  8 |    eval_after     = 1, -- in epochs 
  9 |    criterion      = 'SoftClassNLLCriterion',
 10 |    checkpoint     = '',
 11 |    start_iter     = 1,
 12 |    -- model definitions
 13 |    size_multi      = 1500,
 14 |    size_common     = 1500,
 15 |    size_classifier = 3000,
 16 |    size_image      = 2048,
 17 |    size_rnn        = 2400,
 18 |    size_rnn_layer  = 1,
 19 |    -- training related
 20 |    max_iter       = 432000, -- about 70 epochs
 21 |    learning_rate  = 1e-4,
 22 |    momentum       = 0.9,
 23 |    batch_size     = 64,
 24 |    val_batch_size = 96,
 25 |    gradclip       = 0, -- 0 means disabled
 26 |    dropout        = 0.5,
 27 |    rnn_dropout    = 0.0,
 28 |    activation     = 'Tanh',
 29 |    img_activation = 'Tanh',
 30 |    glimpse        = 2,
 31 |    showprogress  = true,
 32 |    repl          = false,
 33 |    -- logging related
 34 |    log_dir        = './logs/',
 35 |    log_level      = 3, -- 1:trace, 2:debug, 3:info, 4:warn, 5:error, 6:fatal
 36 |    log_to_console = true,
 37 |    log_to_file    = true,
 38 |    version = 'vanila',
 39 |    -- plot related
 40 |    showplot      = false,
 41 |    plot_every    = 1000,
 42 |    plot_server   = 'http://localhost',
 43 |    plot_port     = 8097,
 44 |    -- misc
 45 |    num_gpus = 1,
 46 |    rnd_seed = 139, 
 47 | 
 48 |    -- data related
 49 |    
 50 |    -- number of dataloading threads
 51 |    dataworkers = 4,
 52 |    buffer_size = 8,
 53 | 
 54 |    val_dataworkers = 2,
 55 |    val_buffer_size = 8,
 56 | 
 57 |    img_dir = '../../resnet_features/',
 58 | 
 59 |    que_train   = '../../vqa2_data/v2_OpenEnded_mscoco_train2014_questions.json',
 60 |    que_val     = '../../vqa2_data/v2_OpenEnded_mscoco_val2014_questions.json',
 61 |    -- change this to point to test std json when needed
 62 |    que_test    = '../../vqa2_data/v2_OpenEnded_mscoco_test-dev2015_questions.json',
 63 | 
 64 |    ans_train    = '../../vqa2_data/v2_mscoco_train2014_annotations.json',
 65 |    ans_val      = '../../vqa2_data/v2_mscoco_val2014_annotations.json',
 66 | 
 67 | 
 68 |    -- train on trainval or just train
 69 |    train_on_val = false,
 70 |    -- left aligned questions for MLP and right for RNN language model
 71 |    left_aligned = false,
 72 | 
 73 |    -- at least how many times should the word appear in train set to be in vocab
 74 |    word_freq = 1,
 75 |    que_len = 10,
 76 |    ans_type = 'all', -- yes-no number other 
 77 |    ans_aug = false,
 78 | 
 79 |    -- outputs file prefix, the full filename depends on the options
 80 |    train_questions    = './data/train_questions',
 81 |    train_tid2qid      = './data/train_tid2qid',
 82 |    train_tid2img      = './data/train_tid2img',
 83 | 
 84 |    test_questions     = './data/test_questions',
 85 |    test_tid2qid       = './data/test_tid2qid',
 86 |    test_tid2img       = './data/test_tid2img',
 87 |    test_tid2anstype   = './data/test_tid2anstype', 
 88 | 
 89 |    vocab          = './data/vocab', 
 90 | 
 91 |    lookuptable    = './data/lookup',
 92 | 
 93 |    -- which word2vec to use
 94 |    word2vec = '../../utils/fastText.hash',
 95 |    vec_len  = 300,
 96 | 
 97 |    answer_count = 3000,
 98 | 
 99 |    -- outputs file prefix, the full filename depends on the options
100 |    qid2ans      = './data/qid2ans',
101 |    qid2type     = './data/qid2type',
102 |    qid2anstype  = './data/qid2anstype',
103 |    ans_id2str   = './data/ans_id2str',
104 | 
105 |    -- outputs file prefix, the full filename depends on the options
106 |    train_answers = './data/train_answers',
107 |    test_answers = './data/test_answers',
108 | }
109 | 
110 | 


--------------------------------------------------------------------------------
/experiments/avg/process_answers.py:
--------------------------------------------------------------------------------
  1 | import pdb
  2 | import matplotlib
  3 | matplotlib.use('Agg')
  4 | import matplotlib.pyplot as plt
  5 | plt.ioff()
  6 | import argparse
  7 | import sys
  8 | import numpy as np
  9 | import ujson
 10 | import operator 
 11 | from time import strftime, localtime
 12 | 
 13 | def log(msg):  # Print msg to stdout, prefixed with the local time and the script name.
 14 |     msg = str(msg)  # accept any object, not just strings
 15 |     log_prefix ='['+strftime("%H:%M:%S", localtime())+']  process_answers.py:\033[1;32m '  # \033[1;32m starts bold green text
 16 |     print(log_prefix + msg + '\033[0m')  # \033[0m resets the terminal colour
 17 | 
 18 | 
 19 | parser = argparse.ArgumentParser(description='Process answers')
 20 | parser.add_argument('--input_train')
 21 | parser.add_argument('--input_val')
 22 | parser.add_argument('--output_qid2ans')
 23 | parser.add_argument('--output_qid2type')
 24 | parser.add_argument('--output_qid2anstype')
 25 | parser.add_argument('--output_ans_id2str')
 26 | parser.add_argument('--ans_type')
 27 | parser.add_argument('--answer_count', type=int)
 28 | parser.add_argument('--train_on_val', type=bool)
 29 | args = parser.parse_args()
 30 | 
 31 | counts = {}
 32 | mapping = {}
 33 | allans = {}
 34 | answers = []
 35 | qid2ans = {}
 36 | ans2id = {}
 37 | qid2type = {}
 38 | qid2anstype = {}
 39 | 
 40 | log('Train on val is '+str(args.train_on_val))
 41 | log('Reading '+args.input_train)
 42 | data = ujson.load(open(args.input_train))
 43 | log('Processing '+args.input_train)
 44 | for answer in data['annotations']:
 45 |     if args.ans_type != 'all' and answer['answer_type'] != args.ans_type:
 46 |         continue
 47 |     if answer['answer_type'] == 'yes/no':
 48 |         skip = False
 49 |         for a in answer['answers']:
 50 |             if not (a['answer'] == 'no' or a['answer'] == 'yes'):
 51 |                 skip = True
 52 |                 break
 53 |         if skip:
 54 |             continue
 55 |     ans_str = answer['multiple_choice_answer']
 56 |     qid = answer['question_id']
 57 |     qid2type[qid] = answer['question_type']
 58 |     qid2anstype[qid] = answer['answer_type']
 59 |     mapping[ans_str] = mapping[ans_str] if mapping.has_key(ans_str) else []
 60 |     mapping[ans_str].append(qid)
 61 |     cnt = counts[ans_str] if counts.has_key(ans_str) else 0
 62 |     counts[ans_str] = cnt + 1
 63 |     allans[qid] = allans[qid] if allans.has_key(qid) else []
 64 |     for a in answer['answers']:
 65 |         allans[qid].append(a['answer'])
 66 | 
 67 | if args.train_on_val:
 68 |     log('Processing ' + args.input_val)
 69 |     data = ujson.load(open(args.input_val))
 70 |     for answer in data['annotations']:
 71 |         if args.ans_type != 'all' and  answer['answer_type'] != args.ans_type:
 72 |             continue
 73 |         ans_str = answer['multiple_choice_answer']
 74 |         qid = answer['question_id']
 75 |         qid2type[qid] = answer['question_type']
 76 |         qid2anstype[qid] = answer['answer_type']
 77 |         mapping[ans_str] = mapping[ans_str] if mapping.has_key(ans_str) else []
 78 |         mapping[ans_str].append(qid)
 79 |         cnt = counts[ans_str] if counts.has_key(ans_str) else 0
 80 |         counts[ans_str] = cnt + 1
 81 |         allans[qid] = allans[qid] if allans.has_key(qid) else []
 82 |         for a in answer['answers']:
 83 |             allans[qid].append(a['answer'])
 84 | 
 85 | sorted_counts = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
 86 | 
 87 | 
 88 | log('Preparing answer set.')
 89 | # get most frequent answers
 90 | for i in range(min(args.answer_count, len(sorted_counts))):
 91 |     ans_str = sorted_counts[i][0]
 92 |     answers.append(ans_str)
 93 |     ans2id[ans_str] = i + 1 # lua is 1-index 
 94 | 
 95 | 
 96 | 
 97 | answer_count = len(qid2type) # + 214354  # train + val, one per question not all 10
 98 | 
 99 | all_questions = 0
100 | # log(answer_count)
101 | # log(answers)
102 | if not args.train_on_val:
103 |     # process val answers
104 |     log('Processing '+args.input_val+' as test set.')
105 |     data = ujson.load(open(args.input_val))
106 |     for answer in data['annotations']:
107 |         if args.ans_type != 'all' and  answer['answer_type'] != args.ans_type:
108 |             continue
109 |         all_questions = all_questions + 1
110 |         ans_str = answer['multiple_choice_answer']
111 |         qid = answer['question_id']
112 |         qid2type[qid] = answer['question_type']
113 |         qid2anstype[qid] = answer['answer_type']
114 |         if mapping.has_key(ans_str):
115 |             mapping[ans_str].append(qid)
116 |             allans[qid] = allans[qid] if allans.has_key(qid) else []
117 |             for a in answer['answers']:
118 |                 allans[qid].append(a['answer'])
119 | 
120 | log('All questions: '+str(all_questions))
121 | log('Preparing mappings...')
122 | # make que_id to ans_id mapping
123 | for ans_str in answers:
124 |     for qid in mapping[ans_str]:
125 |         qid2ans[qid] = [ans2id[ans_str]] # first element is the MC answer
126 |         for a in allans[qid]:
127 |             if ans2id.has_key(a): 
128 |                 qid2ans[qid].append(ans2id[a])
129 |             else:
130 |                 qid2ans[qid].append(-1) # the answer is not in the most freq
131 | 
132 | id2ans = {}
133 | for k in ans2id.keys():
134 |     id2ans[ans2id[k]] = k
135 | 
136 | 
137 | log('qid2ans size: '+str(len(qid2ans)))
138 | log('qid2type size: '+str(len(qid2type)))
139 | log('qid2anstype size: '+str(len(qid2anstype)))
140 | log('id2ans size: '+str(len(id2ans)))
141 | 
142 | log('Saving data...')
143 | ujson.dump(qid2ans, open(args.output_qid2ans,'w'))
144 | ujson.dump(qid2type, open(args.output_qid2type,'w'))
145 | ujson.dump(qid2anstype, open(args.output_qid2anstype,'w'))
146 | ujson.dump(id2ans, open(args.output_ans_id2str,'w'))
147 | 
148 | 
149 | 
# plot the percentage of questions vs number of answers

# all_answers = 443757 + 214354  # train + val, one per question not all 10
# total number of counted answers (second field of every sorted_counts entry)
all_answers = sum(entry[1] for entry in sorted_counts)

log('ALL answers: ' + str(all_answers))
# p[i] = cumulative share (in percent) covered by the first i+1 entries
c = 0
p = []
for entry in sorted_counts:
    c += entry[1]
    p.append(100.*c/all_answers)

# number of answers shown on the x axis
shown = min(22000, len(answers))

fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111)
ax.plot(range(shown), p[:shown], label='VQAv2')
ax.grid(True)
ax.set_ylabel("Percentage of questions covered")
ax.set_xlabel("Number of top K answers")
# thin, light grid lines on both axes
for line in ax.get_xgridlines() + ax.get_ygridlines():
    line.set_linewidth(0.5)
    line.set_color('lightgray')
ax.set_xlim((0, shown))
# ax.xaxis.set_ticks(np.arange(0,min(22001, len(answers)), 1000))
# ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: str(x/1000)))
# ax.set_ylim((50,100))
# ax.yaxis.set_ticks(np.arange(50,101, 5))
ax.legend()
plt.tight_layout()
fig.savefig('coverage_'+args.ans_type.replace('/','-')+'.jpg')
182 | 
# Share (in percent) of each of the first `args.answer_count` entries of
# sorted_counts; these become the slices of the pie chart below.
h = []
for i in xrange(min(args.answer_count, len(sorted_counts))):
    c = sorted_counts[i][1]
    # NOTE(review): the divisor is the bare name `answer_count`, not
    # `args.answer_count` and not `all_answers`. It is not defined in this part
    # of the file -- confirm it is set earlier and is the intended normaliser.
    h.append(100.*c/answer_count)

# plot pie of classes
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111)
ax.pie(h)
plt.tight_layout()
# '/' in the answer type is replaced by '-' so it cannot act as a path separator
fig.savefig('classbalance_'+args.ans_type.replace('/','-')+'.jpg')
194 | 


--------------------------------------------------------------------------------
/experiments/avg/run_cross_entropy.sh:
--------------------------------------------------------------------------------
1 | version=cross_entropy \
2 | criterion=CrossEntropyCriterion \
3 | CUDA_VISIBLE_DEVICES=$gpu \
4 | th train.lua
5 | 


--------------------------------------------------------------------------------
/experiments/avg/run_soft_cross_entropy.sh:
--------------------------------------------------------------------------------
1 | version=SoftCrossEntropy \
2 | CUDA_VISIBLE_DEVICES=$gpu \
3 | th train.lua
4 | 


--------------------------------------------------------------------------------
/experiments/avg/train.lua:
--------------------------------------------------------------------------------
  1 | -- debugger is a global variable so it can be accessed from everywhere
  2 | _, debugger = pcall(require,'fb.debugger') 
  3 | require'nn'
  4 | require'rnn'
  5 | require'cunn'
  6 | require'cudnn'
  7 | require'optim'
  8 | -- require'pretty-nn'
  9 | cudnn.benchmark = true
 10 | cudnn.fastest = true 
 11 | tds = require'tds'
 12 | logger = require'../../utils/logger.lua'
 13 | util = require'../../utils/util.lua'
 14 | optnet = require'optnet'
 15 | threads = require'threads'
 16 | threads.Threads.serialization('threads.sharedserialize')
 17 | 
-- global shorthand for string.format
strf = string.format

-- Load the default options, then let environment variables with the same name
-- as an option override its value.
opt = require('./opt.lua')
for k, v in pairs(opt) do 
   if type(v) == 'boolean' then
      -- unset variable keeps the default; otherwise the flag is true only for
      -- the literal string 'true'
      opt[k] = os.getenv(k) == nil and opt[k] or os.getenv(k) == 'true'
   elseif type(v) ~= 'table' then
      -- numeric strings become numbers, any other string is taken verbatim;
      -- table-valued options are never overridden
      opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] 
   end
end
print(opt)
logger.init(opt)
logger.trace(xlua.table2string(opt, true))
 31 | 
-- name of the current working directory; used as prefix of checkpoint file names
local experiment_id = paths.basename(paths.cwd())
-- seed both the CPU and the GPU random number generators
torch.manualSeed(opt.rnd_seed)
cutorch.manualSeed(opt.rnd_seed)

-- global so that it can be reached from everywhere (e.g. from the debugger)
dataloader = require('./dataloader.lua')
-- nepo: number of iterations per training epoch (see the 'Epoch in' log below);
-- val_nepo: number of validation batches fetched per evaluation pass;
-- val_num_que: number of rows allocated for the validation prediction buffers
local nepo, val_nepo, val_num_que = dataloader.init(opt, logger)

logger.info('Loading and initializing model from:'..opt.model)
-- opt.model is a module path; the module must expose new(opt, lookuptable)
local vqa = require(opt.model)
 41 | 
-- Build a fresh model when no checkpoint is given, otherwise load it from disk.
local model 
if opt.checkpoint:len() == 0 then
   model = vqa.new(opt, dataloader.getlookup())
   -- the network outputs log-probabilities
   model = model:add(nn.LogSoftMax())
   -- For every module of type `name`: drop the bias (and its gradient) when
   -- cudnn.version >= 4000, otherwise zero the bias.
   local function ConvInit(name)
      for k,v in pairs(model:findModules(name)) do
         if cudnn.version >= 4000 then
            v.bias = nil
            v.gradBias = nil
         else
            v.bias:zero()
         end
      end
   end
   
   -- For every module of type `name`: weights drawn via :normal(), bias zeroed.
   local function BNInit(name)
      for k,v in pairs(model:findModules(name)) do
         v.weight:normal()
         v.bias:zero()
      end
   end
   BNInit('cudnn.SpatialBatchNormalization')
   BNInit('nn.SpatialBatchNormalization')
   ConvInit('cudnn.SpatialConvolution')
   ConvInit('nn.SpatialConvolution')
   
   -- all fully connected layers start with a zero bias
   for k,v in pairs(model:findModules('nn.Linear')) do
      v.bias:zero()
   end

   model = model:cuda()
   -- dummy {question, image} input of zeros, only used to let optnet analyse
   -- the network and optimise its memory usage for training
   local sample = {
      torch.zeros(opt.batch_size,opt.que_len):type('torch.CudaTensor'), 
      torch.zeros(opt.batch_size,opt.size_image, 14, 14):type('torch.CudaTensor')
   }
   optnet.optimizeMemory(model, sample, {inplace = false, mode = 'training'})  
   sample = nil
else
   model = torch.load(opt.checkpoint):cuda()
   logger.info('Preloaded from '..opt.checkpoint)
end
 83 | 
-- NOTE(review): start_iter is not referenced again in the visible part of this
-- file; opt.start_iter is read directly instead.
local start_iter = tonumber(opt.start_iter)

-- `net` (global) is what gets trained: the bare model on one GPU, or the model
-- wrapped in a DataParallelTable spread over GPUs 1..opt.num_gpus.
if opt.num_gpus > 1 then
   -- NOTE(review): arguments are presumably (split dimension, flattenParams,
   -- usenccl) -- confirm against the cunn documentation
   net = nn.DataParallelTable(1, true, true)
   -- every replica thread needs the same packages and cudnn settings
   net:threads(function() 
      require 'cudnn'  
      require 'rnn'
      cudnn.benchmark = true
      cudnn.fastest = true 
   end)   
   net:add(model, util.range(opt.num_gpus))
else
   net = model
end
net = net:cuda()
-- the criterion class is looked up by name in the nn namespace
local criterion = nn[opt.criterion]():cuda()
-- separate instance for validation so its internal buffers are not shared
local val_criterion = criterion:clone():cuda()
101 | 
logger.info('Network:\n')
logger.info(tostring(net))
logger.info('Epoch in '..nepo..' iterations')

-- iteration counter; incremented at the top of the main loop, so it starts
-- one below opt.start_iter
local iter = tonumber(opt.start_iter) - 1
-- index of the next validation batch to request from the dataloader
local val_iter = 1

-- flattened parameters and their gradients
local weights, dw = net:getParameters()

-- batch state shared by train, eval and get_val_loss
local que, img, ans, img_map, is_val, outputs, prob, soft, ans_type

-- per-question prediction and the 10 ground-truth answer ids (columns 2..11
-- of the validation answer tensor) collected during evaluation
local val_predictions = torch.IntTensor(val_num_que):zero()
local val_groundtruth = torch.IntTensor(val_num_que, 10):zero()
-- single background thread on which the accuracy is computed
local eval_pool = threads.Threads(1, function() require'torch' require'xlua' tds = require'tds' end)
116 | 
-- Closure handed to the optimiser: evaluates the loss and the gradient of the
-- network on the next training batch.
--   x: parameter vector proposed by the optimiser
-- Returns the loss and the flattened gradient tensor `dw`.
local train = function(x)
   if opt.repl then debugger.enter() end

   net:training()
   net:zeroGradParameters()
   dw:zero()
   -- only copy when the optimiser works on a different tensor
   if x ~= weights then
      weights:copy(x)
   end

   -- batches are numbered relative to the iteration the run started at
   que, img, ans, img_map = dataloader.getbatch(iter-opt.start_iter+1)
   que = que:cuda()
   img = img:cuda()
   if opt.criterion:find('Soft') then
      -- soft criteria consume the whole answer tensor
      ans = ans:cuda()
   else
      -- other criteria only get the first answer column
      ans = ans[{{},{1}}]:cuda()
   end

   local input = {que, img}
   outputs = net:forward(input)
   local loss = criterion:forward(outputs, ans)
   net:backward(input, criterion:backward(outputs, ans))

   if opt.gradclip > 0 then
      net:gradParamClip(opt.gradclip)
   end

   return loss, dw 
end
148 | 
149 | local calculate_score
150 | 
-- Runs the network over `val_nepo` validation batches, stores the predicted
-- answer id and the ground-truth answers of every question, and hands them to
-- calculate_score for asynchronous scoring.
--   dt:   timestamp string used in the log file names
--   iter: current training iteration
local eval = function(dt, iter) 
   net:evaluate()
   if opt.repl then debugger.enter() end

   local val_answertypes = tds.Hash()
   -- row of val_predictions / val_groundtruth where the next batch starts
   local st = 1

   if iter == nepo then
      xlua.log('Evaluating on val...', 1)
   end
   for i=1, val_nepo do
      xlua.progress(i, val_nepo, 2)
      que, img, ans, soft, img_map, ans_type = dataloader.getvalbatch(val_iter)
      val_iter = val_iter + 1

      -- real number of questions in this batch
      local n = que:size(1)
      local que_in, img_in = que, img

      -- batch normalization of batch of 1 is impossible: feed the single
      -- sample twice and keep only the first output row below
      if n == 1 then
         que_in = que:repeatTensor(2,1)
         if img:dim() == 2 then
            img_in = img:repeatTensor(2,1) 
         else
            -- repeatTensor returns a new tensor, the result must be kept
            img_in = img:repeatTensor(2,1,1,1)
         end
      end

      outputs = net:forward({que_in:cuda(), img_in:cuda()})
      -- drop the duplicated row (if any) before taking the arg-max
      local _, pred = outputs:narrow(1, 1, n):max(2)

      val_predictions[{{st,st+n-1}}]:copy(pred:view(-1):int())
      -- columns 2..11 hold the ten ground-truth answers
      val_groundtruth[{{st,st+n-1}, {}}]:copy(ans[{{},{2,11}}]:int())
      for k=1,n do 
         val_answertypes[st+k-1] = ans_type[k]
      end
      st = st + n
   end

   -- clones are passed because the buffers are reused by the next evaluation
   calculate_score(dt, iter, val_predictions:clone(), val_groundtruth:clone(), val_answertypes)
end
192 | 
-- Schedules the accuracy computation on `eval_pool` and appends the result to
-- the accuracy log once the job has finished.
--   dt:       timestamp string used in the log file name
--   iter:     training iteration the predictions belong to
--   pred:     1-D tensor, predicted answer id per question
--   ans:      2-D tensor, ten ground-truth answer ids per question
--   ans_type: answer type per question, indexed like `pred`
calculate_score = function(dt, iter, pred, ans, ans_type)

   -- Runs on the worker thread, so it only uses its arguments and locals.
   -- Per question the score is the mean over the ten annotators j of
   -- min(1, (#other annotators agreeing with the prediction)/3).
   local work = function(iter, pred, ans, ans_type)
   
      local accQA = {}
      local accAnsType = {}
      for i=1, pred:size(1) do 
         local predicted = pred[i]
         local gt = ans[i]

         -- count once how many of the ten answers equal the prediction
         local matches = 0
         for k=1, 10 do
            if gt[k] == predicted then
               matches = matches + 1
            end
         end

         -- leaving annotator j out removes one match iff j agreed
         local acc_sum = 0
         for j=1, 10 do
            local others = matches - ((gt[j] == predicted) and 1 or 0)
            acc_sum = acc_sum + math.min(1, others/3)
         end
         local avgGTAcc = acc_sum/10
      
         table.insert(accQA, avgGTAcc)
      
         local qtype = ans_type[i]
         accAnsType[qtype] = accAnsType[qtype] or {} 
         table.insert(accAnsType[qtype], avgGTAcc)
      end
      
      local total = 0
      for _,v in ipairs(accQA) do
         total = total + v
      end

      -- NOTE(review): every mean is scaled by the constant .9246; its origin
      -- is not documented in this file -- confirm it is intended.
      local type_acc = {}
      for ansType,arr in pairs(accAnsType) do
         local type_total = 0
         for _,v in ipairs(arr) do
            type_total = type_total + v
         end
         type_acc[ansType] = type_total/(#arr/.9246)
      end

      local overall = 100*total/(#accQA/.9246)


      return iter, overall, type_acc 
   end

   -- Runs on the main thread when the job result is collected.
   local after_work = function(iter, overall, type_acc)
      paths.mkdir(opt.log_dir .. opt.version .. '/')

      -- an answer type without any question is reported as 0
      local function pct(ansType)
         return 100*(type_acc[ansType] or 0)
      end

      -- assert surfaces the reason when the log file cannot be opened
      local acc_log = assert(io.open(opt.log_dir .. opt.version .. '/acc_'..dt..'.csv', 'a'))
      acc_log:write(os.date('%Y-%m-%d_%H:%M:%S') .. '\t')
      acc_log:write(iter .. '\t')
      acc_log:write((iter/nepo) .. '\t')
      acc_log:write(strf('%.2f\t', overall))
      if opt.ans_type == 'all' then
         acc_log:write(strf('%.2f\t', pct('yes/no')))
         acc_log:write(strf('%.2f\t', pct('number')))
         acc_log:write(strf('%.2f\n', pct('other')))
      else
         acc_log:write(strf('%.2f\n', pct(opt.ans_type)))
      end
      acc_log:close()
      xlua.log(strf('Overall val accuracy at epoch %d %.2f%% ',iter/nepo,overall), 1)
   end

   eval_pool:addjob(work, after_work, iter, pred, ans, ans_type)
end
275 | 
-- Computes the criterion value on the next validation batch.
-- Returns the loss and the table {que, img, ans} it was computed on.
local get_val_loss = function()
   que, img, ans, soft = dataloader.getvalbatch(val_iter)
   val_iter = val_iter + 1

   -- forward in evaluation mode, then switch straight back to training mode
   net:evaluate()
   outputs = net:forward({que:cuda(), img:cuda()})
   net:training()

   if opt.criterion:find('Soft') then
      -- soft criteria are scored against the soft answer tensor
      ans = soft:cuda()
   else
      -- other criteria only get the first answer column
      ans = ans[{{},{1}}]:cuda()
   end

   local batch_loss = val_criterion:forward(outputs, ans)
   return batch_loss, {que,img,ans}
end
288 | 
-- Appends one tab-separated line (timestamp, iteration, epoch, training loss,
-- validation loss) to the losses log of this run.
--   dt: timestamp string that identifies the log file
local log_losses = function(dt, iter, train_loss, val_loss)
   local log_dir = opt.log_dir .. opt.version .. '/'
   paths.mkdir(log_dir)

   local loss_log = io.open(log_dir .. 'losses_'..dt..'.csv', 'a')
   local fields = {
      os.date('%Y%m%d_%H:%M:%S'),
      iter,
      strf('%d',(iter/nepo)),
      strf('%.2f', train_loss),
      strf('%.2f', val_loss),
   }
   loss_log:write(table.concat(fields, '\t') .. '\n')
   loss_log:close()
end
300 | 
-- Recursively copies a table: nested tables are copied, every other value
-- (tensors included) is shared with the original. If `tbl` is a torch object
-- the copy gets the same torch type.
local function deep_copy(tbl)
   local result = {}
   for key, value in pairs(tbl) do
      if type(value) ~= 'table' then
         result[key] = value
      else
         result[key] = deep_copy(value)
      end
   end
   local type_name = torch.typename(tbl)
   if type_name then
      torch.setmetatable(result, type_name)
   end
   return result
end
316 | 
-- Saves a float copy of the network and the optimiser configuration to
-- opt.save_dir. The file name is built from the experiment id, the iteration,
-- the epoch, the version label and a timestamp; the optimiser file has the
-- same name with the suffix '_optim'.
local make_checkpoint = function(iter, optim_config)
   logger.info('Saving weights to '..opt.save_dir)
   paths.mkdir(opt.save_dir)

   local fn = table.concat({
      experiment_id,
      iter,
      (iter/nepo)..'_epoch',
      '('.. opt.version .. ')',
      os.date('_%Y%m%d_%H%M%S')..'.t7',
   }, '_')

   local copy = net:clone():float()
   -- for a multi-GPU wrapper only the first replica is stored
   if torch.type(copy) == 'nn.DataParallelTable' then
      copy = copy:get(1)
   end

   local target = opt.save_dir .. fn
   torch.save(target, deep_copy(copy):float():clearState())
   torch.save(target .. '_optim', optim_config)
end
335 | 
-- Optimiser configuration: fresh for a new run, otherwise restored from the
-- '_optim' file that make_checkpoint writes next to the model file.
local optim_config 
if opt.checkpoint:len() == 0 then
   optim_config = {
      learningRate = opt.learning_rate,
      momentum = opt.momentum,
      -- optimiser state, passed as the state argument of optim.adam
      state = {},
   }
else
   -- NOTE(review): the second argument is still nil at this point, so it has
   -- no effect on torch.load
   optim_config = torch.load(opt.checkpoint .. '_optim', optim_config)
end

-- last validation loss, shown in the progress line
local val_loss = 0
-- start timestamp of this run; part of every log file name
local dt = os.date('%Y%m%d_%H%M%S')
-- exponentially smoothed training loss (0 means "not initialised yet")
local loss_avg = 0
350 | 
-- Interactive debugging aid: when opt.repl is set, the first ten samples of
-- the first training batch are sent to a visdom server and the debugger is
-- entered before training starts.
if opt.repl then
   local vocab = dataloader.getvocab()
   local inv = util.tableinvert(vocab)
   -- file name suffix of the answer-id -> answer-string mapping
   local suffix = '_'..(opt.train_on_val and 'withVAL' or 'noVAL')
   suffix = suffix ..'_type_'..opt.ans_type
   suffix = suffix ..'_'..opt.answer_count..'.json'
   local ansmap = util.load_json(opt.ans_id2str..suffix)
   que, img, ans, img_map = dataloader.getbatch(1)
   local visdom = require'visdom'
   local plot = visdom{server = opt.plot_server, port = opt.plot_port}

   local show_imgs = function(que, img, ans, img_map, index)
      for i=1, 10 do 
         -- NOTE(review): the image directory is a hard-coded, machine-specific path
         local imgfn = '/temp/ilija/fast/ms_coco_images/'.. img_map[i]:split('%.')[1]..'.jpg'
         -- NOTE(review): ans[i][1][1] needs a 3-D answer tensor -- presumably
         -- only valid with a soft criterion; confirm
         local a = ansmap[tostring(ans[i][1][1])]
         -- NOTE(review): `image` is not required in the visible part of this file
         plot:image{
            img      = image.load(imgfn),
            options  = {
               title   = imgfn .. '   ' .. util.word_ids_to_word(inv, que[i]),
               caption = index..' INDEX '.. (a and a or ' is test set')
            }
         }
      end
   end

   show_imgs(que,img,ans,img_map,1)
   debugger.enter()
end
379 | 
paths.mkdir(opt.log_dir .. opt.version .. '/')

-- per-iteration training losses; assert surfaces the reason if it cannot be opened
local train_log = assert(io.open(opt.log_dir .. opt.version .. '/train_losses_'..dt..'.csv', 'a'))

-- Finished evaluation jobs are collected about three times per epoch. The
-- interval must be a whole number of iterations: with a fractional divisor
-- the modulo below would practically never be zero.
local sync_every = math.max(1, math.floor(nepo/3))

-- Main training loop: one optimiser step per iteration, periodic loss
-- logging, and evaluation plus checkpointing at epoch boundaries.
while iter < opt.max_iter do

   iter = iter + 1

   if iter%20 == 0 then
      collectgarbage()
      train_log:flush()
   end

   -- step-wise learning rate decay at the end of epoch 30 and epoch 60
   if iter/nepo == 30 then
      optim_config.learningRate = opt.learning_rate/10
   elseif iter/nepo == 60 then
      optim_config.learningRate = opt.learning_rate/100
   end
   
   local _, loss = optim.adam(train, weights, optim_config, optim_config.state) 

   loss = loss[1]
   -- exponential moving average, initialised with the first loss
   loss_avg = loss_avg ~= 0 and loss_avg*.95+loss*.05 or loss

   train_log:write(strf('%.2f\t%.2f\n',loss_avg, loss))

   xlua.log(strf('"%s" Epoch %.2f%% Training Loss: %.2f Validation Loss: %.2f', opt.version, 100.0*iter/nepo, loss_avg, val_loss), 3)
   xlua.progress(iter, opt.max_iter, 4) 

   if iter%opt.log_loss_every == 0 or iter%nepo == 0 then

      -- no held-out loss exists when the validation set is used for training
      if not opt.train_on_val then
        val_loss = get_val_loss()
      end

      log_losses(dt, iter, loss_avg, val_loss)
   end
   -- collect the result of a pending evaluation job (writes the accuracy log)
   if iter/nepo > 1 and iter%sync_every == 0 and eval_pool:hasjob() then
      eval_pool:synchronize()
   end

   if iter/nepo > opt.eval_after and iter%nepo == 0 then
      if not opt.train_on_val and opt.eval then
         eval(dt, iter)
      end
      if iter > opt.save_after then
         make_checkpoint(iter, optim_config)
      end
   end
end
-- make sure the last evaluation job is finished and logged before exiting
eval_pool:synchronize()
train_log:close()
431 | 


--------------------------------------------------------------------------------
/experiments/avg/valdataworker.lua:
--------------------------------------------------------------------------------
 1 | local bsz, epoch_size, que, ans, img, permutation, p, loaded 
 2 | local logger, img_dir
 3 | 
 4 | local _init = function(_bsz, _epoch_size, _logger, que_len, _img_dir)
 5 |    bsz = _bsz
 6 |    epoch_size = _epoch_size
 7 |    que = torch.FloatTensor(bsz, que_len)
 8 |    img = torch.FloatTensor(bsz, 2048, 14, 14)
 9 |    ans = torch.FloatTensor(bsz, 11)
10 |    soft = torch.FloatTensor(bsz, 10, 2)
11 |    loaded = {}
12 |    logger = _logger
13 |    img_dir = _img_dir
14 | end
15 | 
16 | local _dowork = function(questions, mapping, answers, soft_answers, cache, itensor, anstypes, iter)
17 | 
18 |    local t = sys.clock()
19 |    collectgarbage()
20 |    logger.trace('collectgarbage',(sys.clock()-t))
21 | 
22 |    if not answers then
23 |       ans = ans:zero()
24 |    end
25 | 
26 |    loaded = {}
27 |    que_types = {}
28 |    for i=1, bsz do
29 |       local index = (i + bsz*((iter-1)%epoch_size))
30 |       if index > questions:size(1) then
31 |          break
32 |       end
33 | 
34 |       que[i] = questions[index]
35 | 
36 |       img[i] = cache[mapping[index]] and itensor[cache[mapping[index]]] or torch.load(img_dir .. mapping[index]):decompress()
37 | 
38 |       if answers then
39 |          ans[i] = answers[index]
40 |       end
41 | 
42 |       if soft_answers then
43 |          soft[i] = soft_answers[index]
44 |       end
45 | 
46 |       if anstypes then
47 |          que_types[i] = anstypes[index]
48 |       end
49 | 
50 |       loaded[i] = mapping[index]
51 |    end
52 | 
53 |    if #loaded == 0 then return -1 end
54 | 
55 |    -- not enough for full batch
56 |    if #loaded < bsz then
57 |       return iter, que[{{1, #loaded}}]:clone(), img[{{1, #loaded}}]:clone(), ans[{{1, #loaded}}]:clone(), soft[{{1,#loaded}}]:clone(), tds.Hash(loaded), tds.Hash(que_types)
58 |    end
59 | 
60 |    logger.trace('_dowork',(sys.clock()-t))
61 |    return iter, que:clone(), img:clone(), ans:clone(), soft:clone(), tds.Hash(loaded), tds.Hash(que_types)
62 | end
63 | 
64 | return {
65 |    init = _init,
66 |    dowork = _dowork,
67 | }
68 | 


--------------------------------------------------------------------------------
/experiments/pool/classbalance_all.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ili3p/vqa-soft/c28c3414673adf80620f08e713274d8aed2edfea/experiments/pool/classbalance_all.jpg


--------------------------------------------------------------------------------
/experiments/pool/clean.sh:
--------------------------------------------------------------------------------
1 | /bin/rm -r checkpoints/
2 | /bin/rm -r logs/
3 | 


--------------------------------------------------------------------------------
/experiments/pool/coverage_all.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ili3p/vqa-soft/c28c3414673adf80620f08e713274d8aed2edfea/experiments/pool/coverage_all.jpg


--------------------------------------------------------------------------------
/experiments/pool/dataloader.lua:
--------------------------------------------------------------------------------
  1 | local data = require'./datapreprocess.lua'
  2 | -- data private variables
  3 | local _lookuptable
  4 | -- train_tid2img maps a question tensor index to an image filename
  5 | local train_questions, train_tid2img, train_answers
  6 | local cache, itensor, storage
  7 | local val_cache, val_itensor, val_storage
  8 | 
  9 | -- for debugging purposes
 10 | local vocab, train_tid2qid, test_tid2qid
 11 | -- test_answers only not nil when test is val set
 12 | local test_questions, test_tid2img, soft_test_answers, test_answers, test_tid2anstype
 13 | 
 14 | local threads = require'threads'
 15 | threads.Threads.serialization('threads.sharedserialize')
 16 | local valbatch
 17 | local permutation, epoch_size, val_epoch_size
 18 | local opt, logger
 19 | 
 20 | local buffer = {}
 21 | local in_buffer = tds.Vec()
 22 | local running = tds.Vec()
 23 | local last_iter = 0
 24 | local epoch = 0
 25 | 
 26 | local val_buffer = {}
 27 | local val_in_buffer = tds.Vec()
 28 | local val_running = tds.Vec()
 29 | local val_last_iter = 0
 30 | 
-- Queues a job on the training pool that builds the batch for iteration
-- `iter`. Results are stored in the ring buffer `buffer`; the slot of an
-- iteration is iter%opt.buffer_size + 1.
local _addwork = function(iter)

   -- Job body. `worker` is the global that the pool's thread initialiser sets.
   -- Returns -1 instead of a batch when the slot is being produced or still
   -- holds an unconsumed batch.
   local work = function(iter, in_buffer, running)
      if not running[iter%opt.buffer_size + 1] and not in_buffer[iter%opt.buffer_size + 1] then
         running[iter%opt.buffer_size + 1] = true
         return worker.dowork(train_questions, train_tid2img, train_answers, cache, itensor, permutation, iter)
      else -- buffer is full, must wait
         logger.warn('Train buffer is full at iteration ' .. iter)
         return -1
      end
   end
   -- Receives the values returned by `work`; a result of -1 is dropped.
   local endcallback = function(iter, ...) 
      if iter ~= -1 then
         assert(not buffer[iter%opt.buffer_size + 1])
         buffer[iter%opt.buffer_size + 1] = {...} 
         in_buffer[iter%opt.buffer_size + 1] = true
         running[iter%opt.buffer_size + 1] = false
      end
   end

   -- remember the highest iteration requested so far
   last_iter = math.max(iter, last_iter)

   pool:addjob(work, endcallback, iter, in_buffer, running)
end
 55 | 
-- Returns the training batch of iteration `iter` (the values stored by
-- _addwork's callback), blocking until that batch is available. Also keeps
-- the workers busy by queueing further iterations.
local _getbatch = function(iter)
   local t = sys.clock()

   -- epoch boundary: finish all queued jobs, reshuffle, and queue the first
   -- jobs of the new epoch
   if iter%epoch_size == 0 then
      while pool:hasjob() do
         pool:dojob()
      end
      permutation = torch.randperm(permutation:size(1))
      epoch = epoch + 1
      for i=1,opt.dataworkers-1 do 
         _addwork(iter + i)
      end
   end
   -- queue the next iteration as long as it still belongs to the current epoch
   if (last_iter-(epoch*epoch_size)) + 1 <= epoch_size then
      _addwork(last_iter + 1)
   end

   local ind = iter%opt.buffer_size + 1
   local c = 0
   while not buffer[ind] do -- wait for this specific batch
      pool:dojob()
      c = c + 1
      -- NOTE(review): once c exceeds 10 this warning is logged on every
      -- further pass of the loop
      if c > 10 then
         logger.warn('Stuck')
      end
   end
   if c > 1 then
     logger.trace('Got batch after '..c..' tries.')
   end
   -- hand the batch over and free its slot
   local batch = buffer[ind]
   buffer[ind] = nil
   in_buffer[ind] = false
   logger.trace('dojob', (sys.clock()-t))
   -- NOTE(review): nothing runs between resetting t and the trace below, so
   -- the '_addwork' timing is always close to zero
   t = sys.clock()
   logger.trace('_addwork', (sys.clock()-t))
   return table.unpack(batch)
end
 93 | 
-- Queues a job on the validation pool that builds the validation batch for
-- `iter`. Results go to the ring buffer `val_buffer`; the slot of an
-- iteration is iter%opt.val_buffer_size + 1.
local _addvalwork = function(iter)
   -- Job body. `valworker` is the global that the pool's thread initialiser
   -- sets. Returns -1 when the slot is being produced or still occupied.
   local work = function(iter, val_in_buffer, val_running) 
      if not val_running[iter%opt.val_buffer_size + 1] and not val_in_buffer[iter%opt.val_buffer_size + 1] then
         val_running[iter%opt.val_buffer_size + 1] = true
         return valworker.dowork(test_questions, test_tid2img, test_answers, soft_test_answers, val_cache, val_itensor, test_tid2anstype, iter)
      else
         logger.warn('Val buffer is full at iteration '.. iter)
         return -1
      end
   end
   -- Receives the values returned by `work`; a result of -1 is dropped.
   local endcallback = function(iter, ...) 
      if iter ~= -1 then
         assert(not val_buffer[iter%opt.val_buffer_size +1])
         val_buffer[iter%opt.val_buffer_size + 1] = {...}
         val_in_buffer[iter%opt.val_buffer_size + 1] = true
         val_running[iter%opt.val_buffer_size + 1] = false 
      end
   end

   -- remember the highest validation iteration requested so far
   val_last_iter = math.max(iter, val_last_iter)

   valpool:addjob(work, endcallback, iter, val_in_buffer, val_running)
end
117 | 
-- Returns the validation batch of iteration `iter` (the values stored by
-- _addvalwork's callback), blocking until it is available. Every call also
-- queues the next not yet requested validation iteration.
local _getvalbatch = function(iter)
   local t = sys.clock()

   _addvalwork(val_last_iter + 1)

   local ind = iter%opt.val_buffer_size + 1
   local c = 0
   while not val_buffer[ind] do -- wait for this specific batch
      valpool:dojob()
      c = c + 1
      -- NOTE(review): once c exceeds 10 this warning is logged on every
      -- further pass of the loop
      if c > 10 then
         logger.warn('Stuck at val')
      end
   end
   if c > 1 then
     logger.trace('Got batch after '..c..' tries.')
   end
   -- hand the batch over and free its slot
   local batch = val_buffer[ind]
   val_buffer[ind] = nil
   val_in_buffer[ind] = false
   logger.trace('dojob', (sys.clock()-t))
   -- NOTE(review): nothing runs between resetting t and the trace below, so
   -- the '_addwork' timing is always close to zero
   t = sys.clock()
   logger.trace('_addwork', (sys.clock()-t))
   return table.unpack(batch)
end
143 | 
-- Queues the first validation batches, one per validation worker thread.
-- Uses opt.val_dataworkers (the size of `valpool`), matching the priming
-- loop in _init, instead of the size of the training pool.
local _setvalworkers = function()
   for i=1, opt.val_dataworkers do 
      _addvalwork(i)
   end
end
149 | 
-- Accessor for the vocabulary stored by _init (nil before _init has run).
local function _getvocab()
   return vocab
end
153 | 
-- Accessor for the lookup table stored by _init (nil before _init has run).
local function _getlookup()
   return _lookuptable
end
157 | 
-- Loads the preprocessed data, creates the training and validation worker
-- pools and queues the first batches.
--   _opt:    option table
--   _logger: logger providing info(), warn() and trace()
-- Returns the number of training batches per epoch, the number of validation
-- batches per pass, and the number of test questions.
local _init = function(_opt, _logger)
   opt = _opt 
   logger = _logger
   paths.mkdir('./data/')

   -- not used, better to leave it to the OS to do RAM caching 
   cache = {} 
   val_cache = {}

   local t = sys.clock()
   local q_data = data.get_qdata()
   local answers, soft_answers = data.get_answers()
   _lookuptable = data.get_lookup()

   data.clean()

   train_questions = q_data.train_questions
   train_tid2img = q_data.train_tid2img
   train_tid2qid = q_data.train_tid2qid

   -- soft criteria train on the soft answers, all others on the hard ones
   train_answers = (opt.criterion:find('Soft') and soft_answers or answers)['train_answers']

   vocab = q_data.vocab

   test_questions = q_data.test_questions
   test_tid2img = q_data.test_tid2img
   test_tid2qid = q_data.test_tid2qid
   test_tid2anstype = q_data.test_tid2anstype

   test_answers = answers['test_answers']
   soft_test_answers = soft_answers['test_answers']

   logger.info('Vocab size '..#vocab)
   logger.info('Train questions '..train_questions:size(1))
   logger.info('Test questions ' ..test_questions:size(1))

   permutation = torch.randperm(train_questions:size(1))
   -- floor: training questions that do not fill a whole batch are left out of
   -- the epoch; ceil: validation ends with a possibly smaller batch
   epoch_size = math.floor(train_questions:size(1)/opt.batch_size)
   val_epoch_size = math.ceil(test_questions:size(1)/opt.val_batch_size)

   -- answer augmentation is only kept for soft criteria
   opt.ans_aug = opt.criterion:find('Soft') and opt.ans_aug or false

   -- training worker pool (global `pool`). The first function loads the
   -- packages in every thread, the second seeds the thread and creates its
   -- global `worker`.
   pool = threads.Threads(opt.dataworkers, 
            function(threadid) 
               require'sys' 
               require'torch' 
               require'xlua'
               require'cunn'
               require'torchzlib'
               tds = require'tds'
            end, 
            function() 
               -- a different seed per thread
               torch.manualSeed(opt.rnd_seed + __threadid)
               cutorch.manualSeed(opt.rnd_seed + __threadid)
               torch.setnumthreads(1)
               worker = paths.dofile('./dataworker.lua') 
               worker.init(opt.batch_size, epoch_size, logger, opt.que_len, opt.img_dir, opt.criterion:find('Soft'), opt.ans_aug)
            end)

   -- validation worker pool (global `valpool`), set up the same way with the
   -- global `valworker`
   valpool = threads.Threads(opt.val_dataworkers, 
            function(threadid) 
               require'sys' 
               require'torch' 
               require'xlua'
               require'cunn'
               require'torchzlib'
               tds = require'tds'
            end, 
            function() 
               torch.manualSeed(opt.rnd_seed + __threadid)
               cutorch.manualSeed(opt.rnd_seed + __threadid)
               torch.setnumthreads(1)
               valworker = paths.dofile('./valdataworker.lua') 
               valworker.init(opt.val_batch_size, val_epoch_size, logger, opt.que_len, opt.img_dir)
            end)

   -- prime both pools with one job per worker
   for iter=1, opt.dataworkers do 
      _addwork(iter) 
   end
   for iter=1, opt.val_dataworkers do
      _addvalwork(iter)
   end

   logger.trace('init', (sys.clock()-t))

   -- NOTE(review): this overwrites opt.word2vec and stores a second lookup
   -- table in the global `ltbl`, after data.clean() was already called;
   -- `ltbl` is not read in the visible part of this file -- confirm it is
   -- needed.
   opt.word2vec = '../../utils/glove.hash'
   ltbl = data.get_lookup()

   collectgarbage()
   return epoch_size, val_epoch_size, test_questions:size(1)
end
249 | 
-- Public interface of the data loader.
return {
   init = _init,
   getbatch = _getbatch,
   getvalbatch = _getvalbatch,
   setvalworkers = _setvalworkers,
   getlookup = _getlookup,
   getvocab = _getvocab,
   -- NOTE(review): the two fields below are evaluated when this table is
   -- built, i.e. before _init has assigned the locals, so they are nil here
   -- and are not updated afterwards.
   test_tid2qid = test_tid2qid,
   train_tid2qid = train_tid2qid,
   answer_data = data.answer_data,
}
261 | 


--------------------------------------------------------------------------------
/experiments/pool/dataworker.lua:
--------------------------------------------------------------------------------
 1 | local bsz, epoch_size, que, ans, img, loaded, ans_aug 
 2 | local logger, img_dir
 3 | 
 4 | local _init = function(_bsz, _epoch_size, _logger, que_len, _img_dir, _soft_ans, _ans_aug)
 5 |    bsz = _bsz
 6 |    epoch_size = _epoch_size
 7 |    ans_aug = _ans_aug
 8 |    que = torch.FloatTensor(bsz, que_len)
 9 |    img = torch.FloatTensor(bsz, 2048, 14, 14)
10 |    if _soft_ans then
11 |       if ans_aug then
12 |          ans = torch.FloatTensor(bsz, 1, 2)
13 |       else
14 |          ans = torch.FloatTensor(bsz, 10, 2)
15 |       end
16 |    else
17 |       ans = torch.FloatTensor(bsz, 11)
18 |    end
19 | 
20 |    loaded = {}
21 |    logger = _logger
22 |    img_dir = _img_dir
23 | end
24 | 
-- Assembles training batch number `iter` (batches wrap around after
-- `epoch_size`) following `permutation`.
--   questions:   indexable by question index, one question row each
--   mapping:     question index -> image file name
--   answers:     question index -> answer row(s); required
--   cache, itensor: optional in-memory image cache and its tensor
--   permutation: shuffled question indices
-- Returns iter, questions, images, answers (all clones) and a tds.Hash of the
-- loaded image file names.
local _dowork = function(questions, mapping, answers, cache, itensor, permutation, iter)

   local t = sys.clock()
   collectgarbage()
   logger.trace('collectgarbage',(sys.clock()-t))

   assert(answers)

   loaded = {}
   for i=1, bsz do
      local index = permutation[(i + bsz*((iter-1)%epoch_size))]

      -- use the in-memory cache when the image is present, else load from disk
      img[i] = cache[mapping[index]] and itensor[cache[mapping[index]]] or torch.load(img_dir .. mapping[index]):decompress()

      que[i] = questions[index]
      if ans_aug then 
         -- Number of used answer slots. This assumes the slots are filled
         -- from the front and that the first slot whose first field is 0
         -- starts the padding (TODO confirm against datapreprocess.lua).
         -- When no such slot exists all 10 slots are used.
         local nvalid = 10
         for j=2, 10 do 
            if answers[index][j][1] == 0 then
               nvalid = j-1
               break
            end
         end
         -- pick one of the used slots uniformly at random
         ans[i][1] = answers[index][torch.random(nvalid)]
      else
         ans[i] = answers[index]
      end
      loaded[i] = mapping[index]
   end

   logger.trace('_dowork',(sys.clock()-t))
   return iter, que:clone(), img:clone(), ans:clone(), tds.Hash(loaded) 
end
60 | 
-- Public interface of the training data worker: init allocates the batch
-- buffers, dowork fills them and returns one batch.
return {
   init = _init,
   dowork = _dowork,
}
65 | 


--------------------------------------------------------------------------------
/experiments/pool/model.lua:
--------------------------------------------------------------------------------
  1 | local nninit = require 'nninit'
  2 | local M = {}
  3 | 
  4 | M.new = function(opt, ltbl)
  5 |    -- Builds the attention model from the sizes/activations in `opt`; `ltbl` is
  6 |    -- added as the first module of the question branch. Returns an nn.Sequential.
  7 |    local lang = nn.Sequential()
  8 |       :add(ltbl)
  9 |       :add(nn.Dropout(opt.dropout, true))
 10 |       :add(cudnn[opt.activation](true)) 
 11 |       :add(cudnn.LSTM(opt.vec_len, opt.size_rnn, opt.size_rnn_layer, true)
 12 |          :init('weight', nninit.uniform, -0.08, 0.08)) 
 13 |       :add(nn.Select(2, -1)) -- keep the last slice along dim 2 (presumably the final time step -- confirm)
 14 |       :add(cudnn.BatchNormalization(opt.size_rnn))
 15 |    -- image branch: dropout, 1x1 convolution (size_image -> size_common channels), activation
 16 |    local vision = nn.Sequential()
 17 |       :add(nn.Dropout(opt.dropout, true))
 18 |       :add(cudnn.SpatialConvolution(opt.size_image, opt.size_common, 1, 1)
 19 |          :init('weight', nninit.xavier, {dist='normal', gain=opt.img_activation:lower()}))
 20 |       :add(cudnn[opt.img_activation](true))
 21 |    -- attention: question projected to size_common and tiled over the 14x14 grid, multiplied with the image branch, reduced to opt.glimpse maps
 22 |    local attention = nn.Sequential()
 23 |       :add(nn.ParallelTable()
 24 |          :add(nn.Sequential()
 25 |             :add(nn.Dropout(opt.dropout, true))
 26 |             :add(nn.Linear(opt.size_rnn, opt.size_common)
 27 |             :init('weight', nninit.xavier, {dist='normal', gain=opt.activation:lower()}))
 28 |             :add(cudnn[opt.activation](true))
 29 |             :add(nn.Replicate(14*14, 3))  
 30 |             :add(nn.Reshape(opt.size_common, 14, 14, true))
 31 |             )
 32 |          :add(vision))
 33 |       :add(nn.CMulTable())
 34 |       :add(cudnn.SpatialConvolution(opt.size_common, opt.glimpse, 1, 1)
 35 |          :init('weight', nninit.xavier, {dist='normal', gain=opt.img_activation:lower()}))
 36 |       :add(nn.View(opt.glimpse, 14*14))
 37 |       :add(nn.SplitTable(2)) -- split the attentions in separate tables
 38 |       :add(nn.ParallelTable() -- NOTE(review): exactly two SoftMax branches are hard-coded; presumably requires opt.glimpse == 2
 39 |          :add(nn.Sequential()
 40 |             :add(nn.SoftMax())
 41 |          )
 42 |          :add(nn.Sequential()
 43 |             :add(nn.SoftMax())
 44 |          )
 45 |       )
 46 |    -- att_applier: takes entries 2..3 of its input table and builds one attended feature per attention map, joined along dim 2
 47 |    local att_applier = nn.Sequential()
 48 |             :add(nn.NarrowTable(2, 2))
 49 |             :add(nn.ConcatTable()
 50 |                :add(nn.Sequential() -- branch for the first attention map
 51 |                   :add(nn.ParallelTable()
 52 |                      :add(nn.Identity())
 53 |                      :add(nn.Sequential()
 54 |                         :add(nn.SelectTable(1))
 55 |                         :add(nn.View(14*14, 1))
 56 |                      )
 57 |                   )
 58 |                   :add(nn.MM())
 59 |                   :add(nn.Squeeze())
 60 |                   :add(nn.Dropout(opt.dropout, true))
 61 |                   :add(nn.Linear(opt.size_image, opt.size_multi)
 62 |                   :init('weight', nninit.xavier, {dist='uniform', gain=opt.activation:lower()})) -- NOTE(review): 'uniform' here, 'normal' in the otherwise identical second branch
 63 |                   :add(cudnn[opt.activation](true))
 64 |                )
 65 |                :add(nn.Sequential() -- branch for the second attention map
 66 |                   :add(nn.ParallelTable()
 67 |                      :add(nn.Identity())
 68 |                      :add(nn.Sequential()
 69 |                         :add(nn.SelectTable(2))
 70 |                         :add(nn.View(14*14, 1))
 71 |                      )
 72 |                   )
 73 |                   :add(nn.MM())
 74 |                   :add(nn.Squeeze())
 75 |                   :add(nn.Dropout(opt.dropout, true))
 76 |                   :add(nn.Linear(opt.size_image, opt.size_multi)
 77 |                   :init('weight', nninit.xavier, {dist='normal', gain=opt.activation:lower()}))
 78 |                   :add(cudnn[opt.activation](true))
 79 |                )
 80 |             )
 81 |             :add(nn.JoinTable(2))
 82 |    -- full model: takes the table {question, image}
 83 |    local model = nn.Sequential()
 84 |       :add(nn.ParallelTable()
 85 |          :add(lang) -- from question words to LSTM
 86 |          :add(nn.Identity()) -- no change to the image yet
 87 |       )
 88 |       :add(nn.ConcatTable() -- feed {que, img} to all members
 89 |          :add(nn.SelectTable(1)) -- pass only the question to be later used
 90 |          :add(nn.Sequential()
 91 |             :add(nn.SelectTable(2)) -- pass only the image to be later used
 92 |             :add(nn.Reshape(opt.size_image, 14*14, true))
 93 |          )
 94 |          -- calculate the attention
 95 |          :add(attention) -- input {que,img}, output: {att_w1,att_w2,...att_wN} 
 96 |       ) -- the output is {que, img, {att_w1,att_w2, ...}} 
 97 |       :add(nn.ConcatTable()
 98 |          :add(nn.Sequential() -- transform the question before multiplication
 99 |             :add(nn.SelectTable(1)) -- select just the que
100 |             :add(nn.Dropout(opt.dropout, true))
101 |             :add(nn.Linear(opt.size_rnn, opt.glimpse*opt.size_multi) -- TODO think what to do with multiple glimpse
102 |                :init('weight', nninit.xavier, {dist='normal', gain=opt.activation:lower()}))
103 |             :add(cudnn.BatchNormalization(opt.glimpse*opt.size_multi))
104 |             :add(cudnn[opt.activation](true))
105 |          ) 
106 |          :add(att_applier)  -- apply the attention to the image
107 |       ) -- the output now is {que, att_img}
108 |       -- multiply or concat the two and pass them to the classifier
109 |       :add(nn.CMulTable())  -- final multiplication of the question and attended image
110 |       -- classifier starts
111 |       :add(nn.Dropout(opt.dropout, true))
112 |       :add(nn.Linear((opt.glimpse*opt.size_multi), opt.glimpse*opt.size_multi)
113 |          :init('weight', nninit.xavier, {dist='normal', gain=opt.activation:lower()}))-- 'cls output'
114 |       :add(cudnn.BatchNormalization(opt.size_multi*opt.glimpse))
115 |       :add(cudnn[opt.activation](true))
116 |       :add(nn.Dropout(opt.dropout, true))
117 |       :add(nn.Linear(opt.size_multi*opt.glimpse, opt.answer_count) -- one output score per answer class
118 |       :init('weight', nninit.xavier, {dist='normal', gain='linear'}))
119 | 
120 | 
121 |    return model
122 | end
123 | 
124 | return M
125 | 


--------------------------------------------------------------------------------
/experiments/pool/opt.lua:
--------------------------------------------------------------------------------
  1 | return { -- default options of the pool experiment
  2 |    -- model loading and saving
  3 |    model          = './model.lua',
  4 |    eval           = true,
  5 |    log_loss_every = 350,
  6 |    save_dir       = './checkpoints/', 
  7 |    save_after     = 999999, -- in iterations
  8 |    eval_after     = 1, -- in epochs 
  9 |    criterion      = 'SoftClassNLLCriterion', -- run_cross_entropy.sh sets criterion=CrossEntropyCriterion in the environment
 10 |    checkpoint     = '',
 11 |    start_iter     = 1,
 12 |    -- model definitions
 13 |    size_multi      = 1500,
 14 |    size_common     = 1500,
 15 |    size_classifier = 3000, -- NOTE(review): not referenced in ./model.lua
 16 |    size_image      = 2048, -- channel count of the image features (valdataworker.lua allocates bsz x 2048 x 14 x 14)
 17 |    size_rnn        = 2400,
 18 |    size_rnn_layer  = 1,
 19 |    -- training related
 20 |    max_iter       = 432000, -- about 70 epochs
 21 |    learning_rate  = 1e-4,
 22 |    momentum       = 0.9,
 23 |    batch_size     = 64,
 24 |    val_batch_size = 96,
 25 |    gradclip       = 0, -- 0 means disabled
 26 |    dropout        = 0.5,
 27 |    rnn_dropout    = 0.0, -- NOTE(review): not referenced in ./model.lua
 28 |    activation     = 'Tanh',
 29 |    img_activation = 'Tanh',
 30 |    glimpse        = 2, -- number of attention maps; ./model.lua hard-codes two attention branches
 31 |    showprogress  = true,
 32 |    repl          = false,
 33 |    -- logging related
 34 |    log_dir        = './logs/',
 35 |    log_level      = 3, -- 1:trace, 2:debug, 3:info, 4:warn, 5:error, 6:fatal
 36 |    log_to_console = true,
 37 |    log_to_file    = true,
 38 |    version = 'vanila', -- the run_*.sh scripts set version= in the environment
 39 |    -- plot related
 40 |    showplot      = false,
 41 |    plot_every    = 1000,
 42 |    plot_server   = 'http://localhost',
 43 |    plot_port     = 8097,
 44 |    -- misc
 45 |    num_gpus = 1,
 46 |    rnd_seed = 139, 
 47 | 
 48 |    -- data related
 49 |    
 50 |    -- number of dataloading threads
 51 |    dataworkers = 4,
 52 |    buffer_size = 8,
 53 | 
 54 |    val_dataworkers = 2,
 55 |    val_buffer_size = 8,
 56 | 
 57 |    img_dir = '../../resnet_features/',
 58 | 
 59 |    que_train   = '../../vqa2_data/v2_OpenEnded_mscoco_train2014_questions.json',
 60 |    que_val     = '../../vqa2_data/v2_OpenEnded_mscoco_val2014_questions.json',
 61 |    -- change this to point to test std json when needed
 62 |    que_test    = '../../vqa2_data/v2_OpenEnded_mscoco_test-dev2015_questions.json',
 63 | 
 64 |    ans_train    = '../../vqa2_data/v2_mscoco_train2014_annotations.json',
 65 |    ans_val      = '../../vqa2_data/v2_mscoco_val2014_annotations.json',
 66 | 
 67 | 
 68 |    -- train on trainval or just train
 69 |    train_on_val = false,
 70 |    -- left aligned questions for MLP and right for RNN language model
 71 |    left_aligned = false,
 72 | 
 73 |    -- at least how many times should the word appear in train set to be in vocab
 74 |    word_freq = 1,
 75 |    que_len = 10,
 76 |    ans_type = 'all', -- yes-no number other 
 77 |    ans_aug = false,
 78 | 
 79 |    -- output file prefixes; the full filename depends on the options
 80 |    train_questions    = './data/train_questions',
 81 |    train_tid2qid      = './data/train_tid2qid',
 82 |    train_tid2img      = './data/train_tid2img',
 83 | 
 84 |    test_questions     = './data/test_questions',
 85 |    test_tid2qid       = './data/test_tid2qid',
 86 |    test_tid2img       = './data/test_tid2img',
 87 |    test_tid2anstype   = './data/test_tid2anstype', 
 88 | 
 89 |    vocab          = './data/vocab', 
 90 | 
 91 |    lookuptable    = './data/lookup',
 92 | 
 93 |    -- which word2vec to use
 94 |    word2vec = '../../utils/fastText.hash',
 95 |    vec_len  = 300,
 96 | 
 97 |    answer_count = 3000, -- size of the classifier output layer in ./model.lua
 98 | 
 99 |    -- output file prefixes; the full filename depends on the options
100 |    qid2ans      = './data/qid2ans',
101 |    qid2type     = './data/qid2type',
102 |    qid2anstype  = './data/qid2anstype',
103 |    ans_id2str   = './data/ans_id2str',
104 | 
105 |    -- output file prefixes; the full filename depends on the options
106 |    train_answers = './data/train_answers',
107 |    test_answers = './data/test_answers',
108 | }
109 | 
110 | 


--------------------------------------------------------------------------------
/experiments/pool/process_answers.py:
--------------------------------------------------------------------------------
  1 | import pdb
  2 | import matplotlib
  3 | matplotlib.use('Agg')
  4 | import matplotlib.pyplot as plt
  5 | plt.ioff()
  6 | import argparse
  7 | import sys
  8 | import numpy as np
  9 | import ujson
 10 | import operator 
 11 | from time import strftime, localtime
 12 | 
 13 | def log(msg):
 14 |     msg = str(msg)
 15 |     log_prefix ='['+strftime("%H:%M:%S", localtime())+']  process_answers.py:\033[1;32m '
 16 |     print(log_prefix + msg + '\033[0m')
 17 | 
 18 | 
 19 | parser = argparse.ArgumentParser(description='Process answers')
 20 | parser.add_argument('--input_train')
 21 | parser.add_argument('--input_val')
 22 | parser.add_argument('--output_qid2ans')
 23 | parser.add_argument('--output_qid2type')
 24 | parser.add_argument('--output_qid2anstype')
 25 | parser.add_argument('--output_ans_id2str')
 26 | parser.add_argument('--ans_type')
 27 | parser.add_argument('--answer_count', type=int)
 28 | parser.add_argument('--train_on_val', type=bool)
 29 | args = parser.parse_args()
 30 | 
 31 | counts = {}
 32 | mapping = {}
 33 | allans = {}
 34 | answers = []
 35 | qid2ans = {}
 36 | ans2id = {}
 37 | qid2type = {}
 38 | qid2anstype = {}
 39 | 
 40 | log('Train on val is '+str(args.train_on_val))
 41 | log('Reading '+args.input_train)
 42 | data = ujson.load(open(args.input_train))
 43 | log('Processing '+args.input_train)
 44 | for answer in data['annotations']:
 45 |     if args.ans_type != 'all' and answer['answer_type'] != args.ans_type:
 46 |         continue
 47 |     if answer['answer_type'] == 'yes/no':
 48 |         skip = False
 49 |         for a in answer['answers']:
 50 |             if not (a['answer'] == 'no' or a['answer'] == 'yes'):
 51 |                 skip = True
 52 |                 break
 53 |         if skip:
 54 |             continue
 55 |     ans_str = answer['multiple_choice_answer']
 56 |     qid = answer['question_id']
 57 |     qid2type[qid] = answer['question_type']
 58 |     qid2anstype[qid] = answer['answer_type']
 59 |     mapping[ans_str] = mapping[ans_str] if mapping.has_key(ans_str) else []
 60 |     mapping[ans_str].append(qid)
 61 |     cnt = counts[ans_str] if counts.has_key(ans_str) else 0
 62 |     counts[ans_str] = cnt + 1
 63 |     allans[qid] = allans[qid] if allans.has_key(qid) else []
 64 |     for a in answer['answers']:
 65 |         allans[qid].append(a['answer'])
 66 | 
 67 | if args.train_on_val:
 68 |     log('Processing ' + args.input_val)
 69 |     data = ujson.load(open(args.input_val))
 70 |     for answer in data['annotations']:
 71 |         if args.ans_type != 'all' and  answer['answer_type'] != args.ans_type:
 72 |             continue
 73 |         ans_str = answer['multiple_choice_answer']
 74 |         qid = answer['question_id']
 75 |         qid2type[qid] = answer['question_type']
 76 |         qid2anstype[qid] = answer['answer_type']
 77 |         mapping[ans_str] = mapping[ans_str] if mapping.has_key(ans_str) else []
 78 |         mapping[ans_str].append(qid)
 79 |         cnt = counts[ans_str] if counts.has_key(ans_str) else 0
 80 |         counts[ans_str] = cnt + 1
 81 |         allans[qid] = allans[qid] if allans.has_key(qid) else []
 82 |         for a in answer['answers']:
 83 |             allans[qid].append(a['answer'])
 84 | 
 85 | sorted_counts = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
 86 | 
 87 | 
 88 | log('Preparing answer set.')
 89 | # get most frequent answers
 90 | for i in range(min(args.answer_count, len(sorted_counts))):
 91 |     ans_str = sorted_counts[i][0]
 92 |     answers.append(ans_str)
 93 |     ans2id[ans_str] = i + 1 # lua is 1-index 
 94 | 
 95 | 
 96 | 
 97 | answer_count = len(qid2type) # + 214354  # train + val, one per question not all 10
 98 | 
 99 | all_questions = 0
100 | # log(answer_count)
101 | # log(answers)
102 | if not args.train_on_val:
103 |     # process val answers
104 |     log('Processing '+args.input_val+' as test set.')
105 |     data = ujson.load(open(args.input_val))
106 |     for answer in data['annotations']:
107 |         if args.ans_type != 'all' and  answer['answer_type'] != args.ans_type:
108 |             continue
109 |         all_questions = all_questions + 1
110 |         ans_str = answer['multiple_choice_answer']
111 |         qid = answer['question_id']
112 |         qid2type[qid] = answer['question_type']
113 |         qid2anstype[qid] = answer['answer_type']
114 |         if mapping.has_key(ans_str):
115 |             mapping[ans_str].append(qid)
116 |             allans[qid] = allans[qid] if allans.has_key(qid) else []
117 |             for a in answer['answers']:
118 |                 allans[qid].append(a['answer'])
119 | 
120 | log('All questions: '+str(all_questions))
121 | log('Preparing mappings...')
122 | # make que_id to ans_id mapping
123 | for ans_str in answers:
124 |     for qid in mapping[ans_str]:
125 |         qid2ans[qid] = [ans2id[ans_str]] # first element is the MC answer
126 |         for a in allans[qid]:
127 |             if ans2id.has_key(a): 
128 |                 qid2ans[qid].append(ans2id[a])
129 |             else:
130 |                 qid2ans[qid].append(-1) # the answer is not in the most freq
131 | 
132 | id2ans = {}
133 | for k in ans2id.keys():
134 |     id2ans[ans2id[k]] = k
135 | 
136 | 
137 | log('qid2ans size: '+str(len(qid2ans)))
138 | log('qid2type size: '+str(len(qid2type)))
139 | log('qid2anstype size: '+str(len(qid2anstype)))
140 | log('id2ans size: '+str(len(id2ans)))
141 | 
142 | log('Saving data...')
143 | ujson.dump(qid2ans, open(args.output_qid2ans,'w'))
144 | ujson.dump(qid2type, open(args.output_qid2type,'w'))
145 | ujson.dump(qid2anstype, open(args.output_qid2anstype,'w'))
146 | ujson.dump(id2ans, open(args.output_ans_id2str,'w'))
147 | 
148 | 
149 | 
150 | # plot the percentange of questions vs number of answers
151 | 
152 | # all_answers = 443757 + 214354  # train + val, one per question not all 10
153 | all_answers = 0
154 | for i in xrange(len(sorted_counts)):
155 |     all_answers = all_answers + sorted_counts[i][1]
156 | c = 0
157 | 
158 | log('ALL answers: ' + str(all_answers))
159 | p = []
160 | for i in xrange(len(sorted_counts)):
161 |     c = c + sorted_counts[i][1]
162 |     p.append(100.*c/all_answers)
163 | 
164 | fig = plt.figure(figsize=(8,5))
165 | ax = fig.add_subplot(111)
166 | ax.plot(range(min(22000,len(answers))),p[:min(22000, len(answers))],label='VQAv2')
167 | ax.grid(True)
168 | ax.set_ylabel("Percentage of questions covered")
169 | ax.set_xlabel("Number of top K answers")
170 | gridlines = ax.get_xgridlines() + ax.get_ygridlines()
171 | for line in gridlines:
172 |     line.set_linewidth(0.5)
173 |     line.set_color('lightgray')
174 | ax.set_xlim((0,min(22000, len(answers))))
175 | # ax.xaxis.set_ticks(np.arange(0,min(22001, len(answers)), 1000))
176 | # ax.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: str(x/1000)))
177 | # ax.set_ylim((50,100))
178 | # ax.yaxis.set_ticks(np.arange(50,101, 5))
179 | ax.legend()
180 | plt.tight_layout()
181 | fig.savefig('coverage_'+args.ans_type.replace('/','-')+'.jpg')
182 | 
183 | h = []
184 | for i in xrange(min(args.answer_count, len(sorted_counts))):
185 |     c = sorted_counts[i][1]
186 |     h.append(100.*c/answer_count)
187 | 
188 | # plot pie of classes
189 | fig = plt.figure(figsize=(8,5))
190 | ax = fig.add_subplot(111)
191 | ax.pie(h)
192 | plt.tight_layout()
193 | fig.savefig('classbalance_'+args.ans_type.replace('/','-')+'.jpg')
194 | 


--------------------------------------------------------------------------------
/experiments/pool/run_cross_entropy.sh:
--------------------------------------------------------------------------------
1 | # Runs train.lua with the cross-entropy criterion on the GPU given by $gpu (GPU 0 when gpu is unset).
2 | version=cross_entropy \
3 | criterion=CrossEntropyCriterion \
4 | CUDA_VISIBLE_DEVICES="${gpu:-0}" \
5 | th train.lua
6 | 


--------------------------------------------------------------------------------
/experiments/pool/run_soft_cross_entropy.sh:
--------------------------------------------------------------------------------
1 | # Runs train.lua with version=SoftCrossEntropy on the GPU given by $gpu (GPU 0 when gpu is unset).
2 | version=SoftCrossEntropy \
3 | CUDA_VISIBLE_DEVICES="${gpu:-0}" \
4 | th train.lua
5 | 


--------------------------------------------------------------------------------
/experiments/pool/valdataworker.lua:
--------------------------------------------------------------------------------
 1 | local bsz, epoch_size, que, ans, img, soft, permutation, p, loaded
 2 | local logger, img_dir
 3 | -- Allocates the reusable batch buffers and stores the worker configuration.
 4 | local _init = function(_bsz, _epoch_size, _logger, que_len, _img_dir)
 5 |    bsz = _bsz
 6 |    epoch_size = _epoch_size
 7 |    que = torch.FloatTensor(bsz, que_len)
 8 |    img = torch.FloatTensor(bsz, 2048, 14, 14)
 9 |    ans = torch.FloatTensor(bsz, 11)
10 |    soft = torch.FloatTensor(bsz, 10, 2):zero() -- zeroed: _dowork returns it untouched when no soft answers are given
11 |    loaded = {}
12 |    logger = _logger
13 |    img_dir = _img_dir
14 | end
15 | 
16 | local _dowork = function(questions, mapping, answers, soft_answers, cache, itensor, anstypes, iter)
17 |    -- Builds validation batch `iter`; returns -1 when no question is left, otherwise iter,
18 |    -- clones of que/img/ans/soft, tds.Hash(loaded) and tds.Hash(que_types).
19 |    local t = sys.clock()
20 |    collectgarbage()
21 |    logger.trace('collectgarbage',(sys.clock()-t))
22 | 
23 |    if not answers then
24 |       ans = ans:zero()
25 |    end
26 | 
27 |    loaded = {}
28 |    local que_types = {} -- local so it does not leak into the global environment
29 |    for i=1, bsz do
30 |       local index = (i + bsz*((iter-1)%epoch_size))
31 |       if index > questions:size(1) then
32 |          break
33 |       end
34 | 
35 |       que[i] = questions[index]
36 |       img[i] = cache[mapping[index]] and itensor[cache[mapping[index]]] or torch.load(img_dir .. mapping[index]):decompress()
37 | 
38 |       if answers then
39 |          ans[i] = answers[index]
40 |       end
41 | 
42 |       if soft_answers then
43 |          soft[i] = soft_answers[index]
44 |       end
45 | 
46 |       if anstypes then
47 |          que_types[i] = anstypes[index]
48 |       end
49 | 
50 |       loaded[i] = mapping[index]
51 |    end
52 | 
53 |    if #loaded == 0 then return -1 end
54 | 
55 |    -- not enough for full batch
56 |    if #loaded < bsz then
57 |       return iter, que[{{1, #loaded}}]:clone(), img[{{1, #loaded}}]:clone(), ans[{{1, #loaded}}]:clone(), soft[{{1,#loaded}}]:clone(), tds.Hash(loaded), tds.Hash(que_types)
58 |    end
59 | 
60 |    logger.trace('_dowork',(sys.clock()-t))
61 |    return iter, que:clone(), img:clone(), ans:clone(), soft:clone(), tds.Hash(loaded), tds.Hash(que_types)
62 | end
63 | 
64 | return { -- public interface of this worker module
65 |    init = _init,
66 |    dowork = _dowork,
67 | }
68 | 


--------------------------------------------------------------------------------
/image_preprocess/README.md:
--------------------------------------------------------------------------------
 1 | Follow the instructions at: [https://github.com/akirafukui/vqa-mcb/tree/master/preprocess](https://github.com/akirafukui/vqa-mcb/tree/master/preprocess) to obtain image tensor representations. Use the default configuration which should give you `2048x14x14` dimensional tensor for each image.  
 2 | Modify the file `extract_resnet.py` at line 70 to save the tensors as uncompressed numpy arrays, or run the script `uncompress.py` located in this directory to uncompress the arrays. 
 3 | Then run `convert_to_torch.lua` to convert the numpy tensors to compressed torch tensors. 
 4 | 
 5 | Why do we obtain the image tensors in this convoluted way? Well, for some reason the pre-trained ResNet-152 caffe model produces image features that are sparser and thus, when compressed, take up about three times less space than those of the pre-trained ResNet-152 torch model.
 6 | The train+val features obtained from the caffe model are 28.3GB and can easily be cached in 64GB of RAM, which brings the dataloading time down to 1ms. 
 7 | On the other hand, the same features obtained from the torch model are 83GB and cannot be fully cached in RAM, so the model often needs to read them from disk which makes the dataloading a performance bottleneck.
 8 | 
 9 | If you still want to use the torch features for some reason, then you can use the `extract_whole_image_features_compressed.lua` file to do so. 
10 | The only advantage is that the torch code is optimized, multi-threaded code that can use 3 GPUs to extract all features in less than one hour. 
11 | On the other hand, the caffe code on 3 GPUs takes about 15 hours. 
12 | 
13 | 
14 | In the end you should have a `resnet_features` directory under the main `vqa-soft` directory, i.e.:
15 | ```
16 | vqa-soft/resnet_features/
17 | |-- test2015/
18 | |-- train2014/
19 | `-- val2014/
20 | 
21 | ```
22 | 
23 | 


--------------------------------------------------------------------------------
/image_preprocess/extract_whole_image_features_compressed.lua:
--------------------------------------------------------------------------------
  1 | require 'nn'
  2 | require 'cudnn'
  3 | cudnn.benchmark = true 
  4 | cudnn.fastest = true 
  5 | require 'cunn'
  6 | require 'image'
  7 | require 'torchzlib'
  8 | 
  9 | local cjson = require('cjson') 
 10 | local t = require './transforms.lua'
 11 | local tds = require 'tds'
 12 | local threads = require 'threads'
 13 | optnet = require'optnet'
 14 | util = require '../utils/util.lua'
 15 | threads.Threads.serialization('threads.sharedserialize')
 16 | 
 17 | opt = { -- defaults; the loop below lets an environment variable of the same name override each one
 18 |    input           = './train_val_unique_img_fn.json',
 19 |    -- (the keys of the JSON object in `input` are the image file names to process)
 20 |    -- folder containing train2014, val2014, and test2015 directories 
 21 |    imgroot         = '/temp/ilija/fast/ms_coco_images/',   
 22 |    model           = '../resnet_models/resnet-152.t7',
 23 |    outdir          = '../resnet_features/trainval2014/', 
 24 |    imgsize         = 448,
 25 |    region          =  14,
 26 |    bsz             = 50,
 27 |    nthreads        =  10, 
 28 |    num_gpus        =   3,
 29 |    rnd_seed        = 139,
 30 |    compress_factor =   0,
 31 | }
 32 | for k, v in pairs(opt) do 
 33 |    opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] 
 34 | end
 35 | print(opt)
 36 | paths.mkdir(opt.outdir)
 37 | 
 38 | torch.manualSeed(opt.rnd_seed)
 39 | cutorch.manualSeed(opt.rnd_seed)
 40 | torch.setnumthreads(opt.nthreads)
 41 | 
 42 | print('Reading file:', opt.input)
 43 | local file = assert(io.open(opt.input, 'r')) -- fail with the OS error message instead of indexing nil
 44 | local text = file:read('*a') -- whole file; the default format would read only the first line
 45 | file:close()
 46 | jsondata = cjson.decode(text)
 47 | 
 48 | local done = tds.Hash() -- '<text before the first dot>.jpg' for every line of all.txt; only used by the disabled filter below
 49 | for line in io.lines('all.txt') do -- io.lines raises an error when all.txt cannot be opened
 50 |    done[line:split('%.')[1] .. '.jpg'] = true
 51 | end
 52 | 
 53 | local imgs = tds.Vec() -- all keys of the decoded JSON object
 54 | for k, v in pairs(jsondata) do
 55 |    -- if not done[k] then
 56 |        imgs:insert(k)
 57 |     -- end
 58 | end
 59 | print('Total images:', #imgs)
 60 | local meanstd = { -- per-channel mean/std; only referenced by the commented-out ColorNormalize below
 61 |      mean = {
 62 |     0.48462227599918,
 63 |     0.45624044862054,
 64 |     0.40588363755159,
 65 |   },
 66 |   std = {
 67 |     0.22889466674951,
 68 |     0.22446679341259,
 69 |     0.22495548344775,
 70 |   }
 71 | }
 72 | 
 73 | local transform = t.Compose{ -- resize to imgsize x imgsize, then center crop of the same size
 74 |    -- t.Scale(opt.imgsize),
 75 |    t.ScaleEqual(opt.imgsize),
 76 |    -- t.ColorNormalize(meanstd),
 77 |    t.CenterCrop(opt.imgsize),
 78 | }
 79 | 
 80 | img = image.load(opt.imgroot .. 'train2014/COCO_train2014_000000009064.jpg') -- check on one hard-coded image: print its mean before and after the transform
 81 | print(img:mean())
 82 | img = transform(img)
 83 | print(img:mean())
 84 | image.save('img.jpg', img) -- writes the transformed image to the working directory
 85 | 
 86 | local initfn = function() -- initialisation function passed to the image-loading thread pool
 87 |     require 'torch'
 88 |     require 'image'
 89 |     local tds = require 'tds'
 90 |     torch.setnumthreads(1)
 91 | end
 92 | 
 93 | local nimgs = #imgs
 94 | 
 95 | print(nimgs)
 96 | 
 97 | local model = torch.load(opt.model)
 98 | 
 99 | model:remove(11) -- drop modules 11, 10 and 9 of the loaded network (presumably the pooling/classifier head -- confirm)
100 | model:remove(10)
101 | model:remove(9)
102 | model.modules[8].modules[3]:remove(3) -- drop the third module of sub-module 3 inside module 8
103 | model = model:cuda()
104 | print(model)
105 | 
106 | local sample = torch.zeros(2,3, opt.imgsize, opt.imgsize):cuda() -- dummy batch of two images for optnet
107 | optnet.optimizeMemory(model, sample, {mode='inference', inplace=true, reuseBuffers=true, removeGradParams=true})
108 | sample = nil
109 | 
110 | if opt.num_gpus > 1 then -- wrap the model in a DataParallelTable when more than one GPU is requested
111 |    net = nn.DataParallelTable(1) 
112 |       :add(model, util.range(opt.num_gpus))
113 |       :threads(function() -- same cudnn flags as set at the top of this script
114 |          local cudnn = require 'cudnn'
115 |          cudnn.fastest = true 
116 |          cudnn.benchmark = true
117 |       end)
118 | else
119 |    net = model 
120 | end
121 | net = net:cuda()
122 | net:evaluate() -- evaluation mode
123 | 
124 | local compress_pool = threads.Threads(4, function() torch.setnumthreads(1) require'torch' require'torchzlib' end)
125 | -- Queues a job on compress_pool that compresses every row of `out` and saves it as <outdir><batchmap entry>.t7z
126 | add_compress_job = function(out, batchmap, compress_factor, outdir)
127 |     
128 |     -- executed on the thread's thread
129 |     local loadimgs = function (out, batchmap, compress_factor, outdir)
130 |        for k,v in pairs(batchmap) do -- k: row index in `out`, v: entry of batchmap used as file name
131 |           local ct = torch.CompressedTensor(out[k], compress_factor)
132 |           torch.save(outdir..v..'.t7z', ct)
133 |        end
134 |     end
135 |     
136 |     compress_pool:addjob(loadimgs, nil, out, batchmap, compress_factor, outdir) -- no end-callback
137 | end
138 | 
139 | -- use pool to load images from disk, otherwise 70% of time is spent there
140 | local pool = threads.Threads(opt.nthreads, initfn)
141 | -- Queues one loader job for the batch that starts at the current global batch_index.
142 | addjob = function()
143 | 
144 |     -- executed on the thread's thread
145 |     local loadimgs = function (imgs, opt, transform, batch_index)
146 |         collectgarbage()
147 |         local batch = torch.FloatTensor(opt.bsz, 3, opt.imgsize, opt.imgsize)
148 |         local batchmap = {}
149 |         local thind = 1
150 |         for ind=batch_index, batch_index + opt.bsz - 1 do
151 |             if not imgs[ind] then break end
152 |             local dir
153 |             if imgs[ind]:find('test') then
154 |                dir = 'test2015/'
155 |             else
156 |                dir = (imgs[ind]:match('train') and '/train' or '/val')
157 |                dir = dir .. '2014/'
158 |             end
159 |             local img = image.load(opt.imgroot .. dir  .. imgs[ind], 3, 'float')
160 |             img = transform(img)
161 |             batch[thind]:copy(img)
162 |             batchmap[thind] = imgs[ind]
163 |             thind = thind + 1
164 |         end
165 | 
166 |         if #batchmap == 0 then
167 |            return nil
168 |         else
169 |            return batch[{{1,#batchmap}}], batchmap
170 |         end
171 |     end
172 | 
173 |     -- executed on the main thread
174 |     local endcallback = function(batch, batchmap)
175 | 
176 |        if batch then
177 |           local out = net:forward(batch:cuda()):float()
178 | 
179 |           add_compress_job(out:clone(), batchmap, opt.compress_factor, opt.outdir)
180 | 
181 |           completed = completed + #batchmap
182 |           xlua.progress(completed, nimgs)
183 | 
184 |            -- schedule the next batch; `<=` keeps a last batch that starts exactly at index nimgs
185 |            if batch_index <= nimgs then
186 |                addjob()
187 |                batch_index = batch_index + opt.bsz
188 |            end
189 |        end
190 |     end
191 | 
192 |     pool:addjob(loadimgs, endcallback, imgs, opt, transform, batch_index)
193 | end
194 | 
195 | batch_index = 1
196 | completed = 0
197 | 
198 | -- `<=` so that a batch starting exactly at index nimgs (e.g. a single image) is still scheduled
199 | while batch_index <= nimgs do
200 |     -- queue one batch per loader thread; the end-callbacks queue further batches
201 |     for k=1, opt.nthreads do
202 |         addjob()
203 |         batch_index = batch_index + opt.bsz
204 |     end
205 | 
206 |     -- run end-callbacks on the main thread until every image has been counted as completed
207 |     while completed < nimgs do
208 |         pool:dojob()
209 |     end
210 | end
211 | 
212 | print('Still writing')
213 | compress_pool:synchronize()
214 | 


--------------------------------------------------------------------------------
/image_preprocess/transforms.lua:
--------------------------------------------------------------------------------
  1 | --
  2 | --  Copyright (c) 2016, Facebook, Inc.
  3 | --  All rights reserved.
  4 | --
  5 | --  This source code is licensed under the BSD-style license found in the
  6 | --  LICENSE file in the root directory of this source tree. An additional grant
  7 | --  of patent rights can be found in the PATENTS file in the same directory.
  8 | --
  9 | --  Image transforms for data augmentation and input normalization
 10 | --
 11 | 
 12 | require 'image'
 13 | 
 14 | local M = {}
 15 | 
 16 | function M.Compose(transforms)
 17 |    return function(input)
 18 |       -- feed the output of each transform into the next one, in list order
 19 |       local current = input
 20 |       for _, step in ipairs(transforms) do current = step(current) end
 21 |       return current
 22 |    end
 23 | end
 24 | 
 25 | function M.ColorNormalize(meanstd)
 26 |    return function(img)
 27 |       local normalized = img:clone() -- the input tensor is left untouched
 28 |       for channel = 1, 3 do
 29 |          local plane = normalized[channel]
 30 |          plane:add(-meanstd.mean[channel]):div(meanstd.std[channel])
 31 |       end
 32 |       return normalized
 33 |    end
 34 | end
 35 | 
 36 | -- Resizes so that the shorter edge equals `size`, keeping the aspect ratio
 37 | function M.Scale(size, interpolation)
 38 |    local mode = interpolation or 'bicubic'
 39 |    return function(input)
 40 |       local h, w = input:size(2), input:size(3)
 41 |       if math.min(w, h) == size then
 42 |          return input -- shorter edge already has the requested length
 43 |       end
 44 |       local outW, outH = w/h * size, size
 45 |       if w < h then
 46 |          outW, outH = size, h/w * size
 47 |       end
 48 |       return image.scale(input, outW, outH, mode)
 49 |    end
 50 | end
 51 | 
 52 | function M.ScaleEqual(size, interpolation)
 53 |    local mode = interpolation or 'bicubic'
 54 |    return function(input)
 55 |       return image.scale(input, size, size, mode) -- size x size output, aspect ratio not preserved
 56 |    end
 57 | end
 58 | 
 59 | -- Crops the centered size x size patch
 60 | function M.CenterCrop(size)
 61 |    return function(input)
 62 |       local left = math.ceil((input:size(3) - size)/2)
 63 |       local top = math.ceil((input:size(2) - size)/2)
 64 |       return image.crop(input, left, top, left + size, top + size)
 65 |    end
 66 | end
 67 | 
 68 | -- Crops a random size x size window, optionally after zero-padding every border
 69 | function M.RandomCrop(size, padding)
 70 |    local pad = padding or 0
 71 | 
 72 |    return function(input)
 73 |       local source = input
 74 |       if pad > 0 then
 75 |          local origH, origW = input:size(2), input:size(3)
 76 |          source = input.new(3, origH + 2*pad, origW + 2*pad):zero()
 77 |          local interior = source:narrow(2, pad+1, origH):narrow(3, pad+1, origW)
 78 |          interior:copy(input)
 79 |       end
 80 | 
 81 |       local h, w = source:size(2), source:size(3)
 82 |       if w == size and h == size then
 83 |          return source
 84 |       end
 85 | 
 86 |       local left = torch.random(0, w - size)
 87 |       local top = torch.random(0, h - size)
 88 |       local out = image.crop(source, left, top, left + size, top + size)
 89 |       assert(out:size(2) == size and out:size(3) == size, 'wrong crop size')
 90 |       return out
 91 |    end
 92 | end
 93 | 
 94 | -- Four corner patches and center crop from image and its horizontal reflection
 95 | function M.TenCrop(size)
 96 |    local centerCrop = M.CenterCrop(size)
 97 | 
 98 |    return function(input)
 99 |       local w, h = input:size(3), input:size(2)
100 | 
101 |       local output = {}
102 |       for _, img in ipairs{input, image.hflip(input)} do
103 |          table.insert(output, centerCrop(img))
104 |          table.insert(output, image.crop(img, 0, 0, size, size))
105 |          table.insert(output, image.crop(img, w-size, 0, w, size))
106 |          table.insert(output, image.crop(img, 0, h-size, size, h))
107 |          table.insert(output, image.crop(img, w-size, h-size, w, h))
108 |       end
109 | 
110 |       -- View as mini-batch
111 |       for i, img in ipairs(output) do
112 |          output[i] = img:view(1, img:size(1), img:size(2), img:size(3))
113 |       end
114 | 
115 |       return input.cat(output, 1)
116 |    end
117 | end
118 | 
-- ResNet-style scale augmentation: the shorter side is resized to an integer
-- drawn uniformly from [minSize, maxSize]; the longer side follows the
-- original aspect ratio (rounded). Always uses bicubic interpolation.
function M.RandomScale(minSize, maxSize)
   return function(input)
      local w, h = input:size(3), input:size(2)

      local shortSide = torch.random(minSize, maxSize)
      local targetW, targetH
      if w < h then
         targetW = shortSide
         targetH = torch.round(h / w * shortSide)
      else
         targetH = shortSide
         targetW = torch.round(w / h * shortSide)
      end

      return image.scale(input, targetW, targetH, 'bicubic')
   end
end
135 | 
-- Inception-style crop: picks a region covering 8%-100% of the image area
-- with aspect ratio between 3/4 and 4/3, then resizes it to size x size.
-- Up to 10 random regions are tried; if none fits inside the image the
-- transform falls back to Scale(size) followed by CenterCrop(size).
function M.RandomSizedCrop(size)
   local scale = M.Scale(size)
   local crop = M.CenterCrop(size)

   return function(input)
      for _ = 1, 10 do
         local imgH, imgW = input:size(2), input:size(3)
         local area = imgH * imgW
         local targetArea = torch.uniform(0.08, 1.0) * area

         local aspectRatio = torch.uniform(3/4, 4/3)
         local w = torch.round(math.sqrt(targetArea * aspectRatio))
         local h = torch.round(math.sqrt(targetArea / aspectRatio))

         -- Randomly swap the sides so both orientations are equally likely.
         if torch.uniform() < 0.5 then
            w, h = h, w
         end

         if h <= imgH and w <= imgW then
            local y1 = torch.random(0, imgH - h)
            local x1 = torch.random(0, imgW - w)

            local out = image.crop(input, x1, y1, x1 + w, y1 + h)
            assert(out:size(2) == h and out:size(3) == w, 'wrong crop size')

            return image.scale(out, size, size, 'bicubic')
         end
      end

      -- fallback
      return crop(scale(input))
   end
end
171 | 
-- Mirrors the image left-right with probability `prob`; otherwise the input
-- is returned untouched.
function M.HorizontalFlip(prob)
   return function(input)
      local flip = torch.uniform() < prob
      if not flip then
         return input
      end
      return image.hflip(input)
   end
end
180 | 
-- Rotates the image by a random angle drawn uniformly from
-- [-deg/2, deg/2) degrees using bilinear interpolation.
-- With deg == 0 the transform is the identity and draws no random number.
function M.Rotation(deg)
   return function(input)
      if deg == 0 then
         return input
      end
      local radians = (torch.uniform() - 0.5) * deg * math.pi / 180
      return image.rotate(input, radians, 'bilinear')
   end
end
189 | 
-- AlexNet-style PCA lighting noise.
-- Draws alpha ~ N(0, alphastd) per principal component and adds
-- sum_k eigvec[c][k] * alpha[k] * eigval[k] to channel c of a cloned image.
-- With alphastd == 0 the input is returned unchanged.
function M.Lighting(alphastd, eigval, eigvec)
   return function(input)
      if alphastd == 0 then
         return input
      end

      local alpha = torch.Tensor(3):normal(0, alphastd)

      -- Scale every eigenvector column by its random coefficient and its
      -- eigenvalue, then collapse the columns into one offset per channel.
      local weighted = eigvec:clone()
      weighted:cmul(alpha:view(1, 3):expand(3, 3))
      weighted:cmul(eigval:view(1, 3):expand(3, 3))
      local rgb = weighted:sum(2):squeeze()

      local shifted = input:clone()
      for channel = 1, 3 do
         shifted[channel]:add(rgb[channel])
      end
      return shifted
   end
end
211 | 
-- In-place linear blend: img1 <- alpha * img1 + (1 - alpha) * img2.
-- Returns img1 (which has been overwritten).
local function blend(img1, img2, alpha)
   local result = img1:mul(alpha)
   return result:add(1 - alpha, img2)
end
215 | 
-- Writes a grayscale version of `img` into `dst` (resized to match) and
-- returns `dst`. The luminance 0.299*R + 0.587*G + 0.114*B is computed into
-- the first channel and then replicated to channels 2 and 3.
local function grayscale(dst, img)
   dst:resizeAs(img)

   local luma = dst[1]
   luma:zero()
   luma:add(0.299, img[1])
   luma:add(0.587, img[2])
   luma:add(0.114, img[3])

   for channel = 2, 3 do
      dst[channel]:copy(luma)
   end
   return dst
end
224 | 
-- Random saturation jitter: blends the image with its grayscale version
-- using a factor alpha drawn uniformly from [1 - var, 1 + var].
-- NOTE: the input tensor is modified in place and returned.
function M.Saturation(var)
   local gs -- scratch buffer, allocated on first use and then reused

   return function(input)
      if not gs then
         gs = input.new()
      end
      grayscale(gs, input)

      local alpha = 1.0 + torch.uniform(-var, var)
      blend(input, gs, alpha)
      return input
   end
end
237 | 
-- Random brightness jitter: blends the image with an all-zero (black) image
-- using a factor alpha drawn uniformly from [1 - var, 1 + var].
-- NOTE: the input tensor is modified in place and returned.
function M.Brightness(var)
   local gs -- scratch buffer, allocated on first use and then reused

   return function(input)
      if not gs then
         gs = input.new()
      end
      gs:resizeAs(input)
      gs:zero()

      local alpha = 1.0 + torch.uniform(-var, var)
      blend(input, gs, alpha)
      return input
   end
end
250 | 
-- Random contrast jitter: blends the image with a constant image whose value
-- is the mean luminance, using a factor alpha drawn uniformly from
-- [1 - var, 1 + var].
-- NOTE: the input tensor is modified in place and returned.
function M.Contrast(var)
   local gs -- scratch buffer, allocated on first use and then reused

   return function(input)
      if not gs then
         gs = input.new()
      end
      grayscale(gs, input)

      -- Replace the grayscale image by its mean intensity everywhere.
      local meanLuma = gs[1]:mean()
      gs:fill(meanLuma)

      local alpha = 1.0 + torch.uniform(-var, var)
      blend(input, gs, alpha)
      return input
   end
end
264 | 
-- Applies every transform in `ts` exactly once, in a freshly shuffled order
-- on each call. The argument may be the image itself or a value with an
-- `img` field, in which case that field is used.
function M.RandomOrder(ts)
   return function(input)
      local img = input.img or input
      local perm = torch.randperm(#ts)
      for step = 1, #ts do
         local transform = ts[perm[step]]
         img = transform(img)
      end
      return img
   end
end
275 | 
-- Combines brightness, contrast and saturation jitter.
-- `opt` may carry the fields `brightness`, `contrast` and `saturation`
-- (each defaults to 0, which disables that jitter). The enabled jitters are
-- applied in random order; if none is enabled the identity is returned.
function M.ColorJitter(opt)
   local candidates = {
      { opt.brightness or 0, M.Brightness },
      { opt.contrast or 0,   M.Contrast },
      { opt.saturation or 0, M.Saturation },
   }

   local ts = {}
   for _, candidate in ipairs(candidates) do
      local strength, make = candidate[1], candidate[2]
      if strength ~= 0 then
         ts[#ts + 1] = make(strength)
      end
   end

   if #ts == 0 then
      return function(input) return input end
   end

   return M.RandomOrder(ts)
end
298 | 
299 | return M
300 | 


--------------------------------------------------------------------------------
/loss_implementations/README.md:
--------------------------------------------------------------------------------
1 | Here you will find GPU implementations of the loss function in torch, pytorch, and caffe. 
2 | Follow the instructions in each framework's folder to install and use the loss function in your code. 
3 | 
4 | 


--------------------------------------------------------------------------------
/loss_implementations/caffe/README.md:
--------------------------------------------------------------------------------
1 | For Caffe this is easy: only two files need to be changed. 
2 | I modified the standard cross entropy loss function, but you can also rename it and add it as a separate loss function if you want to keep both versions of cross entropy.  
3 | 
4 | I used commit `df412ac0da3e2e7eb194f0c16842fd126496d90d`, so if you are using a different version you might need to update the loss function code as well.
5 | 


--------------------------------------------------------------------------------
/loss_implementations/caffe/src/caffe/layers/softmax_loss_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include <cfloat>
  3 | #include <vector>
  4 | 
  5 | #include "caffe/layers/softmax_loss_layer.hpp"
  6 | #include "caffe/util/math_functions.hpp"
  7 | 
  8 | namespace caffe {
  9 | 
 10 | template <typename Dtype>
 11 | void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
 12 |     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
 13 |   LossLayer<Dtype>::LayerSetUp(bottom, top);
 14 |   LayerParameter softmax_param(this->layer_param_);
 15 |   softmax_param.set_type("Softmax");
 16 |   softmax_layer_ = LayerRegistry<Dtype>::CreateLayer(softmax_param);
 17 |   softmax_bottom_vec_.clear();
 18 |   softmax_bottom_vec_.push_back(bottom[0]);
 19 |   softmax_top_vec_.clear();
 20 |   softmax_top_vec_.push_back(&prob_);
 21 |   softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
 22 | 
 23 |   has_ignore_label_ =
 24 |     this->layer_param_.loss_param().has_ignore_label();
 25 |   if (has_ignore_label_) {
 26 |     ignore_label_ = this->layer_param_.loss_param().ignore_label();
 27 |   }
 28 |   if (!this->layer_param_.loss_param().has_normalization() &&
 29 |       this->layer_param_.loss_param().has_normalize()) {
 30 |     normalization_ = this->layer_param_.loss_param().normalize() ?
 31 |                      LossParameter_NormalizationMode_VALID :
 32 |                      LossParameter_NormalizationMode_BATCH_SIZE;
 33 |   } else {
 34 |     normalization_ = this->layer_param_.loss_param().normalization();
 35 |   }
 36 | }
 37 | 
 38 | template <typename Dtype>
 39 | void SoftmaxWithLossLayer<Dtype>::Reshape(
 40 |     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
 41 |   LossLayer<Dtype>::Reshape(bottom, top);
 42 |   softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_);
 43 |   softmax_axis_ =
 44 |       bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis());
 45 |   outer_num_ = bottom[0]->count(0, softmax_axis_);
 46 |   inner_num_ = bottom[0]->count(softmax_axis_ + 1);
 47 | 
 48 |   // XXX bottom[1] is the labels
 49 |   /* std::cout << "outer" <<  outer_num_ << std::endl; */
 50 |   /* std::cout << "inner" << inner_num_ << std::endl; */
 51 |   /* std::cout << "softmax" << softmax_axis_ << std::endl; */
 52 | 
 53 |   /* std::cout << "1" << bottom[1]->data_at(0,0,0,0) << std::endl; */
 54 |   /* std::cout << "2" << bottom[1]->data_at(0,1,0,0) << std::endl; */
 55 |   /* std::cout << "3" << bottom[1]->data_at(1,0,0,0) << std::endl; */
 56 |   /* std::cout << "4" << bottom[1]->data_at(1,1,0,0) << std::endl; */
 57 | 
 58 |   /* std::cout << std::endl; */
 59 | 
 60 | 
 61 |   /* CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) */
 62 |   /*     << "Number of labels must match number of predictions; " */
 63 |   /*     << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), " */
 64 |   /*     << "label count (number of labels) must be N*H*W, " */
 65 |   /*     << "with integer values in {0, 1, ..., C-1}."; */
 66 |   if (top.size() >= 2) {
 67 |     // softmax output
 68 |     top[1]->ReshapeLike(*bottom[0]);
 69 |   }
 70 | }
 71 | 
 72 | template <typename Dtype>
 73 | Dtype SoftmaxWithLossLayer<Dtype>::get_normalizer(
 74 |     LossParameter_NormalizationMode normalization_mode, int valid_count) {
 75 |   Dtype normalizer;
 76 |   switch (normalization_mode) {
 77 |     case LossParameter_NormalizationMode_FULL:
 78 |       normalizer = Dtype(outer_num_ * inner_num_);
 79 |       break;
 80 |     case LossParameter_NormalizationMode_VALID:
 81 |       if (valid_count == -1) {
 82 |         normalizer = Dtype(outer_num_ * inner_num_);
 83 |       } else {
 84 |         normalizer = Dtype(valid_count);
 85 |       }
 86 |       break;
 87 |     case LossParameter_NormalizationMode_BATCH_SIZE:
 88 |       normalizer = Dtype(outer_num_);
 89 |       break;
 90 |     case LossParameter_NormalizationMode_NONE:
 91 |       normalizer = Dtype(1);
 92 |       break;
 93 |     default:
 94 |       LOG(FATAL) << "Unknown normalization mode: "
 95 |           << LossParameter_NormalizationMode_Name(normalization_mode);
 96 |   }
 97 |   // Some users will have no labels for some examples in order to 'turn off' a
 98 |   // particular loss in a multi-task setup. The max prevents NaNs in that case.
 99 |   return std::max(Dtype(1.0), normalizer);
100 | }
101 | 
102 | template <typename Dtype>
103 | void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
104 |     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
105 |   // The forward pass computes the softmax prob values.
106 |   softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
107 |   const Dtype* prob_data = prob_.cpu_data();
108 |   const Dtype* label = bottom[1]->cpu_data();
109 |   int dim = prob_.count() / outer_num_;
110 |   int count = 0;
111 |   Dtype loss = 0;
112 |   for (int i = 0; i < outer_num_; ++i) {
113 |     for (int j = 0; j < inner_num_; j++) {
114 |       const int label_value = static_cast<int>(label[i * inner_num_ + j]);
115 |       if (has_ignore_label_ && label_value == ignore_label_) {
116 |         continue;
117 |       }
118 |       DCHECK_GE(label_value, 0);
119 |       DCHECK_LT(label_value, prob_.shape(softmax_axis_));
120 |       loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
121 |                            Dtype(FLT_MIN)));
122 |       ++count;
123 |     }
124 |   }
125 |   top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
126 |   if (top.size() == 2) {
127 |     top[1]->ShareData(prob_);
128 |   }
129 | }
130 | 
131 | template <typename Dtype>
132 | void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
133 |     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
134 |   if (propagate_down[1]) {
135 |     LOG(FATAL) << this->type()
136 |                << " Layer cannot backpropagate to label inputs.";
137 |   }
138 |   if (propagate_down[0]) {
139 |     Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
140 |     const Dtype* prob_data = prob_.cpu_data();
141 |     caffe_copy(prob_.count(), prob_data, bottom_diff);
142 |     const Dtype* label = bottom[1]->cpu_data();
143 |     int dim = prob_.count() / outer_num_;
144 |     int count = 0;
145 |     for (int i = 0; i < outer_num_; ++i) {
146 |       for (int j = 0; j < inner_num_; ++j) {
147 |         const int label_value = static_cast<int>(label[i * inner_num_ + j]);
148 |         if (has_ignore_label_ && label_value == ignore_label_) {
149 |           for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
150 |             bottom_diff[i * dim + c * inner_num_ + j] = 0;
151 |           }
152 |         } else {
153 |           bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
154 |           ++count;
155 |         }
156 |       }
157 |     }
158 |     // Scale gradient
159 |     Dtype loss_weight = top[0]->cpu_diff()[0] /
160 |                         get_normalizer(normalization_, count);
161 |     caffe_scal(prob_.count(), loss_weight, bottom_diff);
162 |   }
163 | }
164 | 
165 | #ifdef CPU_ONLY
166 | STUB_GPU(SoftmaxWithLossLayer);
167 | #endif
168 | 
169 | INSTANTIATE_CLASS(SoftmaxWithLossLayer);
170 | REGISTER_LAYER_CLASS(SoftmaxWithLoss);
171 | 
172 | }  // namespace caffe
173 | 


--------------------------------------------------------------------------------
/loss_implementations/caffe/src/caffe/layers/softmax_loss_layer.cu:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include <cfloat>
  3 | #include <vector>
  4 | 
  5 | #include "caffe/layers/softmax_loss_layer.hpp"
  6 | #include "caffe/util/math_functions.hpp"
  7 | 
  8 | namespace caffe {
  9 | 
 10 | template <typename Dtype>
 11 | __global__ void SoftmaxLossForwardGPU(const int nthreads,
 12 |           const Dtype* prob_data, const Dtype* label, Dtype* loss,
 13 |           const int num, const int dim, const int spatial_dim,
 14 |           const bool has_ignore_label_, const int ignore_label_,
 15 |           Dtype* counts) {
 16 | 
 17 |    // XXX notice this 
 18 |   CUDA_KERNEL_LOOP(index, nthreads) {
 19 | 
 20 |      // XXX
 21 |      // data_at(0,0,0,0) is the class index
 22 |      // data_at(0,1,0,0) is the weight
 23 | 
 24 | 
 25 | 
 26 |     const int n = index / spatial_dim;
 27 |     const int s = index % spatial_dim;
 28 |     const int target = n * spatial_dim + s;
 29 |     int j;
 30 |     loss[index] = 0;
 31 |     for(j = 0; j < 10; j+=1) {
 32 |       const int label_value      = static_cast<int>(label[target*10*2 + j*2 + 0]);
 33 |       const float weight_value = static_cast<float>(label[target*10*2 + j*2 + 1]);
 34 | 
 35 |       if(label_value >= 0 && weight_value > 0) { 
 36 |          loss[index] -= weight_value * log(max(prob_data[n * dim + label_value * spatial_dim + s], Dtype(FLT_MIN)));
 37 |       }
 38 |     }
 39 |     counts[index] = 1;
 40 |   }
 41 | }
 42 | 
 43 | template <typename Dtype>
 44 | void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
 45 |     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
 46 |   softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
 47 |   const Dtype* prob_data = prob_.gpu_data();
 48 |   const Dtype* label = bottom[1]->gpu_data();
 49 |   const Dtype* lcpu = bottom[1]->cpu_data();
 50 |   const int dim = prob_.count() / outer_num_;
 51 |   const int nthreads = outer_num_ * inner_num_;
 52 | 
 53 | 
 54 |   /* int index; */
 55 |   /* int spatial_dim = inner_num_; */
 56 | 
 57 |   /* for(index=0; index<2; ++index){ */
 58 |   /*   const int n = index / spatial_dim; */
 59 |   /*   const int s = index % spatial_dim; */
 60 |   /*   const int target = n * spatial_dim + s; */
 61 |   /*   int j; */
 62 | 
 63 |   /*   for(j = 0; j < 10; j+=1) { */
 64 |   /*     std::cout << "weight_value " << weight_value << "label_value " << label_value << std::endl; */
 65 |   /*   } */
 66 |   /* } */
 67 | 
 68 | 
 69 |   /* std::cout<< "inn " << inner_num_ << std::endl; */
 70 |   /* std::cout<< "out " << outer_num_ << std::endl; */
 71 | 
 72 | 
 73 |   /* for(int i=0; i<4; ++i) { */
 74 |   /*    std::cout <<"at i: "<<lcpu[i] << std::endl; */
 75 |   /* } */
 76 | 
 77 |   // Since this memory is not used for anything until it is overwritten
 78 |   // on the backward pass, we use it here to avoid having to allocate new GPU
 79 |   // memory to accumulate intermediate results in the kernel.
 80 |   Dtype* loss_data = bottom[0]->mutable_gpu_diff();
 81 |   // Similarly, this memory is never used elsewhere, and thus we can use it
 82 |   // to avoid having to allocate additional GPU memory.
 83 |   Dtype* counts = prob_.mutable_gpu_diff();
 84 |   // NOLINT_NEXT_LINE(whitespace/operators)
 85 |   SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
 86 |       CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
 87 |       outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
 88 |   Dtype loss;
 89 |   caffe_gpu_asum(nthreads, loss_data, &loss);
 90 |   Dtype valid_count = -1;
 91 |   // Only launch another CUDA kernel if we actually need the count of valid
 92 |   // outputs.
 93 |   if (normalization_ == LossParameter_NormalizationMode_VALID &&
 94 |       has_ignore_label_) {
 95 |     caffe_gpu_asum(nthreads, counts, &valid_count);
 96 |   }
 97 |   top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_,
 98 |                                                         valid_count);
 99 |   if (top.size() == 2) {
100 |     top[1]->ShareData(prob_);
101 |   }
102 | }
103 | 
/**
 * Gradient of the soft cross-entropy loss w.r.t. the softmax input.
 *
 * For every index in [0, nthreads) the kernel walks the 10 {class index,
 * weight} pairs of that prediction in `label` and subtracts the weight from
 * the matching class entry of bottom_diff. Pairs with a negative class index
 * or a non-positive weight are skipped. counts[index] is always set to 1.
 *
 * `top`, `num`, `has_ignore_label_` and `ignore_label_` are accepted but not
 * used.
 */
template <typename Dtype>
__global__ void SoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
          const Dtype* label, Dtype* bottom_diff, const int num, const int dim,
          const int spatial_dim, const bool has_ignore_label_,
          const int ignore_label_, Dtype* counts) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    // Start of the {class, weight} pairs belonging to this prediction.
    const Dtype* pairs = label + (n * spatial_dim + s) * 10 * 2;

    for (int slot = 0; slot < 10; ++slot) {
      const int label_value = static_cast<int>(pairs[slot * 2 + 0]);
      const float weight_value = static_cast<float>(pairs[slot * 2 + 1]);

      if (label_value >= 0 && weight_value > 0) {
        bottom_diff[n * dim + label_value * spatial_dim + s] -= weight_value;
      }
    }
    counts[index] = 1;
  }
}
127 | 
/**
 * GPU backward pass of the soft cross-entropy loss.
 *
 * Copies the softmax probabilities into bottom[0]'s diff, lets
 * SoftmaxLossBackwardGPU subtract the label weights, and scales the result
 * by top diff / normalizer. Back-propagating into the labels is a fatal
 * error.
 */
template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }

  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  const Dtype* prob_data = prob_.gpu_data();
  const Dtype* top_data = top[0]->gpu_data();
  caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);

  const Dtype* label = bottom[1]->gpu_data();
  const int dim = prob_.count() / outer_num_;
  const int nthreads = outer_num_ * inner_num_;
  // prob_'s diff is never used for anything else, so it doubles as the
  // per-prediction counter buffer and no extra GPU memory is needed.
  Dtype* counts = prob_.mutable_gpu_diff();
  // NOLINT_NEXT_LINE(whitespace/operators)
  SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
      CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,
      outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);

  // The number of valid outputs is summed on the GPU only when the
  // normalization mode actually needs it.
  Dtype valid_count = -1;
  const bool need_valid_count =
      normalization_ == LossParameter_NormalizationMode_VALID &&
      has_ignore_label_;
  if (need_valid_count) {
    caffe_gpu_asum(nthreads, counts, &valid_count);
  }

  const Dtype loss_weight = top[0]->cpu_diff()[0] /
                            get_normalizer(normalization_, valid_count);
  caffe_gpu_scal(prob_.count(), loss_weight , bottom_diff);
}
163 | 
164 | INSTANTIATE_LAYER_GPU_FUNCS(SoftmaxWithLossLayer);
165 | 
166 | }  // namespace caffe
167 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/README.md:
--------------------------------------------------------------------------------
 1 | I'm not familiar with the PyTorch framework architecture, so I have probably modified more files than necessary. 
 2 | I used version v0.1.12, commit `ccd5f4dbfcf8ba4d5903a5b57f0200742833dd54`. 
 3 | 
 4 | `git status` says I have modified the following files: 
 5 | 
 6 | Note that I didn't add a new loss function as I did for torch; I just modified the standard NLL criterion to behave like soft NLL. You can rename the loss function if you want to keep both criteria.
 7 | 
 8 | ```
 9 | On branch master
10 | Your branch and 'origin/master' have diverged,
11 | and have 3 and 486 different commits each, respectively.
12 |   (use "git pull" to merge the remote branch into yours)
13 | 
14 | Changes not staged for commit:
15 |   (use "git add <file>..." to update what will be committed)
16 |   (use "git checkout -- <file>..." to discard changes in working directory)
17 | 
18 | 	modified:   torch/lib/THC/generic/THCTensor.c
19 | 	modified:   torch/lib/THCUNN/ClassNLLCriterion.cu
20 | 	modified:   torch/lib/THCUNN/generic/ClassNLLCriterion.cu
21 | 	modified:   torch/lib/THCUNN/generic/THCUNN.h
22 | 	modified:   torch/lib/THNN/generic/THNN.h
23 | 	modified:   torch/lib/THNN/init.c
24 | 	modified:   torch/nn/_functions/thnn/auto.py
25 | 	modified:   torch/nn/functional.py
26 | 	modified:   torch/nn/modules/__init__.py
27 | 	modified:   torch/nn/modules/loss.py
28 | 	modified:   torch/utils/serialization/read_lua_file.py
29 | 
30 | Untracked files:
31 |   (use "git add <file>..." to include in what will be committed)
32 | 
33 | 	torch/csrc/generated/
34 | 	torch/lib/THCUNN/SoftClassNLLCriterion.cu
35 | 	torch/lib/THCUNN/generic/SoftClassNLLCriterion.cu
36 | 	torch/lib/THNN/generic/SoftClassNLLCriterion.c
37 | 
38 | no changes added to commit (use "git add" and/or "git commit -a")
39 | ```
40 | 
41 | You can find the modified and the new files in the respective folders. The new files are actually not needed since in the end I just modified the normal ClassNLLCriterion to behave as SoftClassNLLCriterion. 
42 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/lib/THCUNN/ClassNLLCriterion.cu:
--------------------------------------------------------------------------------
  1 | #include "THCUNN.h"
  2 | #include "common.h"
  3 | #include "THCHalf.h"
  4 | #include "THCHalfAutoNumerics.cuh"
  5 | 
  6 | #include <stdio.h>
  7 | #include <assert.h>
  8 | 
  9 | static const int NTHREADS = 32;
 10 | 
 11 | template <typename Dtype>
 12 | __global__ void cunn_ClassNLLCriterion_updateOutput_kernel1(Dtype *output,
 13 |                                                            Dtype *total_weight,
 14 |                                                            Dtype *input,
 15 |                                                            THCIndex_t  *target,
 16 |                                                            Dtype *weights,
 17 |                                                            int size_average,
 18 |                                                            int n_classes) {
 19 |   assert(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);
 20 | 
 21 |   // TODO: T4951791 Reuse code between updateOutput_kernel1 and
 22 |   // updateOutput_kernel.
 23 | 
 24 |   int t = (int)*target - TH_INDEX_BASE;
 25 |   assert(t >= 0 && t < n_classes);
 26 |   Dtype cur_weight = weights ? weights[t] : ScalarConvert<int, Dtype>::to(1);
 27 |   *output = -cur_weight * input[t];
 28 |   *total_weight = cur_weight;
 29 |   if (size_average && *total_weight > 0) {
 30 |     *output /= *total_weight;
 31 |   }
 32 | }
 33 | 
 34 | template <typename Dtype, typename Acctype>
 35 | __global__ void cunn_ClassNLLCriterion_updateOutput_kernel(Dtype *output,
 36 |                                                            Dtype *total_weight,
 37 |                                                            Dtype *input,
 38 |                                                            THCIndex_t *target,
 39 |                                                            Dtype *weights,
 40 |                                                            int size_average,
 41 |                                                            int nframe,
 42 |                                                            int ndim,
 43 |                                                            int n_classes) {
 44 |   __shared__ Acctype shInputs[NTHREADS], acc_weight[NTHREADS];
 45 |   int i,j, t;
 46 |   Dtype cur_weight;
 47 | 
 48 |   shInputs[threadIdx.x] = ScalarConvert<int, Acctype>::to(0);
 49 |   acc_weight[threadIdx.x] = ScalarConvert<int, Acctype>::to(0);
 50 |   for (i = threadIdx.x; i < nframe; i += NTHREADS) {
 51 |       for (j = 0; j < 10; j +=1) {
 52 |           t = (int) target[i * 10   + j] - TH_INDEX_BASE;
 53 |           if (t >= 0) {
 54 |                assert(t < n_classes);
 55 |                cur_weight = weights[i * 10 + j];
 56 |                shInputs[threadIdx.x] -= input[i * ndim + t] * cur_weight;
 57 |           }
 58 |       }
 59 |       acc_weight[threadIdx.x] += ScalarConvert<int, Dtype>::to(1);
 60 |   }
 61 |   __syncthreads();
 62 | 
 63 |   // TODO: T4951791 Reuse code between updateOutput_kernel1 and
 64 |   // updateOutput_kernel
 65 | 
 66 |   if (threadIdx.x == 0) {
 67 |     *output = *total_weight = ScalarConvert<int, Dtype>::to(0);
 68 |     Acctype outputAcc = 0;
 69 |     Acctype total_weightAcc = 0;
 70 |     for (i = 0; i < NTHREADS; ++i){
 71 |       // FIXME should we do somethigng here
 72 |       outputAcc += shInputs[i];
 73 |       total_weightAcc += acc_weight[i];
 74 |     }
 75 |     *total_weight = ScalarConvert<Acctype, Dtype>::to(total_weightAcc);
 76 |     *output = ScalarConvert<Acctype, Dtype>::to(outputAcc);
 77 |     if (size_average && *total_weight > 0) {
 78 |       *output = ScalarConvert<Acctype, Dtype>::to(outputAcc / total_weightAcc);
 79 |     }
 80 | 
 81 |   }
 82 | }
 83 | 
 84 | template <typename Dtype>
 85 | __global__ void cunn_ClassNLLCriterion_updateGradInput_kernel1(
 86 |   Dtype* gradInput,
 87 |   Dtype* weights,
 88 |   THCIndex_t* target,
 89 |   Dtype* total_weight,
 90 |   int size_average,
 91 |   int n_classes)
 92 | {
 93 |   if (*total_weight <= 0) {
 94 |     return;
 95 |   }
 96 |   Dtype norm = size_average ? (ScalarConvert<int, Dtype>::to(1) / *total_weight) : ScalarConvert<int, Dtype>::to(1);
 97 |   int t = (int)*target - TH_INDEX_BASE;
 98 |   assert(t >= 0 && t < n_classes);
 99 |   gradInput[t] = -(weights ? weights[t] : ScalarConvert<int, Dtype>::to(1)) * norm;
100 | }
101 | 
// Batched soft-NLL backward kernel.
//
// Every frame i owns 10 slots: target[i*10 + j] is a class index (entries
// that are negative after subtracting TH_INDEX_BASE are skipped) and
// weights[i*10 + j] the weight of that slot. For every valid slot the kernel
// subtracts weight * norm from gradInput[i*ndim + class], where norm is
// 1 / *total_weight when size_average is set and 1 otherwise. Does nothing
// if *total_weight is not positive.
//
// Two fixes compared with the previous version:
//  * `norm` was computed but never applied, so with size_average the
//    gradient did not match the averaged loss of the forward kernel;
//  * the slot weight was assigned rather than accumulated, so a class
//    occupying several slots of one frame kept only the last weight, while
//    the forward kernel sums over all slots.
//
// NOTE(review): accumulation assumes the host wrapper zero-fills gradInput
// before the launch, as the stock THCUNN ClassNLLCriterion does - confirm in
// generic/ClassNLLCriterion.cu.
template <typename Dtype>
__global__ void cunn_ClassNLLCriterion_updateGradInput_kernel(
  Dtype *gradInput,
  THCIndex_t *target,
  Dtype *weights,
  Dtype *total_weight,
  int size_average,
  int nframe,
  int ndim,
  int n_classes)
{
  if (*total_weight <= 0) {
    return;
  }
  int i, j, t;
  Dtype norm = size_average ? (ScalarConvert<int, Dtype>::to(1) / *total_weight) : ScalarConvert<int, Dtype>::to(1);

  // Each frame is handled by exactly one thread, so the read-modify-write
  // below cannot race with another thread.
  for (i = threadIdx.x; i < nframe; i += NTHREADS) {
      for (j = 0; j < 10; ++j) {
          t = (int) target[i * 10 + j] - TH_INDEX_BASE;
          if (t >= 0) {
            assert(t < n_classes);
            const int idx = i * ndim + t;
            gradInput[idx] = gradInput[idx] - weights[i * 10 + j] * norm;
          }
      }
  }
}
129 | 
130 | #include "generic/ClassNLLCriterion.cu"
131 | #include "THCGenerateFloatTypes.h"
132 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/lib/THCUNN/SoftClassNLLCriterion.cu:
--------------------------------------------------------------------------------
  1 | #include "THCUNN.h"
  2 | #include "common.h"
  3 | #include "THCHalf.h"
  4 | #include "THCHalfAutoNumerics.cuh"
  5 | 
  6 | #include <stdio.h>
  7 | #include <assert.h>
  8 | 
  9 | static const int NTHREADS = 32;
 10 | 
 11 | template <typename Dtype>
 12 | __global__ void cunn_SoftClassNLLCriterion_updateOutput_kernel1(
 13 |     Dtype *output, Dtype *total_weight,
 14 |     Dtype *input, THCIndex_t *target, Dtype *weights,
 15 |     int size_average, int n_classes)
 16 | {
 17 |   // Single-sample forward pass: *total_weight = w and *output = -w * input[cls]
 18 |   // (divided by w when size_average is set and w > 0), where w is weights[cls]
 19 |   // if a weight pointer is given and 1 otherwise. Asserts thread index (0,0,0).
 20 |   // TODO: T4951791 Reuse code between updateOutput_kernel1 and
 21 |   // updateOutput_kernel.
 22 |   assert(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);
 23 |   int cls = (int)target[0] - TH_INDEX_BASE;
 24 |   assert(cls >= 0 && cls < n_classes);
 25 |   Dtype w = ScalarConvert<int, Dtype>::to(1);
 26 |   if (weights) { w = weights[cls]; }
 27 |   *output = -w * input[cls];
 28 |   *total_weight = w;
 29 |   if (size_average && *total_weight > 0) {
 30 |     *output /= *total_weight;
 31 |   }
 32 | }
 33 | 
 34 | template <typename Dtype, typename Acctype>
 35 | __global__ void cunn_SoftClassNLLCriterion_updateOutput_kernel(Dtype *output,       // [out] scalar loss
 36 |                                                            Dtype *total_weight, // [out] number of frames counted
 37 |                                                            Dtype *input,        // read as input[i * ndim + t]
 38 |                                                            THCIndex_t *target,  // nframe rows of n_weights class indices
 39 |                                                            Dtype *weights,      // nframe rows of n_weights weights, or NULL
 40 |                                                            int size_average,
 41 |                                                            int nframe,
 42 |                                                            int ndim,
 43 |                                                            int n_classes,
 44 |                                                            int n_weights) {
 45 |   __shared__ Acctype shInputs[NTHREADS], acc_weight[NTHREADS];
 46 |   int i, j, t;
 47 |   Dtype cur_weight;
 48 |   // Batched soft-target NLL forward pass for one block of NTHREADS threads.
 49 |   // Row i holds n_weights (class index, weight) pairs; a pair whose index is
 50 |   // below TH_INDEX_BASE is skipped. NULL weights means every weight is 1.
 51 |   shInputs[threadIdx.x] = ScalarConvert<int, Acctype>::to(0);
 52 |   acc_weight[threadIdx.x] = ScalarConvert<int, Acctype>::to(0);
 53 |   for (i = threadIdx.x; i < nframe; i += NTHREADS) {
 54 |       for (j = 0; j < n_weights; ++j) {
 55 |           t = (int) target[i * n_weights + j] - TH_INDEX_BASE;
 56 |           if (t >= 0) {
 57 |             assert(t < n_classes);
 58 |             cur_weight = weights ? weights[i * n_weights + j] : ScalarConvert<int, Dtype>::to(1);
 59 |             // Each kept pair adds -weight * input[i][t] to this thread's partial sum.
 60 |             shInputs[threadIdx.x] -= input[i * ndim + t] * cur_weight;
 61 |             // The pair weights are deliberately not accumulated into acc_weight.
 62 |           }
 63 |       }
 64 |       acc_weight[threadIdx.x] += ScalarConvert<int, Dtype>::to(1);  // one unit per frame
 65 |   }
 66 |   __syncthreads();
 67 | 
 68 |   // TODO: T4951791 Reuse code between updateOutput_kernel1 and
 69 |   // updateOutput_kernel
 70 | 
 71 |   if (threadIdx.x == 0) {
 72 |     *output = *total_weight = ScalarConvert<int, Dtype>::to(0);
 73 |     Acctype outputAcc = 0;
 74 |     Acctype total_weightAcc = 0;
 75 |     for (i = 0; i < NTHREADS; ++i){
 76 |       // FIXME should we do something here
 77 |       outputAcc += shInputs[i];
 78 |       total_weightAcc += acc_weight[i];
 79 |     }
 80 |     *total_weight = ScalarConvert<Acctype, Dtype>::to(total_weightAcc);
 81 |     *output = ScalarConvert<Acctype, Dtype>::to(outputAcc);
 82 |     if (size_average && *total_weight > 0) {
 83 |       *output = ScalarConvert<Acctype, Dtype>::to(outputAcc / total_weightAcc);  // mean over frames
 84 |     }
 85 | 
 86 |   }
 87 | }
 88 | 
 89 | template <typename Dtype>
 90 | __global__ void cunn_SoftClassNLLCriterion_updateGradInput_kernel1(
 91 |     Dtype* gradInput, Dtype* weights, THCIndex_t* target,
 92 |     Dtype* total_weight, int size_average, int n_classes)
 93 | {
 94 |   // Single-sample backward pass: writes -w * scale into gradInput[cls] only,
 95 |   // with w = weights[cls] (or 1 without weights) and scale = 1 / *total_weight
 96 |   // under size_average (else 1). Writes nothing when *total_weight <= 0.
 97 |   if (*total_weight <= 0) {
 98 |     return;
 99 |   }
100 |   Dtype one = ScalarConvert<int, Dtype>::to(1);
101 |   Dtype scale = size_average ? (one / *total_weight) : one;
102 |   int cls = (int)target[0] - TH_INDEX_BASE;
103 |   assert(cls >= 0 && cls < n_classes);
104 |   gradInput[cls] = -(weights ? weights[cls] : one) * scale;
105 | }
106 | 
107 | template <typename Dtype>
108 | __global__ void cunn_SoftClassNLLCriterion_updateGradInput_kernel(
109 |   Dtype *gradInput, THCIndex_t *target, Dtype *weights, Dtype *total_weight,
110 |   int size_average, int nframe, int ndim, int n_classes, int n_weights)
111 | {
112 |   // Batched soft-target NLL backward pass. For every pair j of row i whose
113 |   // class index t is not below TH_INDEX_BASE, stores
114 |   //   gradInput[i * ndim + t] = -weight(i, j) * norm
115 |   // where weight is 1 when `weights` is NULL and norm is 1 / *total_weight
116 |   // under size_average (1 otherwise), i.e. the same scaling the forward kernel
117 |   // applies to the loss. Other gradInput entries are left untouched.
118 |   // NOTE(review): a class repeated within a row overwrites instead of summing.
119 |   if (*total_weight <= 0) {
120 |     return;
121 |   }
122 |   int i, j, t;
123 |   Dtype norm = size_average ? (ScalarConvert<int, Dtype>::to(1) / *total_weight) : ScalarConvert<int, Dtype>::to(1);
124 | 
125 |   for (i = threadIdx.x; i < nframe; i += NTHREADS) {
126 |       for (j = 0; j < n_weights; ++j) {
127 |           t = (int) target[i * n_weights + j] - TH_INDEX_BASE;
128 |           if (t >= 0) {
129 |             assert(t < n_classes);
130 |             gradInput[i * ndim + t] = -(weights ? weights[i * n_weights + j] : ScalarConvert<int, Dtype>::to(1)) * norm;
131 |           }
132 |       }
133 |   }
134 | }
135 | 
136 | #include "generic/SoftClassNLLCriterion.cu"
137 | #include "THCGenerateFloatTypes.h"
138 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/lib/THCUNN/generic/ClassNLLCriterion.cu:
--------------------------------------------------------------------------------
  1 | #ifndef THC_GENERIC_FILE
  2 | #define THC_GENERIC_FILE "generic/ClassNLLCriterion.cu"
  3 | #else
  4 | 
  5 | void THNN_(ClassNLLCriterion_updateOutput)(  // CUDA forward entry point: validates arguments, then launches the forward kernel
  6 |            THCState *state,
  7 |            THCTensor *input,         // 1-D or 2-D (enforced by the THArgCheck below)
  8 |            THCIndexTensor *target,   // size(0) must equal the batch size
  9 |            THCTensor *output,        // 1-element tensor
 10 |            bool sizeAverage,         // forwarded to the kernel unchanged
 11 |            THCTensor *weights,       // optional, may be NULL
 12 |            THCTensor *total_weight) { // 1-element tensor
 13 |   THCUNN_check_dim_size(state, output, 1, 0, 1);
 14 |   THCUNN_check_dim_size(state, total_weight, 1, 0, 1);
 15 |   // Targets with more than one dimension are let through: the guard below is disabled.
 16 |   /* if (THCIndexTensor_(nDimension)(state, target) > 1) { */
 17 |   /*   THError("multi-target not supported"); */
 18 |   /* } */
 19 | 
 20 |   int n_dims = THCTensor_(nDimension)(state, input);
 21 |   int n_classes = THCTensor_(size)(state, input, n_dims - 1);  // size of the last input dimension
 22 | 
 23 |   if (weights) {
 24 |     THCUNN_assertSameGPU(
 25 |       state, 5, input, target, weights, output, total_weight
 26 |     );
 27 |   } else {
 28 |     THCUNN_assertSameGPU(
 29 |       state, 4, input, target, output, total_weight
 30 |     );
 31 |   }
 32 | 
 33 |   THArgCheck(n_dims <= 2 && n_dims > 0, 2, "vector or matrix expected");
 34 |   // The batch size (1 for a 1-D input) must match the target's first dimension.
 35 |   long batch_size = n_dims == 1 ? 1 : THCTensor_(size)(state, input, 0);
 36 |   long num_targets = THCudaLongTensor_size(state, target, 0);
 37 |   THArgCheck(batch_size == num_targets,
 38 |       2, "mismatch between the batch size of input (%ld) and that of target (%ld)",
 39 |       batch_size, num_targets);
 40 |   // Disabled below: the check that weights has exactly n_classes elements.
 41 |   /* if (weights && THCTensor_(nElement)(state, weights) != n_classes) { */
 42 |   /*   THCDescBuff s1 = THCTensor_(sizeDesc)(state, weights); */
 43 |   /*   THError("weight tensor should be defined either for all %d classes or no classes" */
 44 |   /*           " but got weight tensor of shape: %s", n_classes, s1.str); */
 45 |   /* } */
 46 |   // Contiguous versions of the tensors; each one is freed at the end of the function.
 47 |   input = THCTensor_(newContiguous)(state, input);
 48 |   weights = weights ? THCTensor_(newContiguous)(state, weights) : NULL;
 49 |   target = THCIndexTensor_(newContiguous)(state, target);
 50 | 
 51 |   real *input_data = THCTensor_(data)(state, input);
 52 |   real *weights_data = weights ? THCTensor_(data)(state, weights) : NULL;
 53 |   THCIndex_t  *target_data = THCIndexTensor_(data)(state, target);
 54 |   real *output_data = THCTensor_(data)(state, output);
 55 |   real *total_weight_data = THCTensor_(data)(state, total_weight);
 56 |   // 1-D input launches a single thread; 2-D input launches one block of NTHREADS threads.
 57 |   if (THCTensor_(nDimension)(state, input) == 1) {
 58 |     cunn_ClassNLLCriterion_updateOutput_kernel1<real>
 59 |       <<<1, 1, 0, THCState_getCurrentStream(state)>>>(
 60 |         output_data,
 61 |         total_weight_data,
 62 |         input_data,
 63 |         target_data,
 64 |         weights_data,
 65 |         sizeAverage,
 66 |         n_classes
 67 |     );
 68 | 
 69 |   } else if (THCTensor_(nDimension)(state, input) == 2) {
 70 |     cunn_ClassNLLCriterion_updateOutput_kernel<real, accreal>
 71 |       <<<1, NTHREADS, 0, THCState_getCurrentStream(state)>>>(
 72 |         output_data,
 73 |         total_weight_data,
 74 |         input_data,
 75 |         target_data,
 76 |         weights_data,
 77 |         sizeAverage,
 78 |         THCTensor_(size)(state, input, 0),
 79 |         THCTensor_(size)(state, input, 1),
 80 |         n_classes
 81 |     );
 82 |   }
 83 |   THCudaCheck(cudaGetLastError());
 84 |   // Release the references taken by newContiguous above.
 85 |   if (weights) {
 86 |     THCTensor_(free)(state, weights);
 87 |   }
 88 |   THCIndexTensor_(free)(state, target);
 89 |   THCTensor_(free)(state, input);
 90 | }
 91 | 
 92 | void THNN_(ClassNLLCriterion_updateGradInput)(  // CUDA backward entry point: validates arguments, then launches the gradient kernel
 93 |            THCState *state,
 94 |            THCTensor *input,         // used only for its dimensions here
 95 |            THCIndexTensor *target,   // size(0) must equal the batch size
 96 |            THCTensor *gradInput,     // must already be contiguous; written by the kernel
 97 |            bool sizeAverage,         // forwarded to the kernel unchanged
 98 |            THCTensor *weights,       // optional, may be NULL
 99 |            THCTensor *total_weight) {
100 |   /* if (THCIndexTensor_(nDimension)(state, target) > 1) { */
101 |   /*   THError("multi-target not supported"); */
102 |   /* } */
103 |   // The guard above is disabled, so targets with more than one dimension are accepted.
104 |   int n_dims = THCTensor_(nDimension)(state, input);
105 |   int n_classes = THCTensor_(size)(state, input, n_dims - 1);  // size of the last input dimension
106 | 
107 |   THArgCheck(THCTensor_(isContiguous)(state, gradInput), 4, "gradInput must be contiguous");
108 | 
109 |   if (weights) {
110 |     THCUNN_assertSameGPU(
111 |       state, 5, weights, input, target, gradInput, total_weight
112 |     );
113 |   }
114 |   else {
115 |     THCUNN_assertSameGPU(
116 |       state, 4, input, target, gradInput, total_weight
117 |     );
118 |   }
119 | 
120 |   THArgCheck(n_dims <= 2 && n_dims > 0, 2, "vector or matrix expected");
121 |   // The batch size (1 for a 1-D input) must match the target's first dimension.
122 |   long batch_size = n_dims == 1 ? 1 : THCTensor_(size)(state, input, 0);
123 |   long num_targets = THCudaLongTensor_size(state, target, 0);
124 |   THArgCheck(batch_size == num_targets,
125 |       2, "mismatch between the batch size of input (%ld) and that of target (%ld)",
126 |       batch_size, num_targets);
127 |   // Disabled below: the check that weights has exactly n_classes elements.
128 |   /* if (weights && THCTensor_(nElement)(state, weights) != n_classes) { */
129 |   /*   THError("weight tensor should be defined either for all or no classes"); */
130 |   /* } */
131 |   // Contiguous versions of weights and target; both are freed at the end of the function.
132 |   weights = weights ? THCTensor_(newContiguous)(state, weights) : NULL;
133 |   target = THCIndexTensor_(newContiguous)(state, target);
134 | 
135 |   real *weights_data = weights ? THCTensor_(data)(state, weights) : NULL;
136 |   real *gradInput_data = THCTensor_(data)(state, gradInput);
137 |   THCIndex_t  *target_data = THCIndexTensor_(data)(state, target);
138 |   real *total_weight_data = THCTensor_(data)(state, total_weight);
139 |   // NOTE(review): the batched kernel body in ClassNLLCriterion.cu appears to read 10 target/weight entries per row and to dereference weights unconditionally — confirm callers always pass both with 10 columns.
140 |   if (THCTensor_(nDimension)(state, input) == 1) {
141 |     cunn_ClassNLLCriterion_updateGradInput_kernel1<real>
142 |       <<<1, 1, 0, THCState_getCurrentStream(state)>>>(
143 |         gradInput_data,
144 |         weights_data,
145 |         target_data,
146 |         total_weight_data,
147 |         sizeAverage,
148 |         n_classes
149 |     );
150 |   } else {
151 |     cunn_ClassNLLCriterion_updateGradInput_kernel<real>
152 |       <<<1, NTHREADS, 0, THCState_getCurrentStream(state)>>>(
153 |         gradInput_data,
154 |         target_data,
155 |         weights_data,
156 |         total_weight_data,
157 |         sizeAverage,
158 |         THCTensor_(size)(state, input, 0),
159 |         THCTensor_(size)(state, input, 1),
160 |         n_classes
161 |     );
162 |   }
163 |   THCudaCheck(cudaGetLastError());
164 |   // Release the references taken by newContiguous above.
165 |   if (weights) {
166 |     THCTensor_(free)(state, weights);
167 |   }
168 |   THCIndexTensor_(free)(state, target);
169 | }
170 | 
171 | #endif
172 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/lib/THCUNN/generic/SoftClassNLLCriterion.cu:
--------------------------------------------------------------------------------
  1 | #ifndef THC_GENERIC_FILE
  2 | #define THC_GENERIC_FILE "generic/SoftClassNLLCriterion.cu"
  3 | #else
  4 | 
  5 | void THNN_(SoftClassNLLCriterion_updateOutput)(
  6 |            THCState *state,
  7 |            THCTensor *input,          // 1-D or 2-D (enforced by the THArgCheck below)
  8 |            THCIndexTensor *target,    // size(0) must equal the batch size
  9 |            THCTensor *output,         // 1-element tensor
 10 |            bool sizeAverage,          // forwarded to the kernel unchanged
 11 |            THCTensor *weights,        // optional, may be NULL
 12 |            THCTensor *total_weight) { // 1-element tensor
 13 |   THCUNN_check_dim_size(state, output, 1, 0, 1);
 14 |   THCUNN_check_dim_size(state, total_weight, 1, 0, 1);
 15 |   // CUDA forward entry point: validates the arguments, then launches the soft-target NLL kernel.
 16 |   // n_weights = entries per sample: a 2-D target's second dimension for 2-D input, else the legacy 10.
 17 |   int n_dims = THCTensor_(nDimension)(state, input);
 18 |   int n_classes = THCTensor_(size)(state, input, n_dims - 1);
 19 |   int n_weights = (n_dims == 2 && THCudaLongTensor_nDimension(state, target) == 2)
 20 |       ? (int)THCudaLongTensor_size(state, target, 1) : 10;
 21 |   if (weights) {
 22 |     THCUNN_assertSameGPU(
 23 |       state, 5, input, target, weights, output, total_weight
 24 |     );
 25 |   } else {
 26 |     THCUNN_assertSameGPU(
 27 |       state, 4, input, target, output, total_weight
 28 |     );
 29 |   }
 30 | 
 31 |   THArgCheck(n_dims <= 2 && n_dims > 0, 2, "vector or matrix expected");
 32 |   // The batch size (1 for a 1-D input) must match the target's first dimension.
 33 |   long batch_size = n_dims == 1 ? 1 : THCTensor_(size)(state, input, 0);
 34 |   long num_targets = THCudaLongTensor_size(state, target, 0);
 35 |   THArgCheck(batch_size == num_targets,
 36 |       2, "mismatch between the batch size of input (%ld) and that of target (%ld)",
 37 |       batch_size, num_targets);
 38 |   // weights, when given, must have n_weights entries along dimension n_dims - 1.
 39 |   if (weights && THCTensor_(size)(state, weights, n_dims -1) != n_weights) {
 40 |     THCDescBuff s1 = THCTensor_(sizeDesc)(state, weights);
 41 |     THError("weight tensor should be defined for all %d targets "
 42 |             " but got weight tensor of shape: %s", n_weights, s1.str);
 43 |   }
 44 |   // Contiguous versions of the tensors; each one is freed at the end of the function.
 45 |   input = THCTensor_(newContiguous)(state, input);
 46 |   weights = weights ? THCTensor_(newContiguous)(state, weights) : NULL;
 47 |   target = THCIndexTensor_(newContiguous)(state, target);
 48 | 
 49 |   real *input_data = THCTensor_(data)(state, input);
 50 |   real *weights_data = weights ? THCTensor_(data)(state, weights) : NULL;
 51 |   THCIndex_t  *target_data = THCIndexTensor_(data)(state, target);
 52 |   real *output_data = THCTensor_(data)(state, output);
 53 |   real *total_weight_data = THCTensor_(data)(state, total_weight);
 54 |   // 1-D input launches a single thread; 2-D input launches one block of NTHREADS threads.
 55 |   if (THCTensor_(nDimension)(state, input) == 1) {
 56 |     cunn_SoftClassNLLCriterion_updateOutput_kernel1<real>
 57 |       <<<1, 1, 0, THCState_getCurrentStream(state)>>>(
 58 |         output_data,
 59 |         total_weight_data,
 60 |         input_data,
 61 |         target_data,
 62 |         weights_data,
 63 |         sizeAverage,
 64 |         n_classes
 65 |     );
 66 | 
 67 |   } else if (THCTensor_(nDimension)(state, input) == 2) {
 68 |     cunn_SoftClassNLLCriterion_updateOutput_kernel<real, accreal>
 69 |       <<<1, NTHREADS, 0, THCState_getCurrentStream(state)>>>(
 70 |         output_data,
 71 |         total_weight_data,
 72 |         input_data,
 73 |         target_data,
 74 |         weights_data,
 75 |         sizeAverage,
 76 |         THCTensor_(size)(state, input, 0),
 77 |         THCTensor_(size)(state, input, 1),
 78 |         n_classes,
 79 |         n_weights
 80 |     );
 81 |   }
 82 |   THCudaCheck(cudaGetLastError());
 83 |   // Release the references taken by newContiguous above.
 84 |   if (weights) {
 85 |     THCTensor_(free)(state, weights);
 86 |   }
 87 |   THCIndexTensor_(free)(state, target);
 88 |   THCTensor_(free)(state, input);
 89 | }
 90 | 
 91 | void THNN_(SoftClassNLLCriterion_updateGradInput)(
 92 |            THCState *state,
 93 |            THCTensor *input,          // used only for its dimensions here
 94 |            THCIndexTensor *target,    // size(0) must equal the batch size
 95 |            THCTensor *gradInput,      // must already be contiguous; written by the kernel
 96 |            bool sizeAverage,          // forwarded to the kernel unchanged
 97 |            THCTensor *weights,        // optional, may be NULL
 98 |            THCTensor *total_weight) {
 99 |   // CUDA backward entry point: validates the arguments, then launches the soft-target NLL gradient kernel.
100 |   int n_dims = THCTensor_(nDimension)(state, input);
101 |   int n_classes = THCTensor_(size)(state, input, n_dims - 1);
102 |   int n_weights = (n_dims == 2 && THCudaLongTensor_nDimension(state, target) == 2)
103 |       ? (int)THCudaLongTensor_size(state, target, 1) : 10;  // entries per sample; legacy 10 otherwise
104 |   THArgCheck(THCTensor_(isContiguous)(state, gradInput), 4, "gradInput must be contiguous");
105 | 
106 |   if (weights) {
107 |     THCUNN_assertSameGPU(
108 |       state, 5, weights, input, target, gradInput, total_weight
109 |     );
110 |   }
111 |   else {
112 |     THCUNN_assertSameGPU(
113 |       state, 4, input, target, gradInput, total_weight
114 |     );
115 |   }
116 | 
117 |   THArgCheck(n_dims <= 2 && n_dims > 0, 2, "vector or matrix expected");
118 |   // The batch size (1 for a 1-D input) must match the target's first dimension.
119 |   long batch_size = n_dims == 1 ? 1 : THCTensor_(size)(state, input, 0);
120 |   long num_targets = THCudaLongTensor_size(state, target, 0);
121 |   THArgCheck(batch_size == num_targets,
122 |       2, "mismatch between the batch size of input (%ld) and that of target (%ld)",
123 |       batch_size, num_targets);
124 |   // weights, when given, must have n_weights entries along dimension n_dims - 1.
125 |   if (weights && THCTensor_(size)(state, weights, n_dims -1) != n_weights) {
126 |     THCDescBuff s1 = THCTensor_(sizeDesc)(state, weights);
127 |     THError("weight tensor should be defined for all %d targets "
128 |             " but got weight tensor of shape: %s", n_weights, s1.str);
129 |   }
130 |   // Contiguous versions of weights and target; both are freed at the end of the function.
131 |   weights = weights ? THCTensor_(newContiguous)(state, weights) : NULL;
132 |   target = THCIndexTensor_(newContiguous)(state, target);
133 | 
134 |   real *weights_data = weights ? THCTensor_(data)(state, weights) : NULL;
135 |   real *gradInput_data = THCTensor_(data)(state, gradInput);
136 |   THCIndex_t  *target_data = THCIndexTensor_(data)(state, target);
137 |   real *total_weight_data = THCTensor_(data)(state, total_weight);
138 |   // 1-D input launches a single thread; any other accepted input launches one block of NTHREADS threads.
139 |   if (THCTensor_(nDimension)(state, input) == 1) {
140 |     cunn_SoftClassNLLCriterion_updateGradInput_kernel1<real>
141 |       <<<1, 1, 0, THCState_getCurrentStream(state)>>>(
142 |         gradInput_data,
143 |         weights_data,
144 |         target_data,
145 |         total_weight_data,
146 |         sizeAverage,
147 |         n_classes
148 |     );
149 |   } else {
150 |     cunn_SoftClassNLLCriterion_updateGradInput_kernel<real>
151 |       <<<1, NTHREADS, 0, THCState_getCurrentStream(state)>>>(
152 |         gradInput_data,
153 |         target_data,
154 |         weights_data,
155 |         total_weight_data,
156 |         sizeAverage,
157 |         THCTensor_(size)(state, input, 0),
158 |         THCTensor_(size)(state, input, 1),
159 |         n_classes,
160 |         n_weights
161 |     );
162 |   }
163 |   THCudaCheck(cudaGetLastError());
164 |   // Release the references taken by newContiguous above.
165 |   if (weights) {
166 |     THCTensor_(free)(state, weights);
167 |   }
168 |   THCIndexTensor_(free)(state, target);
169 | }
170 | 
171 | #endif
172 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/lib/THNN/generic/SoftClassNLLCriterion.c:
--------------------------------------------------------------------------------
  1 | #ifndef TH_GENERIC_FILE
  2 | #define TH_GENERIC_FILE "generic/SoftClassNLLCriterion.c"
  3 | #else
  4 | 
  5 | void THNN_(SoftClassNLLCriterion_updateOutput)(
  6 |           THNNState *state,
  7 |           THTensor *input,
  8 |           THIndexTensor *target,
  9 |           THTensor *output,
 10 |           bool sizeAverage,
 11 |           THTensor *weights,
 12 |           THTensor *total_weight)
 13 | {
 14 |   /* CPU forward pass: sums -w * input[sample][class] over the batch into
 15 |    * output[0] and the weights w into total_weight[0] (w is weights[class], or
 16 |    * 1 without a weight tensor); under sizeAverage the loss is then divided by
 17 |    * a non-zero total weight. Only 1-D targets and 1-D/2-D inputs pass. */
 18 |   THNN_CHECK_DIM_SIZE(output, 1, 0, 1);
 19 |   THNN_CHECK_DIM_SIZE(total_weight, 1, 0, 1);
 20 |   const int rank = THTensor_(nDimension)(input);
 21 |   const int n_classes = THTensor_(size)(input, rank - 1);
 22 | 
 23 |   if (THIndexTensor_(nDimension)(target) > 1) {
 24 |     THError("multi-target not supported");
 25 |   }
 26 |   if (rank > 2) {
 27 |     THError("input tensor should be 1D or 2D");
 28 |   }
 29 |   if (weights && THTensor_(nElement)(weights) != n_classes) {
 30 |     THDescBuff s1 = THTensor_(sizeDesc)(weights);
 31 |     THError("weight tensor should be defined either for all %d classes or no classes"
 32 | 	    " but got weight tensor of shape: %s", n_classes, s1.str);
 33 |   }
 34 |   /* Contiguous versions of the tensors; each one is freed before returning. */
 35 |   input = THTensor_(newContiguous)(input);
 36 |   target = THIndexTensor_(newContiguous)(target);
 37 |   if (weights) weights = THTensor_(newContiguous)(weights);
 38 | 
 39 |   real *in = THTensor_(data)(input);
 40 |   THIndex_t *labels = THIndexTensor_(data)(target);
 41 |   real *class_w = weights ? THTensor_(data)(weights) : NULL;
 42 |   real *loss = THTensor_(data)(output);
 43 |   real *weight_sum = THTensor_(data)(total_weight);
 44 | 
 45 |   weight_sum[0] = 0.0;
 46 |   loss[0] = 0.0;
 47 | 
 48 |   if (rank == 1) {
 49 |     int cur_target = labels[0] - TH_INDEX_BASE;
 50 |     THAssert(cur_target >= 0 && cur_target < n_classes);
 51 |     weight_sum[0] = weights ? class_w[cur_target] : 1.0f;
 52 |     loss[0] = -in[cur_target] * weight_sum[0];
 53 |   } else if (rank == 2) {
 54 |     int batch_size = THTensor_(size)(input, 0);
 55 |     THAssert(THIndexTensor_(size)(target, 0) == batch_size);
 56 |     const int row_len = THTensor_(size)(input, 1);
 57 |     const real *row = in;  /* start of the current sample's row */
 58 |     int b;
 59 |     for (b = 0; b < batch_size; b++, row += row_len) {
 60 |       int cur_target = labels[b] - TH_INDEX_BASE;
 61 |       THAssert(cur_target >= 0 && cur_target < n_classes);
 62 |       real cur_weight = weights ? class_w[cur_target] : 1.0f;
 63 |       weight_sum[0] += cur_weight;
 64 |       loss[0] -= row[cur_target] * cur_weight;
 65 |     }
 66 |   }
 67 | 
 68 |   if (sizeAverage && weight_sum[0]) {
 69 |     loss[0] /= weight_sum[0];
 70 |   }
 71 |   if (weights) { THTensor_(free)(weights); }
 72 |   THTensor_(free)(input);
 73 |   THIndexTensor_(free)(target);
 74 | }
 75 | 
 76 | void THNN_(SoftClassNLLCriterion_updateGradInput)(
 77 |           THNNState *state,
 78 |           THTensor *input,
 79 |           THIndexTensor *target,
 80 |           THTensor *gradInput,
 81 |           bool sizeAverage,
 82 |           THTensor *weights,
 83 |           THTensor *total_weight)
 84 | {
 85 |   /* CPU backward pass: writes the gradient only into each sample's target-class
 86 |    * slot of gradInput, leaving every other entry as it was. Returns without
 87 |    * writing anything when total_weight[0] is not greater than zero. */
 88 |   const int rank = THTensor_(nDimension)(input);
 89 |   const int n_classes = THTensor_(size)(input, rank - 1);
 90 | 
 91 |   if (!THTensor_(isContiguous)(gradInput)) {
 92 |     THError("gradInput must be contiguous");
 93 |   }
 94 | 
 95 |   real *weight_sum = THTensor_(data)(total_weight);
 96 |   if (!(weight_sum[0] > 0)) {
 97 |     return;
 98 |   }
 99 | 
100 |   if (THIndexTensor_(nDimension)(target) > 1) {
101 |     THError("multi-target not supported");
102 |   }
103 |   if (rank > 2) {
104 |     THError("input tensor should be 1D or 2D");
105 |   }
106 |   if (weights && THTensor_(nElement)(weights) != n_classes) {
107 |     THError("weight tensor should be defined either for all or no classes");
108 |   }
109 | 
110 |   target = THIndexTensor_(newContiguous)(target);
111 |   if (weights) weights = THTensor_(newContiguous)(weights);
112 | 
113 |   THIndex_t *labels = THIndexTensor_(data)(target);
114 |   real *class_w = weights ? THTensor_(data)(weights) : NULL;
115 |   real *grad = THTensor_(data)(gradInput);
116 | 
117 |   if (rank == 1) {
118 |     int cur_target = labels[0] - TH_INDEX_BASE;
119 |     THAssert(cur_target >= 0 && cur_target < n_classes);
120 | 
121 |     /* -weights[target] when not averaging and weights are given, else -1. */
122 |     if (!sizeAverage && weights) {
123 |       grad[cur_target] = -class_w[cur_target];
124 |     } else {
125 |       grad[cur_target] = -1;
126 |     }
127 |   } else if (rank == 2) {
128 |     int batch_size = THTensor_(size)(input, 0);
129 |     THAssert(THIndexTensor_(size)(target, 0) == batch_size);
130 | 
131 |     const int row_len = THTensor_(size)(input, 1);
132 |     real *row = grad;  /* start of the current sample's gradient row */
133 |     int b;
134 |     for (b = 0; b < batch_size; b++, row += row_len) {
135 |       int cur_target = labels[b] - TH_INDEX_BASE;
136 |       THAssert(cur_target >= 0 && cur_target < n_classes);
137 | 
138 |       row[cur_target] = -(weights ? class_w[cur_target] : 1.0f);
139 |       if (sizeAverage && weight_sum[0]) {
140 |         row[cur_target] /= weight_sum[0];
141 |       }
142 |     }
143 |   }
144 | 
145 |   THIndexTensor_(free)(target);
146 |   if (weights) {
147 |     THTensor_(free)(weights);
148 |   }
149 | }
150 | 
151 | #endif
152 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/lib/THNN/init.c:
--------------------------------------------------------------------------------
  1 | #include "TH.h"
  2 | #include "THNN.h"
  3 | 
  4 | #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)  // builds an identifier from torch_, Real and NAME via TH_CONCAT_3
  5 | #define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)  // same, with the nn_ prefix
  6 | // THNN_CHECK_SHAPE: raises THError when both tensors are non-NULL and their sizes differ. Expands to a bare `if` statement.
  7 | #define THNN_CHECK_SHAPE(I1, I2)			\
  8 |   if (I1 != NULL && I2 != NULL && !THTensor_(isSameSizeAs)(I1, I2))	\
  9 |     {							\
 10 |        THDescBuff s1 = THTensor_(sizeDesc)(I1);		\
 11 |        THDescBuff s2 = THTensor_(sizeDesc)(I2);		\
 12 |        THError(#I1 " and " #I2 " shapes do not match: "	\
 13 | 	       #I1 " %s, " #I2 " %s", s1.str, s2.str);	\
 14 |     }
 15 | // THNN_CHECK_SHAPE_INDICES: compares tensor I1 against the size of THLongTensor I2. Declares a local `size2` in the enclosing scope and calls THLongTensor_newSizeOf(I2) before the NULL test; size2 is freed on both paths.
 16 | #define THNN_CHECK_SHAPE_INDICES(I1, I2)             \
 17 |   THLongStorage *size2 = THLongTensor_newSizeOf(I2); \
 18 |   if (I1 != NULL && I2 != NULL && !THTensor_(isSize)(I1, size2)) \
 19 |     {             \
 20 |       THDescBuff s1 = THTensor_(sizeDesc)(I1);       \
 21 |       THDescBuff s2 = THLongTensor_sizeDesc(I2);     \
 22 |       THLongStorage_free(size2);                     \
 23 |       THError(#I1 " and " #I2 " shapes do not match: " \
 24 |         #I1 " %s, " #I2 " %s", s1.str, s2.str);      \
 25 |     } else {      \
 26 |       THLongStorage_free(size2);                     \
 27 |     }
 28 | // THNN_CHECK_NELEMENT: raises THError when both tensors are non-NULL and their element counts differ.
 29 | #define THNN_CHECK_NELEMENT(I1, I2) \
 30 |   if (I1 != NULL && I2 != NULL ) {					\
 31 |     ptrdiff_t n1 = THTensor_(nElement)(I1);					\
 32 |     ptrdiff_t n2 = THTensor_(nElement)(I2);	                                \
 33 |     if (n1 != n2)							\
 34 |       {									\
 35 | 	THDescBuff s1 = THTensor_(sizeDesc)(I1);			\
 36 | 	THDescBuff s2 = THTensor_(sizeDesc)(I2);			\
 37 | 	THError(#I1 " and " #I2 " have different number of elements: "	\
 38 | 		#I1 "%s has %ld elements, while "			\
 39 | 		#I2 "%s has %ld elements", s1.str, n1, s2.str, n2);	\
 40 |       }									\
 41 |   }
 42 | // THNN_CHECK_DIM_SIZE: raises THError unless T has exactly DIM dimensions and size SIZE along dimension DIM_SIZE.
 43 | #define THNN_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE)			\
 44 |   if (THTensor_(nDimension)(T) != DIM ||				\
 45 |       THTensor_(size)(T, DIM_SIZE) != SIZE) {				\
 46 |       THDescBuff s1 = THTensor_(sizeDesc)(T);				\
 47 |       THError("Need " #T " of dimension %d and " #T ".size[%d] == %d"	\
 48 | 	      " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
 49 |   }
 50 | // THNN_CHECK_DIM_SIZE_INDICES: the same check as THNN_CHECK_DIM_SIZE, using the THIndexTensor_ accessors.
 51 | #define THNN_CHECK_DIM_SIZE_INDICES(T, DIM, DIM_SIZE, SIZE)			\
 52 |   if (THIndexTensor_(nDimension)(T) != DIM ||				\
 53 |       THIndexTensor_(size)(T, DIM_SIZE) != SIZE) {				\
 54 |       THDescBuff s1 = THIndexTensor_(sizeDesc)(T);				\
 55 |       THError("Need " #T " of dimension %d and " #T ".size[%d] == %d"	\
 56 |         " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \
 57 |   }
 58 | // THNN_ARGCHECK: when COND is false, calls THArgCheck(COND, ARG, FORMAT, <size description of T>); FORMAT should contain one %s.
 59 | #define THNN_ARGCHECK(COND, ARG, T, FORMAT)	\
 60 |   if (!(COND)) {				\
 61 |     THDescBuff s1 = THTensor_(sizeDesc)(T);	\
 62 |     THArgCheck(COND, ARG, FORMAT, s1.str);	\
 63 |   }
 64 | 
 65 | #include "generic/Abs.c"
 66 | #include "THGenerateFloatTypes.h"
 67 | 
 68 | #include "generic/AbsCriterion.c"
 69 | #include "THGenerateFloatTypes.h"
 70 | 
 71 | #include "generic/BCECriterion.c"
 72 | #include "THGenerateFloatTypes.h"
 73 | 
 74 | #include "generic/SoftClassNLLCriterion.c"
 75 | #include "THGenerateFloatTypes.h"
 76 | 
 77 | #include "generic/ClassNLLCriterion.c"
 78 | #include "THGenerateFloatTypes.h"
 79 | 
 80 | #include "generic/SpatialClassNLLCriterion.c"
 81 | #include "THGenerateFloatTypes.h"
 82 | 
 83 | #include "generic/DistKLDivCriterion.c"
 84 | #include "THGenerateFloatTypes.h"
 85 | 
 86 | #include "generic/ELU.c"
 87 | #include "THGenerateFloatTypes.h"
 88 | 
 89 | #include "generic/HardShrink.c"
 90 | #include "THGenerateFloatTypes.h"
 91 | 
 92 | #include "generic/HardTanh.c"
 93 | #include "THGenerateFloatTypes.h"
 94 | 
 95 | #include "generic/GatedLinearUnit.c"
 96 | #include "THGenerateFloatTypes.h"
 97 | 
 98 | #include "generic/L1Cost.c"
 99 | #include "THGenerateFloatTypes.h"
100 | 
101 | #include "generic/LeakyReLU.c"
102 | #include "THGenerateFloatTypes.h"
103 | 
104 | #include "generic/FusedRNNKernel.c"
105 | #include "THGenerateFloatTypes.h"
106 | 
107 | #include "generic/LogSigmoid.c"
108 | #include "THGenerateFloatTypes.h"
109 | 
110 | #include "generic/LogSoftMax.c"
111 | #include "THGenerateFloatTypes.h"
112 | 
113 | #include "generic/LookupTable.c"
114 | #include "THGenerateFloatTypes.h"
115 | 
116 | #include "generic/MSECriterion.c"
117 | #include "THGenerateFloatTypes.h"
118 | 
119 | #include "generic/MarginCriterion.c"
120 | #include "THGenerateFloatTypes.h"
121 | 
122 | #include "generic/SoftMarginCriterion.c"
123 | #include "THGenerateFloatTypes.h"
124 | 
125 | #include "generic/MultiLabelMarginCriterion.c"
126 | #include "THGenerateFloatTypes.h"
127 | 
128 | #include "generic/MultiMarginCriterion.c"
129 | #include "THGenerateFloatTypes.h"
130 | 
131 | #include "generic/Linear.c"
132 | #include "THGenerateFloatTypes.h"
133 | 
134 | #include "generic/PReLU.c"
135 | #include "THGenerateFloatTypes.h"
136 | 
137 | #include "generic/RReLU.c"
138 | #include "THGenerateFloatTypes.h"
139 | 
140 | #include "generic/Sigmoid.c"
141 | #include "THGenerateFloatTypes.h"
142 | 
143 | #include "generic/SmoothL1Criterion.c"
144 | #include "THGenerateFloatTypes.h"
145 | 
146 | #include "generic/SoftMax.c"
147 | #include "THGenerateFloatTypes.h"
148 | 
149 | #include "generic/SoftPlus.c"
150 | #include "THGenerateFloatTypes.h"
151 | 
152 | #include "generic/SoftShrink.c"
153 | #include "THGenerateFloatTypes.h"
154 | 
155 | #include "generic/SparseLinear.c"
156 | #include "THGenerateFloatTypes.h"
157 | 
158 | #include "generic/IndexLinear.c"
159 | #include "THGenerateFloatTypes.h"
160 | 
161 | #include "generic/Sqrt.c"
162 | #include "THGenerateFloatTypes.h"
163 | 
164 | #include "generic/Square.c"
165 | #include "THGenerateFloatTypes.h"
166 | 
167 | #include "generic/Tanh.c"
168 | #include "THGenerateFloatTypes.h"
169 | 
170 | #include "generic/Threshold.c"
171 | #include "THGenerateFloatTypes.h"
172 | 
173 | #include "generic/TemporalConvolution.c"
174 | #include "THGenerateFloatTypes.h"
175 | 
176 | #include "generic/TemporalSubSampling.c"
177 | #include "THGenerateFloatTypes.h"
178 | 
179 | #include "generic/TemporalMaxPooling.c"
180 | #include "THGenerateFloatTypes.h"
181 | 
182 | #include "generic/TemporalRowConvolution.c"
183 | #include "THGenerateFloatTypes.h"
184 | 
185 | #include "generic/BatchNormalization.c"
186 | #include "THGenerateFloatTypes.h"
187 | 
188 | #include "generic/unfold.c"
189 | #include "THGenerateFloatTypes.h"
190 | 
191 | #include "generic/SpatialConvolutionMap.c"
192 | #include "THGenerateFloatTypes.h"
193 | 
194 | #include "generic/SpatialConvolutionMM.c"
195 | #include "THGenerateFloatTypes.h"
196 | 
197 | #include "generic/SpatialConvolutionLocal.c"
198 | #include "THGenerateFloatTypes.h"
199 | 
200 | #include "generic/SpatialFullConvolution.c"
201 | #include "THGenerateFloatTypes.h"
202 | 
203 | #include "generic/SpatialFullConvolutionMap.c"
204 | #include "THGenerateFloatTypes.h"
205 | 
206 | #include "generic/SpatialDilatedConvolution.c"
207 | #include "THGenerateFloatTypes.h"
208 | 
209 | #include "generic/SpatialAdaptiveMaxPooling.c"
210 | #include "THGenerateFloatTypes.h"
211 | 
212 | #include "generic/SpatialAdaptiveAveragePooling.c"
213 | #include "THGenerateFloatTypes.h"
214 | 
215 | #include "generic/SpatialAveragePooling.c"
216 | #include "THGenerateFloatTypes.h"
217 | 
218 | #include "generic/SpatialFractionalMaxPooling.c"
219 | #include "THGenerateFloatTypes.h"
220 | 
221 | #include "generic/SpatialMaxPooling.c"
222 | #include "THGenerateFloatTypes.h"
223 | 
224 | #include "generic/SpatialDilatedMaxPooling.c"
225 | #include "THGenerateFloatTypes.h"
226 | 
227 | #include "generic/SpatialMaxUnpooling.c"
228 | #include "THGenerateFloatTypes.h"
229 | 
230 | #include "generic/SpatialSubSampling.c"
231 | #include "THGenerateFloatTypes.h"
232 | 
233 | #include "generic/SpatialUpSamplingNearest.c"
234 | #include "THGenerateFloatTypes.h"
235 | 
236 | #include "generic/SpatialUpSamplingBilinear.c"
237 | #include "THGenerateFloatTypes.h"
238 | 
239 | #include "generic/VolumetricAveragePooling.c"
240 | #include "THGenerateFloatTypes.h"
241 | 
242 | #include "generic/VolumetricConvolution.c"
243 | #include "THGenerateFloatTypes.h"
244 | 
245 | #include "generic/VolumetricConvolutionMM.c"
246 | #include "THGenerateFloatTypes.h"
247 | 
248 | #include "generic/VolumetricFullConvolution.c"
249 | #include "THGenerateFloatTypes.h"
250 | 
251 | #include "generic/VolumetricDilatedConvolution.c"
252 | #include "THGenerateFloatTypes.h"
253 | 
254 | #include "generic/VolumetricMaxPooling.c"
255 | #include "THGenerateFloatTypes.h"
256 | 
257 | #include "generic/VolumetricDilatedMaxPooling.c"
258 | #include "THGenerateFloatTypes.h"
259 | 
260 | #include "generic/VolumetricFractionalMaxPooling.c"
261 | #include "THGenerateFloatTypes.h"
262 | 
263 | #include "generic/VolumetricMaxUnpooling.c"
264 | #include "THGenerateFloatTypes.h"
265 | 
266 | #include "generic/SpatialReflectionPadding.c"
267 | #include "THGenerateFloatTypes.h"
268 | 
269 | #include "generic/SpatialReplicationPadding.c"
270 | #include "THGenerateFloatTypes.h"
271 | 
272 | #include "generic/VolumetricReplicationPadding.c"
273 | #include "THGenerateFloatTypes.h"
274 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/nn/_functions/auto.py:
--------------------------------------------------------------------------------
  1 | from itertools import repeat
  2 | from collections import defaultdict
  3 | 
  4 | import torch
  5 | from torch._thnn.utils import parse_header, THNN_H_PATH
  6 | from torch.autograd.function import Function, InplaceFunction
  7 | from torch._thnn import type2backend
  8 | 
  9 | from . import _all_functions
 10 | 
 11 | 
 12 | def _make_function_class_criterion(class_name, update_output, update_grad_input, acc_grad_parameters):  # builds a Function subclass for one THNN criterion
 13 |     weight_arg_idx = -1  # -1: update_output declares no argument whose name starts with 'weight'
 14 |     for i, arg in enumerate(update_output.arguments):
 15 |         if arg.name.startswith('weight'):
 16 |             weight_arg_idx = i  # absolute position in the backend signature
 17 |             break
 18 | 
 19 |     buffers_idx = []  # positions within arguments[4:] of non-weight 'THTensor*' arguments
 20 |     additional_arg_idx = 0
 21 |     for arg in update_output.arguments[4:]:
 22 |         if not arg.name.startswith('weight') and arg.type == 'THTensor*':
 23 |             buffers_idx.append(additional_arg_idx)
 24 |         additional_arg_idx += 1  # every argument advances the position, buffer or not
 25 | 
 26 |     def __init__(self, *args, **kwargs):
 27 |         Function.__init__(self)
 28 |         self.weight = kwargs.get('weight')  # None when no 'weight' keyword is given
 29 |         self.additional_args = list(args)  # extra positional arguments passed to the backend after `output`
 30 | 
 31 |     def forward(self, input, target):
 32 |         self._backend = type2backend[type(input)]  # backend is selected by the input tensor's type
 33 |         self.save_for_backward(input, target)
 34 |         if weight_arg_idx >= 0:
 35 |             insert_idx = weight_arg_idx - 4  # state, input, target, output
 36 |             self.additional_args.insert(insert_idx, self.weight)  # mutates the list that backward() reuses
 37 |         for idx in buffers_idx:
 38 |             self.additional_args.insert(idx, input.new(1))  # placeholder tensor for each buffer argument
 39 |         output = input.new(1)
 40 |         getattr(self._backend, update_output.name)(self._backend.library_state, input, target,
 41 |                                                    output, *self.additional_args)
 42 |         return output
 43 | 
 44 |     def backward(self, grad_output):
 45 |         input, target = self.saved_tensors
 46 |         grad_input = grad_output.new().resize_as_(input).zero_()  # zero-filled, sized like input
 47 |         getattr(self._backend, update_grad_input.name)(self._backend.library_state, input, target,
 48 |                                                        grad_input, *self.additional_args)
 49 |         grad_output_expanded = grad_output.view(*repeat(1, grad_input.dim()))  # one size-1 dim per grad_input dim
 50 |         grad_input.mul_(grad_output_expanded.expand_as(grad_input))  # scale in place by the incoming gradient
 51 |         return grad_input, None  # no gradient is returned for target
 52 | 
 53 |     return type(class_name, (Function,), dict(__init__=__init__, forward=forward, backward=backward))  # acc_grad_parameters is never used here
 54 | 
 55 | 
 56 | def _find_buffers(args, ignored_args):
 57 |     additional_arg_idx = 0
 58 |     buffers = []
 59 |     for arg in args:
 60 |         if arg.name in ignored_args:
 61 |             continue
 62 |         if arg.type == 'THTensor*':
 63 |             buffers.append((additional_arg_idx, arg.name))
 64 |         additional_arg_idx += 1
 65 |     return buffers
 66 | 
 67 | 
 68 | def _make_function_class(class_name, update_output, update_grad_input, acc_grad_parameters):  # builds a Function class for a non-criterion THNN function
 69 |     def has_argument(fn, name):  # True if fn declares an argument called `name`
 70 |         for arg in fn.arguments:
 71 |             if arg.name == name:
 72 |                 return True
 73 |         return False
 74 |     save_output = has_argument(update_grad_input, 'output')  # if set, forward saves output for backward
 75 | 
 76 |     param_args = {'weight', 'bias'}
 77 |     ignored_args = {'weight', 'bias', 'gradWeight', 'gradBias', 'output'}  # skipped (and not counted) by _find_buffers
 78 |     expected_params = [arg for arg in update_output.arguments[3:]  # [3:] skips state, input, output
 79 |                        if arg.name in param_args]
 80 |     buffers = {}  # backend function key -> [(position, buffer name), ...]
 81 |     buffers['update_output'] = _find_buffers(update_output.arguments[3:],
 82 |                                              ignored_args)
 83 |     buffers['update_grad_input'] = _find_buffers(
 84 |         update_grad_input.arguments[4:], ignored_args)  # [4:] skips state, input, grad_output, grad_input
 85 |     if acc_grad_parameters is not None:
 86 |         buffers['acc_grad_parameters'] = _find_buffers(
 87 |             acc_grad_parameters.arguments[3:], ignored_args)  # [3:] skips state, input, grad_output
 88 | 
 89 |     # This and __init__ assume that only the last argument can be
 90 |     # an inplace flag
 91 |     is_inplace = update_output.arguments[-1].name == 'inplace'
 92 | 
 93 |     def __init__(self, *args):
 94 |         if is_inplace:
 95 |             InplaceFunction.__init__(self, args[-1])  # last positional argument is taken as the inplace flag
 96 |         else:
 97 |             Function.__init__(self)
 98 |         self.additional_args = list(args)  # extra backend arguments, in call order
 99 | 
100 |     def _initialize_buffers(self, fn_name):  # returns additional_args as a tuple with the buffers for fn_name spliced in
101 |         additional_args = self.additional_args
102 |         for idx, name in buffers[fn_name]:
103 |             # TODO: some buffers are necessary only for update output and can be
104 |             # freed right afterwards
105 |             buffer = self.buffers[name]  # defaultdict: created on first use, reused for the same name afterwards
106 |             additional_args = additional_args[:idx] + [buffer] + additional_args[idx:]  # new list; self.additional_args is not modified
107 |         return tuple(additional_args)
108 | 
109 |     def forward(self, input, *params):
110 |         self._backend = type2backend[type(input)]  # backend is selected by the input tensor's type
111 | 
112 |         for param in params:
113 |             if type(param) != type(input):
114 |                 raise RuntimeError("input type ({}) doesn't match the type of "
115 |                                    "a parameter tensor ({})".format(torch.typename(input),
116 |                                                                     torch.typename(param)))
117 | 
118 |         # Allocate temporary buffers and insert them into additional_args
119 |         self.buffers = defaultdict(type(input))  # a missing name yields a new type(input)() instance
120 |         additional_args = self._initialize_buffers('update_output')
121 | 
122 |         # Fill in optional params with None
123 |         args = params
124 |         for i in range(len(params), len(expected_params)):  # only the expected params the caller left out
125 |             param = expected_params[i]
126 |             if param.is_optional:
127 |                 args += (None,)
128 |             else:
129 |                 raise ValueError("missing required argument '%s'" % param.name)
130 | 
131 |         args += tuple(additional_args)
132 | 
133 |         # If the module is working in-place its output will be set to the
134 |         # same storage as input, but its variable won't be dirty.
135 |         if is_inplace and self.inplace:
136 |             self.mark_dirty(input)
137 |             output = input
138 |         else:
139 |             output = input.new()  # empty tensor; presumably sized by the backend call below -- TODO confirm
140 | 
141 |         if save_output:
142 |             self.save_for_backward(input, output, *params)
143 |         else:
144 |             self.save_for_backward(input, *params)
145 | 
146 |         if not self.requires_grad:
147 |             del self.buffers  # `args` built above still references the buffers for the call below
148 | 
149 |         getattr(self._backend, update_output.name)(self._backend.library_state, input, output, *args)
150 |         return output
151 | 
152 |     def backward(self, grad_output):
153 |         t = self.saved_tensors
154 |         if save_output:
155 |             input, output, params = t[0], t[1], t[2:]
156 |         else:
157 |             input, params = t[0], t[1:]
158 |         grad_params = tuple(None for p in params)  # default: no parameter gradients
159 |         grad_input_tuple = (None,)  # default: no input gradient
160 | 
161 |         if self.needs_input_grad[0]:
162 |             additional_args = self._initialize_buffers('update_grad_input')
163 |             if save_output:
164 |                 additional_args = (output,) + additional_args
165 | 
166 |             if is_inplace and self.inplace:
167 |                 assert additional_args[-1] is True
168 |                 tmp_args = list(additional_args)
169 |                 tmp_args[-1] = False  # the backend is called with the inplace flag cleared in backward
170 |                 additional_args = tuple(tmp_args)
171 |             grad_input = input.new().resize_as_(input)  # sized like input, not zero-filled here
172 |             params_without_bias = params if len(params) < 2 else params[:1]  # keeps at most the first saved param
173 |             update_grad_input_fn = getattr(self._backend, update_grad_input.name)
174 |             gi_args = params_without_bias + additional_args
175 |             update_grad_input_fn(self._backend.library_state, input, grad_output, grad_input, *gi_args)
176 |             grad_input_tuple = (grad_input,)
177 | 
178 |         if acc_grad_parameters and any(self.needs_input_grad[1:]):
179 |             additional_args = self._initialize_buffers('acc_grad_parameters')
180 |             grad_params = tuple(p.new().resize_as_(p).zero_() for p in params)  # zero-filled, one per supplied param
181 |             appended_grads = len(expected_params) - len(grad_params)  # expected params the caller did not supply
182 |             grad_params += (None,) * appended_grads
183 |             acc_grad_parameters_fn = getattr(self._backend, acc_grad_parameters.name)
184 |             param_args = grad_params + additional_args + (1,)  # trailing 1: presumably the scale argument -- TODO confirm
185 |             acc_grad_parameters_fn(self._backend.library_state, input, grad_output, *param_args)
186 |             if appended_grads:
187 |                 grad_params = grad_params[:-appended_grads]  # drop the None padding again
188 | 
189 |         return grad_input_tuple + grad_params  # input gradient first, then one entry per supplied param
190 | 
191 |     base_class = Function if not is_inplace else InplaceFunction
192 |     return type(class_name, (base_class,), dict(__init__=__init__, forward=forward, backward=backward,
193 |                                                 _initialize_buffers=_initialize_buffers))
194 | 
195 | 
196 | def _generate_function_classes(scope_dict):
197 |     global function_list, function_by_name
198 |     function_list = parse_header(THNN_H_PATH)
199 |     function_by_name = {fn.name: fn for fn in function_list}
200 |     classes_to_generate = {fn.name.partition('_')[0] for fn in function_list}
201 |     exceptions = {
202 |         'Linear',
203 |         'IndexLinear',
204 |         'SpatialFullConvolution',
205 |         'SpatialConvolutionMM',
206 |         'SparseLinear',
207 |         'TemporalConvolution',
208 |         'SpatialAveragePooling',
209 |         'SpatialMaxPooling',
210 |         'SpatialDilatedMaxPooling',
211 |         'SpatialMaxUnpooling',
212 |         'SpatialAdaptiveMaxPooling',
213 |         'SpatialAdaptiveAveragePooling',
214 |         'VolumetricAveragePooling',
215 |         'VolumetricMaxPooling',
216 |         'VolumetricMaxUnpooling',
217 |         'VolumetricConvolution',
218 |         'VolumetricFullConvolution',
219 |         'VolumetricConvolutionMM',
220 |         'TemporalMaxPooling',
221 |         'BatchNormalization',
222 |         'LookupTable',
223 |         'PReLU',
224 |         'RReLU',
225 |         'GRUFused',
226 |         'LSTMFused',
227 |         'unfolded',
228 |     }
229 |     name_remap = {
230 |         'TemporalConvolution': 'Conv1d',
231 |         'SpatialDilatedConvolution': 'DilatedConv2d',
232 |         'SpatialMaxUnpooling': 'MaxUnpool2d',
233 |         'SpatialReflectionPadding': 'ReflectionPad2d',
234 |         'SpatialReplicationPadding': 'ReplicationPad2d',
235 |         'VolumetricReplicationPadding': 'ReplicationPad3d',
236 |         'VolumetricMaxUnpooling': 'MaxUnpool3d',
237 |         'SoftMax': 'Softmax',
238 |         'LogSoftMax': 'LogSoftmax',
239 |         'HardTanh': 'Hardtanh',
240 |         'HardShrink': 'Hardshrink',
241 |         'SoftPlus': 'Softplus',
242 |         'SoftShrink': 'Softshrink',
243 |         'MSECriterion': 'MSELoss',
244 |         'AbsCriterion': 'L1Loss',
245 |         'BCECriterion': '_BCELoss',  # TODO: move the glue code into THNN
246 |         'ClassNLLCriterion': 'NLLLoss',
247 |         'SoftClassNLLCriterion': 'SoftNLLLoss',
248 |         'DistKLDivCriterion': 'KLDivLoss',
249 |         'SpatialClassNLLCriterion': 'NLLLoss2d',
250 |         'MultiLabelMarginCriterion': 'MultiLabelMarginLoss',
251 |         'MultiMarginCriterion': 'MultiMarginLoss',
252 |         'SmoothL1Criterion': 'SmoothL1Loss',
253 |         'SoftMarginCriterion': 'SoftMarginLoss',
254 |     }
255 | 
256 |     classes_to_generate -= exceptions
257 |     for fn in classes_to_generate:
258 |         update_output = function_by_name[fn + '_updateOutput']
259 |         update_grad_input = function_by_name[fn + '_updateGradInput']
260 |         acc_grad_parameters = function_by_name.get(fn + '_accGradParameters')
261 |         class_name = name_remap.get(fn, fn)
262 |         # This has to call a function to retain correct references to functions
263 |         if 'Criterion' in fn:
264 |             cls = _make_function_class_criterion(class_name, update_output,
265 |                                                  update_grad_input, acc_grad_parameters)
266 |         else:
267 |             cls = _make_function_class(class_name, update_output,
268 |                                        update_grad_input, acc_grad_parameters)
269 |         scope_dict[class_name] = cls
270 |         if not class_name.startswith('_'):
271 |             _all_functions.append(cls)
272 | 
273 | 
274 | _generate_function_classes(locals())  # runs at import; at module level locals() is the module namespace, so each class becomes a module attribute
275 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/nn/_functions/thnn/auto.py:
--------------------------------------------------------------------------------
  1 | from itertools import repeat
  2 | from collections import defaultdict
  3 | 
  4 | import torch
  5 | from torch._thnn.utils import parse_header, THNN_H_PATH
  6 | from torch.autograd.function import Function, InplaceFunction
  7 | from torch._thnn import type2backend
  8 | 
  9 | from . import _all_functions
 10 | 
 11 | 
 12 | def _make_function_class_criterion(class_name, update_output, update_grad_input, acc_grad_parameters):  # builds a Function subclass for one THNN criterion
 13 |     weight_arg_idx = -1  # -1: update_output declares no argument whose name starts with 'weight'
 14 |     for i, arg in enumerate(update_output.arguments):
 15 |         if arg.name.startswith('weight'):
 16 |             weight_arg_idx = i  # absolute position in the backend signature
 17 |             break
 18 | 
 19 |     buffers_idx = []  # positions within arguments[4:] of non-weight 'THTensor*' arguments
 20 |     additional_arg_idx = 0
 21 |     for arg in update_output.arguments[4:]:
 22 |         if not arg.name.startswith('weight') and arg.type == 'THTensor*':
 23 |             buffers_idx.append(additional_arg_idx)
 24 |         additional_arg_idx += 1  # every argument advances the position, buffer or not
 25 | 
 26 |     def __init__(self, *args, **kwargs):
 27 |         Function.__init__(self)
 28 |         self.weight = kwargs.get('weight')  # None when no 'weight' keyword is given
 29 |         self.additional_args = list(args)  # extra positional arguments passed to the backend after `output`
 30 | 
 31 |     def forward(self, input, target):
 32 |         self._backend = type2backend[type(input)]  # backend is selected by the input tensor's type
 33 |         self.save_for_backward(input, target)
 34 |         if weight_arg_idx >= 0:
 35 |             insert_idx = weight_arg_idx - 4  # state, input, target, output
 36 |             self.additional_args.insert(insert_idx, self.weight)  # mutates the list that backward() reuses
 37 |         for idx in buffers_idx:
 38 |             self.additional_args.insert(idx, input.new(1))  # placeholder tensor for each buffer argument
 39 |         output = input.new(1)
 40 |         getattr(self._backend, update_output.name)(self._backend.library_state, input, target,
 41 |                                                    output, *self.additional_args)
 42 |         return output
 43 | 
 44 |     def backward(self, grad_output):
 45 |         input, target = self.saved_tensors
 46 |         grad_input = grad_output.new().resize_as_(input).zero_()  # zero-filled, sized like input
 47 |         getattr(self._backend, update_grad_input.name)(self._backend.library_state, input, target,
 48 |                                                        grad_input, *self.additional_args)
 49 |         grad_output_expanded = grad_output.view(*repeat(1, grad_input.dim()))  # one size-1 dim per grad_input dim
 50 |         grad_input.mul_(grad_output_expanded.expand_as(grad_input))  # scale in place by the incoming gradient
 51 |         return grad_input, None  # no gradient is returned for target
 52 | 
 53 |     return type(class_name, (Function,), dict(__init__=__init__, forward=forward, backward=backward))  # acc_grad_parameters is never used here
 54 | 
 55 | 
 56 | def _find_buffers(args, ignored_args):
 57 |     additional_arg_idx = 0
 58 |     buffers = []
 59 |     for arg in args:
 60 |         if arg.name in ignored_args:
 61 |             continue
 62 |         if arg.type == 'THTensor*':
 63 |             buffers.append((additional_arg_idx, arg.name))
 64 |         additional_arg_idx += 1
 65 |     return buffers
 66 | 
 67 | 
 68 | def _make_function_class(class_name, update_output, update_grad_input, acc_grad_parameters):  # builds a Function class for a non-criterion THNN function
 69 |     def has_argument(fn, name):  # True if fn declares an argument called `name`
 70 |         for arg in fn.arguments:
 71 |             if arg.name == name:
 72 |                 return True
 73 |         return False
 74 |     save_output = has_argument(update_grad_input, 'output')  # if set, forward saves output for backward
 75 | 
 76 |     param_args = {'weight', 'bias'}
 77 |     ignored_args = {'weight', 'bias', 'gradWeight', 'gradBias', 'output'}  # skipped (and not counted) by _find_buffers
 78 |     expected_params = [arg for arg in update_output.arguments[3:]  # [3:] skips state, input, output
 79 |                        if arg.name in param_args]
 80 |     buffers = {}  # backend function key -> [(position, buffer name), ...]
 81 |     buffers['update_output'] = _find_buffers(update_output.arguments[3:],
 82 |                                              ignored_args)
 83 |     buffers['update_grad_input'] = _find_buffers(
 84 |         update_grad_input.arguments[4:], ignored_args)  # [4:] skips state, input, grad_output, grad_input
 85 |     if acc_grad_parameters is not None:
 86 |         buffers['acc_grad_parameters'] = _find_buffers(
 87 |             acc_grad_parameters.arguments[3:], ignored_args)  # [3:] skips state, input, grad_output
 88 | 
 89 |     # This and __init__ assume that only the last argument can be
 90 |     # an inplace flag
 91 |     is_inplace = update_output.arguments[-1].name == 'inplace'
 92 | 
 93 |     def __init__(self, *args):
 94 |         if is_inplace:
 95 |             InplaceFunction.__init__(self, args[-1])  # last positional argument is taken as the inplace flag
 96 |         else:
 97 |             Function.__init__(self)
 98 |         self.additional_args = list(args)  # extra backend arguments, in call order
 99 | 
100 |     def _initialize_buffers(self, fn_name):  # returns additional_args as a tuple with the buffers for fn_name spliced in
101 |         additional_args = self.additional_args
102 |         for idx, name in buffers[fn_name]:
103 |             # TODO: some buffers are necessary only for update output and can be
104 |             # freed right afterwards
105 |             buffer = self.buffers[name]  # defaultdict: created on first use, reused for the same name afterwards
106 |             additional_args = additional_args[:idx] + [buffer] + additional_args[idx:]  # new list; self.additional_args is not modified
107 |         return tuple(additional_args)
108 | 
109 |     def forward(self, input, *params):
110 |         self._backend = type2backend[type(input)]  # backend is selected by the input tensor's type
111 | 
112 |         for param in params:
113 |             if type(param) != type(input):
114 |                 raise RuntimeError("input type ({}) doesn't match the type of "
115 |                                    "a parameter tensor ({})".format(torch.typename(input),
116 |                                                                     torch.typename(param)))
117 | 
118 |         # Allocate temporary buffers and insert them into additional_args
119 |         self.buffers = defaultdict(type(input))  # a missing name yields a new type(input)() instance
120 |         additional_args = self._initialize_buffers('update_output')
121 | 
122 |         # Fill in optional params with None
123 |         args = params
124 |         for i in range(len(params), len(expected_params)):  # only the expected params the caller left out
125 |             param = expected_params[i]
126 |             if param.is_optional:
127 |                 args += (None,)
128 |             else:
129 |                 raise ValueError("missing required argument '%s'" % param.name)
130 | 
131 |         args += tuple(additional_args)
132 | 
133 |         # If the module is working in-place its output will be set to the
134 |         # same storage as input, but its variable won't be dirty.
135 |         if is_inplace and self.inplace:
136 |             self.mark_dirty(input)
137 |             output = input
138 |         else:
139 |             output = input.new()  # empty tensor; presumably sized by the backend call below -- TODO confirm
140 | 
141 |         if save_output:
142 |             self.save_for_backward(input, output, *params)
143 |         else:
144 |             self.save_for_backward(input, *params)
145 | 
146 |         if not self.requires_grad:
147 |             del self.buffers  # `args` built above still references the buffers for the call below
148 | 
149 |         getattr(self._backend, update_output.name)(self._backend.library_state, input, output, *args)
150 |         return output
151 | 
152 |     def backward(self, grad_output):
153 |         t = self.saved_tensors
154 |         if save_output:
155 |             input, output, params = t[0], t[1], t[2:]
156 |         else:
157 |             input, params = t[0], t[1:]
158 |         grad_params = tuple(None for p in params)  # default: no parameter gradients
159 |         grad_input_tuple = (None,)  # default: no input gradient
160 | 
161 |         if self.needs_input_grad[0]:
162 |             additional_args = self._initialize_buffers('update_grad_input')
163 |             if save_output:
164 |                 additional_args = (output,) + additional_args
165 | 
166 |             if is_inplace and self.inplace:
167 |                 assert additional_args[-1] is True
168 |                 tmp_args = list(additional_args)
169 |                 tmp_args[-1] = False  # the backend is called with the inplace flag cleared in backward
170 |                 additional_args = tuple(tmp_args)
171 |             grad_input = input.new().resize_as_(input)  # sized like input, not zero-filled here
172 |             params_without_bias = params if len(params) < 2 else params[:1]  # keeps at most the first saved param
173 |             update_grad_input_fn = getattr(self._backend, update_grad_input.name)
174 |             gi_args = params_without_bias + additional_args
175 |             update_grad_input_fn(self._backend.library_state, input, grad_output, grad_input, *gi_args)
176 |             grad_input_tuple = (grad_input,)
177 | 
178 |         if acc_grad_parameters and any(self.needs_input_grad[1:]):
179 |             additional_args = self._initialize_buffers('acc_grad_parameters')
180 |             grad_params = tuple(p.new().resize_as_(p).zero_() for p in params)  # zero-filled, one per supplied param
181 |             appended_grads = len(expected_params) - len(grad_params)  # expected params the caller did not supply
182 |             grad_params += (None,) * appended_grads
183 |             acc_grad_parameters_fn = getattr(self._backend, acc_grad_parameters.name)
184 |             param_args = grad_params + additional_args + (1,)  # trailing 1: presumably the scale argument -- TODO confirm
185 |             acc_grad_parameters_fn(self._backend.library_state, input, grad_output, *param_args)
186 |             if appended_grads:
187 |                 grad_params = grad_params[:-appended_grads]  # drop the None padding again
188 | 
189 |         return grad_input_tuple + grad_params  # input gradient first, then one entry per supplied param
190 | 
191 |     base_class = Function if not is_inplace else InplaceFunction
192 |     return type(class_name, (base_class,), dict(__init__=__init__, forward=forward, backward=backward,
193 |                                                 _initialize_buffers=_initialize_buffers))
194 | 
195 | 
196 | def _generate_function_classes(scope_dict):
197 |     global function_list, function_by_name
198 |     function_list = parse_header(THNN_H_PATH)
199 |     function_by_name = {fn.name: fn for fn in function_list}
200 |     classes_to_generate = {fn.name.partition('_')[0] for fn in function_list}
201 |     exceptions = {
202 |         'Linear',
203 |         'IndexLinear',
204 |         'SpatialFullConvolution',
205 |         'SpatialConvolutionMM',
206 |         'SparseLinear',
207 |         'TemporalConvolution',
208 |         'SpatialAveragePooling',
209 |         'SpatialMaxPooling',
210 |         'SpatialDilatedMaxPooling',
211 |         'SpatialMaxUnpooling',
212 |         'SpatialAdaptiveMaxPooling',
213 |         'SpatialAdaptiveAveragePooling',
214 |         'VolumetricAveragePooling',
215 |         'VolumetricMaxPooling',
216 |         'VolumetricMaxUnpooling',
217 |         'VolumetricConvolution',
218 |         'VolumetricFullConvolution',
219 |         'VolumetricConvolutionMM',
220 |         'TemporalMaxPooling',
221 |         'BatchNormalization',
222 |         'LookupTable',
223 |         'PReLU',
224 |         'RReLU',
225 |         'GRUFused',
226 |         'LSTMFused',
227 |         'unfolded',
228 |     }
229 |     name_remap = {
230 |         'TemporalConvolution': 'Conv1d',
231 |         'SpatialDilatedConvolution': 'DilatedConv2d',
232 |         'SpatialMaxUnpooling': 'MaxUnpool2d',
233 |         'SpatialReflectionPadding': 'ReflectionPad2d',
234 |         'SpatialReplicationPadding': 'ReplicationPad2d',
235 |         'VolumetricReplicationPadding': 'ReplicationPad3d',
236 |         'VolumetricMaxUnpooling': 'MaxUnpool3d',
237 |         'SoftMax': 'Softmax',
238 |         'LogSoftMax': 'LogSoftmax',
239 |         'HardTanh': 'Hardtanh',
240 |         'HardShrink': 'Hardshrink',
241 |         'SoftPlus': 'Softplus',
242 |         'SoftShrink': 'Softshrink',
243 |         'MSECriterion': 'MSELoss',
244 |         'AbsCriterion': 'L1Loss',
245 |         'BCECriterion': '_BCELoss',  # TODO: move the glue code into THNN
246 |         'ClassNLLCriterion': 'NLLLoss',
247 |         'SoftClassNLLCriterion': 'SoftNLLLoss',
248 |         'DistKLDivCriterion': 'KLDivLoss',
249 |         'SpatialClassNLLCriterion': 'NLLLoss2d',
250 |         'MultiLabelMarginCriterion': 'MultiLabelMarginLoss',
251 |         'MultiMarginCriterion': 'MultiMarginLoss',
252 |         'SmoothL1Criterion': 'SmoothL1Loss',
253 |         'SoftMarginCriterion': 'SoftMarginLoss',
254 |     }
255 | 
256 |     classes_to_generate -= exceptions
257 |     for fn in classes_to_generate:
258 |         update_output = function_by_name[fn + '_updateOutput']
259 |         update_grad_input = function_by_name[fn + '_updateGradInput']
260 |         acc_grad_parameters = function_by_name.get(fn + '_accGradParameters')
261 |         class_name = name_remap.get(fn, fn)
262 |         # This has to call a function to retain correct references to functions
263 |         if 'Criterion' in fn:
264 |             cls = _make_function_class_criterion(class_name, update_output,
265 |                                                  update_grad_input, acc_grad_parameters)
266 |         else:
267 |             cls = _make_function_class(class_name, update_output,
268 |                                        update_grad_input, acc_grad_parameters)
269 |         scope_dict[class_name] = cls
270 |         if not class_name.startswith('_'):
271 |             _all_functions.append(cls)
272 | 
273 | 
274 | _generate_function_classes(locals())  # runs at import; at module level locals() is the module namespace, so each class becomes a module attribute
275 | 


--------------------------------------------------------------------------------
/loss_implementations/pytorch/torch/nn/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | from .module import Module
 2 | from .linear import Linear, Bilinear
 3 | from .conv import Conv1d, Conv2d, Conv3d, \
 4 |     ConvTranspose1d, ConvTranspose2d, ConvTranspose3d
 5 | from .activation import Threshold, ReLU, Hardtanh, ReLU6, Sigmoid, Tanh, \
 6 |     Softmax, Softmax2d, LogSoftmax, ELU, Hardshrink, LeakyReLU, LogSigmoid, \
 7 |     Softplus, Softshrink, PReLU, Softsign, Softmin, Tanhshrink, RReLU
 8 | from .loss import L1Loss, SoftNLLLoss, NLLLoss, KLDivLoss, MSELoss, BCELoss, NLLLoss2d, \
 9 |     CosineEmbeddingLoss, HingeEmbeddingLoss, MarginRankingLoss, \
10 |     MultiLabelMarginLoss, MultiLabelSoftMarginLoss, MultiMarginLoss, \
11 |     SmoothL1Loss, SoftMarginLoss, CrossEntropyLoss, TripletMarginLoss
12 | from .container import Container, Sequential, ModuleList, ParameterList
13 | from .pooling import AvgPool1d, AvgPool2d, AvgPool3d, MaxPool1d, MaxPool2d, MaxPool3d, \
14 |     MaxUnpool1d, MaxUnpool2d, MaxUnpool3d, FractionalMaxPool2d, LPPool2d, AdaptiveMaxPool1d, \
15 |     AdaptiveMaxPool2d, AdaptiveAvgPool1d, AdaptiveAvgPool2d
16 | from .batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
17 | from .instancenorm import InstanceNorm1d, InstanceNorm2d, InstanceNorm3d
18 | from .dropout import Dropout, Dropout2d, Dropout3d
19 | from .padding import ReflectionPad2d, ReplicationPad2d, ReplicationPad3d, ZeroPad2d, ConstantPad2d
20 | from .normalization import CrossMapLRN2d
21 | from .sparse import Embedding
22 | from .rnn import RNNBase, RNN, LSTM, GRU, \
23 |     RNNCell, LSTMCell, GRUCell
24 | from .pixelshuffle import PixelShuffle
25 | from .upsampling import UpsamplingNearest2d, UpsamplingBilinear2d
26 | from .distance import PairwiseDistance
27 | 
28 | 
29 | __all__ = [  # names exported by a star-import of this package; all come from the imports above
30 |     'Module', 'Linear', 'Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d',
31 |     'ConvTranspose2d', 'ConvTranspose3d', 'Threshold', 'ReLU', 'Hardtanh', 'ReLU6',
32 |     'Sigmoid', 'Tanh', 'Softmax', 'Softmax2d', 'LogSoftmax', 'ELU', 'Hardshrink',
33 |     'LeakyReLU', 'LogSigmoid', 'Softplus', 'Softshrink', 'PReLU', 'Softsign', 'Softmin',
34 |     'Tanhshrink', 'RReLU', 'L1Loss','SoftNLLLoss', 'NLLLoss', 'KLDivLoss', 'MSELoss', 'BCELoss',
35 |     'NLLLoss2d', 'CosineEmbeddingLoss', 'HingeEmbeddingLoss', 'MarginRankingLoss',
36 |     'MultiLabelMarginLoss', 'MultiLabelSoftMarginLoss', 'MultiMarginLoss', 'SmoothL1Loss',
37 |     'SoftMarginLoss', 'CrossEntropyLoss', 'Container', 'Sequential', 'ModuleList',
38 |     'ParameterList', 'AvgPool1d', 'AvgPool2d', 'AvgPool3d', 'MaxPool1d', 'MaxPool2d',
39 |     'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'FractionalMaxPool2d',
40 |     'LPPool2d', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'InstanceNorm1d', 'InstanceNorm2d',
41 |     'InstanceNorm3d', 'Dropout', 'Dropout2d', 'Dropout3d', 'ReflectionPad2d',
42 |     'ReplicationPad2d', 'ReplicationPad3d', 'CrossMapLRN2d',
43 |     'Embedding', 'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell',
44 |     'PixelShuffle', 'UpsamplingNearest2d', 'UpsamplingBilinear2d', 'PairwiseDistance',
45 |     'AdaptiveMaxPool1d', 'AdaptiveMaxPool2d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool2d',
46 |     'TripletMarginLoss', 'ZeroPad2d', 'ConstantPad2d', 'Bilinear',
47 | ]
48 | 


--------------------------------------------------------------------------------
/loss_implementations/torch/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Follow these steps to add the loss function to your torch installation.
 3 | 
 4 | 1. add the lua file in extra/nn/
 5 | 2. edit extra/nn/init.lua and add it there
 6 | 3. add the cu file to extra/cunn/lib/THCUNN/
 7 | 4. add the cu file to extra/cunn/lib/THCUNN/generic/
 8 | 5. edit extra/cunn/lib/THCUNN/generic/THCUNN.h and add it there
 9 | 6. cd extra/nn/ ; luarocks make rocks/nn-scm-1.rockspec 
10 | 7. cd extra/cunn/ ; luarocks make rocks/cunn-scm-1.rockspec
11 | 
12 | We used torch commit 5c1d3cfda8101123628a45e70435d545ae1bc771.
13 | It is very likely that you will have to modify the code if you are using a different commit.
14 | 


--------------------------------------------------------------------------------
/loss_implementations/torch/extra/cunn/lib/THCUNN/SoftClassNLLCriterion.cu:
--------------------------------------------------------------------------------
  1 | #include "THCUNN.h"
  2 | #include "common.h"
  3 | #include "THCHalf.h"
  4 | #include "THCHalfAutoNumerics.cuh"
  5 | 
  6 | #include <stdio.h>
  7 | #include <assert.h>
  8 | 
  9 | static const int NTHREADS = 32;
 10 | 
 11 | template <typename Dtype>
 12 | __global__ void cunn_SoftClassNLLCriterion_updateOutput_kernel1(Dtype *output,
 13 |                                                            Dtype *total_weight,
 14 |                                                            Dtype *input,
 15 |                                                            THCIndex_t* target,
 16 |                                                            Dtype *weights,
 17 |                                                            int size_average,
 18 |                                                            int n_classes,
 19 |                                                            long ignore_index) {
 20 |   assert(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);  // every executing thread must have index (0, 0, 0)
 21 | 
 22 |   // TODO: T4951791 Reuse code between updateOutput_kernel1 and
 23 |   // updateOutput_kernel.
 24 | 
 25 |   printf("kernel1 NOT SUPPORTED\n\n");  // stub: apart from the assert this is all the kernel does; no parameter is read or written
 26 |   // Disabled implementation, kept for reference; it indexes input by a single class index t.
 27 |   /* int t = (int)*target - TH_INDEX_BASE; */
 28 |   /* if (t != ignore_index) { */
 29 |   /*   assert(t >= 0 && t < n_classes); */
 30 |   /*   Dtype cur_weight = weights ? weights[t] : ScalarConvert<int, Dtype>::to(1); */
 31 |   /*   *output = -cur_weight * input[t]; */
 32 |   /*   *total_weight = cur_weight; */
 33 |   /*   if (size_average && *total_weight > 0) { */
 34 |   /*     *output /= *total_weight; */
 35 |   /*   } */
 36 |   /* } */
 37 | }
 38 | 
 39 | template <typename Dtype, typename Acctype>
 40 | __global__ void cunn_SoftClassNLLCriterion_updateOutput_kernel(Dtype *output,
 41 |                                                            Dtype *total_weight,
 42 |                                                            Dtype *input,
 43 |                                                            THCIndex_t* target,
 44 |                                                            Dtype *weights,
 45 |                                                            int size_average,
 46 |                                                            int nframe,
 47 |                                                            int ndim,
 48 |                                                            int n_classes,
 49 |                                                            int n_weights,
 50 |                                                            long ignore_index) {
 51 |   __shared__ Acctype shInputs[NTHREADS], acc_weight[NTHREADS];
 52 |   int i, j, t;
 53 |   Dtype cur_weight;
 54 | 
 55 |   shInputs[threadIdx.x] = ScalarConvert<int, Acctype>::to(0);
 56 |   acc_weight[threadIdx.x] = ScalarConvert<int, Acctype>::to(0);
 57 |   for (i = threadIdx.x; i < nframe; i += NTHREADS) {
 58 |       for (j = 0; j < n_weights; j +=1) {
 59 |           t = (int) target[i * n_weights   + j] - TH_INDEX_BASE;
 60 |           if (t >= 0 && t != ignore_index) {
 61 |             assert(t < n_classes);
 62 |             cur_weight = weights[i * n_weights + j];
 63 |             shInputs[threadIdx.x] -= input[i * ndim + t] * cur_weight;
 64 |             /* acc_weight[threadIdx.x] += cur_weight; */
 65 |           }
 66 |       }
 67 |       acc_weight[threadIdx.x] += ScalarConvert<int, Dtype>::to(1);
 68 |   }
 69 |   __syncthreads();
 70 | 
 71 |   // TODO: T4951791 Reuse code between updateOutput_kernel1 and
 72 |   // updateOutput_kernel
 73 | 
 74 |   if (threadIdx.x == 0) {
 75 |     *output = *total_weight = ScalarConvert<int, Dtype>::to(0);
 76 |     Acctype outputAcc = 0;
 77 |     Acctype total_weightAcc = 0;
 78 |     for (i = 0; i < NTHREADS; ++i){
 79 |       // FIXME should we do somethigng here
 80 |       outputAcc += shInputs[i];
 81 |       total_weightAcc += acc_weight[i];
 82 |     }
 83 |     *total_weight = ScalarConvert<Acctype, Dtype>::to(total_weightAcc);
 84 |     *output = ScalarConvert<Acctype, Dtype>::to(outputAcc);
 85 |     if (size_average && *total_weight > 0) {
 86 |       *output = ScalarConvert<Acctype, Dtype>::to(outputAcc / total_weightAcc);
 87 |     }
 88 | 
 89 |   }
 90 | }
 91 | 
 92 | template <typename Dtype>
 93 | __global__ void cunn_SoftClassNLLCriterion_updateGradInput_kernel1(
 94 |   Dtype* gradInput,
 95 |   Dtype* weights,
 96 |   THCIndex_t* target,
 97 |   Dtype* total_weight,
 98 |   int size_average,
 99 |   int n_classes,
100 |   long ignore_index)
101 | {
102 |   if (*total_weight <= 0) {
103 |     return;
104 |   }
105 |   Dtype norm = size_average ? (ScalarConvert<int, Dtype>::to(1) / *total_weight) : ScalarConvert<int, Dtype>::to(1);
106 |   int t = (int)*target - TH_INDEX_BASE;
107 |   if (t != ignore_index) {
108 |     assert(t >= 0 && t < n_classes);
109 |     gradInput[t] = -(weights ? weights[t] : ScalarConvert<int, Dtype>::to(1)) * norm;
110 |   }
111 | }
112 | 
// Batched backward pass of the soft class-NLL criterion.
//
// Each thread walks the rows threadIdx.x, threadIdx.x + NTHREADS, ... of the
// batch. For every (target, weight) pair of a row whose zero-based class
// index t is non-negative and different from ignore_index it writes
//   gradInput[row * ndim + t] = -weight * norm
// where norm is 1 / *total_weight when size_average is set and 1 otherwise.
//
// FIX: the original computed `norm` but never applied it, so with
// size_average enabled the gradient was *total_weight times larger than the
// derivative of the averaged loss reported by the forward kernel.
//
// Entries that are not referenced by any pair are left untouched, so the
// caller must pass a zero-filled gradInput. Does nothing when *total_weight is
// not positive. The indexing assumes contiguous row-major buffers.
//
// NOTE(review): if the same class index appears more than once within a row,
// the later pair overwrites the earlier one here, whereas the forward kernel
// sums both contributions — confirm that targets never repeat within a row.
template <typename Dtype>
__global__ void cunn_SoftClassNLLCriterion_updateGradInput_kernel(
  Dtype *gradInput,
  THCIndex_t *target,
  Dtype *weights,
  Dtype *total_weight,
  int size_average,
  int nframe,
  int ndim,
  int n_classes,
  int n_weights, 
  long ignore_index)
{
  if (*total_weight <= 0) {
    return;
  }
  int i, j, t;
  Dtype norm = size_average ? (ScalarConvert<int, Dtype>::to(1) / *total_weight) : ScalarConvert<int, Dtype>::to(1);

  for (i = threadIdx.x; i < nframe; i += NTHREADS) {
      for (j = 0; j < n_weights; ++j) {
          t = (int) target[i * n_weights + j] - TH_INDEX_BASE;
          if (t >= 0 && t != ignore_index) {
            assert(t < n_classes);
            // Scale by norm so the gradient matches the (optionally averaged) loss.
            gradInput[i * ndim + t] = -weights[i * n_weights + j] * norm;
          }
      }
  }
}
142 | 
143 | #include "generic/SoftClassNLLCriterion.cu"
144 | #include "THCGenerateFloatTypes.h"
145 | 


--------------------------------------------------------------------------------
/loss_implementations/torch/extra/cunn/lib/THCUNN/generic/SoftClassNLLCriterion.cu:
--------------------------------------------------------------------------------
  1 | #ifndef THC_GENERIC_FILE
  2 | #define THC_GENERIC_FILE "generic/SoftClassNLLCriterion.cu"
  3 | #else
  4 | 
  5 | void THNN_(SoftClassNLLCriterion_updateOutput)(
  6 |            THCState *state,
  7 |            THCTensor *input,
  8 |            THCIndexTensor *target,
  9 |            THCTensor *output,
 10 |            bool sizeAverage,
 11 |            THCTensor *weights,
 12 |            THCTensor *total_weight,
 13 |            long ignore_index) {
 14 |   THCUNN_check_dim_size(state, output, 1, 0, 1);
 15 |   THCUNN_check_dim_size(state, total_weight, 1, 0, 1);
 16 |   ignore_index -= TH_INDEX_BASE;
 17 | 
 18 |   int n_dims = THCTensor_(nDimension)(state, input);
 19 |   int n_classes = THCTensor_(size)(state, input, n_dims - 1);
 20 |   int n_weights = THCIndexTensor_(size)(state, target, n_dims -1);
 21 | 
 22 |   if (weights) {
 23 |     THCUNN_assertSameGPU(
 24 |       state, 5, input, target, weights, output, total_weight
 25 |     );
 26 |   } else {
 27 |     THCUNN_assertSameGPU(
 28 |       state, 4, input, target, output, total_weight
 29 |     );
 30 |   }
 31 | 
 32 |   THArgCheck(n_dims <= 2 && n_dims > 0, 2, "vector or matrix expected");
 33 | 
 34 |   long batch_size = n_dims == 1 ? 1 : THCTensor_(size)(state, input, 0);
 35 |   long num_targets = THCudaLongTensor_size(state, target, 0);
 36 |   THArgCheck(batch_size == num_targets,
 37 |       2, "mismatch between the batch size of input (%ld) and that of target (%ld)",
 38 |       batch_size, num_targets);
 39 | 
 40 |   if (weights && THCTensor_(size)(state, weights, n_dims -1) != n_weights) {
 41 |     THCDescBuff s1 = THCTensor_(sizeDesc)(state, weights);
 42 |     THError("weight tensor should be defined for all %d targets "
 43 |             " but got weight tensor of shape: %s", n_weights, s1.str);
 44 |   }
 45 | 
 46 |   input = THCTensor_(newContiguous)(state, input);
 47 |   weights = THCTensor_(newContiguous)(state, weights);
 48 |   target = THCIndexTensor_(newContiguous)(state, target);
 49 | 
 50 |   real *input_data = THCTensor_(data)(state, input);
 51 |   real *weights_data = THCTensor_(data)(state, weights);
 52 |   THCIndex_t  *target_data = THCIndexTensor_(data)(state, target);
 53 |   real *output_data = THCTensor_(data)(state, output);
 54 |   real *total_weight_data = THCTensor_(data)(state, total_weight);
 55 | 
 56 |   if (THCTensor_(nDimension)(state, input) == 1) {
 57 |     cunn_SoftClassNLLCriterion_updateOutput_kernel1<real>
 58 |       <<<1, 1, 0, THCState_getCurrentStream(state)>>>(
 59 |         output_data,
 60 |         total_weight_data,
 61 |         input_data,
 62 |         target_data,
 63 |         weights_data,
 64 |         sizeAverage,
 65 |         n_classes,
 66 |         ignore_index
 67 |     );
 68 | 
 69 |   } else if (THCTensor_(nDimension)(state, input) == 2) {
 70 |     cunn_SoftClassNLLCriterion_updateOutput_kernel<real, accreal>
 71 |       <<<1, NTHREADS, 0, THCState_getCurrentStream(state)>>>(
 72 |         output_data,
 73 |         total_weight_data,
 74 |         input_data,
 75 |         target_data,
 76 |         weights_data,
 77 |         sizeAverage,
 78 |         THCTensor_(size)(state, input, 0),
 79 |         THCTensor_(size)(state, input, 1),
 80 |         n_classes,
 81 |         n_weights,
 82 |         ignore_index
 83 |     );
 84 |   }
 85 |   THCudaCheck(cudaGetLastError());
 86 | 
 87 |   THCTensor_(free)(state, weights);
 88 |   THCIndexTensor_(free)(state, target);
 89 |   THCTensor_(free)(state, input);
 90 | }
 91 | 
 92 | void THNN_(SoftClassNLLCriterion_updateGradInput)(
 93 |            THCState *state,
 94 |            THCTensor *input,
 95 |            THCIndexTensor *target,
 96 |            THCTensor *gradInput,
 97 |            bool sizeAverage,
 98 |            THCTensor *weights,
 99 |            THCTensor *total_weight,
100 |            long ignore_index) {
101 | 
102 |   ignore_index -= TH_INDEX_BASE;
103 | 
104 |   int n_dims = THCTensor_(nDimension)(state, input);
105 |   int n_classes = THCTensor_(size)(state, input, n_dims - 1);
106 |   int n_weights = THCIndexTensor_(size)(state, target, n_dims -1);
107 | 
108 |   THArgCheck(THCTensor_(isContiguous)(state, gradInput), 4, "gradInput must be contiguous");
109 | 
110 |   if (weights) {
111 |     THCUNN_assertSameGPU(
112 |       state, 5, weights, input, target, gradInput, total_weight
113 |     );
114 |   }
115 |   else {
116 |     THCUNN_assertSameGPU(
117 |       state, 4, input, target, gradInput, total_weight
118 |     );
119 |   }
120 | 
121 |   THArgCheck(n_dims <= 2 && n_dims > 0, 2, "vector or matrix expected");
122 | 
123 |   long batch_size = n_dims == 1 ? 1 : THCTensor_(size)(state, input, 0);
124 |   long num_targets = THCudaLongTensor_size(state, target, 0);
125 |   THArgCheck(batch_size == num_targets,
126 |       2, "mismatch between the batch size of input (%ld) and that of target (%ld)",
127 |       batch_size, num_targets);
128 | 
129 |   if (weights && THCTensor_(size)(state, weights, n_dims -1) != n_weights) {
130 |     THCDescBuff s1 = THCTensor_(sizeDesc)(state, weights);
131 |     THError("weight tensor should be defined for all %d targets "
132 |             " but got weight tensor of shape: %s", n_weights, s1.str);
133 |   }
134 | 
135 |   weights = THCTensor_(newContiguous)(state, weights);
136 |   target = THCIndexTensor_(newContiguous)(state, target);
137 | 
138 |   real *weights_data = THCTensor_(data)(state, weights);
139 |   real *gradInput_data = THCTensor_(data)(state, gradInput);
140 |   THCIndex_t  *target_data = THCIndexTensor_(data)(state, target);
141 |   real *total_weight_data = THCTensor_(data)(state, total_weight);
142 | 
143 |   if (THCTensor_(nDimension)(state, input) == 1) {
144 |     cunn_SoftClassNLLCriterion_updateGradInput_kernel1<real>
145 |       <<<1, 1, 0, THCState_getCurrentStream(state)>>>(
146 |         gradInput_data,
147 |         weights_data,
148 |         target_data,
149 |         total_weight_data,
150 |         sizeAverage,
151 |         n_classes,
152 |         ignore_index
153 |     );
154 |   } else {
155 |     cunn_SoftClassNLLCriterion_updateGradInput_kernel<real>
156 |       <<<1, NTHREADS, 0, THCState_getCurrentStream(state)>>>(
157 |         gradInput_data,
158 |         target_data,
159 |         weights_data,
160 |         total_weight_data,
161 |         sizeAverage,
162 |         THCTensor_(size)(state, input, 0),
163 |         THCTensor_(size)(state, input, 1),
164 |         n_classes,
165 |         n_weights,
166 |         ignore_index
167 |     );
168 |   }
169 |   THCudaCheck(cudaGetLastError());
170 | 
171 |   THCTensor_(free)(state, weights);
172 |   THCIndexTensor_(free)(state, target);
173 | }
174 | 
175 | #endif
176 | 


--------------------------------------------------------------------------------
/loss_implementations/torch/extra/nn/SoftClassNLLCriterion.lua:
--------------------------------------------------------------------------------
 1 | local THNN = require 'nn.THNN'
 2 | local SoftClassNLLCriterion, parent = torch.class('nn.SoftClassNLLCriterion', 'nn.Criterion')
 3 | 
 4 | function SoftClassNLLCriterion:__init(weights, sizeAverage, ignoreIndex)
 5 |     parent.__init(self)
 6 |     self.sizeAverage = (sizeAverage == nil) and true or sizeAverage
 7 |     self.ignoreIndex = ignoreIndex or -100 -- this target index will be ignored
 8 |     if weights then
 9 |        assert(weights:dim() == 1, "weights input should be 1-D Tensor")
10 |        self.weights = weights
11 |     end
12 | 
13 |     self.output_tensor = torch.zeros(1)
14 |     self.total_weight_tensor = torch.ones(1)
15 |     self.target = torch.zeros(1):long()
16 | end
17 | 
-- Length operator: the length of self.weights, or 0 when no weights are set.
function SoftClassNLLCriterion:__len()
   local w = self.weights
   if not w then
      return 0
   end
   return #w
end
25 | 
-- Forward pass.
-- `target` is indexed as a 3-D tensor: slice 1 of the last dimension holds
-- the class indices and slice 2 the matching weights. The weights are always
-- moved to the GPU as a CUDA float tensor and cached in self.weights.
-- Returns the loss and the total weight reported by the backend.
function SoftClassNLLCriterion:updateOutput(input, target)
   local onGpu = torch.typename(input):find('torch%.Cuda.*Tensor')

   if type(target) == 'number' then
      -- scalar target: reuse the 1-element buffer allocated in __init
      if onGpu then
          self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
      else
          self.target = self.target:long()
      end
      self.target:resize(1)
      self.target[1] = target
   else
      local labels = target[{{},{},{1}}]
      if onGpu then
         self.target = torch.CudaLongTensor and labels:cudaLong() or labels
      else
         self.target = labels:long()
      end
   end

   -- per-target weights live in the second slice of the last dimension
   self.weights = target[{{},{},{2}}]:float():cuda()

   input.THNN.SoftClassNLLCriterion_updateOutput(
      input:cdata(),
      self.target:cdata(),
      self.output_tensor:cdata(),
      self.sizeAverage,
      self.weights:cdata(),
      self.total_weight_tensor:cdata(),
      self.ignoreIndex
   )

   self.output = self.output_tensor[1]
   return self.output, self.total_weight_tensor[1]
end
55 | 
-- Backward pass.
-- `target` is indexed as a 3-D tensor: slice 1 of the last dimension holds
-- the class indices and slice 2 the matching weights.
--
-- FIX: the weights are now re-read from the `target` passed to this call.
-- Previously the method used whatever self.weights the last updateOutput call
-- had cached, so calling it with a different target (or before any
-- updateOutput call) used stale or constructor-provided weights.
-- For a numeric target the cached self.weights is still used, as before.
--
-- Returns self.gradInput, zero-filled before the backend writes into it.
function SoftClassNLLCriterion:updateGradInput(input, target)
   local onGpu = torch.typename(input):find('torch%.Cuda.*Tensor')

   if type(target) == 'number' then
      -- scalar target: reuse the 1-element buffer
      if onGpu then
          self.target = torch.CudaLongTensor and self.target:cudaLong() or self.target:cuda()
      else
          self.target = self.target:long()
      end
      self.target:resize(1)
      self.target[1] = target
   else
      local labels = target[{{},{},{1}}]
      if onGpu then
         self.target = torch.CudaLongTensor and labels:cudaLong() or labels
      else
         self.target = labels:long()
      end
      -- keep the weights in sync with the target actually being back-propagated
      self.weights = target[{{},{},{2}}]:float():cuda()
   end

   -- the backend only writes the entries addressed by the targets
   self.gradInput:resizeAs(input):zero()

   input.THNN.SoftClassNLLCriterion_updateGradInput(
      input:cdata(),
      self.target:cdata(),
      self.gradInput:cdata(),
      self.sizeAverage,
      self.weights:cdata(),
      self.total_weight_tensor:cdata(),
      self.ignoreIndex
   )

   return self.gradInput
end
86 | 


--------------------------------------------------------------------------------
/loss_implementations/torch/extra/nn/init.lua:
--------------------------------------------------------------------------------
  1 | require('torch')
  2 | 
  3 | nn = {} -- define the global nn table
  4 | 
  5 | require('nn.THNN')
  6 | 
  7 | require('nn.utils')
  8 | 
  9 | 
 10 | require('nn.ErrorMessages')
 11 | require('nn.Module')
 12 | 
 13 | require('nn.Container')
 14 | require('nn.Concat')
 15 | require('nn.Parallel')
 16 | require('nn.Sequential')
 17 | require('nn.DepthConcat')
 18 | 
 19 | require('nn.Decorator')
 20 | require('nn.Bottle')
 21 | require('nn.WeightNorm')
 22 | require('nn.DontCast')
 23 | require('nn.NaN')
 24 | require('nn.Profile')
 25 | 
 26 | require('nn.Linear')
 27 | require('nn.LinearWeightNorm')
 28 | require('nn.Bilinear')
 29 | require('nn.PartialLinear')
 30 | require('nn.SparseLinear')
 31 | require('nn.IndexLinear')
 32 | require('nn.Reshape')
 33 | require('nn.View')
 34 | require('nn.Contiguous')
 35 | require('nn.Select')
 36 | require('nn.Narrow')
 37 | require('nn.Index')
 38 | require('nn.Squeeze')
 39 | require('nn.Unsqueeze')
 40 | require('nn.Replicate')
 41 | require('nn.Transpose')
 42 | require('nn.BatchNormalization')
 43 | require('nn.LayerNormalization')
 44 | require('nn.Padding')
 45 | require('nn.GradientReversal')
 46 | require('nn.MaskedSelect')
 47 | 
 48 | require('nn.Copy')
 49 | require('nn.Min')
 50 | require('nn.Max')
 51 | require('nn.Sum')
 52 | require('nn.Mean')
 53 | require('nn.CMul')
 54 | require('nn.Mul')
 55 | require('nn.MulConstant')
 56 | require('nn.CAdd')
 57 | require('nn.Add')
 58 | require('nn.AddConstant')
 59 | require('nn.Dropout')
 60 | require('nn.SpatialDropout')
 61 | require('nn.VolumetricDropout')
 62 | 
 63 | require('nn.CAddTable')
 64 | require('nn.CDivTable')
 65 | require('nn.CMulTable')
 66 | require('nn.CSubTable')
 67 | require('nn.CMaxTable')
 68 | require('nn.CMinTable')
 69 | 
 70 | require('nn.Euclidean')
 71 | require('nn.WeightedEuclidean')
 72 | require('nn.PairwiseDistance')
 73 | require('nn.CosineDistance')
 74 | require('nn.DotProduct')
 75 | require('nn.Normalize')
 76 | require('nn.Cosine')
 77 | 
 78 | require('nn.Exp')
 79 | require('nn.Log')
 80 | require('nn.HardTanh')
 81 | require('nn.Clamp')
 82 | require('nn.LogSigmoid')
 83 | require('nn.LogSoftMax')
 84 | require('nn.Sigmoid')
 85 | require('nn.SoftMax')
 86 | require('nn.SoftMin')
 87 | require('nn.SoftPlus')
 88 | require('nn.SoftSign')
 89 | require('nn.Tanh')
 90 | require('nn.TanhShrink')
 91 | require('nn.Abs')
 92 | require('nn.Power')
 93 | require('nn.Square')
 94 | require('nn.Sqrt')
 95 | require('nn.HardShrink')
 96 | require('nn.SoftShrink')
 97 | require('nn.Threshold')
 98 | require('nn.Maxout')
 99 | require('nn.ReLU')
100 | require('nn.ReLU6')
101 | require('nn.PReLU')
102 | require('nn.CReLU')
103 | require('nn.LeakyReLU')
104 | require('nn.SpatialSoftMax')
105 | require('nn.SpatialLogSoftMax')
106 | require('nn.RReLU')
107 | require('nn.ELU')
108 | require('nn.GatedLinearUnit')
109 | 
110 | require('nn.LookupTable')
111 | require('nn.SpatialConvolution')
112 | require('nn.SpatialConvolutionLocal')
113 | require('nn.SpatialFullConvolution')
114 | require('nn.SpatialFullConvolutionMap')
115 | require('nn.SpatialConvolutionMM')
116 | require('nn.SpatialDepthWiseConvolution')
117 | require('nn.SpatialConvolutionMap')
118 | require('nn.SpatialDilatedConvolution')
119 | require('nn.SpatialSubSampling')
120 | require('nn.SpatialMaxPooling')
121 | require('nn.SpatialDilatedMaxPooling')
122 | require('nn.SpatialMaxUnpooling')
123 | require('nn.SpatialFractionalMaxPooling')
124 | require('nn.SpatialLPPooling')
125 | require('nn.SpatialAveragePooling')
126 | require('nn.SpatialAdaptiveMaxPooling')
127 | require('nn.SpatialAdaptiveAveragePooling')
128 | require('nn.TemporalConvolution')
129 | require('nn.TemporalSubSampling')
130 | require('nn.TemporalMaxPooling')
131 | require('nn.TemporalDynamicKMaxPooling')
132 | require('nn.TemporalRowConvolution')
133 | require('nn.SpatialSubtractiveNormalization')
134 | require('nn.SpatialDivisiveNormalization')
135 | require('nn.SpatialContrastiveNormalization')
136 | require('nn.SpatialCrossMapLRN')
137 | require('nn.SpatialZeroPadding')
138 | require('nn.SpatialReflectionPadding')
139 | require('nn.SpatialReplicationPadding')
140 | require('nn.SpatialUpSamplingNearest')
141 | require('nn.SpatialUpSamplingBilinear')
142 | require('nn.SpatialBatchNormalization')
143 | 
144 | require('nn.VolumetricConvolution')
145 | require('nn.VolumetricFullConvolution')
146 | require('nn.VolumetricDilatedConvolution')
147 | require('nn.VolumetricMaxPooling')
148 | require('nn.VolumetricDilatedMaxPooling')
149 | require('nn.VolumetricFractionalMaxPooling')
150 | require('nn.VolumetricMaxUnpooling')
151 | require('nn.VolumetricAveragePooling')
152 | require('nn.VolumetricBatchNormalization')
153 | require('nn.VolumetricReplicationPadding')
154 | 
155 | require('nn.GPU')
156 | 
157 | require('nn.ParallelTable')
158 | require('nn.Identity')
159 | require('nn.ConcatTable')
160 | require('nn.SplitTable')
161 | require('nn.JoinTable')
162 | require('nn.SelectTable')
163 | require('nn.MixtureTable')
164 | require('nn.CriterionTable')
165 | require('nn.FlattenTable')
166 | require('nn.NarrowTable')
167 | require('nn.MapTable')
168 | 
169 | require('nn.Criterion')
170 | require('nn.MSECriterion')
171 | require('nn.SpatialAutoCropMSECriterion')
172 | require('nn.SmoothL1Criterion')
173 | require('nn.MarginCriterion')
174 | require('nn.SoftMarginCriterion')
175 | require('nn.AbsCriterion')
176 | require('nn.ClassNLLCriterion')
177 | require('nn.SoftClassNLLCriterion')
178 | require('nn.SpatialClassNLLCriterion')
179 | require('nn.ClassSimplexCriterion')
180 | require('nn.DistKLDivCriterion')
181 | require('nn.MultiCriterion')
182 | require('nn.L1HingeEmbeddingCriterion')
183 | require('nn.HingeEmbeddingCriterion')
184 | require('nn.CosineEmbeddingCriterion')
185 | require('nn.MarginRankingCriterion')
186 | require('nn.MultiMarginCriterion')
187 | require('nn.MultiLabelMarginCriterion')
188 | require('nn.MultiLabelSoftMarginCriterion')
189 | require('nn.L1Cost')
190 | require('nn.L1Penalty')
191 | require('nn.WeightedMSECriterion')
192 | require('nn.BCECriterion')
193 | require('nn.CrossEntropyCriterion')
194 | require('nn.ParallelCriterion')
195 | require('nn.DistanceRatioCriterion')
196 | 
197 | require('nn.PixelShuffle')
198 | 
199 | require('nn.StochasticGradient')
200 | 
201 | require('nn.MM')
202 | require('nn.MV')
203 | 
204 | require('nn.Jacobian')
205 | require('nn.SparseJacobian')
206 | require('nn.hessian')
207 | require('nn.test')
208 | 
209 | 
210 | return nn
211 | 


--------------------------------------------------------------------------------
/utils/README.md:
--------------------------------------------------------------------------------
This directory contains utility code. The most important files are `fastText.hash` and `glove.hash`: each is a `tds.Hash` that maps every word in the VQA2 questions to its [fastText](https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md) or [glove](http://nlp.stanford.edu/data/glove.840B.300d.zip) vector, respectively.
 2 | 
 3 | 
 4 | Folders `extract_fastText` and `extract_glove` contain the scripts for generating these hashes from the raw vector files (downloaded from the links above).
 5 | 
 6 | This directory should contain:
 7 | 
 8 | ```
 9 | ../utils/
10 | |-- extract_fastText
11 | |   |-- vocab_to_fastText.lua
12 | |   `-- wiki.en.vec
13 | |-- extract_glove
14 | |   |-- glove.840B.300d.txt
15 | |   `-- vocab_to_glove.lua
16 | |-- fastText.hash
17 | |-- glove.hash
18 | |-- logger.lua
19 | |-- README.md
20 | |-- repl.lua
21 | `-- util.lua
22 | 
23 | 2 directories, 10 files
24 | 
25 | ```
26 | 
27 | Note that you can omit the `extract*` folders if using the pregenerated word maps.
28 | 


--------------------------------------------------------------------------------
/utils/extract_fastText/vocab_to_fastText.lua:
--------------------------------------------------------------------------------
  1 | -- debugger is a global variable so it can be accessed from everywhere
  2 | _, debugger = pcall(require,'fb.debugger') 
  3 | local tds = require'tds'
  4 | local util = require'../util.lua'
  5 | local logger = require'../logger.lua'
  6 | 
  7 | local opt = {
  8 |    train_que   = '../../vqa2_data/v2_OpenEnded_mscoco_train2014_questions.json',
  9 |    val_que     = '../../vqa2_data/v2_OpenEnded_mscoco_val2014_questions.json',
 10 |    testdev_que = '../../vqa2_data/v2_OpenEnded_mscoco_test-dev2015_questions.json',
 11 |    test_que    = '../../vqa2_data/v2_OpenEnded_mscoco_test2015_questions.json',
 12 | 
 13 |    word2vec    = './wiki.en.vec',
 14 |    vec_len     = 300,
 15 |    nvec        = 2519371,
 16 | 
 17 |    outhash     = '../fastText.hash'
 18 | }
 19 | 
 20 | for k, v in pairs(opt) do 
 21 |    opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] 
 22 | end
 23 | print(opt)
 24 | 
 25 | 
 26 | 
 27 | local build_vocab = function()
 28 |    logger.info('Building vocabulary out of all question words...')
 29 |    local vocab = tds.Hash()
 30 |    local jsons = {opt.train_que, opt.val_que, opt.testdev_que, opt.test_que}
 31 | 
 32 |    for i=1, #jsons do 
 33 |       logger.info('Processing '..jsons[i])
 34 |       local que_hash = util.json_to_hash(jsons[i], 'questions')
 35 |       for j=1, #que_hash do
 36 |          if j%100 == 0 then xlua.progress(j,#que_hash) end
 37 |          local question = que_hash[j]['question']
 38 |          local words = util.preprocess_string(question):split(' ') 
 39 |          for k=1, #words do
 40 |             if not vocab[words[k]] then
 41 |                vocab[words[k]] = #vocab + 1
 42 |             end
 43 |          end
 44 |       end
 45 |       xlua.progress(#que_hash, #que_hash)
 46 |    end
 47 | 
 48 |    logger.info('Done. Vocabulary size is '..#vocab)
 49 | 
 50 |    return vocab
 51 | end
 52 | 
 53 | vocab = build_vocab()
 54 | local lookup = torch.FloatTensor(#vocab, opt.vec_len)
 55 | 
 56 | local UNK
 57 | local done = 0
 58 | local buffsz = 2^13 -- == 8k
 59 | local f = io.input(opt.word2vec)
 60 | logger.info('Extracting word2vec vectors')
 61 | while true do -- breaks when no more lines
 62 |     local lines, leftover = f:read(buffsz, '*line')
 63 |     if not lines then break end  -- no more lines
 64 |     if leftover then lines = lines .. leftover .. '\n' end -- join the leftover
 65 |     lines = lines:split('\n')
 66 | 
 67 |     for i=1, #lines do
 68 |         if done % 1000 == 0 then xlua.progress(done, opt.nvec) end
 69 |         local line = lines[i]:split(' ')
 70 |         local word = line[1]
 71 |         table.remove(line, 1) -- remove the word
 72 |         if word == 'unk' then 
 73 |             UNK = torch.FloatTensor(line) 
 74 |         else
 75 |             local index = vocab[word]
 76 |             if index then
 77 |                vocab[word] = torch.FloatTensor(line)
 78 |             end
 79 |         end
 80 |         done = done + 1
 81 |     end
 82 | end
 83 | xlua.progress(done, opt.nvec)
 84 | f:close()
 85 | 
 86 | local unks = 0
 87 | for word,index in pairs(vocab) do
 88 |    if type(index) == 'number' then
 89 |       logger.debug('No word2vec vector for ' .. word)
 90 |       vocab[word] = UNK
 91 |       unks = unks + 1
 92 |    end
 93 | end
 94 | 
 95 | vocab['UNK'] = UNK
 96 | 
 97 | torch.save(opt.outhash, vocab)
 98 | logger.info('Words in vocab '.. #vocab)
 99 | logger.info('Words set to UNK vector '.. unks)
100 | 


--------------------------------------------------------------------------------
/utils/extract_glove/vocab_to_glove.lua:
--------------------------------------------------------------------------------
  1 | -- debugger is a global variable so it can be accessed from everywhere
  2 | _, debugger = pcall(require,'fb.debugger') 
  3 | local tds = require'tds'
  4 | local util = require'../util.lua'
  5 | local logger = require'../logger.lua'
  6 | 
  7 | local opt = {
  8 |    train_que   = '../../vqa2_data/v2_OpenEnded_mscoco_train2014_questions.json',
  9 |    val_que     = '../../vqa2_data/v2_OpenEnded_mscoco_val2014_questions.json',
 10 |    testdev_que = '../../vqa2_data/v2_OpenEnded_mscoco_test-dev2015_questions.json',
 11 |    test_que    = '../../vqa2_data/v2_OpenEnded_mscoco_test2015_questions.json',
 12 | 
 13 |    word2vec    = './glove.840B.300d.txt',
 14 |    vec_len     = 300,
 15 |    nvec        = 2196017,
 16 | 
 17 |    outhash     = '../glove.hash'
 18 | }
 19 | 
 20 | for k, v in pairs(opt) do 
 21 |    opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] 
 22 | end
 23 | print(opt)
 24 | 
 25 | 
 26 | 
 27 | local build_vocab = function()
 28 |    logger.info('Building vocabulary out of all question words...')
 29 |    local vocab = tds.Hash()
 30 |    local jsons = {opt.train_que, opt.val_que, opt.testdev_que, opt.test_que}
 31 | 
 32 |    for i=1, #jsons do 
 33 |       logger.info('Processing '..jsons[i])
 34 |       local que_hash = util.json_to_hash(jsons[i], 'questions')
 35 |       for j=1, #que_hash do
 36 |          if j%100 == 0 then xlua.progress(j,#que_hash) end
 37 |          local question = que_hash[j]['question']
 38 |          local words = util.preprocess_string(question):split(' ') 
 39 |          for k=1, #words do
 40 |             if not vocab[words[k]] then
 41 |                vocab[words[k]] = #vocab + 1
 42 |             end
 43 |          end
 44 |       end
 45 |       xlua.progress(#que_hash, #que_hash)
 46 |    end
 47 | 
 48 |    logger.info('Done. Vocabulary size is '..#vocab)
 49 | 
 50 |    return vocab
 51 | end
 52 | 
 53 | vocab = build_vocab()
 54 | local lookup = torch.FloatTensor(#vocab, opt.vec_len)
 55 | 
 56 | local UNK
 57 | local done = 0
 58 | local buffsz = 2^13 -- == 8k
 59 | local f = io.input(opt.word2vec)
 60 | logger.info('Extracting word2vec vectors')
 61 | while true do -- breaks when no more lines
 62 |     local lines, leftover = f:read(buffsz, '*line')
 63 |     if not lines then break end  -- no more lines
 64 |     if leftover then lines = lines .. leftover .. '\n' end -- join the leftover
 65 |     lines = lines:split('\n')
 66 | 
 67 |     for i=1, #lines do
 68 |         if done % 1000 == 0 then xlua.progress(done, opt.nvec) end
 69 |         local line = lines[i]:split(' ')
 70 |         local word = line[1]
 71 |         table.remove(line, 1) -- remove the word
 72 |         if word == 'UNK' then 
 73 |             UNK = torch.FloatTensor(line) 
 74 |         else
 75 |             local index = vocab[word]
 76 |             if index then
 77 |                vocab[word] = torch.FloatTensor(line)
 78 |             end
 79 |         end
 80 |         done = done + 1
 81 |     end
 82 | end
 83 | xlua.progress(done, opt.nvec)
 84 | f:close()
 85 | 
 86 | local unks = 0
 87 | for word,index in pairs(vocab) do
 88 |    if type(index) == 'number' then
 89 |       logger.debug('No word2vec vector for ' .. word)
 90 |       vocab[word] = UNK
 91 |       unks = unks + 1
 92 |    end
 93 | end
 94 | 
 95 | vocab['UNK'] = UNK
 96 | 
 97 | torch.save(opt.outhash, vocab)
 98 | logger.info('Words in vocab '.. #vocab)
 99 | logger.info('Words set to UNK vector '.. unks)
100 | 


--------------------------------------------------------------------------------
/utils/fastText.hash:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ili3p/vqa-soft/c28c3414673adf80620f08e713274d8aed2edfea/utils/fastText.hash


--------------------------------------------------------------------------------
/utils/glove.hash:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ili3p/vqa-soft/c28c3414673adf80620f08e713274d8aed2edfea/utils/glove.hash


--------------------------------------------------------------------------------
/utils/logger.lua:
--------------------------------------------------------------------------------
  1 | --
  2 | -- M.lua
  3 | --
  4 | -- Copyright (c) 2016 rxi
  5 | --
  6 | -- This library is free software; you can redistribute it and/or modify it
  7 | -- under the terms of the MIT license. See LICENSE for details.
  8 | --
  9 | 
 10 | -- Module table; the fields below are the defaults that callers (or init) may override.
 11 | local M = {
 12 |   _version = "0.1.0",
 13 |   usecolor = true,      -- colourise console output with ANSI escapes
 14 |   outfile = nil,        -- path of the log file; nil means no file logging
 15 |   level = "trace",      -- least severe mode that is still emitted
 16 |   logToConsole = true,  -- print records to stdout
 17 | }
 18 | 
 19 | -- Modes listed from least to most severe, each with its ANSI colour escape.
 20 | local modes = {
 21 |   { name = "trace", color = "\27[34m" },
 22 |   { name = "debug", color = "\27[36m" },
 23 |   { name = "info",  color = "\27[32m" },
 24 |   { name = "warn",  color = "\27[33m" },
 25 |   { name = "error", color = "\27[31m" },
 26 |   { name = "fatal", color = "\27[30m" },
 27 | }
 28 | 
 29 | -- Mode name -> its position in `modes`; compared against M.level when logging.
 30 | local levels = {}
 31 | for rank = 1, #modes do levels[modes[rank].name] = rank end
 32 | 
 33 | 
 34 | -- Round x to the nearest multiple of `increment` (default 1); halves go away from zero.
 35 | local round = function(x, increment)
 36 |   local step = increment or 1
 37 |   local scaled = x / step
 38 |   return (scaled > 0 and math.floor(scaled + .5) or math.ceil(scaled - .5)) * step
 39 | end
 40 | local table_to_str
 41 | 
 42 | -- taken from http://lua-users.org/wiki/TableUtils
 43 | -- Render v as a Lua-literal-like string: strings are quoted (newlines shown
 44 | -- as \n), tables go through table_to_str, everything else through tostring.
 45 | local val_to_str = function( v )
 46 |   if "string" ~= type( v ) then
 47 |     return "table" == type( v ) and table_to_str( v ) or tostring( v )
 48 |   end
 49 |   v = string.gsub( v, "\n", "\\n" )
 50 |   -- Only double quotes inside: wrap in single quotes so nothing needs escaping.
 51 |   if string.match( string.gsub( v, "[^'\"]", "" ), '^"+$' ) then
 52 |     return "'" .. v .. "'"
 53 |   end
 54 |   return '"' .. string.gsub( v, '"', '\\"' ) .. '"'
 55 | end
 56 | -- taken from http://lua-users.org/wiki/TableUtils
 57 | -- Identifier-like string keys print bare; any other key prints as [value].
 58 | local key_to_str = function(k)
 59 |   local bare = "string" == type( k ) and string.match( k, "^[_%a][_%a%d]*$" )
 60 |   if bare then return k end
 61 |   return "[" .. val_to_str( k ) .. "]"
 62 | end
 63 | 
 64 | -- Iterate t in sorted key order. When `order(t, a, b)` is given it decides
 65 | -- whether key a precedes key b; otherwise keys sort with `<`, and keys of
 66 | -- different types are grouped by type name instead of raising an error.
 67 | local spairs = function (t, order)
 68 |    local keys = {}
 69 |    for k in pairs(t) do keys[#keys+1] = k end
 70 | 
 71 |    table.sort(keys, function(a, b)
 72 |       if order then return order(t, a, b) end
 73 |       local ta, tb = type(a), type(b)
 74 |       if ta ~= tb then return ta < tb end
 75 |       if ta == 'number' or ta == 'string' then return a < b end
 76 |       return tostring(a) < tostring(b) -- booleans, tables, ... have no `<`
 77 |    end)
 78 | 
 79 |    local i = 0
 80 |    return function()
 81 |       i = i + 1
 82 |       local k = keys[i]
 83 |       if k ~= nil then return k, t[k] end -- `~= nil` so a `false` key is not mistaken for the end
 84 |    end
 85 | end
 86 | 
 87 | -- taken from http://lua-users.org/wiki/TableUtils
 88 | -- Render tbl as "{", one tab-indented "key : value" entry per line in sorted
 89 | -- key order, then "}". Keys are wrapped in the colour escape of the first mode
 90 | -- ("trace") regardless of M.usecolor.
 91 | table_to_str = function(tbl)
 92 |   local key_color, separator = modes[1].color, " \27[0m : "
 93 |   local entries = {}
 94 |   for key, value in spairs( tbl ) do
 95 |     local text = '\t' .. key_color .. key_to_str( key ) .. separator .. val_to_str( value )
 96 |     entries[#entries + 1] = text
 97 |   end
 98 |   return "{\n" .. table.concat( entries, ", \n" ) .. "}"
 99 | end
100 | 
101 | 
102 | 
103 | 
104 | 
105 | -- local _tostring = tostring
106 | 
107 | -- Build one display string from all arguments, separated by single spaces.
108 | -- Numbers are first rounded to the nearest 0.01 and tables pretty-printed.
109 | local printstr = function(...)
110 |   local parts = {}
111 |   for idx = 1, select('#', ...) do
112 |     local arg = (select(idx, ...))
113 |     local kind = type(arg)
114 |     if kind == "table" then
115 |       arg = table_to_str(arg)
116 |     elseif kind == "number" then
117 |       arg = round(arg, .01)
118 |     end
119 |     parts[#parts + 1] = tostring(arg)
120 |   end
121 |   return table.concat(parts, " ")
122 | end
123 | -- Define one logging function per mode: M.trace, M.debug, M.info, M.warn,
124 | -- M.error and M.fatal. Each accepts either a single value to log, or a message
125 | -- string followed by a duration in seconds, rendered as "<message> in <n>ms".
126 | -- A record is emitted only if its mode is at least as severe as M.level, and
127 | -- goes to M.outfile (when set) and/or stdout (when M.logToConsole is truthy).
128 | for i, x in ipairs(modes) do
129 |   local nameupper = x.name:upper()
130 | 
131 |   M[x.name] = function(...)
132 |     -- Nothing to do when filtered out by level or when every sink is off.
133 |     if i < levels[M.level] then return end
134 |     if not (M.outfile or M.logToConsole) then return end
135 | 
136 |     -- Build the message once; both sinks used to duplicate this code.
137 |     local msg = {...}
138 |     if #msg > 1 then
139 |       msg = msg[1] .. ' in ' .. string.format('%d', msg[2]*1e3) .. 'ms'
140 |     else
141 |       msg = ...
142 |     end
143 |     local text = printstr(msg)
144 | 
145 |     -- Level 2 is the caller of this logging function; keep this call directly
146 |     -- in this closure, since moving it into a helper would shift the level.
147 |     local info = debug.getinfo(2, "Sl")
148 |     local lineinfo = info.short_src .. ":" .. info.currentline
149 | 
150 |     if M.outfile then
151 |       -- io.open returns nil on failure (missing directory, permissions);
152 |       -- skip the file sink then instead of crashing the program being logged.
153 |       local fp = io.open(M.outfile, "a")
154 |       if fp then
155 |         fp:write(string.format("[%-6s%s]%s: %s\n",
156 |                                nameupper, os.date(), lineinfo, text))
157 |         fp:close()
158 |       end
159 |     end
160 | 
161 |     if M.logToConsole then
162 |       print(string.format("%s[%-6s%s]%s: %s %s %s",
163 |                         M.usecolor and x.color or "",
164 |                         nameupper,
165 |                         os.date("%H:%M:%S"),
166 |                         lineinfo,
167 |                         M.usecolor and "\27[1;30m" or "",
168 |                         text,
169 |                         M.usecolor and "\27[0m" or ""
170 |                         ))
171 |     end
172 |   end
173 | end
174 | 
175 | 
176 | -- Configure the logger from an options table: creates <log_dir>/<version>/,
177 | -- points M.outfile at a timestamped file in it when opt.log_to_file is set,
178 | -- and copies the console flag and the level (opt.log_level indexes `modes`).
179 | local init = function(opt)
180 |    local dir = opt.log_dir .. '/' .. opt.version .. '/'
181 |    paths.mkdir(dir)
182 |    if opt.log_to_file then M.outfile = dir .. 'log_' .. os.date("_%Y%m%d_%H%M%S") .. '.log' end
183 |    M.logToConsole = opt.log_to_console
184 |    M.level = modes[opt.log_level].name
185 | end
186 | M.init = init
187 | return M
188 | 


--------------------------------------------------------------------------------
/utils/repl.lua:
--------------------------------------------------------------------------------
 1 | require "trepl"
 2 | 
 3 | -- Drop into an interactive trepl session with the caller's upvalues and
 4 | -- locals copied into the global table so they can be inspected by name.
 5 | -- restoreGlobals (default false): when true, the globals are put back to
 6 | -- their pre-call state once the session ends.
 7 | function debugRepl(restoreGlobals)
 8 |   restoreGlobals = restoreGlobals or false
 9 | 
10 |   -- optionally make a shallow copy of _G
11 |   local oldG = {}
12 |   if restoreGlobals then
13 |     for k, v in pairs(_G) do
14 |       oldG[k] = v
15 |     end
16 |   end
17 | 
18 |   -- copy upvalues of the calling function (stack level 2) to _G
19 |   local func = debug.getinfo(2, "f").func
20 |   local i = 1
21 |   while true do
22 |     local k, v = debug.getupvalue(func, i)
23 |     if k == nil then break end
24 |     _G[k] = v
25 |     i = i + 1
26 |   end
27 | 
28 |   -- copy the caller's locals to _G (these win over same-named upvalues)
29 |   i = 1
30 |   while true do
31 |     local k, v = debug.getlocal(2, i)
32 |     if k == nil then break end
33 |     _G[k] = v
34 |     i = i + 1
35 |   end
36 | 
37 |   repl()
38 | 
39 |   -- Assigning `_G = oldG` only rebinds the global *field* named _G; it does not
40 |   -- swap the environment. Restore in place: drop added keys, reset the rest.
41 |   if restoreGlobals then
42 |     for k in pairs(_G) do if oldG[k] == nil then _G[k] = nil end end
43 |     for k, v in pairs(oldG) do _G[k] = v end
44 |   end
45 | end


--------------------------------------------------------------------------------
/utils/util.lua:
--------------------------------------------------------------------------------
  1 | local M = {}
  2 | 
  3 | -- Number of entries in tbl: counts every key pairs() visits, unlike `#`
  4 | -- which only covers the array part.
  5 | M.len = function(tbl)
  6 |    local total = 0
  7 |    for _ in pairs(tbl) do
  8 |       total = total + 1
  9 |    end
 10 |    return total
 11 | end
 12 | -- Store table `obj` under hash[key] as a fresh tds.Hash, converting nested
 13 | -- tables recursively; non-table values are copied as-is.
 14 | M.set = function(hash, key, obj)
 15 |    local tds = require'tds' -- was an accidental global assignment
 16 |    hash[key] = tds.Hash()
 17 |    for k, v in pairs(obj) do
 18 |       if type(v) == 'table' then M.set(hash[key], k, v)
 19 |       else hash[key][k] = v end
 20 |    end
 21 | end
 22 | 
 23 | -- Collect every key of tbl into an array (order follows pairs(), i.e. unspecified).
 24 | M.keys = function(tbl)
 25 |     local names = {}
 26 |     for key in pairs(tbl) do
 27 |         names[#names + 1] = key
 28 |     end
 29 |     return names
 30 | end
 31 | 
 32 | -- Read the whole file `fn` and return its contents as one string.
 33 | -- Raises an error naming the file when it cannot be opened.
 34 | M.file_as_string = function(fn)
 35 |     local f = assert(io.open(fn, 'r'))
 36 |     local str = f:read('*a') -- '*a' = entire file; bare read() stops at the first newline
 37 |     f:close()
 38 |     return str
 39 | end
 40 | -- Build the array {1, 2, ..., n}; the result is empty when n < 1.
 41 | M.range = function(n)
 42 |    local seq = {}
 43 |    for value = 1, n do
 44 |       seq[#seq + 1] = value
 45 |    end
 46 |    return seq
 47 | end
 48 | 
 49 | -- Shallow-copy every (key, value) pair that pairs() yields from hash into a new Lua table.
 50 | M.hash2tbl = function(hash)
 51 |    local copy = {}
 52 |    for key, value in pairs(hash) do
 53 |       copy[key] = value
 54 |    end
 55 |    return copy
 56 | end
 57 | 
 58 | -- Swap keys and values of tbl; a repeated value is reported via print and the last key seen wins.
 59 | M.tableinvert = function(tbl)
 60 |    local inverse = {}
 61 |    for key, value in pairs(tbl) do
 62 |       if inverse[value] then print('Error:','Values are not unique ('..value..')') end
 63 |       inverse[value] = key
 64 |    end
 65 |    return inverse
 66 | end
 67 | -- Turn a 1-D sequence of word ids into a sentence: zero entries are skipped and
 68 | -- every kept word is followed by a single space (so the result ends in one).
 69 | M.word_ids_to_word = function(id2word, wordTens)
 70 |    local pieces = {}
 71 |    for pos = 1, wordTens:size(1) do
 72 |       local id = wordTens[pos]
 73 |       if id ~= 0 then
 74 |          table.insert(pieces, id2word[id])
 75 |          table.insert(pieces, ' ')
 76 |       end
 77 |    end
 78 |    return table.concat(pieces)
 79 | end
 80 | -- Iterate t in sorted key order. When `order(t, a, b)` is given it decides
 81 | -- whether key a precedes key b; otherwise keys sort with `<`, and keys of
 82 | -- different types are grouped by type name instead of raising an error.
 83 | M.spairs = function (t, order)
 84 |    local keys = {}
 85 |    for k in pairs(t) do keys[#keys+1] = k end
 86 | 
 87 |    table.sort(keys, function(a, b)
 88 |       if order then return order(t, a, b) end
 89 |       local ta, tb = type(a), type(b)
 90 |       if ta ~= tb then return ta < tb end
 91 |       if ta == 'number' or ta == 'string' then return a < b end
 92 |       return tostring(a) < tostring(b) -- booleans, tables, ... have no `<`
 93 |    end)
 94 | 
 95 |    local i = 0
 96 |    return function()
 97 |       i = i + 1
 98 |       local k = keys[i]
 99 |       if k ~= nil then return k, t[k] end -- `~= nil` so a `false` key is not mistaken for the end
100 |    end
101 | end
102 | 
103 | -- Print every entry of t in sorted-key order, one print() call per entry.
104 | M.printsorted = function(t)
105 |    -- TODO
106 |    for key, value in M.spairs(t) do print(' ', key, value) end
107 | end
108 | 
109 | 
110 | 
111 | -- Create a small plotting helper around `plot`, an object providing :line{} and
112 | -- :updateTrace{} (NOTE(review): looks like a visdom client - confirm).
113 | -- `legend` is an array of trace names and `title` the window title. The
114 | -- returned table has one function, plot(name, x, y): the first 2 * #legend
115 | -- calls only buffer y (two rows of #legend values); the call that fills the
116 | -- buffer creates the window, and every later call appends one point to `name`.
117 | M.plotter = function(plot,title, legend)
118 |    local column_of = M.tableinvert(legend)   -- trace name -> legend position
119 |    local n = #legend
120 |    local ybuff = torch.DoubleTensor(2, n)
121 |    local xbuff = torch.DoubleTensor(1, 2)
122 |    local calls = 0
123 |    local win   -- window handle returned by plot:line once the plot exists
124 | 
125 |    local function add_point(name, x, y)
126 |       if win then
127 |          plot:updateTrace{
128 |             win      = win,
129 |             name     = name,
130 |             X        = torch.DoubleTensor{x},
131 |             Y        = torch.DoubleTensor{y},
132 |             append   = true,
133 |             options  = {
134 |                title = 'Updated at: ' .. os.date(" %H:%M:%S"),
135 |             },
136 |          }
137 |          return
138 |       end
139 | 
140 |       calls = calls + 1
141 |       local row = math.ceil(calls / n)
142 |       ybuff[row][column_of[name]] = y
143 |       xbuff[1][row] = x   -- recorded but never passed to plot:line
144 |       if calls == 2*n then
145 |          win = plot:line{
146 |             Y        = ybuff:view(2,n),
147 |             options  = {
148 |                legend = legend, title = title,
149 |             },
150 |          }
151 |          calls, xbuff, ybuff = nil, nil, nil   -- buffers are no longer needed
152 |       end
153 |    end
154 | 
155 |    return { plot = add_point }
156 | end
157 |          
158 | 
159 | 
160 | -- Decode the JSON document stored in file `fn` and return the resulting Lua value.
161 | M.load_json = function(fn)
162 |    local cjson = require'cjson'
163 |    local file = assert(io.open(fn, 'r')) -- fail with the path in the message
164 |    -- '*a' reads the entire file; a bare read() returns only the first line,
165 |    -- which breaks on pretty-printed (multi-line) JSON.
166 |    local text = file:read('*a')
167 |    file:close() -- closed before decoding so a decode error cannot leak the handle
168 |    return cjson.decode(text)
169 | end
170 | -- Load the JSON file `fn` and mirror it as a tds.Hash of tds.Hash objects.
171 | -- Each top-level value is read via `#` and integer indices 1..#value.
172 | M.json_to_hash2 = function(fn )
173 |    local tds = require'tds'
174 |    local decoded = M.load_json(fn)
175 |    local result = tds.Hash()
176 |    for name, list in pairs(decoded) do
177 |       result[name] = tds.Hash()
178 |       for pos = 1, #list do
179 |          result[name][pos] = list[pos]
180 |       end
181 |    end
182 |    return result
183 | end
184 | 
185 |    
186 | -- Append the records found under `key` in the JSON file `fn` to a tds.Hash
187 | -- (the supplied `_hash`, or a new one) and return it. Each record becomes its
188 | -- own tds.Hash stored at index #hash + 1.
189 | M.json_to_hash = function(fn, key, _hash)
190 |    local tds = require'tds'
191 |    local records = M.load_json(fn)[key]
192 |    local hash = _hash or tds.Hash()
193 |    for r = 1, #records do
194 |       local slot = #hash + 1
195 |       hash[slot] = tds.Hash()
196 |       for field, value in pairs(records[r]) do
197 |          hash[slot][field] = value
198 |       end
199 |    end
200 |    return hash
201 | end
202 | 
203 | -- Normalise text for tokenisation: lower-case it, then apply the ordered
204 | -- substitutions below (order matters, e.g. ".com" must go before ".").
205 | M.preprocess_string = function(str)
206 |     local rules = {
207 |         {'"', ''}, {'\'s', ''}, {'@', ' '}, {'%w+%.com', 'url'}, {'&', ' '},
208 |         {'%(', ''}, {'%)', ''}, {'\'', ''}, {'#', ' '}, {'!', ' '}, {',', ' '},
209 |         {'-', ' '}, {'`', ' '}, {'%$', 'dollar '}, {'/', ' '}, {'%.', ' '},
210 |         {';', ' '}, {'>', ' '}, {':', ' '}, {'_', ' '}, {'%*', ''}, {'?', ' '},
211 |         {'%d+', ' digit '}, {' +', ' '},
212 |     }
213 |     str = str:lower()
214 |     for _, rule in ipairs(rules) do str = str:gsub(rule[1], rule[2]) end
215 |     return str
216 | end
217 | return M
218 | 


--------------------------------------------------------------------------------
/vqa2_data/README.md:
--------------------------------------------------------------------------------
 1 | Download and extract the VQAv2 data from:
 2 | 
 3 | * [Train questions](http://visualqa.org/data/mscoco/vqa/v2_Questions_Train_mscoco.zip)
 4 | * [Val questions](http://visualqa.org/data/mscoco/vqa/v2_Questions_Val_mscoco.zip)
 5 | * [Test questions](http://visualqa.org/data/mscoco/vqa/v2_Questions_Test_mscoco.zip)
 6 | * [Train answers](http://visualqa.org/data/mscoco/vqa/v2_Annotations_Train_mscoco.zip)
 7 | * [Val answers](http://visualqa.org/data/mscoco/vqa/v2_Annotations_Val_mscoco.zip) 
 8 | 
 9 | When done, the directory should contain:
10 | ```
11 | ../vqa2_data/
12 | |-- README.md
13 | |-- v2_mscoco_train2014_annotations.json
14 | |-- v2_mscoco_train2014_complementary_pairs.json
15 | |-- v2_mscoco_val2014_annotations.json
16 | |-- v2_mscoco_val2014_complementary_pairs.json
17 | |-- v2_OpenEnded_mscoco_test2015_questions.json
18 | |-- v2_OpenEnded_mscoco_test-dev2015_questions.json
19 | |-- v2_OpenEnded_mscoco_train2014_questions.json
20 | `-- v2_OpenEnded_mscoco_val2014_questions.json
21 | 
22 | 0 directories, 9 files
23 | 
24 | ```
25 | 


--------------------------------------------------------------------------------