├── models
    ├── classifier.lua
    ├── pairwisecosine_GEfix.lua
    ├── motion_embedding.lua
    ├── motion_row.lua
    └── image_row.lua
├── LICENSE
├── main.lua
├── data.lua
├── README.md
├── layers
    └── SmoothPairwiseCosineSimilarity.lua
├── io
    ├── compute_mean_std.lua
    ├── readGEFiles.lua
    ├── readBatch.lua
    └── readFiles.lua
├── train_functions.lua
├── setting_options.lua
├── networks
    └── ModelConstruction_IM_GEFixParallel.lua
└── utils.lua


/models/classifier.lua:
--------------------------------------------------------------------------------
1 | return nn.Sequential():add(nn.Linear(4096,config.nClasses)):add(nn.LogSoftMax()) -- classifier head: linear map from 4096 inputs to config.nClasses outputs, followed by log-softmax


--------------------------------------------------------------------------------
/models/pairwisecosine_GEfix.lua:
--------------------------------------------------------------------------------
1 | local mlp = nn.Sequential();
2 | -- Scoring head: pairwise smooth cosine similarities -> exp -> sum over the last axis -> log-softmax.
3 | mlp:add(nn.SmoothPairwiseCosineSimilarity());
4 | mlp:add(nn.Reshape(config.batchSize,config.nClasses,10,false)); -- view the similarities as batchSize x nClasses x 10
5 | mlp:add(nn.Exp());
6 | mlp:add(nn.Sum(3)); -- collapse the size-10 axis, leaving batchSize x nClasses
7 | -- Module constructors are fields of the `nn` table: call them with '.', since ':' would pass `nn` itself as an argument.
8 | mlp:add(nn.LogSoftMax())
9 | return mlp;


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | N3: Newtonian Image Understanding.
 2 | 
 3 | All rights reserved.
 4 | 
 5 | MIT License
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a
 8 | copy of this software and associated documentation files (the "Software"),
 9 | to deal in the Software without restriction, including without limitation
10 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 | and/or sell copies of the Software, and to permit persons to whom the
12 | Software is furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included
15 | in all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | OTHER DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/models/motion_embedding.lua:
--------------------------------------------------------------------------------
 1 | local mlp=nn.Sequential()
 2 | -- Embedding network for the animation features; the input is first viewed as nCategories x 1 x 10 x 4096.
 3 | mlp:add(nn.Reshape(config.nCategories,1,10,4096))
 4 | -- NOTE(review): the next line overwrites the global command-line table entry arg[6] with a fixed variant string.
 5 | arg[6] = "10*4096FC_1_bn"
 6 | -- Split "<layer type>_<variant>_<bn flag>" on underscores; for the string above: m="10*4096FC", var="1", bnorm="bn".
 7 | local m, var, bnorm = arg[6]:match("([^_]+)_([^_]+)_([^_]+)")
 8 | 
 9 | if m=="2*convolve" then -- two stacked convolutions with a ReLU in between
10 | 	mlp:add(cudnn.SpatialConvolution(1,10,1001,1,1,1,500,0))
11 | 	mlp:add(nn.ReLU(true))
12 | 	mlp:add(cudnn.SpatialConvolution(10,20,1,7,1,1,0,3))
13 | elseif m=="10*4096FC" then -- the branch selected by the hard-coded string above
14 | 	mlp:add(nn.Reshape(config.nCategories*10,4096,false)) -- nCategories*10 rows of 4096 values
15 | 	mlp:add(nn.Linear(4096,4096))
16 | 	if var == "2" then -- variant "2" stacks a second Linear behind a ReLU
17 | 		mlp:add(nn.ReLU(true))
18 | 		mlp:add(nn.Linear(4096,4096))
19 | 	end
20 | 	mlp:add(nn.Reshape(config.nCategories,1,10,4096,false)) -- back to the 4-D layout
21 | elseif m=="4096*10FC" then -- uses config.nClasses where the other branches use config.nCategories (set equal in setting_options.lua)
22 | 	mlp:add(nn.Transpose{3,4})
23 | 	mlp:add(nn.Reshape(config.nClasses*4096, 10, false))
24 | 	mlp:add(nn.Linear(10,10)) -- fully connected layer across the size-10 axis
25 | 	if var == "2" then
26 | 		mlp:add(nn.ReLU(true))
27 | 		mlp:add(nn.Linear(10,10))
28 | 	end
29 | 	mlp:add(nn.Reshape(config.nClasses, 1, 4096, 10, false))
30 | 	mlp:add(nn.Transpose{3,4}) -- undo the transpose applied at the start of this branch
31 | else -- any other layer type: a single convolution
32 | 	mlp:add(cudnn.SpatialConvolution(1,20,1,7,1,1,0,3))
33 | end
34 | -- Common tail: max over dimension 2, ReLU, then flatten to (nCategories*10) x 4096.
35 | mlp:add(nn.Max(2))
36 | mlp:add(nn.ReLU(true))
37 | mlp:add(nn.Reshape(config.nCategories*10,4096,false))
38 | -- Batch normalisation over the 4096 features is appended only when the third field of the variant string is "bn".
39 | if bnorm == "bn" then
40 | 	mlp:add(nn.BatchNormalization(4096, 1e-3))
41 | end
42 | 
43 | return mlp;


--------------------------------------------------------------------------------
/models/motion_row.lua:
--------------------------------------------------------------------------------
 1 | local C3D = nn.Sequential()
 2 | -- Five conv / ReLU / max-pool groups; only the first differs (10 input planes instead of 64).
 3 | --------------------- Convolutional Layers ------------------
 4 | ----------------------- 1st layer group ---------------------
 5 | C3D:add(cudnn.VolumetricConvolution(10,64,3,3,3,1,1,1,1,1,1))      -- Conv1a
 6 | C3D:add(cudnn.ReLU(true))
 7 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2)) -- pooling kernel 1x2x2
 8 | ----------------------- 2nd layer group ---------------------
 9 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1))    -- Conv2a
10 | C3D:add(cudnn.ReLU(true))
11 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
12 | ----------------------- 3rd layer group ---------------------
13 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1))   -- Conv3a
14 | C3D:add(cudnn.ReLU(true))
15 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
16 | ----------------------- 4th layer group ---------------------
17 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1))   -- Conv4a
18 | C3D:add(cudnn.ReLU(true))
19 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
20 | ----------------------- 5th layer group ---------------------
21 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1))   -- Conv5a
22 | C3D:add(cudnn.ReLU(true))
23 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
24 | -- Head: maximum over dimension 3, flatten to 64*8*8 values, ReLU, then dropout with p = 0.5.
25 | C3D:add(nn.Max(3))
26 | C3D:add(nn.View(64*8*8))     
27 | C3D:add(cudnn.ReLU(true)) 
28 | C3D:add(nn.Dropout(0.5))
29 | 
30 | return C3D
31 | 


--------------------------------------------------------------------------------
/main.lua:
--------------------------------------------------------------------------------
 1 | -- Usage: th main.lua {train|test}
 2 | -- `mode` is deliberately global: setting_options.lua and models/image_row.lua read it.
 3 | mode = arg[1]
 4 | assert (mode=='train' or mode=='test', "Bad arguments. Usage th main.lua {train|test}")
 5 | 
 6 | require 'cunn'
 7 | -- require 'fbcunn'
 8 | require 'cudnn'
 9 | require 'xlua'
10 | require 'optim'
11 | require 'math'
12 | require 'gnuplot'
13 | require 'sys'
14 | require 'image'
15 | 
16 | mattorch = require('fb.mattorch'); -- global: used by LoadCaffeMeanStd in io/compute_mean_std.lua
17 | pl = require'pl.import_into'()
18 | debugger = require('fb.debugger');
19 | 
20 | -- load the options first (this defines the global `config`), then select the GPU
21 | paths.dofile('setting_options.lua');
22 | cutorch.setDevice(config.GPU);
23 | torch.manualSeed(config.GPU); -- fix the random seed for ease of debugging; the seed value is the GPU index
24 | ----------------------------
25 | paths.dofile('utils.lua');
26 | ----------------------------
27 | paths.dofile('data.lua');
28 | ----------------------------------
29 | paths.dofile('layers/SmoothPairwiseCosineSimilarity.lua');
30 | -----------------------------
31 | paths.dofile('networks/ModelConstruction_IM_GEFixParallel.lua');
32 | --------------------------------
33 | paths.dofile('train_functions.lua');
34 | ------------------------------
35 | log(config)
36 | -- test: one iteration per test video, with the full saved network; train: load the image/animation initial weights
37 | if mode == 'test' then
38 | 	config.nIter = GetVideoCount(testset) -- replaces the nIter taken from the test metadata
39 | 	model:LoadModelFull(config.initModelPath.fullNN)
40 | 	log(model.fullNN)
41 | 	test()
42 | else
43 | 	model:LoadModel(config.initModelPath.imageNN,config.initModelPath.animNN)
44 | 	log(model.fullNN)
45 | 	train()
46 | end
47 | 


--------------------------------------------------------------------------------
/models/image_row.lua:
--------------------------------------------------------------------------------
 1 | -- AlexNet-style image tower ending in a 4096-d feature vector.
 2 | -- The first convolution takes 3 image planes, plus 1 more when the mask input is enabled for the current mode.
 3 | require 'cudnn'
 4 | require 'cunn'
 5 | require 'inn'
 6 | local Conv, ReLU, MaxPool = cudnn.SpatialConvolution, cudnn.ReLU, cudnn.SpatialMaxPooling
 7 | local LRN = inn.SpatialCrossResponseNormalization
 8 | local use_mask = false
 9 | if mode == 'train' then use_mask = config.train.mask.enable
10 | elseif mode == 'test' then use_mask = config.test.mask.enable end
11 | local net = nn.Sequential()
12 | net:add(Conv(use_mask and 4 or 3, 96, 11, 11, 4, 4, 0, 0, 1))
13 | net:add(ReLU(true))
14 | net:add(LRN(5, 0.0001, 0.75, 1))
15 | net:add(MaxPool(3, 3, 2, 2, 0, 0):ceil())
16 | net:add(Conv(96, 256, 5, 5, 1, 1, 2, 2, 2))
17 | net:add(ReLU(true))
18 | net:add(LRN(5, 0.0001, 0.75, 1))
19 | net:add(MaxPool(3, 3, 2, 2, 0, 0):ceil())
20 | net:add(Conv(256, 384, 3, 3, 1, 1, 1, 1, 1))
21 | net:add(ReLU(true))
22 | net:add(Conv(384, 384, 3, 3, 1, 1, 1, 1, 2))
23 | net:add(ReLU(true))
24 | net:add(Conv(384, 256, 3, 3, 1, 1, 1, 1, 2))
25 | net:add(ReLU(true))
26 | net:add(LRN(5, 0.0001, 0.75, 1))
27 | net:add(MaxPool(3, 3, 2, 2, 0, 0):ceil())
28 | net:add(nn.View(-1):setNumInputDims(3))  -- flatten the 3-D feature map of each sample
29 | net:add(nn.Linear(9216, 4096))
30 | net:add(ReLU(true))
31 | net:add(nn.Dropout(config.dropoutProb))
32 | net:add(nn.Linear(4096, 4096))
33 | net:add(ReLU(true))
34 | 
35 | return net


--------------------------------------------------------------------------------
/data.lua:
--------------------------------------------------------------------------------
 1 | paths.dofile('io/readFiles.lua')
 2 | paths.dofile('io/readGEFiles.lua');
 3 | paths.dofile('io/compute_mean_std.lua');
 4 | paths.dofile('io/readBatch.lua');
 5 | --- Resume a batch-producing coroutine and hand back the (input, target) pair it yields.
 6 | -- coroutine.resume reports failure through its first return value instead of raising, so a
 7 | -- crashed or finished coroutine would otherwise surface as a bogus `input` (the error message)
 8 | -- and a nil `target`. Raise instead, attaching the coroutine's own traceback.
 9 | local function ResumeBatchCoroutine(co, ...)
10 |   local status, input, target = coroutine.resume(co, ...)
11 |   if not status then
12 |     error(debug.traceback(co, tostring(input)), 0)
13 |   end
14 |   return input, target
15 | end
16 | 
17 | -- Next siamese batch; of `opt`, only opt.test is forwarded to the coroutine.
18 | function GetASiameseBatch(nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages, opt)
19 |   return ResumeBatchCoroutine(GetASiameseBatchCoroutine,
20 |               nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages, opt.test)
21 | end
22 | 
23 | -- Next image batch; forwards opt.viewpoint, opt.test, opt.deterministic and opt.spline.
24 | function GetAnImageBatch(batchSize, opt)
25 |   return ResumeBatchCoroutine(GetAnImageBatchCoroutine,
26 |                               batchSize, opt.viewpoint, opt.test,
27 |                               opt.deterministic, opt.spline)
28 | end
29 | 
30 | -- Next batch from the uniform image sampler; forwards opt.viewpoint, opt.test and opt.spline.
31 | function GetAUniformImageBatch(batchSize, opt)
32 |   return ResumeBatchCoroutine(GetAUniformImageBatchCoroutine,
33 |                               batchSize, opt.viewpoint, opt.test,
34 |                               opt.spline)
35 | end
36 | 
37 | -- Next batch from the uniform animation sampler; forwards opt.viewpoint and opt.spline.
38 | function GetAUniformAnimationBatch(batchSize, opt)
39 |   return ResumeBatchCoroutine(GetAUniformAnimationBatchCoroutine,
40 |                               batchSize, opt.viewpoint, opt.spline)
41 | end
42 | 
43 | -- Next video batch; forwards opt.viewpoint, opt.test and opt.spline.
44 | function GetAVideoBatch(opt)
45 |   return ResumeBatchCoroutine(GetAVideoBatchCoroutine,
46 |                               opt.viewpoint, opt.test, opt.spline)
47 | end
48 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # N<sup>3</sup>: Newtonian Image Understanding: Unfolding the Dynamics of Objects in Static Images
 2 | This is the source code for Newtonian Neural Networks N<sup>3</sup>, which predicts the dynamics of objects in scenes.
 3 | 
 4 | ### Citation
 5 | If you find N<sup>3</sup> useful in your research, please consider citing:
 6 | ```
 7 | @inproceedings{mottaghiCVPR16N3,
 8 |     Author = {Roozbeh Mottaghi and Hessam Bagherinezhad and Mohammad Rastegari and Ali Farhadi},
 9 |     Title = {Newtonian Image Understanding: Unfolding the Dynamics of Objects in Static Images},
10 |     Booktitle = {CVPR},
11 |     Year = {2016}
12 | }
13 | ```
14 | 
15 | ### Requirements
16 | This code is written in Lua, based on [Torch](http://torch.ch). If you are on [Ubuntu 14.04+](http://ubuntu.com), you can follow [these instructions](https://github.com/facebook/fbcunn/blob/master/INSTALL.md) to install Torch.
17 | 
18 | You need the [VIND dataset](https://docs.google.com/forms/d/1OROeoj55hfhwiMsDuVyzMgfnhatTUOBGz0qGnMXor4Y/viewform). Extract it in the current directory, and rename it to `VIND`. Or you can put it somewhere else and change the `config.DataRootPath` in `setting_options.lua`.
19 | 
20 | ### Training
21 | To run the training:
22 | ```
23 | th main.lua train
24 | ```
25 | 
26 | This trains the model on the training data and, once every 10 iterations, evaluates it on one `val_images` batch. If you want to validate on `val_videos` instead, go to `setting_options.lua` and change the line `valmeta = imvalmeta` to `valmeta = vidvalmeta`.
27 | 
28 | ### Test
29 | You need to [get the weights](https://drive.google.com/file/d/0B7H3g3rb2Blwcm51dXdKbGxzLTQ/view). Extract them in the current directory and rename the extracted directory to `weights`. To run the test:
30 | ```
31 | th main.lua test
32 | ```
33 | 
34 | ### License
35 | This code is released under MIT License.
36 | 


--------------------------------------------------------------------------------
/layers/SmoothPairwiseCosineSimilarity.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 | Input: a table of two inputs {M, k}, where
 3 |   M = an n-by-d matrix
 4 |   k = an m-by-d matrix
 5 | Output: a n-by-m matrix
 6 | Element (i, j) is an approximation of the cosine similarity between row i of M and
 7 | row j of k. It's an approximation since we add a constant to the
 8 | denominator of the cosine similarity function to remove the singularity when
 9 | one of the inputs is zero. 
10 | --]]
11 | 
12 | -- This file is a modified version of https://github.com/kaishengtai/torch-ntm/blob/master/layers/SmoothCosineSimilarity.lua
13 | 
14 | local SmoothPairwiseCosineSimilarity, parent = torch.class('nn.SmoothPairwiseCosineSimilarity', 'nn.Module')
15 | 
16 | function SmoothPairwiseCosineSimilarity:__init(smoothen)
17 |   parent.__init(self)
18 |   if smoothen then self.smooth = smoothen else self.smooth = 1e-5 end  -- constant added to the denominator
19 |   self.gradInput = {}  -- a table: updateGradInput stores one gradient tensor per input
20 | end
21 | 
22 | function SmoothPairwiseCosineSimilarity:updateOutput(input)
23 |   local rows, cols = input[1], input[2]  -- M (n-by-d) and k (m-by-d)
24 |   assert(rows:size(2) == cols:size(2), "ERROR: dimensions are not equal !!!")
25 |   -- Euclidean norm of every row of each input, flattened to a 1-D tensor
26 |   self.rownorms = torch.cmul(rows, rows):sum(2):sqrt():view(rows:size(1))
27 |   self.colnorms = torch.cmul(cols, cols):sum(2):sqrt():view(cols:size(1))
28 |   self.rowcol, self.dot = torch.ger(self.rownorms, self.colnorms), rows * (cols:t())
29 |   self.output:set(torch.cdiv(self.dot, self.rowcol + self.smooth))  -- dot / (norm product + smooth)
30 |   return self.output
31 | end
32 | 
33 | function SmoothPairwiseCosineSimilarity:updateGradInput(input, gradOutput)
34 |   local M, k = unpack(input)
35 |   local nrow = M:size(1);
36 |   local ncol = k:size(1);
37 |   local ndim = k:size(2);
38 |   -- Notation: o = self.output, r = self.rownorms, c = self.colnorms, s = self.smooth, g = gradOutput.
39 |   self.gradInput[1] = self.gradInput[1] or input[1].new()
40 |   self.gradInput[2] = self.gradInput[2] or input[2].new()
41 |   -- Forward pass: o[i][j] = (M[i] . k[j]) / (r[i]*c[j] + s). Each gradient is a direct term minus a norm term.
42 |   -- The norm terms divide by (norm + s) rather than the norm itself -- presumably to stay finite for all-zero rows.
43 |   -- M gradient: sum_j g[i][j] * k[j] / (r[i]*c[j] + s)  -  scale[i] * M[i]
44 |   self.gradInput[1]:set(torch.cdiv(gradOutput, self.rowcol + self.smooth)*k)
45 |     local scale = torch.cmul(self.output, (torch.repeatTensor(self.colnorms,nrow,1)))
46 |       :cdiv(self.rowcol + self.smooth)
47 |       :cmul(gradOutput):sum(2)
48 |       :cdiv(self.rownorms+self.smooth)
49 |     self.gradInput[1]:add(torch.cmul(-torch.repeatTensor(scale,1,ndim), M))
50 |   -- here scale[i] = sum_j o[i][j] * c[j] * g[i][j] / (r[i]*c[j] + s) / (r[i] + s), an nrow-by-1 tensor
51 |   -- k gradient
52 |   self.gradInput[2]:set(torch.cdiv(gradOutput, self.rowcol + self.smooth):t()* M)
53 |     local scale = torch.cmul(self.output, (torch.repeatTensor(self.rownorms,ncol,1):t()))
54 |       :cdiv(self.rowcol + self.smooth)
55 |       :cmul(gradOutput):sum(1)
56 |       :cdiv(self.colnorms+self.smooth)
57 |     self.gradInput[2]:add(torch.cmul(-torch.repeatTensor(scale,ndim,1):t(), k))
58 |   -- this second `scale` shadows the first: it sums over rows (dimension 1) and is a 1-by-ncol tensor
59 |   -- returns the table {gradient w.r.t. M, gradient w.r.t. k}
60 |   return self.gradInput
61 | end
62 | 


--------------------------------------------------------------------------------
/io/compute_mean_std.lua:
--------------------------------------------------------------------------------
 1 | function compute_mean_std(dataset, dataset_GE)
 2 |   -- Fills in `mean` and `std` for every enabled input type of config.train (copied to the
 3 |   -- matching config.test entry when that one is enabled too) and of config.GE.
 4 |   -- Results are cached in hidden .t7 files and only recomputed when the cache file is missing.
 5 |   -- dataset / dataset_GE are handed to MakeListTrainFrames / MakeListGEFrames to list the frames.
 6 | 
 7 |   --------------------  COMPUTE MEAN AND STD OF REAL VIDEOS -------------------
 8 |   for input_type, train_config in pairs(config.train) do
 9 |     if type(train_config) == 'table' and train_config.enable then
10 |       local test_config = config.test[input_type]
11 |       local meanstdFile = config.train.annotation.dir .. '/.meanstd_real_' .. input_type .. '.t7';
12 |       if paths.filep(meanstdFile) then
13 |         local meanstd = torch.load(meanstdFile)
14 |         train_config.mean = meanstd.mean;
15 |         train_config.std  = meanstd.std;
16 |       else
17 |         local trainDir = train_config.dir;
18 |         local allfiles = MakeListTrainFrames(dataset, trainDir, train_config.type);
19 |         -- NOTE(review): 1000 is presumably the number of sampled frames -- confirm against ComputeMeanStd.
20 |         train_config.mean, train_config.std = ComputeMeanStd(1000, allfiles, config.imH, config.imW);
21 |         -- cache the statistics for later runs
22 |         torch.save(meanstdFile, {mean = train_config.mean, std = train_config.std});
23 |       end
24 |       -- the test/validation entry shares the training statistics, whether loaded or freshly computed
25 |       if test_config and test_config.enable then
26 |         test_config.mean, test_config.std = train_config.mean, train_config.std;
27 |       end
28 |     end
29 |   end
30 | 
31 |   -----------------  COMPUTE MEAN AND STD OF GAME ENGINE VIDEOS ----------------
32 | 
33 |   for input_type, conf in pairs(config.GE) do
34 |     if type(conf) == 'table' and conf.enable then
35 |       local meanstdFile = config.GE.dir .. '/.meanstd_GE_' .. input_type .. '.t7';
36 |       if paths.filep(meanstdFile) then
37 |         local meanstd = torch.load(meanstdFile)
38 |         conf.mean = meanstd.mean;
39 |         conf.std  = meanstd.std;
40 |       else
41 |         local allfiles = MakeListGEFrames(dataset_GE, conf.suffix);
42 |         -- Pass height then width, mirroring the real-video call above. Passing imH for both
43 |         -- only gives the right result while config.GE.imH == config.GE.imW.
44 |         conf.mean, conf.std = ComputeMeanStd(1000, allfiles, config.GE.imH, config.GE.imW);
45 |         torch.save(meanstdFile, {mean = conf.mean, std = conf.std});
46 |       end
47 |     end
48 |   end
49 | end
50 | 
51 | function LoadCaffeMeanStd(meanFilePath)
52 |   -- Overwrites mean/std entries 1..3 of every enabled training input type with values taken from a
53 |   -- Caffe mean file: the mean of slice i of `mean_data` divided by 255, and a constant std of 1/255.
54 |   local caffe = mattorch.load(meanFilePath)
55 |   for name, train_entry in pairs(config.train) do
56 |     if type(train_entry) == 'table' and train_entry.enable then
57 |       local test_entry = config.test[name]
58 |       for channel = 1, 3 do
59 |         train_entry.mean[channel] = caffe.mean_data:select(3, channel):mean() / 255
60 |         train_entry.std[channel]  = 1/255
61 |       end
62 |       if test_entry and test_entry.enable then test_entry.mean, test_entry.std = train_entry.mean, train_entry.std end
63 |     end
64 |   end
65 | end
66 | 


--------------------------------------------------------------------------------
/train_functions.lua:
--------------------------------------------------------------------------------
  1 | log('Loading Train Functions ... ')
  2 | 
  3 | function train()
  4 |   config.testing = false
  5 |   -- Runs config.nIter iterations: load a batch, take one update step, and periodically log, evaluate and save.
  6 |   local batchSize = config.batchSize;
  7 |   local animFeatures = GetAnimationFeatures(model.animationNN);
  8 |   -- animFeatures is computed once, before the loop, and reused as the second network input of every batch
  9 |   for iter=1,config.nIter do
 10 |     ---- load one batch
 11 |     tt = iter -- global; NOTE(review): not read in this file, presumably consumed elsewhere
 12 |     local tic= os.clock()
 13 |     local imgFeatures, TrTarget = GetAUniformImageBatch(batchSize, {
 14 |                               viewpoint = true,
 15 |                               test      = false,
 16 |                               spline    = false,
 17 |                               })
 18 |     local TrInput = {imgFeatures,animFeatures};
 19 |     local toc = os.clock() - tic;
 20 |     log('loading time :' .. tostring(toc))
 21 |     -- the learning rate is looked up from the schedule on every iteration
 22 |     -------- train the network--------------
 23 |     model.learningRate = model:LearningRateComp(iter);
 24 |     local acc, loss = model:TrainOneBatch(TrInput,TrTarget);
 25 |     if (iter % 10) == 0 then -- force a garbage collection every 10 iterations and report how long it took
 26 |       local  tic = os.clock()
 27 |       collectgarbage();
 28 |       local toc = os.clock() - tic;
 29 |       print("garbage collection :", toc)
 30 |     end
 31 |     if (iter % config.nDisplay) == 0 then
 32 |       log(('Iter = %d | Train Accuracy = %f | Train Loss = %f\n'):format(iter,acc,loss));
 33 |     end
 34 |     -- every config.nEval iterations, evaluate on one batch drawn with test = true
 35 |     if (iter % config.nEval) == 0 then
 36 |       local TeInput, TeTarget = GetAUniformImageBatch(batchSize, {
 37 |                                   viewpoint = true,
 38 |                                   test      = true,
 39 |                                   spline    = false,
 40 |                                   });      
 41 |       local acc, loss = model:EvaluateOneBatch(TeInput,TeTarget); -- shadows the training acc/loss inside this block
 42 |       log(('Testing ---------> Iter = %d | Test Accuracy = %f | Test Loss = %f\n'):format(iter,acc,loss));
 43 |     end
 44 |     -- every config.saveModelIter iterations, write a checkpoint into the log directory
 45 |     if (iter % config.saveModelIter) == 0 then
 46 |       local fileName = 'Model_iter_' .. iter .. '.t7';
 47 |       log('Saving NN model in ----> ' .. paths.concat(config.logDirectory, fileName) .. '\n');
 48 |       model:SaveModel(paths.concat(config.logDirectory, fileName));
 49 |     end
 50 | 
 51 |   end
 52 | end
 53 | 
 54 | 
 55 | ---------------------------------------------------------
 56 | function test()
 57 |   -- Evaluates the model on config.nIter deterministic test batches and logs per-batch and running accuracy.
 58 |   -- Returns: the mean accuracy over all batches, the summed per-class statistics, and a matrix with one row
 59 |   -- per sample holding (target, prediction) -- nil when no batch was evaluated.
 60 |   config.testing = true
 61 | 
 62 |   local batchSize = config.batchSize;
 63 |   local meanAcc = 0;
 64 |   local per_class_cum = torch.Tensor(config.nCategories, 2):fill(0)
 65 |   local prediction_chunks = {}  -- one (target | prediction) matrix per batch
 66 | 
 67 |   for iter=1,config.nIter do
 68 |     tt = iter
 69 |     ---- load one batch
 70 |     local tic= os.clock()
 71 |     local TeInput, TeTarget = GetAnImageBatch(batchSize, {
 72 |                                         viewpoint     = true,
 73 |                                         test          = true,
 74 |                                         deterministic = true,
 75 |                                         spline        = false,
 76 |                                       });
 77 |     local toc = os.clock() - tic;
 78 |     log('loading time :' .. tostring(toc))
 79 | 
 80 |     if (iter % 10) == 0 then
 81 |       local  tic = os.clock()
 82 |       collectgarbage();
 83 |       local toc = os.clock() - tic;
 84 |       print("garbage collection :", toc)
 85 |     end
 86 |     local acc, _, per_class, predicts = model:EvaluateOneBatch(TeInput,TeTarget);
 87 |     -- incremental mean over the batches seen so far
 88 |     meanAcc = ((iter -1)* meanAcc + acc)/ iter;
 89 |     per_class_cum = per_class_cum + per_class
 90 |     log(('Iter = %d | Current Test Accuracy = %f | Average Test Accuracy = %f\n'):format(iter,acc,meanAcc));
 91 |     -- keep the chunk and concatenate once after the loop, instead of re-copying a growing tensor per batch
 92 |     prediction_chunks[#prediction_chunks + 1] = torch.cat(TeTarget, predicts, 2)
 93 |   end
 94 | 
 95 |   -- torch.cat rejects an empty list, so guard the case where no batch was evaluated
 96 |   local all_predictions = #prediction_chunks > 0 and torch.cat(prediction_chunks, 1) or nil
 97 |   log(('Final Average Test Accuracy = %f\n'):format(meanAcc));
 98 |   return meanAcc, per_class_cum, all_predictions
 99 | end
100 | 


--------------------------------------------------------------------------------
/setting_options.lua:
--------------------------------------------------------------------------------
  1 | ---- options
  2 | config={};
  3 | 
  4 | config.GPU  = 1
  5 | config.nGPU = 1
  6 | 
  7 | config.DataRootPath = "VIND"
  8 | config.SaveRootPath = "logs"
  9 | config.CacheRootPath = "cache"
 10 | -- Per-run log directory: logs/LOG_<user>_<date with spaces replaced by '-'>. USER may be unset, so fall back to a placeholder instead of failing on a nil concatenation.
 11 | config.logDirectory = config.SaveRootPath .. '/' .. "LOG_" .. (os.getenv('USER') or 'unknown') .. "_" .. os.date():gsub(' ','-');
 12 | os.execute('mkdir -p ' .. config.logDirectory)
 13 | config.logFile = assert(io.open(paths.concat(config.logDirectory, 'log.txt'), 'w'))
 14 | 
 15 | config.GE = { -- settings for the game-engine videos; the table-valued entries are the input types
 16 |   image = {
 17 |     suffix = 'im', -- passed to MakeListGEFrames by compute_mean_std
 18 |     mean = {}, -- mean/std are filled in by compute_mean_std when `enable` is true
 19 |     std = {},
 20 |     nChannels = 3,
 21 |     enable = true,
 22 |   },
 23 |   depth = {
 24 |     suffix = 'depth',
 25 |     mean = {},
 26 |     std = {},
 27 |     nChannels = 1,
 28 |     enable = true,
 29 |   },
 30 |   normal = {
 31 |     suffix = 'normal',
 32 |     mean = {},
 33 |     std = {},
 34 |     nChannels = 3,
 35 |     enable = true,
 36 |   },
 37 |   flow = {
 38 |     suffix = 'flow',
 39 |     mean = {},
 40 |     std = {},
 41 |     nChannels = 3,
 42 |     enable = true,
 43 |   },
 44 |   imH               = 256, -- frame size handed to ComputeMeanStd for the game-engine frames
 45 |   imW               = 256,
 46 |   frame_per_video   = 10,
 47 |   use_multiple_vars = false,
 48 |   dir               = config.DataRootPath .. "/ge_videos", -- the .meanstd_GE_<type>.t7 caches are also written here
 49 |   saveDir           = config.CacheRootPath .. "/ge_cache",
 50 |   featsDir          = config.CacheRootPath .. "/ge_feats",
 51 |   splinesFile       = config.DataRootPath .. "/ge_videos/.splines.mat",
 52 | }
 53 | 
 54 | config.imH = 227; -- frame size handed to ComputeMeanStd for the real (non game-engine) frames
 55 | config.imW = 227;
 56 | config.max_angles = 8;
 57 | 
 58 | config.train = { -- training split; the table-valued entries with `enable` are the input types
 59 |   annotation = { -- has no `enable` field, so compute_mean_std skips it
 60 |     dir = config.DataRootPath .. "/train/labels", -- the .meanstd_real_<type>.t7 caches are also written here
 61 |   },
 62 |   image = {
 63 |     dir       = config.DataRootPath .. "/train/images",
 64 |     nChannels = 3,
 65 |     type      = "png",
 66 |     suffix    = "im",
 67 |     mean      = {}, -- mean/std are filled in by compute_mean_std / LoadCaffeMeanStd
 68 |     std       = {},
 69 |     enable    = true,
 70 |     croppable = true,
 71 |   },
 72 |   depth = {
 73 |     enable = false,
 74 |   },
 75 |   normal = {
 76 |     enable = false,
 77 |   },
 78 |   flow = {
 79 |     enable = false,
 80 |   },
 81 |   mask = {
 82 |     dir       = config.DataRootPath .. "/train/objmask",
 83 |     nChannels = 1,
 84 |     type      = "png",
 85 |     suffix    = "mask",
 86 |     mean      = {},
 87 |     std       = {},
 88 |     enable    = true, -- when true in train mode, models/image_row.lua adds a fourth input plane
 89 |   },
 90 |   save_dir = config.CacheRootPath .. "/train_cache",
 91 |   batch_size = 128, -- becomes config.batchSize unless mode == 'test'
 92 |   nIter = 1000000, -- becomes config.nIter unless mode == 'test'
 93 | }
 94 | 
 95 | vidvalmeta = { -- validation split under <DataRootPath>/val_videos; same fields as config.train
 96 |   annotation = {
 97 |     dir = config.DataRootPath .. "/val_videos/labels",
 98 |   },
 99 |   image = {
100 |     dir       = config.DataRootPath .. "/val_videos/images",
101 |     nChannels = 3,
102 |     type      = "png",
103 |     suffix    = "im",
104 |     mean      = {}, -- overwritten with the training statistics by compute_mean_std when enabled
105 |     std       = {},
106 |     enable    = true,
107 |     croppable = true,
108 |   },
109 |   depth = {
110 |     enable = false,
111 |   },
112 |   normal = {
113 |     enable = false,
114 |   },
115 |   flow = {
116 |     enable = false,
117 |   },
118 |   mask = {
119 |     dir       = config.DataRootPath .. "/val_videos/objmask",
120 |     nChannels = 1,
121 |     type      = "png",
122 |     suffix    = "mask",
123 |     mean      = {},
124 |     std       = {},
125 |     enable    = true,
126 |   },
127 |   save_dir = config.CacheRootPath .. "/val_video_cache",
128 |   batch_size = 243,
129 |   nIter = 6,
130 | }
131 | 
132 | imvalmeta = { -- validation split under <DataRootPath>/val_images; assigned to `valmeta` further down
133 |   annotation = {
134 |     dir = config.DataRootPath .. "/val_images/labels",
135 |   },
136 |   image = {
137 |     dir       = config.DataRootPath .. "/val_images/images",
138 |     nChannels = 3,
139 |     type      = "png",
140 |     suffix    = "im",
141 |     mean      = {}, -- overwritten with the training statistics by compute_mean_std when enabled
142 |     std       = {},
143 |     enable    = true,
144 |     croppable = true,
145 |   },
146 |   depth = {
147 |     enable = false,
148 |   },
149 |   normal = {
150 |     enable = false,
151 |   },
152 |   flow = {
153 |     enable = false,
154 |   },
155 |   mask = {
156 |     dir       = config.DataRootPath .. "/val_images/objmask",
157 |     nChannels = 1,
158 |     type      = "png",
159 |     suffix    = "mask",
160 |     mean      = {},
161 |     std       = {},
162 |     enable    = true,
163 |   },
164 |   save_dir = config.CacheRootPath .. "/val_images_cache",
165 |   batch_size = 243,
166 |   nIter = 6,
167 | }
168 | 
169 | testmeta = { -- test split under <DataRootPath>/test; becomes config.test when mode ~= 'train'
170 |   annotation = {
171 |     dir = config.DataRootPath .. "/test/labels",
172 |   },
173 |   image = {
174 |     dir       = config.DataRootPath .. "/test/images",
175 |     nChannels = 3,
176 |     type      = "png",
177 |     suffix    = "im",
178 |     mean      = {}, -- overwritten with the training statistics by compute_mean_std when enabled
179 |     std       = {},
180 |     enable    = true,
181 |     croppable = true,
182 |   },
183 |   depth = {
184 |     enable = false,
185 |   },
186 |   normal = {
187 |     enable = false,
188 |   },
189 |   flow = {
190 |     enable = false,
191 |   },
192 |   mask = {
193 |     dir       = config.DataRootPath .. "/test/objmask",
194 |     nChannels = 1,
195 |     type      = "png",  
196 |     suffix    = "mask",
197 |     mean      = {},
198 |     std       = {},
199 |     enable    = true, -- when true in test mode, models/image_row.lua adds a fourth input plane
200 |   },
201 |   save_dir = config.CacheRootPath .. "/test_cache",
202 |   batch_size = 1,
203 |   nIter = 1, -- main.lua replaces config.nIter with GetVideoCount(testset) in test mode
204 | }
205 | 
206 | valmeta = imvalmeta -- validation set used while training; assign vidvalmeta instead to validate on val_videos
207 | config.test = mode == 'train' and valmeta or testmeta -- training runs evaluate on valmeta, any other mode uses testmeta
208 | config.classes = {'scenario11', 'scenario3', 'scenario10', 'scenario7', 'scenario6', 'scenario12', 'scenario9', 'scenario4', 'scenario1', 'scenario2', 'scenario8', 'scenario5'}
209 | config.class_angles= {3, 8, 8, 3, 3, 4, 8, 8, 8, 4, 8, 1}; -- one entry per class above; the entries sum to 66, the value of config.nCategories
210 | 
211 | -- excluded_categories is a list of Lua patterns. In Lua patterns, special characters are escaped with %
212 | config.excluded_categories = {};
213 | 
214 | --------   BEGIN: Network configuration  -----
215 | if mode == 'test' then -- iteration count and batch size come from the split that is being run
216 |   config.nIter    = config.test.nIter
217 |   config.batchSize = config.test.batch_size
218 | else
219 |   config.nIter    = config.train.nIter
220 |   config.batchSize = config.train.batch_size
221 | end
222 |   -- cadences below are in iterations
223 | config.nDisplay = 1; -- log the training accuracy/loss every nDisplay iterations
224 | config.saveModelIter = 500; -- write a checkpoint every saveModelIter iterations
225 | config.nResetLR = 50000; -- model:LearningRateComp looks up (iter % nResetLR) + 1 in config.regimes
226 | config.nCategories = 66
227 | config.nClasses = config.nCategories
228 | config.nEval    = 10; -- evaluate on one held-out batch every nEval iterations
229 | config.lambda   = 0.5
230 | 
231 | config.initModelPath = {  imageNN = "caffe"
232 |                         , animNN = "weights/motion_row.t7"
233 |                         , fullNN = "weights/N3.t7" }
234 | 
235 | config.caffeInit = true;
236 | config.caffeFilePath = {  
237 |   proto  = 'weights/zoo/deploy.prototxt',
238 |   model  = 'weights/zoo/bvlc_alexnet.caffemodel',
239 |   mean   = 'weights/zoo/ilsvrc_2012_mean.mat'
240 | };
241 | config.regimes = { -- learning-rate schedule; rows are matched inclusively on [start, end]
242 |     -- start, end,    LR,
243 |     {  1,     100,   1e-2, },
244 |     { 101,     1000,   1e-2, },
245 |     { 1001,     10000,   1e-3, },
246 |     {10001,      100000,  1e-4,}, -- with nResetLR = 50000 the looked-up index never exceeds 50000
247 |   };
248 | config.dropoutProb = 0.5; -- dropout probability used by models/image_row.lua
249 | 
250 | --------   END :  Network configuration -------
251 | 


--------------------------------------------------------------------------------
/networks/ModelConstruction_IM_GEFixParallel.lua:
--------------------------------------------------------------------------------
  1 | --Constructing The NN model
  2 | log('Constructing Network Model ..... \n');
  3 | ---------------------------------------
  4 | -- Global `model` table: each sub-network is whatever the corresponding models/*.lua chunk returns.
  5 | model={};
  6 | model.imageNN = require('models.image_row') -- image tower ending in a 4096-d feature
  7 | model.animationFix = require('models.motion_embedding') -- embedding applied to the animation features
  8 | model.animationNN =require('models.motion_row'); -- volumetric-convolution tower
  9 | model.jointNN_1 = require('models.classifier') -- Linear + LogSoftMax head
 10 | model.jointNN_2 = require('models.pairwisecosine_GEfix') -- pairwise cosine similarity head
 11 | model.criterion =  nn.ClassNLLCriterion():cuda() -- negative log-likelihood criterion, moved to the GPU
 12 | 
 13 | function model:infer(input, k)
 14 |   -- Runs the full network on {input, cached animation features} and returns its output.
 15 |   -- `k` is accepted for interface compatibility but is not used. The features are computed on first use.
 16 |   if not model.animFeatures then model.animFeatures = GetAnimationFeatures(model.animationNN) end
 17 |   -- Forward pass
 18 |   local tic = os.clock()
 19 |   model.fullNN:forward({input, model.animFeatures});
 20 |   local elapsed = os.clock() - tic  -- end minus start, so the reported CPU time is positive
 21 |   print("Forward time ", elapsed)
 22 |   return model.fullNN.output
 23 | end
 24 | 
 25 | function model:LearningRateComp(iter)
 26 |   -- Returns the learning rate of the config.regimes row {start, end, LR} that contains the
 27 |   -- schedule position of `iter`; the position wraps every config.nResetLR iterations.
 28 |   local lIter = (iter % config.nResetLR)+1;
 29 |   for _, row in ipairs(config.regimes) do
 30 |     if lIter >= row[1] and lIter <= row[2] then return row[3] end
 31 |   end
 32 |   error(('No learning-rate regime in config.regimes covers schedule position %d'):format(lIter))  -- instead of returning nil
 33 | end
 34 | 
 35 | function model:TrainOneBatch(input,target)
 36 |   -- One update step on a batch; returns (accuracy, loss). First switch to training mode (activates the dropouts).
 37 |   model.fullNN:training();
 38 |   -- Forward pass
 39 |   model.fullNN:forward(input);
 40 |   -- the predicted label of each row is the index of its maximum along dimension 2
 41 |   -- Compute loss and accuracy
 42 |   local loss = model.criterion:forward(model.fullNN.output,target)
 43 |   local output = model.fullNN.output
 44 |   local _, predictedLabel = torch.max(output,2);
 45 |   predictedLabel = predictedLabel[{{}, 1}]
 46 |   local acc, per_class = GetPerClassAccuracy(predictedLabel, target)
 47 |   -- per_class is not used further in this function
 48 |   -- Make sure gradients are zero
 49 |   model.fullNN:zeroGradParameters();
 50 | 
 51 |   -- Backward pass
 52 |   local bwCri = model.criterion:backward(model.fullNN.output,target)
 53 |   model.fullNN:backward(input,bwCri);
 54 | 
 55 |   -- updating the weights, using the rate the caller stored in model.learningRate
 56 |   model.fullNN:updateParameters(model.learningRate);
 57 |   return acc,loss;
 58 | end
 59 | 
 60 | function model:EvaluateOneBatch(input,target) -- Evaluates one batch; returns acc, loss, per_class, predictedLabel
 61 |   -- Switch to evaluation mode (this only deactivates the dropout layers)
 62 |   model.fullNN:evaluate();
 63 |   local loss = 0;  -- no loss is computed here; the returned loss is always 0
 64 |   local infer_output = model:infer(input,1);
 65 |   local max,predictedLabel = torch.max(infer_output,2); -- `max` (the best scores) is not used afterwards
 66 | 
 67 |   predictedLabel = predictedLabel[{{},1}] -- convert matrix to vector
 68 | 
 69 |   local _, bestFrame = torch.max(model.jointNN_2:get(2).output, 3) -- arg-max over dim 3 of the matcher's 2nd module; bestFrame is currently unused
 70 |   local acc, per_class = GetPerClassAccuracy(predictedLabel, target)
 71 | 
 72 |   return acc, loss, per_class, predictedLabel
 73 | end
 74 | 
 75 | function model:SaveModel(fileName) -- Serializes the parameters of all five sub-networks to `fileName` via torch.save
 76 |   local saveModel ={};
 77 |   -- copy each sub-network's parameters to the CPU (via GetNNParamsToCPU)
 78 |   saveModel.imageNN       = GetNNParamsToCPU(model.imageNN);
 79 |   saveModel.animationNN   = GetNNParamsToCPU(model.animationNN);
 80 |   saveModel.animationFix  = GetNNParamsToCPU(model.animationFix);
 81 |   saveModel.jointNN_1     = GetNNParamsToCPU(model.jointNN_1);
 82 |   saveModel.jointNN_2     = GetNNParamsToCPU(model.jointNN_2);
 83 |   -- write the table to disk; LoadModelFull reads back exactly these five keys
 84 |   torch.save(fileName,saveModel)
 85 | end
 86 | 
 87 | function model:LoadCaffeImageNN(caffeFilePath) -- Initializes model.imageNN from a Caffe model; caffeFilePath has fields proto, model, mean
 88 |   local protoFile = caffeFilePath.proto
 89 |   local modelFile = caffeFilePath.model
 90 |   local meanFile  = caffeFilePath.mean
 91 | 
 92 |   require 'loadcaffe'
 93 |   local caffeModel = loadcaffe.load(protoFile,modelFile,'cudnn');
 94 |   caffeModel:remove(24); -- drop modules 24..22, removed from the end so the indices stay valid
 95 |   caffeModel:remove(23); -- NOTE(review): presumably the final classification layers of the Caffe net; confirm against the prototxt
 96 |   caffeModel:remove(22);
 97 |   local caffeParams = GetNNParamsToCPU(caffeModel);
 98 |   if config.w_crop then -- tile the first parameter tensor 5x along dimension 2
 99 |     caffeParams[1] = caffeParams[1]:repeatTensor(1, 5, 1, 1)
100 |   end
101 |   if config.train.mask.enable then -- prepend one randomly initialized 96x1x11x11 slice along dimension 2
102 |     local firstLayerRandom = torch.FloatTensor(96, 1, 11, 11)
103 |     firstLayerRandom:apply(rand_initialize)
104 |     caffeParams[1] = torch.cat(firstLayerRandom, caffeParams[1], 2)
105 |   end
106 |   LoadNNlParams(model.imageNN, caffeParams);
107 |   
108 |   LoadCaffeMeanStd(meanFile);
109 | end
110 | 
111 | 
112 | function model:LoadModel(fileNameImg,fileNameAnim) -- Initializes the sub-networks, assembles model.fullNN on the GPU and saves 'init.t7'
113 |   log('Loding Network Model ....')
114 |   for mm = 19,16,-1 do -- strip modules 19..16 of the animation network, last one first
115 |    model.animationNN:remove(mm);
116 |   end
117 |   model.animationNN:add(nn.Transpose{2,3}):add(nn.Reshape(10,4096,false)  );
118 |   model.animationNN:cuda()
119 | 
120 |   if fileNameImg == "caffe" then -- image branch from Caffe weights, classifier initialized randomly
121 |     model:LoadCaffeImageNN(config.caffeFilePath);
122 |     model.jointNN_1:apply(rand_initialize);
123 |   elseif fileNameImg then -- image branch and classifier from a saved checkpoint
124 |     local saveModel = torch.load(fileNameImg);
125 |     LoadNNlParams(model.imageNN ,saveModel.imageNN);
126 |     LoadNNlParams(model.jointNN_1 ,saveModel.jointNN); -- NOTE(review): reads key `jointNN`, but SaveModel in this file writes `jointNN_1`; presumably a checkpoint from another script, confirm
127 |   else
128 |     -- Initialize the model randomly
129 |     model.imageNN:apply(rand_initialize);
130 |   end
131 |   model.jointNN_2:apply(rand_initialize); -- the matcher is always initialized randomly here
132 | 
133 |   if config.caffeInit then 
134 |       LoadCaffeMeanStd(config.caffeFilePath.mean);
135 |   end
136 | 
137 |   if fileNameAnim then
138 |     local saveModel = torch.load(fileNameAnim);
139 |     LoadNNlParams(model.animationNN ,saveModel.imageNN); -- NOTE(review): animation weights are read from key `imageNN`; confirm this matches the checkpoint layout
140 |   else
141 |     -- Initialize the model randomly
142 |     model.animationNN:apply(rand_initialize);
143 |   end
144 |   
145 |   model.animationFix:apply(rand_initialize);
146 |  
147 |   local featuresTable = nn.ParallelTable():add(model.imageNN):add(model.animationFix);
148 |   local classifier    = nn.Sequential():add(nn.SelectTable(1)):add(model.jointNN_1):add(nn.MulConstant(config.lambda,true));
149 |   local matcher       = nn.Sequential():add(model.jointNN_2):add(nn.MulConstant((1-config.lambda),true));
150 |   local concatTable   = nn.ConcatTable():add(classifier):add(matcher)
151 |   model.fullNN = nn.Sequential():add(featuresTable):add(concatTable):add(nn.CAddTable()) -- output = lambda * classifier + (1 - lambda) * matcher
152 |   model.fullNN:cuda();
153 | 
154 |   model:SaveModel( paths.concat(config.logDirectory, 'init.t7'))
155 | end
156 | 
157 | function model:LoadModelFull(fileName) -- Restores all five sub-networks from a SaveModel checkpoint (or random init), then assembles model.fullNN
158 |   log('Loding Network Model ....')
159 | 
160 |   for mm = 19,16,-1 do -- strip modules 19..16 of the animation network, last one first
161 |    model.animationNN:remove(mm);
162 |   end
163 |   model.animationNN:add(nn.Transpose{2,3}):add(nn.Reshape(10,4096,false)  );
164 |   model.animationNN:cuda()
165 | 
166 |   if fileName  then -- the keys read below are the ones written by model:SaveModel
167 |     local saveModel = torch.load(fileName);
168 |     LoadNNlParams(model.imageNN ,saveModel.imageNN);
169 |     LoadNNlParams(model.animationNN ,saveModel.animationNN);
170 |     -- debugger.enter()
171 |     LoadNNlParams(model.animationFix ,saveModel.animationFix);
172 |     LoadNNlParams(model.jointNN_1,saveModel.jointNN_1);
173 |     LoadNNlParams(model.jointNN_2,saveModel.jointNN_2);
174 |   else
175 |     -- Initialize the model randomly
176 |     model.imageNN:apply(rand_initialize);
177 |     model.animationNN:apply(rand_initialize);
178 |     model.animationFix:apply(rand_initialize);
179 |     model.jointNN_1:apply(rand_initialize);
180 |     model.jointNN_2:apply(rand_initialize);
181 |   end
182 |   if config.caffeInit then 
183 |       LoadCaffeMeanStd(config.caffeFilePath.mean);
184 |   end
185 |   
186 |   local featuresTable = nn.ParallelTable():add(model.imageNN):add(model.animationFix);
187 |   local classifier    = nn.Sequential():add(nn.SelectTable(1)):add(model.jointNN_1):add(nn.MulConstant(config.lambda,true));
188 |   local matcher       = nn.Sequential():add(model.jointNN_2):add(nn.MulConstant((1-config.lambda),true));
189 |   local concatTable   = nn.ConcatTable():add(classifier):add(matcher)
190 |   model.fullNN = nn.Sequential():add(featuresTable):add(concatTable):add(nn.CAddTable()) -- output = lambda * classifier + (1 - lambda) * matcher
191 |   model.fullNN:cuda();
192 | 
193 |   model:SaveModel( paths.concat(config.logDirectory, 'init.t7'))
194 | end
195 | 


--------------------------------------------------------------------------------
/io/readGEFiles.lua:
--------------------------------------------------------------------------------
  1 | function LoadGEDatabase()
  2 |   -----------------------------------------------------------------
  3 |   -- Reads the list of images in the Game Engine videos under config.GE.dir
  4 |   -- outputs:
  5 |   --       dataset: a table with the sorted list of files for each category
  6 |   --                dataset[category][angle][variation], e.g.,
  7 |   --                dataset['rolling'][1]['1_1']; its metatable carries the `splines` matrix
  8 |   -----------------------------------------------------------------
  9 |   os.execute('mkdir -p ' .. config.GE.saveDir) -- make sure the cache directory exists
 10 | 
 11 |   local geDir = config.GE.dir;
 12 | 
 13 |   local dataset = {};
 14 | 
 15 |   -- physics categories
 16 |   local physicsCategories = paths.dir(geDir);
 17 |   RemoveDotDirs(physicsCategories);
 18 | 
 19 |   local nClasses=table.getn(physicsCategories);
 20 | 
 21 |   for i=1,nClasses do
 22 |     dataset[physicsCategories[i]] = {};
 23 |   end
 24 | 
 25 |   for i=1,nClasses do
 26 | 
 27 |     -- angle directories, named "<angle>_<suffix>"
 28 |     local dir1 = paths.concat(geDir,physicsCategories[i]);
 29 |     local angleCategories = paths.dir(dir1);
 30 |     RemoveDotDirs(angleCategories);
 31 | 
 32 |     local anglebins = {} -- set of angle numbers seen for this category
 33 |     for _,a in pairs(angleCategories) do
 34 |       local abin = tonumber((a:match("([^,]+)_([^,]+)"))); -- local: the original leaked globals `abin` and `tmp`
 35 |       if abin then anglebins[abin] = 1 end -- skip entries without a numeric "<angle>_" prefix instead of failing on a nil table index
 36 |     end
 37 | 
 38 |     for k,_ in pairs(anglebins) do
 39 |       dataset[physicsCategories[i]][k] = {};
 40 |     end
 41 | 
 42 |     for k,_ in pairs(anglebins) do
 43 |       local dir2 = paths.concat(geDir,physicsCategories[i],tostring(k) .. '_' .. 'im'); -- only the "<angle>_im" directory is listed
 44 |       local alldirs = paths.dir(dir2);
 45 |       RemoveDotDirs(alldirs);
 46 |       table.sort(alldirs, function (a,b) return a < b end);
 47 | 
 48 |       for _,d in pairs(alldirs) do
 49 |         local dir3 = paths.concat(geDir,physicsCategories[i],tostring(k) .. '_' .. 'im', d);
 50 |         local files = paths.dir(dir3);
 51 |         RemoveDotDirs(files);
 52 | 
 53 |         table.sort(files, function (a,b) return a < b end);
 54 |         -- store the sorted frame list directly (the empty-table placeholder assigned first was redundant)
 55 |         dataset[physicsCategories[i]][k][d] = files;
 56 | 
 57 |       end
 58 | 
 59 |     end
 60 | 
 61 |   end
 62 | 
 63 |   local splines = mattorch.load(config.GE.splinesFile)['splines']
 64 |   setmetatable(dataset, {splines = splines}) -- batch coroutines read this back with getmetatable(dataset_GE)['splines']
 65 |   
 66 |   return dataset;
 67 | end
 68 | 
 69 | 
 70 | function ReadGEImagesPerCategory(physicsCategory, angle, dataset, savefile, input_type)
 71 |   -----------------------------------------------------------------
 72 |   -- Reads game engine videos for a category and an angle, and caches them in savefile
 73 |   -- inputs:
 74 |   --       physicsCategory: 'rolling', 'falling', etc.
 75 |   --       angle:            the view angle (1 out of 8 or 1 out of 3 for symmetric categories)
 76 |   --       dataset:          the output of "LoadGEDatabase" function
 77 |   --       savefile:         the filename for storing the videos in our format
 78 |   --       input_type:       key into config.GE that selects mean, std, nChannels and directory suffix
 79 |   -- outputs:
 80 |   --       images:  5D Tensor,
 81 |   --                nvariations (different z's for the camera, different forces, etc. ) x
 82 |   --                fr_per_video x nChannels x imH (image height) x imW (image width)
 83 |   -----------------------------------------------------------------
 84 | 
 85 |   local images  = {};
 86 | 
 87 |   local geDir = config.GE.dir;
 88 | 
 89 |   local imH          = config.GE.imH;
 90 |   local imW          = config.GE.imW;
 91 |   local fr_per_video = config.GE.frame_per_video; -- # of frames that we want to keep from each video
 92 | 
 93 |   local mean = config.GE[input_type].mean;
 94 |   local std = config.GE[input_type].std;
 95 |   local nChannels = config.GE[input_type].nChannels;
 96 | 
 97 |   local nvariations = getTableSize(dataset[physicsCategory][angle]);
 98 | 
 99 |   local imTensor     = torch.Tensor(nvariations, fr_per_video, nChannels, imH, imW);
100 |   local suffix = config.GE[input_type].suffix
101 | 
102 |   local cnt = 0; -- running variation index; pairs() order decides which directory gets which index
103 |   for dir,files in pairs(dataset[physicsCategory][angle]) do
104 |     cnt = cnt + 1;
105 |     for f = 1,#files do -- NOTE(review): assumes #files <= fr_per_video, otherwise the tensor index is out of range; confirm
106 |       local fname_im    = paths.concat(geDir,physicsCategory, angle .. '_' .. suffix, dir, files[f]);
107 |       local im          = normalizeImage(image.scale(loadImageOrig(fname_im), imW, imH), mean, std);
108 |       imTensor[cnt][f]  = im[{{1,nChannels}, {}, {}}]; -- keep only the first nChannels channels
109 |     end
110 |   end
111 | 
112 |   images  = imTensor;
113 |   collectgarbage()
114 |   torch.save(savefile, images)
115 |   return images
116 | end
117 | 
118 | function LoadGEPositionPerCategory(physicsCategory, angle, dataset)
119 |   -----------------------------------------------------------------
120 |   -- If the cached file does not exist, it calls "ReadGEPositionPerCategory".
121 |   -- Otherwise, it loads the positions from the disk.
122 |   --
123 |   -- inputs:
124 |   --       physicsCategory: 'rolling', 'falling', etc.
125 |   --       angle:            the view angle (1 out of 8 or 1 out of 3 for symmetric categories)
126 |   --       dataset:          the output of "LoadGEDatabase" function
127 |   -- outputs:
128 |   --       positions:  3D Tensor,
129 |   --                nvariations (different z's for the camera, different forces, etc. ) x
130 |   --                fr_per_video x 3 (x,y,z)
131 |   -----------------------------------------------------------------
132 | 
133 |   local saveDir = config.GE.saveDir; -- local: previously leaked as a global
134 | 
135 |   local positions;
136 | 
137 |   local fname = paths.concat(saveDir, physicsCategory .. '_' .. angle .. '_positions' .. '.t7'); -- local: previously leaked as a global
138 | 
139 |   if paths.filep(fname) then
140 |     positions = torch.load(fname)
141 |   else
142 |     positions = ReadGEPositionPerCategory(physicsCategory, angle, dataset, fname);
143 |   end
144 | 
145 |   return positions;
146 | end
147 | 
148 | 
149 | 
150 | function LoadGEPerCategory(physicsCategory, angle, dataset, input_type)
151 |   -----------------------------------------------------------------
152 |   -- If files do not exist, it calls "ReadGEImagesPerCategory". Otherwise,
153 |   -- it loads from the disk.
154 |   --
155 |   -- inputs:
156 |   --       physicsCategory: 'rolling', 'falling', etc.
157 |   --       angle:            the view angle (1 out of 8 or 1 out of 3 for symmetric categories)
158 |   --       dataset:          the output of "LoadGEDatabase" function
159 |   --       input_type:       optional; when nil, all enabled input types are loaded and stacked along the channel dimension
160 |   -- outputs:
161 |   --       images:  5D Tensor,
162 |   --                nvariations (different z's for the camera, different forces, etc. ) x
163 |   --                fr_per_video x nChannels x imH (image height) x imW (image width)
164 |   -----------------------------------------------------------------
165 |   if not input_type then
166 |     local imH          = config.GE.imH;
167 |     local imW          = config.GE.imW;
168 |     local fr_per_video = config.GE.frame_per_video; -- # of frames that we want to keep from each video
169 |     local nvariations = config.GE.use_multiple_vars and getTableSize(dataset[physicsCategory][angle]) or 1;
170 |     local all_input_types = GetEnableInputTypes(config.GE)
171 |     local nChannels = GetValuesSum(all_input_types)
172 |     local result = torch.Tensor(nvariations, fr_per_video, nChannels, imH, imW);
173 | 
174 |     local i = 1 -- first channel of the slice filled by the next input type
175 |     for input_type, nChannels in pairs(all_input_types) do -- shadows the outer names; channel order follows pairs() order
176 |       result[{{}, {}, {i, i+nChannels-1}, {}, {}}] = LoadGEPerCategory(physicsCategory, angle, dataset, input_type)
177 |       i = i + nChannels
178 |     end
179 |     return result
180 |   end
181 | 
182 |   -- Single input type: load the cached tensor, or build it from the image files
183 |   local saveDir = config.GE.saveDir
184 |   local suffix = config.GE[input_type].suffix
185 | 
186 |   local images;
187 |   local fname = paths.concat(saveDir, physicsCategory .. '_' .. angle .. '_' .. suffix .. '.t7');
188 | 
189 |   if paths.filep(fname) then
190 |     images = torch.load(fname)
191 |   else
192 |     images = ReadGEImagesPerCategory(physicsCategory, angle, dataset, fname, input_type);
193 |   end
194 | 
195 |   if not config.GE.use_multiple_vars then -- keep a single variation: index 1 if it is the only one, otherwise index 2
196 |     local var_id = images:size(1) == 1 and 1 or 2
197 |     images = images[{{var_id}, {}, {}, {}, {}}]
198 |   end
199 |   return images;
200 | end
201 | 
202 | 
203 | function ReadGEImagesAll(dataset, savefile, opts)
204 | -----------------------------------------------------------------
205 |   -- Reads all game engine videos and caches them in savefile
206 |   -- inputs:
207 |   --       dataset:          the output of "LoadGEDatabase" function
208 |   --       savefile:         the filename for storing the videos in our format
209 |   --       opts:             table with fields input_type (directory suffix), mean and std
210 |   -- outputs:
211 |   --       images:  table images[physicsCategory][angle] of 5D Tensors,
212 |   --                nvariations (different z's for the camera, different forces, etc. ) x
213 |   --                fr_per_video x 3 (channels) x imH (image height) x imW (image width)
214 |   -----------------------------------------------------------------
215 | 
216 |   local images  = {};
217 |   local geDir = config.GE.dir;
218 | 
219 |   local imH          = config.GE.imH;
220 |   local imW          = config.GE.imW;
221 |   local fr_per_video = config.GE.frame_per_video; -- # of frames that we want to keep from each video
222 | 
223 |   local input_type = opts.input_type;
224 |   local mean = opts.mean;
225 |   local std = opts.std;
226 | 
227 |   for physicsCategory,_ in pairs(dataset) do
228 |     images[physicsCategory] = {};
229 |     for angle,_ in pairs(dataset[physicsCategory]) do
230 |       images[physicsCategory][angle] = {};
231 | 
232 |       local nvariations = getTableSize(dataset[physicsCategory][angle]);
233 |       local imTensor     = torch.Tensor(nvariations, fr_per_video, 3, imH, imW); -- channel count is fixed at 3 here
234 | 
235 |       local cnt = 0; -- running variation index; pairs() order decides which directory gets which index
236 |       for dir,files in pairs(dataset[physicsCategory][angle]) do
237 |         cnt = cnt + 1;
238 |         for f = 1,#files do
239 |           local fname_im     = paths.concat(geDir,physicsCategory, angle .. '_' .. input_type, dir, files[f]);
240 |           local im     = normalizeImage(image.scale(loadImageOrig(fname_im), imW, imH), mean, std);
241 |           imTensor[cnt][f] = im;
242 |         end
243 | 
244 |       end
245 |       images [physicsCategory][angle]  = imTensor; -- replaces the empty placeholder table assigned above
246 |     end
247 |   end
248 |   collectgarbage()
249 | 
250 |   torch.save(savefile, images)
251 |   return images
252 | end
253 | 
254 | function LoadGEAll(dataset, opts)
255 |   -----------------------------------------------------------------
256 |   -- If the cached file does not exist, it calls "ReadGEImagesAll". Otherwise,
257 |   -- it loads from the disk.
258 |   --
259 |   -- inputs:
260 |   --       dataset: the output of "LoadGEDatabase" function
261 |   --       opts:    table; opts.input_type names the cache file and is forwarded to "ReadGEImagesAll"
262 |   -- outputs:
263 |   --       images:  table images[physicsCategory][angle] of 5D Tensors,
264 |   --                nvariations (different z's for the camera, different forces, etc. ) x
265 |   --                fr_per_video x 3 (channels) x imH (image height) x imW (image width)
266 |   -----------------------------------------------------------------
267 |   -- The rest of this file uses config.GE.saveDir; the legacy key config.GEsaveDir still wins when it is set.
268 |   local saveDir = config.GEsaveDir or config.GE.saveDir; -- local: previously leaked as a global
269 |   local images;
270 | 
271 |   local fname = paths.concat(saveDir, 'allGE_' .. opts.input_type .. '.t7'); -- local: previously leaked as a global
272 | 
273 |   if paths.filep(fname) then
274 |     images = torch.load(fname)
275 |   else
276 |     images = ReadGEImagesAll(dataset, fname, opts);
277 |   end
278 |   return images
279 | end
280 | 


--------------------------------------------------------------------------------
/io/readBatch.lua:
--------------------------------------------------------------------------------
  1 | log ("loading datasets metadata"); -- module-level setup: runs once when this file is loaded
  2 | dataset_GE = LoadGEDatabase(); -- global: game-engine file lists, with the splines matrix in its metatable
  3 | trainset   = LoadTrainDatabase(config.excluded_categories); -- global, read by the batch coroutines below
  4 | testset    = LoadTestDatabase(config.excluded_categories); -- global, read by the batch coroutines below
  5 | log ("computing mean_std");
  6 | compute_mean_std(trainset, dataset_GE);
  7 | log ("dataset done");
  8 | 
  9 | GetASiameseBatchCoroutine = coroutine.create(function(nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages, test)
 10 |   local dataset = test and testset or trainset; -- TODO(hessam): local or global?
 11 |   assert(nPositiveImages > 0);
 12 |   local n1, n2, n3 = nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages;
 13 |   local batchSize = nPositiveImages + nDifferentAngleImages + nDifferentCategoryImages;
 14 |   local target;
 15 |   local images;
 16 |   -- Each resume yields ({images, gameEngineVideo}, target): target is 1 for positives and 0 for negatives.
 17 |   local all_input_types = GetEnableInputTypes(dataset.config)
 18 |   local nChannels       = GetValuesSum(all_input_types)
 19 |   if config.GPU == -1 then -- CPU mode
 20 |     target = torch.FloatTensor(batchSize);
 21 |     images = torch.FloatTensor(batchSize, nChannels, config.imH, config.imW);
 22 |   else
 23 |     target = torch.CudaTensor(batchSize);
 24 |     images = torch.CudaTensor(batchSize, nChannels, config.imH, config.imW);
 25 |   end
 26 | 
 27 |   local trainIndex = nil
 28 |   local testIndex = nil
 29 |   local trainvideos = MakeShuffledTuples(trainset);
 30 |   local testvideos = MakeShuffledTuples(testset);
 31 | 
 32 |   repeat
 33 |     nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages = n1, n2, n3; -- reset the quotas for this batch
 34 |     -- Iterate on real videos not game engines to make sure all videos are seen.
 35 |     local v
 36 |     if test then
 37 |       testIndex, v = next(testvideos, testIndex)
 38 |       if testIndex == nil then -- list exhausted: reshuffle and start over
 39 |         testvideos = MakeShuffledTuples(testset);
 40 |         testIndex, v = next(testvideos, testIndex)
 41 |       end
 42 |     else
 43 |       trainIndex, v = next(trainvideos, trainIndex)
 44 |       if trainIndex == nil then -- list exhausted: reshuffle and start over
 45 |         trainvideos = MakeShuffledTuples(trainset);
 46 |         trainIndex, v = next(trainvideos, trainIndex)
 47 |       end
 48 |     end
 49 | 
 50 |     local batchIndex       = 1;
 51 |     local category         = v[1];
 52 |     local physicsCategory  = v[2];
 53 |     local angle            = v[3];
 54 |     local folder           = v[4];
 55 | 
 56 |     local gameEngineVideo = LoadGEPerCategory(physicsCategory, angle, dataset_GE);
 57 |     
 58 |     images[1]  = LoadRandomFrameOfVideo(dataset, category, angle, folder); -- the anchor video always fills the first positive slot
 59 | 
 60 |     target[1] = 1;
 61 |     nPositiveImages = nPositiveImages - 1;
 62 |     batchIndex = batchIndex + 1;
 63 | 
 64 |     repeat -- NOTE: keeps reshuffling until the batch is full, so it never ends if the data cannot satisfy the quotas
 65 |       local shuffledDataset = MakeShuffledTuples(dataset);
 66 |       for _,sample in pairs(shuffledDataset) do
 67 |         local sampleCategory         = sample[1]; -- these four were previously leaked as globals
 68 |         local samplePhysicsCategory  = sample[2];
 69 |         local sampleAngle            = sample[3];
 70 |         local sampleFolder           = sample[4];
 71 |         if nPositiveImages > 0 and samplePhysicsCategory == physicsCategory
 72 |                 and sampleAngle == angle then
 73 |           -- Add the positive example
 74 |           images[batchIndex] = LoadRandomFrameOfVideo(dataset, sampleCategory, sampleAngle, sampleFolder, 'image');
 75 |           target[batchIndex] = 1;
 76 |           nPositiveImages = nPositiveImages - 1;
 77 |           batchIndex = batchIndex + 1;
 78 |         elseif nDifferentAngleImages > 0 and samplePhysicsCategory == physicsCategory
 79 |                 and sampleAngle ~= angle then
 80 |           -- Add the negative example with different angle
 81 |           images[batchIndex] = LoadRandomFrameOfVideo(dataset, sampleCategory, sampleAngle, sampleFolder, 'image');
 82 |           target[batchIndex] = 0;
 83 |           nDifferentAngleImages = nDifferentAngleImages - 1;
 84 |           batchIndex = batchIndex + 1;
 85 |         elseif nDifferentCategoryImages > 0 and samplePhysicsCategory ~= physicsCategory then
 86 |           -- Add the negative example with different physics category
 87 |           images[batchIndex] = LoadRandomFrameOfVideo(dataset, sampleCategory, sampleAngle, sampleFolder, 'image');
 88 |           target[batchIndex] = 0;
 89 |           nDifferentCategoryImages = nDifferentCategoryImages - 1;
 90 |           batchIndex = batchIndex + 1;
 91 |         end
 92 |         if batchIndex > batchSize then
 93 |           break;
 94 |         end
 95 |       end
 96 | 
 97 |       if nDifferentAngleImages == n2 then -- no different angle exists
 98 |         nDifferentCategoryImages = nDifferentCategoryImages + nDifferentAngleImages -- move that quota to different-category negatives
 99 |         nDifferentAngleImages = 0
100 |       end
101 |     until batchIndex > batchSize
102 | 
103 |     assert(batchIndex > batchSize, "Not enough data to generate a batch for category="
104 |           .. physicsCategory .. " and angle=" .. tostring(angle) .. ". Requirments = ("
105 |           .. tostring(nPositiveImages) .. "," .. tostring(nDifferentAngleImages) .. ","
106 |           .. tostring(nDifferentCategoryImages) .. ")");
107 |     -- shuffle data
108 |     local shuffle = torch.randperm(batchSize):type('torch.LongTensor')
109 |     images = images:index(1, shuffle)
110 |     target = target:index(1, shuffle)
111 |     local randomForce = torch.random( gameEngineVideo:size(1) ) -- pick one variation of the game-engine video
112 |     local gameEngineVideoRandomForce = gameEngineVideo[{{randomForce}, {}, {}, {}, {}}]:transpose(2, 3)
113 |     if config.GPU ~= -1 then
114 |       gameEngineVideoRandomForce = gameEngineVideoRandomForce:cuda()
115 |     end
116 | 
117 |     -- yield the output; only the 4th resume argument (test) is read back, without clobbering the global `_`
118 |     test = select(4, coroutine.yield({images, gameEngineVideoRandomForce}, target));
119 |     dataset = test and testset or trainset;
120 |   until false -- repeat until the end of the world
121 | end)
122 | 
123 | GetAnImageBatchCoroutine = coroutine.create(function(batchSize, useViewPoint, test, deterministic, spline)
124 |   assert(batchSize > 0);
125 |   assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");
126 |   -- Each resume yields (images, target): one random frame per video, walking a shuffled list of the selected set.
127 |   local splinesMat = getmetatable(dataset_GE)['splines']
128 | 
129 |   local target;
130 |   local images;
131 |   local dataset = test and testset or trainset
132 | 
133 |   local all_input_types = GetEnableInputTypes(dataset.config)
134 |   local nChannels       = GetValuesSum(all_input_types)
135 | 
136 |   if config.GPU == -1 then -- CPU mode
137 |     target = spline and torch.FloatTensor(batchSize, splinesMat:size(2)) or torch.FloatTensor(batchSize);
138 |     images = torch.FloatTensor(batchSize, nChannels, config.imH, config.imW);
139 |   else
140 |     target = spline and torch.CudaTensor(batchSize, splinesMat:size(2)) or torch.CudaTensor(batchSize);
141 |     images = torch.CudaTensor(batchSize, nChannels, config.imH, config.imW);
142 |   end
143 | 
144 |   local batchIndex = 1;
145 |   repeat
146 |     local videos = MakeShuffledTuples(dataset, deterministic);
147 |     for _,v in pairs(videos) do
148 |       local category         = v[1];
149 |       local physicsCategory  = v[2];
150 |       local angle            = v[3];
151 |       local folder           = v[4];
152 |       local categoryId
153 |       if (useViewPoint) then
154 |         categoryId = GetCategoryViewPointId(physicsCategory, angle);
155 |       else
156 |         categoryId = GetCategoryId(physicsCategory);
157 |       end
158 |       images[batchIndex] = LoadRandomFrameOfVideo(dataset, category, angle, folder);
159 |       target[batchIndex] = spline and splinesMat[categoryId] or categoryId
160 | 
161 |       batchIndex = batchIndex + 1
162 |       if batchIndex > batchSize then
163 |         if config.GPU ~= -1 then -- results are discarded; the buffers were already allocated as CudaTensors above
164 |           images:cuda()
165 |           target:cuda()
166 |         end
167 |         test, deterministic = select(3, coroutine.yield(images, target)); -- read back resume args 3 and 4 without clobbering the global `_`
168 |         -- re-initialize vars for the next batch (the current `videos` list keeps being iterated):
169 |         dataset = test and testset or trainset
170 |         batchIndex = 1
171 |       end
172 |     end
173 |   until false -- repeat until the end of the world
174 | end)
175 | 
176 | GetAUniformImageBatchCoroutine = coroutine.create(function(batchSize, useViewPoint, test, spline)
177 |   assert(batchSize > 0);
178 |   assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");
179 |   -- Each resume yields (images, target), with every sample drawn through GetUniformRandomData.
180 |   local splinesMat = getmetatable(dataset_GE)['splines']
181 | 
182 |   local target;
183 |   local images;
184 |   local dataset = test and testset or trainset
185 | 
186 |   local all_input_types = GetEnableInputTypes(dataset.config)
187 |   local nChannels       = GetValuesSum(all_input_types)
188 |   if config.GPU == -1 then -- CPU mode
189 |     target = spline and torch.FloatTensor(batchSize, splinesMat:size(2)) or torch.FloatTensor(batchSize);
190 |     images = torch.FloatTensor(batchSize, nChannels, config.imH, config.imW);
191 |   else
192 |     target = spline and torch.CudaTensor(batchSize, splinesMat:size(2)) or torch.CudaTensor(batchSize);
193 |     images = torch.CudaTensor(batchSize, nChannels, config.imH, config.imW);
194 |   end
195 | 
196 |   repeat
197 |     local batchIndex = 1
198 |     dataset = test and testset or trainset
199 |     repeat
200 |       local randomData = GetUniformRandomData(dataset)
201 |       local category, physicsCategory = randomData[1], randomData[2]
202 |       local angle, folder = randomData[3], randomData[4]
203 |       -- Declared local, as in GetAnImageBatchCoroutine; it was previously assigned as a global.
204 |       local categoryId
205 |       if (useViewPoint) then
206 |         categoryId = GetCategoryViewPointId(physicsCategory, angle);
207 |       else
208 |         categoryId = GetCategoryId(physicsCategory);
209 |       end
210 |       images[batchIndex] = LoadRandomFrameOfVideo(dataset, category, angle, folder)
211 |       target[batchIndex] = spline and splinesMat[categoryId] or categoryId
212 | 
213 |       batchIndex = batchIndex + 1
214 |     until batchIndex > batchSize
215 | 
216 |     if config.GPU ~= -1 then -- results are discarded; the buffers were already allocated as CudaTensors above
217 |       images:cuda()
218 |       target:cuda()
219 |     end
220 |     test = select(3, coroutine.yield(images, target)); -- read back resume arg 3 without clobbering the global `_`
221 |   until false -- repeat until the end of the world
222 | end)
223 | 
224 | GetAUniformAnimationBatchCoroutine = coroutine.create(function(batchSize, useViewPoint, spline)
225 |   assert(batchSize > 0);
226 |   assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");
227 |   -- Each resume yields (videos, target): game-engine videos for randomly drawn (category, angle) pairs.
228 |   local splinesMat = getmetatable(dataset_GE)['splines']
229 | 
230 |   local nChannels = GetValuesSum(GetEnableInputTypes(config.GE))
231 |   local target;
232 |   local videos;
233 |   if config.GPU == -1 then -- CPU mode
234 |     target = spline and torch.FloatTensor(batchSize, splinesMat:size(2)) or torch.FloatTensor(batchSize);
235 |     videos = torch.FloatTensor(batchSize, nChannels, config.GE.frame_per_video, config.GE.imH, config.GE.imW);
236 |   else
237 |     target = spline and torch.CudaTensor(batchSize, splinesMat:size(2)) or torch.CudaTensor(batchSize);
238 |     videos = torch.CudaTensor(batchSize, nChannels, config.GE.frame_per_video, config.GE.imH, config.GE.imW);
239 |   end
240 | 
241 |   repeat
242 |     local batchIndex = 1;
243 |     repeat
244 |       local randomCategoryIndex = torch.random( #config.classes )
245 |       local physicsCategory = config.classes[ randomCategoryIndex ]
246 |       local angle, categoryId = torch.random( config.class_angles[randomCategoryIndex] ), nil -- categoryId is now local (was a leaked global)
247 |       if (useViewPoint) then
248 |         categoryId = GetCategoryViewPointId(physicsCategory, angle)
249 |       else
250 |         categoryId = GetCategoryId(physicsCategory)
251 |       end
252 |       local gameEngineVideo = LoadGEPerCategory(physicsCategory, angle, dataset_GE)
253 |       local gameEngineVideoRandomForce = gameEngineVideo[ torch.random(gameEngineVideo:size(1)) ] -- pick one variation at random
254 |       videos[batchIndex]  = gameEngineVideoRandomForce:transpose(1,2); -- swap the frame and channel dimensions to match `videos`
255 |       target[batchIndex]  = spline and splinesMat[categoryId] or categoryId
256 | 
257 |       batchIndex = batchIndex + 1
258 |     until batchIndex > batchSize
259 | 
260 |     if config.GPU ~= -1 then -- results are discarded; the buffers were already allocated as CudaTensors above
261 |       videos:cuda()
262 |       target:cuda()
263 |     end
264 |     coroutine.yield(videos, target);
265 |   until false -- repeat until the end of the world
266 | end)
267 | 
268 | GetAVideoBatchCoroutine = coroutine.create(function(useViewPoint, test, spline)
269 |   assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");
270 |   -- Each resume yields (video, target) for one video, walking a shuffled list of the selected set.
271 |   local dataset = test and testset or trainset
272 | 
273 |   local splinesMat = getmetatable(dataset_GE)['splines']
274 | 
275 |   repeat
276 |     local videos = MakeShuffledTuples(dataset);
277 |     for _,v in pairs(videos) do
278 |       local category         = v[1];
279 |       local physicsCategory  = v[2];
280 |       local angle            = v[3];
281 |       local folder           = v[4];
282 |       local categoryId
283 |       if (useViewPoint) then
284 |         categoryId = GetCategoryViewPointId(physicsCategory, angle);
285 |       else
286 |         categoryId = GetCategoryId(physicsCategory);
287 |       end
288 | 
289 |       local video   = LoadTrainImagesPerVideo(dataset, category, angle, folder);
290 |       local target  = spline and splinesMat[categoryId] or categoryId
291 |       if config.GPU ~= -1 then
292 |         video = video:cuda()
293 |       end
294 |       test = select(2, coroutine.yield(video, target)); -- read back resume arg 2 without clobbering the global `_`
295 |       -- re-initialize vars for the next batch (the current `videos` list keeps being iterated):
296 |       dataset = test and testset or trainset
297 |     end
298 |   until false -- repeat until the end of the world
299 | end)
300 | 


--------------------------------------------------------------------------------
/io/readFiles.lua:
--------------------------------------------------------------------------------
  1 | -- Returns the view angle annotated for the video stored in "videoDir".
  2 | -- All viewpoint annotations of a video are taken to be similar to 00000_00's;
  3 | -- when that file is missing the angle is read from 'view.mat' instead.
  4 | local function readViewAngle(videoDir)
  5 |   local annotFile = paths.concat(videoDir, '00000_00_ge.mat');
  6 |   if not paths.filep(annotFile) then
  7 |     annotFile = paths.concat(videoDir, 'view.mat');
  8 |   end
  9 |   return mattorch.load(annotFile).ge[1][1];
 10 | end
 11 | 
 12 | -- Returns the sorted names of the files in "videoDir" that contain "_00_ge.mat".
 13 | local function listAnnotationFiles(videoDir)
 14 |   local flist = paths.dir(videoDir);
 15 |   RemoveDotDirs(flist);
 16 |   table.sort(flist);
 17 | 
 18 |   local pruned_flist = {};
 19 |   -- ipairs keeps the sorted order (pairs gives no ordering guarantee) and the
 20 |   -- search is a plain-text one, so the '.' of ".mat" is not a pattern wildcard.
 21 |   for _, f in ipairs(flist) do
 22 |     if f:find("_00_ge.mat", 1, true) then
 23 |       pruned_flist[#pruned_flist+1] = f;
 24 |     end
 25 |   end
 26 |   return pruned_flist;
 27 | end
 28 | 
 29 | function LoadDatabase(dataConfig, excluded_categories)
 30 |   -----------------------------------------------------------------
 31 |   -- Reads the list of images in the videos from annotDir
 32 |   -- (dataConfig.annotation.dir, laid out as <category>/<video>/<files>).
 33 |   -- inputs:
 34 |   --       dataConfig: The data configuration to load from. Look at config.train
 35 |   --       and config.test.
 36 |   --       excluded_categories: categories to leave out, as understood by
 37 |   --       removeExcludedCategories; nil is treated as an empty list
 38 |   -- outputs:
 39 |   --       dataset: a table with list of files for each category
 40 |   --       dataset[category][angle][video_directory]
 41 |   --       e.g., dataset['sliding-ski'][1]["181_1"] contains the files
 42 |   --       for video "181_1", which is annotated as the first angle.
 43 |   --       Every category gets config.max_angles angle buckets (some may stay
 44 |   --       empty) and dataset.config is set to dataConfig.
 45 |   -- raises:
 46 |   --       an error when the annotated angle of a video has no bucket, i.e.
 47 |   --       when it is not an integer in 1..config.max_angles
 48 |   -----------------------------------------------------------------
 49 | 
 50 |   local max_angles = config.max_angles;
 51 |   local annotDir   = dataConfig.annotation.dir;
 52 | 
 53 |   local dataset = {};
 54 | 
 55 |   -- categories
 56 |   local categories = paths.dir(annotDir);
 57 |   RemoveDotDirs(categories);
 58 |   categories = removeExcludedCategories(categories, excluded_categories or {});
 59 | 
 60 |   for _, category in ipairs(categories) do
 61 |     -- one bucket per possible view angle
 62 |     local byAngle = {};
 63 |     for j = 1, max_angles do
 64 |       byAngle[j] = {};
 65 |     end
 66 |     dataset[category] = byAngle;
 67 | 
 68 |     -- videos
 69 |     local categoryDir = paths.concat(annotDir, category);
 70 |     local videos      = paths.dir(categoryDir);
 71 |     RemoveDotDirs(videos);
 72 | 
 73 |     for _, video in ipairs(videos) do
 74 |       local videoDir = paths.concat(categoryDir, video);
 75 |       local angle    = readViewAngle(videoDir);
 76 |       assert(byAngle[angle], string.format(
 77 |         "LoadDatabase: video %s/%s has view angle %s, expected 1..%d",
 78 |         category, video, tostring(angle), max_angles));
 79 | 
 80 |       byAngle[angle][video] = listAnnotationFiles(videoDir);
 81 |     end
 82 |   end
 83 | 
 84 |   dataset.config = dataConfig;
 85 |   return dataset;
 86 | end
 87 | 
 88 | -- Loads the index of the training split (config.train); "exclude_category"
 89 | -- is handed to LoadDatabase unchanged as its excluded-categories argument.
 90 | function LoadTrainDatabase(exclude_category) return LoadDatabase(config.train, exclude_category) end
 91 | 
 92 | -- Loads the index of the test split (config.test); "exclude_category"
 93 | -- is handed to LoadDatabase unchanged as its excluded-categories argument.
 94 | function LoadTestDatabase(exclude_category) return LoadDatabase(config.test, exclude_category) end
 95 | 
 96 | function ReadIndividualFrame(dataset, category, angle, video_id, imname, savefile, input_type)
 97 |   -----------------------------------------------------------------
 98 |   -- Reads one frame from disk, rescales it to config.imW x config.imH,
 99 |   -- normalizes it and stores the resulting tensor in "savefile".
100 |   -- inputs:
101 |   --       dataset:          The output of "LoadTrainDatabase"
102 |   --       category:         Video category, e.g., 'sliding-ski', 'falling-diving', etc.
103 |   --       angle:            View angle (not used by this function)
104 |   --       video_id:         Video folder
105 |   --       imname:           The name of frame's image to be read (no extension).
106 |   --       savefile:         Save the tensor in this file.
107 |   --       input_type:       Key of dataset.config that describes the data to be
108 |   --                         read (its dir, type, mean, std, nChannels, croppable).
109 |   -- output:
110 |   --       3D Tensor, nChannels x imH x imW. When config.w_crop is set and the
111 |   --       input type is croppable it is (5 * nChannels) x imH x imW instead:
112 |   --       the full frame followed by 4 crops derived from the annotated box.
113 |   -- notes:
114 |   --       Channels are cut down to nChannels before a frame or crop is stored
115 |   --       in the result, so types with fewer than 3 channels fit as well.
116 |   --       The tensor is written to "savefile" with torch.save before returning.
117 |   -----------------------------------------------------------------
118 |   local imH, imW  = config.imH, config.imW;
119 |   local typeConf  = dataset.config[input_type];
120 |   local mean, std = typeConf.mean, typeConf.std;
121 |   local nChannels = typeConf.nChannels;
122 | 
123 |   local impath = paths.concat(typeConf.dir, category, video_id, imname .. "." .. typeConf.type);
124 |   local im     = loadImageOrig(impath);
125 | 
126 |   -- Rescales "region", normalizes the rescaled copy and keeps its first
127 |   -- nChannels channels.
128 |   local function prepare(region)
129 |     local scaled = normalizeImage(image.scale(region, imW, imH), mean, std);
130 |     return scaled[{{1, nChannels}, {}, {}}];
131 |   end
132 | 
133 |   -- Plain case: only the full frame is returned.
134 |   if not (config.w_crop and typeConf.croppable) then
135 |     local imnorm = prepare(im);
136 |     torch.save(savefile, imnorm);
137 |     return imnorm
138 |   end
139 | 
140 |   -- Bounding box from the frame's "_00.mat" annotation, clamped to the image.
141 |   local annotDir = dataset.config.annotation.dir;
142 |   local coord    = mattorch.load(paths.concat(annotDir, category, video_id, imname .. "_00.mat"));
143 |   local height, width = im:size(2), im:size(3);
144 |   local x1 = math.max(math.floor(coord.box[1][1]), 1);
145 |   local y1 = math.max(math.floor(coord.box[1][2]), 1);
146 |   local x2 = math.min(math.floor(coord.box[1][3]), width);
147 |   local y2 = math.min(math.floor(coord.box[1][4]), height);
148 | 
149 |   -- images[1] is the full frame; images[2..5] are crops, each running from
150 |   -- one corner of the box to the opposite corner of the image.
151 |   local images = torch.Tensor(5, nChannels, imH, imW)
152 |   images[1] = prepare(im);
153 |   images[2] = prepare(im[{{},{y1,height},{x1,width}}]);
154 |   images[3] = prepare(im[{{},{1,y2},{1,x2}}]);
155 |   images[4] = prepare(im[{{},{y1,height},{1,x2}}]);
156 |   images[5] = prepare(im[{{},{1,y2},{x1,width}}]);
157 | 
158 |   -- Fold the 5 views into the channel dimension before saving.
159 |   images = images:reshape(5 * nChannels, imH, imW)
160 | 
161 |   torch.save(savefile, images);
162 |   return images
163 | end
164 | 
165 | function LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
166 |   -----------------------------------------------------------------
167 |   -- Returns the preprocessed tensor of one frame, going through a cache of
168 |   -- .t7 files kept in dataset.config.save_dir.
169 |   -- inputs:
170 |   --       dataset:          The output of "LoadTrainDatabase"
171 |   --       category:         Video category, e.g., 'sliding-ski', 'falling-diving', etc.
172 |   --       angle:            View angle (1 out of 8 or 1 out of 3 for symmetric categories)
173 |   --       video_id:         Video folder
174 |   --       imname:           The name of frame's image to be read.
175 |   --       input_type:       Optional type of the data to be read (a key of
176 |   --                         dataset.config). When omitted, every enabled input type
177 |   --                         is loaded and stacked along the channel dimension.
178 |   -- output:
179 |   --       3D Tensor, channels x imH (image height) x imW (image width)
180 |   -----------------------------------------------------------------
181 |   if input_type then
182 |     local suffix  = dataset.config[input_type].suffix;
183 |     local saveDir = dataset.config.save_dir;
184 |     if not paths.dirp(saveDir) then
185 |       paths.mkdir(saveDir)
186 |     end
187 |     -- NOTE: If we may have different oids for a video, we need to use different
188 |     -- save paths for w_crop = true.
189 |     local cropFlag = config.w_crop and '1' or '0';
190 |     local fname = paths.concat(saveDir, category .. '_' .. video_id .. '_' ..
191 |       cropFlag .. '_' .. suffix .. '_' .. imname .. '.t7');
192 | 
193 |     -- Cache miss: build the tensor (ReadIndividualFrame also saves it).
194 |     if not paths.filep(fname) then
195 |       return ReadIndividualFrame(dataset, category, angle, video_id, imname, fname, input_type)
196 |     end
197 |     return torch.load(fname)
198 |   end
199 | 
200 |   -- No type requested: stack all enabled types channel-wise. The order of
201 |   -- the types follows pairs() over the table from GetEnableInputTypes.
202 |   local enabled = GetEnableInputTypes(dataset.config)
203 |   local stacked = torch.Tensor(GetValuesSum(enabled), config.imH, config.imW);
204 | 
205 |   local first = 1
206 |   for typeName, typeChannels in pairs(enabled) do
207 |     local last = first + typeChannels - 1
208 |     stacked[{{first, last}, {}, {}}] = LoadIndividualFrame(dataset, category, angle, video_id, imname, typeName)
209 |     first = last + 1
210 |   end
211 | 
212 |   return stacked
213 | end
214 | 
215 | function ReadTrainImagesPerVideo(dataset, category, angle, video_id, savefile, input_type)
216 |   -----------------------------------------------------------------
217 |   -- Reads training images for a video for a category and an angle and
218 |   -- saves the stacked tensor in "savefile".
219 |   -- inputs:
220 |   --       dataset:          The output of "LoadTrainDatabase"
221 |   --       category:         Video category, e.g., 'sliding-ski', 'falling-diving', etc.
222 |   --       angle:            View angle (1 out of 8 or 1 out of 3 for symmetric categories)
223 |   --       video_id:         Video folder
224 |   --       savefile:         Save the tensor in this file.
225 |   --       input_type:       Key of dataset.config selecting the data to read.
226 |   -- output:
227 |   --       images:  5D Tensor,
228 |   --                # of images x 5 (orig + 4 crops) x nChannels x imH x imW
229 |   --                when config.w_crop is set and the input type is croppable,
230 |   --                otherwise 4D Tensor, # of images x nChannels x imH x imW
231 |   -----------------------------------------------------------------
232 | 
233 |   local imH, imW  = config.imH, config.imW;
234 |   local typeConf  = dataset.config[input_type];
235 |   local nChannels = typeConf.nChannels;
236 |   local frames    = dataset[category][angle][video_id];
237 |   local nImages   = #frames;
238 | 
239 |   -- Use the same condition as ReadIndividualFrame to decide whether frames
240 |   -- come with their 4 crops, and take the channel count from the config
241 |   -- instead of assuming 3.
242 |   local images
243 |   if config.w_crop and typeConf.croppable then
244 |     images = torch.Tensor(nImages, 5, nChannels, imH, imW)
245 |   else
246 |     images = torch.Tensor(nImages, nChannels, imH, imW)
247 |   end
248 | 
249 |   for cnt, f in ipairs(frames) do
250 |     -- only the first of the two '_'-separated captures (the frame name) is used
251 |     local imname = f:match("([^_]+)_([^_]+)");
252 |     -- a cropped frame arrives as (5 * nChannels) x imH x imW; the element
253 |     -- count equals that of the 5 x nChannels x imH x imW slot it is copied to
254 |     images[cnt] = LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
255 |   end
256 | 
257 |   collectgarbage()
258 |   torch.save(savefile, images)
259 |   return images
260 | end
261 | 
262 | function LoadTrainImagesPerVideo(dataset, category, angle, video_id, input_type)
263 |   -----------------------------------------------------------------
264 |   -- Loads every frame of a video (through LoadIndividualFrame and its cache)
265 |   -- and stacks them into one tensor.
266 |   --
267 |   -- inputs:
268 |   --       dataset:          The output of "LoadTrainDatabase"
269 |   --       category:         Video category, e.g., 'sliding-ski', 'falling-diving', etc.
270 |   --       angle:            View angle (1 out of 8 or 1 out of 3 for symmetric categories)
271 |   --       video_id:         Video folder
272 |   --       input_type:       Optional key of dataset.config; when omitted all enabled
273 |   --                         input types are stacked along the channel dimension.
274 |   -- outputs:
275 |   --       images:           4D Tensor, # of images x channels x imH x imW, where
276 |   --                         channels includes the factor 5 (orig + 4 crops)
277 |   --                         for croppable types when config.w_crop is set.
278 |   -----------------------------------------------------------------
279 | 
280 |   local frames = dataset[category][angle][video_id];
281 |   local nChannels
282 |   if input_type then
283 |     local typeConf = dataset.config[input_type]
284 |     nChannels = typeConf.nChannels
285 |     -- Cropped frames carry the original plus 4 crops (the same rule that
286 |     -- GetEnableInputTypes applies when no type is given).
287 |     if config.w_crop and typeConf.croppable then
288 |       nChannels = nChannels * 5
289 |     end
290 |   else
291 |     nChannels = GetValuesSum(GetEnableInputTypes(dataset.config))
292 |   end
293 |   local images = torch.Tensor(#frames, nChannels, config.imH, config.imW)
294 | 
295 |   for cnt, f in ipairs(frames) do
296 |     local imname = f:match("([^_]+)_([^_]+)");
297 |     images[cnt] = LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
298 |   end
299 | 
300 |   return images
301 | end
302 | 
303 | function LoadRandomFrameOfVideo(dataset, category, angle, video_id, input_type)
304 |   -----------------------------------------------------------------
305 |   -- Loads one randomly chosen frame of a video through LoadIndividualFrame.
306 |   --
307 |   -- inputs:
308 |   --       dataset:          The output of "LoadTrainDatabase"
309 |   --       category:         Video category, e.g., 'sliding-ski', 'falling-diving', etc.
310 |   --       angle:            View angle (1 out of 8 or 1 out of 3 for symmetric categories)
311 |   --       video_id:         Video folder
312 |   --       input_type:       Optional type of the data to be read; passed on
313 |   --                         to LoadIndividualFrame unchanged.
314 |   -- outputs:
315 |   --       images:           3D Tensor, channels x imH (image height) x imW (image width)
316 |   -----------------------------------------------------------------
317 |   local frames = dataset[category][angle][video_id]
318 |   -- The frame name is the leading part of the file name, up to (not
319 |   -- including) its first '_'.
320 |   local imname = GetRandomValue(frames):match('[^_]+')
321 |   return LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
322 | end
323 | 
324 | 


--------------------------------------------------------------------------------
/utils.lua:
--------------------------------------------------------------------------------
  1 | -- Removes, in place, every entry whose name starts with '.' (e.g. "." and "..").
  2 | -- A removed entry is overwritten by the current last entry, so order may change.
  3 | function RemoveDotDirs(aTable)
  4 |   if type(aTable) ~= 'table' then return aTable end
  5 |   local pos = 1
  6 |   while pos <= #aTable do
  7 |     if aTable[pos] ~= nil and aTable[pos]:sub(1,1) == '.' then
  8 |       aTable[pos] = aTable[#aTable] -- slot `pos` is examined again next round
  9 |       aTable[#aTable] = nil
 10 |     else
 11 |       pos = pos + 1
 12 |     end
 13 |   end
 14 | end
 15 | 
 16 | -- Counts all entries of aTable, whatever their keys are (unlike `#`, which
 17 | -- is only defined for sequences).
 18 | function getTableSize(aTable)
 19 |   local numItems = 0
 20 |   for _ in pairs(aTable) do numItems = numItems + 1 end
 21 |   return numItems
 22 | end
 23 | 
 24 | -- Returns one value of aTable (keys are ignored); the position among the
 25 | -- collected values is drawn with torch.random(<number of values>).
 26 | function GetRandomValue(aTable)
 27 |   local values = {}
 28 |   for _, value in pairs(aTable) do table.insert(values, value) end
 29 |   return values[ torch.random(#values) ]
 30 | end
 31 | 
 32 | -- Adds up every value stored in aTable (keys are ignored).
 33 | -- Returns 0 for an empty table.
 34 | function GetValuesSum(aTable)
 35 |   local total = 0
 36 |   for _, value in pairs(aTable) do total = total + value end
 37 |   return total
 38 | end
 39 | 
 40 | function loadImageOrig(path)
 41 |   -----------------------------------------------------------------
 42 |   -- Reads the image at "path" as a 3 x height x width tensor: a 1-channel
 43 |   -- image is replicated over 3 channels and an alpha channel is dropped.
 44 |   -- Raises "image structure not compatible" for any other layout.
 45 |   -----------------------------------------------------------------
 46 |   local im   = image.load(path)
 47 |   local nDim = im:dim()
 48 |   if nDim == 2 then -- 1-channel image loaded as 2D tensor
 49 |     return im:view(1,im:size(1), im:size(2)):repeatTensor(3,1,1)
 50 |   end
 51 |   local nChannels = (nDim == 3) and im:size(1) or 0
 52 |   if nChannels == 1 then      -- 1-channel image
 53 |     return im:repeatTensor(3,1,1)
 54 |   elseif nChannels == 3 then  -- 3-channel image
 55 |     return im
 56 |   elseif nChannels == 4 then  -- image with alpha
 57 |     return im[{{1,3},{},{}}]
 58 |   end
 59 |   error("image structure not compatible")
 60 | end
 61 | 
 62 | function loadImage(path, imH, imW)
 63 |   -----------------------------------------------------------------
 64 |   -- Reads an image with loadImageOrig and rescales it
 65 |   -- inputs:
 66 |   --        "path": path to the image
 67 |   --        "imH" and "imW": height and width of the rescaled image
 68 |   -- output:
 69 |   --        the rescaled image
 70 |   -----------------------------------------------------------------
 71 |   local original = loadImageOrig(path)
 72 |   -- image.scale is given the target width before the height
 73 |   return image.scale(original, imW, imH)
 74 | end
 75 | 
 76 | function normalizeImage(im, mean, std)
 77 |   -----------------------------------------------------------------
 78 |   -- Normalizes image "im" in place, channel by channel: (im - mean) / std,
 79 |   -- using entries 1..3 of "mean" and "std". Returns "im" itself.
 80 |   -----------------------------------------------------------------
 81 |   for channel = 1, 3 do
 82 |     im[channel]:add(-mean[channel]):div(std[channel])
 83 |   end
 84 |   return im;
 85 | end
 86 | 
 87 | function LoadRandomSamples(nSamples, allfiles, imH, imW)
 88 |   -----------------------------------------------------------------
 89 |   -- Loads "nSamples" randomly chosen images (no file is taken twice) from
 90 |   -- "allfiles" and rescales them to imH x imW
 91 |   -- inputs:
 92 |   --       nSamples: # of images that is sampled; must not exceed #allfiles
 93 |   --       allfiles: an array of paths of the images in the dataset
 94 |   --       imH, imW: size of the rescaled image
 95 |   -- outputs:
 96 |   --       images: 4D Tensor that includes "nSamples" number of 3 x imH x imW images
 97 |   -----------------------------------------------------------------
 98 |   assert(nSamples <= #allfiles, string.format(
 99 |     "LoadRandomSamples: asked for %d samples but only %d files are available", nSamples, #allfiles))
100 |   local images = torch.Tensor(nSamples, 3, imH, imW);
101 |   local order  = torch.randperm(#allfiles); -- its first nSamples entries are the sample
102 |   for i = 1, nSamples do
103 |     images[i] = loadImage(allfiles[order[i]], imH, imW);
104 |   end
105 |   return images;
106 | end
107 | 
108 | function ComputeMeanStd(nSample, allfiles, imH, imW)
109 |   -----------------------------------------------------------------
110 |   -- Computes the mean and std of randomly sampled images
111 |   -- inputs:
112 |   --       nSample: # of images that is sampled
113 |   --       allfiles: an array of paths of the images in the dataset
114 |   --       imH, imW: size of the rescaled image
115 |   -- outputs:
116 |   --       mean: a 3-element array (the mean for each channel)
117 |   --       std:  a 3-element array (the std for each channel)
118 |   --       Each statistic is taken over all sampled images and all pixels
119 |   --       of the channel.
120 |   -----------------------------------------------------------------
121 | 
122 |   local images    = LoadRandomSamples(nSample, allfiles, imH, imW);
123 |   local mean, std = {}, {};
124 | 
125 |   -- images is nSample x 3 x imH x imW; slice out one channel at a time
126 |   for channel = 1, 3 do
127 |     local plane   = images[{{},channel,{},{}}];
128 |     mean[channel] = torch.mean(plane);
129 |     std[channel]  = torch.std(plane);
130 |   end
131 | 
132 |   return mean, std;
133 | end
134 | 
135 | function MakeListTrainFrames(dataset, trainDir, image_type) -- returns the image path of every frame file
136 |   local allfiles = {}; -- local: the list must not leak into the global scope
137 |   for category, subdataset in pairs(dataset) do
138 |     if category ~= 'config' then -- the 'config' entry is not a category
139 |       for _, subsubdataset in pairs(subdataset) do
140 |         for dirs, files in pairs(subsubdataset) do
141 |           for _, f in pairs(files) do
142 |             -- the last 10 characters of the file name are replaced by "." .. image_type
143 |             local fname = string.sub(f, 1, -11) .. "." .. image_type;
144 |             allfiles[#allfiles+1] = paths.concat(trainDir, category, dirs, fname);
145 |           end
146 |         end
147 |       end
148 |     end
149 |   end; return allfiles;
150 | end
151 | 
152 | function MakeListGEFrames(dataset, data_type) -- returns the path of every file listed in "dataset"
153 |   local geDir    = config.GE.dir;
154 |   local allfiles = {}; -- local: the list must not leak into the global scope
155 |   for categories, subdataset in pairs(dataset) do
156 |     for angles, subsubdataset in pairs(subdataset) do
157 |       for dirs, files in pairs(subsubdataset) do
158 |         for _, f in pairs(files) do -- geDir/<category>/<category>_<angle>_<data_type>/<dir>/<file>
159 |           allfiles[#allfiles+1] = paths.concat(geDir, categories, categories .. "_" .. angles .. "_" .. data_type, dirs, f);
160 |         end
161 |       end
162 |     end
163 |   end
164 |   return allfiles;
165 | end
166 | 
167 | -- Shuffles the array "list" in place (Fisher-Yates).
168 | --   deterministic: when set, math.random is used after re-seeding it with
169 | --                  math.randomseed(2); otherwise torch.random picks the positions.
170 | -- Note: the deterministic mode re-seeds the global math.random generator.
171 | function shuffleList(list, deterministic)
172 |   if deterministic then math.randomseed(2) end -- shuffle! but deterministicly.
173 |   local rand = deterministic and math.random or torch.random
174 | 
175 |   -- walk from the end; each element is swapped with one at a position in 1..i
176 |   for i = #list, 2, -1 do
177 |     local j = rand(i)
178 |     list[i], list[j] = list[j], list[i]
179 |   end
180 | end
181 | 
182 | -- Returns the first run of characters other than '-' in a category name,
183 | -- e.g. "sliding" for 'sliding-ski'.
184 | function GetPhysicsCategory(category) return category:match("[^-]+") end
185 | 
186 | function MakeShuffledTuples(dataset, deterministic)
187 |   -- Returns a shuffled list with one tuple per video folder of "dataset";
188 |   -- tuple: category, physics category, angle, folder
189 |   local tuples = {}; -- local: the list must not leak into the global scope
190 |   for category, subdataset in pairs(dataset) do
191 |     if category ~= 'config' then
192 |       local physicsCategory = GetPhysicsCategory(category)
193 |       for angles, subsubdataset in pairs(subdataset) do
194 |         for dirs, _ in pairs(subsubdataset) do
195 |           table.insert(tuples, {category, physicsCategory, angles, dirs});
196 |         end
197 |       end
198 |     end
199 |   end
200 |   shuffleList(tuples, deterministic);
201 |   return tuples;
202 | end
203 | 
204 | -- True when "category" contains one of the names in excluded_categories (nil: none).
205 | function isExcluded(excluded_categories, category)
206 |   for _, ecat in pairs(excluded_categories or {}) do
207 |     -- plain-text search: the '-' in names like 'sliding-ski' is a pattern operator
208 |     if category:find(ecat, 1, true) then return true end
209 |   end
210 |   return false
211 | end
212 | 
213 | -- Returns, in their original order, the entries of "categories" that isExcluded rejects for no name.
214 | function removeExcludedCategories(categories, excluded_categories)
215 |   excluded_categories = excluded_categories or {} -- nil: nothing to exclude
216 |   local result = {};
217 |   for _, v in ipairs(categories) do
218 |     if not isExcluded(excluded_categories, v) then result[#result+1] = v end
219 |   end
220 |   assert(#result + #excluded_categories <= #categories, "At least one category " ..
221 |                   "should be removed per excluded_categories.")
222 |   assert(#result > 0, "Cannot exclude all categories.")
223 |   return result;
224 | end
225 | 
226 | -- Returns three parallel lists with one entry per key of "dataset": physics category, key, entry count.
227 | -- NOTE(review): a 'config' key (as set by LoadDatabase) is not skipped here - confirm callers expect that.
228 | function getAllCategoriesandAngles(dataset)
229 |   local physics_category_list, category_list, angle_list = {}, {}, {}; -- local: no leaking globals
230 |   for k, v in pairs(dataset) do
231 |     table.insert(physics_category_list, GetPhysicsCategory(k))
232 |     table.insert(category_list, k);
233 |     table.insert(angle_list, getTableSize(v));
234 |   end
235 |   return physics_category_list, category_list, angle_list;
236 | end
237 | 
238 | function GetNNParamsToCPU(nnModel)
239 |   -- Returns param:float() for every parameter tensor of the model, in the order
240 |   -- of nnModel:parameters(); an empty table when the model has no parameters.
241 |   local params = nnModel:parameters()
242 |   local paramsCPU = {} -- local: the result must not leak into the global scope
243 |   for i, param in ipairs(params or {}) do
244 |     paramsCPU[i] = param:float()
245 |   end
246 |   return paramsCPU
247 | end
248 | 
249 | function LoadNNlParams(current_model,saved_params)
250 |   -- Copies saved_params[i] into the i-th parameter tensor of current_model,
251 |   -- after asserting that the layer counts and all tensor sizes agree.
252 |   local params = current_model:parameters()
253 |   if params == nil then return end -- no parameters: nothing to load
254 |   assert(#params == #saved_params,
255 |     string.format('#layer != #saved_layers (%d vs %d)!', #params, #saved_params));
256 |   for i = 1,#params do
257 |     assert(params[i]:nDimension() == saved_params[i]:nDimension(),
258 |       string.format("Layer %d: dimension mismatch (%d vs %d).",
259 |         i, params[i]:nDimension(), saved_params[i]:nDimension()))
260 |     for j = 1, params[i]:nDimension() do
261 |       assert(params[i]:size(j) == saved_params[i]:size(j),
262 |         string.format("Layer %d, Dim %d: size does not match (%d vs %d).",
263 |           i, j, params[i]:size(j), saved_params[i]:size(j)))
264 |     end
265 |     params[i]:copy(saved_params[i]);
266 |   end
267 | end
268 | 
269 | function rand_initialize(layer)
270 |   -- Overwrites the weights of a supported layer with torch.randn values scaled
271 |   -- by sqrt(10 / n) and fills its bias with 0; other layer types are untouched.
272 |   local tn = torch.type(layer)
273 |   local n -- divisor of the scale: kH * kW * nInputPlane, or the weight's 2nd size
274 |   if tn == "cudnn.SpatialConvolution" or tn == "cudnn.VolumetricConvolution" then
275 |     -- NOTE(review): the volumetric case uses no temporal kernel size - confirm intended
276 |     n = layer.kH * layer.kW * layer.nInputPlane
277 |   elseif tn == "nn.Linear" then
278 |     n = layer.weight:size(2)
279 |   end
280 |   if not n then return end
281 |   local c = math.sqrt(10.0 / n);
282 |   layer.weight:copy(torch.randn(layer.weight:size()) * c)
283 |   layer.bias:fill(0)
284 | end
285 | 
286 | -- Maps (physicsCategory, viewpoint) to a single id: the classes are laid out
287 | -- back to back in config.classes order, class i taking config.class_angles[i]
288 | -- consecutive ids. Raises an error for an unknown physicsCategory.
289 | function GetCategoryViewPointId(physicsCategory, viewpoint)
290 |   local base = 0; -- number of ids taken by the classes before the current one
291 |   for i, class in ipairs(config.classes) do
292 |     if class == physicsCategory then return base + viewpoint end
293 |     base = base + config.class_angles[i];
294 |   end
295 |   error("failed to find the physicsCategory:" .. physicsCategory);
296 | end
297 | 
298 | -- Inverse of GetCategoryViewPointId: returns the class name and the viewpoint
299 | -- (counted from 1 within that class) that categoryId stands for.
300 | function DecryptCategoryViewPointId(categoryId)
301 |   assert(categoryId > 0, "Invalid categoryId " .. tostring(categoryId))
302 |   local remaining = categoryId -- categoryId minus the ids of the classes already passed
303 |   for i, class in ipairs(config.classes) do
304 |     local nAngles = config.class_angles[i]
305 |     if remaining <= nAngles then return class, remaining end
306 |     remaining = remaining - nAngles
307 |   end
308 |   error("Invalid categoryId " .. tostring(categoryId));
309 | end
310 | 
311 | -- Returns the key under which physicsCategory is stored in config.classes.
312 | -- Raises an error when the category is not listed there.
313 | function GetCategoryId(physicsCategory)
314 |   for id, class in pairs(config.classes) do
315 |     if class == physicsCategory then return id end
316 |   end
317 | 
318 |   error("failed to find the physicsCategory:" .. physicsCategory);
319 | end
320 | 
321 | -- Returns the index in config.classes of the class whose range of ids
322 | -- (config.class_angles[i] consecutive ids per class, in order) contains
323 | -- categoryId. Raises an error when no class does.
324 | function CategoryViewPointId2CategoryId(categoryId)
325 |   assert(categoryId > 0, "Invalid categoryId " .. tostring(categoryId))
326 |   local upper = 0; -- largest id that belongs to the classes visited so far
327 |   for i in ipairs(config.classes) do
328 |     upper = upper + config.class_angles[i];
329 |     if upper >= categoryId then return i end
330 |   end
331 |   error("Invalid categoryId " .. tostring(categoryId));
332 | end
333 | 
334 | -- Walks down nested tables, choosing one key with torch.random at every level, until a non-table
335 | -- value is reached. Returns the chosen keys followed by that value. 'config' and empty tables are skipped.
336 | function GetUniformRandomElement(data)
337 |   local path = {}
338 |   while type(data) == 'table' do
339 |     local candidates = {}
340 |     for key, value in pairs(data) do
341 |       if key ~= 'config' and (type(value) ~= 'table' or next(value) ~= nil) then
342 |         table.insert(candidates, key)
343 |       end
344 |     end
345 |     local chosen = candidates[torch.random(#candidates)]
346 |     path[#path+1], data = chosen, data[chosen]
347 |   end
348 |   path[#path+1] = data
349 |   return path
350 | end
350 | 
351 | -- Picks, with torch.random, one key of "dataset" that starts with
352 | -- physicsCategory and has a non-empty entry for "angle".
353 | -- Returns nil when no key qualifies.
354 | function GetUniformRandomCategory(dataset, physicsCategory, angle)
355 |   local prefixLen, matches = string.len(physicsCategory), {}
356 | 
357 |   for key, value in pairs(dataset) do
358 |     local samePrefix = string.sub(key, 1, prefixLen) == physicsCategory
359 |     if samePrefix and value[angle] and next(value[angle]) then
360 |       matches[#matches+1] = key
361 |     end
362 |   end
363 |   if #matches == 0 then return nil end
364 |   return matches[torch.random(#matches)]
365 | end
366 | 
367 | -- Draws a random video and returns {category, physics category, angle, folder}.
368 | function GetUniformRandomData(dataset)
369 |   -- the first three keys chosen on the way down are used as category,
370 |   -- angle and folder
371 |   local picked   = GetUniformRandomElement(dataset)
372 |   local category = picked[1]
373 |   return {category, GetPhysicsCategory(category), picked[2], picked[3]}
374 | end
375 | 
376 | function log(...) -- prints the arguments twice: once meant for the log file, once for stdout
377 |   -- Log to file: make config.logFile the default output file, then print.
378 |   io.output(config.logFile)
379 |   print(...) -- NOTE(review): reaches the file only if `print` honours io.output; stock Lua's print writes to stdout - confirm
380 |   -- Log to stdout: make stdout the default output file again, then print.
381 |   io.output(io.stdout)
382 |   print(...)
383 | end
384 | 
385 | -- Returns {input_type = channel count} for every table entry of input_config whose
386 | -- "enable" field is set; with config.w_crop, croppable types count 5 times their nChannels.
387 | function GetEnableInputTypes(input_config)
388 |   local result = {}
389 |   for input_type, conf in pairs(input_config) do
390 |     if type(conf) == 'table' and conf.enable then
391 |       local channels = conf.nChannels
392 |       if config.w_crop and conf.croppable then channels = channels * 5 end
393 |       result[ input_type ] = channels
394 |     end
395 |   end
396 |   return result
397 | end
398 | 
399 | -- Returns the accuracy and a config.nCategories x 2 tensor with {# correct, # samples} per label.
400 | function GetPerClassAccuracy(predictions, labels)
401 |   local per_class = torch.Tensor(config.nCategories, 2):fill(0)
402 |   labels, predictions = labels:clone(), predictions:clone()
403 |   local nAccurate = 0
404 |   for i = 1, labels:size(1) do
405 |     local row = per_class[ labels[i] ] -- view on the row of this sample's label
406 |     row[2] = row[2] + 1
407 |     if labels[i] == predictions[i] then
408 |       nAccurate = nAccurate + 1
409 |       row[1] = row[1] + 1
410 |     end
411 |   end
412 |   return nAccurate / labels:size(1), per_class
413 | end
414 | 
415 | function GetAnimationFeatures(model, convLayer)
416 |   -- Gathers the feature tensors of the game-engine animations for every id
417 |   -- in 1..sum(config.class_angles). For each id the .t7 files found in
418 |   -- config.GE.featsDir/<id> are loaded; when there are none, the animations
419 |   -- are fed through "model" one at a time and the outputs are saved there.
420 |   -- All tensors are concatenated along dimension 3; "labels" receives
421 |   -- cur:size(1) copies of the id for every tensor. With "convLayer" the result
422 |   -- is passed through it, reshaped and saved as all.t7. Returns feats, labels.
423 |   local n = GetValuesSum(config.class_angles) -- Total number of classes
424 |   local feats
425 |   local labels = {}
426 | 
427 |   -- Appends one feature tensor to "feats" and its labels to "labels".
428 |   local function collect(cur, id)
429 |     feats = feats and torch.cat(feats, cur, 3) or cur
430 |     for k = 1, cur:size(1) do
431 |       labels[ #labels+1 ] = id
432 |     end
433 |   end
434 | 
435 |   for i = 1, n do
436 |     local featsDir  = paths.concat(config.GE.featsDir, i)
437 |     local featFiles = paths.dir(featsDir)
438 |     RemoveDotDirs( featFiles )
439 |     if featFiles and #featFiles > 0 then
440 |       for _, v in pairs(featFiles) do
441 |         collect(torch.load( paths.concat(featsDir, v) ), i)
442 |       end
443 |     else
444 |       log("Animation vectors for category " .. tostring(i) .. " not found.")
445 |       os.execute('mkdir -p ' .. featsDir)
446 |       local category, angle = DecryptCategoryViewPointId(i)
447 |       local gameEngineVideos = LoadGEPerCategory(category, angle, dataset_GE):transpose(2, 3):cuda()
448 |       log("Feed-forward animation to get features.")
449 |       for j = 1, gameEngineVideos:size(1) do
450 |         local cur = model:forward( gameEngineVideos[ {{j}, {}, {}, {}, {}} ] )
451 |         collect(cur, i)
452 |         -- Cache for future use:
453 |         torch.save( paths.concat(featsDir, tostring(j) .. '.t7'), cur)
454 |       end
455 |     end
456 |   end
457 | 
458 |   feats = feats:transpose(2, 3):transpose(1, 2)
459 |   if convLayer then
460 |     feats = convLayer:forward(feats):reshape(config.nClasses, 10, 4096)
461 |     torch.save(paths.concat( config.DataRootPath, 'all.t7'), feats)
462 |   end
463 |   return feats, labels
464 | end
465 | 
466 | -- Smoothed cosine similarity between every row of M1 and every row of M2:
467 | -- result[i][j] = dot(M1[i], M2[j]) / (norm(M1[i]) * norm(M2[j]) + 1e-5).
468 | function GetPairwiseCosine(M1, M2)
469 |   assert(M1:size(2) == M2:size(2), "ERROR: dimensions mismatch!")
470 |   local function rowNorms(M) -- Euclidean norm of every row, as a 1D tensor
471 |     return torch.cmul(M, M):sum(2):sqrt():view(M:size(1))
472 |   end
473 |   local denominator = torch.ger(rowNorms(M1), rowNorms(M2)) + 1e-5
474 |   return torch.cdiv(M1 * M2:t(), denominator)
475 | end
476 | 
477 | -- Counts the entries found two levels below every top-level key of
478 | -- "dataset" other than 'config' (category -> view -> video folder).
479 | function GetVideoCount(dataset)
480 |   local total = 0
481 |   for name, cat in pairs(dataset) do
482 |     if name ~= 'config' then
483 |       for _, view in pairs(cat) do
484 |         for _ in pairs(view) do total = total + 1 end
485 |       end
486 |     end
487 |   end
488 |   return total
489 | end
490 | 
491 | -- Builds result[i] = tensor[i][ indices[i] ] for every i and returns it via :cuda().
492 | function Choose(tensor, indices)
493 |   local n = indices:size(1)
494 |   assert(tensor:size(1) == n, "Dimension mismatch")
495 |   local result = torch.Tensor( indices:size() )
496 |   for i = 1, n do result[i] = tensor[i][ indices[i] ] end
497 |   return result:cuda()
498 | end
499 | 
500 | -- True when some entry of "dict" equals "value" (compared with ==),
501 | -- false otherwise; keys are ignored.
502 | function ContainsValue(dict, value)
503 |   for _, candidate in pairs(dict) do
504 |     if candidate == value then return true end
505 |   end
506 |   return false
507 | end
508 | 
509 | -- Per target: weights exp(-(target - j)^2 / sigma) over its block of 10 consecutive ids, normalized to sum 1.
510 | function GetGaussianTarget(target)
511 |   local result = torch.CudaTensor(target:size(1), config.nClasses):fill(0)
512 |   for i = 1, target:size(1) do
513 |     local center, sigma = target[i], 1
514 |     local blockStart = math.floor((center - 1) / 10) * 10 -- block is blockStart+1 .. blockStart+10
515 |     for j = blockStart + 1, blockStart + 10 do
516 |       result[i][j] = torch.exp( -(center - j)^2 / sigma)
517 |     end
518 |     result[i] = result[i] / result[i]:sum()
519 |   end
520 |   return result
521 | end
522 | 


--------------------------------------------------------------------------------