├── models
├── classifier.lua
├── pairwisecosine_GEfix.lua
├── motion_embedding.lua
├── motion_row.lua
└── image_row.lua
├── LICENSE
├── main.lua
├── data.lua
├── README.md
├── layers
└── SmoothPairwiseCosineSimilarity.lua
├── io
├── compute_mean_std.lua
├── readGEFiles.lua
├── readBatch.lua
└── readFiles.lua
├── train_functions.lua
├── setting_options.lua
├── networks
└── ModelConstruction_IM_GEFixParallel.lua
└── utils.lua
/models/classifier.lua:
--------------------------------------------------------------------------------
1 | return nn.Sequential():add(nn.Linear(4096,config.nClasses)):add(nn.LogSoftMax()) -- classifier head: Linear 4096 -> config.nClasses followed by LogSoftMax
--------------------------------------------------------------------------------
/models/pairwisecosine_GEfix.lua:
--------------------------------------------------------------------------------
1 | -- Matching head: smooth pairwise cosine similarity, pooled over the third axis, then log-softmax.
2 | local mlp = nn.Sequential()
3 | mlp:add(nn.SmoothPairwiseCosineSimilarity())
4 | -- View the similarity matrix as batchSize x nClasses x 10 (10 is presumably the frames per GE video -- TODO confirm).
5 | mlp:add(nn.Reshape(config.batchSize, config.nClasses, 10, false))
6 | mlp:add(nn.Exp())
7 | mlp:add(nn.Sum(3))
8 | mlp:add(nn.LogSoftMax()) -- was nn:LogSoftMax(), which passed the nn table itself as a constructor argument
9 | return mlp
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | N3: Newtonian Image Understanding.
2 |
3 | All rights reserved.
4 |
5 | MIT License
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a
8 | copy of this software and associated documentation files (the "Software"),
9 | to deal in the Software without restriction, including without limitation
10 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 | and/or sell copies of the Software, and to permit persons to whom the
12 | Software is furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included
15 | in all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | OTHER DEALINGS IN THE SOFTWARE.
24 |
--------------------------------------------------------------------------------
/models/motion_embedding.lua:
--------------------------------------------------------------------------------
1 | local mlp=nn.Sequential()
2 | -- Reshape the input to nCategories x 1 x 10 x 4096.
3 | mlp:add(nn.Reshape(config.nCategories,1,10,4096))
4 | -- NOTE(review): this overwrites slot 6 of the global `arg` table with a hard-coded variant spec.
5 | arg[6] = "10*4096FC_1_bn"
6 | -- Split the spec "<module>_<variant>_<bnorm>" on underscores; here m="10*4096FC", var="1", bnorm="bn".
7 | local m, var, bnorm = arg[6]:match("([^_]+)_([^_]+)_([^_]+)")
8 | -- Choose the embedding layers from the spec; with the hard-coded value only the "10*4096FC" branch runs.
9 | if m=="2*convolve" then
10 | mlp:add(cudnn.SpatialConvolution(1,10,1001,1,1,1,500,0))
11 | mlp:add(nn.ReLU(true))
12 | mlp:add(cudnn.SpatialConvolution(10,20,1,7,1,1,0,3))
13 | elseif m=="10*4096FC" then
14 | mlp:add(nn.Reshape(config.nCategories*10,4096,false))
15 | mlp:add(nn.Linear(4096,4096))
16 | if var == "2" then
17 | mlp:add(nn.ReLU(true))
18 | mlp:add(nn.Linear(4096,4096))
19 | end
20 | mlp:add(nn.Reshape(config.nCategories,1,10,4096,false))
21 | elseif m=="4096*10FC" then
22 | mlp:add(nn.Transpose{3,4})
23 | mlp:add(nn.Reshape(config.nClasses*4096, 10, false))
24 | mlp:add(nn.Linear(10,10))
25 | if var == "2" then
26 | mlp:add(nn.ReLU(true))
27 | mlp:add(nn.Linear(10,10))
28 | end
29 | mlp:add(nn.Reshape(config.nClasses, 1, 4096, 10, false))
30 | mlp:add(nn.Transpose{3,4})
31 | else
32 | mlp:add(cudnn.SpatialConvolution(1,20,1,7,1,1,0,3))
33 | end
34 | -- Max over dimension 2, ReLU, then reshape to (nCategories*10) x 4096.
35 | mlp:add(nn.Max(2))
36 | mlp:add(nn.ReLU(true))
37 | mlp:add(nn.Reshape(config.nCategories*10,4096,false))
38 | -- Batch normalization is appended only when the third spec field is "bn".
39 | if bnorm == "bn" then
40 | mlp:add(nn.BatchNormalization(4096, 1e-3))
41 | end
42 |
43 | return mlp;
--------------------------------------------------------------------------------
/models/motion_row.lua:
--------------------------------------------------------------------------------
1 | local C3D = nn.Sequential()
2 | -- Five groups of 3x3x3 volumetric convolution + ReLU + (1,2,2) max-pooling; the first convolution takes 10 input planes.
3 | --------------------- Convolutional Layers ------------------
4 | ----------------------- 1st layer group ---------------------
5 | C3D:add(cudnn.VolumetricConvolution(10,64,3,3,3,1,1,1,1,1,1)) -- Conv1a
6 | C3D:add(cudnn.ReLU(true))
7 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
8 | ----------------------- 2nd layer group ---------------------
9 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1)) -- Conv2a
10 | C3D:add(cudnn.ReLU(true))
11 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
12 | ----------------------- 3rd layer group ---------------------
13 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1)) -- Conv3a
14 | C3D:add(cudnn.ReLU(true))
15 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
16 | ------------------------4th layer group-------------------------------
17 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1)) -- Conv3b
18 | C3D:add(cudnn.ReLU(true))
19 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
20 | ----------------------- 5th layer group ---------------------
21 | C3D:add(cudnn.VolumetricConvolution(64,64,3,3,3,1,1,1,1,1,1)) -- Conv4a
22 | C3D:add(cudnn.ReLU(true))
23 | C3D:add(cudnn.VolumetricMaxPooling(1,2,2))
24 | -- Max over dimension 3, flatten to 64*8*8 features, then ReLU and dropout (p = 0.5).
25 | C3D:add(nn.Max(3))
26 | C3D:add(nn.View(64*8*8))
27 | C3D:add(cudnn.ReLU(true))
28 | C3D:add(nn.Dropout(0.5))
29 | -- NOTE(review): LoadModel/LoadModelFull remove modules 16-19 (the four layers just above) by index; keep this order.
30 | return C3D
31 |
--------------------------------------------------------------------------------
/main.lua:
--------------------------------------------------------------------------------
1 | -- Usage: th main.lua {train|test}
2 | -- `mode` is deliberately global: setting_options.lua and models/image_row.lua read it.
3 | mode = arg[1]
4 | assert (mode=='train' or mode=='test', "Bad arguments. Usage th main.lua {train|test}")
5 |
6 | require 'cunn'
7 | -- require 'fbcunn'
8 | require 'cudnn'
9 | require 'xlua'
10 | require 'optim'
11 | require 'math'
12 | require 'gnuplot'
13 | require 'sys'
14 | require 'image'
15 |
16 | mattorch = require('fb.mattorch');
17 | pl = require'pl.import_into'()
18 | debugger = require('fb.debugger');
19 |
20 | -- load the options (defines the global `config`), select the GPU, and fix the random seed for ease of debugging
21 | paths.dofile('setting_options.lua');
22 | cutorch.setDevice(config.GPU);
23 | torch.manualSeed(config.GPU); -- the GPU index is reused as the seed value
24 | -- the remaining scripts are loaded in dependency order
25 | paths.dofile('utils.lua');
26 | ----------------------------
27 | paths.dofile('data.lua');
28 | ----------------------------------
29 | paths.dofile('layers/SmoothPairwiseCosineSimilarity.lua');
30 | -----------------------------
31 | paths.dofile('networks/ModelConstruction_IM_GEFixParallel.lua');
32 | --------------------------------
33 | paths.dofile('train_functions.lua');
34 | ------------------------------
35 | log(config)
36 | -- test: restore the full network from config.initModelPath.fullNN and evaluate; train: initialise the sub-networks and train
37 | if mode == 'test' then
38 | config.nIter = GetVideoCount(testset)
39 | model:LoadModelFull(config.initModelPath.fullNN)
40 | log(model.fullNN)
41 | test()
42 | else
43 | model:LoadModel(config.initModelPath.imageNN,config.initModelPath.animNN)
44 | log(model.fullNN)
45 | train()
46 | end
47 |
--------------------------------------------------------------------------------
/models/image_row.lua:
--------------------------------------------------------------------------------
1 | require 'cudnn'
2 | require 'cunn'
3 | local alexnet = nn.Sequential()
4 | require 'inn'
5 | -- RGB input, plus one extra channel when the mask input is enabled for the current mode.
6 | local input_channels = 3
7 | if (mode == 'train' and config.train.mask.enable)
8 | or (mode == 'test' and config.test.mask.enable) then
9 | input_channels = input_channels + 1
10 | end
11 | -- AlexNet-style stack: five convolutions with LRN / max-pooling, then two 4096-d fully connected layers.
12 | alexnet:add(cudnn.SpatialConvolution(input_channels, 96, 11, 11, 4, 4, 0, 0, 1));
13 | alexnet:add(cudnn.ReLU(true))
14 | alexnet:add(inn.SpatialCrossResponseNormalization(5, 0.000100, 0.7500, 1.000000))
15 | alexnet:add(cudnn.SpatialMaxPooling(3, 3, 2, 2, 0, 0):ceil())
16 | alexnet:add(cudnn.SpatialConvolution(96, 256, 5, 5, 1, 1, 2, 2, 2))
17 | alexnet:add(cudnn.ReLU(true))
18 | alexnet:add(inn.SpatialCrossResponseNormalization(5, 0.000100, 0.7500, 1.000000))
19 | alexnet:add(cudnn.SpatialMaxPooling(3, 3, 2, 2, 0, 0):ceil())
20 | alexnet:add(cudnn.SpatialConvolution(256, 384, 3, 3, 1, 1, 1, 1, 1))
21 | alexnet:add(cudnn.ReLU(true))
22 | alexnet:add(cudnn.SpatialConvolution(384, 384, 3, 3, 1, 1, 1, 1, 2))
23 | alexnet:add(cudnn.ReLU(true))
24 | alexnet:add(cudnn.SpatialConvolution(384, 256, 3, 3, 1, 1, 1, 1, 2))
25 | alexnet:add(cudnn.ReLU(true))
26 | alexnet:add(inn.SpatialCrossResponseNormalization(5, 0.000100, 0.7500, 1.000000))
27 | alexnet:add(cudnn.SpatialMaxPooling(3, 3, 2, 2, 0, 0):ceil())
28 | alexnet:add(nn.View(-1):setNumInputDims(3)) -- flatten the 3-D feature map of each sample
29 | alexnet:add(nn.Linear(9216, 4096))
30 | alexnet:add(cudnn.ReLU(true))
31 | alexnet:add(nn.Dropout(config.dropoutProb))
32 | alexnet:add(nn.Linear(4096, 4096))
33 | alexnet:add(cudnn.ReLU(true))
34 |
35 | return alexnet
--------------------------------------------------------------------------------
/data.lua:
--------------------------------------------------------------------------------
1 | paths.dofile('io/readFiles.lua')
2 | paths.dofile('io/readGEFiles.lua');
3 | paths.dofile('io/compute_mean_std.lua');
4 | paths.dofile('io/readBatch.lua');
5 | function GetASiameseBatch(nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages, opt)
6 |   -- Resume GetASiameseBatchCoroutine for the next batch; assert() re-raises a failed resume instead of returning its message as `input`.
7 |   local _, input, target = assert(coroutine.resume(GetASiameseBatchCoroutine, nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages, opt.test))
8 |   return input, target
9 | end
10 | -- Fetch the next image batch from GetAnImageBatchCoroutine; raises if the coroutine failed or is already dead.
11 | function GetAnImageBatch(batchSize, opt)
12 |   -- assert() turns a failed resume (false, message) into an error instead of handing the message back as `input`.
13 |   local _, input, target = assert(coroutine.resume(GetAnImageBatchCoroutine,
14 |     batchSize, opt.viewpoint, opt.test, opt.deterministic, opt.spline))
15 |   return input, target
16 | end
17 | -- Fetch the next batch from GetAUniformImageBatchCoroutine; raises if the coroutine failed or is already dead.
18 | function GetAUniformImageBatch(batchSize, opt)
19 |   -- assert() turns a failed resume (false, message) into an error instead of handing the message back as `input`.
20 |   local _, input, target = assert(coroutine.resume(GetAUniformImageBatchCoroutine,
21 |     batchSize, opt.viewpoint, opt.test, opt.spline))
22 |   return input, target
23 | end
24 | -- Fetch the next batch from GetAUniformAnimationBatchCoroutine; a failed or dead coroutine now raises instead of leaking its message as `input`.
25 | function GetAUniformAnimationBatch(batchSize, opt)
26 |   local _, input, target = assert(coroutine.resume(GetAUniformAnimationBatchCoroutine,
27 |     batchSize, opt.viewpoint, opt.spline))
28 |   return input, target
29 | end
30 | -- Fetch the next batch from GetAVideoBatchCoroutine; a failed or dead coroutine now raises instead of leaking its message as `input`.
31 | function GetAVideoBatch(opt)
32 |   local _, input, target = assert(coroutine.resume(GetAVideoBatchCoroutine,
33 |     opt.viewpoint, opt.test, opt.spline))
34 |   return input, target
35 | end
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # N3: Newtonian Image Understanding: Unfolding the Dynamics of Objects in Static Images
2 | This is the source code for Newtonian Neural Networks N3, which predicts the dynamics of objects in scenes.
3 |
4 | ### Citation
5 | If you find N3 useful in your research, please consider citing:
6 | ```
7 | @inproceedings{mottaghiCVPR16N3,
8 | Author = {Roozbeh Mottaghi and Hessam Bagherinezhad and Mohammad Rastegari and Ali Farhadi},
9 | Title = {Newtonian Image Understanding: Unfolding the Dynamics of Objects in Static Images},
10 | Booktitle = {CVPR},
11 | Year = {2016}
12 | }
13 | ```
14 |
15 | ### Requirements
16 | This code is written in Lua, based on [Torch](http://torch.ch). If you are on [Ubuntu 14.04+](http://ubuntu.com), you can follow [this instruction](https://github.com/facebook/fbcunn/blob/master/INSTALL.md) to install torch.
17 |
18 | You need the [VIND dataset](https://docs.google.com/forms/d/1OROeoj55hfhwiMsDuVyzMgfnhatTUOBGz0qGnMXor4Y/viewform). Extract it in the current directory, and rename it to `VIND`. Or you can put it somewhere else and change the `config.DataRootPath` in `setting_options.lua`.
19 |
20 | ### Training
21 | To run the training:
22 | ```
23 | th main.lua train
24 | ```
25 |
26 | This trains the model on the training data and, once every 10 iterations, evaluates it on one `val_images` batch. If you want to validate on `val_videos` instead, go to `setting_options.lua` and change the line `valmeta = imvalmeta` to `valmeta = vidvalmeta`.
27 |
28 | ### Test
29 | You need to [get the weights](https://drive.google.com/file/d/0B7H3g3rb2Blwcm51dXdKbGxzLTQ/view). Extract them in the current directory and rename the extracted folder to `weights`. To run the test:
30 | ```
31 | th main.lua test
32 | ```
33 |
34 | ### License
35 | This code is released under MIT License.
36 |
--------------------------------------------------------------------------------
/layers/SmoothPairwiseCosineSimilarity.lua:
--------------------------------------------------------------------------------
1 | --[[
2 | Input: a table of two inputs {M, k}, where
3 | M = an n-by-d matrix
4 | k = an m-by-d matrix
5 | Output: a n-by-m matrix
6 | Element (i, j) is an approximation of the cosine similarity between row i of M and
7 | row j of k. It's an approximation since we add a constant to the
8 | denominator of the cosine similarity function to remove the singularity when
9 | one of the inputs is zero.
10 | --]]
11 |
12 | -- This file is a modified version of https://github.com/kaishengtai/torch-ntm/blob/master/layers/SmoothCosineSimilarity.lua
13 |
14 | local SmoothPairwiseCosineSimilarity, parent = torch.class('nn.SmoothPairwiseCosineSimilarity', 'nn.Module')
15 |
16 | function SmoothPairwiseCosineSimilarity:__init(smoothen)
17 |   -- `smoothen` is the constant added to the denominator; it defaults to 1e-5 when omitted.
18 |   parent.__init(self)
19 |   self.gradInput, self.smooth = {}, smoothen or 1e-5
20 | end
21 |
22 | function SmoothPairwiseCosineSimilarity:updateOutput(input)
23 |   -- The norms, their outer product and the dot products are cached on self because updateGradInput reuses them.
24 |   local M, k = input[1], input[2]
25 |   assert(M:size(2)==k:size(2),"ERROR: dimensions are not equal !!!")
26 |   self.rownorms = torch.cmul(M, M):sum(2):sqrt():view(M:size(1))
27 |   self.colnorms = torch.cmul(k, k):sum(2):sqrt():view(k:size(1))
28 |   self.rowcol = torch.ger(self.rownorms, self.colnorms) -- outer product: rowcol[i][j] = |M_i| * |k_j|
29 |   self.dot = torch.mm(M, k:t())
30 |   return self.output:set(torch.cdiv(self.dot, self.rowcol + self.smooth))
31 | end
32 |
33 | function SmoothPairwiseCosineSimilarity:updateGradInput(input, gradOutput)
34 | local M, k = unpack(input)
35 | local nrow = M:size(1);
36 | local ncol = k:size(1);
37 | local ndim = k:size(2);
38 | -- Relies on self.rownorms, self.colnorms, self.rowcol and self.output cached by the preceding updateOutput call.
39 | self.gradInput[1] = self.gradInput[1] or input[1].new()
40 | self.gradInput[2] = self.gradInput[2] or input[2].new()
41 | -- With o = dot / (rowcol + smooth), each gradient is the sum of two terms weighted by gradOutput:
42 | --   d(dot) / (rowcol + smooth)   and   -o * d(rowcol) / (rowcol + smooth)
43 | -- M gradient
44 | self.gradInput[1]:set(torch.cdiv(gradOutput, self.rowcol + self.smooth)*k)
45 | local scale = torch.cmul(self.output, (torch.repeatTensor(self.colnorms,nrow,1)))
46 | :cdiv(self.rowcol + self.smooth)
47 | :cmul(gradOutput):sum(2)
48 | :cdiv(self.rownorms+self.smooth)
49 | self.gradInput[1]:add(torch.cmul(-torch.repeatTensor(scale,1,ndim), M))
50 | -- (the norm derivative M / |M| is evaluated as M / (|M| + smooth) above; likewise for k below)
51 | -- k gradient
52 | self.gradInput[2]:set(torch.cdiv(gradOutput, self.rowcol + self.smooth):t()* M)
53 | local scale = torch.cmul(self.output, (torch.repeatTensor(self.rownorms,ncol,1):t()))
54 | :cdiv(self.rowcol + self.smooth)
55 | :cmul(gradOutput):sum(1)
56 | :cdiv(self.colnorms+self.smooth)
57 | self.gradInput[2]:add(torch.cmul(-torch.repeatTensor(scale,ndim,1):t(), k))
58 |
59 | -- gradInput is a table {gradient w.r.t. M, gradient w.r.t. k}
60 | return self.gradInput
61 | end
62 |
--------------------------------------------------------------------------------
/io/compute_mean_std.lua:
--------------------------------------------------------------------------------
1 | function compute_mean_std(dataset, dataset_GE)
2 |   -- Fill in the mean and std of every enabled input type, for the real videos
3 |   -- (config.train, mirrored into config.test) and for the game-engine videos (config.GE).
4 |   -- Statistics are cached in hidden .t7 files and recomputed only when the cache file is missing.
5 |   -- Side effects: mutates config.train / config.test / config.GE entries and may write cache files.
6 |   --   dataset    : passed to MakeListTrainFrames to list the real training frames
7 |   --   dataset_GE : passed to MakeListGEFrames to list the game-engine frames
8 |
9 |   -------------------- COMPUTE MEAN AND STD OF REAL VIDEOS -------------------
10 |   for input_type, train_config in pairs(config.train) do
11 |     if type(train_config) == 'table' and train_config.enable then
12 |       local test_config = config.test[input_type]
13 |       local meanstdFile = config.train.annotation.dir .. '/.meanstd_real_' .. input_type .. '.t7'
14 |       if paths.filep(meanstdFile) then
15 |         -- reuse the cached statistics
16 |         local meanstd = torch.load(meanstdFile)
17 |         train_config.mean, train_config.std = meanstd.mean, meanstd.std
18 |       else
19 |         -- compute them from the training frames and cache the result
20 |         local trainDir = train_config.dir
21 |         local allfiles = MakeListTrainFrames(dataset, trainDir, train_config.type)
22 |         train_config.mean, train_config.std = ComputeMeanStd(1000, allfiles, config.imH, config.imW)
23 |         torch.save(meanstdFile, { mean = train_config.mean, std = train_config.std })
24 |       end
25 |       -- the matching test input shares the training statistics (same for both branches above)
26 |       if test_config and test_config.enable then
27 |         test_config.mean, test_config.std = train_config.mean, train_config.std
28 |       end
29 |     end
30 |   end
31 |
32 |   ----------------- COMPUTE MEAN AND STD OF GAME ENGINE VIDEOS ----------------
33 |   for input_type, conf in pairs(config.GE) do
34 |     if type(conf) == 'table' and conf.enable then
35 |       local meanstdFile = config.GE.dir .. '/.meanstd_GE_' .. input_type .. '.t7'
36 |       if paths.filep(meanstdFile) then
37 |         -- reuse the cached statistics
38 |         local meanstd = torch.load(meanstdFile)
39 |         conf.mean, conf.std = meanstd.mean, meanstd.std
40 |       else
41 |         -- compute them from the game-engine frames and cache the result
42 |         local allfiles = MakeListGEFrames(dataset_GE, conf.suffix)
43 |         -- height, then width, as in the real-video call above (the original passed config.GE.imH twice)
44 |         conf.mean, conf.std = ComputeMeanStd(1000, allfiles, config.GE.imH, config.GE.imW)
45 |         torch.save(meanstdFile, { mean = conf.mean, std = conf.std })
46 |       end
47 |     end
48 |   end
49 | end
50 |
51 | function LoadCaffeMeanStd(meanFilePath)
52 |   -- Replace mean/std of every enabled config.train input with values taken from the .mat file at meanFilePath.
53 |   local caffeStats = mattorch.load(meanFilePath)
54 |   for input_type, train_config in pairs(config.train) do
55 |     if type(train_config) == 'table' and train_config.enable then
56 |       for channel = 1, 3 do
57 |         train_config.mean[channel] = caffeStats.mean_data:select(3, channel):mean() / 255 -- mean of slice `channel` along dimension 3
58 |         train_config.std[channel] = 1/255
59 |       end
60 |       -- share the same tables with the matching test input, when that input is enabled
61 |       local mirror = config.test[input_type]
62 |       if mirror and mirror.enable then mirror.mean, mirror.std = train_config.mean, train_config.std end
63 |     end
64 |   end
65 | end
66 |
--------------------------------------------------------------------------------
/train_functions.lua:
--------------------------------------------------------------------------------
1 | log('Loading Train Functions ... ')
2 |
3 | function train()
4 |   -- Training driver: runs config.nIter iterations of load-batch / train-step, with periodic
5 |   -- garbage collection, progress logging, evaluation and checkpointing.
6 |   config.testing = false
7 |
8 |   local nPerBatch = config.batchSize
9 |   -- animation features are fetched once and reused as the second network input of every batch
10 |   local animFeatures = GetAnimationFeatures(model.animationNN)
11 |
12 |   for iter = 1, config.nIter do
13 |     tt = iter -- global on purpose in the original; presumably read elsewhere -- TODO confirm
14 |
15 |     ---- load one batch
16 |     local loadStart = os.clock()
17 |     local imgFeatures, trainTarget = GetAUniformImageBatch(nPerBatch,
18 |       { viewpoint = true, test = false, spline = false })
19 |     local trainInput = { imgFeatures, animFeatures }
20 |     log('loading time :' .. tostring(os.clock() - loadStart))
21 |
22 |     -------- train the network--------------
23 |     model.learningRate = model:LearningRateComp(iter)
24 |     local trainAcc, trainLoss = model:TrainOneBatch(trainInput, trainTarget)
25 |
26 |     -- explicit garbage collection every 10 iterations, timed for the console
27 |     if iter % 10 == 0 then
28 |       local gcStart = os.clock()
29 |       collectgarbage()
30 |       print("garbage collection :", os.clock() - gcStart)
31 |     end
32 |
33 |     if iter % config.nDisplay == 0 then
34 |       log(('Iter = %d | Train Accuracy = %f | Train Loss = %f\n'):format(iter, trainAcc, trainLoss))
35 |     end
36 |
37 |     -- every config.nEval iterations, evaluate on one batch requested with test = true
38 |     if iter % config.nEval == 0 then
39 |       local evalInput, evalTarget = GetAUniformImageBatch(nPerBatch,
40 |         { viewpoint = true, test = true, spline = false })
41 |       local evalAcc, evalLoss = model:EvaluateOneBatch(evalInput, evalTarget)
42 |       log(('Testing ---------> Iter = %d | Test Accuracy = %f | Test Loss = %f\n'):format(iter, evalAcc, evalLoss))
43 |     end
44 |
45 |     -- checkpoint every config.saveModelIter iterations into the run's log directory
46 |     if iter % config.saveModelIter == 0 then
47 |       local checkpoint = paths.concat(config.logDirectory, 'Model_iter_' .. iter .. '.t7')
48 |       log('Saving NN model in ----> ' .. checkpoint .. '\n')
49 |       model:SaveModel(checkpoint)
50 |     end
51 |   end
52 | end
53 |
54 |
55 | ---------------------------------------------------------
56 | function test()
57 |   -- Evaluate the loaded model for config.nIter batches, logging the per-batch and running mean accuracy.
58 |   -- Returns: meanAcc         running mean of the per-batch accuracies,
59 |   --          per_class_cum   sum over batches of the per_class result of model:EvaluateOneBatch,
60 |   --          all_predictions torch.cat(target, prediction, 2) rows of every batch, stacked (nil if no batch ran).
61 |   -- The original accumulated these values and then discarded them; callers that ignore the results are unaffected.
62 |   config.testing = true
63 |
64 |   local batchSize = config.batchSize
65 |   local meanAcc = 0
66 |   local per_class_cum = torch.Tensor(config.nCategories, 2):fill(0)
67 |   local all_predictions
68 |
69 |   for iter = 1, config.nIter do
70 |     tt = iter
71 |     ---- load one batch
72 |     local tic = os.clock()
73 |     local TeInput, TeTarget = GetAnImageBatch(batchSize, {
74 |       viewpoint = true, test = true, deterministic = true, spline = false,
75 |     })
76 |     log('loading time :' .. tostring(os.clock() - tic))
77 |
78 |     if (iter % 10) == 0 then
79 |       local gcStart = os.clock()
80 |       collectgarbage()
81 |       print("garbage collection :", os.clock() - gcStart)
82 |     end
83 |
84 |     local acc, _, per_class, predicts = model:EvaluateOneBatch(TeInput, TeTarget)
85 |     meanAcc = ((iter - 1) * meanAcc + acc) / iter -- incremental running mean
86 |     per_class_cum = per_class_cum + per_class
87 |     log(('Iter = %d | Current Test Accuracy = %f | Average Test Accuracy = %f\n'):format(iter,acc,meanAcc));
88 |
89 |     -- targets and predicted labels side by side, appended below the rows of earlier batches
90 |     local predictions = torch.cat(TeTarget, predicts, 2)
91 |     if all_predictions then
92 |       all_predictions = torch.cat(all_predictions, predictions, 1)
93 |     else
94 |       all_predictions = predictions
95 |     end
96 |   end
97 |
98 |   return meanAcc, per_class_cum, all_predictions
99 | end
100 |
--------------------------------------------------------------------------------
/setting_options.lua:
--------------------------------------------------------------------------------
1 | ---- options
2 | config={};
3 |
4 | config.GPU = 1
5 | config.nGPU = 1
6 |
7 | config.DataRootPath = "VIND"
8 | config.SaveRootPath = "logs"
9 | config.CacheRootPath = "cache"
10 | -- One log directory per run: <SaveRootPath>/LOG_<user>_<date>. USER can be unset (e.g. in containers), so fall back to "unknown" instead of failing on a nil concatenation.
11 | config.logDirectory = config.SaveRootPath .. '/' .. "LOG_" .. (os.getenv('USER') or 'unknown') .. "_" .. os.date():gsub(' ','-');
12 | os.execute('mkdir -p ' .. config.logDirectory)
13 | config.logFile = assert(io.open(paths.concat(config.logDirectory, 'log.txt'), 'w'))
14 | -- Game-engine (GE) video inputs: one entry per modality (image/depth/normal/flow) plus shared frame size and paths.
15 | config.GE = {
16 | image = {
17 | suffix = 'im',
18 | mean = {}, -- mean/std are filled in by compute_mean_std
19 | std = {},
20 | nChannels = 3,
21 | enable = true,
22 | },
23 | depth = {
24 | suffix = 'depth',
25 | mean = {},
26 | std = {},
27 | nChannels = 1,
28 | enable = true,
29 | },
30 | normal = {
31 | suffix = 'normal',
32 | mean = {},
33 | std = {},
34 | nChannels = 3,
35 | enable = true,
36 | },
37 | flow = {
38 | suffix = 'flow',
39 | mean = {},
40 | std = {},
41 | nChannels = 3,
42 | enable = true,
43 | },
44 | imH = 256,
45 | imW = 256,
46 | frame_per_video = 10,
47 | use_multiple_vars = false,
48 | dir = config.DataRootPath .. "/ge_videos",
49 | saveDir = config.CacheRootPath .. "/ge_cache",
50 | featsDir = config.CacheRootPath .. "/ge_feats",
51 | splinesFile = config.DataRootPath .. "/ge_videos/.splines.mat",
52 | }
53 | -- Frame height/width used for the real-video inputs.
54 | config.imH = 227;
55 | config.imW = 227;
56 | config.max_angles = 8;
57 | -- Training split: per-modality settings (only image and mask are enabled), cache dir, batch size and iteration count.
58 | config.train = {
59 | annotation = {
60 | dir = config.DataRootPath .. "/train/labels",
61 | },
62 | image = {
63 | dir = config.DataRootPath .. "/train/images",
64 | nChannels = 3,
65 | type = "png",
66 | suffix = "im",
67 | mean = {},
68 | std = {},
69 | enable = true,
70 | croppable = true,
71 | },
72 | depth = {
73 | enable = false,
74 | },
75 | normal = {
76 | enable = false,
77 | },
78 | flow = {
79 | enable = false,
80 | },
81 | mask = {
82 | dir = config.DataRootPath .. "/train/objmask",
83 | nChannels = 1,
84 | type = "png",
85 | suffix = "mask",
86 | mean = {},
87 | std = {},
88 | enable = true,
89 | },
90 | save_dir = config.CacheRootPath .. "/train_cache",
91 | batch_size = 128,
92 | nIter = 1000000,
93 | }
94 | -- Validation on videos (val_videos); selected by setting `valmeta = vidvalmeta` further down.
95 | vidvalmeta = {
96 | annotation = {
97 | dir = config.DataRootPath .. "/val_videos/labels",
98 | },
99 | image = {
100 | dir = config.DataRootPath .. "/val_videos/images",
101 | nChannels = 3,
102 | type = "png",
103 | suffix = "im",
104 | mean = {},
105 | std = {},
106 | enable = true,
107 | croppable = true,
108 | },
109 | depth = {
110 | enable = false,
111 | },
112 | normal = {
113 | enable = false,
114 | },
115 | flow = {
116 | enable = false,
117 | },
118 | mask = {
119 | dir = config.DataRootPath .. "/val_videos/objmask",
120 | nChannels = 1,
121 | type = "png",
122 | suffix = "mask",
123 | mean = {},
124 | std = {},
125 | enable = true,
126 | },
127 | save_dir = config.CacheRootPath .. "/val_video_cache",
128 | batch_size = 243,
129 | nIter = 6,
130 | }
131 | -- Validation on still images (val_images); this is the table assigned to `valmeta` further down.
132 | imvalmeta = {
133 | annotation = {
134 | dir = config.DataRootPath .. "/val_images/labels",
135 | },
136 | image = {
137 | dir = config.DataRootPath .. "/val_images/images",
138 | nChannels = 3,
139 | type = "png",
140 | suffix = "im",
141 | mean = {},
142 | std = {},
143 | enable = true,
144 | croppable = true,
145 | },
146 | depth = {
147 | enable = false,
148 | },
149 | normal = {
150 | enable = false,
151 | },
152 | flow = {
153 | enable = false,
154 | },
155 | mask = {
156 | dir = config.DataRootPath .. "/val_images/objmask",
157 | nChannels = 1,
158 | type = "png",
159 | suffix = "mask",
160 | mean = {},
161 | std = {},
162 | enable = true,
163 | },
164 | save_dir = config.CacheRootPath .. "/val_images_cache",
165 | batch_size = 243,
166 | nIter = 6,
167 | }
168 | -- Test split; becomes config.test whenever mode is not 'train'.
169 | testmeta = {
170 | annotation = {
171 | dir = config.DataRootPath .. "/test/labels",
172 | },
173 | image = {
174 | dir = config.DataRootPath .. "/test/images",
175 | nChannels = 3,
176 | type = "png",
177 | suffix = "im",
178 | mean = {},
179 | std = {},
180 | enable = true,
181 | croppable = true,
182 | },
183 | depth = {
184 | enable = false,
185 | },
186 | normal = {
187 | enable = false,
188 | },
189 | flow = {
190 | enable = false,
191 | },
192 | mask = {
193 | dir = config.DataRootPath .. "/test/objmask",
194 | nChannels = 1,
195 | type = "png",
196 | suffix = "mask",
197 | mean = {},
198 | std = {},
199 | enable = true,
200 | },
201 | save_dir = config.CacheRootPath .. "/test_cache",
202 | batch_size = 1,
203 | nIter = 1,
204 | }
205 | -- config.test is the validation split while training and the test split otherwise.
206 | valmeta = imvalmeta
207 | config.test = mode == 'train' and valmeta or testmeta
208 | config.classes = {'scenario11', 'scenario3', 'scenario10', 'scenario7', 'scenario6', 'scenario12', 'scenario9', 'scenario4', 'scenario1', 'scenario2', 'scenario8', 'scenario5'}
209 | config.class_angles= {3, 8, 8, 3, 3, 4, 8, 8, 8, 4, 8, 1};
210 |
211 | -- excluded_categories is a list of regexes. In lua, to escape special chars you need to add %
212 | config.excluded_categories = {};
213 |
214 | -------- BEGIN: Network configuration -----
215 | if mode == 'test' then
216 | config.nIter = config.test.nIter
217 | config.batchSize = config.test.batch_size
218 | else
219 | config.nIter = config.train.nIter
220 | config.batchSize = config.train.batch_size
221 | end
222 | -- nDisplay / saveModelIter / nEval are periods in iterations (see train()); nResetLR is the period of the learning-rate schedule.
223 | config.nDisplay = 1;
224 | config.saveModelIter = 500;
225 | config.nResetLR = 50000;
226 | config.nCategories = 66
227 | config.nClasses = config.nCategories
228 | config.nEval = 10;
229 | config.lambda = 0.5 -- weight of the classifier branch in model.fullNN; the matching branch gets 1 - lambda
230 | -- Initial weights: imageNN = "caffe" makes LoadModel initialise the image branch from config.caffeFilePath.
231 | config.initModelPath = { imageNN = "caffe"
232 | , animNN = "weights/motion_row.t7"
233 | , fullNN = "weights/N3.t7" }
234 |
235 | config.caffeInit = true;
236 | config.caffeFilePath = {
237 | proto = 'weights/zoo/deploy.prototxt',
238 | model = 'weights/zoo/bvlc_alexnet.caffemodel',
239 | mean = 'weights/zoo/ilsvrc_2012_mean.mat'
240 | };
241 | config.regimes = {
242 | -- start, end, LR, (start/end are positions within one nResetLR period, inclusive)
243 | { 1, 100, 1e-2, },
244 | { 101, 1000, 1e-2, },
245 | { 1001, 10000, 1e-3, },
246 | {10001, 100000, 1e-4,},
247 | };
248 | config.dropoutProb = 0.5;
249 |
250 | -------- END : Network configuration -------
251 |
--------------------------------------------------------------------------------
/networks/ModelConstruction_IM_GEFixParallel.lua:
--------------------------------------------------------------------------------
1 | --Constructing The NN model
2 | log('Constructing Network Model ..... \n');
3 | -- `model` is a global table; each sub-network below is the value returned by the matching script under models/.
4 |
5 | model={};
6 | model.imageNN = require('models.image_row') -- image branch
7 | model.animationFix = require('models.motion_embedding') -- embedding applied to the animation features
8 | model.animationNN =require('models.motion_row'); -- network that GetAnimationFeatures is called with
9 | model.jointNN_1 = require('models.classifier') -- Linear + LogSoftMax classifier head
10 | model.jointNN_2 = require('models.pairwisecosine_GEfix') -- cosine-similarity matching head
11 | model.criterion = nn.ClassNLLCriterion():cuda()
12 |
13 | function model:infer(input, k)
14 |   -- Forward `input` with the cached animation features through model.fullNN and return its output (`k` is unused).
15 |   if not model.animFeatures then
16 |     model.animFeatures = GetAnimationFeatures(model.animationNN) -- computed on first use, then reused
17 |   end
18 |   local tic = os.clock()
19 |   model.fullNN:forward({input, model.animFeatures})
20 |   -- elapsed = end - start (the original printed tic - toc, i.e. a negative duration)
21 |   print("Forward time ", os.clock() - tic)
22 |   return model.fullNN.output
23 | end
24 |
25 | function model:LearningRateComp(iter)
26 |   -- Learning rate for iteration `iter`: the schedule position is (iter % config.nResetLR) + 1 and the rate is
27 |   -- taken from the first config.regimes row {first, last, rate} whose inclusive range contains that position.
28 |   local lIter = (iter % config.nResetLR) + 1
29 |   for _, row in ipairs(config.regimes) do
30 |     if lIter >= row[1] and lIter <= row[2] then return row[3] end
31 |   end
32 |   error(('no entry of config.regimes covers schedule position %s (iteration %s)'):format(tostring(lIter), tostring(iter))) -- was a silent nil
33 | end
34 |
35 | function model:TrainOneBatch(input,target)
36 |   -- One gradient step of model.fullNN on a single batch.
37 |   -- Returns the batch accuracy (first result of GetPerClassAccuracy) and the criterion loss.
38 |   local net, criterion = model.fullNN, model.criterion
39 |
40 |   -- training mode (activates the dropouts), then the forward pass
41 |   net:training()
42 |   net:forward(input)
43 |   local scores = net.output
44 |
45 |   -- loss, and accuracy from the arg-max over dimension 2
46 |   local loss = criterion:forward(scores, target)
47 |   local _, winners = torch.max(scores, 2)
48 |   local acc = GetPerClassAccuracy(winners[{{}, 1}], target)
49 |
50 |   -- clear stale gradients before back-propagating the criterion gradient
51 |   net:zeroGradParameters()
52 |   net:backward(input, criterion:backward(scores, target))
53 |
54 |   -- update the weights with the learning rate currently stored on the model
55 |   net:updateParameters(model.learningRate)
56 |
57 |   return acc, loss
58 | end
59 |
60 | function model:EvaluateOneBatch(input,target)
61 |   -- Evaluate one batch in evaluation mode (dropouts deactivated).
62 |   -- Returns acc, loss, per_class, predictedLabel, bestFrame. NOTE: `loss` is always 0 -- no criterion is evaluated here.
63 |   model.fullNN:evaluate()
64 |   local loss = 0
65 |   local infer_output = model:infer(input, 1)
66 |   local _, predictedLabel = torch.max(infer_output, 2)
67 |   predictedLabel = predictedLabel[{{},1}] -- convert matrix to vector
68 |   -- arg-max over dimension 3 of the output of jointNN_2's second module (the reshaped similarities);
69 |   -- test() already asks for a fifth result, but the original computed this value and never returned it.
70 |   local _, bestFrame = torch.max(model.jointNN_2:get(2).output, 3)
71 |   local acc, per_class = GetPerClassAccuracy(predictedLabel, target)
72 |   return acc, loss, per_class, predictedLabel, bestFrame
73 | end
74 |
75 | function model:SaveModel(fileName)
76 |   -- Gather the parameters of every sub-network via GetNNParamsToCPU and serialize them to fileName,
77 |   -- keyed by the sub-network's field name on `model`.
78 |   local snapshot = {}
79 |   local parts = { 'imageNN', 'animationNN', 'animationFix', 'jointNN_1', 'jointNN_2' }
80 |   for _, part in ipairs(parts) do
81 |     snapshot[part] = GetNNParamsToCPU(model[part])
82 |   end
83 |   -- saving into the file
84 |   torch.save(fileName, snapshot)
85 | end
86 |
87 | function model:LoadCaffeImageNN(caffeFilePath) -- initialise model.imageNN from a Caffe model; caffeFilePath holds the proto, model and mean file paths
88 | local protoFile = caffeFilePath.proto
89 | local modelFile = caffeFilePath.model
90 | local meanFile = caffeFilePath.mean
91 | -- Load the Caffe network and drop modules 24, 23 and 22, highest index first so the remaining indices stay valid.
92 | require 'loadcaffe'
93 | local caffeModel = loadcaffe.load(protoFile,modelFile,'cudnn');
94 | caffeModel:remove(24);
95 | caffeModel:remove(23);
96 | caffeModel:remove(22);
97 | local caffeParams = GetNNParamsToCPU(caffeModel);
98 | if config.w_crop then -- NOTE(review): config.w_crop is not set in setting_options.lua, so this branch is skipped with that config
99 | caffeParams[1] = caffeParams[1]:repeatTensor(1, 5, 1, 1)
100 | end
101 | if config.train.mask.enable then -- prepend one randomly initialised plane along dimension 2 of the first parameter tensor (presumably for the mask channel)
102 | local firstLayerRandom = torch.FloatTensor(96, 1, 11, 11)
103 | firstLayerRandom:apply(rand_initialize)
104 | caffeParams[1] = torch.cat(firstLayerRandom, caffeParams[1], 2)
105 | end
106 | LoadNNlParams(model.imageNN, caffeParams);
107 | -- Mean/std of the enabled training inputs are then taken from the Caffe mean file.
108 | LoadCaffeMeanStd(meanFile);
109 | end
110 |
111 | -- Build model.fullNN for training: initialise every sub-network, wire them together, and save an initial snapshot.
112 | function model:LoadModel(fileNameImg,fileNameAnim)
113 |   -- fileNameImg : "caffe" (initialise from config.caffeFilePath), a .t7 snapshot path, or nil for random weights
114 |   -- fileNameAnim: .t7 snapshot for the animation network, or nil for random weights
115 |   log('Loding Network Model ....')
116 |   -- Drop modules 19..16 of the animation network (highest index first), then append Transpose{2,3} and a reshape to 10 x 4096.
117 |   for mm = 19,16,-1 do
118 |     model.animationNN:remove(mm)
119 |   end
120 |   model.animationNN:add(nn.Transpose{2,3}):add(nn.Reshape(10,4096,false))
121 |   model.animationNN:cuda()
122 |   if fileNameImg == "caffe" then
123 |     model:LoadCaffeImageNN(config.caffeFilePath)
124 |     model.jointNN_1:apply(rand_initialize)
125 |   elseif fileNameImg then
126 |     local saveModel = torch.load(fileNameImg)
127 |     LoadNNlParams(model.imageNN, saveModel.imageNN)
128 |     -- SaveModel writes this entry as `jointNN_1`; the original only looked for `jointNN`, so snapshots
129 |     -- produced by SaveModel could not be loaded back here. Accept both spellings.
130 |     LoadNNlParams(model.jointNN_1, saveModel.jointNN_1 or saveModel.jointNN)
131 |   else
132 |     model.imageNN:apply(rand_initialize) -- initialise the image branch randomly
133 |   end
134 |   model.jointNN_2:apply(rand_initialize)
135 |
136 |   if config.caffeInit then
137 |     LoadCaffeMeanStd(config.caffeFilePath.mean)
138 |   end
139 |
140 |   if fileNameAnim then
141 |     -- NOTE(review): the animation weights are read from the snapshot's `imageNN` entry -- confirm the file layout.
142 |     LoadNNlParams(model.animationNN, torch.load(fileNameAnim).imageNN)
143 |   else
144 |     model.animationNN:apply(rand_initialize)
145 |   end
146 |   model.animationFix:apply(rand_initialize)
147 |
148 |   -- fullNN: {imageNN, animationFix} in parallel, then lambda * classifier(image features) + (1 - lambda) * matcher(both)
149 |   local featuresTable = nn.ParallelTable():add(model.imageNN):add(model.animationFix)
150 |   local classifier = nn.Sequential():add(nn.SelectTable(1)):add(model.jointNN_1):add(nn.MulConstant(config.lambda,true))
151 |   local matcher = nn.Sequential():add(model.jointNN_2):add(nn.MulConstant((1-config.lambda),true))
152 |   model.fullNN = nn.Sequential():add(featuresTable):add(nn.ConcatTable():add(classifier):add(matcher)):add(nn.CAddTable())
153 |   model.fullNN:cuda()
154 |   model:SaveModel(paths.concat(config.logDirectory, 'init.t7'))
155 | end
156 |
157 | function model:LoadModelFull(fileName)
158 | log('Loding Network Model ....')
159 |
160 | for mm = 19,16,-1 do
161 | model.animationNN:remove(mm);
162 | end
163 | model.animationNN:add(nn.Transpose{2,3}):add(nn.Reshape(10,4096,false) );
164 | model.animationNN:cuda()
165 |
166 | if fileName then
167 | local saveModel = torch.load(fileName);
168 | LoadNNlParams(model.imageNN ,saveModel.imageNN);
169 | LoadNNlParams(model.animationNN ,saveModel.animationNN);
170 | -- debugger.enter()
171 | LoadNNlParams(model.animationFix ,saveModel.animationFix);
172 | LoadNNlParams(model.jointNN_1,saveModel.jointNN_1);
173 | LoadNNlParams(model.jointNN_2,saveModel.jointNN_2);
174 | else
175 | -- Initialize the model randomly
176 | model.imageNN:apply(rand_initialize);
177 | model.animationNN:apply(rand_initialize);
178 | model.animationFix:apply(rand_initialize);
179 | model.jointNN_1:apply(rand_initialize);
180 | model.jointNN_2:apply(rand_initialize);
181 | end
182 | if config.caffeInit then
183 | LoadCaffeMeanStd(config.caffeFilePath.mean);
184 | end
185 |
186 | local featuresTable = nn.ParallelTable():add(model.imageNN):add(model.animationFix);
187 | local classifier = nn.Sequential():add(nn.SelectTable(1)):add(model.jointNN_1):add(nn.MulConstant(config.lambda,true));
188 | local matcher = nn.Sequential():add(model.jointNN_2):add(nn.MulConstant((1-config.lambda),true));
189 | local concatTable = nn.ConcatTable():add(classifier):add(matcher)
190 | model.fullNN = nn.Sequential():add(featuresTable):add(concatTable):add(nn.CAddTable())
191 | model.fullNN:cuda();
192 |
193 | model:SaveModel( paths.concat(config.logDirectory, 'init.t7'))
194 | end
195 |
--------------------------------------------------------------------------------
/io/readGEFiles.lua:
--------------------------------------------------------------------------------
function LoadGEDatabase()
  -----------------------------------------------------------------
  -- Reads the list of images in the Game Engine videos
  -- outputs:
  --   dataset: a table with list of files for each category
  --   dataset[category][angle][variation], e.g.,
  --   dataset['rolling'][1]['1_1']
  --   The `splines` entry of config.GE.splinesFile is attached as the
  --   `splines` field of the dataset's metatable.
  -- side effects:
  --   creates config.GE.saveDir with `mkdir -p`
  -----------------------------------------------------------------
  os.execute('mkdir -p ' .. config.GE.saveDir)

  local geDir = config.GE.dir;

  local dataset = {};

  -- physics categories
  local physicsCategories = paths.dir(geDir);
  RemoveDotDirs(physicsCategories);

  for i = 1, #physicsCategories do
    local physicsCategory = physicsCategories[i];
    local perCategory = {};
    dataset[physicsCategory] = perCategory;

    -- angle directories, named "<angle>_<type>"
    local angleCategories = paths.dir(paths.concat(geDir, physicsCategory));
    RemoveDotDirs(angleCategories);

    -- Collect the distinct numeric angle prefixes. Entries that do not
    -- follow the "<number>_<type>" naming are skipped instead of raising
    -- "table index is nil".
    local anglebins = {}
    for _, a in pairs(angleCategories) do
      local prefix = a:match("([^,]+)_([^,]+)");
      local abin = prefix and tonumber(prefix);
      if abin then
        anglebins[abin] = 1;
      end
    end

    -- The variation folders are always enumerated from "<angle>_im".
    for k, _ in pairs(anglebins) do
      local perAngle = {};
      perCategory[k] = perAngle;

      local dir2 = paths.concat(geDir, physicsCategory, tostring(k) .. '_' .. 'im');
      local alldirs = paths.dir(dir2);
      RemoveDotDirs(alldirs);
      table.sort(alldirs, function (a,b) return a < b end);

      for _, d in pairs(alldirs) do
        local dir3 = paths.concat(geDir, physicsCategory, tostring(k) .. '_' .. 'im', d);
        local files = paths.dir(dir3);
        RemoveDotDirs(files);

        table.sort(files, function (a,b) return a < b end);
        perAngle[d] = files;
      end
    end
  end

  -- Callers read the splines through getmetatable(dataset).splines.
  local splines = mattorch.load(config.GE.splinesFile)['splines']
  setmetatable(dataset, {splines = splines})

  return dataset;
end
68 |
69 |
function ReadGEImagesPerCategory(physicsCategory, angle, dataset, savefile, input_type)
  -----------------------------------------------------------------
  -- Reads the game engine videos of one category / angle from the
  -- image files, stores them in `savefile` and returns them.
  -- inputs:
  --   physicsCategory: 'rolling', 'falling', etc.
  --   angle: the view angle
  --   dataset: the output of "LoadGEDatabase"
  --   savefile: path the resulting tensor is written to with torch.save
  --   input_type: key of config.GE selecting suffix, mean, std and
  --               nChannels of the data to read
  -- outputs:
  --   5D Tensor,
  --   nvariations x fr_per_video x nChannels x imH x imW
  --   Variations are numbered in the order `pairs` enumerates
  --   dataset[physicsCategory][angle].
  -----------------------------------------------------------------
  local geConfig = config.GE;
  local geDir = geConfig.dir;
  local imH, imW = geConfig.imH, geConfig.imW;
  local fr_per_video = geConfig.frame_per_video; -- frames kept per video

  local typeConfig = geConfig[input_type];
  local mean, std = typeConfig.mean, typeConfig.std;
  local nChannels = typeConfig.nChannels;

  local variations = dataset[physicsCategory][angle];
  local imTensor = torch.Tensor(getTableSize(variations), fr_per_video, nChannels, imH, imW);
  local suffix = typeConfig.suffix

  local variationIndex = 0;
  for dir, files in pairs(variations) do
    variationIndex = variationIndex + 1;
    for f = 1, #files do
      local framePath = paths.concat(geDir, physicsCategory, angle .. '_' .. suffix, dir, files[f]);
      local scaled = image.scale(loadImageOrig(framePath), imW, imH);
      local normalized = normalizeImage(scaled, mean, std);
      -- keep only the first nChannels channels of the frame
      imTensor[variationIndex][f] = normalized[{{1,nChannels}, {}, {}}];
    end
  end

  collectgarbage()
  torch.save(savefile, imTensor)
  return imTensor
end
117 |
function LoadGEPositionPerCategory(physicsCategory, angle, dataset)
  -----------------------------------------------------------------
  -- Returns the positions of a category / angle. They are loaded from
  -- <config.GE.saveDir>/<category>_<angle>_positions.t7 when that file
  -- exists; otherwise "ReadGEPositionPerCategory" is called with that
  -- file name.
  --
  -- inputs:
  --   physicsCategory: 'rolling', 'falling', etc.
  --   angle: the view angle (1 out of 8 or 1 out of 3 for symmetric categories)
  --   dataset: the output of "LoadGEDatabase" function
  -- outputs:
  --   positions: 3D Tensor,
  --   nvariations (different z's for the camera, different forces, etc. ) x
  --   fr_per_video x 3 (x,y,z)
  -----------------------------------------------------------------

  -- Both names are local so the lookup does not overwrite globals.
  local saveDir = config.GE.saveDir;
  local fname = paths.concat(saveDir, physicsCategory .. '_' .. angle .. '_positions' .. '.t7');

  local positions;
  if paths.filep(fname) then
    positions = torch.load(fname)
  else
    positions = ReadGEPositionPerCategory(physicsCategory, angle, dataset, fname);
  end

  return positions;
end
147 |
148 |
149 |
function LoadGEPerCategory(physicsCategory, angle, dataset, input_type)
  -----------------------------------------------------------------
  -- Returns the game engine videos of a category / angle.
  --
  -- With an input_type, the tensor is loaded from
  -- <config.GE.saveDir>/<category>_<angle>_<suffix>.t7 when that file
  -- exists and produced by "ReadGEImagesPerCategory" otherwise. Unless
  -- config.GE.use_multiple_vars is set, only one variation is kept: the
  -- second one, or the first when there is a single variation.
  --
  -- Without an input_type, the function calls itself for every enabled
  -- input type of config.GE and stacks the results along the channel
  -- dimension, in the order `pairs` enumerates the enabled types.
  --
  -- inputs:
  --   physicsCategory: 'rolling', 'falling', etc.
  --   angle: the view angle
  --   dataset: the output of "LoadGEDatabase" function
  --   input_type: optional key of config.GE
  -- outputs:
  --   5D Tensor,
  --   nvariations x fr_per_video x nChannels x imH x imW
  -----------------------------------------------------------------
  if input_type then
    local saveDir = config.GE.saveDir
    local suffix = config.GE[input_type].suffix
    local cacheFile = paths.concat(saveDir, physicsCategory .. '_' .. angle .. '_' .. suffix .. '.t7');

    local video;
    if paths.filep(cacheFile) then
      video = torch.load(cacheFile)
    else
      video = ReadGEImagesPerCategory(physicsCategory, angle, dataset, cacheFile, input_type);
    end

    if not config.GE.use_multiple_vars then
      local var_id = 2
      if video:size(1) == 1 then
        var_id = 1
      end
      video = video[{{var_id}, {}, {}, {}, {}}]
    end
    return video;
  end

  -- No type requested: concatenate every enabled type channel-wise.
  local imH = config.GE.imH;
  local imW = config.GE.imW;
  local fr_per_video = config.GE.frame_per_video;
  local nvariations = 1
  if config.GE.use_multiple_vars then
    nvariations = getTableSize(dataset[physicsCategory][angle]) or 1
  end
  local enabledTypes = GetEnableInputTypes(config.GE)
  local totalChannels = GetValuesSum(enabledTypes)
  local stacked = torch.Tensor(nvariations, fr_per_video, totalChannels, imH, imW);

  local firstChannel = 1
  for typeName, typeChannels in pairs(enabledTypes) do
    local lastChannel = firstChannel + typeChannels - 1
    stacked[{{}, {}, {firstChannel, lastChannel}, {}, {}}] = LoadGEPerCategory(physicsCategory, angle, dataset, typeName)
    firstChannel = lastChannel + 1
  end
  return stacked
end
201 |
202 |
function ReadGEImagesAll(dataset, savefile, opts)
  -----------------------------------------------------------------
  -- Reads every game engine video from the image files, stores the
  -- result in `savefile` and returns it.
  -- inputs:
  --   dataset: the output of "LoadGEDatabase" function
  --   savefile: path the result is written to with torch.save
  --   opts: table with the fields
  --     input_type: suffix of the "<angle>_<input_type>" directories
  --     mean, std:  forwarded to normalizeImage
  -- outputs:
  --   images: table indexed as images[physicsCategory][angle]; every
  --   entry is a 5D Tensor,
  --   nvariations x fr_per_video x 3 (channels) x imH x imW
  -----------------------------------------------------------------
  local geDir = config.GE.dir;
  local imH, imW = config.GE.imH, config.GE.imW;
  local fr_per_video = config.GE.frame_per_video; -- frames kept per video

  local input_type = opts.input_type;
  local mean, std = opts.mean, opts.std;

  local images = {};
  for physicsCategory, angles in pairs(dataset) do
    local perCategory = {};
    images[physicsCategory] = perCategory;

    for angle, variations in pairs(angles) do
      local imTensor = torch.Tensor(getTableSize(variations), fr_per_video, 3, imH, imW);

      local variationIndex = 0;
      for dir, files in pairs(variations) do
        variationIndex = variationIndex + 1;
        for f = 1, #files do
          local framePath = paths.concat(geDir, physicsCategory, angle .. '_' .. input_type, dir, files[f]);
          local scaled = image.scale(loadImageOrig(framePath), imW, imH);
          imTensor[variationIndex][f] = normalizeImage(scaled, mean, std);
        end
      end

      perCategory[angle] = imTensor;
    end
  end
  collectgarbage()

  torch.save(savefile, images)
  return images
end
253 |
function LoadGEAll(dataset, opts)
  -----------------------------------------------------------------
  -- Returns all game engine videos. They are loaded from
  -- <saveDir>/allGE_<opts.input_type>.t7 when that file exists;
  -- otherwise "ReadGEImagesAll" is called with that file name.
  --
  -- inputs:
  --   dataset: the output of "LoadGEDatabase" function
  --   opts: table forwarded to "ReadGEImagesAll"; its input_type field
  --         is part of the cache file name
  -- outputs:
  --   images: the value stored in the cache file or returned by
  --   "ReadGEImagesAll"
  -----------------------------------------------------------------

  -- config.GEsaveDir is kept for existing configurations; the rest of
  -- this file stores its caches under config.GE.saveDir, so that is the
  -- fallback when the former is not set.
  local saveDir = config.GEsaveDir or config.GE.saveDir;
  local fname = paths.concat(saveDir, 'allGE_' .. opts.input_type .. '.t7');

  local images;
  if paths.filep(fname) then
    images = torch.load(fname)
  else
    images = ReadGEImagesAll(dataset, fname, opts);
  end
  return images
end
280 |
--------------------------------------------------------------------------------
/io/readBatch.lua:
--------------------------------------------------------------------------------
-- Module-level setup, executed when this file is loaded: it fills the
-- globals dataset_GE, trainset and testset that the batch coroutines
-- defined in this file read.
log ("loading datasets metadata");
-- File lists of the game engine videos.
dataset_GE = LoadGEDatabase();
-- File lists of the train / test videos; both receive the same
-- config.excluded_categories argument.
trainset = LoadTrainDatabase(config.excluded_categories);
testset = LoadTestDatabase(config.excluded_categories);
log ("computing mean_std");
compute_mean_std(trainset, dataset_GE);
log ("dataset done");
8 |
GetASiameseBatchCoroutine = coroutine.create(function(nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages, test)
  -----------------------------------------------------------------
  -- Endless generator of siamese batches: frames of real videos paired
  -- with one game engine video.
  -- inputs (first resume):
  --   nPositiveImages: number of frames with the same physics category
  --                    and angle as the anchor video (must be > 0; the
  --                    anchor itself counts as one of them)
  --   nDifferentAngleImages: number of frames with the same physics
  --                    category but a different angle
  --   nDifferentCategoryImages: number of frames with a different
  --                    physics category
  --   test: use testset instead of trainset
  -- On later resumes only the 4th argument (test) is read; the counts
  -- stay as given on the first resume.
  -- yields:
  --   {images, gameEngineVideo}, target
  --   images: batchSize x nChannels x config.imH x config.imW
  --   gameEngineVideo: one randomly chosen variation of the anchor's
  --                    game engine video with dimensions 2 and 3 swapped
  --   target: 1 for positive frames, 0 for both kinds of negatives
  -- NOTE(review): the anchor frame is loaded without an input type while
  -- all other frames are loaded with 'image' -- confirm this is intended.
  -----------------------------------------------------------------
  local dataset = test and testset or trainset; -- TODO(hessam): local or global?
  assert(nPositiveImages > 0);
  local n1, n2, n3 = nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages;
  local batchSize = nPositiveImages + nDifferentAngleImages + nDifferentCategoryImages;
  local target;
  local images;

  local all_input_types = GetEnableInputTypes(dataset.config)
  local nChannels = GetValuesSum(all_input_types)
  if config.GPU == -1 then -- CPU mode
    target = torch.FloatTensor(batchSize);
    images = torch.FloatTensor(batchSize, nChannels, config.imH, config.imW);
  else
    target = torch.CudaTensor(batchSize);
    images = torch.CudaTensor(batchSize, nChannels, config.imH, config.imW);
  end

  -- Train and test keep independent cursors, so switching `test` between
  -- resumes continues each list where it was left.
  local trainIndex = nil
  local testIndex = nil
  local trainvideos = MakeShuffledTuples(trainset);
  local testvideos = MakeShuffledTuples(testset);

  repeat
    nPositiveImages, nDifferentAngleImages, nDifferentCategoryImages = n1, n2, n3;
    -- Iterate on real videos not game engines to make sure all videos are seen.
    local v
    if test then
      testIndex, v = next(testvideos, testIndex)
      if testIndex == nil then
        -- list exhausted: reshuffle and start over
        testvideos = MakeShuffledTuples(testset);
        testIndex, v = next(testvideos, testIndex)
      end
    else
      trainIndex, v = next(trainvideos, trainIndex)
      if trainIndex == nil then
        -- list exhausted: reshuffle and start over
        trainvideos = MakeShuffledTuples(trainset);
        trainIndex, v = next(trainvideos, trainIndex)
      end
    end

    local batchIndex = 1;
    local category = v[1];
    local physicsCategory = v[2];
    local angle = v[3];
    local folder = v[4];

    local gameEngineVideo = LoadGEPerCategory(physicsCategory, angle, dataset_GE);

    -- The anchor video provides the first positive example.
    images[1] = LoadRandomFrameOfVideo(dataset, category, angle, folder);

    target[1] = 1;
    nPositiveImages = nPositiveImages - 1;
    batchIndex = batchIndex + 1;

    -- Keeps sweeping freshly shuffled tuples until the batch is full.
    repeat
      local shuffledDataset = MakeShuffledTuples(dataset);
      for _,sample in pairs(shuffledDataset) do
        -- locals: these used to be written to the global table
        local sampleCategory = sample[1];
        local samplePhysicsCategory = sample[2];
        local sampleAngle = sample[3];
        local sampleFolder = sample[4];
        if nPositiveImages > 0 and samplePhysicsCategory == physicsCategory
           and sampleAngle == angle then
          -- Add the positive example
          images[batchIndex] = LoadRandomFrameOfVideo(dataset, sampleCategory, sampleAngle, sampleFolder, 'image');
          target[batchIndex] = 1;
          nPositiveImages = nPositiveImages - 1;
          batchIndex = batchIndex + 1;
        elseif nDifferentAngleImages > 0 and samplePhysicsCategory == physicsCategory
           and sampleAngle ~= angle then
          -- Add the negative example with different angle
          images[batchIndex] = LoadRandomFrameOfVideo(dataset, sampleCategory, sampleAngle, sampleFolder, 'image');
          target[batchIndex] = 0;
          nDifferentAngleImages = nDifferentAngleImages - 1;
          batchIndex = batchIndex + 1;
        elseif nDifferentCategoryImages > 0 and samplePhysicsCategory ~= physicsCategory then
          -- Add the negative example with different physics category
          images[batchIndex] = LoadRandomFrameOfVideo(dataset, sampleCategory, sampleAngle, sampleFolder, 'image');
          target[batchIndex] = 0;
          nDifferentCategoryImages = nDifferentCategoryImages - 1;
          batchIndex = batchIndex + 1;
        end
        if batchIndex > batchSize then
          break;
        end
      end

      if nDifferentAngleImages == n2 then -- no different angle exists
        -- fill the unused different-angle slots with other categories
        nDifferentCategoryImages = nDifferentCategoryImages + nDifferentAngleImages
        nDifferentAngleImages = 0
      end
    until batchIndex > batchSize

    -- NOTE: the loop above only exits once the batch is full, so this
    -- assertion cannot fail here.
    assert(batchIndex > batchSize, "Not enough data to generate a batch for category="
      .. physicsCategory .. " and angle=" .. tostring(angle) .. ". Requirments = ("
      .. tostring(nPositiveImages) .. "," .. tostring(nDifferentAngleImages) .. ","
      .. tostring(nDifferentCategoryImages) .. ")");
    -- shuffle data
    local shuffle = torch.randperm(batchSize):type('torch.LongTensor')
    images = images:index(1, shuffle)
    target = target:index(1, shuffle)
    local randomForce = torch.random( gameEngineVideo:size(1) )
    local gameEngineVideoRandomForce = gameEngineVideo[{{randomForce}, {}, {}, {}, {}}]:transpose(2, 3)
    if config.GPU ~= -1 then
      gameEngineVideoRandomForce = gameEngineVideoRandomForce:cuda()
    end

    -- yield the output; only the 4th resume argument is used, and no
    -- global placeholder variable is written
    test = select(4, coroutine.yield({images, gameEngineVideoRandomForce}, target));
    dataset = test and testset or trainset;
  until false -- repeat until the end of the world
end)
122 |
GetAnImageBatchCoroutine = coroutine.create(function(batchSize, useViewPoint, test, deterministic, spline)
  -----------------------------------------------------------------
  -- Endless generator of image batches that walks through all videos of
  -- the selected dataset, one random frame per video.
  -- inputs (first resume):
  --   batchSize: number of frames per batch (> 0)
  --   useViewPoint: label with GetCategoryViewPointId instead of
  --                 GetCategoryId
  --   test: use testset instead of trainset
  --   deterministic: forwarded to MakeShuffledTuples
  --   spline: use the row of the splines matrix as target (requires
  --           useViewPoint)
  -- On later resumes only the 3rd (test) and 4th (deterministic)
  -- arguments are read.
  -- yields:
  --   images, target
  --   The same two tensors are reused for every batch, so they have to
  --   be consumed (or copied) before the next resume.
  -----------------------------------------------------------------
  assert(batchSize > 0);
  assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");

  local splinesMat = getmetatable(dataset_GE)['splines']

  local target;
  local images;
  local dataset = test and testset or trainset

  local all_input_types = GetEnableInputTypes(dataset.config)
  local nChannels = GetValuesSum(all_input_types)

  -- The buffers are created directly with the device type, so no
  -- conversion is needed before yielding them.
  if config.GPU == -1 then -- CPU mode
    target = spline and torch.FloatTensor(batchSize, splinesMat:size(2)) or torch.FloatTensor(batchSize);
    images = torch.FloatTensor(batchSize, nChannels, config.imH, config.imW);
  else
    target = spline and torch.CudaTensor(batchSize, splinesMat:size(2)) or torch.CudaTensor(batchSize);
    images = torch.CudaTensor(batchSize, nChannels, config.imH, config.imW);
  end

  local batchIndex = 1;
  repeat
    local videos = MakeShuffledTuples(dataset, deterministic);
    for _,v in pairs(videos) do
      local category = v[1];
      local physicsCategory = v[2];
      local angle = v[3];
      local folder = v[4];
      local categoryId
      if (useViewPoint) then
        categoryId = GetCategoryViewPointId(physicsCategory, angle);
      else
        categoryId = GetCategoryId(physicsCategory);
      end
      images[batchIndex] = LoadRandomFrameOfVideo(dataset, category, angle, folder);
      target[batchIndex] = spline and splinesMat[categoryId] or categoryId

      batchIndex = batchIndex + 1
      if batchIndex > batchSize then
        local yieldedDataset = dataset
        test, deterministic = select(3, coroutine.yield(images, target));
        -- re-initialize vars for the next batch:
        dataset = test and testset or trainset
        batchIndex = 1
        if dataset ~= yieldedDataset then
          -- The remaining tuples describe videos of the previous split;
          -- leave the loop so the list is rebuilt from the new dataset.
          break
        end
      end
    end
  until false -- repeat until the end of the world
end)
175 |
GetAUniformImageBatchCoroutine = coroutine.create(function(batchSize, useViewPoint, test, spline)
  -----------------------------------------------------------------
  -- Endless generator of image batches whose samples are drawn with
  -- GetUniformRandomData, one random frame per drawn video.
  -- inputs (first resume):
  --   batchSize: number of frames per batch (> 0)
  --   useViewPoint: label with GetCategoryViewPointId instead of
  --                 GetCategoryId
  --   test: use testset instead of trainset
  --   spline: use the row of the splines matrix as target (requires
  --           useViewPoint)
  -- On later resumes only the 3rd argument (test) is read.
  -- yields:
  --   images, target
  --   The same two tensors are reused for every batch, so they have to
  --   be consumed (or copied) before the next resume.
  -----------------------------------------------------------------
  assert(batchSize > 0);
  assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");

  local splinesMat = getmetatable(dataset_GE)['splines']

  local target;
  local images;
  local dataset = test and testset or trainset

  local all_input_types = GetEnableInputTypes(dataset.config)
  local nChannels = GetValuesSum(all_input_types)
  -- The buffers are created directly with the device type, so no
  -- conversion is needed before yielding them.
  if config.GPU == -1 then -- CPU mode
    target = spline and torch.FloatTensor(batchSize, splinesMat:size(2)) or torch.FloatTensor(batchSize);
    images = torch.FloatTensor(batchSize, nChannels, config.imH, config.imW);
  else
    target = spline and torch.CudaTensor(batchSize, splinesMat:size(2)) or torch.CudaTensor(batchSize);
    images = torch.CudaTensor(batchSize, nChannels, config.imH, config.imW);
  end

  repeat
    local batchIndex = 1
    dataset = test and testset or trainset
    repeat
      local randomData = GetUniformRandomData(dataset)
      local category = randomData[1]
      local physicsCategory = randomData[2]
      local angle = randomData[3]
      local folder = randomData[4]
      local categoryId -- local: this used to be written to the global table
      if (useViewPoint) then
        categoryId = GetCategoryViewPointId(physicsCategory, angle);
      else
        categoryId = GetCategoryId(physicsCategory);
      end
      images[batchIndex] = LoadRandomFrameOfVideo(dataset, category, angle, folder)
      target[batchIndex] = spline and splinesMat[categoryId] or categoryId

      batchIndex = batchIndex + 1
    until batchIndex > batchSize

    -- only the 3rd resume argument is used; no global placeholder
    -- variable is written
    test = select(3, coroutine.yield(images, target));
  until false -- repeat until the end of the world
end)
223 |
GetAUniformAnimationBatchCoroutine = coroutine.create(function(batchSize, useViewPoint, spline)
  -----------------------------------------------------------------
  -- Endless generator of game engine video batches. Each sample picks a
  -- random entry of config.classes, a random angle up to the matching
  -- entry of config.class_angles and a random variation of that video.
  -- inputs (first resume):
  --   batchSize: number of videos per batch (> 0)
  --   useViewPoint: label with GetCategoryViewPointId instead of
  --                 GetCategoryId
  --   spline: use the row of the splines matrix as target (requires
  --           useViewPoint)
  -- Arguments of later resumes are ignored.
  -- yields:
  --   videos, target
  --   videos: batchSize x nChannels x config.GE.frame_per_video x
  --           config.GE.imH x config.GE.imW
  --   The same two tensors are reused for every batch, so they have to
  --   be consumed (or copied) before the next resume.
  -----------------------------------------------------------------
  assert(batchSize > 0);
  assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");

  local splinesMat = getmetatable(dataset_GE)['splines']

  local nChannels = GetValuesSum(GetEnableInputTypes(config.GE))
  local target;
  local videos;
  -- The buffers are created directly with the device type, so no
  -- conversion is needed before yielding them.
  if config.GPU == -1 then -- CPU mode
    target = spline and torch.FloatTensor(batchSize, splinesMat:size(2)) or torch.FloatTensor(batchSize);
    videos = torch.FloatTensor(batchSize, nChannels, config.GE.frame_per_video, config.GE.imH, config.GE.imW);
  else
    target = spline and torch.CudaTensor(batchSize, splinesMat:size(2)) or torch.CudaTensor(batchSize);
    videos = torch.CudaTensor(batchSize, nChannels, config.GE.frame_per_video, config.GE.imH, config.GE.imW);
  end

  repeat
    local batchIndex = 1;
    repeat
      local randomCategoryIndex = torch.random( #config.classes )
      local physicsCategory = config.classes[ randomCategoryIndex ]
      local angle = torch.random( config.class_angles[randomCategoryIndex] )
      local categoryId -- local: this used to be written to the global table
      if (useViewPoint) then
        categoryId = GetCategoryViewPointId(physicsCategory, angle)
      else
        categoryId = GetCategoryId(physicsCategory)
      end
      local gameEngineVideo = LoadGEPerCategory(physicsCategory, angle, dataset_GE)
      local gameEngineVideoRandomForce = gameEngineVideo[ torch.random(gameEngineVideo:size(1)) ]
      -- swap the first two dimensions of the chosen variation
      videos[batchIndex] = gameEngineVideoRandomForce:transpose(1,2);
      target[batchIndex] = spline and splinesMat[categoryId] or categoryId

      batchIndex = batchIndex + 1
    until batchIndex > batchSize

    coroutine.yield(videos, target);
  until false -- repeat until the end of the world
end)
267 |
GetAVideoBatchCoroutine = coroutine.create(function(useViewPoint, test, spline)
  -----------------------------------------------------------------
  -- Endless generator that walks through all videos of the selected
  -- dataset and yields one whole video at a time.
  -- inputs (first resume):
  --   useViewPoint: label with GetCategoryViewPointId instead of
  --                 GetCategoryId
  --   test: use testset instead of trainset
  --   spline: use the row of the splines matrix as target (requires
  --           useViewPoint)
  -- On later resumes only the 2nd argument (test) is read.
  -- yields:
  --   video, target
  --   video: result of LoadTrainImagesPerVideo, moved to the GPU unless
  --          config.GPU == -1
  --   target: the category id, or its row of the splines matrix
  -----------------------------------------------------------------
  assert((not spline) or useViewPoint, "Can't get splines with no viewpoint");

  local dataset = test and testset or trainset

  local splinesMat = getmetatable(dataset_GE)['splines']

  repeat
    local videos = MakeShuffledTuples(dataset);
    for _,v in pairs(videos) do
      local category = v[1];
      local physicsCategory = v[2];
      local angle = v[3];
      local folder = v[4];
      local categoryId
      if (useViewPoint) then
        categoryId = GetCategoryViewPointId(physicsCategory, angle);
      else
        categoryId = GetCategoryId(physicsCategory);
      end

      local video = LoadTrainImagesPerVideo(dataset, category, angle, folder);
      local target = spline and splinesMat[categoryId] or categoryId
      if config.GPU ~= -1 then
        video = video:cuda()
      end
      local yieldedDataset = dataset
      test = select(2, coroutine.yield(video, target));
      -- re-initialize vars for the next batch:
      dataset = test and testset or trainset
      if dataset ~= yieldedDataset then
        -- The remaining tuples describe videos of the previous split;
        -- leave the loop so the list is rebuilt from the new dataset.
        break
      end
    end
  until false -- repeat until the end of the world
end)
300 |
--------------------------------------------------------------------------------
/io/readFiles.lua:
--------------------------------------------------------------------------------
function LoadDatabase(dataConfig, excluded_categories)
  -----------------------------------------------------------------
  -- Reads the list of images in the videos from annotDir
  -- inputs:
  --   dataConfig: The data configuration to load from. Look at config.train
  --               and config.test.
  --   excluded_categories: forwarded to removeExcludedCategories to drop
  --               categories from the listing
  -- outputs:
  --   dataset: a table with list of files for each category
  --   dataset[category][angle][video_directory]
  --   e.g., dataset['sliding-ski'][1]["181_1"] contains the files
  --   for video "181_1", which is annotated as the first angle.
  --   Only files whose name contains "_00_ge.mat" are listed.
  --   dataset.config holds dataConfig.
  -----------------------------------------------------------------

  local max_angles = config.max_angles; -- 8
  local annotDir = dataConfig.annotation.dir;

  local dataset = {};

  -- categories
  local categories = paths.dir(annotDir);
  RemoveDotDirs(categories);
  categories = removeExcludedCategories(categories, excluded_categories);

  for i = 1, #categories do
    local category = categories[i];

    -- one bucket per view angle, even when it stays empty
    local perCategory = {};
    dataset[category] = perCategory;
    for j = 1, max_angles do --maximum 8 different angles
      perCategory[j] = {};
    end

    -- videos
    local videos = paths.dir(paths.concat(annotDir, category));
    RemoveDotDirs(videos);

    for _, video in pairs(videos) do
      -- all viewpoint annotations will be similar to 00000_00's;
      -- view.mat is the fallback when that file does not exist
      local viewFile = paths.concat(annotDir, category, video, '00000_00_ge.mat');
      if not paths.filep(viewFile) then
        viewFile = paths.concat(annotDir, category, video, 'view.mat');
      end
      local angle = mattorch.load(viewFile).ge[1][1];

      local angleBucket = perCategory[angle];
      assert(angleBucket, "view angle " .. tostring(angle) .. " of video " .. category
        .. "/" .. video .. " is outside 1.." .. tostring(max_angles));

      local flist = paths.dir(paths.concat(annotDir, category, video));
      RemoveDotDirs(flist);
      table.sort(flist, function (a,b) return a < b end);

      local pruned_flist = {}
      for _, f in pairs(flist) do
        -- plain search: the '.' must match literally, not any character
        if f:find("_00_ge.mat", 1, true) then
          pruned_flist[#pruned_flist+1] = f
        end
      end
      angleBucket[video] = pruned_flist;
    end
  end

  dataset.config = dataConfig;
  return dataset;
end
87 |
-- Builds the file listing of the training videos: LoadDatabase applied to
-- config.train, with `exclude_category` forwarded unchanged.
function LoadTrainDatabase(exclude_category)
  local trainConfig = config.train
  return LoadDatabase(trainConfig, exclude_category)
end
91 |
-- Builds the file listing of the test videos: LoadDatabase applied to
-- config.test, with `exclude_category` forwarded unchanged.
function LoadTestDatabase(exclude_category)
  local testConfig = config.test
  return LoadDatabase(testConfig, exclude_category)
end
95 |
function ReadIndividualFrame(dataset, category, angle, video_id, imname, savefile, input_type)
  -----------------------------------------------------------------
  -- Reads a specific frame of a video for a category and an angle,
  -- stores the resulting tensor in `savefile` and returns it.
  -- inputs:
  --   dataset: The output of "LoadTrainDatabase"
  --   category: Video category, e.g., 'sliding-ski', 'falling-diving', etc.
  --   angle: View angle (not used by this function)
  --   video_id: Video folder
  --   imname: The name of frame's image to be read.
  --   savefile: Save the tensor in this file.
  --   input_type: The type of the data to be read. Should be one of
  --               image, depth, normal or flow.
  -- output:
  --   3D Tensor, nChannels x imH x imW, or, when config.w_crop is set and
  --   the input type is croppable, (5 * nChannels) x imH x imW holding the
  --   full frame followed by 4 crops derived from the annotated box.
  -----------------------------------------------------------------
  local imH = config.imH;
  local imW = config.imW;

  local typeConfig = dataset.config[input_type];
  local mean = typeConfig.mean;
  local std = typeConfig.std;
  local nChannels = typeConfig.nChannels;

  -- Scales to imW x imH, normalizes and keeps the first nChannels
  -- channels. Slicing before the copy into the output buffer lets
  -- sources with more channels than nChannels work in the crop branch
  -- too, as they already did in the plain branch.
  local function prepare(img)
    local normalized = normalizeImage(image.scale(img, imW, imH), mean, std);
    return normalized[{{1, nChannels}, {}}]
  end

  -- Restricts a box coordinate to the valid pixel range.
  local function clamp(value, lo, hi)
    return math.max(lo, math.min(hi, value))
  end

  local impath = paths.concat(typeConfig.dir, category, video_id, imname .. "." .. typeConfig.type);
  local im = loadImageOrig(impath);

  if not (config.w_crop and typeConfig.croppable) then
    local imnorm = prepare(im);
    torch.save(savefile, imnorm);
    return imnorm
  end

  local annotDir = dataset.config.annotation.dir
  local coord = mattorch.load(paths.concat(annotDir, category, video_id, imname .. "_00.mat"));
  local imSize = im:size();
  local height = imSize[2];
  local width = imSize[3];

  local x1 = clamp(math.floor(coord.box[1][1]), 1, width);
  local y1 = clamp(math.floor(coord.box[1][2]), 1, height);
  local x2 = clamp(math.floor(coord.box[1][3]), 1, width);
  local y2 = clamp(math.floor(coord.box[1][4]), 1, height);

  -- Each crop spans from one corner of the box to the opposite corner
  -- of the image.
  local images = torch.Tensor(5, nChannels, imH, imW)
  images[1] = prepare(im);
  images[2] = prepare(im[{{},{y1,height},{x1,width}}]);
  images[3] = prepare(im[{{},{1,y2},{1,x2}}]);
  images[4] = prepare(im[{{},{y1,height},{1,x2}}]);
  images[5] = prepare(im[{{},{1,y2},{x1,width}}]);

  -- stack the 5 views along the channel dimension
  images = images:reshape(5 * nChannels, imH, imW)
  torch.save(savefile, images);
  return images
end
164 |
function LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
  -----------------------------------------------------------------
  -- Returns the tensor of one frame of a video.
  --
  -- With an input_type, the tensor is loaded from the cache file
  -- <save_dir>/<category>_<video_id>_<0|1>_<suffix>_<imname>.t7 when it
  -- exists (the 0/1 reflects config.w_crop) and produced by
  -- "ReadIndividualFrame" otherwise. The save directory is created when
  -- missing.
  --
  -- Without an input_type, the function calls itself for every enabled
  -- input type of dataset.config and stacks the results along the
  -- channel dimension, in the order `pairs` enumerates the enabled types.
  --
  -- inputs:
  --   dataset: The output of "LoadTrainDatabase"
  --   category: Video category, e.g., 'sliding-ski', 'falling-diving', etc.
  --   angle: View angle, forwarded to "ReadIndividualFrame"
  --   video_id: Video folder
  --   imname: The name of frame's image to be read.
  --   input_type: Optional type of the data to be read. Should be one of
  --               image, depth, normal, flow or mask.
  -- output:
  --   3D Tensor, channels x imH x imW
  -----------------------------------------------------------------
  if input_type then
    local suffix = dataset.config[input_type].suffix;
    local cropFlag = config.w_crop and '1' or '0';

    local saveDir = dataset.config.save_dir;
    if not paths.dirp(saveDir) then
      paths.mkdir(saveDir)
    end
    -- NOTE: If we may have different oids for a video, we need to use different
    -- save paths for w_crop = true.
    local cacheFile = paths.concat(saveDir, category .. '_' .. video_id .. '_' ..
      cropFlag .. '_' .. suffix .. '_' .. imname .. '.t7');

    if not paths.filep(cacheFile) then
      return ReadIndividualFrame(dataset, category, angle, video_id, imname, cacheFile, input_type)
    end
    return torch.load(cacheFile)
  end

  -- No type requested: concatenate every enabled type channel-wise.
  local imH = config.imH;
  local imW = config.imW;
  local enabledTypes = GetEnableInputTypes(dataset.config)
  local totalChannels = GetValuesSum(enabledTypes)
  local stacked = torch.Tensor(totalChannels, imH, imW);

  local firstChannel = 1
  for typeName, typeChannels in pairs(enabledTypes) do
    local lastChannel = firstChannel + typeChannels - 1
    stacked[{{firstChannel, lastChannel}, {}, {}}] = LoadIndividualFrame(dataset, category, angle, video_id, imname, typeName)
    firstChannel = lastChannel + 1
  end
  return stacked
end
214 |
function ReadTrainImagesPerVideo(dataset, category, angle, video_id, savefile, input_type)
  -----------------------------------------------------------------
  -- Loads every frame of a video through LoadIndividualFrame, stacks
  -- them into one tensor and saves that tensor to "savefile".
  -- inputs:
  --   dataset: The output of "LoadTrainDatabase"
  --   category: Video category, e.g., 'sliding-ski', 'falling-diving', etc.
  --   angle: View angle (1 out of 8 or 1 out of 3 for symmetric categories)
  --   video_id: Video folder
  --   savefile: Save the tensor in this file.
  --   input_type: key into dataset.config selecting the data type to read
  -- output:
  --   images: 5D Tensor (# of images x 5 (orig + 4 crops) x nChannels x
  --           imH x imW) when config.w_crop is set and the input type is
  --           croppable, otherwise 4D (# of images x nChannels x imH x imW)
  -----------------------------------------------------------------
  local imH = config.imH
  local imW = config.imW

  local typeConfig = dataset.config[input_type]
  -- The channel count used to be hard-coded to 3; fall back to 3 when the
  -- configuration does not provide one so existing setups keep working.
  local nChannels = typeConfig.nChannels or 3

  local frames = dataset[category][angle][video_id]
  local nImages = #frames

  -- The 5-crop layout is only produced for croppable input types when
  -- cropping is enabled, so allocate the extra dimension only then.
  local images
  if config.w_crop and typeConfig.croppable then
    images = torch.Tensor(nImages, 5, nChannels, imH, imW)
  else
    images = torch.Tensor(nImages, nChannels, imH, imW)
  end

  for cnt, f in ipairs(frames) do
    -- Frame entries look like "<imname>_<oid>"; only the image name is needed.
    local imname = f:match("([^_]+)_([^_]+)")
    images[cnt] = LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
  end

  collectgarbage()
  torch.save(savefile, images)
  return images
end
261 |
function LoadTrainImagesPerVideo(dataset, category, angle, video_id, input_type)
  -----------------------------------------------------------------
  -- Loads all frames of a video by calling LoadIndividualFrame once per
  -- frame and stacking the results.
  --
  -- inputs:
  --   dataset: The output of "LoadTrainDatabase"
  --   category: Video category, e.g., 'sliding-ski', 'falling-diving', etc.
  --   angle: View angle (1 out of 8 or 1 out of 3 for symmetric categories)
  --   video_id: Video folder
  --   input_type: Optional data type; when omitted all enabled input types
  --               are stacked along the channel dimension.
  -- outputs:
  --   images: 4D Tensor, # of images x channels x imH x imW, where
  --           "channels" already includes the x5 factor for cropped,
  --           croppable input types.
  -----------------------------------------------------------------
  local imH = config.imH
  local imW = config.imW
  local frames = dataset[category][angle][video_id]
  local nImages = #frames

  local nChannels
  if input_type then
    local typeConfig = dataset.config[input_type]
    nChannels = typeConfig.nChannels
    -- Same rule as GetEnableInputTypes: cropped frames carry the original
    -- plus four crops, i.e. five times the channels. Without this the
    -- per-frame copy below fails on a size mismatch.
    if config.w_crop and typeConfig.croppable then
      nChannels = nChannels * 5
    end
  else
    nChannels = GetValuesSum(GetEnableInputTypes(dataset.config))
  end
  local images = torch.Tensor(nImages, nChannels, imH, imW)

  for cnt, f in ipairs(frames) do
    -- Frame entries look like "<imname>_<oid>"; only the image name is needed.
    local imname = f:match("([^_]+)_([^_]+)")
    images[cnt] = LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
  end

  return images
end
302 |
function LoadRandomFrameOfVideo(dataset, category, angle, video_id, input_type)
  -----------------------------------------------------------------
  -- Picks one frame of the given video with GetRandomValue and loads it
  -- through LoadIndividualFrame.
  --
  -- inputs:
  --   dataset: The output of "LoadTrainDatabase"
  --   category: Video category, e.g., 'sliding-ski', 'falling-diving', etc.
  --   angle: View angle (1 out of 8 or 1 out of 3 for symmetric categories)
  --   video_id: Video folder
  --   input_type: forwarded unchanged to LoadIndividualFrame
  -- outputs:
  --   whatever LoadIndividualFrame returns for the chosen frame
  -----------------------------------------------------------------
  local frames = dataset[category][angle][video_id]
  -- The image name is the part of the frame entry before the first '_'.
  local imname = GetRandomValue(frames):match('[^_]+')
  return LoadIndividualFrame(dataset, category, angle, video_id, imname, input_type)
end
323 |
324 |
--------------------------------------------------------------------------------
/utils.lua:
--------------------------------------------------------------------------------
function RemoveDotDirs(aTable)
  -----------------------------------------------------------------
  -- Removes, in place, every entry of the list "aTable" whose first
  -- character is '.' (this covers "." and "..", and any other dot-name).
  -- A removed entry is replaced by the current last entry, so the order
  -- of the remaining entries is not preserved.
  -- Non-table arguments (including nil) are returned unchanged; for a
  -- table nothing is returned.
  -----------------------------------------------------------------
  if type(aTable) ~= 'table' then
    return aTable
  end

  local idx = 1
  while idx <= #aTable do
    local entry = aTable[idx]
    if entry ~= nil and entry:sub(1, 1) == '.' then
      -- Overwrite with the last entry and shrink the list; the slot is
      -- examined again on the next pass because idx is not advanced.
      local last = #aTable
      aTable[idx] = aTable[last]
      aTable[last] = nil
    else
      idx = idx + 1
    end
  end
end
15 |
function getTableSize(aTable)
  -- Counts every key/value pair of "aTable" (array and hash part alike).
  local count = 0
  for _ in pairs(aTable) do
    count = count + 1
  end
  return count
end
23 |
function GetRandomValue(aTable)
  -- Returns one value of "aTable", selected with torch.random over the
  -- number of values collected from pairs().
  local pool, count = {}, 0
  for _, item in pairs(aTable) do
    count = count + 1
    pool[count] = item
  end
  return pool[torch.random(count)]
end
31 |
function GetValuesSum(aTable)
  -- Adds up all values of "aTable" (keys are ignored); 0 for an empty table.
  local sum = 0
  for _, amount in pairs(aTable) do
    sum = sum + amount
  end
  return sum
end
39 |
function loadImageOrig(path)
  -----------------------------------------------------------------
  -- Reads an image and returns it with exactly 3 channels.
  -- inputs:
  --   "path": path to the image
  -- output:
  --   "im": the image; 1-channel input is replicated to 3 channels and
  --         the 4th channel of a 4-channel input is dropped
  -- Raises an error for any other layout.
  -----------------------------------------------------------------
  local im = image.load(path)
  local nDim = im:dim()

  if nDim == 2 then
    -- 1-channel image loaded as 2D tensor
    return im:view(1, im:size(1), im:size(2)):repeatTensor(3, 1, 1)
  end

  if nDim == 3 then
    local planes = im:size(1)
    if planes == 1 then       -- 1-channel image
      return im:repeatTensor(3, 1, 1)
    elseif planes == 3 then   -- 3-channel image, nothing to do
      return im
    elseif planes == 4 then   -- image with alpha
      return im[{{1,3},{},{}}]
    end
  end

  error("image structure not compatible")
end
61 |
function loadImage(path, imH, imW)
  -----------------------------------------------------------------
  -- Reads an image with loadImageOrig and rescales it.
  -- inputs:
  --   "path": path to the image
  --   "imH" and "imW": target height and width
  -- output:
  --   the result of image.scale(im, imW, imH)
  -----------------------------------------------------------------
  local original = loadImageOrig(path)
  -- image.scale is called with width first, then height.
  local scaled = image.scale(original, imW, imH)
  return scaled
end
75 |
function normalizeImage(im, mean, std)
  -----------------------------------------------------------------
  -- Normalizes the first three channels of "im" in place: each channel c
  -- has mean[c] subtracted and is then divided by std[c].
  -- Returns the same "im" object that was passed in.
  -----------------------------------------------------------------
  local channel = 1
  while channel <= 3 do
    local plane = im[{channel,{},{}}]
    plane:add(-mean[channel])
    plane:div(std[channel])
    channel = channel + 1
  end
  return im
end
86 |
function LoadRandomSamples(nSamples, allfiles, imH, imW)
  -----------------------------------------------------------------
  -- Loads "nSamples" distinct, randomly chosen images from "allfiles"
  -- and rescales them to imH x imW.
  -- inputs:
  --   nSamples: # of images that is sampled (must not exceed #allfiles)
  --   allfiles: an array of paths of the images in the dataset
  --   imH, imW: size of the rescaled image
  -- outputs:
  --   images: 4D Tensor, nSamples x 3 x imH x imW
  -----------------------------------------------------------------
  -- Sampling is without replacement (a prefix of a random permutation),
  -- so asking for more samples than files cannot be satisfied.
  assert(nSamples <= #allfiles,
         "Cannot sample " .. tostring(nSamples) .. " images from " ..
         tostring(#allfiles) .. " files.")

  local images = torch.Tensor(nSamples, 3, imH, imW)
  local idx = torch.randperm(#allfiles)[{{1,nSamples}}]
  for i = 1, nSamples do
    images[{{i},{},{},{}}] = loadImage(allfiles[idx[i]], imH, imW)
  end
  return images
end
107 |
function ComputeMeanStd(nSample, allfiles, imH, imW)
  -----------------------------------------------------------------
  -- Computes per-channel mean and std over images drawn with
  -- LoadRandomSamples.
  -- inputs:
  --   nSample: # of images that is sampled
  --   allfiles: an array of paths of the images in the dataset
  --   imH, imW: size of the rescaled image
  -- outputs:
  --   mean: a 3-element array (the mean for each channel)
  --   std: a 3-element array (the std for each channel)
  -----------------------------------------------------------------
  local samples = LoadRandomSamples(nSample, allfiles, imH, imW)
  local mean, std = {}, {}

  for channel = 1, 3 do
    -- All samples, one channel, every pixel.
    local plane = samples[{{},channel,{},{}}]
    mean[channel] = torch.mean(plane)
    std[channel] = torch.std(plane)
  end

  return mean, std
end
134 |
function MakeListTrainFrames(dataset, trainDir, image_type)
  -----------------------------------------------------------------
  -- Builds the list of image paths for every frame entry in "dataset".
  -- inputs:
  --   dataset: category -> angle -> video folder -> list of frame entries;
  --            the 'config' key is skipped
  --   trainDir: root directory the paths are built under
  --   image_type: file extension appended to each frame name
  -- output:
  --   array of paths trainDir/category/folder/<name>.<image_type>, where
  --   <name> is the frame entry with its last 10 characters removed
  -----------------------------------------------------------------
  -- Kept local: the list is handed back through the return value and must
  -- not leak into (or clobber) the global environment.
  local allfiles = {}
  for category, subdataset in pairs(dataset) do
    if category ~= 'config' then
      for _, subsubdataset in pairs(subdataset) do
        for dirs, files in pairs(subsubdataset) do
          for _, f in pairs(files) do
            local fname = string.sub(f, 1, -11) .. "." .. image_type
            allfiles[#allfiles + 1] = paths.concat(trainDir, category, dirs, fname)
          end
        end
      end
    end
  end
  return allfiles
end
151 |
function MakeListGEFrames(dataset, data_type)
  -----------------------------------------------------------------
  -- Builds the list of file paths for every entry in "dataset", rooted
  -- at config.GE.dir.
  -- inputs:
  --   dataset: category -> angle -> folder -> list of file names
  --   data_type: suffix used in the "<category>_<angle>_<data_type>"
  --              directory component
  -- output:
  --   array of paths
  --   <GE dir>/<category>/<category>_<angle>_<data_type>/<folder>/<file>
  -----------------------------------------------------------------
  local geDir = config.GE.dir
  -- Kept local so the list does not leak into the global environment.
  local allfiles = {}
  for categories, subdataset in pairs(dataset) do
    for angles, subsubdataset in pairs(subdataset) do
      local typedDir = categories .. "_" .. angles .. "_" .. data_type
      for dirs, files in pairs(subsubdataset) do
        for _, f in pairs(files) do
          allfiles[#allfiles + 1] = paths.concat(geDir, categories, typedDir, dirs, f)
        end
      end
    end
  end
  return allfiles
end
166 |
function shuffleList(list, deterministic)
  -----------------------------------------------------------------
  -- Shuffles "list" in place by swapping each position i (from the end
  -- down to 2) with a randomly drawn position j = rand(i).
  -- When "deterministic" is truthy the global math.random generator is
  -- re-seeded with 2 and used; otherwise torch.random is used.
  -----------------------------------------------------------------
  local pick
  if not deterministic then
    pick = torch.random
  else
    -- shuffle! but deterministicly.
    math.randomseed(2)
    pick = math.random
  end

  local i = #list
  while i >= 2 do
    local j = pick(i)
    local held = list[i]
    list[i] = list[j]
    list[j] = held
    i = i - 1
  end
end
181 |
function GetPhysicsCategory(category)
  -- Returns the first run of non-'-' characters in "category",
  -- e.g. "sliding" for "sliding-ski"; nil when there is none.
  local prefix = category:match("[^-]+")
  return prefix
end
185 |
function MakeShuffledTuples(dataset, deterministic)
  -----------------------------------------------------------------
  -- Lists every video folder of "dataset" as a tuple and shuffles the list.
  -- inputs:
  --   dataset: category -> angle -> folder -> ...; the 'config' key is skipped
  --   deterministic: forwarded to shuffleList
  -- output:
  --   array of tuples {category, physics category, angle, folder}
  -----------------------------------------------------------------
  -- Kept local so the list does not leak into the global environment.
  local tuples = {}
  for category, subdataset in pairs(dataset) do
    if category ~= 'config' then
      local physicsCategory = GetPhysicsCategory(category)
      for angles, subsubdataset in pairs(subdataset) do
        for dirs, _ in pairs(subsubdataset) do
          tuples[#tuples + 1] = {category, physicsCategory, angles, dirs}
        end
      end
    end
  end
  shuffleList(tuples, deterministic)
  return tuples
end
203 |
function isExcluded(excluded_categories, category)
  -----------------------------------------------------------------
  -- Returns true when any entry of "excluded_categories" occurs as a
  -- substring of "category", false otherwise.
  -----------------------------------------------------------------
  for _, ecat in pairs(excluded_categories) do
    -- Plain (non-pattern) search: category names contain '-', which is a
    -- quantifier in Lua patterns, so a pattern search for "sliding-ski"
    -- would never match the literal name "sliding-ski".
    if category:find(ecat, 1, true) then
      return true
    end
  end
  return false
end
212 |
function removeExcludedCategories(categories, excluded_categories)
  -----------------------------------------------------------------
  -- Returns a new array with the entries of "categories" for which
  -- isExcluded(excluded_categories, entry) is false.
  -- Raises an error when fewer categories were removed than there are
  -- exclusion entries, or when nothing is left.
  -----------------------------------------------------------------
  local result = {}
  for _, v in pairs(categories) do
    if not isExcluded(excluded_categories, v) then
      result[#result + 1] = v
    end
  end
  -- Each exclusion entry is expected to remove at least one category.
  assert(#result + #excluded_categories <= #categories,
         "At least one category should be removed per excluded_categories.")
  assert(#result > 0, "Cannot exclude all categories.")
  return result
end
225 |
function getAllCategoriesandAngles(dataset)
  -----------------------------------------------------------------
  -- Walks the top-level keys of "dataset" and returns three parallel arrays:
  --   physics_category_list: GetPhysicsCategory(key) for each key
  --   category_list: the keys themselves
  --   angle_list: getTableSize(dataset[key]) for each key
  -- NOTE(review): unlike other helpers in this file, a 'config' key is
  -- not skipped here; confirm callers pass a dataset without it.
  -----------------------------------------------------------------
  -- Kept local: results are returned, not published as globals.
  local physics_category_list = {}
  local category_list = {}
  local angle_list = {}
  for k, v in pairs(dataset) do
    physics_category_list[#physics_category_list + 1] = GetPhysicsCategory(k)
    category_list[#category_list + 1] = k
    angle_list[#angle_list + 1] = getTableSize(v)
  end
  return physics_category_list, category_list, angle_list
end
237 |
function GetNNParamsToCPU(nnModel)
  -----------------------------------------------------------------
  -- Returns an array holding param:float() for every tensor returned by
  -- nnModel:parameters(), or an empty table when the model has none.
  -----------------------------------------------------------------
  local params = nnModel:parameters()
  -- Local on purpose: the converted list is returned, not stored globally.
  local paramsCPU = {}
  if params ~= nil then
    for i, param in ipairs(params) do
      paramsCPU[i] = param:float()
    end
  end
  return paramsCPU
end
248 |
function LoadNNlParams(current_model,saved_params)
  -----------------------------------------------------------------
  -- Copies "saved_params" into the parameter tensors of "current_model".
  -- inputs:
  --   current_model: object whose :parameters() returns an array of tensors
  --   saved_params: array of tensors, one per model parameter tensor
  -- Raises an error when the number of tensors, their number of
  -- dimensions, or any dimension size differs. Does nothing when the
  -- model has no parameters.
  -----------------------------------------------------------------
  local params = current_model:parameters()
  if params == nil then
    return
  end

  assert(#params == #saved_params,
         string.format('#layer != #saved_layers (%d vs %d)!',
                       #params, #saved_params))
  for i = 1, #params do
    local target, source = params[i], saved_params[i]
    assert(target:nDimension() == source:nDimension(),
           string.format("Layer %d: dimension mismatch (%d vs %d).",
                         i, target:nDimension(), source:nDimension()))
    for j = 1, target:nDimension() do
      assert(target:size(j) == source:size(j),
             string.format("Layer %d, Dim %d: size does not match (%d vs %d).",
                           i, j, target:size(j), source:size(j)))
    end
    target:copy(source)
  end
end
268 |
function rand_initialize(layer)
  -----------------------------------------------------------------
  -- Re-initializes "layer" in place when its torch type is
  -- cudnn.SpatialConvolution, cudnn.VolumetricConvolution or nn.Linear:
  -- weights become randn * sqrt(10 / fanIn) and the bias is filled with 0.
  -- fanIn is kH * kW * nInputPlane for the two convolution types and
  -- weight:size(2) for nn.Linear. Other layer types are left untouched.
  -----------------------------------------------------------------
  local tn = torch.type(layer)

  local fanIn
  if tn == "cudnn.SpatialConvolution" or tn == "cudnn.VolumetricConvolution" then
    fanIn = layer.kH * layer.kW * layer.nInputPlane
  elseif tn == "nn.Linear" then
    fanIn = layer.weight:size(2)
  else
    return
  end

  local c = math.sqrt(10.0 / fanIn)
  layer.weight:copy(torch.randn(layer.weight:size()) * c)
  layer.bias:fill(0)
end
285 |
function GetCategoryViewPointId(physicsCategory, viewpoint)
  -----------------------------------------------------------------
  -- Maps (physics category, viewpoint) to a single id: the viewpoint plus
  -- the summed config.class_angles of all classes listed before the
  -- category in config.classes. Raises an error for an unknown category.
  -----------------------------------------------------------------
  local base = 0
  for idx, name in ipairs(config.classes) do
    if name ~= physicsCategory then
      base = base + config.class_angles[idx]
    else
      return base + viewpoint
    end
  end
  error("failed to find the physicsCategory:" .. physicsCategory)
end
297 |
function DecryptCategoryViewPointId(categoryId)
  -----------------------------------------------------------------
  -- Inverse of GetCategoryViewPointId: returns the class name and the
  -- viewpoint for a combined id, using config.classes / config.class_angles.
  -- Raises an error when the id is not positive or exceeds the total
  -- number of viewpoints.
  -----------------------------------------------------------------
  assert(categoryId > 0, "Invalid categoryId " .. tostring(categoryId))

  local consumed = 0
  for idx, name in ipairs(config.classes) do
    local upper = consumed + config.class_angles[idx]
    if categoryId <= upper then
      return name, categoryId - consumed
    end
    consumed = upper
  end
  error("Invalid categoryId " .. tostring(categoryId))
end
310 |
function GetCategoryId(physicsCategory)
  -- Returns the key under which "physicsCategory" is stored in
  -- config.classes; raises an error when it is not present.
  for id, name in pairs(config.classes) do
    if name == physicsCategory then
      return id
    end
  end
  error("failed to find the physicsCategory:" .. physicsCategory)
end
320 |
function CategoryViewPointId2CategoryId(categoryId)
  -----------------------------------------------------------------
  -- Returns the index (into config.classes) of the class that the combined
  -- category/viewpoint id falls into, based on config.class_angles.
  -- Raises an error when the id is not positive or exceeds the total
  -- number of viewpoints.
  -----------------------------------------------------------------
  assert(categoryId > 0, "Invalid categoryId " .. tostring(categoryId))

  local upper = 0
  for idx, _ in ipairs(config.classes) do
    upper = upper + config.class_angles[idx]
    if categoryId <= upper then
      return idx
    end
  end
  error("Invalid categoryId " .. tostring(categoryId))
end
333 |
function GetUniformRandomElement(data)
  -----------------------------------------------------------------
  -- Descends through nested tables, choosing one key per level with
  -- torch.random, until a non-table value is reached.
  -- At each level the 'config' key and keys whose value is an empty table
  -- are not eligible.
  -- Returns an array with the chosen keys in order, followed by the final
  -- non-table value.
  -----------------------------------------------------------------
  local path = {}
  while type(data) == 'table' do
    local candidates = {}
    for key, child in pairs(data) do
      local usable = key ~= 'config' and (type(child) ~= 'table' or next(child) ~= nil)
      if usable then
        candidates[#candidates + 1] = key
      end
    end
    local chosen = candidates[torch.random(#candidates)]
    path[#path + 1] = chosen
    data = data[chosen]
  end
  path[#path + 1] = data
  return path
end
350 |
function GetUniformRandomCategory(dataset, physicsCategory, angle)
  -----------------------------------------------------------------
  -- Chooses, with torch.random, one key of "dataset" that starts with
  -- "physicsCategory" and has a non-empty entry for "angle".
  -- Returns nil when no key qualifies.
  -----------------------------------------------------------------
  local matches = {}
  for name, views in pairs(dataset) do
    local hasPrefix = string.sub(name, 1, string.len(physicsCategory)) == physicsCategory
    local videos = hasPrefix and views[angle]
    if videos and next(videos) then
      matches[#matches + 1] = name
    end
  end

  if #matches == 0 then
    return nil
  end
  return matches[torch.random(#matches)]
end
366 |
function GetUniformRandomData(dataset)
  -- Draws a random path with GetUniformRandomElement and returns
  -- {category, physics category, angle, folder} built from its first
  -- three entries.
  local picked = GetUniformRandomElement(dataset)
  local category, angle, folder = picked[1], picked[2], picked[3]
  local physicsCategory = GetPhysicsCategory(category)
  return {category, physicsCategory, angle, folder}
end
375 |
function log(...)
  -- Prints the arguments twice: first with config.logFile selected as the
  -- default output (io.output), then with io.stdout selected. The default
  -- output is left pointing at io.stdout afterwards.
  -- NOTE(review): this only reaches the log file if print() writes through
  -- the default io output, which stock Lua's print does not do - confirm
  -- the interpreter in use overrides print accordingly.
  local sinks = { config.logFile, io.stdout }
  for i = 1, 2 do
    io.output(sinks[i])
    print(...)
  end
end
384 |
function GetEnableInputTypes(input_config)
  -----------------------------------------------------------------
  -- Returns a table mapping each enabled input type to its channel count.
  -- Only entries of "input_config" that are tables with a truthy "enable"
  -- field are considered. The count is conf.nChannels, multiplied by 5
  -- when config.w_crop is set and the type is croppable.
  -----------------------------------------------------------------
  local enabled = {}
  for input_type, conf in pairs(input_config) do
    if type(conf) == 'table' and conf.enable then
      local copies = (config.w_crop and conf.croppable) and 5 or 1
      enabled[input_type] = conf.nChannels * copies
    end
  end
  return enabled
end
398 |
function GetPerClassAccuracy(predictions, labels)
  -----------------------------------------------------------------
  -- Compares "predictions" with "labels" element by element.
  -- outputs:
  --   acc: fraction of positions where both agree
  --   per_class: config.nCategories x 2 Tensor; column 1 counts correct
  --              predictions for each label value, column 2 counts how
  --              often that label value occurs
  -----------------------------------------------------------------
  local per_class = torch.Tensor(config.nCategories, 2):fill(0)
  labels = labels:clone()
  predictions = predictions:clone()

  local total = labels:size(1)
  local hits = 0
  for i = 1, total do
    local truth = labels[i]
    local stats = per_class[truth]
    if truth == predictions[i] then
      hits = hits + 1
      stats[1] = stats[1] + 1
    end
    stats[2] = stats[2] + 1
  end

  return hits / total, per_class
end
414 |
function GetAnimationFeatures(model, convLayer)
  -----------------------------------------------------------------
  -- Collects the animation (game engine) features of every
  -- category/viewpoint id, concatenated along dimension 3.
  -- For each id i in 1..sum(config.class_angles):
  --   * if config.GE.featsDir/i holds cached files, they are loaded;
  --   * otherwise the videos from LoadGEPerCategory are fed through
  --     "model" one at a time and each output is cached as <j>.t7.
  -- inputs:
  --   model: module whose :forward() yields the features of one video
  --   convLayer: optional module applied to the collected features; its
  --              output is reshaped to config.nClasses x 10 x 4096 and
  --              saved as 'all.t7' under config.DataRootPath
  -- outputs:
  --   feats: the collected (and optionally transformed) features
  --   labels: array with the id i repeated cur:size(1) times per feature
  -- NOTE(review): reads the global "dataset_GE"; confirm it is set by the
  -- caller before this runs.
  -----------------------------------------------------------------
  local n = GetValuesSum(config.class_angles) -- Total number of classes
  local feats
  local labels = {}

  -- Appends one feature block and its labels to the running result.
  local function append(cur, classId)
    if feats then
      feats = torch.cat(feats, cur, 3)
    else
      -- Clone: "cur" may be the model's own output buffer, which a later
      -- forward() call would overwrite while "feats" still refers to it.
      feats = cur:clone()
    end
    for _ = 1, cur:size(1) do
      labels[#labels + 1] = classId
    end
  end

  -- Orders cache files by their leading number ("2.t7" before "10.t7") so
  -- that loading reproduces the 1..N order in which they were generated;
  -- names without a leading number sort last, alphabetically.
  local function cachedFileLess(a, b)
    local na = tonumber(a:match("^%d+"))
    local nb = tonumber(b:match("^%d+"))
    if na and nb and na ~= nb then
      return na < nb
    end
    if (na ~= nil) ~= (nb ~= nil) then
      return na ~= nil
    end
    return a < b
  end

  for i = 1, n do
    local featsDir = paths.concat(config.GE.featsDir, i)
    local featFiles = paths.dir(featsDir)
    RemoveDotDirs( featFiles )
    if not featFiles or #featFiles == 0 then
      log("Animation vectors for category " .. tostring(i) .. " not found.")
      os.execute('mkdir -p ' .. featsDir)

      local category, angle = DecryptCategoryViewPointId(i)
      local gameEngineVideos = LoadGEPerCategory(category, angle, dataset_GE):transpose(2, 3):cuda()
      log("Feed-forward animation to get features.")
      for j = 1, gameEngineVideos:size(1) do
        local cur = model:forward( gameEngineVideos[ {{j}, {}, {}, {}, {}} ] )
        append(cur, i)
        -- Cache for future use:
        torch.save( paths.concat(featsDir, tostring(j) .. '.t7'), cur)
      end
    else
      table.sort(featFiles, cachedFileLess)
      for _, v in ipairs(featFiles) do
        append(torch.load( paths.concat(featsDir, v) ), i)
      end
    end
  end

  feats = feats:transpose(2, 3):transpose(1, 2)
  if convLayer then
    feats = convLayer:forward(feats):reshape(config.nClasses, 10, 4096)
    torch.save(paths.concat( config.DataRootPath, 'all.t7'), feats)
  end
  return feats, labels
end
465 |
function GetPairwiseCosine(M1, M2)
  -----------------------------------------------------------------
  -- Smoothed cosine similarity between every row of M1 and every row of M2.
  -- Both matrices must have the same number of columns.
  -- Returns an M1:size(1) x M2:size(1) matrix of
  --   dot(row_i, row_j) / (|row_i| * |row_j| + 1e-5).
  -----------------------------------------------------------------
  assert(M1:size(2) == M2:size(2), "ERROR: dimensions mismatch!")

  -- Euclidean norm of each row, as a 1D tensor.
  local function rowNorms(M)
    return torch.cmul(M, M):sum(2):sqrt():view(M:size(1))
  end

  local epsilon = 1e-5
  local norms1 = rowNorms(M1)
  local norms2 = rowNorms(M2)
  local denominators = torch.ger(norms1, norms2) + epsilon
  local dots = M1 * M2:t()
  return torch.cdiv(dots, denominators)
end
476 |
function GetVideoCount(dataset)
  -- Counts the third-level entries (category -> view -> folder) of
  -- "dataset", skipping the top-level 'config' key.
  local count = 0
  for name, views in pairs(dataset) do
    if name ~= 'config' then
      for _, folders in pairs(views) do
        for _ in pairs(folders) do
          count = count + 1
        end
      end
    end
  end
  return count
end
490 |
function Choose(tensor, indices)
  -----------------------------------------------------------------
  -- For every row i, picks tensor[i][ indices[i] ].
  -- "tensor" and "indices" must have the same size along dimension 1.
  -- Returns the picked values as a tensor shaped like "indices",
  -- converted with :cuda().
  -----------------------------------------------------------------
  local rows = indices:size(1)
  assert(tensor:size(1) == rows, "Dimension mismatch")

  local picked = torch.Tensor( indices:size() )
  for row = 1, rows do
    picked[row] = tensor[row][ indices[row] ]
  end
  return picked:cuda()
end
499 |
function ContainsValue(dict, value)
  -- Returns true when some value stored in "dict" equals "value",
  -- false otherwise.
  local found = false
  for _, candidate in pairs(dict) do
    if candidate == value then
      found = true
      break
    end
  end
  return found
end
508 |
function GetGaussianTarget(target)
  -----------------------------------------------------------------
  -- Builds a soft target distribution for each entry of "target".
  -- Row i of the result (target:size(1) x config.nClasses, CudaTensor) is
  -- zero except for the block of 10 consecutive columns that contains
  -- target[i]; inside that block column j gets exp(-(target[i] - j)^2 / sigma)
  -- with sigma = 1, and the row is then divided by its sum.
  -----------------------------------------------------------------
  local sigma = 1
  local nRows = target:size(1)
  local weights = torch.CudaTensor(nRows, config.nClasses):fill(0)
  -- Position (1..10) of each target inside its block of 10.
  local frames = target - (torch.floor((target-1) / 10) * 10)

  for row = 1, nRows do
    local centre = target[row]
    local first = centre - frames[row] + 1
    for col = first, first + 9 do
      weights[row][col] = torch.exp( -(centre - col)^2 / sigma)
    end

    weights[row] = weights[row] / weights[row]:sum()
  end
  return weights
end
522 |
--------------------------------------------------------------------------------