├── .gitignore
├── LICENSE
├── README.md
└── code
    ├── CMakeLists.txt
    ├── cnn.h
    ├── dataset.h
    ├── generic_io.h
    ├── lua
    │   ├── Entropy.lua
    │   ├── MyL1Criterion.lua
    │   ├── score_incount_ec6.lua
    │   ├── train_obj.lua
    │   ├── train_obj_e2e.lua
    │   ├── train_obj_e2e_nomodel.lua
    │   └── train_obj_nomodel.lua
    ├── lua_calls.h
    ├── maxloss.h
    ├── properties.cpp
    ├── properties.h
    ├── read_data.cpp
    ├── read_data.h
    ├── stop_watch.h
    ├── test_ransac.cpp
    ├── thread_rand.cpp
    ├── thread_rand.h
    ├── train_obj.cpp
    ├── train_ransac.cpp
    ├── train_repro.cpp
    ├── types.h
    ├── util.cpp
    ├── util.h
    ├── write_data.cpp
    └── write_data.h

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2018, Visual Learning Lab
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DSAC++ Code Documentation

- [General Setup](#general-setup)
- [Programs](#programs)
- [Data Structure](#data-structure)

## Introduction

This document explains the high level concepts and setup of our improved DSAC code for camera localization.
See it in action [here](https://www.youtube.com/watch?v=DjJFRTFEUq0), and also see the [project page](https://hci.iwr.uni-heidelberg.de/vislearn/research/scene-understanding/pose-estimation/#CVPR18). You will find an explanation of the method and theory in the following paper. Please cite this paper if you use this code in your own work:

E. Brachmann, C. Rother,
”[Learning Less is More - 6D Camera Localization via 3D Surface Regression](https://arxiv.org/abs/1711.10228)”,
CVPR 2018

We publish the code under the BSD 3-Clause License. The predecessor of this pipeline can be found [here](https://github.com/cvlab-dresden/DSAC).

Compiling the code creates four programs:

* `train_obj`: Trains a scene coordinate regression CNN by optimizing the 3D distance between target scene coordinates and the CNN predictions (see Sec. 2.4. in our paper, "Scene Coordinate Initialization").
Target scene coordinates are either calculated from depth images (measured, rendered or estimated) or using our scene coordinate heuristic.
The method requires RGB images, ground truth camera poses, and (optionally) depth images.
* `train_repro`: Trains a scene coordinate regression CNN by optimizing the reprojection error of the CNN predictions (see Sec. 2.4. in our paper, "Optimization of Reprojection Error").
The method requires RGB images, ground truth camera poses, and a CNN model initialized e.g. by `train_obj`.
* `train_ransac`: Trains the whole camera localization pipeline end-to-end using DSAC (see Sec. 2.4. in our paper, "End-to-End Optimization").
The method requires RGB images, ground truth camera poses, and a CNN model.
* `test_ransac`: Runs the camera localization pipeline on test images.
The method requires RGB images and a CNN model.
Because the method calculates evaluation metrics, ground truth poses should also be provided.

Compiling the code requires OpenCV 2.4, PNG++, Torch and cuDNN. We compiled the code using Ubuntu 16.04 and GCC 4.9.1.

In the following, we will first give more details about the general setup and the individual programs.
Then, we describe how to run the code on the datasets used in the paper.
In case you have questions regarding the code, feel free to contact the first author of the associated paper.

## General Setup

The DSAC++ source code is a combination of a C++ framework and multiple Torch scripts.
All data access, geometric operations and accuracy evaluations are performed in C++.
The Torch scripts are called from C++ to grant access to the CNNs used in the pose estimation pipeline.
We use two separate Torch scripts, one for regressing scene coordinates given an RGB image, and another one for regressing hypothesis scores given reprojection error images.
Note that the second script does not load a learnable CNN but instead constructs a soft inlier count for easy back propagation and the implementation of our entropy control scheme (see Sec. 2.3. of our paper).
The Torch scripts are contained in the sub-folder `code/lua`. The script variants ending with `nomodel` are used when training our pipeline with the scene coordinate heuristic.
They offer the exact same functionality as the standard scripts but store their respective CNNs under a different file name.
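To make the C++/Torch coupling concrete, the following sketch illustrates the mechanism by which the C++ framework drives the Lua scripts. It is only an illustration: the actual glue code lives in `code/lua_calls.h`, and the helper names below (`initLua`, `callScalarFunction`) are hypothetical.

```cpp
#include <lua.hpp> // Lua C API (the build links against luajit)

// Load a Torch/Lua script; the functions it defines become callable globals.
lua_State* initLua(const char* script)
{
    lua_State* state = luaL_newstate(); // fresh interpreter state
    luaL_openlibs(state);               // standard libraries (so the script can use require)
    luaL_dofile(state, script);         // execute the script once
    return state;
}

// Call a Lua function that takes and returns a single number.
double callScalarFunction(lua_State* state, const char* fn, double arg)
{
    lua_getglobal(state, fn);   // push the Lua function onto the stack
    lua_pushnumber(state, arg); // push its argument
    lua_pcall(state, 1, 1, 0);  // call: 1 argument, 1 result
    double result = lua_tonumber(state, -1);
    lua_pop(state, 1);          // clean up the stack
    return result;
}
```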
### Setting Parameters

All C++ programs share a set of parameters which the user can specify; see below for a complete list.
There are two ways of setting the parameters. The first and most direct way is to append them to the program call on the command line.
Every parameter has a short abbreviation (case sensitive, see below) which is given with a leading dash, followed by a space and the value to set, e.g. `-ih 480 -iw 640`.
Because there might be a large number of parameters to set, the second way is to provide a `default.config` file.
This file should contain one parameter per line.
More specifically, each line should start with the parameter abbreviation (without a leading dash) followed by a space and the value to set.
The config file can also include comment lines which start with the `#` symbol.
All programs will first look for the `default.config` file, which is parsed at the very beginning of each run.
After that, command line parameters are processed (overwriting parameters from the `default.config`).
Our data package (see below) contains an example `default.config` file for the 3 datasets used in the experiments of the paper; an illustrative example is also shown after the parameter list below.

**Parameters related to the data:**

**iw** width of input images (px)

**ih** height of input images (px)

**fl** focal length of the RGB camera

**xs** x position of the principal point of the RGB camera

**ys** y position of the principal point of the RGB camera

**oscript** Torch script file for learning scene coordinate regression

**sscript** Torch script file for hypothesis scoring

**omodel** file storing the scene coordinate regression CNN

**cd** constant depth prior in meters to be used for the scene coordinate heuristic (set 0 to use provided depth images)

**sid** arbitrary string to be attached to output files (does not affect CNN model file names), handy to differentiate multiple runs of the pipeline

**iSS** sub-sampling of training images, e.g. `-iSS 10` will use 10% of the training data

**cSS** sub-sampling of the CNN architecture output w.r.t. the input (necessary for data exchange between C++ and Torch; e.g. the paper architecture takes a 640x480 image and predicts 80x60 coordinates, i.e. cSS is 8)

**Parameters related to pose estimation:**

**rdraw** draw a hypothesis randomly (`-rdraw 1`), i.e. DSAC, or take the one with the largest score (`-rdraw 0`), i.e. RANSAC

**rT** re-projection error threshold (in px) for measuring inliers in the pose estimation pipeline

**rI** initial number of pose hypotheses drawn per frame

**rRI** number of refinement iterations

**Parameters related to learning:**

Parameters which concern the learning of the scene coordinate regression CNN and the soft inlier score (e.g. learning rate or file names to store the CNNs) are defined directly in the corresponding Torch scripts (see `code/lua`).
There are also some parameters defined in the respective main cpp file of each program, such as the total number of training iterations.
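For illustration, a minimal `default.config` following the format described above might look like this (the values below are placeholders, not the shipped configuration; use the config files from the data package for the actual experiments):

```
# camera intrinsics (placeholder values)
iw 640
ih 480
fl 525.0
xs 320
ys 240

# pose estimation
rT 10
rI 256
rRI 8
```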
### Training Procedure

As stated in the paper, end-to-end training (i.e. calling `train_ransac`) needs a good initialization in terms of the scene coordinate regression CNN.
Therefore, pre-training (i.e. calling `train_obj`, potentially followed by `train_repro`) should be executed first.
An example order of executing the programs is given further below.

### Scene Coordinate Ground Truth

In order to pre-train the scene coordinate regression CNN (i.e. calling `train_obj`), scene coordinate ground truth has to be available for each training frame.
Our code generates scene coordinate ground truth from depth frames, the ground truth poses and the camera calibration parameters.
Depth frames can come from an RGB-D camera, a renderer or an arbitrary depth estimation algorithm.
Alignment is assumed between RGB and depth images.
Alternatively, `train_obj` can utilize a scene coordinate heuristic based on a constant depth assumption (parameter **cd** in meters).
In this case, the scene coordinate regression CNN should be further optimized using `train_repro` for good accuracy.

## Programs

In the following, we describe input and output of the individual programs created by compiling the code.
An example order of executing the programs is given at the end of the document.

### `train_obj`

The program `train_obj` pre-trains a scene coordinate regression CNN by minimizing the L1 distance between the predicted and the ground truth coordinates.
The program needs access to a Torch script which constructs and grants access to a scene coordinate regression CNN (default: `train_obj.lua`).
The program will store the current state of the CNN in a fixed interval of parameter updates.
This interval and the file name are defined in the Torch script.
The program creates a training loss txt file which contains per line the training iteration number followed by the training loss at that iteration (mean over an image).
When no depth images are available, the **cd** parameter should be set to a positive value in meters (e.g. 3 for indoor and 10 for outdoor scenes).
The program will then calculate ground truth scene coordinates assuming a constant depth for each image.

### `train_repro`

The program `train_repro` loads a pre-trained scene coordinate regression CNN and refines it by optimizing the reprojection error of its predictions.
The program needs access to a Torch script to load and access the scene coordinate regression CNN (default: `train_obj.lua`).
The CNN to load can be specified via `-omodel`.
The program will store the current state of the CNN in fixed intervals of parameter updates.
This interval and the file name are defined in the Torch script.
The program also creates a repro training loss txt file which contains the training loss per training iteration.

### `train_ransac`

The program `train_ransac` loads a pre-trained scene coordinate regression CNN and refines it by training the complete camera localization pipeline end-to-end using the DSAC strategy.
The program optimizes the expectation of the pose loss.
The program needs access to two Torch scripts to load and access the scene coordinate regression CNN (default: `train_obj_e2e.lua`) and the soft inlier score (default: `score_incount_ec6.lua`).
The CNN to load can be specified via `-omodel`.
The program will store the current states of both CNNs in fixed intervals of parameter updates.
These intervals and the file names are defined in the Torch scripts.
The program also creates a ransac training loss txt file which contains training statistics per training iteration (see `train_ransac.cpp`, bottom, for a listing).
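The objective optimized by `train_ransac` is easy to state: with soft max probabilities `p_i` and pose losses `l_i` per hypothesis, the program minimizes the expected loss `E = sum_i p_i * l_i`. Its gradient w.r.t. the hypothesis scores has a simple closed form, which is what `dSMScore` in `code/cnn.h` implements; the following standalone sketch (not part of the pipeline itself) makes it explicit:

```cpp
#include <vector>

// Gradient of the expected loss E = sum_i p_i * l_i w.r.t. the scores s,
// where p = softMax(s): dE/ds_i = p_i * (l_i - E).
// This mirrors the scoreOutputGradients computation in dSMScore (code/cnn.h).
std::vector<double> dExpectedLossdScore(
    const std::vector<double>& p, // soft max probabilities (sum to 1)
    const std::vector<double>& l) // loss per hypothesis
{
    double expectedLoss = 0;
    for(unsigned i = 0; i < p.size(); i++)
        expectedLoss += p[i] * l[i];

    std::vector<double> grad(p.size());
    for(unsigned i = 0; i < p.size(); i++)
        grad[i] = p[i] * (l[i] - expectedLoss);

    return grad;
}
```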
### `test_ransac`

The program `test_ransac` loads a scene coordinate regression CNN and performs camera localization on a set of test images using RANSAC (`-rdraw 0`) or DSAC (`-rdraw 1`).
The program needs access to two Torch scripts to load and access the scene coordinate regression CNN (default: `train_obj.lua`) and the soft inlier score (default: `score_incount_ec6.lua`).
The CNN to load can be specified via `-omodel`.
The program creates a ransac test loss txt file which contains statistics of the test run (see `test_ransac.cpp`, bottom, for a listing).
Most importantly, the first number is the ratio of test images with correctly estimated poses (i.e. a pose error below 5cm and 5deg). Furthermore, a ransac test errors txt file will be created, which contains per-line statistics for each test image, most importantly the estimated pose (see `test_ransac.cpp`, bottom, for a complete listing).

## Data Structure

We provide a [data package](https://heidata.uni-heidelberg.de/api/access/datafile/:persistentId?persistentId=doi:10.11588/data/EGCMUU/GCZUDD) for deployment of our code for the datasets used in the paper.
Note that we do not provide the datasets themselves, but merely the structure outline and associated meta data to reproduce the results from our paper. (As an exception, we provide rendered depth images for [7scenes](https://heidata.uni-heidelberg.de/api/access/datafile/:persistentId?persistentId=doi:10.11588/data/N07HKC/4PLEEJ) and [cambridge](https://heidata.uni-heidelberg.de/api/access/datafile/:persistentId?persistentId=doi:10.11588/data/EGCMUU/7LBIQJ).)
Our code assumes the following data structure:

1. `dataset`
2. `dataset\scene`
3. `dataset\scene\training`
4. `dataset\scene\training\depth`
5. `dataset\scene\training\rgb`
6. `dataset\scene\training\poses`
7. `dataset\scene\test`
8. `dataset\scene\test\depth`
9. `dataset\scene\test\rgb`
10. `dataset\scene\test\poses`
11. `dataset\scene\default.config`
12. `dataset\scene\Entropy.lua`
13. `dataset\scene\MyL1Criterion.lua`
14. `dataset\scene\score_incount_ec6.lua`
15. `dataset\scene\train_obj.lua`
16. `dataset\scene\train_obj_e2e.lua`
17. `dataset\scene\train_obj_nomodel.lua`
18. `dataset\scene\train_obj_e2e_nomodel.lua`

For example, `dataset` could be 7Scenes and `scene` could be Chess.
Folders 3-10 should be filled with the associated files from the data set, i.e. RGB frames, depth frames and pose files (7Scenes convention) according to the training/test split.
For RGB images we support 8-bit 3-channel PNG or JPG files.
For depth images we support 16-bit 1-channel PNG or TIFF files.
The depth should be stored in millimeters.
Config file 11 is specific to each dataset.
Files 11 to 18 are implemented as soft links in our data package. The soft links assume that the `dataset` folders lie within `code/build` (for access to `code/lua`).
The data package also contains fully trained models for each scene (after end-to-end refinement).
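As a concrete illustration of the geometry behind this data: a depth pixel is back-projected with the pinhole model and then mapped into the scene by the ground truth pose (cf. `pxToEye` in `code/dataset.h`). The sketch below is illustrative only; the exact pose convention (camera-to-scene vs. scene-to-camera) is defined by the dataset and by the code in `dataset.h`/`util.h`:

```cpp
#include <opencv2/opencv.hpp>

// Back-project pixel (u, v) with depth d (in mm, as stored in the PNG/TIFF
// files) to camera coordinates, then map to scene coordinates with a pose
// (R, t). f is the focal length, (cx, cy) the principal point, in pixels.
cv::Vec3d pxToScene(
    double u, double v, double d,
    double f, double cx, double cy,
    const cv::Matx33d& R, const cv::Vec3d& t)
{
    // pinhole back-projection to camera coordinates
    cv::Vec3d eye((u - cx) * d / f, (v - cy) * d / f, d);

    // apply the pose; the direction (camera->scene or scene->camera)
    // depends on the dataset convention
    return R * eye + t;
}
```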
You find the rendered depth images we used for training with a 3D model here: [7scenes](https://heidata.uni-heidelberg.de/api/access/datafile/:persistentId?persistentId=doi:10.11588/data/N07HKC/4PLEEJ), [cambridge](https://heidata.uni-heidelberg.de/api/access/datafile/:persistentId?persistentId=doi:10.11588/data/EGCMUU/7LBIQJ)

### Executing the code

The following calls assume that you are within `code/build/dataset/scene` and that the binaries lie in `code/build`.

*When training with depth images:*

1. `../../train_obj -oscript train_obj.lua`
2. `../../train_repro -oscript train_obj.lua -omodel obj_model_fcn_init.net`
3. `../../train_ransac -oscript train_obj_e2e.lua -omodel obj_model_fcn_repro.net -sscript score_incount_ec6.lua`
4. `../../test_ransac -oscript train_obj.lua -omodel obj_model_fcn_e2e.net -sscript score_incount_ec6.lua -rdraw 0`

*When training without depth images:*

1. `../../train_obj -oscript train_obj_nomodel.lua -iSS 20 -cd 3` (for outdoor scenes we used `-cd 10`)
2. `../../train_repro -oscript train_obj_nomodel.lua -omodel obj_model_fcn_init_nomodel.net`
3. `../../train_ransac -oscript train_obj_e2e_nomodel.lua -omodel obj_model_fcn_repro_nomodel.net -sscript score_incount_ec6.lua`
4. `../../test_ransac -oscript train_obj.lua -omodel obj_model_fcn_e2e_nomodel.net -sscript score_incount_ec6.lua -rdraw 0`

Note that you can test the pipeline after each training step (1-3) by providing the appropriate model file to `test_ransac`.

--------------------------------------------------------------------------------
/code/CMakeLists.txt:
--------------------------------------------------------------------------------
# Specify the minimum version being used as well as the project language
cmake_minimum_required(VERSION 2.8)
# Name your project here
project(dsac++)

# Sends the -std=c++11 flag to the gcc compiler
add_definitions(-std=c++11)

# PNG++
include(FindPNG)
include_directories(${PNG_INCLUDE_DIR})

# LUA and Torch
include_directories(/usr/include/lua5.3)
include_directories(~/lib/torch/install/include/)
link_directories(~/lib/torch/install/lib/)
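# NOTE: the Lua and Torch paths above are machine-specific defaults;
# adapt them to your own Lua/Torch installation before building.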

# OpenCV
find_package(OpenCV REQUIRED)
if(NOT OpenCV_FOUND)
    # make FIND_PACKAGE friendly
    if(NOT OpenCV_FIND_QUIETLY)
        if(OpenCV_FIND_REQUIRED)
            message(FATAL_ERROR "OpenCV required but some headers or libs not found. ${ERR_MSG}")
        else(OpenCV_FIND_REQUIRED)
            message(STATUS "WARNING: OpenCV was not found. ${ERR_MSG}")
        endif(OpenCV_FIND_REQUIRED)
    endif(NOT OpenCV_FIND_QUIETLY)
else(NOT OpenCV_FOUND)
    message(STATUS "OpenCV Include Directory: ${OpenCV_INCLUDE_DIRS}")
    message(STATUS "OpenCV Link Libraries: ${OpenCV_LIBS}")
endif(NOT OpenCV_FOUND)
include_directories(${OpenCV_INCLUDE_DIRS})

# OpenMP
find_package(OpenMP REQUIRED)
if(OPENMP_FOUND)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")

add_library("opencv_dep_cudart" UNKNOWN IMPORTED)
set_target_properties("opencv_dep_cudart" PROPERTIES IMPORTED_LOCATION /usr/local/cuda/lib64/libcudart.so)

add_executable(train_obj
    train_obj.cpp
    properties.cpp
    util.cpp
    thread_rand.cpp
    read_data.cpp)

add_executable(train_ransac
    train_ransac.cpp
    properties.cpp
    util.cpp
    thread_rand.cpp
    read_data.cpp)

add_executable(train_repro
    train_repro.cpp
    properties.cpp
    util.cpp
    thread_rand.cpp
    read_data.cpp)

add_executable(test_ransac
    test_ransac.cpp
    properties.cpp
    util.cpp
    thread_rand.cpp
    read_data.cpp
    write_data.cpp)

target_link_libraries(train_obj ${PNG_LIBRARY} ${OpenCV_LIBS} luajit)
target_link_libraries(train_ransac ${PNG_LIBRARY} ${OpenCV_LIBS} luajit)
target_link_libraries(train_repro ${PNG_LIBRARY} ${OpenCV_LIBS} luajit)
target_link_libraries(test_ransac ${PNG_LIBRARY} ${OpenCV_LIBS} luajit)

--------------------------------------------------------------------------------
/code/cnn.h:
--------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

#define CNN_OBJ_MAXINPUT 100.0 // reprojection errors are clamped at this magnitude

#include "util.h"
#include "maxloss.h"

/**
 * @brief Checks whether the given matrix contains NaN entries.
 * @param m Input matrix.
 * @return True if m contains NaN entries.
 */
inline bool containsNaNs(const cv::Mat& m)
{
    return cv::sum(cv::Mat(m != m))[0] > 0;
}

/**
 * @brief Wrapper around the OpenCV PnP function that returns a zero pose in case PnP fails. See also documentation of cv::solvePnP.
 * @param objPts List of 3D points.
 * @param imgPts Corresponding 2D points.
 * @param camMat Calibration matrix of the camera.
 * @param distCoeffs Distortion coefficients.
 * @param rot Output parameter. Camera rotation.
 * @param trans Output parameter. Camera translation.
 * @param extrinsicGuess If true, uses the input rot and trans as initialization.
 * @param methodFlag Specifies the PnP algorithm to be used.
 * @return True if PnP succeeds.
 */
inline bool safeSolvePnP(
    const std::vector<cv::Point3f>& objPts,
    const std::vector<cv::Point2f>& imgPts,
    const cv::Mat& camMat,
    const cv::Mat& distCoeffs,
    cv::Mat& rot,
    cv::Mat& trans,
    bool extrinsicGuess,
    int methodFlag)
{
    if(rot.type() == 0) rot = cv::Mat_<double>::zeros(1, 3);
    if(trans.type() == 0) trans = cv::Mat_<double>::zeros(1, 3);

    if(!cv::solvePnP(objPts, imgPts, camMat, distCoeffs, rot, trans, extrinsicGuess, methodFlag))
    {
        rot = cv::Mat_<double>::zeros(1, 3);
        trans = cv::Mat_<double>::zeros(1, 3);
        return false;
    }
    return true;
}

/**
 * @brief Calculate the Shannon entropy of a discrete distribution.
 * @param dist Discrete distribution. Probability per entry, should sum to 1.
 * @return Shannon entropy.
 */
double entropy(const std::vector<double>& dist)
{
    double e = 0;
    for(unsigned i = 0; i < dist.size(); i++)
        if(dist[i] > 0)
            e -= dist[i] * std::log2(dist[i]);

    return e;
}

/**
 * @brief Draws an entry of a discrete distribution according to the given probabilities.
 *
 * If randomDraw is false in the properties, this function will return the entry with the max. probability.
 *
 * @param probs Discrete distribution. Probability per entry, should sum to 1.
 * @return Chosen entry.
 */
int draw(const std::vector<double>& probs)
{
    std::map<double, int> cumProb; // cumulative probability -> entry index
    double probSum = 0;
    double maxProb = -1;
    int maxIdx = 0;

    for(unsigned idx = 0; idx < probs.size(); idx++)
    {
        if(probs[idx] < EPS) continue;

        probSum += probs[idx];
        cumProb[probSum] = idx;

        if(maxProb < 0 || probs[idx] > maxProb)
        {
            maxProb = probs[idx];
            maxIdx = idx;
        }
    }

    if(GlobalProperties::getInstance()->tP.randomDraw)
        return cumProb.upper_bound(drand(0, probSum))->second;
    else
        return maxIdx;
}

/**
 * @brief Calculates the expected loss of a list of poses with associated probabilities.
 * @param gt Ground truth pose.
 * @param hyps List of estimated poses.
 * @param probs List of probabilities associated with the estimated poses.
 * @param losses Output parameter. List of losses for each estimated pose.
 * @return Expectation of loss.
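 *
 * The expectation computed is E = sum_i probs[i] * losses[i].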
 */
double expectedMaxLoss(
    const jp::cv_trans_t& gt,
    const std::vector<jp::cv_trans_t>& hyps,
    const std::vector<double>& probs,
    std::vector<double>& losses)
{
    double loss = 0;
    losses.resize(hyps.size());

    for(unsigned i = 0; i < hyps.size(); i++)
    {
        losses[i] = maxLoss(gt, hyps.at(i));
        loss += probs[i] * losses[i];
    }

    return loss;
}

/**
 * @brief Calculates the Jacobean of the PNP function w.r.t. the object coordinate inputs.
 *
 * PNP is treated as a n x 3 -> 6 function, i.e. it takes n 3D coordinates and maps them to a 6D pose.
 * The Jacobean is therefore 6x3n. The Jacobean is calculated using central differences.
 *
 * @param imgPts List of 2D points.
 * @param objPts List of corresponding 3D points.
 * @param eps Epsilon used in the central differences approximation.
 * @return 6x3n Jacobean matrix of partial derivatives.
 */
cv::Mat_<double> dPNP(
    const std::vector<cv::Point2f>& imgPts,
    std::vector<cv::Point3f> objPts,
    float eps = 0.001f)
{
    int pnpMethod = (imgPts.size() == 4) ? CV_P3P : CV_ITERATIVE;

    // in case of P3P the 4th point is needed to resolve ambiguities, its derivative is zero
    int effectiveObjPoints = (pnpMethod == CV_P3P) ? 3 : objPts.size();

    cv::Mat_<float> camMat = GlobalProperties::getInstance()->getCamMat();
    cv::Mat_<double> jacobean = cv::Mat_<double>::zeros(6, objPts.size() * 3);
    bool success;

    // central differences
    for(unsigned i = 0; i < effectiveObjPoints; i++)
    for(unsigned j = 0; j < 3; j++)
    {
        if(j == 0) objPts[i].x += eps;
        else if(j == 1) objPts[i].y += eps;
        else if(j == 2) objPts[i].z += eps;

        // forward step
        jp::cv_trans_t fStep;
        success = safeSolvePnP(objPts, imgPts, camMat, cv::Mat(), fStep.first, fStep.second, false, pnpMethod);

        if(!success)
            return cv::Mat_<double>::zeros(6, objPts.size() * 3);

        if(j == 0) objPts[i].x -= 2 * eps;
        else if(j == 1) objPts[i].y -= 2 * eps;
        else if(j == 2) objPts[i].z -= 2 * eps;

        // backward step
        jp::cv_trans_t bStep;
        success = safeSolvePnP(objPts, imgPts, camMat, cv::Mat(), bStep.first, bStep.second, false, pnpMethod);

        if(!success)
            return cv::Mat_<double>::zeros(6, objPts.size() * 3);

        if(j == 0) objPts[i].x += eps;
        else if(j == 1) objPts[i].y += eps;
        else if(j == 2) objPts[i].z += eps;

        // gradient calculation
        fStep.first = (fStep.first - bStep.first) / (2 * eps);
        fStep.second = (fStep.second - bStep.second) / (2 * eps);

        fStep.first.copyTo(jacobean.col(i * 3 + j).rowRange(0, 3));
        fStep.second.copyTo(jacobean.col(i * 3 + j).rowRange(3, 6));

        if(containsNaNs(jacobean.col(i * 3 + j)))
            return cv::Mat_<double>::zeros(6, objPts.size() * 3);
    }

    return jacobean;
}

/**
 * @brief Calculate the average of the absolute values of all matrix entries.
 * @param mat Input matrix.
 * @return Average of absolute entries.
 */
double getAvg(const cv::Mat_<double>& mat)
{
    double avg = 0;

    for(unsigned x = 0; x < mat.cols; x++)
    for(unsigned y = 0; y < mat.rows; y++)
    {
        avg += std::abs(mat(y, x));
    }

    return avg / mat.cols / mat.rows;
}

/**
 * @brief Return the maximum absolute entry of the given matrix.
 * @param mat Input matrix.
 * @return Maximum absolute entry.
 */
double getMax(const cv::Mat_<double>& mat)
{
    double m = -1;

    for(unsigned x = 0; x < mat.cols; x++)
    for(unsigned y = 0; y < mat.rows; y++)
    {
        double val = std::abs(mat(y, x));
        if(m < 0 || val > m)
            m = val;
    }

    return m;
}

/**
 * @brief Return the median of the absolute values of all entries of the given matrix.
 * @param mat Input matrix.
 * @return Median absolute entry.
 */
double getMed(const cv::Mat_<double>& mat)
{
    std::vector<double> vals;

    for(unsigned x = 0; x < mat.cols; x++)
    for(unsigned y = 0; y < mat.rows; y++)
        vals.push_back(std::abs(mat(y, x)));

    std::sort(vals.begin(), vals.end());

    return vals[vals.size() / 2];
}

/**
 * @brief Transform an RGB image to a floating point CNN input map.
 *
 * The image will be cropped to CNN input size.
 * In training mode, the input will be randomly shifted by small amounts, depending on the subsampling in the CNN output.
 * The method also creates a sampling map which maps each output of the CNN to a 2D input position in the original RGB image.
 *
 * @param img Input RGB image.
 * @param sampling Output parameter. Map that contains for each position in the CNN output the corresponding position in the RGB input (used to establish 2D-3D correspondences).
 * @param training True for training mode.
 * @return Floating point CNN input map.
 */
cv::Mat_<cv::Vec3f> getImgMap(const jp::img_bgr_t& img, cv::Mat_<cv::Point2i>& sampling, bool training)
{
    GlobalProperties* gp = GlobalProperties::getInstance();

    int cnnInputW = gp->getCNNInputDimX();
    int cnnInputH = gp->getCNNInputDimY();
    int cnnOutputW = gp->getCNNOutputDimX();
    int cnnOutputH = gp->getCNNOutputDimY();
    int cnnSubSampling = gp->dP.cnnSubSample;

    cv::Mat_<cv::Vec3f> imgMap(cnnInputH, cnnInputW);
    sampling = cv::Mat_<cv::Point2i>(cnnOutputH, cnnOutputW);

    int offsetX = img.cols - cnnInputW;
    int offsetY = img.rows - cnnInputH;

    if(training)
    {
        // random shift
        offsetX = irand(0, offsetX);
        offsetY = irand(0, offsetY);
    }
    else
    {
        // crop at the center
        offsetX /= 2;
        offsetY /= 2;
    }

    // crop image
    for(unsigned x = 0; x < cnnInputW; x++)
    for(unsigned y = 0; y < cnnInputH; y++)
    {
        imgMap(y, x) = img(y + offsetY, x + offsetX);
    }

    // create sampling map
    for(unsigned x = 0; x < sampling.cols; x++)
    for(unsigned y = 0; y < sampling.rows; y++)
    {
        sampling(y, x) = cv::Point2i(
            offsetX + x * cnnSubSampling + cnnSubSampling / 2,
            offsetY + y * cnnSubSampling + cnnSubSampling / 2);
    }

    return imgMap;
}

/**
 * @brief Process an RGB image with the object coordinate CNN.
 * @param colorData Input RGB image.
 * @param sampling Output parameter. Subsampling information. Each 2D location contains the pixel location in the original RGB image (needed again for the backward pass).
 * @param imgMaps Output parameter. RGB image transformed to CNN input maps (needed again for the backward pass).
 * @param training True if training mode (controls cropping of the input image).
 * @param state Lua state for access to the object coordinate CNN.
 * @return Object coordinate estimation (sub sampled).
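 *
 * The prediction is subsampled w.r.t. the input by the cSS factor, e.g. 80x60 coordinates for a 640x480 input with cSS = 8.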
 */
jp::img_coord_t getCoordImg(
    const jp::img_bgr_t& colorData,
    cv::Mat_<cv::Point2i>& sampling,
    std::vector<cv::Mat_<cv::Vec3f>>& imgMaps,
    bool training,
    lua_State* state)
{
    StopWatch stopW;

    imgMaps.resize(1);
    imgMaps[0] = getImgMap(colorData, sampling, training);

    // forward pass
    std::vector<jp::coord3_t> prediction = forward(imgMaps, sampling, state);

    // reorganize
    jp::img_coord_t modeImg =
        jp::img_coord_t::zeros(sampling.rows, sampling.cols);

    for(unsigned i = 0; i < prediction.size(); i++)
    {
        int x = i % modeImg.cols;
        int y = i / modeImg.cols;

        modeImg(y, x) = prediction[i];
    }

    std::cout << "CNN prediction took " << stopW.stop() / 1000 << "s." << std::endl;

    return modeImg;
}

/**
 * @brief Calculate an image of reprojection errors for the given object coordinate prediction and the given pose.
 * @param hyp Pose estimate.
 * @param objectCoordinates Object coordinate estimate.
 * @param sampling Subsampling of the input image.
 * @param camMat Calibration matrix of the camera.
 * @return Image of reprojection errors.
 */
cv::Mat_<float> getDiffMap(
    const jp::cv_trans_t& hyp,
    const jp::img_coord_t& objectCoordinates,
    const cv::Mat_<cv::Point2i>& sampling,
    const cv::Mat& camMat)
{
    cv::Mat_<float> diffMap(sampling.size());

    std::vector<cv::Point3f> points3D;
    std::vector<cv::Point2f> projections;
    std::vector<cv::Point2f> points2D;
    std::vector<cv::Point2f> sources2D;

    // collect 2D-3D correspondences
    for(unsigned x = 0; x < sampling.cols; x++)
    for(unsigned y = 0; y < sampling.rows; y++)
    {
        // get the 2D location in the original RGB frame
        cv::Point2f pt2D(sampling(y, x).x, sampling(y, x).y);

        // get the associated 3D object coordinate prediction
        points3D.push_back(cv::Point3f(
            objectCoordinates(y, x)(0),
            objectCoordinates(y, x)(1),
            objectCoordinates(y, x)(2)));
        points2D.push_back(pt2D);
        sources2D.push_back(cv::Point2f(x, y));
    }

    if(points3D.empty()) return diffMap;

    // project object coordinates into the image using the given pose
    cv::projectPoints(points3D, hyp.first, hyp.second, camMat, cv::Mat(), projections);

    // measure reprojection errors
    for(unsigned p = 0; p < projections.size(); p++)
    {
        cv::Point2f curPt = points2D[p] - projections[p];
        float l = std::min(cv::norm(curPt), CNN_OBJ_MAXINPUT);
        diffMap(sources2D[p].y, sources2D[p].x) = l;
    }

    return diffMap;
}

/**
 * @brief Project a 3D point into the image and measure the reprojection error.
 * @param pt Ground truth 2D location.
 * @param obj 3D point.
 * @param hyp Pose estimate.
 * @param camMat Calibration matrix of the camera.
 * @return Reprojection error in pixels.
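 *
 * The returned error is clamped at CNN_OBJ_MAXINPUT.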
 */
float project(const cv::Point2f& pt, const cv::Point3f& obj, const jp::cv_trans_t hyp, const cv::Mat& camMat)
{
    double f = camMat.at<float>(0, 0);
    double ppx = camMat.at<float>(0, 2);
    double ppy = camMat.at<float>(1, 2);

    // transform the point
    cv::Mat objMat = cv::Mat(obj);
    objMat.convertTo(objMat, CV_64F);

    cv::Mat rot;
    cv::Rodrigues(hyp.first, rot);

    objMat = rot * objMat + hyp.second;

    // project
    double px = f * objMat.at<double>(0, 0) / objMat.at<double>(2, 0) + ppx;
    double py = f * objMat.at<double>(1, 0) / objMat.at<double>(2, 0) + ppy;

    //std::cout << "Projected position: " << px << ", " << py << std::endl;

    // return the error
    return std::min(std::sqrt((pt.x - px) * (pt.x - px) + (pt.y - py) * (pt.y - py)), CNN_OBJ_MAXINPUT);
}

/**
 * @brief Calculates the Jacobean of the projection function w.r.t. the given 3D point, i.e. the function has the form 3 -> 1.
 * @param pt Ground truth 2D location.
 * @param obj 3D point.
 * @param hyp Pose estimate.
 * @param camMat Calibration matrix of the camera.
 * @return 1x3 Jacobean matrix of partial derivatives.
 */
cv::Mat_<double> dProjectdObj(const cv::Point2f& pt, const cv::Point3f& obj, const jp::cv_trans_t hyp, const cv::Mat& camMat)
{
    double f = camMat.at<float>(0, 0);
    double ppx = camMat.at<float>(0, 2);
    double ppy = camMat.at<float>(1, 2);

    // transform the point
    cv::Mat objMat = cv::Mat(obj);
    objMat.convertTo(objMat, CV_64F);

    cv::Mat rot;
    cv::Rodrigues(hyp.first, rot);

    objMat = rot * objMat + hyp.second;

    if(std::abs(objMat.at<double>(2, 0)) < EPS) // prevent division by zero
        return cv::Mat_<double>::zeros(1, 3);

    // project
    double px = f * objMat.at<double>(0, 0) / objMat.at<double>(2, 0) + ppx;
    double py = f * objMat.at<double>(1, 0) / objMat.at<double>(2, 0) + ppy;

    // calculate the error
    double err = std::sqrt((pt.x - px) * (pt.x - px) + (pt.y - py) * (pt.y - py));

    // early out if the projection error is above the threshold
    if(err > CNN_OBJ_MAXINPUT)
        return cv::Mat_<double>::zeros(1, 3);

    err += EPS; // avoid dividing by zero

    // derivative in x direction of the obj coordinate
    double pxdx = f * rot.at<double>(0, 0) / objMat.at<double>(2, 0) - f * objMat.at<double>(0, 0) / objMat.at<double>(2, 0) / objMat.at<double>(2, 0) * rot.at<double>(2, 0);
    double pydx = f * rot.at<double>(1, 0) / objMat.at<double>(2, 0) - f * objMat.at<double>(1, 0) / objMat.at<double>(2, 0) / objMat.at<double>(2, 0) * rot.at<double>(2, 0);
    double dx = 0.5 / err * (2 * (pt.x - px) * -pxdx + 2 * (pt.y - py) * -pydx);

    // derivative in y direction of the obj coordinate
    double pxdy = f * rot.at<double>(0, 1) / objMat.at<double>(2, 0) - f * objMat.at<double>(0, 0) / objMat.at<double>(2, 0) / objMat.at<double>(2, 0) * rot.at<double>(2, 1);
    double pydy = f * rot.at<double>(1, 1) / objMat.at<double>(2, 0) - f * objMat.at<double>(1, 0) / objMat.at<double>(2, 0) / objMat.at<double>(2, 0) * rot.at<double>(2, 1);
    double dy = 0.5 / err * (2 * (pt.x - px) * -pxdy + 2 * (pt.y - py) * -pydy);

    // derivative in z direction of the obj coordinate
    double pxdz = f * rot.at<double>(0, 2) / objMat.at<double>(2, 0) - f * objMat.at<double>(0, 0) / objMat.at<double>(2, 0) / objMat.at<double>(2, 0) * rot.at<double>(2, 2);
    double pydz = f * rot.at<double>(1, 2) / objMat.at<double>(2, 0) - f * objMat.at<double>(1, 0) / objMat.at<double>(2, 0) / objMat.at<double>(2, 0) * rot.at<double>(2, 2);
    double dz = 0.5 / err * (2 * (pt.x - px) * -pxdz + 2 * (pt.y - py) * -pydz);

    cv::Mat_<double> jacobean(1, 3);
    jacobean(0, 0) = dx;
    jacobean(0, 1) = dy;
    jacobean(0, 2) = dz;

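    // note: dx, dy, dz above apply the chain rule to
    // err(X) = sqrt((u - px(X))^2 + (v - py(X))^2), where (px, py) is the
    // pinhole projection of the transformed 3D point X (cf. project() above)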
    return jacobean;
}

/**
 * @brief Calculates the Jacobean of the projection function w.r.t. the given 6D pose, i.e. the function has the form 6 -> 1.
 * @param pt Ground truth 2D location.
 * @param obj 3D point.
 * @param hyp Pose estimate.
 * @param camMat Calibration matrix of the camera.
 * @return 1x6 Jacobean matrix of partial derivatives.
 */
cv::Mat_<double> dProjectdHyp(const cv::Point2f& pt, const cv::Point3f& obj, const jp::cv_trans_t hyp, const cv::Mat& camMat)
{
    double f = camMat.at<float>(0, 0);
    double ppx = camMat.at<float>(0, 2);
    double ppy = camMat.at<float>(1, 2);

    // transform the point
    cv::Mat objMat = cv::Mat(obj);
    objMat.convertTo(objMat, CV_64F);

    cv::Mat rot, dRdH;
    cv::Rodrigues(hyp.first, rot, dRdH);
    dRdH = dRdH.t();

    cv::Mat eyeMat = rot * objMat + hyp.second;

    if(std::abs(eyeMat.at<double>(2, 0)) < EPS) // prevent division by zero
        return cv::Mat_<double>::zeros(1, 6);

    // project
    double px = f * eyeMat.at<double>(0, 0) / eyeMat.at<double>(2, 0) + ppx; // flip x because of reasons (to conform with the OpenCV implementation)
    double py = f * eyeMat.at<double>(1, 0) / eyeMat.at<double>(2, 0) + ppy;

    // calculate the error
    double err = std::sqrt((pt.x - px) * (pt.x - px) + (pt.y - py) * (pt.y - py));

    // early out if the projection error is above the threshold
    if(err > CNN_OBJ_MAXINPUT)
        return cv::Mat_<double>::zeros(1, 6);

    err += EPS; // avoid dividing by zero

    // derivative of the error wrt the projection
    cv::Mat_<double> dNdP = cv::Mat_<double>::zeros(1, 2);
    dNdP(0, 0) = -1 / err * (pt.x - px);
    dNdP(0, 1) = -1 / err * (pt.y - py);

    // derivative of the projection function wrt the rotation matrix
    cv::Mat_<double> dPdR = cv::Mat_<double>::zeros(2, 9);
    dPdR.row(0).colRange(0, 3) = f * objMat.t() / eyeMat.at<double>(2, 0);
    dPdR.row(1).colRange(3, 6) = f * objMat.t() / eyeMat.at<double>(2, 0);
    dPdR.row(0).colRange(6, 9) = -f * eyeMat.at<double>(0, 0) / eyeMat.at<double>(2, 0) / eyeMat.at<double>(2, 0) * objMat.t();
    dPdR.row(1).colRange(6, 9) = -f * eyeMat.at<double>(1, 0) / eyeMat.at<double>(2, 0) / eyeMat.at<double>(2, 0) * objMat.t();

    // combined derivative of the error wrt the Rodrigues vector
    cv::Mat_<double> dNdH = dNdP * dPdR * dRdH;

    // derivative of the projection wrt the translation vector
    cv::Mat_<double> dPdT = cv::Mat_<double>::zeros(2, 3);
    dPdT(0, 0) = f / eyeMat.at<double>(2, 0);
    dPdT(1, 1) = f / eyeMat.at<double>(2, 0);
    dPdT(0, 2) = -f * eyeMat.at<double>(0, 0) / eyeMat.at<double>(2, 0) / eyeMat.at<double>(2, 0);
    dPdT(1, 2) = -f * eyeMat.at<double>(1, 0) / eyeMat.at<double>(2, 0) / eyeMat.at<double>(2, 0);

    // combined derivative of the error wrt the translation vector
    cv::Mat_<double> dNdT = dNdP * dPdT;

    cv::Mat_<double> jacobean(1, 6);
    dNdH.copyTo(jacobean.colRange(0, 3));
    dNdT.copyTo(jacobean.colRange(3, 6));
    return jacobean;
}
/**
 * @brief Applies soft max to the given list of scores.
 * @param scores List of scores.
 * @return Soft max distribution (sums to 1).
 */
std::vector<double> softMax(const std::vector<double>& scores)
{
    double maxScore = 0;
    for(unsigned i = 0; i < scores.size(); i++)
        if(i == 0 || scores[i] > maxScore) maxScore = scores[i];

    std::vector<double> sf(scores.size());
    double sum = 0.0;

    for(unsigned i = 0; i < scores.size(); i++)
    {
        sf[i] = std::exp(scores[i] - maxScore); // subtracting the max is numerically more stable
        sum += sf[i];
    }
    for(unsigned i = 0; i < scores.size(); i++)
    {
        sf[i] /= sum;
        // std::cout << "score: " << scores[i] << ", prob: " << sf[i] << std::endl;
    }

    return sf;
}

/**
 * @brief Calculates the Jacobean matrix of the function that maps n estimated object coordinates to a score, i.e. the function has the form n x 3 -> 1. Returns one Jacobean matrix per hypothesis.
 * @param estObj Object coordinate estimation.
 * @param sampling Sub sampling of the RGB image.
 * @param points List of minimal sets. Each one (4 correspondences) defines one hypothesis.
 * @param stateObj Lua state for access to the score CNN.
 * @param jacobeans Output parameter. List of Jacobean matrices. One 1 x 3n matrix per pose hypothesis.
 * @param scoreOutputGradients Gradients w.r.t. the score, i.e. the gradients of the output of the score CNN.
 */
void dScore(
    jp::img_coord_t estObj,
    const cv::Mat_<cv::Point2i>& sampling,
    const std::vector<std::vector<cv::Point2i>>& points,
    lua_State* stateObj,
    std::vector<cv::Mat_<double>>& jacobeans,
    const std::vector<double>& scoreOutputGradients)
{
    GlobalProperties* gp = GlobalProperties::getInstance();
    cv::Mat_<float> camMat = gp->getCamMat();

    int hypCount = points.size();

    std::vector<std::vector<cv::Point2f>> imgPts(hypCount);
    std::vector<std::vector<cv::Point3f>> objPts(hypCount);
    std::vector<jp::cv_trans_t> hyps(hypCount);
    std::vector<cv::Mat_<float>> diffMaps(hypCount);

    #pragma omp parallel for
    for(unsigned h = 0; h < hypCount; h++)
    {
        for(unsigned i = 0; i < points[h].size(); i++)
        {
            int x = points[h][i].x;
            int y = points[h][i].y;

            imgPts[h].push_back(sampling(y, x));
            objPts[h].push_back(cv::Point3f(estObj(y, x)));
        }

        // calculate the hypothesis
        jp::cv_trans_t cvHyp;
        safeSolvePnP(objPts[h], imgPts[h], camMat, cv::Mat(), cvHyp.first, cvHyp.second, false, CV_P3P);
        hyps[h] = cvHyp;

        // calculate projection errors
        diffMaps[h] = getDiffMap(cvHyp, estObj, sampling, camMat);
    }

    std::vector<cv::Mat_<double>> dDiffMaps;
    backward(diffMaps, stateObj, scoreOutputGradients, dDiffMaps);

    jacobeans.resize(hypCount);

    #pragma omp parallel for
    for(unsigned h = 0; h < hypCount; h++)
    {
        cv::Mat_<double> jacobean = cv::Mat_<double>::zeros(1, estObj.cols * estObj.rows * 3);
        jacobeans[h] = jacobean;

        if(cv::norm(dDiffMaps[h]) < EPS) continue;

        // accumulate the derivative of the score wrt the object coordinates that are used to calculate the pose
        cv::Mat_<double> supportPointGradients = cv::Mat_<double>::zeros(1, 12);

        cv::Mat_<double> dHdO = dPNP(imgPts[h], objPts[h]); // 6x12

        for(unsigned x = 0; x < dDiffMaps[h].cols; x++)
        for(unsigned y = 0; y < dDiffMaps[h].rows; y++)
        {
            cv::Point2f pt(sampling(y, x).x, sampling(y, x).y);
            cv::Point3f obj(estObj(y, x));

            // account for the direct influence of all object coordinates on the score
            cv::Mat_<double> dPdO = dProjectdObj(pt, obj, hyps[h], camMat);
            dPdO *= dDiffMaps[h](y, x);
            dPdO.copyTo(jacobean.colRange(x * dDiffMaps[h].rows * 3 + y * 3, x * dDiffMaps[h].rows * 3 + y * 3 + 3));

            // account for the indirect influence of the object coordinates that are used to calculate the pose
            cv::Mat_<double> dPdH = dProjectdHyp(sampling(y, x), cv::Point3f(estObj(y, x)), hyps[h], camMat);
            supportPointGradients += dDiffMaps[h](y, x) * dPdH * dHdO;
        }

        // add the accumulated derivatives for the object coordinates that are used to calculate the pose
        for(unsigned i = 0; i < points[h].size(); i++)
        {
            unsigned x = points[h][i].x;
            unsigned y = points[h][i].y;

            jacobean.colRange(x * dDiffMaps[h].rows * 3 + y * 3, x * dDiffMaps[h].rows * 3 + y * 3 + 3) += supportPointGradients.colRange(i * 3, i * 3 + 3);
        }
    }
}

/**
 * @brief Calculates the Jacobean matrix of the function that maps n estimated object coordinates to a soft max score, i.e. the function has the form n x 3 -> 1. Returns one Jacobean matrix per hypothesis.
 *
 * This is the soft maxed version of dScore (see above).
 *
 * @param estObj Object coordinate estimation.
 * @param sampling Sub sampling of the RGB image.
 * @param points List of minimal sets. Each one (4 correspondences) defines one hypothesis.
 * @param losses Loss measured for the hypotheses given by the points parameter.
 * @param sfScores Soft max probabilities for the hypotheses given by the points parameter.
 * @param stateObj Lua state for access to the score CNN.
 * @return List of Jacobean matrices. One 1 x 3n matrix per pose hypothesis.
 */
std::vector<cv::Mat_<double>> dSMScore(
    jp::img_coord_t estObj,
    const cv::Mat_<cv::Point2i>& sampling,
    const std::vector<std::vector<cv::Point2i>>& points,
    const std::vector<double>& losses,
    const std::vector<double>& sfScores,
    lua_State* stateObj)
{
    // assemble the gradients wrt the scores, i.e. the gradients of the soft max function
    std::vector<double> scoreOutputGradients(points.size());

    for(unsigned i = 0; i < points.size(); i++)
    {
        scoreOutputGradients[i] = sfScores[i] * losses[i];
        for(unsigned j = 0; j < points.size(); j++)
            scoreOutputGradients[i] -= sfScores[i] * sfScores[j] * losses[j];
    }

    // calculate gradients of the score function
    std::vector<cv::Mat_<double>> jacobeans;
    dScore(estObj, sampling, points, stateObj, jacobeans, scoreOutputGradients);

    // data conversion
    for(unsigned i = 0; i < jacobeans.size(); i++)
    {
        // reorder the columns into rows, points row first
        cv::Mat_<double> reformat(estObj.cols * estObj.rows, 3);

        for(unsigned x = 0; x < estObj.cols; x++)
        for(unsigned y = 0; y < estObj.rows; y++)
        {
            cv::Mat_<double> patchGrad = jacobeans[i].colRange(
                x * estObj.rows * 3 + y * 3,
                x * estObj.rows * 3 + y * 3 + 3);

            patchGrad.copyTo(reformat.row(y * estObj.cols + x));
        }

        jacobeans[i] = reformat;
    }

    return jacobeans;
}

/**
 * @brief Processes a frame, i.e. takes object coordinates, estimates poses, selects the best one and measures the error.
 *
 * This function performs the forward pass of DSAC but also calculates many intermediate results
 * for the backward pass (i.e. it can be made faster if one cares only about the forward pass).
 *
 * @param hypGT Ground truth pose (for evaluation only).
 * @param stateObj Lua state for access to the score CNN.
 * @param objHyps Number of hypotheses to be drawn.
 * @param camMat Calibration parameters of the camera.
 * @param inlierThreshold2D Inlier threshold in pixels.
 * @param refSteps Max. refinement steps (iterations).
 * @param expectedLoss Output parameter. Expectation of loss of the discrete hypothesis distribution.
 * @param sfEntropy Output parameter. Shannon entropy of the soft max distribution of hypotheses.
 * @param correct Output parameter. Was the final, selected hypothesis correct?
 * @param refHyps Output parameter. List of refined hypotheses sampled for the given image.
 * @param sfScores Output parameter. Soft max distribution for the sampled hypotheses.
 * @param estObj Estimated object coordinates (subsampling of the complete image).
 * @param sampling Subsampling of the RGB image.
 * @param sampledPoints Output parameter. List of initial 2D pixel locations in the subsampled input RGB image. 4 pixels per hypothesis.
 * @param losses Output parameter. List of losses of the sampled hypotheses.
 * @param inlierMaps Output parameter. Maps indicating which pixels of the subsampled input image have been inliers in the last step of hypothesis refinement, one map per hypothesis.
 * @param tErr Output parameter. Translational error (in m) of the final, selected hypothesis.
 * @param rotErr Output parameter. Rotational error of the final, selected hypothesis.
 * @param hypIdx Output parameter. Index of the final, selected hypothesis.
 * @param training True if training mode. Controls whether all hypotheses are refined or just the selected one.
 */
void processImage(
    const jp::cv_trans_t& hypGT,
    lua_State* stateObj,
    int objHyps,
    const cv::Mat& camMat,
    int inlierThreshold2D,
    int refSteps,
    double& expectedLoss,
    double& sfEntropy,
    bool& correct,
    std::vector<jp::cv_trans_t>& refHyps,
    std::vector<double>& sfScores,
    const jp::img_coord_t& estObj,
    const cv::Mat_<cv::Point2i>& sampling,
    std::vector<std::vector<cv::Point2i>>& sampledPoints,
    std::vector<double>& losses,
    std::vector<cv::Mat_<int>>& inlierMaps,
    double& tErr,
    double& rotErr,
    int& hypIdx,
    bool training = true)
{
    std::cout << BLUETEXT("Sampling " << objHyps << " hypotheses.") << std::endl;
    StopWatch stopW;

    sampledPoints.resize(objHyps); // keep track of the points each hypothesis is sampled from
    refHyps.resize(objHyps);
    std::vector<std::vector<cv::Point2f>> imgPts(objHyps);
    std::vector<std::vector<cv::Point3f>> objPts(objHyps);

    // sample hypotheses
    #pragma omp parallel for
    for(unsigned h = 0; h < refHyps.size(); h++)
    while(true)
    {
        std::vector<cv::Point2f> projections;
        cv::Mat_<int> alreadyChosen = cv::Mat_<int>::zeros(estObj.size());
        imgPts[h].clear();
        objPts[h].clear();
        sampledPoints[h].clear();

        for(int j = 0; j < 4; j++)
        {
            // 2D location in the subsampled image
            int x = irand(0, estObj.cols);
            int y = irand(0, estObj.rows);

            if(alreadyChosen(y, x) > 0)
            {
                j--;
                continue;
            }

            alreadyChosen(y, x) = 1;

            imgPts[h].push_back(sampling(y, x)); // 2D location in the original RGB image
            objPts[h].push_back(cv::Point3f(estObj(y, x))); // 3D object coordinate
            sampledPoints[h].push_back(cv::Point2i(x, y)); // 2D pixel location in the subsampled image
        }
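        // a minimal set has 4 correspondences: P3P uses 3 points; the 4th
        // resolves the up-to-fourfold P3P ambiguity (cf. dPNP above)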
        if(!safeSolvePnP(objPts[h], imgPts[h], camMat, cv::Mat(), refHyps[h].first, refHyps[h].second, false, CV_P3P))
        {
            continue;
        }

        cv::projectPoints(objPts[h], refHyps[h].first, refHyps[h].second, camMat, cv::Mat(), projections);

        // check reconstruction, the 4 sampled points should be reconstructed perfectly
        bool foundOutlier = false;
        for(unsigned j = 0; j < imgPts[h].size(); j++)
        {
            if(cv::norm(imgPts[h][j] - projections[j]) < inlierThreshold2D)
                continue;
            foundOutlier = true;
            break;
        }
        if(foundOutlier)
            continue;
        else
            break;
    }

    std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
    std::cout << BLUETEXT("Calculating scores.") << std::endl;

    // compute reprojection error images
    std::vector<cv::Mat_<float>> diffMaps(objHyps);
    #pragma omp parallel for
    for(unsigned h = 0; h < refHyps.size(); h++)
        diffMaps[h] = getDiffMap(refHyps[h], estObj, sampling, camMat);

    // execute the score script to get hypothesis scores
    std::vector<double> scores = forward(diffMaps, stateObj);

    std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
    std::cout << BLUETEXT("Drawing final hypothesis.") << std::endl;

    // apply soft max to the scores to get a distribution
    sfScores = softMax(scores);
    sfEntropy = entropy(sfScores); // measure distribution entropy
    hypIdx = draw(sfScores); // select the winning hypothesis

    std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
    std::cout << BLUETEXT("Refining poses:") << std::endl;

    // collect inliers
    inlierMaps.resize(refHyps.size());

    double convergenceThresh = 0.01; // stop refinement if the 6D pose vector converges

    #pragma omp parallel for
    for(unsigned h = 0; h < refHyps.size(); h++)
    {
        if(!training && hypIdx != h)
            continue; // in test mode only refine the selected hypothesis

        cv::Mat_<float> localDiffMap = diffMaps[h];

        // refine the current hypothesis
        for(unsigned rStep = 0; rStep < refSteps; rStep++)
        {
            // collect inliers
            std::vector<cv::Point2f> localImgPts;
            std::vector<cv::Point3f> localObjPts;
            cv::Mat_<int> localInlierMap = cv::Mat_<int>::zeros(diffMaps[h].size());

            for(unsigned x = 0; x < localDiffMap.cols; x++)
            for(unsigned y = 0; y < localDiffMap.rows; y++)
            {
                if(localDiffMap(y, x) < inlierThreshold2D)
                {
                    localImgPts.push_back(sampling(y, x));
                    localObjPts.push_back(cv::Point3f(estObj(y, x)));
                    localInlierMap(y, x) = 1;
                }
            }

            if(localImgPts.size() < 4)
                break;

            // recalculate the pose
            jp::cv_trans_t hypUpdate;
            hypUpdate.first = refHyps[h].first.clone();
            hypUpdate.second = refHyps[h].second.clone();

            if(!safeSolvePnP(localObjPts, localImgPts, camMat, cv::Mat(), hypUpdate.first, hypUpdate.second, true, (localImgPts.size() > 4) ? CV_ITERATIVE : CV_P3P))
                break; // abort if PnP fails

            if(maxLoss(hypUpdate, refHyps[h]) < convergenceThresh)
                break; // converged

            refHyps[h] = hypUpdate;
            inlierMaps[h] = localInlierMap;

            // recalculate pose errors
            localDiffMap = getDiffMap(refHyps[h], estObj, sampling, camMat);
        }
    }

    std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
<< std::endl;
966 |     std::cout << BLUETEXT("Final Result:") << std::endl;
967 | 
968 |     // evaluate poses
969 |     expectedLoss = expectedMaxLoss(hypGT, refHyps, sfScores, losses);
970 |     std::cout << "Loss of winning hyp: " << maxLoss(hypGT, refHyps[hypIdx]) << ", prob: " << sfScores[hypIdx] << ", expected loss: " << expectedLoss << std::endl;
971 | 
972 |     // we measure the error of inverted poses (because we estimate scene poses, not camera poses)
973 |     jp::cv_trans_t invHypGT = getInvHyp(hypGT);
974 |     jp::cv_trans_t invHypEst = getInvHyp(refHyps[hypIdx]);
975 | 
976 |     rotErr = calcAngularDistance(invHypGT, invHypEst);
977 |     tErr = cv::norm(invHypEst.second - invHypGT.second);
978 | 
979 |     correct = false;
980 |     if(rotErr < 5 && tErr < 0.05)
981 |     {
982 |         std::cout << GREENTEXT("Rotation Err: " << rotErr << "deg, Translation Err: " << tErr * 100 << "cm") << std::endl << std::endl;
983 |         correct = true;
984 |     }
985 |     else
986 |         std::cout << REDTEXT("Rotation Err: " << rotErr << "deg, Translation Err: " << tErr * 100 << "cm") << std::endl << std::endl;
987 | }
988 | 
--------------------------------------------------------------------------------
/code/dataset.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2016, TU Dresden
3 | Copyright (c) 2017, Heidelberg University
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |     * Redistributions of source code must retain the above copyright
9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in the
12 |       documentation and/or other materials provided with the distribution.
13 |     * Neither the name of the TU Dresden, Heidelberg University nor the
14 |       names of its contributors may be used to endorse or promote products
15 |       derived from this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | */
28 | 
29 | 
30 | #pragma once
31 | 
32 | #include "properties.h"
33 | #include "util.h"
34 | #include "read_data.h"
35 | #include 
36 | 
37 | namespace jp
38 | {
39 | /**
40 |  * @brief Calculate the camera coordinate given a pixel position and a depth value.
41 |  *
42 |  * @param x X component of the pixel position.
43 |  * @param y Y component of the pixel position.
44 |  * @param depth Depth value at that position in meters (see getObj() below, which divides the mm depth map by 1000 before calling).
45 |  * @return jp::coord3_t Camera coordinate.
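 *
 * For intuition, this is the standard pinhole back-projection
 * X = (x - cx) * d / f, Y = (y - cy) * d / f, Z = d, with the principal
 * point shifted by xShift/yShift. A minimal self-contained sketch of the
 * same arithmetic, using hypothetical calibration values instead of the
 * GlobalProperties singleton used below:
 * @code
 * double f = 525.0, cx = 320.0, cy = 240.0; // hypothetical 640x480 camera
 * double d = 2.0;                           // depth in meters
 * double X = (400.0 - cx) * d / f;          // ~0.30 m right of the optical axis
 * double Y = (300.0 - cy) * d / f;          // ~0.23 m below the optical axis
 * double Z = d;
 * @endcode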
46 | */ 47 | inline cv::Mat_ pxToEye(double x, double y, double depth) 48 | { 49 | cv::Mat_ eye = cv::Mat_::zeros(3, 1); 50 | 51 | if(depth == 0) 52 | return eye; 53 | 54 | GlobalProperties* gp = GlobalProperties::getInstance(); 55 | 56 | eye(0, 0) = ((x - (gp->dP.imageWidth / 2.0 + gp->dP.xShift)) / (gp->dP.focalLength / depth)); 57 | eye(1, 0) = ((y - (gp->dP.imageHeight / 2.0 + gp->dP.yShift)) / (gp->dP.focalLength / depth)); 58 | eye(2, 0) = (jp::coord1_t) depth; 59 | 60 | return eye; 61 | } 62 | 63 | /** 64 | * @brief Class that is a interface for reading and writing object specific data. 65 | * 66 | */ 67 | class Dataset 68 | { 69 | public: 70 | 71 | Dataset() 72 | { 73 | } 74 | 75 | /** 76 | * @brief Constructor. 77 | * 78 | * @param basePath The directory with subdirectories "rgb", "depth" and "poses". 79 | */ 80 | Dataset(const std::string& basePath) 81 | { 82 | readFileNames(basePath); 83 | } 84 | 85 | /** 86 | * @brief Size of the dataset (number of frames). 87 | * 88 | * @return size_t Size. 89 | */ 90 | size_t size() const 91 | { 92 | return bgrFiles.size(); 93 | } 94 | 95 | /** 96 | * @brief Return the RGB image file name of the given frame number. 97 | * 98 | * @param i Frame number. 99 | * @return std::string File name. 100 | */ 101 | std::string getFileName(size_t i) const 102 | { 103 | return bgrFiles[i]; 104 | } 105 | 106 | /** 107 | * @brief Get ground truth pose for the given frame. 108 | * 109 | * @param i Frame number. 110 | * @return bool Returns false if there is no valid ground truth for this frame. 111 | */ 112 | bool getPose(size_t i, jp::cv_trans_t& pose) const 113 | { 114 | if(infoFiles.empty()) 115 | return false; 116 | if(!readData(infoFiles[i], pose)) 117 | return false; 118 | return true; 119 | } 120 | 121 | /** 122 | * @brief Get the RGB image of the given frame. 123 | * 124 | * @param i Frame number. 125 | * @param img Output parameter. RGB image. 126 | * @return void 127 | */ 128 | void getBGR(size_t i, jp::img_bgr_t& img) const 129 | { 130 | std::string bgrFile = bgrFiles[i]; 131 | 132 | readData(bgrFile, img); 133 | 134 | GlobalProperties* gp = GlobalProperties::getInstance(); 135 | 136 | int imgWidth = gp->dP.imageWidth; 137 | int imgHeight = gp->dP.imageHeight; 138 | int imgPadding = gp->dP.imgPadding; 139 | 140 | // add zero padding to image for random shifting in training mode 141 | int realImgWidth = gp->getCNNInputDimX(); 142 | int realImgHeight = gp->getCNNInputDimY(); 143 | 144 | // rescale input image 145 | if((img.cols != realImgWidth) || (img.rows != realImgHeight)) 146 | cv::resize(img, img, cv::Size(realImgWidth, realImgHeight)); 147 | 148 | jp::img_bgr_t imgPadded = jp::img_bgr_t::zeros(imgHeight, imgWidth); 149 | img.copyTo(imgPadded.colRange(imgPadding, imgPadding + img.cols).rowRange(imgPadding, imgPadding + img.rows)); 150 | img = imgPadded; 151 | } 152 | 153 | /** 154 | * @brief Get the depth image of the given frame. 155 | * 156 | * If RGB and Depth are not registered (rawData flag in properties.h), Depth will be 157 | * mapped to RGB using calibration parameters and the external sensor transformation matrix. 158 | * If the constD paramters has a positive value, the depth channel will be filled with this value. 159 | * 160 | * @param i Frame number. 161 | * @param img Output parameter. depth image. 
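 * @note constD is given in meters; getDepth() returns millimeters
 * (constD * 1000) to match the real depth files, and getObj() below divides
 * by 1000 again before back-projecting.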
162 | * @return void 163 | */ 164 | void getDepth(size_t i, jp::img_depth_t& img) const 165 | { 166 | if(GlobalProperties::getInstance()->dP.constD > 0) 167 | { 168 | // return constant depth channel 169 | img = jp::img_depth_t::ones( 170 | GlobalProperties::getInstance()->dP.imageHeight, 171 | GlobalProperties::getInstance()->dP.imageWidth); 172 | 173 | img *= GlobalProperties::getInstance()->dP.constD * 1000; 174 | } 175 | else 176 | { 177 | std::string dFile = depthFiles[i]; 178 | 179 | readData(dFile, img); 180 | 181 | // zero pad image for random shifting in training mode 182 | int imgPadding = GlobalProperties::getInstance()->dP.imgPadding; 183 | jp::img_depth_t imgPadded = jp::img_depth_t::zeros(img.rows + 2 * imgPadding, img.cols + 2 * imgPadding); 184 | img.copyTo(imgPadded.colRange(imgPadding, imgPadding + img.cols).rowRange(imgPadding, imgPadding + img.rows)); 185 | img = imgPadded; 186 | } 187 | } 188 | 189 | /** 190 | * @brief Get the RGB-D image of the given frame. 191 | * 192 | * @param i Frame number. 193 | * @param img Output parameter. RGB-D image. 194 | * @return void 195 | */ 196 | void getBGRD(size_t i, jp::img_bgrd_t& img) const 197 | { 198 | getBGR(i, img.bgr); 199 | getDepth(i, img.depth); 200 | } 201 | 202 | /** 203 | * @brief Get the ground truth object coordinate image of the given frame. 204 | * 205 | * Object coordinates will be generated from image depth and the ground truth pose. 206 | * 207 | * @param i Frame number. 208 | * @param img Output parameter. Object coordinate image. 209 | * @return void 210 | */ 211 | void getObj(size_t i, jp::img_coord_t& img) const 212 | { 213 | jp::img_depth_t depthData; 214 | getDepth(i, depthData); 215 | 216 | // get ground truth pose 217 | jp::cv_trans_t poseData; 218 | getPose(i, poseData); 219 | 220 | cv::Mat rot; 221 | cv::Rodrigues(poseData.first, rot); 222 | 223 | img = jp::img_coord_t(depthData.rows, depthData.cols); 224 | 225 | #pragma omp parallel for 226 | for(unsigned x = 0; x < img.cols; x++) 227 | for(unsigned y = 0; y < img.rows; y++) 228 | { 229 | if(depthData(y, x) == 0) 230 | { 231 | img(y, x) = jp::coord3_t(0, 0, 0); 232 | continue; 233 | } 234 | 235 | // transform depth to camera coordinate 236 | cv::Mat_ eye = pxToEye(x, y, depthData(y, x) / 1000.0); 237 | 238 | // transform camera coordinte to object coordinate 239 | eye = eye - poseData.second; 240 | eye = rot.t() * eye; 241 | 242 | img(y, x) = jp::coord3_t(eye(0, 0), eye(1, 0), eye(2, 0)); 243 | } 244 | } 245 | 246 | private: 247 | 248 | /** 249 | * @brief Reads all file names in the various sub-folders of a dataset. 250 | * 251 | * @param basePath Folder where all data sub folders lie. 252 | * @return void 253 | */ 254 | void readFileNames(const std::string& basePath) 255 | { 256 | std::cout << "Reading file names... 
" << std::endl; 257 | std::string bgrPath = "/rgb/", bgrSuf = ".png"; 258 | std::string dPath = "/depth/", dSuf = ".png"; 259 | std::string infoPath = "/poses/", infoSuf = ".txt"; 260 | 261 | bgrFiles = getFiles(basePath + bgrPath, bgrSuf, true); 262 | if(bgrFiles.empty()) 263 | bgrFiles = getFiles(basePath + bgrPath, ".jpg"); 264 | depthFiles = getFiles(basePath + dPath, dSuf); 265 | if(depthFiles.empty()) 266 | depthFiles = getFiles(basePath + dPath, ".tiff"); 267 | infoFiles = getFiles(basePath + infoPath, infoSuf, true); 268 | 269 | // optional subsampling of images 270 | std::vector bgrFilesTemp; 271 | std::vector depthFilesTemp; 272 | std::vector infoFilesTemp; 273 | 274 | for(unsigned i = 0; i < bgrFiles.size(); i+=GlobalProperties::getInstance()->dP.imageSubSample) 275 | { 276 | if(!bgrFiles.empty()) bgrFilesTemp.push_back(bgrFiles[i]); 277 | if(!depthFiles.empty()) depthFilesTemp.push_back(depthFiles[i]); 278 | if(!infoFiles.empty()) infoFilesTemp.push_back(infoFiles[i]); 279 | } 280 | 281 | bgrFiles = bgrFilesTemp; 282 | depthFiles = depthFilesTemp; 283 | infoFiles = infoFilesTemp; 284 | } 285 | 286 | // image data files 287 | std::vector bgrFiles; 288 | std::vector depthFiles; 289 | // groundtruth data files 290 | std::vector infoFiles; 291 | }; 292 | } 293 | -------------------------------------------------------------------------------- /code/generic_io.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #pragma once 31 | 32 | #include 33 | 34 | /** Methods for reading and writing base and complex types to binary files. 35 | * Everything is heavily overloaded. You can call these methods for any 36 | * supported complex type and it will recursively deconstruct it. */ 37 | 38 | namespace jp 39 | { 40 | /** 41 | * @brief Write basic types (double, float, etc). 
42 | * 43 | * @param file Binary file to write to. 44 | * @param b Value to write. 45 | * @return void 46 | */ 47 | template 48 | void write(std::ofstream& file, const T& b) 49 | { 50 | file.write((char*) &b, sizeof(T)); 51 | } 52 | 53 | /** 54 | * @brief Read basic types (double, float, etc). 55 | * 56 | * @param file Binary file to read from. 57 | * @param b Value to read. 58 | * @return void 59 | */ 60 | template 61 | void read(std::ifstream& file, T& b) 62 | { 63 | file.read(reinterpret_cast(&b), sizeof(T)); 64 | } 65 | 66 | /** 67 | * @brief Write vectors. 68 | * 69 | * @param file Binary file to write to. 70 | * @param v Vector to write. 71 | * @return void 72 | */ 73 | template 74 | void write(std::ofstream& file, const std::vector& v) 75 | { 76 | write(file, v.size()); 77 | for(unsigned i = 0; i < v.size(); i++) 78 | write(file, v[i]); 79 | } 80 | 81 | /** 82 | * @brief Read vectors. 83 | * 84 | * @param file Binary file to read from. 85 | * @param v Vector to read. 86 | * @return void 87 | */ 88 | template 89 | void read(std::ifstream& file, std::vector& v) 90 | { 91 | unsigned size; 92 | read(file, size); 93 | v.resize(size); 94 | for(unsigned i = 0; i < v.size(); i++) 95 | { 96 | read(file, v[i]); 97 | } 98 | } 99 | 100 | /** 101 | * @brief Write maps. 102 | * 103 | * @param file Binary file to write to. 104 | * @param m Map to write. 105 | * @return void 106 | */ 107 | template 108 | void write(std::ofstream& file, const std::map& m) 109 | { 110 | write(file, m.size()); 111 | for(typename std::map::const_iterator it = m.begin(); it != m.end(); it++) 112 | { 113 | write(file, it->first); 114 | write(file, it->second); 115 | } 116 | } 117 | 118 | /** 119 | * @brief Read maps. 120 | * 121 | * @param file Binary file to read from. 122 | * @param m Map to read. 123 | * @return void 124 | */ 125 | template 126 | void read(std::ifstream& file, std::map& m) 127 | { 128 | unsigned size; 129 | T1 key; 130 | T2 value; 131 | read(file, size); 132 | for(unsigned i = 0; i < size; i++) 133 | { 134 | read(file, key); 135 | read(file, value); 136 | m[key] = value; 137 | } 138 | } 139 | 140 | /** 141 | * @brief Write OpenCV matrices. 142 | * 143 | * @param file Binary file to write to. 144 | * @param m Matrix to write. 145 | * @return void 146 | */ 147 | template 148 | void write(std::ofstream& file, const cv::Mat_& m) 149 | { 150 | write(file, m.rows); 151 | write(file, m.cols); 152 | for(unsigned i = 0; i < m.rows; i++) 153 | for(unsigned j = 0; j < m.cols; j++) 154 | write(file, m(i, j)); 155 | } 156 | 157 | /** 158 | * @brief Read OpenCV matrices. 159 | * 160 | * @param file Binary file to read from. 161 | * @param m Matrix to read. 162 | * @return void 163 | */ 164 | template 165 | void read(std::ifstream& file, cv::Mat_& m) 166 | { 167 | int rows, cols; 168 | read(file, rows); 169 | read(file, cols); 170 | m = cv::Mat_(rows, cols); 171 | for(unsigned i = 0; i < rows; i++) 172 | for(unsigned j = 0; j < cols; j++) 173 | read(file, m(i, j)); 174 | } 175 | 176 | /** 177 | * @brief Write OpenCV vectors. 178 | * 179 | * @param file Binary file to write to. 180 | * @param v Vector to write. 181 | * @return void 182 | */ 183 | template 184 | void write(std::ofstream& file, const cv::Vec& v) 185 | { 186 | for(unsigned i = 0; i < dim; i++) 187 | write(file, v[i]); 188 | } 189 | 190 | /** 191 | * @brief Read OpenCV vectors. 192 | * 193 | * @param file Binary file to read from. 194 | * @param v Vector to read. 
195 | * @return void 196 | */ 197 | template 198 | void read(std::ifstream& file, const cv::Vec& v) 199 | { 200 | for(unsigned i = 0; i < dim; i++) 201 | read(file, v[i]); 202 | } 203 | 204 | /** 205 | * @brief Creates a binary file from the given file name and write the given value. Close the file afterwards. 206 | * 207 | * @param file File name of the file to create. 208 | * @param b Value to write. 209 | * @return void 210 | */ 211 | template 212 | void write(std::string& fileName, const T& b) 213 | { 214 | std::ofstream file; 215 | file.open(fileName, std::ofstream::binary); 216 | jp::write(file, b); 217 | file.close(); 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /code/lua/Entropy.lua: -------------------------------------------------------------------------------- 1 | -- TAKEN FROM https://github.com/davidBelanger/torch-util/blob/master/Entropy.lua 2 | 3 | local Entropy, parent = torch.class('nn.Entropy', 'nn.Module') 4 | 5 | local eps = 1e-12 6 | 7 | 8 | --This doesn't assume that each element of input is a single bernoulli probability 9 | --instead, it assumes that each row indexes a distribution. e.g., each row is for a minibatch element. it returns the entropy of each row. 10 | 11 | --todo: pass it some flag if you're treating the whole input tensor as a single distribution 12 | function Entropy:__init() 13 | parent.__init(self) 14 | end 15 | 16 | function Entropy:updateOutput(input) 17 | -- -log(input) * input (and sum over all but the minibatch dimension) 18 | self.term1 = self.term1 or input.new() 19 | 20 | self.term1:resizeAs(input) 21 | 22 | self.term1:copy(input):add(eps):log() 23 | self.term1:cmul(input) 24 | 25 | if(input:dim() == 1) then 26 | self.output:resize(1) 27 | self.output[1] = -self.term1:sum() 28 | else 29 | local sizePerBatchElement = input:nElement()/input:size(1) 30 | self.output = self.term1:reshape(input:size(1),sizePerBatchElement):sum(2):mul(-1.0) 31 | end 32 | return self.output 33 | end 34 | 35 | function Entropy:updateGradInput(input,gradOutput) 36 | -- d = -(1 + log(x)) 37 | local d = gradOutput:dim() 38 | assert(d == 1 or (d == 2 and gradOutput:size(2) == 1)) 39 | 40 | self.term2 = self.term2 or input.new() 41 | self.term2:resizeAs(gradOutput) 42 | self.term2:copy(gradOutput) 43 | 44 | --the next 4 lines add a bunch of singleton dimensions, which is necessary for the later call to expandAs() 45 | local s = input:size() 46 | s[1] = input:size(1) 47 | for i = 2,s:size() do s[i] = 1 end 48 | 49 | self.gradInput:resizeAs(input) 50 | -- self.gradInput:copy(input):add(eps):log():add(1.0):mul(-1.0):cmul(self.term2:reshape(s):expandAs(input)) 51 | 52 | self.term2 = torch.expand(self.term2, self.gradInput:size(1)) 53 | self.gradInput:copy(input):add(eps):log():add(1.0):mul(-1.0):cmul(self.term2) 54 | 55 | return self.gradInput 56 | end 57 | 58 | -------------------------------------------------------------------------------- /code/lua/MyL1Criterion.lua: -------------------------------------------------------------------------------- 1 | MyL1Criterion, parent = torch.class('nn.MyL1Criterion', 'nn.Criterion') 2 | 3 | function MyL1Criterion:__init() 4 | parent.__init(self) 5 | end 6 | 7 | function MyL1Criterion:updateOutput(input, target) 8 | -- loss is the Euclidean distance between predicted and ground truth coordinate, mean calculated over batch 9 | self.output = torch.mean(torch.norm(input - target, 2, 2)) 10 | return self.output 11 | end 12 | 13 | function MyL1Criterion:updateGradInput(input, 
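	-- i.e. d/dinput of mean_i ||input_i - target_i||_2 = (input - target) / (N * dist_i),
	-- where N is the batch size; dists is expanded to three columns below so the
	-- element-wise division broadcasts over the x, y, z coordinates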
target) 14 | -- gradients are the difference of predicted and ground truth coordinate divided (scaled) by the Euclidean distance 15 | local dists = torch.norm(input - target, 2, 2) 16 | dists = torch.expand(dists, dists:size(1), 3) 17 | self.gradInput = torch.cdiv(input-target,dists) 18 | self.gradInput = torch.div(self.gradInput, dists:size(1)) 19 | return self.gradInput 20 | end 21 | -------------------------------------------------------------------------------- /code/lua/score_incount_ec6.lua: -------------------------------------------------------------------------------- 1 | require "nn" 2 | require "cunn" 3 | require 'optim' 4 | require 'cudnn' 5 | 6 | -- general parameters 7 | storeCounter = 0 8 | lrInitE2E = 0.001 9 | 10 | dofile('Entropy.lua') 11 | 12 | function loadModel(t, outW, outH) 13 | 14 | outputWidth = outW 15 | outputHeight = outH 16 | 17 | print('TORCH: Loading Score.') 18 | 19 | -- position and softness of inlier threshold 20 | inlierThresh = t 21 | inlierSoft = 0.5 -- sigmoid softness (beta) 22 | etarget = 6 -- target entropy 23 | 24 | print('TORCH: Inlier threshold: ' .. inlierThresh) 25 | print('TORCH: Target entropy: ' .. etarget) 26 | 27 | model = nn.Sequential() 28 | 29 | -- apply inlier threshold (non-learnable) 30 | model:add(nn.AddConstant(-inlierThresh)) 31 | model:add(nn.MulConstant(inlierSoft)) 32 | 33 | model:add(nn.Sigmoid()) 34 | 35 | -- inliers is 1 - sigmoid() 36 | model:add(nn.MulConstant(-1)) 37 | model:add(nn.AddConstant(1)) 38 | 39 | model:add(nn.Sum(3)) 40 | model:add(nn.Sum(3)) 41 | 42 | model:add(nn.Mul()) 43 | 44 | model = model:cuda() 45 | cudnn.convert(model, cudnn) 46 | 47 | model:evaluate() 48 | 49 | params, gradParams = model:getParameters() 50 | optimState = {learningRate = lrInitE2E} 51 | 52 | params[{1}] = 0.1 53 | gradParams:zero() 54 | 55 | criterion = nn.AbsCriterion() 56 | criterion = criterion:cuda() 57 | 58 | -- stuff for entropy controlled training 59 | emodel = nn.Sequential() 60 | emodel:add(nn.SoftMax()) 61 | emodel:add(nn.Entropy()) 62 | emodel:add(nn.MulConstant(1/math.log(2))) 63 | 64 | emodel = emodel:cuda() 65 | cudnn.convert(emodel, cudnn) 66 | 67 | ecriterion = nn.MSECriterion() 68 | ecriterion = ecriterion:cuda() 69 | 70 | model2 = model:clone() 71 | params2, gradParams2 = model2:getParameters() 72 | end 73 | 74 | function setEvaluate() 75 | model:evaluate() 76 | print('TORCH: Set score to evaluation mode.') 77 | end 78 | 79 | function setTraining() 80 | model:training() 81 | print('TORCH: Set score to training mode.') 82 | end 83 | 84 | function forward(count, data) 85 | print('TORCH: Doing a forward pass for ' .. count .. ' images.') 86 | local input = torch.FloatTensor(data):reshape(count, 1, outputHeight, outputWidth); 87 | input = input:cuda() 88 | 89 | local results = model:forward(input) 90 | 91 | local r = {} 92 | for i = 1,results:size(1) do 93 | if count == 1 then 94 | r[i] = results[{i}] 95 | else 96 | r[i] = results[{i, 1}] 97 | end 98 | end 99 | 100 | return unpack(r) 101 | end 102 | 103 | function backward(count, data, outputGradients) 104 | print('TORCH: Doing a backward pass for ' .. count .. 
' images.') 105 | 106 | local input = torch.FloatTensor(data):reshape(count, 1, outputHeight, outputWidth); 107 | input = input:cuda() 108 | 109 | local gradOutput = torch.FloatTensor(outputGradients):reshape(count, 1); 110 | gradOutput = gradOutput:cuda() 111 | 112 | local gradInput = model:backward(input, gradOutput) 113 | gradInput = gradInput:double() 114 | 115 | storeCounter = storeCounter + 1 116 | 117 | -- entropy control 118 | local scores = model2:forward(input) 119 | scores = scores:reshape(scores:size()[1]) 120 | 121 | local eresult = emodel:forward(scores) 122 | 123 | local egt = eresult:clone() 124 | egt[1] = etarget 125 | 126 | local eloss = ecriterion:forward(eresult, egt) 127 | local ecritgrad = ecriterion:backward(eresult, egt) -- gradient of the loss 128 | local emodelgrad = emodel:backward(scores, ecritgrad) -- gradient of the entropy 129 | 130 | emodelgrad = emodelgrad:reshape(1, scores:size()[1]) 131 | model2:backward(input, emodelgrad) 132 | 133 | -- insert optimizer here 134 | local function feval(params2) 135 | return 0, gradParams2 136 | end 137 | 138 | gradParams2:mul(0.1) 139 | 140 | optim.adam(feval, params2, optimState) 141 | 142 | print('Current score scale:') 143 | print(params) 144 | 145 | params:copy(params2) 146 | 147 | gradParams2:zero() 148 | gradParams:zero() 149 | 150 | local gradInputR = {} 151 | for c = 1,count do 152 | for x = 1,outputWidth do 153 | for y = 1,outputHeight do 154 | local idx = (c-1) * outputHeight * outputWidth + (x-1) * outputHeight + y 155 | gradInputR[idx] = gradInput[{c, 1, y, x}] 156 | end 157 | end 158 | end 159 | 160 | return gradInputR 161 | end 162 | -------------------------------------------------------------------------------- /code/lua/train_obj.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'cunn' 3 | require 'optim' 4 | require 'cudnn' 5 | 6 | -- general parameters 7 | storeCounter = 0 -- counts parameter updates 8 | 9 | -- parameters of pretraining 10 | storeInterval = 1000 -- storing snapshot after x updates 11 | lrInit = 0.0001 -- initial learning rate 12 | lrInterval = 50000 -- cutting learning rate in half after x updates 13 | lrIntervalInit = 100000 -- number if initial iteration without learning rate cutting 14 | gradClamp = 0.5 -- maximum gradient magnitude (reprojection opt. only) 15 | 16 | oFileInit = 'obj_model_fcn_init.net' 17 | oFileRepro = 'obj_model_fcn_repro.net' 18 | 19 | mean = {127, 127, 127} 20 | 21 | dofile('MyL1Criterion.lua') 22 | 23 | function loadModel(f, inW, inH, outW, outH) 24 | 25 | inputWidth = inW 26 | inputHeight = inH 27 | outputWidth = outW 28 | outputHeight = outH 29 | 30 | print('TORCH: Loading network from file: ' .. 
f) 31 | 32 | model = torch.load(f) 33 | model = model:cuda() 34 | cudnn.convert(model, cudnn) 35 | 36 | model:evaluate() 37 | 38 | criterion = nn.MyL1Criterion() 39 | criterion = criterion:cuda() 40 | 41 | params, gradParams = model:getParameters() 42 | optimState = {learningRate = lrInit} 43 | end 44 | 45 | function constructModel(inW, inH, outW, outH) 46 | 47 | inputWidth = inW 48 | inputHeight = inH 49 | outputWidth = outW 50 | outputHeight = outH 51 | 52 | print('TORCH: Creating network.') 53 | 54 | -- 640 x 480 55 | model = nn.Sequential() 56 | model:add(nn.SpatialConvolution(3, 64, 3, 3, 1, 1, 1, 1)) -- 3 57 | model:add(nn.ReLU()) 58 | model:add(nn.SpatialConvolution(64, 128, 3, 3, 2, 2, 1, 1)) -- 5 59 | model:add(nn.ReLU()) 60 | -- 320 x 240 61 | model:add(nn.SpatialConvolution(128, 128, 3, 3, 2, 2, 1, 1)) -- 9 62 | model:add(nn.ReLU()) 63 | -- 160 x 120 64 | model:add(nn.SpatialConvolution(128, 256, 3, 3, 1, 1, 1, 1)) -- 17 65 | model:add(nn.ReLU()) 66 | model:add(nn.SpatialConvolution(256, 256, 3, 3, 2, 2, 1, 1)) -- 19 67 | model:add(nn.ReLU()) 68 | -- 80 x 60 69 | model:add(nn.SpatialConvolution(256, 512, 3, 3, 1, 1, 1, 1)) -- 37 70 | model:add(nn.ReLU()) 71 | model:add(nn.SpatialConvolution(512, 512, 3, 3, 1, 1, 1, 1)) -- 39 72 | model:add(nn.ReLU()) 73 | model:add(nn.SpatialConvolution(512, 512, 3, 3, 1, 1, 1, 1)) -- 41 74 | model:add(nn.ReLU()) 75 | 76 | model:add(nn.SpatialConvolution(512, 4096, 1, 1, 1, 1, 0, 0)) 77 | model:add(nn.ReLU()) 78 | model:add(nn.SpatialConvolution(4096, 4096, 1, 1, 1, 1, 0, 0)) 79 | model:add(nn.ReLU()) 80 | model:add(nn.SpatialConvolution(4096, 3, 1, 1, 1, 1, 0, 0)) 81 | 82 | criterion = nn.MyL1Criterion() 83 | 84 | model = model:cuda() 85 | cudnn.convert(model, cudnn) 86 | 87 | model:evaluate() 88 | 89 | criterion = criterion:cuda() 90 | 91 | params, gradParams = model:getParameters() 92 | optimState = {learningRate = lrInit} 93 | end 94 | 95 | function setEvaluate() 96 | model:evaluate() 97 | print('TORCH: Set model to evaluation mode.') 98 | end 99 | 100 | function setTraining() 101 | model:training() 102 | print('TORCH: Set model to training mode.') 103 | end 104 | 105 | function forward(count, data) 106 | 107 | local input = torch.FloatTensor(data):reshape(count, 3, inputHeight, inputWidth); 108 | input = input:cuda() 109 | 110 | -- normalize data 111 | for c=1,3 do 112 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 113 | end 114 | 115 | print('TORCH: Doing a forward pass.') 116 | 117 | local results = model:forward(input) 118 | results = results:reshape(3, outputHeight * outputWidth):transpose(1,2) 119 | results = results:double() 120 | 121 | local resultsR = {} 122 | for i = 1,results:size(1) do 123 | for j = 1,3 do 124 | local idx = (i-1) * 3 + j 125 | resultsR[idx] = results[{i, j}] 126 | end 127 | end 128 | 129 | return resultsR 130 | end 131 | 132 | 133 | function backward(count, loss, data, gradients) 134 | 135 | print('TORCH: Doing a backward pass.') 136 | local input = torch.FloatTensor(data):reshape(1, 3, inputHeight, inputWidth) 137 | local dloss_dpred = torch.FloatTensor(gradients):reshape(count, 3):transpose(1,2):reshape(1, 3, outputHeight, outputWidth) 138 | 139 | input = input:cuda() 140 | dloss_dpred = dloss_dpred:cuda() 141 | 142 | dloss_dpred:clamp(-gradClamp,gradClamp) 143 | 144 | -- normalize data 145 | for c=1,3 do 146 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 147 | end 148 | 149 | gradParams:zero() 150 | 151 | local function feval(params) 152 | model:backward(input, dloss_dpred) 153 | return loss,gradParams 154 | end 155 | 
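	-- The loss and its gradients come from the C++ side (reprojection error of
	-- the predicted scene coordinates), so unlike a conventional Torch feval
	-- this closure does not call criterion:forward/backward itself; it only
	-- backpropagates the externally supplied dloss_dpred and hands the
	-- accumulated gradParams to the optimizer. A conventional feval would look
	-- roughly like this (sketch, not used here):
	--   local pred = model:forward(input)
	--   local loss = criterion:forward(pred, target)
	--   model:backward(input, criterion:backward(pred, target))
	--   return loss, gradParams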
optim.adam(feval, params, optimState) 156 | 157 | storeCounter = storeCounter + 1 158 | 159 | if (storeCounter % storeInterval) == 0 then 160 | print('TORCH: Storing a snapshot of the network.') 161 | model:clearState() 162 | torch.save(oFileRepro, model) 163 | end 164 | 165 | if storeCounter > (lrIntervalInit - 1) and (storeCounter % lrInterval) == 0 then 166 | print('TORCH: Cutting learningrate by half. Is now: ' .. optimState.learningRate) 167 | optimState.learningRate = optimState.learningRate * 0.5 168 | 169 | end 170 | end 171 | 172 | function train(data, labels) 173 | print('TORCH: Doing a training pass.') 174 | 175 | local input = torch.FloatTensor(data):reshape(1, 3, inputHeight, inputWidth) 176 | local output = torch.FloatTensor(labels):reshape(3, outputHeight * outputWidth):transpose(1,2) 177 | 178 | input = input:cuda() 179 | output = output:cuda() 180 | 181 | -- normalize data 182 | for c=1,3 do 183 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 184 | end 185 | 186 | local loss = 0 187 | 188 | local function feval(params) 189 | gradParams:zero() 190 | 191 | local pred = model:forward(input) 192 | pred = pred:reshape(3, outputHeight * outputWidth):transpose(1,2) 193 | loss = criterion:forward(pred, output) 194 | local dloss_dpred = criterion:backward(pred, output) 195 | dloss_dpred = dloss_dpred:transpose(1,2):reshape(1, 3, outputWidth, outputHeight) 196 | model:backward(input, dloss_dpred) 197 | 198 | return loss,gradParams 199 | end 200 | optim.adam(feval, params, optimState) 201 | 202 | storeCounter = storeCounter + 1 203 | 204 | if (storeCounter % storeInterval) == 0 then 205 | print('TORCH: Storing a snapshot of the network.') 206 | model:clearState() 207 | torch.save(oFileInit, model) 208 | end 209 | 210 | if storeCounter > (lrIntervalInit - 1) and (storeCounter % lrInterval) == 0 then 211 | print('TORCH: Cutting learningrate by half. Is now: ' .. optimState.learningRate) 212 | optimState.learningRate = optimState.learningRate * 0.5 213 | end 214 | 215 | return loss 216 | end 217 | -------------------------------------------------------------------------------- /code/lua/train_obj_e2e.lua: -------------------------------------------------------------------------------- 1 | require "nn" 2 | require "cunn" 3 | require 'optim' 4 | require 'cudnn' 5 | 6 | -- general parameters 7 | storeCounter = 0 -- counts parameter updates 8 | 9 | -- parameters of end to end training 10 | storeInterval = 100 -- storing snapshot after x updates 11 | lrInit = 0.000001 -- learning rate 12 | lrInterval = 25000 --cutting learning rate in half after x updates 13 | gradClamp = 0.001 -- maximum gradient magnitude 14 | oFile = 'obj_model_fcn_e2e.net' 15 | 16 | mean = {127, 127, 127} 17 | 18 | dofile('MyL1Criterion.lua') 19 | 20 | function loadModel(f, inW, inH, outW, outH) 21 | 22 | inputWidth = inW 23 | inputHeight = inH 24 | outputWidth = outW 25 | outputHeight = outH 26 | 27 | print('TORCH: Loading network from file: ' .. 
f) 28 | 29 | model = torch.load(f) 30 | model = model:cuda() 31 | cudnn.convert(model, cudnn) 32 | 33 | model:evaluate() 34 | 35 | criterion = nn.MyL1Criterion() 36 | criterion = criterion:cuda() 37 | 38 | params, gradParams = model:getParameters() 39 | optimState = {learningRate = lrInit} 40 | end 41 | 42 | function setEvaluate() 43 | model:evaluate() 44 | print('TORCH: Set model to evaluation mode.') 45 | end 46 | 47 | function setTraining() 48 | model:training() 49 | print('TORCH: Set model to training mode.') 50 | end 51 | 52 | function forward(count, data) 53 | 54 | local input = torch.FloatTensor(data):reshape(count, 3, inputHeight, inputWidth); 55 | input = input:cuda() 56 | 57 | -- normalize data 58 | for c=1,3 do 59 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 60 | end 61 | 62 | print('TORCH: Doing a forward pass.') 63 | 64 | local results = model:forward(input) 65 | results = results:reshape(3, outputHeight * outputWidth):transpose(1,2) 66 | results = results:double() 67 | 68 | local resultsR = {} 69 | for i = 1,results:size(1) do 70 | for j = 1,3 do 71 | local idx = (i-1) * 3 + j 72 | resultsR[idx] = results[{i, j}] 73 | end 74 | end 75 | 76 | return resultsR 77 | end 78 | 79 | 80 | function backward(count, loss, data, gradients) 81 | 82 | print('TORCH: Doing a backward pass.') 83 | local input = torch.FloatTensor(data):reshape(1, 3, inputHeight, inputWidth) 84 | local dloss_dpred = torch.FloatTensor(gradients):reshape(count, 3):transpose(1,2):reshape(1, 3, outputHeight, outputWidth) 85 | 86 | input = input:cuda() 87 | dloss_dpred = dloss_dpred:cuda() 88 | 89 | dloss_dpred:clamp(-gradClamp,gradClamp) 90 | 91 | -- normalize data 92 | for c=1,3 do 93 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 94 | end 95 | 96 | gradParams:zero() 97 | 98 | local function feval(params) 99 | model:backward(input, dloss_dpred) 100 | return loss,gradParams 101 | end 102 | optim.adam(feval, params, optimState) 103 | 104 | storeCounter = storeCounter + 1 105 | 106 | if (storeCounter % storeInterval) == 0 then 107 | print('TORCH: Storing a snapshot of the network.') 108 | model:clearState() 109 | torch.save(oFile, model) 110 | end 111 | 112 | if (storeCounter % lrInterval) == 0 then 113 | print('TORCH: Cutting learningrate by half. Is now: ' .. optimState.learningRate) 114 | optimState.learningRate = optimState.learningRate * 0.5 115 | end 116 | 117 | end 118 | -------------------------------------------------------------------------------- /code/lua/train_obj_e2e_nomodel.lua: -------------------------------------------------------------------------------- 1 | require "nn" 2 | require "cunn" 3 | require 'optim' 4 | require 'cudnn' 5 | 6 | -- general parameters 7 | storeCounter = 0 -- counts parameter updates 8 | 9 | -- parameters of end to end training 10 | storeInterval = 100 -- storing snapshot after x updates 11 | lrInit = 0.000001 -- learning rate 12 | lrInterval = 25000 --cutting learning rate in half after x updates 13 | gradClamp = 0.001 -- maximum gradient magnitude 14 | oFile = 'obj_model_fcn_e2e_nomodel.net' 15 | 16 | mean = {127, 127, 127} 17 | 18 | dofile('MyL1Criterion.lua') 19 | 20 | function loadModel(f, inW, inH, outW, outH) 21 | 22 | inputWidth = inW 23 | inputHeight = inH 24 | outputWidth = outW 25 | outputHeight = outH 26 | 27 | print('TORCH: Loading network from file: ' .. 
f) 28 | 29 | model = torch.load(f) 30 | model = model:cuda() 31 | cudnn.convert(model, cudnn) 32 | 33 | model:evaluate() 34 | 35 | criterion = nn.MyL1Criterion() 36 | criterion = criterion:cuda() 37 | 38 | params, gradParams = model:getParameters() 39 | optimState = {learningRate = lrInit} 40 | end 41 | 42 | function setEvaluate() 43 | model:evaluate() 44 | print('TORCH: Set model to evaluation mode.') 45 | end 46 | 47 | function setTraining() 48 | model:training() 49 | print('TORCH: Set model to training mode.') 50 | end 51 | 52 | function forward(count, data) 53 | 54 | local input = torch.FloatTensor(data):reshape(count, 3, inputHeight, inputWidth); 55 | input = input:cuda() 56 | 57 | -- normalize data 58 | for c=1,3 do 59 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 60 | end 61 | 62 | print('TORCH: Doing a forward pass.') 63 | 64 | local results = model:forward(input) 65 | results = results:reshape(3, outputHeight * outputWidth):transpose(1,2) 66 | results = results:double() 67 | 68 | local resultsR = {} 69 | for i = 1,results:size(1) do 70 | for j = 1,3 do 71 | local idx = (i-1) * 3 + j 72 | resultsR[idx] = results[{i, j}] 73 | end 74 | end 75 | 76 | return resultsR 77 | end 78 | 79 | 80 | function backward(count, loss, data, gradients) 81 | 82 | print('TORCH: Doing a backward pass.') 83 | local input = torch.FloatTensor(data):reshape(1, 3, inputHeight, inputWidth) 84 | local dloss_dpred = torch.FloatTensor(gradients):reshape(count, 3):transpose(1,2):reshape(1, 3, outputHeight, outputWidth) 85 | 86 | input = input:cuda() 87 | dloss_dpred = dloss_dpred:cuda() 88 | 89 | dloss_dpred:clamp(-gradClamp,gradClamp) 90 | 91 | -- normalize data 92 | for c=1,3 do 93 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 94 | end 95 | 96 | gradParams:zero() 97 | 98 | local function feval(params) 99 | model:backward(input, dloss_dpred) 100 | return loss,gradParams 101 | end 102 | optim.adam(feval, params, optimState) 103 | 104 | storeCounter = storeCounter + 1 105 | 106 | if (storeCounter % storeInterval) == 0 then 107 | print('TORCH: Storing a snapshot of the network.') 108 | model:clearState() 109 | torch.save(oFile, model) 110 | end 111 | 112 | if (storeCounter % lrInterval) == 0 then 113 | print('TORCH: Cutting learningrate by half. Is now: ' .. optimState.learningRate) 114 | optimState.learningRate = optimState.learningRate * 0.5 115 | end 116 | 117 | end 118 | -------------------------------------------------------------------------------- /code/lua/train_obj_nomodel.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'cunn' 3 | require 'optim' 4 | require 'cudnn' 5 | 6 | -- general parameters 7 | storeCounter = 0 -- counts parameter updates 8 | 9 | -- parameters of pretraining 10 | storeInterval = 1000 -- storing snapshot after x updates 11 | lrInit = 0.0001 -- initial learning rate 12 | lrInterval = 50000 -- cutting learning rate in half after x updates 13 | lrIntervalInit = 100000 -- number if initial iteration without learning rate cutting 14 | gradClamp = 0.5 -- maximum gradient magnitude (reprojection opt. only) 15 | 16 | oFileInit = 'obj_model_fcn_init_nomodel.net' 17 | oFileRepro = 'obj_model_fcn_repro_nomodel.net' 18 | 19 | mean = {127, 127, 127} 20 | 21 | dofile('MyL1Criterion.lua') 22 | 23 | function loadModel(f, inW, inH, outW, outH) 24 | 25 | inputWidth = inW 26 | inputHeight = inH 27 | outputWidth = outW 28 | outputHeight = outH 29 | 30 | print('TORCH: Loading network from file: ' .. 
f) 31 | 32 | model = torch.load(f) 33 | model = model:cuda() 34 | cudnn.convert(model, cudnn) 35 | 36 | model:evaluate() 37 | 38 | criterion = nn.MyL1Criterion() 39 | criterion = criterion:cuda() 40 | 41 | params, gradParams = model:getParameters() 42 | optimState = {learningRate = lrInit} 43 | end 44 | 45 | function constructModel(inW, inH, outW, outH) 46 | 47 | inputWidth = inW 48 | inputHeight = inH 49 | outputWidth = outW 50 | outputHeight = outH 51 | 52 | print('TORCH: Creating network.') 53 | 54 | -- 640 x 480 55 | model = nn.Sequential() 56 | model:add(nn.SpatialConvolution(3, 64, 3, 3, 1, 1, 1, 1)) -- 3 57 | model:add(nn.ReLU()) 58 | model:add(nn.SpatialConvolution(64, 128, 3, 3, 2, 2, 1, 1)) -- 5 59 | model:add(nn.ReLU()) 60 | -- 320 x 240 61 | model:add(nn.SpatialConvolution(128, 128, 3, 3, 2, 2, 1, 1)) -- 9 62 | model:add(nn.ReLU()) 63 | -- 160 x 120 64 | model:add(nn.SpatialConvolution(128, 256, 3, 3, 1, 1, 1, 1)) -- 17 65 | model:add(nn.ReLU()) 66 | model:add(nn.SpatialConvolution(256, 256, 3, 3, 2, 2, 1, 1)) -- 19 67 | model:add(nn.ReLU()) 68 | -- 80 x 60 69 | model:add(nn.SpatialConvolution(256, 512, 3, 3, 1, 1, 1, 1)) -- 37 70 | model:add(nn.ReLU()) 71 | model:add(nn.SpatialConvolution(512, 512, 3, 3, 1, 1, 1, 1)) -- 39 72 | model:add(nn.ReLU()) 73 | model:add(nn.SpatialConvolution(512, 512, 3, 3, 1, 1, 1, 1)) -- 41 74 | model:add(nn.ReLU()) 75 | 76 | model:add(nn.SpatialConvolution(512, 4096, 1, 1, 1, 1, 0, 0)) 77 | model:add(nn.ReLU()) 78 | model:add(nn.SpatialConvolution(4096, 4096, 1, 1, 1, 1, 0, 0)) 79 | model:add(nn.ReLU()) 80 | model:add(nn.SpatialConvolution(4096, 3, 1, 1, 1, 1, 0, 0)) 81 | 82 | criterion = nn.MyL1Criterion() 83 | 84 | model = model:cuda() 85 | cudnn.convert(model, cudnn) 86 | 87 | model:evaluate() 88 | 89 | criterion = criterion:cuda() 90 | 91 | params, gradParams = model:getParameters() 92 | optimState = {learningRate = lrInit} 93 | end 94 | 95 | function setEvaluate() 96 | model:evaluate() 97 | print('TORCH: Set model to evaluation mode.') 98 | end 99 | 100 | function setTraining() 101 | model:training() 102 | print('TORCH: Set model to training mode.') 103 | end 104 | 105 | function forward(count, data) 106 | 107 | local input = torch.FloatTensor(data):reshape(count, 3, inputHeight, inputWidth); 108 | input = input:cuda() 109 | 110 | -- normalize data 111 | for c=1,3 do 112 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 113 | end 114 | 115 | print('TORCH: Doing a forward pass.') 116 | 117 | local results = model:forward(input) 118 | results = results:reshape(3, outputHeight * outputWidth):transpose(1,2) 119 | results = results:double() 120 | 121 | local resultsR = {} 122 | for i = 1,results:size(1) do 123 | for j = 1,3 do 124 | local idx = (i-1) * 3 + j 125 | resultsR[idx] = results[{i, j}] 126 | end 127 | end 128 | 129 | return resultsR 130 | end 131 | 132 | 133 | function backward(count, loss, data, gradients) 134 | 135 | print('TORCH: Doing a backward pass.') 136 | local input = torch.FloatTensor(data):reshape(1, 3, inputHeight, inputWidth) 137 | local dloss_dpred = torch.FloatTensor(gradients):reshape(count, 3):transpose(1,2):reshape(1, 3, outputHeight, outputWidth) 138 | 139 | input = input:cuda() 140 | dloss_dpred = dloss_dpred:cuda() 141 | 142 | dloss_dpred:clamp(-gradClamp,gradClamp) 143 | 144 | -- normalize data 145 | for c=1,3 do 146 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 147 | end 148 | 149 | gradParams:zero() 150 | 151 | local function feval(params) 152 | model:backward(input, dloss_dpred) 153 | return loss,gradParams 154 | end 155 | 
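	-- optim.adam expects feval to return (loss, dloss/dparams); the gradient
	-- work already happened inside feval via model:backward with the
	-- externally computed dloss_dpred, so Adam simply consumes gradParams
	-- together with the loss value reported from the C++ side.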
optim.adam(feval, params, optimState) 156 | 157 | storeCounter = storeCounter + 1 158 | 159 | if (storeCounter % storeInterval) == 0 then 160 | print('TORCH: Storing a snapshot of the network.') 161 | model:clearState() 162 | torch.save(oFileRepro, model) 163 | end 164 | 165 | if storeCounter > (lrIntervalInit - 1) and (storeCounter % lrInterval) == 0 then 166 | print('TORCH: Cutting learningrate by half. Is now: ' .. optimState.learningRate) 167 | optimState.learningRate = optimState.learningRate * 0.5 168 | 169 | end 170 | end 171 | 172 | function train(data, labels) 173 | print('TORCH: Doing a training pass.') 174 | 175 | local input = torch.FloatTensor(data):reshape(1, 3, inputHeight, inputWidth) 176 | local output = torch.FloatTensor(labels):reshape(3, outputHeight * outputWidth):transpose(1,2) 177 | 178 | input = input:cuda() 179 | output = output:cuda() 180 | 181 | -- normalize data 182 | for c=1,3 do 183 | input[{ {}, {c}, {}, {} }]:add(-mean[c]) 184 | end 185 | 186 | local loss = 0 187 | 188 | local function feval(params) 189 | gradParams:zero() 190 | 191 | local pred = model:forward(input) 192 | pred = pred:reshape(3, outputHeight * outputWidth):transpose(1,2) 193 | loss = criterion:forward(pred, output) 194 | local dloss_dpred = criterion:backward(pred, output) 195 | dloss_dpred = dloss_dpred:transpose(1,2):reshape(1, 3, outputWidth, outputHeight) 196 | model:backward(input, dloss_dpred) 197 | 198 | return loss,gradParams 199 | end 200 | optim.adam(feval, params, optimState) 201 | 202 | storeCounter = storeCounter + 1 203 | 204 | if (storeCounter % storeInterval) == 0 then 205 | print('TORCH: Storing a snapshot of the network.') 206 | model:clearState() 207 | torch.save(oFileInit, model) 208 | end 209 | 210 | if storeCounter > (lrIntervalInit - 1) and (storeCounter % lrInterval) == 0 then 211 | print('TORCH: Cutting learningrate by half. Is now: ' .. optimState.learningRate) 212 | optimState.learningRate = optimState.learningRate * 0.5 213 | end 214 | 215 | return loss 216 | end 217 | -------------------------------------------------------------------------------- /code/lua_calls.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | */
28 | 
29 | 
30 | #pragma once
31 | 
32 | #include 
33 | 
34 | /**
35 |  * @brief Transfer a matrix to lua.
36 |  * @param mat Input matrix.
37 |  * @param state Lua state.
38 |  */
39 | void pushMat(const cv::Mat_<double>& mat, lua_State* state)
40 | {
41 |     lua_createtable(state, mat.rows * mat.cols, 0);
42 |     int newTable = lua_gettop(state);
43 | 
44 |     int i = 1;
45 | 
46 |     for(int y = 0; y < mat.rows; y++)
47 |     for(int x = 0; x < mat.cols; x++)
48 |     {
49 |         lua_pushnumber(state, mat(y, x));
50 |         lua_rawseti(state, newTable, i++);
51 |     }
52 | }
53 | 
54 | /**
55 |  * @brief Transfer a list of 3-channel matrices to lua (e.g. for RGB data).
56 |  * @param maps List of 3-channel matrices.
57 |  * @param state Lua state.
58 |  */
59 | void pushMaps(const std::vector<cv::Mat_<cv::Vec3f>>& maps, lua_State* state)
60 | {
61 |     if(maps.empty()) return;
62 | 
63 |     lua_createtable(state, maps.size() * 3 * maps[0].rows * maps[0].cols, 0);
64 |     int newTable = lua_gettop(state);
65 | 
66 |     int i = 1;
67 | 
68 |     for(int n = 0; n < maps.size(); n++)
69 |     for(int c = 0; c < 3; c++)
70 |     for(int y = 0; y < maps[n].rows; y++)
71 |     for(int x = 0; x < maps[n].cols; x++)
72 |     {
73 |         lua_pushnumber(state, maps[n](y, x)[c]);
74 |         lua_rawseti(state, newTable, i++);
75 |     }
76 | }
77 | 
78 | /**
79 |  * @brief Transfer a list of 1-channel matrices to lua (e.g. for reprojection error images).
80 |  * @param maps List of 1-channel matrices.
81 |  * @param state Lua state.
82 |  */
83 | void pushMaps(const std::vector<cv::Mat_<float>>& maps, lua_State* state)
84 | {
85 |     if(maps.empty()) return;
86 | 
87 |     lua_createtable(state, maps.size() * maps[0].rows * maps[0].cols, 0);
88 |     int newTable = lua_gettop(state);
89 | 
90 |     int i = 1;
91 | 
92 |     for(int n = 0; n < maps.size(); n++)
93 |     for(int y = 0; y < maps[n].rows; y++)
94 |     for(int x = 0; x < maps[n].cols; x++)
95 |     {
96 |         lua_pushnumber(state, maps[n](y, x));
97 |         lua_rawseti(state, newTable, i++);
98 |     }
99 | }
100 | 
101 | /**
102 |  * @brief Transfer a list of 3D vectors to lua.
103 |  * @param vec List of 3D vectors.
104 |  * @param state Lua state.
105 |  */
106 | void pushVec(const std::vector<jp::coord3_t>& vec, lua_State* state)
107 | {
108 |     if(vec.empty()) return;
109 | 
110 |     lua_createtable(state, vec.size() * 3, 0);
111 |     int newTable = lua_gettop(state);
112 | 
113 |     int i = 1;
114 | 
115 |     for(int n = 0; n < vec.size(); n++)
116 |     {
117 |         lua_pushnumber(state, vec[n][0]);
118 |         lua_rawseti(state, newTable, i++);
119 |         lua_pushnumber(state, vec[n][1]);
120 |         lua_rawseti(state, newTable, i++);
121 |         lua_pushnumber(state, vec[n][2]);
122 |         lua_rawseti(state, newTable, i++);
123 |     }
124 | }
125 | 
126 | /**
127 |  * @brief Transfer a list of real numbers to lua.
128 |  * @param vec List of real numbers.
129 |  * @param state Lua state.
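 *
 * All of the push helpers flatten their data into a single contiguous,
 * 1-based Lua table, matching the reshape() calls on the Torch side. A
 * hypothetical call-site sketch (function name and values invented for
 * illustration; real argument lists differ):
 * @code
 * std::vector<double> scoreGrads = {0.1, -0.2, 0.05};
 * lua_getglobal(state, "consumeGrads"); // hypothetical Lua function
 * pushVec(scoreGrads, state);           // argument: table {0.1, -0.2, 0.05}
 * lua_pcall(state, 1, 0, 0);            // one argument, no return values
 * @endcode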
130 | */ 131 | void pushVec(const std::vector& vec, lua_State* state) 132 | { 133 | if(vec.empty()) return; 134 | 135 | lua_createtable(state, vec.size(), 0); 136 | int newTable = lua_gettop(state); 137 | 138 | for(int n = 0; n < vec.size(); n++) 139 | { 140 | lua_pushnumber(state, vec[n]); 141 | lua_rawseti(state, newTable, n+1); 142 | } 143 | } 144 | 145 | /** 146 | * @brief Print lua error message. 147 | * @param Lua state. 148 | */ 149 | void print_error(lua_State* state) 150 | { 151 | // The error message is on top of the stack. 152 | // Fetch it, print it and then pop it off the stack. 153 | const char* message = lua_tostring(state, -1); 154 | puts(message); 155 | lua_pop(state, 1); 156 | } 157 | 158 | /** 159 | * @brief Execute the given lua script. 160 | * @param filename Lua script. 161 | * @param state Lua state. 162 | */ 163 | void execute(const char* filename, lua_State* state) 164 | { 165 | int result; 166 | 167 | result = luaL_loadfile(state, filename); 168 | 169 | if ( result != 0 ) 170 | { 171 | print_error(state); 172 | return; 173 | } 174 | 175 | result = lua_pcall(state, 0, LUA_MULTRET, 0); 176 | 177 | if ( result != 0 ) 178 | { 179 | print_error(state); 180 | return; 181 | } 182 | } 183 | 184 | /** 185 | * @brief Calls the constructModel function of the given lua state (to construct a CNN). 186 | * 187 | 188 | * @param inW Width of CNN input. 189 | * @param inH Height of CNN input. 190 | * @param outW Width of CNN output. 191 | * @param outH Height of CNN output. 192 | * @param state Lua state. 193 | */ 194 | void constructModel(int inW, int inH, int outW, int outH, lua_State* state) 195 | { 196 | lua_getglobal(state, "constructModel"); 197 | lua_pushnumber(state, inW); 198 | lua_pushnumber(state, inH); 199 | lua_pushnumber(state, outW); 200 | lua_pushnumber(state, outH); 201 | lua_pcall(state, 4, 0, 0); 202 | } 203 | 204 | /** 205 | * @brief Load a model (CNN) stored in the given file. 206 | * @param modelFile File storing the model. 207 | * @param inW Width of CNN input. 208 | * @param inH Height of CNN input. 209 | * @param outW Width of CNN output. 210 | * @param outH Height of CNN output. 211 | * @param state Lua state. 212 | */ 213 | void loadModel(std::string modelFile, int inW, int inH, int outW, int outH, lua_State* state) 214 | { 215 | lua_getglobal(state, "loadModel"); 216 | lua_pushstring(state, modelFile.c_str()); 217 | lua_pushnumber(state, inW); 218 | lua_pushnumber(state, inH); 219 | lua_pushnumber(state, outW); 220 | lua_pushnumber(state, outH); 221 | lua_pcall(state, 5, 0, 0); 222 | } 223 | 224 | /** 225 | * @brief Initalize the scoring script. 226 | * @param threshold Inlier threshold to use in the soft inlier counting. 227 | * @param outW Width of CNN output. 228 | * @param outH Height of CNN output. 229 | * @param state Lua state. 230 | */ 231 | void loadScore(int threshold, int outW, int outH, lua_State* state) 232 | { 233 | lua_getglobal(state, "loadModel"); 234 | lua_pushnumber(state, threshold); 235 | lua_pushnumber(state, outW); 236 | lua_pushnumber(state, outH); 237 | lua_pcall(state, 3, 0, 0); 238 | } 239 | 240 | /** 241 | * @brief Call the backward function of the given lua state. 242 | * 243 | * This function concerns the object coordinate CNN. 244 | * 245 | * @param loss Loss measured during the forward pass. 246 | * @param maps Input data of the forward pass. 247 | * @param dLoss Gradients of the loss. 248 | * @param state Lua state. 
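 *
 * dLoss is expected to hold one row per CNN output pixel and one column per
 * coordinate dimension (x, y, z); dLoss.rows is passed as the count argument
 * so the Lua side can reshape the flat gradient table back into a
 * (count x 3) tensor.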
249 | */ 250 | void backward( 251 | double loss, 252 | const std::vector>& maps, 253 | const cv::Mat_& dLoss, 254 | lua_State* state) 255 | { 256 | lua_getglobal(state, "backward"); 257 | lua_pushinteger(state, dLoss.rows); 258 | lua_pushnumber(state, loss); 259 | pushMaps(maps, state); 260 | 261 | pushMat(dLoss, state); 262 | lua_pcall(state, 4, 0, 0); 263 | } 264 | 265 | /** 266 | * @brief Call of the forward function of the given lua state. 267 | * 268 | * This function concerns the object coordinate CNN. 269 | * 270 | * @param maps Input data (eg. RGB images). 271 | * @param sampling Subsampling map that defines the CNN output size. 272 | * @return List of predictions. 273 | */ 274 | std::vector forward( 275 | const std::vector>& maps, 276 | const cv::Mat_& sampling, 277 | lua_State* state) 278 | { 279 | lua_getglobal(state, "forward"); 280 | lua_pushinteger(state, maps.size()); 281 | pushMaps(maps, state); 282 | lua_pcall(state, 2, 1, 0); 283 | 284 | std::vector results(sampling.cols * sampling.rows); 285 | 286 | for(unsigned i = 0; i < results.size(); i++) 287 | for(unsigned j = 0; j < 3; j++) 288 | { 289 | int idx = i * 3 + j; 290 | lua_pushnumber(state, idx + 1); 291 | lua_gettable(state, -2); 292 | results[i][j] = lua_tonumber(state, -1); 293 | lua_pop(state, 1); 294 | } 295 | lua_pop(state, 1); 296 | 297 | return results; 298 | } 299 | 300 | /** 301 | * @brief Call of the forward function of the given lua state. 302 | * 303 | * This function concerns the score CNN. 304 | * 305 | * @param maps Input data (eg. reprojection error images). 306 | * @param state Lua state. 307 | * @return List of predictions, one per input datum. 308 | */ 309 | std::vector forward(const std::vector>& maps, lua_State* state) 310 | { 311 | lua_getglobal(state, "forward"); 312 | lua_pushinteger(state, maps.size()); 313 | pushMaps(maps, state); 314 | lua_pcall(state, 2, maps.size(), 0); 315 | 316 | std::vector results(maps.size()); 317 | 318 | for(int i = 0; i < results.size(); i++) 319 | { 320 | results[results.size() - 1 - i] = lua_tonumber(state, -1); 321 | lua_pop(state, 1); 322 | } 323 | 324 | return results; 325 | } 326 | 327 | /** 328 | * @brief Call the backward function of the given lua state. 329 | * 330 | * This function concerns the score CNN. 331 | * 332 | * @param maps Input data of the forward pass (eg. reprojection error images). 333 | * @param state Lua state. 334 | * @param scoreOutputGradients Gradients at the output of the Score CNN. 335 | * @param gradients Output parameter. Gradients at the input of the score CNN. 336 | */ 337 | void backward( 338 | const std::vector>& maps, 339 | lua_State* state, 340 | const std::vector& scoreOutputGradients, 341 | std::vector>& gradients) 342 | { 343 | lua_getglobal(state, "backward"); 344 | lua_pushinteger(state, maps.size()); 345 | pushMaps(maps, state); 346 | pushVec(scoreOutputGradients, state); 347 | lua_pcall(state, 3, 1, 0); 348 | 349 | gradients.resize(maps.size()); 350 | 351 | for(unsigned c = 0; c < maps.size(); c++) 352 | { 353 | gradients[c] = cv::Mat_(maps[c].size()); 354 | 355 | for(unsigned x = 0; x < gradients[c].cols; x++) 356 | for(unsigned y = 0; y < gradients[c].rows; y++) 357 | { 358 | int idx = c * gradients[c].cols * gradients[c].rows + x * gradients[c].rows + y; 359 | lua_pushnumber(state, idx + 1); 360 | lua_gettable(state, -2); 361 | gradients[c](y, x) = lua_tonumber(state, -1); 362 | lua_pop(state, 1); 363 | } 364 | } 365 | lua_pop(state, 1); 366 | } 367 | 368 | /** 369 | * @brief Sets CNN model to evaluate mode. 
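 *
 * For orientation, the typical call order when driving these bindings is a
 * sketch like the following (assumes a Lua state created elsewhere with
 * luaL_newstate()/luaL_openlibs(); the CNN input/output sizes are
 * hypothetical):
 * @code
 * execute("train_obj.lua", state);          // defines the global Lua functions
 * constructModel(640, 480, 80, 60, state);  // or loadModel() for a stored net
 * setTraining(state);                       // switch the CNN to training mode
 * @endcode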
370 |  * @param state LUA state for access to CNN model.
371 |  */
372 | void setEvaluate(lua_State* state)
373 | {
374 |     lua_getglobal(state, "setEvaluate");
375 |     lua_pcall(state, 0, 0, 0);
376 | }
377 | 
378 | /**
379 |  * @brief Sets CNN model to training mode.
380 |  * @param state LUA state for access to CNN model.
381 |  */
382 | void setTraining(lua_State* state)
383 | {
384 |     lua_getglobal(state, "setTraining");
385 |     lua_pcall(state, 0, 0, 0);
386 | }
387 | 
--------------------------------------------------------------------------------
/code/maxloss.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2016, TU Dresden
3 | Copyright (c) 2017, Heidelberg University
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |     * Redistributions of source code must retain the above copyright
9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in the
12 |       documentation and/or other materials provided with the distribution.
13 |     * Neither the name of the TU Dresden, Heidelberg University nor the
14 |       names of its contributors may be used to endorse or promote products
15 |       derived from this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | */
28 | 
29 | 
30 | #pragma once
31 | 
32 | #define MAXLOSS 10000000.0
33 | 
34 | /**
35 |  * @brief Inverts a given transformation.
36 |  * @param hyp Input transformation.
37 |  * @return Inverted transformation.
38 |  */
39 | jp::cv_trans_t getInvHyp(const jp::cv_trans_t& hyp)
40 | {
41 |     cv::Mat_<double> hypR, trans = cv::Mat_<double>::eye(4, 4);
42 |     cv::Rodrigues(hyp.first, hypR);
43 | 
44 |     hypR.copyTo(trans.rowRange(0,3).colRange(0,3));
45 |     trans(0, 3) = hyp.second.at<double>(0, 0);
46 |     trans(1, 3) = hyp.second.at<double>(0, 1);
47 |     trans(2, 3) = hyp.second.at<double>(0, 2);
48 | 
49 |     trans = trans.inv();
50 | 
51 |     jp::cv_trans_t invHyp;
52 |     cv::Rodrigues(trans.rowRange(0,3).colRange(0,3), invHyp.first);
53 |     invHyp.second = cv::Mat_<double>(1, 3);
54 |     invHyp.second.at<double>(0, 0) = trans(0, 3);
55 |     invHyp.second.at<double>(0, 1) = trans(1, 3);
56 |     invHyp.second.at<double>(0, 2) = trans(2, 3);
57 | 
58 |     return invHyp;
59 | }
60 | 
61 | /**
62 |  * @brief Calculates the rotational distance in degrees between two transformations.
63 |  * Translation will be ignored.
64 |  *
65 |  * @param h1 Transformation 1.
66 |  * @param h2 Transformation 2.
67 |  * @return Angle in degrees.
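 *
 * Uses the axis-angle relation for the relative rotation R = R2 * R1^T:
 * the angle is acos((trace(R) - 1) / 2), converted from radians to degrees;
 * the trace is clamped below to guard against numerical noise.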
68 | */ 69 | double calcAngularDistance(const jp::cv_trans_t& h1, const jp::cv_trans_t& h2) 70 | { 71 | cv::Mat r1, r2; 72 | cv::Rodrigues(h1.first, r1); 73 | cv::Rodrigues(h2.first, r2); 74 | 75 | cv::Mat rotDiff= r2 * r1.t(); 76 | double trace = cv::trace(rotDiff)[0]; 77 | 78 | trace = std::min(3.0, std::max(-1.0, trace)); 79 | return 180*acos((trace-1.0)/2.0)/CV_PI; 80 | } 81 | 82 | /** 83 | * @brief Maximum of translational error (cm) and rotational error (deg) between two pose hypothesis. 84 | * @param h1 Pose 1. 85 | * @param h2 Pose 2. 86 | * @return Loss. 87 | */ 88 | double maxLoss(const jp::cv_trans_t& h1, const jp::cv_trans_t& h2) 89 | { 90 | // measure loss of inverted poses (camera pose instead of scene pose) 91 | jp::cv_trans_t invH1 = getInvHyp(h1); 92 | jp::cv_trans_t invH2 = getInvHyp(h2); 93 | 94 | double rotErr = calcAngularDistance(invH1, invH2); 95 | double tErr = cv::norm(invH1.second - invH2.second); 96 | 97 | return std::min(std::max(rotErr, tErr * 100), MAXLOSS); 98 | } 99 | 100 | /** 101 | * @brief Calculate the derivative of the loss w.r.t. the estimated pose. 102 | * @param est Estimated pose (6 DoF). 103 | * @param gt Ground truth pose (6 DoF). 104 | * @return 1x6 Jacobean. 105 | */ 106 | cv::Mat_ dLossMax(const jp::cv_trans_t& est, const jp::cv_trans_t& gt) 107 | { 108 | cv::Mat rot1, rot2, dRod; 109 | cv::Rodrigues(est.first, rot1, dRod); 110 | cv::Rodrigues(gt.first, rot2); 111 | 112 | // measure loss of inverted poses (camera pose instead of scene pose) 113 | cv::Mat_ invRot1 = rot1.t(); 114 | cv::Mat_ invRot2 = rot2.t(); 115 | 116 | // get the difference rotation 117 | cv::Mat diffRot = rot1 * invRot2; 118 | 119 | // calculate rotational and translational error 120 | double trace = cv::trace(diffRot)[0]; 121 | trace = std::min(3.0, std::max(-1.0, trace)); 122 | double rotErr = 180*acos((trace-1.0)/2.0)/CV_PI; 123 | 124 | cv::Mat_ invT1 = est.second * 100; 125 | invT1 = invRot1 * invT1; 126 | 127 | cv::Mat_ invT2 = gt.second * 100; 128 | invT2 = invRot2 * invT2; 129 | 130 | // zero error, abort 131 | double tErr = cv::norm(invT1 - invT2); 132 | 133 | cv::Mat_ jacobean = cv::Mat_::zeros(1, 6); 134 | 135 | // clamped loss, return zero gradient if loss is bigger than threshold 136 | if(std::max(rotErr, tErr) > MAXLOSS) 137 | return jacobean; 138 | 139 | if((tErr + rotErr) < EPS) 140 | return jacobean; 141 | 142 | if(tErr > rotErr) 143 | { 144 | // return gradient of translational error 145 | cv::Mat_ dDist_dInvT1(1, 3); 146 | for(unsigned i = 0; i < 3; i++) 147 | dDist_dInvT1(0, i) = (invT1(i, 0) - invT2(i, 0)) / tErr; 148 | 149 | cv::Mat_ dInvT1_dEstT(3, 3); 150 | dInvT1_dEstT = invRot1 * 100; 151 | 152 | cv::Mat_ dDist_dEstT = dDist_dInvT1 * dInvT1_dEstT; 153 | dDist_dEstT.copyTo(jacobean.colRange(3, 6)); 154 | 155 | cv::Mat_ dInvT1_dInvRot1 = cv::Mat_::zeros(3, 9); 156 | 157 | dInvT1_dInvRot1(0, 0) = est.second.at(0, 0) * 100; 158 | dInvT1_dInvRot1(0, 3) = est.second.at(1, 0) * 100; 159 | dInvT1_dInvRot1(0, 6) = est.second.at(2, 0) * 100; 160 | 161 | dInvT1_dInvRot1(1, 1) = est.second.at(0, 0) * 100; 162 | dInvT1_dInvRot1(1, 4) = est.second.at(1, 0) * 100; 163 | dInvT1_dInvRot1(1, 7) = est.second.at(2, 0) * 100; 164 | 165 | dInvT1_dInvRot1(2, 2) = est.second.at(0, 0) * 100; 166 | dInvT1_dInvRot1(2, 5) = est.second.at(1, 0) * 100; 167 | dInvT1_dInvRot1(2, 8) = est.second.at(2, 0) * 100; 168 | 169 | dRod = dRod.t(); 170 | 171 | cv::Mat_ dDist_dRod = dDist_dInvT1 * dInvT1_dInvRot1 * dRod; 172 | dDist_dRod.copyTo(jacobean.colRange(0, 3)); 173 | } 174 | else 175 
| { 176 | // return gradient of rotational error 177 | dRod = dRod.t(); 178 | 179 | cv::Mat_ dRotDiff = cv::Mat_::zeros(9, 9); 180 | invRot2.row(0).copyTo(dRotDiff.row(0).colRange(0, 3)); 181 | invRot2.row(1).copyTo(dRotDiff.row(1).colRange(0, 3)); 182 | invRot2.row(2).copyTo(dRotDiff.row(2).colRange(0, 3)); 183 | 184 | invRot2.row(0).copyTo(dRotDiff.row(3).colRange(3, 6)); 185 | invRot2.row(1).copyTo(dRotDiff.row(4).colRange(3, 6)); 186 | invRot2.row(2).copyTo(dRotDiff.row(5).colRange(3, 6)); 187 | 188 | invRot2.row(0).copyTo(dRotDiff.row(6).colRange(6, 9)); 189 | invRot2.row(1).copyTo(dRotDiff.row(7).colRange(6, 9)); 190 | invRot2.row(2).copyTo(dRotDiff.row(8).colRange(6, 9)); 191 | 192 | dRotDiff = dRotDiff.t(); 193 | 194 | cv::Mat_ dTrace = cv::Mat_::zeros(1, 9); 195 | dTrace(0, 0) = 1; 196 | dTrace(0, 4) = 1; 197 | dTrace(0, 8) = 1; 198 | 199 | cv::Mat_ dAngle = (180 / CV_PI * -1 / sqrt(3 - trace * trace + 2 * trace)) * dTrace * dRotDiff * dRod; 200 | dAngle.copyTo(jacobean.colRange(0, 3)); 201 | } 202 | 203 | if(cv::sum(cv::Mat(jacobean != jacobean))[0] > 0) //check for NaNs 204 | return cv::Mat_::zeros(1, 6); 205 | 206 | return jacobean; 207 | } 208 | 209 | -------------------------------------------------------------------------------- /code/properties.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
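
A note on the pose loss implemented in maxloss.h above: getInvHyp inverts the homogeneous transformation to switch between scene and camera poses, and maxLoss/dLossMax then compare two poses as the clamped maximum of angular and positional deviation. Written out (rotations $R$ as 3x3 matrices, translations $t$ in meters, matching the code):

$$T^{-1} = \begin{pmatrix} R^\top & -R^\top t \\ \mathbf{0}^\top & 1 \end{pmatrix}, \qquad \theta = \frac{180}{\pi}\arccos\!\left(\frac{\operatorname{tr}(R_1 R_2^\top)-1}{2}\right),$$

$$\ell(h_1,h_2) = \min\bigl(\max(\theta,\; 100\,\lVert t_1 - t_2\rVert),\; \text{MAXLOSS}\bigr),$$

i.e. the maximum of the rotational error in degrees and the camera-space translational error in centimeters. dLossMax differentiates whichever branch of the max is active, and returns a zero Jacobian when the loss is clamped or when both errors are numerically zero.
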
27 | */ 28 | 29 | 30 | #include "properties.h" 31 | #include "util.h" 32 | #include "thread_rand.h" 33 | 34 | #include 35 | #include 36 | #include 37 | #include "generic_io.h" 38 | 39 | GlobalProperties* GlobalProperties::instance = NULL; 40 | 41 | GlobalProperties::GlobalProperties() 42 | { 43 | // testing parameters 44 | tP.sessionString = ""; 45 | 46 | tP.objScript = "train_obj.lua"; 47 | tP.scoreScript = "score_incount_ec6.lua"; 48 | tP.objModel = "obj_model_fcn_init.net"; 49 | 50 | tP.ransacIterations = 256; 51 | tP.ransacRefinementIterations = 100; 52 | tP.ransacInlierThreshold = 10; 53 | 54 | tP.randomDraw = true; 55 | 56 | //dataset parameters 57 | dP.config = "default"; 58 | 59 | dP.focalLength = 585.0f; 60 | dP.xShift = 0.f; 61 | dP.yShift = 0.f; 62 | dP.imgPadding = 4; 63 | 64 | dP.imageWidth = 640; 65 | dP.imageHeight = 480; 66 | 67 | dP.constD = 0; 68 | 69 | dP.imageSubSample = 1; 70 | 71 | dP.cnnSubSample = 8; 72 | } 73 | 74 | GlobalProperties* GlobalProperties::getInstance() 75 | { 76 | if(instance == NULL) 77 | instance = new GlobalProperties(); 78 | return instance; 79 | } 80 | 81 | bool GlobalProperties::readArguments(std::vector argv) 82 | { 83 | int argc = argv.size(); 84 | 85 | for(int i = 0; i < argc; i++) 86 | { 87 | std::string s = argv[i]; 88 | 89 | if(s == "-iw") 90 | { 91 | i++; 92 | dP.imageWidth = std::atoi(argv[i].c_str()); 93 | std::cout << "image width: " << dP.imageWidth << "\n"; 94 | continue; 95 | } 96 | 97 | if(s == "-ih") 98 | { 99 | i++; 100 | dP.imageHeight = std::atoi(argv[i].c_str()); 101 | std::cout << "image height: " << dP.imageHeight << "\n"; 102 | continue; 103 | } 104 | 105 | if(s == "-fl") 106 | { 107 | i++; 108 | dP.focalLength = (float)std::atof(argv[i].c_str()); 109 | std::cout << "focal length: " << dP.focalLength << "\n"; 110 | continue; 111 | } 112 | 113 | if(s == "-xs") 114 | { 115 | i++; 116 | dP.xShift = (float)std::atof(argv[i].c_str()); 117 | std::cout << "x shift: " << dP.xShift << "\n"; 118 | continue; 119 | } 120 | 121 | if(s == "-ys") 122 | { 123 | i++; 124 | dP.yShift = (float)std::atof(argv[i].c_str()); 125 | std::cout << "y shift: " << dP.yShift << "\n"; 126 | continue; 127 | } 128 | 129 | if(s == "-cd") 130 | { 131 | i++; 132 | dP.constD = std::atof(argv[i].c_str()); 133 | std::cout << "const depth: " << dP.constD << "\n"; 134 | continue; 135 | } 136 | 137 | 138 | if(s == "-rdraw") 139 | { 140 | i++; 141 | tP.randomDraw = std::atoi(argv[i].c_str()); 142 | std::cout << "random draw: " << tP.randomDraw << "\n"; 143 | continue; 144 | } 145 | 146 | if(s == "-sid") 147 | { 148 | i++; 149 | tP.sessionString = argv[i]; 150 | std::cout << "session string: " << tP.sessionString << "\n"; 151 | dP.config = tP.sessionString; 152 | parseConfig(); 153 | continue; 154 | } 155 | 156 | if(s == "-oscript") 157 | { 158 | i++; 159 | tP.objScript = argv[i]; 160 | std::cout << "object script: " << tP.objScript << "\n"; 161 | continue; 162 | } 163 | 164 | if(s == "-sscript") 165 | { 166 | i++; 167 | tP.scoreScript = argv[i]; 168 | std::cout << "score script: " << tP.scoreScript << "\n"; 169 | continue; 170 | } 171 | 172 | if(s == "-omodel") 173 | { 174 | i++; 175 | tP.objModel = argv[i]; 176 | std::cout << "object model: " << tP.objModel << "\n"; 177 | continue; 178 | } 179 | 180 | 181 | if(s == "-rI") 182 | { 183 | i++; 184 | tP.ransacIterations = std::atoi(argv[i].c_str()); 185 | std::cout << "ransac iterations: " << tP.ransacIterations << "\n"; 186 | continue; 187 | } 188 | 189 | if(s == "-rT") 190 | { 191 | i++; 192 | tP.ransacInlierThreshold 
= (float)std::atof(argv[i].c_str()); 193 | std::cout << "ransac inlier threshold: " << tP.ransacInlierThreshold << "\n"; 194 | continue; 195 | } 196 | 197 | if(s == "-rRI") 198 | { 199 | i++; 200 | tP.ransacRefinementIterations = std::atoi(argv[i].c_str()); 201 | std::cout << "ransac iterations (refinement): " << tP.ransacRefinementIterations << "\n"; 202 | continue; 203 | } 204 | 205 | if(s == "-iSS") 206 | { 207 | i++; 208 | dP.imageSubSample = std::atoi(argv[i].c_str()); 209 | std::cout << "test image sub sampling: " << dP.imageSubSample << "\n"; 210 | continue; 211 | } 212 | 213 | if(s == "-cSS") 214 | { 215 | i++; 216 | dP.cnnSubSample = std::atoi(argv[i].c_str()); 217 | std::cout << "CNN sub sampling: " << dP.cnnSubSample << "\n"; 218 | continue; 219 | } 220 | 221 | //nothing found 222 | { 223 | std::cout << "unkown argument: " << argv[i] << "\n"; 224 | continue; 225 | } 226 | } 227 | } 228 | 229 | void GlobalProperties::parseCmdLine(int argc, const char* argv[]) 230 | { 231 | std::vector argVec; 232 | for(int i = 1; i < argc; i++) argVec.push_back(argv[i]); 233 | readArguments(argVec); 234 | 235 | dP.imageWidth += 2 * dP.imgPadding; 236 | dP.imageHeight += 2 * dP.imgPadding; 237 | } 238 | 239 | void GlobalProperties::parseConfig() 240 | { 241 | std::string configFile = dP.config + ".config"; 242 | std::cout << BLUETEXT("Parsing config file: ") << configFile << std::endl; 243 | 244 | std::ifstream file(configFile); 245 | if(!file.is_open()) return; 246 | 247 | std::vector argVec; 248 | 249 | std::string line; 250 | std::vector tokens; 251 | 252 | while(true) 253 | { 254 | if(file.eof()) break; 255 | 256 | std::getline(file, line); 257 | if(line.length() == 0) continue; 258 | if(line.at(0) == '#') continue; 259 | 260 | tokens = split(line); 261 | if(tokens.empty()) continue; 262 | 263 | argVec.push_back("-" + tokens[0]); 264 | argVec.push_back(tokens[1]); 265 | } 266 | 267 | readArguments(argVec); 268 | } 269 | 270 | cv::Mat_ GlobalProperties::getCamMat() 271 | { 272 | float centerX = dP.imageWidth / 2 + dP.xShift; 273 | float centerY = dP.imageHeight / 2 + dP.yShift; 274 | float f = dP.focalLength; 275 | 276 | cv::Mat_ camMat = cv::Mat_::zeros(3, 3); 277 | camMat(0, 0) = f; 278 | camMat(1, 1) = f; 279 | camMat(2, 2) = 1.f; 280 | 281 | camMat(0, 2) = centerX; 282 | camMat(1, 2) = centerY; 283 | 284 | return camMat; 285 | } 286 | 287 | static GlobalProperties* instance; 288 | -------------------------------------------------------------------------------- /code/properties.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 
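
For reference, parseConfig above reads a plain-text file named "<config>.config" in which every non-empty line that does not start with # holds a parameter abbreviation (without the leading dash) followed by its value; passing `-sid chess`, for example, makes the programs look for `chess.config`, and `default.config` is tried at startup. A hypothetical example with made-up values:

```
# example config file (illustrative values only)
iw 640
ih 480
fl 525.0
rI 256
cSS 8
```

Each line is expanded into the argument pair "-iw" / "640" and fed through readArguments, so config files and command-line flags share a single code path.
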
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #pragma once 31 | 32 | #include "types.h" 33 | 34 | #include 35 | #include 36 | 37 | /** Global parameters defined here are made available throughout the code. */ 38 | 39 | /** 40 | * @brief Parameters that affect the pose estimation. 41 | */ 42 | struct TestingParameters 43 | { 44 | std::string sessionString; // arbitrary string to be appended to output files 45 | 46 | std::string objScript; // lua script for learning object coordinate regression 47 | std::string scoreScript; // lua script for hypothesis score regression 48 | std::string objModel; // file storing the object coordinate regression CNN 49 | 50 | int ransacIterations; // initial number of pose hypotheses drawn per frame 51 | int ransacRefinementIterations; // number of refinement iterations 52 | float ransacInlierThreshold; // reprojection error threshold (in px) for measuring inliers in the pose pipeline 53 | 54 | bool randomDraw; // draw a hypothesis randomly (true) or take the one with the largest score (false) 55 | }; 56 | 57 | /** 58 | * @brief Parameters that affect the data. 59 | */ 60 | struct DatasetParameters 61 | { 62 | std::string config; // name of the config file to read (a file that lists parameter values) 63 | 64 | //dataset parameters 65 | float focalLength; // focal length of the RGB camera 66 | float xShift; // x position of the principal point of the RGB camera 67 | float yShift; // y position of the principal point of the RGB camera 68 | int imgPadding; // padding (px) added to each side of the input image 69 | 70 | int imageWidth; // width of the input images (px) 71 | int imageHeight; // height of the input images (px) 72 | 73 | float constD; // if positive, a constant-depth heuristic will be used when initializing scene coordinates 74 | int imageSubSample; // use only every n-th dataset image 75 | 76 | int cnnSubSample; // sub sampling of the CNN output wrt the input 77 | 78 | }; 79 | 80 | /** 81 | * @brief Singleton class for providing parameter settings globally throughout the code. 82 | */ 83 | class GlobalProperties 84 | { 85 | protected: 86 | /** 87 | * @brief Constructor. Sets default values for all parameters. 88 | */ 89 | GlobalProperties(); 90 | public: 91 | // Dataset parameters 92 | DatasetParameters dP; 93 | 94 | // Testing parameters 95 | TestingParameters tP; 96 | 97 | /** 98 | * @brief Get a pointer to the singleton. It will create an instance if none exists yet. 99 | * 100 | * @return GlobalProperties* Singleton pointer. 101 | */ 102 | static GlobalProperties* getInstance(); 103 | 104 | /** 105 | * @brief Returns the 3x3 camera matrix consisting of the intrinsic camera parameters. 106 | * 107 | * @return cv::Mat_< float > Camera/calibration matrix. 
108 | */ 109 | cv::Mat_<float> getCamMat(); 110 | 111 | /** 112 | * @brief Parse the arguments given in the command line and set parameters accordingly 113 | * 114 | * @param argc Number of parameters. 115 | * @param argv Array of parameters. 116 | * @return void 117 | */ 118 | void parseCmdLine(int argc, const char* argv[]); 119 | 120 | /** 121 | * @brief Parse a config file (given by the global parameter "config") and set parameters accordingly 122 | * 123 | * @return void 124 | */ 125 | void parseConfig(); 126 | 127 | /** 128 | * @brief Process a list of arguments and set parameters accordingly. 129 | * 130 | * Each parameter consists of a dash followed by an abbreviation of the parameter. In most cases the next entry in the list should be the value that the parameter should take. 131 | * 132 | * @param argv List of parameters and parameter values. 133 | * @return bool 134 | */ 135 | bool readArguments(std::vector<std::string> argv); 136 | 137 | /** 138 | * @brief Returns the image X dimension as passed to the CNN (without padding). 139 | * @return CNN input width. 140 | */ 141 | int getCNNInputDimX(){return dP.imageWidth - 2 * dP.imgPadding;} 142 | 143 | /** 144 | * @brief Returns the image Y dimension as passed to the CNN (without padding). 145 | * @return CNN input height. 146 | */ 147 | int getCNNInputDimY(){return dP.imageHeight - 2 * dP.imgPadding;} 148 | 149 | /** 150 | * @brief Returns the X dimension of the CNN output. 151 | * @return CNN output width. 152 | */ 153 | int getCNNOutputDimX(){return ceil(getCNNInputDimX() / (float) dP.cnnSubSample);} 154 | 155 | /** 156 | * @brief Returns the Y dimension of the CNN output. 157 | * @return CNN output height. 158 | */ 159 | int getCNNOutputDimY(){return ceil(getCNNInputDimY() / (float) dP.cnnSubSample);} 160 | 161 | private: 162 | static GlobalProperties* instance; // singleton instance 163 | }; 164 | -------------------------------------------------------------------------------- /code/read_data.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
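
To make the interplay of imgPadding and cnnSubSample above concrete, here is the arithmetic for the default VGA setup (640x480 input, 4 px padding, sub sampling 8); the numbers are purely illustrative:

```cpp
// after parseCmdLine:    imageWidth = 640 + 2*4 = 648,  imageHeight = 480 + 2*4 = 488
// CNN input dimensions:  getCNNInputDimX()  = 648 - 2*4 = 640
//                        getCNNInputDimY()  = 488 - 2*4 = 480
// CNN output dimensions: getCNNOutputDimX() = ceil(640 / 8.f) = 80
//                        getCNNOutputDimY() = ceil(480 / 8.f) = 60
```

The scene coordinate CNN therefore predicts an 80x60 grid of 3D coordinates for one VGA frame.
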
IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #include "read_data.h" 31 | #include "util.h" 32 | 33 | #include 34 | #include "png++/png.hpp" 35 | 36 | namespace jp 37 | { 38 | void readData(const std::string dFile, jp::img_depth_t& image) 39 | { 40 | if(endsWith(dFile, "png")) 41 | { 42 | png::image imgPng(dFile); 43 | image = jp::img_depth_t(imgPng.get_height(), imgPng.get_width()); 44 | 45 | for(int x = 0; x < imgPng.get_width(); x++) 46 | for(int y = 0; y < imgPng.get_height(); y++) 47 | { 48 | image(y, x) = (jp::depth_t) imgPng.get_pixel(x, y); 49 | } 50 | } 51 | else if(endsWith(dFile, "tiff")) 52 | { 53 | image = cv::imread(dFile, CV_LOAD_IMAGE_UNCHANGED); 54 | } 55 | else 56 | { 57 | std::cout << REDTEXT("ERROR: Unknown file format while reading depth files!") << std::endl; 58 | } 59 | } 60 | 61 | void readData(const std::string bgrFile, jp::img_bgr_t& image) 62 | { 63 | image = cv::imread(bgrFile); 64 | } 65 | 66 | void readData(const std::string bgrFile, const std::string dFile, jp::img_bgrd_t& image) 67 | { 68 | readData(bgrFile, image.bgr); 69 | readData(dFile, image.depth); 70 | } 71 | 72 | void readData(const std::string coordFile, jp::img_coord_t& image) 73 | { 74 | png::image> imgPng(coordFile); 75 | image = jp::img_coord_t(imgPng.get_height(), imgPng.get_width()); 76 | 77 | for(int x = 0; x < imgPng.get_width(); x++) 78 | for(int y = 0; y < imgPng.get_height(); y++) 79 | { 80 | image(y, x)(0) = (jp::coord1_t) imgPng.get_pixel(x, y).red; 81 | image(y, x)(1) = (jp::coord1_t) imgPng.get_pixel(x, y).green; 82 | image(y, x)(2) = (jp::coord1_t) imgPng.get_pixel(x, y).blue; 83 | } 84 | } 85 | 86 | 87 | bool readData(const std::string infoFile, jp::cv_trans_t& pose) 88 | { 89 | std::ifstream file(infoFile); 90 | if(!file.is_open()) 91 | { 92 | return false; 93 | } 94 | 95 | std::string line; 96 | std::vector tokens; 97 | 98 | cv::Mat_ trans = cv::Mat_::eye(4, 4); 99 | 100 | for(unsigned i = 0; i < 3; i++) 101 | { 102 | std::getline(file, line); 103 | tokens = split(line); 104 | 105 | trans(i, 0) = std::atof(tokens[0].c_str()); 106 | trans(i, 1) = std::atof(tokens[1].c_str()); 107 | trans(i, 2) = std::atof(tokens[2].c_str()); 108 | trans(i, 3) = std::atof(tokens[3].c_str()); 109 | } 110 | 111 | // our code estimates the inverted pose (i.e. the scene pose instead of the camera pose) 112 | trans = trans.inv(); 113 | 114 | // copy rotation 115 | cv::Rodrigues(trans.colRange(0, 3).rowRange(0, 3), pose.first); 116 | 117 | // copy translation 118 | pose.second = cv::Mat_(3, 1); 119 | trans.col(3).rowRange(0, 3).copyTo(pose.second); 120 | 121 | file.close(); 122 | return true; 123 | } 124 | } 125 | 126 | 127 | -------------------------------------------------------------------------------- /code/read_data.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 
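
The pose variant of readData above consumes the first three lines of the info file as the top three rows of a homogeneous 4x4 camera pose matrix (translations in meters) and then inverts it, because the pipeline estimates scene poses rather than camera poses. A hypothetical, whitespace-separated pose file looks like this; a fourth 0 0 0 1 row, if present, is simply never read:

```
0.98 -0.12  0.15  0.32
0.13  0.99 -0.05 -0.11
-0.14 0.07  0.99  1.25
0.00  0.00  0.00  1.00
```
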
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #pragma once 31 | 32 | #include "types.h" 33 | 34 | namespace jp 35 | { 36 | /** 37 | * @brief Read a depth image. 38 | * 39 | * Depth images are stored as 1 channel, 16 bit, unsigned short PNGs. 40 | * TIFF images are also supported. 41 | * 42 | * @param dFile Name of the file to read including the path. 43 | * @param image Output parameter. Depth image to read. 44 | * @return void 45 | */ 46 | void readData(const std::string dFile, jp::img_depth_t& image); 47 | 48 | /** 49 | * @brief Read a BGR image. 50 | * 51 | * BGR images are stored as 3 channel, 8 bit, unsigned char PNGs or JPGs. Channels are swapped from RGB. 52 | * 53 | * @param bgrFile Name of the file to read including the path. 54 | * @param image Output parameter. BGR image to read. 55 | * @return void 56 | */ 57 | void readData(const std::string bgrFile, jp::img_bgr_t& image); 58 | 59 | /** 60 | * @brief Reads an image with BGR channels and a depth channel. 61 | * 62 | * BGR and depth are read from separate files. See documentation of the respective readData methods. 63 | * 64 | * @param bgrFile Name of the file to read for the BGR image including the path. 65 | * @param dFile Name of the file to read for the depth image including the path. 66 | * @param image Output parameter. RGBD image to read. 67 | * @return void 68 | */ 69 | void readData(const std::string bgrFile, const std::string dFile, jp::img_bgrd_t& image); 70 | 71 | /** 72 | * @brief Read an object coordinate image. 73 | * 74 | * Coordinate images are stored as 3 channel, 16 bit, unsigned short PNGs. 75 | * 76 | * @param coordFile Name of the file to read including the path. 77 | * @param image Output parameter. Coordinate image to read. 78 | * @return void 79 | */ 80 | void readData(const std::string coordFile, jp::img_coord_t& image); 81 | 82 | /** 83 | * @brief Read a ground truth pose file. 84 | * 85 | * @param infoFile Name of the file to read including the path. 86 | * @param pose Output parameter. Pose to read. 87 | * @return True if the pose file could be opened and read, false otherwise. 88 | */ 89 | bool readData(const std::string infoFile, cv_trans_t &pose); 90 | } 91 | -------------------------------------------------------------------------------- /code/stop_watch.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #pragma once 31 | 32 | #include <chrono> 33 | 34 | /** 35 | * @brief Class for time measurements. 36 | */ 37 | class StopWatch 38 | { 39 | public: 40 | /** 41 | * @brief Constructor. Initializes the stop watch. 42 | */ 43 | StopWatch(){ init(); } 44 | 45 | /** 46 | * @brief Initialization. Starts the time measurement. 47 | * 48 | * @return void 49 | */ 50 | void init() 51 | { 52 | start = std::chrono::high_resolution_clock::now(); 53 | } 54 | 55 | /** 56 | * @brief Stops and restarts the time measurement. 57 | * 58 | * @return float The time in ms since the last init or stop call. 59 | */ 60 | float stop() 61 | { 62 | std::chrono::high_resolution_clock::time_point now; 63 | now = std::chrono::high_resolution_clock::now(); 64 | 65 | std::chrono::high_resolution_clock::duration duration = now - start; 66 | 67 | start = now; 68 | 69 | return static_cast<float>( 70 | 1000.0 * std::chrono::duration_cast<std::chrono::duration<double>>( 71 | duration).count()); 72 | } 73 | 74 | private: 75 | std::chrono::high_resolution_clock::time_point start; // start time of the current measurement. 76 | }; 77 | -------------------------------------------------------------------------------- /code/test_ransac.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 
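
A minimal usage sketch for the StopWatch above, mirroring the timing pattern used throughout the training and test programs:

```cpp
#include "stop_watch.h"
#include <iostream>

int main()
{
    StopWatch stopW;              // starts measuring on construction

    // ... some expensive stage, e.g. a CNN forward pass ...

    float ms = stopW.stop();      // elapsed ms since construction; also restarts the watch
    std::cout << "Stage 1 done in " << ms / 1000 << "s." << std::endl;

    // ... a second stage ...

    std::cout << "Stage 2 done in " << stopW.stop() << "ms." << std::endl;
    return 0;
}
```
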
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #include 31 | #include 32 | 33 | #include "properties.h" 34 | #include "thread_rand.h" 35 | #include "util.h" 36 | #include "stop_watch.h" 37 | #include "dataset.h" 38 | 39 | #include "lua_calls.h" 40 | #include "cnn.h" 41 | 42 | int main(int argc, const char* argv[]) 43 | { 44 | // read parameters 45 | GlobalProperties* gp = GlobalProperties::getInstance(); 46 | gp->parseConfig(); 47 | gp->parseCmdLine(argc, argv); 48 | 49 | int objHyps = gp->tP.ransacIterations; 50 | int inlierThreshold2D = gp->tP.ransacInlierThreshold; 51 | int refSteps = gp->tP.ransacRefinementIterations; 52 | 53 | std::string baseScriptRGB = gp->tP.objScript; 54 | std::string baseScriptObj = gp->tP.scoreScript; 55 | std::string modelFileRGB = gp->tP.objModel; 56 | 57 | // setup data and torch 58 | std::cout << std::endl << BLUETEXT("Loading test set ...") << std::endl; 59 | jp::Dataset testDataset = jp::Dataset("./test/"); 60 | 61 | // lua and models 62 | std::cout << "Loading script: " << baseScriptObj << std::endl; 63 | lua_State* stateObj = luaL_newstate(); 64 | luaL_openlibs(stateObj); 65 | execute(baseScriptObj.c_str(), stateObj); 66 | loadScore(inlierThreshold2D, gp->getCNNOutputDimX(), gp->getCNNOutputDimY(), stateObj); 67 | 68 | std::cout << "Loading script: " << baseScriptRGB << std::endl; 69 | lua_State* stateRGB = luaL_newstate(); 70 | luaL_openlibs(stateRGB); 71 | execute(baseScriptRGB.c_str(), stateRGB); 72 | loadModel(modelFileRGB, gp->getCNNInputDimX(), gp->getCNNInputDimY(), gp->getCNNOutputDimX(), gp->getCNNOutputDimY(), stateRGB); 73 | 74 | cv::Mat camMat = gp->getCamMat(); 75 | 76 | std::ofstream testFile; 77 | testFile.open("ransac_test_loss_"+baseScriptRGB+"_rdraw"+intToString(gp->tP.randomDraw)+"_"+gp->tP.sessionString+".txt"); // contains evaluation information for the whole test sequence 78 | 79 | setEvaluate(stateRGB); 80 | setEvaluate(stateObj); 81 | 82 | double avgCorrect = 0; 83 | 84 | std::vector expLosses; 85 | std::vector sfEntropies; 86 | std::vector 
rotErrs; 87 | std::vector tErrs; 88 | 89 | std::ofstream testErrFile; 90 | testErrFile.open("ransac_test_errors_"+baseScriptRGB+"_rdraw"+intToString(gp->tP.randomDraw)+"_"+gp->tP.sessionString+".txt"); // contains evaluation information for each test image 91 | 92 | for(unsigned i = 0; i < testDataset.size(); i+= gp->dP.imageSubSample) 93 | { 94 | std::cout << YELLOWTEXT("Processing test image " << i << " of " << testDataset.size()) << "." << std::endl; 95 | 96 | // load test image 97 | jp::img_bgr_t testRGB; 98 | testDataset.getBGR(i, testRGB); 99 | 100 | jp::cv_trans_t hypGT; 101 | testDataset.getPose(i, hypGT); 102 | 103 | std::cout << BLUETEXT("Predicting object coordinates.") << std::endl; 104 | 105 | cv::Mat_ sampling; 106 | std::vector> imgMaps; 107 | jp::img_coord_t estObj = getCoordImg(testRGB, sampling, imgMaps, false, stateRGB); 108 | 109 | // process frame (same function used in training, hence most of the variables below are not used here), see method documentation for parameter explanation 110 | std::vector refHyps; 111 | std::vector sfScores; 112 | std::vector> sampledPoints; 113 | std::vector losses; 114 | std::vector> inlierMaps; 115 | double tErr; 116 | double rotErr; 117 | int hypIdx; 118 | 119 | double expectedLoss; 120 | double sfEntropy; 121 | bool correct; 122 | 123 | processImage( 124 | hypGT, 125 | stateObj, 126 | objHyps, 127 | camMat, 128 | inlierThreshold2D, 129 | refSteps, 130 | expectedLoss, 131 | sfEntropy, 132 | correct, 133 | refHyps, 134 | sfScores, 135 | estObj, 136 | sampling, 137 | sampledPoints, 138 | losses, 139 | inlierMaps, 140 | tErr, 141 | rotErr, 142 | hypIdx, 143 | false); 144 | 145 | avgCorrect += correct; 146 | 147 | // invert pose to get camera pose (we estimated the scene pose) 148 | jp::cv_trans_t invHyp = getInvHyp(refHyps[hypIdx]); 149 | 150 | testErrFile 151 | << expectedLoss << " " // 0 - expected loss over the hypothesis pool 152 | << sfEntropy << " " // 1 - entropy of the hypothesis score distribution 153 | << losses[hypIdx] << " " // 2 - loss of the selected hypothesis 154 | << tErr << " " // 3 - translational error in m 155 | << rotErr << " " // 4 - rotational error in deg 156 | << invHyp.first.at(0, 0) << " " // 5 - selected pose, rotation (1st component of Rodrigues vector) 157 | << invHyp.first.at(1, 0) << " " // 6 - selected pose, rotation (2nd component of Rodrigues vector) 158 | << invHyp.first.at(2, 0) << " " // 7 - selected pose, rotation (3rd component of Rodrigues vector) 159 | << invHyp.second.at(0, 0) << " " // 8 - selected pose, translation in m (x) 160 | << invHyp.second.at(0, 1) << " " // 9 - selected pose, translation in m (y) 161 | << invHyp.second.at(0, 2) << " " // 10 - selected pose, translation in m (z) 162 | << std::endl; 163 | 164 | expLosses.push_back(expectedLoss); 165 | sfEntropies.push_back(sfEntropy); 166 | tErrs.push_back(tErr); 167 | rotErrs.push_back(rotErr); 168 | } 169 | // mean and stddev of loss 170 | std::vector lossMean; 171 | std::vector lossStdDev; 172 | cv::meanStdDev(expLosses, lossMean, lossStdDev); 173 | 174 | // mean and stddev of score entropy 175 | std::vector entropyMean; 176 | std::vector entropyStdDev; 177 | cv::meanStdDev(sfEntropies, entropyMean, entropyStdDev); 178 | 179 | avgCorrect /= testDataset.size() / gp->dP.imageSubSample; 180 | 181 | // median of rotational and translational errors 182 | std::sort(rotErrs.begin(), rotErrs.end()); 183 | std::sort(tErrs.begin(), tErrs.end()); 184 | 185 | double medianRotErr = rotErrs[rotErrs.size() / 2]; 186 | double medianTErr = 
tErrs[tErrs.size() / 2]; 187 | 188 | std::cout << "-----------------------------------------------------------" << std::endl; 189 | std::cout << BLUETEXT("Avg. test loss: " << lossMean[0] << ", accuracy: " << avgCorrect * 100 << "%") << std::endl; 190 | std::cout << "Median Rot. Error: " << medianRotErr << "deg, Median T. Error: " << medianTErr * 100 << "cm." << std::endl; 191 | 192 | testFile 193 | << avgCorrect << " " // 0 - percentage of correct poses 194 | << lossMean[0] << " " // 1 - mean loss of selected hypotheses 195 | << lossStdDev[0] << " " // 2 - standard deviation of losses of selected hypotheses 196 | << entropyMean[0] << " " // 3 - mean of the score distribution entropy 197 | << entropyStdDev[0] << " " // 4 - standard deviation of the score distribution entropy 198 | << medianRotErr << " " // 5 - median rotational error of selected hypotheses 199 | << medianTErr // 6 - median translational error (in m) of selected hypotheses 200 | << std::endl; 201 | 202 | testFile.close(); 203 | testErrFile.close(); 204 | 205 | lua_close(stateRGB); 206 | lua_close(stateObj); 207 | 208 | return 0; 209 | } 210 | -------------------------------------------------------------------------------- /code/thread_rand.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
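
The correct flag averaged into the accuracy above is computed inside processImage (defined in cnn.h, which is not part of this listing). In the paper, a pose counts as correct when it deviates from ground truth by at most 5 cm and 5 degrees, so the check is equivalent to this sketch (variable names as in the test loop above):

```cpp
// restatement of the accuracy criterion; the actual check lives in cnn.h
bool correct = (tErr < 0.05) && (rotErr < 5.0);  // tErr in m (5 cm), rotErr in deg
```
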
27 | */ 28 | 29 | 30 | #include "thread_rand.h" 31 | #include 32 | 33 | std::vector ThreadRand::generators; 34 | bool ThreadRand::initialised = false; 35 | 36 | void ThreadRand::forceInit(unsigned seed) 37 | { 38 | initialised = false; 39 | init(seed); 40 | } 41 | 42 | void ThreadRand::init(unsigned seed) 43 | { 44 | #pragma omp critical 45 | { 46 | if(!initialised) 47 | { 48 | unsigned nThreads = omp_get_max_threads(); 49 | 50 | for(unsigned i = 0; i < nThreads; i++) 51 | { 52 | generators.push_back(std::mt19937()); 53 | generators[i].seed(i+seed); 54 | } 55 | 56 | initialised = true; 57 | } 58 | } 59 | } 60 | 61 | int ThreadRand::irand(int min, int max, int tid) 62 | { 63 | std::uniform_int_distribution dist(min, max); 64 | 65 | unsigned threadID = omp_get_thread_num(); 66 | if(tid >= 0) threadID = tid; 67 | 68 | if(!initialised) init(); 69 | 70 | return dist(ThreadRand::generators[threadID]); 71 | } 72 | 73 | double ThreadRand::drand(double min, double max, int tid) 74 | { 75 | std::uniform_real_distribution dist(min, max); 76 | 77 | unsigned threadID = omp_get_thread_num(); 78 | if(tid >= 0) threadID = tid; 79 | 80 | if(!initialised) init(); 81 | 82 | return dist(ThreadRand::generators[threadID]); 83 | } 84 | 85 | double ThreadRand::dgauss(double mean, double stdDev, int tid) 86 | { 87 | std::normal_distribution dist(mean, stdDev); 88 | 89 | unsigned threadID = omp_get_thread_num(); 90 | if(tid >= 0) threadID = tid; 91 | 92 | if(!initialised) init(); 93 | 94 | return dist(ThreadRand::generators[threadID]); 95 | } 96 | 97 | int irand(int incMin, int excMax, int tid) 98 | { 99 | return ThreadRand::irand(incMin, excMax - 1, tid); 100 | } 101 | 102 | double drand(double incMin, double incMax,int tid) 103 | { 104 | return ThreadRand::drand(incMin, incMax, tid); 105 | } 106 | 107 | int igauss(int mean, int stdDev, int tid) 108 | { 109 | return (int) ThreadRand::dgauss(mean, stdDev, tid); 110 | } 111 | 112 | double dgauss(double mean, double stdDev, int tid) 113 | { 114 | return ThreadRand::dgauss(mean, stdDev, tid); 115 | } 116 | -------------------------------------------------------------------------------- /code/thread_rand.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #pragma once 31 | 32 | #include <random> 33 | 34 | /** Classes and methods for generating random numbers in multi-threaded programs. */ 35 | 36 | /** 37 | * @brief Provides random numbers for multiple threads. 38 | * 39 | * Singleton class. Holds a random number generator for each thread and gives random numbers for the current thread. 40 | */ 41 | class ThreadRand 42 | { 43 | public: 44 | /** 45 | * @brief Returns a random integer (uniform distribution). 46 | * 47 | * @param min Minimum value of the random integer (inclusive). 48 | * @param max Maximum value of the random integer (inclusive). 49 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 50 | * @return int Random integer value. 51 | */ 52 | static int irand(int min, int max, int tid = -1); 53 | 54 | /** 55 | * @brief Returns a random double value (uniform distribution). 56 | * 57 | * @param min Minimum value of the random double (inclusive). 58 | * @param max Maximum value of the random double (inclusive). 59 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 60 | * @return double Random double value. 61 | */ 62 | static double drand(double min, double max, int tid = -1); 63 | 64 | /** 65 | * @brief Returns a random double value (Gauss distribution). 66 | * 67 | * @param mean Mean of the Gauss distribution to sample from. 68 | * @param stdDev Standard deviation of the Gauss distribution to sample from. 69 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 70 | * @return double Random double value. 71 | */ 72 | static double dgauss(double mean, double stdDev, int tid = -1); 73 | 74 | /** 75 | * @brief Re-initialize the object with the given seed. 76 | * 77 | * @param seed Seed to initialize the random number generators (seed is incremented by one for each generator). 78 | * @return void 79 | */ 80 | static void forceInit(unsigned seed); 81 | 82 | private: 83 | /** 84 | * @brief List of random number generators. One for each thread. 85 | * 86 | */ 87 | static std::vector<std::mt19937> generators; 88 | /** 89 | * @brief True if the class has been initialized already. 90 | */ 91 | static bool initialised; 92 | /** 93 | * @brief Initialize class with the given seed. 94 | * 95 | * Method will create a random number generator for each thread. The given seed 96 | * will be incremented by one for each generator. This method is automatically 97 | * called when this class is used for the first time. 98 | * 99 | * @param seed Optional parameter. Seed to be used when initializing the generators. Will be incremented by one for each generator. 100 | * @return void 101 | */ 102 | static void init(unsigned seed = 1305); 103 | }; 104 | 105 | /** 106 | * @brief Returns a random integer (uniform distribution). 107 | * 108 | * This method uses the ThreadRand class. 109 | * 110 | * @param min Minimum value of the random integer (inclusive). 111 | * @param max Maximum value of the random integer (exclusive). 112 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 113 | * @return int Random integer value. 114 | */ 115 | int irand(int incMin, int excMax, int tid = -1); 116 | /** 117 | * @brief Returns a random double value (uniform distribution). 118 | * 119 | * This method uses the ThreadRand class. 120 | * 121 | * @param min Minimum value of the random double (inclusive). 122 | * @param max Maximum value of the random double (inclusive). 123 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 124 | * @return double Random double value. 125 | */ 126 | double drand(double incMin, double incMax, int tid = -1); 127 | 128 | /** 129 | * @brief Returns a random integer value (Gauss distribution). 130 | * 131 | * This method uses the ThreadRand class. 132 | * 133 | * @param mean Mean of the Gauss distribution to sample from. 134 | * @param stdDev Standard deviation of the Gauss distribution to sample from. 135 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 136 | * @return int Random integer value. 137 | */ 138 | int igauss(int mean, int stdDev, int tid = -1); 139 | 140 | /** 141 | * @brief Returns a random double value (Gauss distribution). 142 | * 143 | * This method uses the ThreadRand class. 144 | * 145 | * @param mean Mean of the Gauss distribution to sample from. 146 | * @param stdDev Standard deviation of the Gauss distribution to sample from. 147 | * @param tid Optional parameter. ID of the thread to use. If not given, the method will obtain the thread ID itself. 148 | * @return double Random double value. 149 | */ 150 | double dgauss(double mean, double stdDev, int tid = -1); 151 | -------------------------------------------------------------------------------- /code/train_obj.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
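
A short sketch of these helpers in an OpenMP context; the loop body is illustrative, but this mirrors how the training programs draw random image indices (compile with -fopenmp):

```cpp
#include "thread_rand.h"

int main()
{
    ThreadRand::forceInit(42);       // re-seed; generator i gets seed 42 + i

    #pragma omp parallel for
    for(int i = 0; i < 100; i++)
    {
        int idx = irand(0, 1000);    // uniform in [0, 1000), uses this thread's generator
        double n = dgauss(0.0, 1.0); // standard normal sample, also thread-safe
        // ... use idx and n ...
    }

    return 0;
}
```

Because every thread owns its own generator, no locking is needed after initialization; note that results are only reproducible for a fixed thread count.
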
IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #include 31 | 32 | #include "properties.h" 33 | #include "thread_rand.h" 34 | #include "util.h" 35 | #include "stop_watch.h" 36 | #include "dataset.h" 37 | 38 | #include "lua_calls.h" 39 | #include "cnn.h" 40 | 41 | /** 42 | * @brief Transforms a coordinate image to a floating point CNN output format. 43 | * 44 | * The image will be subsampled according to the CNN output dimensions. 45 | * 46 | * @param obj Coordinate image. 47 | * @param sampling Subsampling information of CNN output wrt to RGB input. 48 | * @return Coordinate image in CNN output format. 49 | */ 50 | cv::Mat_ getObjMap(const jp::img_coord_t& obj, const cv::Mat_& sampling) 51 | { 52 | cv::Mat_ objMap(sampling.size()); 53 | 54 | for(unsigned x = 0; x < sampling.cols; x++) 55 | for(unsigned y = 0; y < sampling.rows; y++) 56 | { 57 | objMap(y, x) = obj(sampling(y, x).y, sampling(y, x).x); 58 | } 59 | 60 | return objMap; 61 | } 62 | 63 | /** 64 | * @brief Calls the torch training function (forward + backward pass) 65 | * @param img Input RGB image. 66 | * @param labels Target output coordinates. 67 | * @param state LUA state. 68 | * @return Loss of current iteration. 69 | */ 70 | double train(const cv::Mat_& img, const cv::Mat_& labels, lua_State* state) 71 | { 72 | std::vector> imgV; 73 | imgV.push_back(img); 74 | 75 | std::vector> labelV; 76 | labelV.push_back(labels); 77 | 78 | lua_getglobal(state, "train"); 79 | pushMaps(imgV, state); 80 | pushMaps(labelV, state); 81 | lua_pcall(state, 2, 1, 0); 82 | 83 | double loss = lua_tonumber(state, -1); 84 | lua_pop(state, 1); 85 | 86 | return loss; 87 | } 88 | 89 | int main(int argc, const char* argv[]) 90 | { 91 | int trainingLimit = 300000; // total number of updates 92 | 93 | // read parameters 94 | GlobalProperties* gp = GlobalProperties::getInstance(); 95 | gp->parseConfig(); 96 | gp->parseCmdLine(argc, argv); 97 | 98 | std::string baseScriptRGB = gp->tP.objScript; 99 | 100 | std::cout << std::endl << BLUETEXT("Loading training set ...") << std::endl; 101 | jp::Dataset trainDataset = jp::Dataset("./training/"); 102 | 103 | std::cout << "Found " << trainDataset.size() << " training images." 
<< std::endl; 104 | 105 | // lua and model setup 106 | lua_State* state = luaL_newstate(); 107 | luaL_openlibs(state); 108 | 109 | execute(baseScriptRGB.c_str(), state); 110 | constructModel(gp->getCNNInputDimX(), gp->getCNNInputDimY(), gp->getCNNOutputDimX(), gp->getCNNOutputDimY(), state); 111 | setTraining(state); 112 | 113 | std::cout << GREENTEXT("Training CNN.") << std::endl; 114 | 115 | std::ofstream trainFile; 116 | trainFile.open("training_loss_"+baseScriptRGB+"_"+gp->tP.sessionString+".txt"); // contains the training loss per iteration 117 | 118 | cv::Mat_ sampling; 119 | StopWatch stopW; 120 | 121 | for(unsigned r = 0; r < trainingLimit; r++) 122 | { 123 | std::cout << BLUETEXT("Starting training round " << r) << std::endl; 124 | int imgIdx = irand(0, trainDataset.size()); 125 | 126 | // load training image 127 | jp::img_bgr_t img; 128 | trainDataset.getBGR(imgIdx, img); 129 | 130 | jp::img_coord_t obj; 131 | trainDataset.getObj(imgIdx, obj); 132 | 133 | // convert to CNN format 134 | cv::Mat_ imgMap = getImgMap(img, sampling, true); 135 | cv::Mat_ objMap = getObjMap(obj, sampling); 136 | 137 | // pass to LUA for forward + backward pass 138 | float loss = train(imgMap, objMap, state); 139 | 140 | trainFile << r << " " << loss << std::endl; 141 | std::cout << YELLOWTEXT("Training loss: " << loss << " in " << stopW.stop() << "ms.") << std::endl; 142 | } 143 | 144 | trainFile.close(); 145 | 146 | lua_close(state); 147 | return 0; 148 | } 149 | -------------------------------------------------------------------------------- /code/train_ransac.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016, TU Dresden 3 | Copyright (c) 2017, Heidelberg University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | * Neither the name of the TU Dresden, Heidelberg University nor the 14 | names of its contributors may be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY 21 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
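
Both getImgMap and getObjMap above are driven by the sampling grid that getCoordImg fills in: each CNN output cell stores the input-image pixel it corresponds to. The grid itself is constructed in cnn.h, which is not part of this listing; conceptually it looks like the following sketch, where the stride, offsets and sizes are assumed for illustration:

```cpp
// hypothetical construction of the sub sampling grid (the real one lives in cnn.h)
int stride = 8;                   // dP.cnnSubSample
int outW = 80, outH = 60;         // CNN output dimensions
cv::Mat_<cv::Point2i> sampling(outH, outW);

for(int y = 0; y < outH; y++)
    for(int x = 0; x < outW; x++)
        sampling(y, x) = cv::Point2i(x * stride + stride / 2,   // input x (assumed patch center)
                                     y * stride + stride / 2);  // input y
```

getObjMap then simply gathers obj(sampling(y, x)), so coordinate targets and CNN predictions stay aligned cell by cell.
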
27 | */ 28 | 29 | 30 | #include 31 | #include 32 | 33 | #include "properties.h" 34 | #include "thread_rand.h" 35 | #include "util.h" 36 | #include "stop_watch.h" 37 | #include "dataset.h" 38 | 39 | #include "lua_calls.h" 40 | #include "cnn.h" 41 | 42 | int main(int argc, const char* argv[]) 43 | { 44 | // read parameters 45 | GlobalProperties* gp = GlobalProperties::getInstance(); 46 | gp->parseConfig(); 47 | gp->parseCmdLine(argc, argv); 48 | 49 | int trainingRounds = 50000; 50 | 51 | int refSteps = gp->tP.ransacRefinementIterations; 52 | int objHyps = gp->tP.ransacIterations; 53 | int inlierThreshold2D = gp->tP.ransacInlierThreshold; 54 | 55 | std::string baseScriptRGB = gp->tP.objScript; 56 | std::string baseScriptObj = gp->tP.scoreScript; 57 | std::string modelFileRGB = gp->tP.objModel; 58 | 59 | std::cout << std::endl << BLUETEXT("Loading training set ...") << std::endl; 60 | jp::Dataset trainingDataset = jp::Dataset("./training/"); 61 | 62 | // lua and models 63 | std::cout << "Loading script: " << baseScriptObj << std::endl; 64 | lua_State* stateObj = luaL_newstate(); 65 | luaL_openlibs(stateObj); 66 | execute(baseScriptObj.c_str(), stateObj); 67 | loadScore(inlierThreshold2D, gp->getCNNOutputDimX(), gp->getCNNOutputDimY(), stateObj); 68 | setTraining(stateObj); 69 | 70 | std::cout << "Loading script: " << baseScriptRGB << std::endl; 71 | lua_State* stateRGB = luaL_newstate(); 72 | luaL_openlibs(stateRGB); 73 | execute(baseScriptRGB.c_str(), stateRGB); 74 | loadModel(modelFileRGB, gp->getCNNInputDimX(), gp->getCNNInputDimY(), gp->getCNNOutputDimX(), gp->getCNNOutputDimY(), stateRGB); 75 | setTraining(stateRGB); 76 | 77 | cv::Mat camMat = gp->getCamMat(); 78 | 79 | std::ofstream trainFile; 80 | trainFile.open("ransac_training_loss"+gp->tP.objScript+"_"+gp->tP.sessionString+".txt"); // contains training information per iteration 81 | 82 | for(unsigned round = 0; round <= trainingRounds; round++) 83 | { 84 | std::cout << YELLOWTEXT("Round " << round << " of " << trainingRounds << ".") << std::endl; 85 | 86 | int imgID = irand(0, trainingDataset.size()); 87 | 88 | // load training image 89 | jp::img_bgr_t imgBGR; 90 | trainingDataset.getBGR(imgID, imgBGR); 91 | 92 | jp::cv_trans_t hypGT; 93 | trainingDataset.getPose(imgID, hypGT); 94 | 95 | std::cout << BLUETEXT("Predicting object coordinates.") << std::endl; 96 | 97 | // forward pass 98 | cv::Mat_ sampling; 99 | std::vector> imgMaps; 100 | jp::img_coord_t estObj = getCoordImg(imgBGR, sampling, imgMaps, true, stateRGB); 101 | 102 | cv::Mat_ dLoss_dObj = cv::Mat_::zeros(sampling.rows * sampling.cols, 3); // acumulate hypotheses gradients for patches 103 | 104 | StopWatch stopW; 105 | StopWatch globalStopW; 106 | 107 | double expectedLoss; 108 | double sfEntropy; 109 | bool correct; 110 | 111 | std::vector refHyps; 112 | std::vector sfScores; 113 | std::vector> sampledPoints; 114 | std::vector losses; 115 | std::vector> inlierMaps; 116 | double tErr; 117 | double rotErr; 118 | int hypIdx; 119 | 120 | processImage( 121 | hypGT, 122 | stateObj, 123 | objHyps, 124 | camMat, 125 | inlierThreshold2D, 126 | refSteps, 127 | expectedLoss, 128 | sfEntropy, 129 | correct, 130 | refHyps, 131 | sfScores, 132 | estObj, 133 | sampling, 134 | sampledPoints, 135 | losses, 136 | inlierMaps, 137 | tErr, 138 | rotErr, 139 | hypIdx); 140 | 141 | // === doing the backward pass ==================================================================== 142 | 143 | // --- path I, hypothesis path -------------------------------------------------------------------- 144 
        std::cout << BLUETEXT("Calculating gradients wrt hypotheses.") << std::endl;

        // precalculate gradients of each hypothesis wrt object coordinates
        std::vector<cv::Mat_<double>> dHyp_dObjs(refHyps.size());

        #pragma omp parallel for
        for(unsigned h = 0; h < refHyps.size(); h++)
        {
            // differentiate refinement around optimum found in last optimization iteration
            dHyp_dObjs[h] = cv::Mat_<double>::zeros(6, sampling.rows * sampling.cols * 3);

            if(sfScores[h] < EPS) continue; // skip hypotheses with no impact on the expectation

            // collect inlier correspondences of last refinement iteration
            std::vector<cv::Point2f> imgPts;
            std::vector<cv::Point2i> srcPts;
            std::vector<cv::Point3f> objPts;

            for(unsigned x = 0; x < inlierMaps[h].cols; x++)
            for(unsigned y = 0; y < inlierMaps[h].rows; y++)
            {
                if(inlierMaps[h](y, x))
                {
                    imgPts.push_back(sampling(y, x));
                    srcPts.push_back(cv::Point2i(x, y));
                    objPts.push_back(cv::Point3f(estObj(y, x)));
                }
            }

            if(imgPts.empty())
                continue;

            // calculate reprojection errors
            std::vector<cv::Point2f> projections;
            cv::Mat_<double> projectionsJ;
            cv::projectPoints(objPts, refHyps[h].first, refHyps[h].second, camMat, cv::Mat(), projections, projectionsJ);

            projectionsJ = projectionsJ.colRange(0, 6);

            // assemble the Jacobian of the refinement residuals
            cv::Mat_<double> jacobeanR = cv::Mat_<double>::zeros(objPts.size(), 6);
            cv::Mat_<double> dNdP(1, 2);
            cv::Mat_<double> dNdH(1, 6);

            for(int ptIdx = 0; ptIdx < objPts.size(); ptIdx++)
            {
                double err = std::max(cv::norm(projections[ptIdx] - imgPts[ptIdx]), EPS);
                if(err > CNN_OBJ_MAXINPUT)
                    continue;

                // derivative of the norm
                dNdP(0, 0) = 1 / err * (projections[ptIdx].x - imgPts[ptIdx].x);
                dNdP(0, 1) = 1 / err * (projections[ptIdx].y - imgPts[ptIdx].y);

                dNdH = dNdP * projectionsJ.rowRange(2 * ptIdx, 2 * ptIdx + 2);
                dNdH.copyTo(jacobeanR.row(ptIdx));
            }

            // calculate the pseudo-inverse
            jacobeanR = - (jacobeanR.t() * jacobeanR).inv() * jacobeanR.t();

            for(int ptIdx = 0; ptIdx < objPts.size(); ptIdx++)
            {
                cv::Mat_<double> dNdO = dProjectdObj(imgPts[ptIdx], objPts[ptIdx], refHyps[h], camMat);
                dNdO = jacobeanR.col(ptIdx) * dNdO;

                int dIdx = srcPts[ptIdx].y * sampling.cols * 3 + srcPts[ptIdx].x * 3;
                dNdO.copyTo(dHyp_dObjs[h].colRange(dIdx, dIdx + 3));
            }
        }

        // combine gradients per hypothesis
        std::vector<cv::Mat_<double>> gradients(refHyps.size());

        #pragma omp parallel for
        for(unsigned h = 0; h < refHyps.size(); h++)
        {
            cv::Mat_<double> dLoss_dHyp = dLossMax(refHyps[h], hypGT);
            gradients[h] = dLoss_dHyp * dHyp_dObjs[h];
        }

        for(unsigned h = 0; h < refHyps.size(); h++)
        for(unsigned idx = 0; idx < sampling.rows * sampling.cols; idx++)
        {
            dLoss_dObj(idx, 0) += sfScores[h] * gradients[h](idx * 3 + 0);
            dLoss_dObj(idx, 1) += sfScores[h] * gradients[h](idx * 3 + 1);
            dLoss_dObj(idx, 2) += sfScores[h] * gradients[h](idx * 3 + 2);
        }

        std::cout << BLUETEXT("Coord statistics:") << std::endl;
        std::cout << "Max gradient: " << getMax(dLoss_dObj) << std::endl;
        std::cout << "Avg gradient: " << getAvg(dLoss_dObj) << std::endl;
        std::cout << "Med gradient: " << getMed(dLoss_dObj) << std::endl << std::endl;

        std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
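        // Sketch of what "path II" below contributes: the training objective is the
        // expectation E[l] = sum_h s_h * l_h over hypothesis losses l_h, weighted by
        // softmax scores s_h. Besides the hypothesis path above, the loss therefore
        // also depends on the scene coordinates through the scores themselves; for a
        // softmax over score values z_k, d s_h / d z_k = s_h * ([h == k] - s_k).
        // dSMScore is assumed here to return this score-path gradient per hypothesis,
        // already mapped back to the scene coordinate predictions.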

        // --- path II, score path --------------------------------------------------------------------
        std::cout << BLUETEXT("Calculating gradients wrt scores.") << std::endl;

        std::vector<cv::Mat_<double>> dLoss_dScore_dObjs = dSMScore(estObj, sampling, sampledPoints, losses, sfScores, stateObj);

        // accumulate score gradients
        cv::Mat_<double> dLoss_dScore_dObj = cv::Mat_<double>::zeros(sampling.rows * sampling.cols, 3);

        for(unsigned h = 0; h < refHyps.size(); h++)
        {
            dLoss_dScore_dObj += dLoss_dScore_dObjs[h];
        }

        std::cout << BLUETEXT("Score statistics:") << std::endl;
        std::cout << "Max gradient: " << getMax(dLoss_dScore_dObj) << std::endl;
        std::cout << "Avg gradient: " << getAvg(dLoss_dScore_dObj) << std::endl;
        std::cout << "Med gradient: " << getMed(dLoss_dScore_dObj) << std::endl << std::endl;

        dLoss_dObj += dLoss_dScore_dObj;

        std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;

        std::cout << "Time of RANSAC Iteration: " << globalStopW.stop() / 1000 << "s." << std::endl;

        std::cout << BLUETEXT("Update object coordinate CNN.") << std::endl;

        // learning hyperparameters were tuned for object coordinates in mm
        // we rescale gradients here instead of scaling hyperparameters (learning rate, clamping etc.)
        dLoss_dObj /= 1000.0;

        std::cout << BLUETEXT("Combined statistics:") << std::endl;
        int zeroGrads = 0;
        for(int row = 0; row < dLoss_dObj.rows; row++)
        {
            if(cv::norm(dLoss_dObj.row(row)) < EPS)
                zeroGrads++;
        }

        std::cout << "Max gradient: " << getMax(dLoss_dObj) << std::endl;
        std::cout << "Avg gradient: " << getAvg(dLoss_dObj) << std::endl;
        std::cout << "Med gradient: " << getMed(dLoss_dObj) << std::endl;
        std::cout << "Zero gradients: " << zeroGrads << std::endl;

        // backward pass
        backward(expectedLoss, imgMaps, dLoss_dObj, stateRGB);

        std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
        globalStopW.stop();

        trainFile
            << round << " "        // 0 - iteration number
            << expectedLoss << " " // 1 - expected loss
            << sfEntropy           // 2 - entropy of score distribution
            << std::endl;

        std::cout << std::endl;
    }

    trainFile.close();

    lua_close(stateRGB);
    lua_close(stateObj);

    return 0;
}
-------------------------------------------------------------------------------- /code/train_repro.cpp: --------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#include <iostream>
#include <fstream>

#include "properties.h"
#include "thread_rand.h"
#include "util.h"
#include "stop_watch.h"
#include "dataset.h"

#include "lua_calls.h"
#include "cnn.h"

int main(int argc, const char* argv[])
{
    // read parameters
    GlobalProperties* gp = GlobalProperties::getInstance();
    gp->parseConfig();
    gp->parseCmdLine(argc, argv);

    int trainingRounds = 300000; // total number of updates

    std::string baseScriptRGB = gp->tP.objScript;
    std::string modelFileRGB = gp->tP.objModel; // initialized CNN

    std::cout << std::endl << BLUETEXT("Loading training set ...") << std::endl;
    jp::Dataset trainingDataset = jp::Dataset("./training/");

    // set up Lua and load the model
    std::cout << "Loading script: " << baseScriptRGB << std::endl;
    lua_State* stateRGB = luaL_newstate();
    luaL_openlibs(stateRGB);
    execute(baseScriptRGB.c_str(), stateRGB);
    loadModel(modelFileRGB, gp->getCNNInputDimX(), gp->getCNNInputDimY(), gp->getCNNOutputDimX(), gp->getCNNOutputDimY(), stateRGB);
    setTraining(stateRGB);

    cv::Mat camMat = gp->getCamMat();

    std::ofstream trainFile;
    trainFile.open("repro_training_loss" + gp->tP.objScript + "_" + gp->tP.sessionString + ".txt"); // contains training loss per iteration

    for(unsigned round = 0; round <= trainingRounds; round++)
    {
        std::cout << YELLOWTEXT("Round " << round << " of " << trainingRounds << ".") << std::endl;

        int imgID = irand(0, trainingDataset.size());

        // load training image
        jp::img_bgr_t imgBGR;
        trainingDataset.getBGR(imgID, imgBGR);

        jp::cv_trans_t hypGT;
        trainingDataset.getPose(imgID, hypGT);

        // forward pass
        std::cout << BLUETEXT("Predicting object coordinates.") << std::endl;

        cv::Mat_<cv::Point2f> sampling;
        std::vector<cv::Mat_<cv::Vec3f>> imgMaps;
        jp::img_coord_t estObj = getCoordImg(imgBGR, sampling, imgMaps, true, stateRGB);

        std::cout << BLUETEXT("Calculating gradients wrt projection error.") << std::endl;
        StopWatch stopW;

        cv::Mat_<double> dLoss_dObj = cv::Mat_<double>::zeros(sampling.rows * sampling.cols, 3); // accumulates gradients per patch
        double loss = 0;

        #pragma omp parallel for reduction(+:loss) // sum the loss across threads
        for(unsigned x = 0; x < sampling.cols; x++)
        for(unsigned y = 0; y < sampling.rows; y++)
        {
            cv::Point2f imgPt = sampling(y, x);
            cv::Point3f objPt(estObj(y, x));

            loss += project(imgPt, objPt, hypGT, camMat);
            cv::Mat_<double> dNdO = dProjectdObj(imgPt, objPt, hypGT, camMat);

            int dIdx = y * sampling.cols + x;
            dNdO.copyTo(dLoss_dObj.row(dIdx));
        }
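        // The sum accumulated above becomes, after the normalization below, the mean
        // reprojection error under the ground truth pose (a sketch of "Optimization
        // of Reprojection Error", Sec. 2.4 of the paper):
        //
        //     loss = 1/N * sum_i || C(h_gt, y_i) - p_i ||
        //
        // where C projects scene coordinate prediction y_i into the image using the
        // ground truth pose h_gt, and p_i is the corresponding pixel position.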

        loss /= sampling.cols * sampling.rows;

        std::cout << GREENTEXT("Projection Loss: " << loss) << std::endl;

        // learning hyperparameters were tuned for object coordinates in mm
        // we rescale gradients here instead of scaling hyperparameters (learning rate, clamping etc.)
        dLoss_dObj /= 1000.0;

        std::cout << BLUETEXT("Gradient statistics:") << std::endl;
        std::cout << "Max gradient: " << getMax(dLoss_dObj) << std::endl;
        std::cout << "Avg gradient: " << getAvg(dLoss_dObj) << std::endl;
        std::cout << "Med gradient: " << getMed(dLoss_dObj) << std::endl << std::endl;

        std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;

        // backward pass
        backward(loss, imgMaps, dLoss_dObj, stateRGB);

        std::cout << "Done in " << stopW.stop() / 1000 << "s." << std::endl;
        stopW.stop();

        trainFile << round << " " << loss << std::endl;
        std::cout << std::endl;
    }

    trainFile.close();
    lua_close(stateRGB);

    return 0;
}
-------------------------------------------------------------------------------- /code/types.h: --------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#pragma once

#include "opencv2/opencv.hpp"

#define EPS 0.00000001
#define PI 3.1415926

/** Several important types used throughout this code. If types have to be changed, it can be done here, conveniently.
 */

namespace jp
{
    // object coordinates
    typedef double coord1_t;               // one dimension
    typedef cv::Vec<coord1_t, 3> coord3_t; // three dimensions

    // rgb-d
    typedef cv::Vec<uchar, 3> bgr_t;
    typedef double depth_t;

    // image types
    typedef cv::Mat_<coord3_t> img_coord_t;
    typedef cv::Mat_<bgr_t> img_bgr_t;
    typedef cv::Mat_<depth_t> img_depth_t;

    struct img_bgrd_t
    {
        img_bgr_t bgr;
        img_depth_t depth;
    };

    // pose type (OpenCV convention: rotation as Rodrigues vector, then translation)
    typedef std::pair<cv::Mat, cv::Mat> cv_trans_t;
}
-------------------------------------------------------------------------------- /code/util.cpp: --------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#include "util.h"

#include <sstream>
#include <algorithm>
#include <iterator>

#include <dirent.h>
#include <iostream>

std::vector<std::string> split(const std::string& s, char delim)
{
    std::vector<std::string> elems;
    std::stringstream ss(s);
    std::string item;

    while (std::getline(ss, item, delim)) elems.push_back(item);

    return elems;
}

std::vector<std::string> split(const std::string& s)
{
    std::istringstream iss(s);
    std::vector<std::string> elems;

    std::copy(
        std::istream_iterator<std::string>(iss),
        std::istream_iterator<std::string>(),
        std::back_inserter(elems));

    return elems;
}

std::pair<std::string, std::string> splitOffDigits(std::string s)
{
    int splitIndex = -1;
    for(int i = 0; i < (int)s.length(); i++)
    {
        char c = s[i];
        if(('0' <= c && c <= '9') || (c == '.'))
        {
            splitIndex = i;
            break;
        }
    }

    if(splitIndex == -1)
        return std::pair<std::string, std::string>(s, "");
    else
        return std::pair<std::string, std::string>(s.substr(0, splitIndex), s.substr(splitIndex, s.length() - splitIndex));
}

bool endsWith(std::string str, std::string key)
{
    size_t keylen = key.length();
    size_t strlen = str.length();

    if(keylen <= strlen)
        return str.substr(strlen - keylen, keylen) == key;
    else
        return false;
}

std::string intToString(int number, int minLength)
{
    std::stringstream ss; // create a stringstream
    ss << number;         // add number to the stream
    std::string out = ss.str();
    while((int)out.length() < minLength) out = "0" + out;
    return out;           // return a string with the contents of the stream
}

std::string floatToString(float number)
{
    std::stringstream ss; // create a stringstream
    ss << number;         // add number to the stream
    return ss.str();      // return a string with the contents of the stream
}

int clamp(int val, int min_val, int max_val)
{
    return std::max(min_val, std::min(max_val, val));
}

std::vector<std::string> getSubPaths(std::string path)
{
    std::vector<std::string> subPaths;

    DIR *dir = opendir(path.c_str());
    struct dirent *ent;

    if(dir != NULL)
    {
        while((ent = readdir(dir)) != NULL)
        {
            std::string entry = ent->d_name;
            if(entry.find(".") == std::string::npos)
                subPaths.push_back(path + entry);
        }
        closedir(dir);
    }
    else
        std::cout << REDTEXT("Could not open directory: ") << path << std::endl;

    std::sort(subPaths.begin(), subPaths.end());
    return subPaths;
}

std::vector<std::string> getFiles(std::string path, std::string ext, bool silent)
{
    std::vector<std::string> files;

    DIR *dir = opendir(path.c_str());
    struct dirent *ent;

    if(dir != NULL)
    {
        while((ent = readdir(dir)) != NULL)
        {
            std::string entry = ent->d_name;
            if(endsWith(entry, ext))
                files.push_back(path + entry);
        }
        closedir(dir);
    }
    else
        if(!silent) std::cout << REDTEXT("Could not open directory: ") << path << std::endl;

    std::sort(files.begin(), files.end());
    return files;
}

cv::Mat convertForDisplay(const jp::img_coord_t& img, float maxExtent)
{
    cv::Mat result(img.size(), CV_8UC3);

    for(int x = 0; x < img.cols; x++)
    for(int y = 0; y < img.rows; y++)
    for(int channel = 0; channel < 3; channel++)
    {
        int coord = (int) (img(y, x)(channel) + maxExtent / 2.f);
        result.at<cv::Vec3b>(y, x)[channel] = (uchar) ((coord / maxExtent) * 255);
    }

    cv::resize(result, result, cv::Size(640, 480), 0, 0, cv::INTER_NEAREST);

    return result;
}
-------------------------------------------------------------------------------- /code/util.h: --------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#pragma once

#include <vector>
#include <string>
#include "types.h"

/** General utility functions. */

// macros for coloring console output
#define GREENTEXT(output) "\x1b[32;1m" << output << "\x1b[0m"
#define REDTEXT(output) "\x1b[31;1m" << output << "\x1b[0m"
#define BLUETEXT(output) "\x1b[34;1m" << output << "\x1b[0m"
#define YELLOWTEXT(output) "\x1b[33;1m" << output << "\x1b[0m"

/**
 * @brief Splits a string using the given delimiter character.
 *
 * @param s String to split.
 * @param delim Delimiter used to split the string into chunks. The delimiter will be removed.
 * @return std::vector<std::string> List of string chunks.
 */
std::vector<std::string> split(const std::string& s, char delim);

/**
 * @brief Splits a string at spaces.
 *
 * @param s String to split.
 * @return std::vector<std::string> List of string chunks.
 */
std::vector<std::string> split(const std::string& s);

/**
 * @brief Splits a given string in two parts. The split location is before the first digit in the string.
 *
 * @param s String to split.
 * @return std::pair<std::string, std::string> Two parts of the string.
 */
std::pair<std::string, std::string> splitOffDigits(std::string s);

/**
 * @brief Checks whether a string ends with the given key.
 *
 * @param str String to check.
 * @param key Key to look for at the end of the string.
 * @return bool True if the string ends with the key.
 */
bool endsWith(std::string str, std::string key);

/**
 * @brief Converts an integer number to a string. The string can be filled with leading zeros.
 *
 * @param number Integer to convert.
 * @param minLength String is padded with leading zeros to achieve this minimal length. Defaults to 0.
 * @return std::string
 */
std::string intToString(int number, int minLength = 0);

/**
 * @brief Converts a floating point number to a string.
 *
 * @param number Number to convert.
 * @return std::string
 */
std::string floatToString(float number);

/**
 * @brief Clamps a value at the given min and max values.
 *
 * @param val Value to clamp.
 * @param min_val Minimal allowed value.
 * @param max_val Maximal allowed value.
 * @return int Clamped value.
 */
int clamp(int val, int min_val, int max_val);

/**
 * @brief Returns a list of directories contained under the given path. The directories are full paths, i.e. they contain the base path.
 *
 * @param basePath Path where the directories lie.
 * @return std::vector<std::string> List of directories (full paths).
 */
std::vector<std::string> getSubPaths(std::string basePath);

/**
 * @brief Returns a list of files with a given extension contained under the given path. Files are returned including the full path.
 *
 * @param path Path where the files lie.
 * @param ext Only files with this extension will be returned.
 * @param silent The method will print a message in case the given path does not exist. This can be suppressed by setting silent to true. Defaults to false.
 * @return std::vector<std::string> List of files (contain the full path).
 */
std::vector<std::string> getFiles(std::string path, std::string ext, bool silent = false);

/**
 * @brief Maps an object coordinate image to RGB for display.
 * @param img Input object coordinate image.
 * @param maxExtent Maximum extent of the scene in meters.
 * @return RGB visualization of the object coordinates.
 */
cv::Mat convertForDisplay(const jp::img_coord_t& img, float maxExtent);
-------------------------------------------------------------------------------- /code/write_data.cpp: --------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#include "read_data.h"
#include "util.h"

#include <iostream>
#include "png++/png.hpp"

namespace jp
{
    void writeData(const std::string dFile, jp::img_depth_t& image)
    {
        if(endsWith(dFile, "png"))
        {
            // depth is stored as a 1 channel, 16 bit, unsigned short PNG
            png::image<png::gray_pixel_16> imgPng(image.cols, image.rows);

            for(int x = 0; x < imgPng.get_width(); x++)
            for(int y = 0; y < imgPng.get_height(); y++)
                imgPng.set_pixel(x, y, image(y, x));

            imgPng.write(dFile);
        }
        else
        {
            std::cout << REDTEXT("ERROR: Unknown file format while writing depth files!") << std::endl;
        }
    }

    void writeData(const std::string bgrFile, jp::img_bgr_t& image)
    {
        cv::imwrite(bgrFile, image);
    }

    void writeData(const std::string bgrFile, const std::string dFile, jp::img_bgrd_t& image)
    {
        writeData(bgrFile, image.bgr);
        writeData(dFile, image.depth);
    }

    void writeData(const std::string coordFile, jp::img_coord_t& image)
    {
        // coordinates are stored as a 3 channel, 16 bit, unsigned short PNG
        png::image<png::basic_rgb_pixel<unsigned short>> imgPng(image.cols, image.rows);

        for(int x = 0; x < imgPng.get_width(); x++)
        for(int y = 0; y < imgPng.get_height(); y++)
        {
            png::basic_rgb_pixel<unsigned short> px;
            px.red = (unsigned short) image(y, x)(0);
            px.green = (unsigned short) image(y, x)(1);
            px.blue = (unsigned short) image(y, x)(2);

            imgPng.set_pixel(x, y, px);
        }

        imgPng.write(coordFile);
    }
}
-------------------------------------------------------------------------------- /code/write_data.h: --------------------------------------------------------------------------------
/*
Copyright (c) 2016, TU Dresden
Copyright (c) 2017, Heidelberg University
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the TU Dresden, Heidelberg University nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL TU DRESDEN OR HEIDELBERG UNIVERSITY BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#pragma once

#include "types.h"

namespace jp
{
    /**
     * @brief Write a depth image.
     *
     * Depth images are stored as 1 channel, 16 bit, unsigned short PNGs.
     *
     * @param dFile Name of the file to write including the path.
     * @param image Depth image to write.
     * @return void
     */
    void writeData(const std::string dFile, jp::img_depth_t& image);

    /**
     * @brief Write a BGR image.
     *
     * RGB images are stored as 3 channel, 8 bit, unsigned char PNGs or JPGs. Channels are swapped from BGR.
     *
     * @param bgrFile Name of the file to write including the path.
     * @param image BGR image to write.
     * @return void
     */
    void writeData(const std::string bgrFile, jp::img_bgr_t& image);

    /**
     * @brief Writes an image with BGR channels and a depth channel.
     *
     * BGR and depth are written to separate files. See the documentation of the respective writeData methods.
     *
     * @param bgrFile Name of the file to write for the BGR image including the path.
     * @param dFile Name of the file to write for the depth image including the path.
     * @param image RGBD image to write.
     * @return void
     */
    void writeData(const std::string bgrFile, const std::string dFile, jp::img_bgrd_t& image);

    /**
     * @brief Write an object coordinate image.
     *
     * Coordinate images are stored as 3 channel, 16 bit, unsigned short PNGs.
     *
     * @param coordFile Name of the file to write including the path.
     * @param image Coordinate image to write.
     * @return void
     */
    void writeData(const std::string coordFile, jp::img_coord_t& image);
}
--------------------------------------------------------------------------------
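
For orientation, a minimal sketch of how the writeData overloads above might be called; the file names and image contents are invented for illustration and are not part of the repository:

#include "write_data.h"

int main()
{
    // hypothetical 4x4 depth image with a constant depth value of 1000,
    // written as a 16 bit grayscale PNG
    jp::img_depth_t depth(4, 4, 1000.0);
    jp::writeData("depth_0000.png", depth);

    // hypothetical solid blue BGR image, written via cv::imwrite
    jp::img_bgr_t bgr(4, 4, jp::bgr_t(255, 0, 0));
    jp::writeData("frame_0000.png", bgr);

    return 0;
}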