├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE.txt ├── README.md ├── datasets ├── .gitignore ├── download_testdata.sh ├── download_traindata.sh ├── generate_sun3d_train_datasets.py ├── sun3d_train_sequences.txt ├── testdata.md5 └── traindata │ └── traindata.md5 ├── examples ├── .gitignore ├── create_dataset_and_use_readerop.py ├── evaluation.py ├── example.py ├── example_v2.py ├── sculpture1.png ├── sculpture2.png ├── sculpture_Rt1.txt ├── sculpture_Rt2.txt ├── sculpture_depth1.npy └── sculpture_depth2.npy ├── multivih5datareaderop ├── CMakeLists.txt ├── README.md ├── lz4_cmakelists.txt ├── multivih5datareader.cpp ├── multivih5datareader.h ├── multivih5datareaderop.cc ├── simpleh5file.cpp └── simpleh5file.h ├── python └── depthmotionnet │ ├── __init__.py │ ├── blocks_original.py │ ├── datareader │ ├── __init__.py │ └── helpers.py │ ├── dataset_tools │ ├── __init__.py │ ├── helpers.py │ ├── lz4.py │ ├── sun3d_utils.py │ ├── view.py │ ├── view_io.py │ ├── view_tools.py │ ├── view_tools_cython.pyx │ └── webp.py │ ├── evaluation │ ├── __init__.py │ ├── evaluate_to_xarray.py │ ├── helpers.py │ └── metrics.py │ ├── helpers.py │ ├── networks_original.py │ ├── v2 │ ├── __init__.py │ ├── blocks.py │ ├── helpers.py │ ├── losses.py │ └── networks.py │ ├── vis.py │ └── vis_cython.pyx ├── teaser.png ├── training ├── README.md ├── network_training_evolutions.gif └── v2 │ └── training.py └── weights ├── .gitignore └── download_weights.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.swp 3 | build 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lmbspecialops"] 2 | path = lmbspecialops 3 | url = https://github.com/lmb-freiburg/lmbspecialops.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required( VERSION 3.5.1 ) 2 | 3 | project(depthmotionnet) 4 | 5 | if( NOT CMAKE_BUILD_TYPE ) 6 | set( CMAKE_BUILD_TYPE "Release" CACHE STRING "Build configuration 'Release' or 'Debug'." FORCE ) 7 | endif() 8 | 9 | # enable all warnings 10 | set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" ) 11 | 12 | add_subdirectory( multivih5datareaderop ) 13 | 14 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 2 | 3 | RUN apt update && apt install -y python3-pip \ 4 | libhdf5-dev \ 5 | libopencv-dev \ 6 | python3-tk \ 7 | cmake \ 8 | gcc-4.8 \ 9 | g++-4.8 \ 10 | cmake \ 11 | x11-apps 12 | 13 | RUN python3 -m pip install numpy==1.12.1 \ 14 | tensorflow-gpu===1.4.0 \ 15 | pillow==2.0.0 \ 16 | pyparsing===2.1.4 \ 17 | cycler===0.10.0 \ 18 | matplotlib===2.1.2 19 | ADD . /home/demon 20 | RUN mkdir /home/demon/lmbspecialops/build 21 | WORKDIR /home/demon/lmbspecialops/build 22 | ENV CC=/usr/bin/gcc-4.8 23 | ENV CXX=/usr/bin/g++-4.8 24 | RUN cmake -DCMAKE_BUILD_TYPE=Release .. 
25 | RUN make 26 | ENV PYTHONPATH=/home/demon/lmbspecialops/python 27 | ENV LMBSPECIALOPS_LIB=/home/demon/lmbspecialops/build/lib/lmbspecialops.so 28 | WORKDIR /home/demon/examples 29 | CMD ["python3", "example.py"] 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeMoN: Depth and Motion Network 2 | 3 | [![License](https://img.shields.io/badge/license-GPLv3-blue.svg)](LICENSE) 4 | 5 | DeMoN is "A computer algorithm for reconstructing a scene from two projections"1. 6 | The network estimates the depth and relative camera motion for pairs of images; it addresses the important two view case in structure from motion. 7 | 8 | ![Teaser](teaser.png) 9 | 10 | If you use this code for research please cite: 11 | 12 | @InProceedings{UZUMIDB17, 13 | author = "B. Ummenhofer and H. Zhou and J. Uhrig and N. Mayer and E. Ilg and A. Dosovitskiy and T. Brox", 14 | title = "DeMoN: Depth and Motion Network for Learning Monocular Stereo", 15 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)", 16 | month = " ", 17 | year = "2017", 18 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/UZUMIDB17" 19 | } 20 | 21 | See the [project website](https://lmb.informatik.uni-freiburg.de/people/ummenhof/depthmotionnet) for the paper and other material. 22 | 23 | 1 This is the title of H. C. Longuet-Higgins paper from 1981, which perfectly describes what our method does. DeMoN shows that complex geometric relations can be learnt by a ConvNet. 24 | 25 | ## Requirements 26 | 27 | Building and using requires the following libraries and programs 28 | 29 | tensorflow 1.4.0 30 | cmake 3.7.1 31 | python 3.5 32 | cuda 8.0.61 (required for gpu support) 33 | VTK 7.1 with python3 interface (required for visualizing point clouds) 34 | 35 | The versions match the configuration we have tested on an ubuntu 16.04 system. 36 | DeMoN can work with other versions of the aforementioned dependencies, e.g. tensorflow 1.3, but this is not well tested. 37 | 38 | The binary package from [vtk.org](http://www.vtk.org) does not come with a python3 interface. 39 | To enable python3 support VTK needs to be built from source. 40 | Alternatively, there are also VTK packages with python3 support available in Anaconda via the conda package manager. 41 | 42 | The network also depends on our [lmbspecialops](https://github.com/lmb-freiburg/lmbspecialops) library which is included as a submodule. 43 | 44 | 45 | 46 | ## Build instructions 47 | 48 | The following describes how to install tensorflow and demon into a new virtualenv and run the inference example. 
49 | We will use ```pew``` (```pip3 install pew```) to manage a new virtualenv named ```demon_venv``` in the following: 50 | 51 | ```bash 52 | # create virtualenv 53 | pew new demon_venv 54 | ``` 55 | 56 | The following commands all run inside the virtualenv: 57 | 58 | ```bash 59 | # install python module dependencies 60 | pip3 install tensorflow-gpu # or 'tensorflow' without gpu support 61 | pip3 install pillow # for reading images 62 | pip3 install matplotlib # required for visualizing depth maps 63 | pip3 install Cython # required for visualizing point clouds 64 | ``` 65 | 66 | ```bash 67 | # clone repo with submodules 68 | git clone --recursive https://github.com/lmb-freiburg/demon.git 69 | 70 | # build lmbspecialops 71 | DEMON_DIR=$PWD/demon 72 | mkdir $DEMON_DIR/lmbspecialops/build 73 | cd $DEMON_DIR/lmbspecialops/build 74 | cmake .. # add '-DBUILD_WITH_CUDA=OFF' to build without gpu support 75 | # (optional) run 'ccmake .' here to adjust settings for gpu code generation 76 | make 77 | pew add $DEMON_DIR/lmbspecialops/python # add to python path 78 | 79 | # download weights 80 | cd $DEMON_DIR/weights 81 | ./download_weights.sh 82 | 83 | # run example 84 | cd $DEMON_DIR/examples 85 | python3 example.py # opens a window with the depth map (and the point cloud if vtk is available) 86 | ``` 87 | 88 | ## Data reader op & evaluation 89 | 90 | The data reader op and the evaluation code have additional dependencies. 91 | The code for the data reader is in the ```multivih5datareaderop``` directory. 92 | See the corresponding [readme](multivih5datareaderop/README.md) for more details. 93 | 94 | For the evaluation see the example [```examples/evaluation.py```](examples/evaluation.py). 95 | The evaluation code requires the following additional python3 packages, which can be installed with ```pip```: 96 | 97 | ``` 98 | h5py 99 | minieigen 100 | pandas 101 | scipy 102 | scikit-image 103 | xarray 104 | ``` 105 | Note that the evaluation code also depends on the data reader op. 106 | 107 | 108 | ## Training code 109 | 110 | Instructions for training a clean tensorflow version of DeMoN are [here](training/README.md). 111 | Note that the tensorflow training code and model are work in progress and are not identical to the original Caffe version. 112 | 113 | 114 | ## Datasets 115 | 116 | Download scripts for training and testing are located in the ```datasets``` subdirectory. 117 | Note that due to a bug that some of the dataset files with the prefix ```rgbd``` did contain some samples from the test set. 118 | The affected files have been replaced and now have the prefix ```rgbd_bugfix```. 119 | MD5 checksums for all files can be found in the file ```traindata.md5```. 120 | 121 | ## Docker build 122 | Ensure Docker is installed on your system, and that the default Docker runtime 123 | is Nvidia: 124 | 125 | ``` 126 | { 127 | "runtimes": { 128 | "nvidia": { 129 | "path": "/usr/bin/nvidia-container-runtime", 130 | "runtimeArgs": [] 131 | } 132 | }, 133 | "default-runtime": "nvidia" 134 | } 135 | ``` 136 | 137 | Then issue the Docker build command: 138 | 139 | ``` 140 | $ docker build . 
-t demon 141 | ``` 142 | 143 | To visualize the example: 144 | 145 | ``` 146 | $ docker run --gpus all -it -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix:ro demon 147 | ``` 148 | 149 | 150 | ## License 151 | 152 | DeMoN is under the [GNU General Public License v3.0](LICENSE.txt) 153 | -------------------------------------------------------------------------------- /datasets/.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.tgz 3 | -------------------------------------------------------------------------------- /datasets/download_testdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | clear 3 | cat << EOF 4 | 5 | ================================================================================ 6 | 7 | 8 | The test datasets are provided for research purposes only. 9 | 10 | Some of the test datasets build upon other publicly available data. 11 | Make sure to cite the respective original source of the data if you use the 12 | provided files for your research. 13 | 14 | * sun3d_test.h5 is based on the SUN3D dataset http://sun3d.cs.princeton.edu/ 15 | 16 | J. Xiao, A. Owens, and A. Torralba, “SUN3D: A Database of Big Spaces Reconstructed Using SfM and Object Labels,” in 2013 IEEE International Conference on Computer Vision (ICCV), 2013, pp. 1625–1632. 17 | 18 | 19 | 20 | 21 | * rgbd_test.h5 is based on the RGBD SLAM benchmark http://vision.in.tum.de/data/datasets/rgbd-dataset (licensed under CC-BY 3.0) 22 | 23 | J. Sturm, N. Engelhard, F. Endres, W. Burgard, and D. Cremers, “A benchmark for the evaluation of RGB-D SLAM systems,” in 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 2012, pp. 573–580. 24 | 25 | 26 | 27 | * scenes11_test.h5 uses objects from shapenet https://www.shapenet.org/ 28 | 29 | A. X. Chang et al., “ShapeNet: An Information-Rich 3D Model Repository,” arXiv:1512.03012 [cs], Dec. 2015. 30 | 31 | 32 | 33 | * mvs_test.h5 contains scenes from https://colmap.github.io/datasets.html 34 | 35 | J. L. Schönberger and J. M. Frahm, “Structure-from-Motion Revisited,” in 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016, pp. 4104–4113. 36 | J. L. Schönberger, E. Zheng, J.-M. Frahm, and M. Pollefeys, “Pixelwise View Selection for Unstructured Multi-View Stereo,” in Computer Vision – ECCV 2016, 2016, pp. 501–518. 37 | 38 | 39 | 40 | * nyu2_test.h5 is based on http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html 41 | 42 | N. Silberman, D. Hoiem, P. Kohli, and R. Fergus, “Indoor Segmentation and Support Inference from RGBD Images,” in Computer Vision – ECCV 2012, 2012, pp. 746–760. 43 | 44 | 45 | ================================================================================ 46 | 47 | type Y to start the download. 
48 | 49 | EOF 50 | 51 | read -s -n 1 answer 52 | if [ "$answer" != "Y" -a "$answer" != "y" ]; then 53 | exit 0 54 | fi 55 | echo 56 | 57 | datasets=(sun3d rgbd mvs scenes11 nyu2) 58 | 59 | for ds in ${datasets[@]}; do 60 | if [ -e "${ds}_test.h5" ]; then 61 | echo "${ds}_test.h5 already exists, skipping ${ds}" 62 | else 63 | wget --no-check-certificate "https://lmb.informatik.uni-freiburg.de/data/demon/testdata/${ds}_test.tgz" 64 | tar -xvf "${ds}_test.tgz" 65 | fi 66 | done 67 | -------------------------------------------------------------------------------- /datasets/download_traindata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | clear 3 | cat << EOF 4 | 5 | ================================================================================ 6 | 7 | 8 | The train datasets are provided for research purposes only. 9 | 10 | Some of the test datasets build upon other publicly available data. 11 | Make sure to cite the respective original source of the data if you use the 12 | provided files for your research. 13 | 14 | * sun3d_train.h5 is based on the SUN3D dataset http://sun3d.cs.princeton.edu/ 15 | 16 | J. Xiao, A. Owens, and A. Torralba, “SUN3D: A Database of Big Spaces Reconstructed Using SfM and Object Labels,” in 2013 IEEE International Conference on Computer Vision (ICCV), 2013, pp. 1625–1632. 17 | 18 | 19 | 20 | 21 | * rgbd_bugfix_train.h5 is based on the RGBD SLAM benchmark http://vision.in.tum.de/data/datasets/rgbd-dataset (licensed under CC-BY 3.0) 22 | 23 | J. Sturm, N. Engelhard, F. Endres, W. Burgard, and D. Cremers, “A benchmark for the evaluation of RGB-D SLAM systems,” in 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 2012, pp. 573–580. 24 | 25 | 26 | 27 | * scenes11_train.h5 uses objects from shapenet https://www.shapenet.org/ 28 | 29 | A. X. Chang et al., “ShapeNet: An Information-Rich 3D Model Repository,” arXiv:1512.03012 [cs], Dec. 2015. 30 | 31 | 32 | 33 | * mvs_train.h5 contains the Citywall and Achteck-Turm scenes from MVE (Multi-View Environment) http://www.gcc.tu-darmstadt.de/home/proj/mve/ 34 | 35 | S. Fuhrmann, F. Langguth, and M. Goesele, “MVE: A Multi-view Reconstruction Environment,” in Proceedings of the Eurographics Workshop on Graphics and Cultural Heritage, Aire-la-Ville, Switzerland, Switzerland, 2014, pp. 11–18. 36 | 37 | 38 | 39 | ================================================================================ 40 | 41 | type Y to start the download. 42 | 43 | EOF 44 | 45 | read -s -n 1 answer 46 | if [ "$answer" != "Y" -a "$answer" != "y" ]; then 47 | exit 0 48 | fi 49 | echo 50 | 51 | datasets=(sun3d rgbd_bugfix mvs scenes11) 52 | 53 | OLD_PWD="$PWD" 54 | DESTINATION=traindata 55 | mkdir $DESTINATION 56 | cd $DESTINATION 57 | 58 | if [ ! 
-e "README_traindata" ]; then 59 | wget --no-check-certificate "https://lmb.informatik.uni-freiburg.de/data/demon/traindata/README_traindata" 60 | fi 61 | 62 | for ds in ${datasets[@]}; do 63 | if [ -e "${ds}_train.h5" ]; then 64 | echo "${ds}_train.h5 already exists, skipping ${ds}" 65 | else 66 | wget --no-check-certificate "https://lmb.informatik.uni-freiburg.de/data/demon/traindata/${ds}_train.tgz" 67 | tar -xvf "${ds}_train.tgz" 68 | fi 69 | done 70 | 71 | cd "$OLD_PWD" 72 | -------------------------------------------------------------------------------- /datasets/generate_sun3d_train_datasets.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import os 19 | import sys 20 | import math 21 | import pickle 22 | import argparse 23 | import itertools 24 | import h5py 25 | from multiprocessing import Pool 26 | datasets_dir = os.path.dirname(__file__) 27 | sys.path.insert(0, os.path.join(datasets_dir, '..', 'python')) 28 | 29 | from depthmotionnet.dataset_tools.sun3d_utils import * 30 | from depthmotionnet.dataset_tools.view_tools import * 31 | from depthmotionnet.dataset_tools.view_io import * 32 | 33 | 34 | def create_train_file(outfile, sun3d_data_path, seq_name, baseline_range, seq_sharpness_dict): 35 | """Creates a h5 file with training samples with a specific baseline range 36 | 37 | outfile: str 38 | Output file 39 | 40 | sun3d_data_path: str 41 | The path to the sun3d data directory 42 | 43 | seq_name: str 44 | sequence name 45 | 46 | baseline_range: tuple(float, float) 47 | Minimum and maximum baseline 48 | 49 | seq_sharpness_dict: dict 50 | Dictionary with the sharpness score of all sequences. 51 | key: str with sequence name 52 | value: numpy.ndarray with sharpness scores 53 | 54 | """ 55 | created_groups = 0 56 | with h5py.File(outfile,'w') as f: 57 | created_groups += create_samples_from_sequence(f, sun3d_data_path, seq_name, baseline_range, seq_sharpness_dict[seq_name]) 58 | return created_groups 59 | 60 | 61 | def merge_h5files(outfile, files): 62 | """Merges multiple h5 files into a new file and removes the input files afterwards. 63 | 64 | outfile: str 65 | Output file 66 | 67 | files: list of str 68 | List of files to merge 69 | """ 70 | with h5py.File(outfile,'w') as dst: 71 | for f in files: 72 | print('copy', f, 'to', outfile) 73 | with h5py.File(f,'r') as src: 74 | for group_name in src: 75 | src.copy(source=group_name, dest=dst) 76 | for f in files: 77 | os.remove(f) 78 | 79 | 80 | 81 | 82 | def main(): 83 | 84 | print( 85 | """================================================================================ 86 | 87 | This script runs for about 1 day on a computer with 16 threads and requires 88 | up to 50GB of disk space in the output directory! 
89 | 90 | ================================================================================""") 91 | 92 | parser = argparse.ArgumentParser(description="Generates the sun3d training datasets.") 93 | parser.add_argument("--sun3d_path", type=str, required=True, help="The path to the sun3d data directory") 94 | parser.add_argument("--outputdir", type=str, default='training_data', help="Output directory for the generated h5 files") 95 | parser.add_argument("--threads", type=int, default=16, help="Number of threads") 96 | 97 | args = None 98 | try: 99 | args = parser.parse_args() 100 | print(args) 101 | except: 102 | return 1 103 | 104 | sun3d_data_path = args.sun3d_path 105 | outputdir = args.outputdir 106 | os.makedirs(outputdir, exist_ok=True) 107 | threads = args.threads 108 | 109 | # read txt file with the train sequence names 110 | with open('sun3d_train_sequences.txt', 'r') as f: 111 | sequences = f.read().splitlines() 112 | 113 | # compute the sharpness scores for all sequences and images 114 | if os.path.isfile('sun3d_seq_sharpness_dict.pkl'): 115 | print('Reading sequence sharpness file seq_sharpness_dict.pkl') 116 | with open('sun3d_seq_sharpness_dict.pkl','rb') as f: 117 | seq_sharpness_dict = pickle.load(f) 118 | else: 119 | print('Computing sharpness for all images. This could take a while.') 120 | with Pool(threads) as pool: 121 | args = [(sun3d_data_path, seq,) for seq in sequences] 122 | sequence_sharpness = pool.starmap(compute_sharpness, args, chunksize=1) 123 | 124 | seq_sharpness_dict = dict(zip(sequences, sequence_sharpness)) 125 | 126 | with open('sun3d_seq_sharpness_dict.pkl','wb') as f: 127 | pickle.dump(seq_sharpness_dict, f) 128 | 129 | 130 | # baseline ranges from 1cm-10cm to 1.6m-inf 131 | baseline_ranges = [(0.01,0.10), (0.10,0.20), (0.20,0.40), (0.40,0.80), (0.80,1.60), (1.60, float('inf'))] 132 | 133 | with Pool(threads) as pool: 134 | 135 | # create temporary h5 files for each baseline and sequence combination 136 | baseline_range_files_dict = {b:[] for b in baseline_ranges} 137 | args = [] 138 | for i, base_range_seq_name in enumerate(itertools.product(baseline_ranges, sequences)): 139 | base_range, seq_name = base_range_seq_name 140 | #print(base_range, seq_name) 141 | outfile = os.path.join(outputdir,"{0}.h5".format(i)) 142 | args.append((outfile, sun3d_data_path, seq_name, base_range, seq_sharpness_dict)) 143 | baseline_range_files_dict[base_range].append(outfile) 144 | 145 | created_groups = pool.starmap(create_train_file, args, chunksize=1) 146 | 147 | # merge temporary files by creating one file per baseline range 148 | for base_range in baseline_ranges: 149 | outfile = os.path.join(outputdir, 'sun3d_train_{0}m_to_{1}m.h5'.format(*base_range)) 150 | merge_h5files(outfile, baseline_range_files_dict[base_range]) 151 | 152 | 153 | print('created', sum(created_groups), 'groups') 154 | 155 | return 0 156 | 157 | 158 | 159 | 160 | 161 | if __name__ == "__main__": 162 | sys.exit(main()) 163 | 164 | -------------------------------------------------------------------------------- /datasets/sun3d_train_sequences.txt: -------------------------------------------------------------------------------- 1 | brown_bm_1/brown_bm_1 2 | brown_bm_2/brown_bm_2 3 | brown_bm_3/brown_bm_3 4 | brown_bm_4/brown_bm_4 5 | brown_bm_5/brown_bm_5 6 | brown_bm_6/brown_bm_6 7 | brown_cogsci_1/brown_cogsci_1 8 | brown_cogsci_3/brown_cogsci_3 9 | brown_cogsci_4/brown_cogsci_4 10 | brown_cogsci_5/brown_cogsci_5 11 | brown_cogsci_7/brown_cogsci_7 12 | brown_cs_1/brown_cs1 13 | 
brown_cs_2/brown_cs2 14 | brown_cs_4/brown_cs4 15 | brown_cs_5/brown_cs5 16 | brown_cs_6/brown_cs6 17 | brown_cs_8/brown_cs8 18 | brown_cs_9/brown_cs9 19 | brown_cs_10/brown_cs10 20 | harvard_computer_lab/hv_c1_1 21 | harvard_computer_lab/hv_c1_2 22 | harvard_c2/hv_c2_1 23 | harvard_c2/hv_c2_2 24 | harvard_c3/hv_c3_1 25 | harvard_c3/hv_c3_2 26 | harvard_c5/hv_c5_1 27 | harvard_c5/hv_c5_2 28 | harvard_c6/hv_c6_1 29 | harvard_c6/hv_c6_2 30 | harvard_c7/hv_c7_1 31 | harvard_c7/hv_c7_2 32 | harvard_c7/hv_c7_3 33 | harvard_c8/hv_c8_1 34 | harvard_c8/hv_c8_2 35 | harvard_c8/hv_c8_3 36 | harvard_c9/hv_c9_1 37 | harvard_c9/hv_c9_2 38 | harvard_c11/hv_c11_1 39 | harvard_c11/hv_c11_2 40 | harvard_conf_big/hv_conf_big_1 41 | harvard_conf_big/hv_conf_big_2 42 | harvard_conf_big/hv_conf_big_3 43 | harvard_dq_office/hv_dq_office_1 44 | harvard_dq_office/hv_dq_office_2 45 | harvard_corridor_lounge/hv_lounge_corridor2_1 46 | harvard_corridor_lounge/hv_lounge_corridor3_whole_floor 47 | harvard_corridor_lounge/hv_corridor1_1 48 | harvard_corridor_lounge/hv_corridor1_2 49 | harvard_corridor_lounge/hv_lounge_corridor_1 50 | harvard_corridor_lounge/hv_lounge2_1 51 | harvard_corridor_lounge/hv_lounge2_2 52 | harvard_corridor_lounge/hv_lounge3_1 53 | harvard_corridor_lounge/hv_lounge3_2 54 | harvard_printroom1/hv_printroom1_1 55 | harvard_printroom1/hv_printroom1_2 56 | harvard_restroom_1/hv_restroom1_1 57 | harvard_restroom_1/hv_restroom1_2 58 | harvard_restroom_2/hv_restroom4_1 59 | harvard_restroom_2/hv_restroom4_2 60 | harvard_tea_1/hv_tea1_1 61 | harvard_tea_1/hv_tea1_2 62 | harvard_tea_3/hv_tea3_1 63 | home_ac/home_ac_scan1_2012_aug_22 64 | home_ac/home_ac_scan2_2012_aug_22 65 | home_ac/home_ac_scan3_2012_aug_24 66 | home_ag/apartment_ag_nov_7_2012_scan1_erika 67 | home_amo/amo-apt-floor1-1 68 | home_amo/floor1v2 69 | home_amo/floor2-1 70 | home_at/home_at_scan1_2013_jan_1 71 | home_bksh/home_bksh_2012_oct_19_scan1 72 | home_bksh/home_bksh_oct_30_2012_scan2_erika 73 | home_han/apartment_han_oct_31_2012_scan1_erika 74 | home_md/home_md_scan1_2012_july_9 75 | home_md/home_md_scan1_lowres_2012_july_14 76 | home_md/home_md_scan2_2012_july_9 77 | home_md/home_md_scan3_2012_july_9 78 | home_md/home_md_scan4_2012_july_29 79 | home_md/home_md_scan5_2012_aug_20 80 | home_md/home_md_scan6_2012_aug_21 81 | home_md/home_md_scan7_2012_aug_27 82 | home_md/home_md_scan8_2012_aug_28 83 | home_md/home_md_scan9_2012_sep_30 84 | home_ph_cz/ph_cz_1 85 | home_ph_cz/ph_cz_2 86 | home_ph_cz/pk_cz_3 87 | home_pt/home_pt_scan1_2012_oct_19 88 | home_puigpunyen/home_puigpunyent_scan1_2012_aug_22 89 | home_puigpunyen/home_puigpunyent_scan2_2012_aug_23 90 | home_puigpunyen/home_puigpunyent_scan3_2012_aug_26 91 | home_puigpunyen/home_puigpunyent_scan4_2012_dec_23 92 | home_rz/home_rz_scan1_2012_oct_31 93 | home_ts/apartment_ts_oct_31_2012_scan1_erika 94 | hotel_barcelona/scan1_2012_july_23 95 | hotel_beijing/beijing_hotel_1 96 | hotel_beijing/beijing_hotel_2 97 | hotel_beijing/beijing_hotel_3 98 | hotel_beijing/beijing_hotel_4 99 | hotel_casarural/casarural_scan1_2012_july_22 100 | hotel_florence_ant/hotel_room_florence_scan1_2012_oct_09 101 | hotel_florence_jx/florence_hotel_another_room 102 | hotel_florence_jx/florence_hotel_kitchen 103 | hotel_florence_jx/myhotel 104 | hotel_graz/scan1_2012_aug_29 105 | hotel_grenoble/scan1_2012_july_11 106 | hotel_grenoble/scan1_2012_july_11_another 107 | hotel_grenoble/scan2_2012_july_11 108 | hotel_hkust/hk_hotel_1 109 | hotel_m/scan1 110 | hotel_mr/scan1 111 | hotel_nips2012/nips_1 112 | 
hotel_ny/hotel_ny_2012_oct_21_scan1 113 | hotel_pedraza/hotel_room_pedraza_2012_nov_25 114 | hotel_pittsburg/hotel_pittsburg_scan1_2012_dec_12 115 | hotel_sf/scan1 116 | hotel_singapore_2012/singapore_hotel1 117 | hotel_stb/scan1 118 | hotel_uc/scan1 119 | hotel_ucla_ant/hotel_room_ucla_scan1_2012_oct_05 120 | hotel_ucla_jx/ucla_hotel_1 121 | hotel_ucsd/la1 122 | hotel_umd/maryland_hotel1 123 | mit_1_242/classroom_1242_oct_30_2012_scan1_erika 124 | mit_1_375/classroom_1375_nov_6_2012_scan1_erika 125 | mit_13_xh_lab1/xh_lab1_1 126 | mit_13_xh_lab2/xh_lab2_1 127 | mit_26_100/classroom_26100_nov_2_2012_scan1_erika 128 | mit_3_133/classroom_3133_nov_6_2012_scan1_erika 129 | mit_3_270/classroom_3270_oct_30_2012_scan1_erika 130 | mit_3_huge_office/cl_1 131 | mit_32_123/123_1 132 | mit_32_124/124_1 133 | mit_32_144/classroom_32144_nov_2_2012_scan1_erika 134 | mit_32_261/261_1 135 | mit_32_262/262_1 136 | mit_32_397/397_1 137 | mit_32_bar/bar_1 138 | mit_32_bikeroom/bikeroom_1 139 | mit_32_lounge_d429/d4_lounge_1 140 | mit_32_lounge_d4_bill/d4_lounge_vision_1 141 | mit_32_d407/d407-1 142 | mit_32_d428/bs4j179mmv 143 | mit_32_d446/d446_1 144 | mit_32_d451/d451-1 145 | mit_32_d460/d460-1 146 | mit_32_d463/d463_1 147 | mit_32_d466/d466-1 148 | mit_32_d475a/d475a-1 149 | mit_32_d507/d507_1 150 | mit_32_d530/d530_scan_oldest 151 | mit_32_d6_lounge/d6_lounge_1 152 | mit_32_g431/g431_1 153 | mit_32_g442/g442_1 154 | mit_32_g449/g449_1 155 | mit_32_g451/g451_1 156 | mit_32_g460/g460_1 157 | mit_32_g5_lounge/g5_lounge_1 158 | mit_32_g531/g531_1 159 | mit_32_g631/g631_1 160 | mit_32_g660/g660_1 161 | mit_32_g7_lounge/g7_lounge_1 162 | mit_32_g825/g825_1 163 | mit_32_g882/g882_1 164 | mit_32_g9_lounge/g9_lounge_1 165 | mit_32_pingpong/pingpong_1 166 | mit_32_pool/pool_1 167 | mit_34_302/classroom_34302_oct_30_2012_scan1_erika 168 | mit_35_225/classroom_35225_nov_2_2012_scan1_erika 169 | mit_36_144/classroom_36144_nov_2_2012_scan1_erika 170 | mit_36_ti_lab/tian_lab_1 171 | mit_36_ti_lab2/tian_lab_a 172 | mit_36_ti_office/tian_office_1 173 | mit_46_5bathroom/bcs_floor5_bathroom 174 | mit_46_6bathroom/bcs_floor6_restroom 175 | mit_46_4conf_1/bcs_floor4_conf_1 176 | mit_46_4conf_2/bcs_floor4_conf_a 177 | mit_46_5conf_1/bcs_floor5_conf_1 178 | mit_46_5conf_2/bcs_floor5_conf_a 179 | mit_46_5lounge/bcs_floor5_lounge 180 | mit_46_ted_lab1/ted_lab_1 181 | mit_5_234/classroom_5234_nov_6_2012_scan1_erika 182 | mit_6_120/classroom_6120_nov_2_2012_scan1_erika 183 | mit_76_417/76-417a 184 | mit_76_458/76-458a 185 | mit_76_459/76-459a 186 | mit_76_4kitchen/76-4kitchen1 187 | mit_76_4lounge/76-4lounge 188 | mit_76_4lounge/76-4lounge2 189 | mit_76_4printroom/76-4printroom1 190 | mit_76_studyroom/76-1studyroom1 191 | mit_dorm_baker_cj/dorm_baker_cj_oct_29_2012_scan1_erika 192 | mit_dorm_ec_tam/dorm_ec_tam_oct_30_2012_scan1_erika 193 | mit_dorm_masseeh_flr4a/dorm_masseeh_flr4_scan1_oct_26_2012_erika 194 | mit_dorm_masseeh_md/dorm_masseeh_md_scan1_oct_26_2012_erika 195 | mit_dorm_masseeh_sr/dorm_masseeh_sr_scan1_oct_26_2012_erika 196 | mit_dorm_masseeh_wm/dorm_masseeh_wm_oct_29_2012_scan1_erika 197 | mit_dorm_masseeh_xc/dorm_masseeh_xc_oct_29_2012_scan1_erika 198 | mit_dorm_mcc_313/dorm_mcc_313_oct_31_2012_scan1_erika 199 | mit_dorm_mcc_flr1/dorm_mcc_flr1_oct_31_2012_scan1_erika 200 | mit_dorm_mcc_nk/dorm_mcc_nk_oct_31_2012_scan1_erika 201 | mit_dorm_mcc_wflr7/dorm_mcc_wflr7_oct_31_2012_scan1_erika 202 | mit_dorm_mcc_wph/dorm_mcc_wph_oct_31_2012_scan1_erika 203 | mit_dorm_next_536/dorm_next_536_oct_29_2012_scan1_erika 204 | 
mit_dorm_next_552/dorm_next_552_oct_30_2012_scan1_erika 205 | mit_dorm_next_dn/dorm_next_dn_oct_30_2012_scan1_erika 206 | mit_dorm_next_jc/dorm_next_jc_oct_30_2012_scan1_erika 207 | mit_dorm_next_sj/dorm_next_sj_oct_30_2012_scan1_erika 208 | mit_dorm_sh_basement/dorm_sh_basement_oct_28_2012_scan1_erika 209 | mit_gym_dupont/gym_dupont_nov_1_2012_scan1.erika 210 | mit_gym_tohoku/gym_tohoku_oct_28_2012_scan1_erika 211 | mit_gym_wrestling/gym_wrestling_nov_1_2012_scan1_erika 212 | mit_gym_z_squash/gym_z_squash_scan1_oct_26_2012_erika 213 | mit_dorm3/corridor 214 | mit_lab_16/lab_16_nov_2_2012_scan1_erika 215 | mit_lab_hj/lab_hj_hall5_nov_2_2012_scan1_erika 216 | mit_lab_koch/lab_koch_bench_nov_2_2012_scan1_erika 217 | mit_lab_pdl/lab_pdl_nov_2_2012_scan1_erika 218 | mit_ne47_2corridor/ne47_floor2_corridor 219 | mit_ne47_2biolab/ne47_floor2 220 | mit_ne47_2conf/ne47_floor2_conf_room 221 | mit_ne47_huge_office/ne47_floor2_office_1 222 | mit_ne47_huge_office/ne47_floor2_office_2 223 | mit_w16/kresge_back 224 | mit_w20_athena/sc_athena_oct_29_2012_scan1_erika 225 | mit_w20_flr4/sc_flr4_oct_29_2012_scan1_erika 226 | mit_w59_conference/conference_w59_nov_6_2012_scan1_erika 227 | mit_w85_2floor/2_stair_4 228 | mit_w85_4/4_1 229 | mit_w85_5/5_1 230 | mit_w85_8/8_1 231 | mit_w85_10/10_1 232 | mit_w85_11/11_1_1 233 | mit_w85_12/12_1_1 234 | mit_w85_16/16_1 235 | mit_w85_basement/wg_big_lounge_1 236 | mit_w85_basement/wg_laundary_1 237 | mit_w85_basement/wg_small_lounge_1 238 | mit_w85_lounge1/wg_gym_lounge_1 239 | mit_w85_lounge1/wg_lounge1_1 240 | mit_w85_playroom/westgate_playroom_1 241 | mit_w85a/a2 242 | mit_w85d/d1 243 | mit_w85h/h4_1 244 | mit_w85j/j1 245 | mit_w85k2/k1 246 | mit_w85k1/whole_apartment 247 | providence_station/providence_station 248 | -------------------------------------------------------------------------------- /datasets/testdata.md5: -------------------------------------------------------------------------------- 1 | f42e0ffd1243274a2fcee1ea5137176c *mvs_test.h5 2 | 25f8f3749341b193ce507889bde122b7 *nyu2_test.h5 3 | 56393d4d56acb189c5b5e698dc8ec0ae *rgbd_test.h5 4 | cf9314cb4229967bb9978e7e900c1a50 *scenes11_test.h5 5 | 8d9d742497b6c3928bcab2f1ff14f32c *sun3d_test.h5 6 | 7b427e0b45c7e8c5a74a3ce8c96de407 *mvs_test.tgz 7 | 6d67223a13a013dff2730612e950d191 *nyu2_test.tgz 8 | c94d735e99fa2c9e8f83d357db96f347 *rgbd_test.tgz 9 | d8d3df17400b7e763c8aaa0f3b81963d *scenes11_test.tgz 10 | 30ee9c8861c4439e04fec7405a4e0c8d *sun3d_test.tgz 11 | -------------------------------------------------------------------------------- /datasets/traindata/traindata.md5: -------------------------------------------------------------------------------- 1 | d968ca0fa1785d57559469fdcb476121 *mvs_achteck_turm.h5 2 | 34818891c3b6ae66c90badc9a24ed015 *mvs_breisach.h5 3 | b3af67b01725f3cd99d6cebad08857e9 *mvs_citywall.h5 4 | # 2543a4b1a2cc8385d342958b2557d25e *rgbd_10_to_20_3d_train.h5 5 | # ee6c5805c0244b01268e394e33b4e455 *rgbd_10_to_20_handheld_train.h5 6 | a316f02a84abe42ccb6a1d5450ac4bf1 *rgbd_10_to_20_simple_train.h5 7 | # 5be8f60f0e1d9f5afaa3ee0afcb5e3be *rgbd_20_to_inf_3d_train.h5 8 | # 3e6c78142bfd7a92d647abf443e98e3c *rgbd_20_to_inf_handheld_train.h5 9 | 0ee80f4b7450bbaa4df59854b73ca770 *rgbd_20_to_inf_simple_train.h5 10 | 2e07e7b76a4e39254dd046cfe6754fe7 *scenes11_train.h5 11 | ab1190b58350ed3f39b4d5e2966fb45c *sun3d_train_0.01m_to_0.1m.h5 12 | d72b646f474559e7338a65c2199da544 *sun3d_train_0.1m_to_0.2m.h5 13 | d31bbefcffbfd795d2a19193ab61478f *sun3d_train_0.2m_to_0.4m.h5 14 | 
8aeaae77e38f6493e52a0a0a7bcac8e3 *sun3d_train_0.4m_to_0.8m.h5 15 | 6e872b0fec5cd5a404e25c034e1c9322 *sun3d_train_0.8m_to_1.6m.h5 16 | 0d306ed5e6e4e4dcb5371b4c0c132e6a *sun3d_train_1.6m_to_infm.h5 17 | a981ec421da35df09225e875ce7531f8 *mvs_train.tgz 18 | # cd11f27d9c5d170030ad46dc1e032cf8 *rgbd_train.tgz 19 | 2cc58de179d11a5fa88f3a67edb0a5d2 *scenes11_train.tgz 20 | a77e1a5159258fadc0216e647fa21f33 *sun3d_train.tgz 21 | 3c8111feb0eebe50b5068c4a71ac6a28 *rgbd_bugfix_10_to_20_3d_train.h5 22 | 2c02bd33e6e5a7550ef5abe8eee1609e *rgbd_bugfix_10_to_20_handheld_train.h5 23 | 286c2b2bee520a427f347e848660636a *rgbd_bugfix_20_to_inf_3d_train.h5 24 | 9a1907607af8d7543b99df33ae4d4733 *rgbd_bugfix_20_to_inf_handheld_train.h5 25 | 3975c81540462d7ba009190381f2d88b *rgbd_bugfix_train.tgz 26 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.json 3 | 4 | -------------------------------------------------------------------------------- /examples/create_dataset_and_use_readerop.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Create a new dataset h5 file that could be used for training 3 | # 4 | import os 5 | import sys 6 | import numpy as np 7 | from PIL import Image 8 | import h5py 9 | 10 | examples_dir = os.path.dirname(__file__) 11 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python')) 12 | from depthmotionnet.dataset_tools import * 13 | 14 | 15 | # intrinsics supported by DeMoN 16 | normalized_intrinsics = [0.89115971, 1.18821287, 0.5, 0.5] 17 | 18 | # unique group name not starting with '.' 19 | group_name = 'sculpture-0001' 20 | 21 | # write a new dataset with a single group and two views 22 | with h5py.File('dataset.h5','w') as f: 23 | 24 | for i in range(2): 25 | img = Image.open('sculpture{0}.png'.format(i+1)) 26 | Rt = np.loadtxt('sculpture_Rt{0}.txt'.format(i+1)) 27 | depth = np.load('sculpture_depth{0}.npy'.format(i+1)) 28 | K = np.eye(3) 29 | K[0,0] = normalized_intrinsics[0] * img.size[0] 30 | K[1,1] = normalized_intrinsics[1] * img.size[1] 31 | K[0,2] = normalized_intrinsics[2] * img.size[0] 32 | K[1,2] = normalized_intrinsics[2] * img.size[1] 33 | 34 | # create a View tuple 35 | view = View(R=Rt[:,:3], t=Rt[:,3], K=K, image=img, depth=depth, depth_metric='camera_z') 36 | 37 | # write view to the h5 file 38 | # view enumeration must start with 0 ('v0') 39 | view_group = f.require_group(group_name+'/frames/t0/v{0}'.format(i)) 40 | write_view(view_group, view) 41 | 42 | # write valid image pair combinations to the group t0 43 | viewpoint_pairs = np.array([0, 1, 1, 0], dtype=np.int32) 44 | time_group = f[group_name]['frames/t0'] 45 | time_group.attrs['viewpoint_pairs'] = viewpoint_pairs 46 | 47 | 48 | ################################################################################ 49 | # Use the reader op to read the created h5 file 50 | # 51 | from depthmotionnet.datareader import * 52 | import json 53 | import tensorflow as tf 54 | from matplotlib import pyplot as plt 55 | 56 | 57 | # keys for the requested output tensors. 58 | # These keys will be passed to the data reader op. 
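# (Note: the reader op returns a tuple of (info, sample_ids, data_tensors); the data
# tensors come back in the same order as the keys requested here, which is why they
# can be zipped into a dict further below.)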
59 | data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS') 60 | 61 | # the following parameters are just an example and are not optimized for training 62 | reader_params = { 63 | 'batch_size': 1, 64 | 'test_phase': False, 65 | 'builder_threads': 1, 66 | 'inverse_depth': True, 67 | 'motion_format': 'ANGLEAXIS6', 68 | 'norm_trans_scale_depth': True, 69 | # downsampling of image and depth is supported 70 | 'scaled_height': 96, 71 | 'scaled_width': 128, 72 | 'scene_pool_size': 5, # for actual training this should be around 500 73 | 'augment_rot180': 0, 74 | 'augment_mirror_x': 0, 75 | 'top_output': data_tensors_keys, # request data tensors 76 | 'source': [{'path': 'dataset.h5', 'weight': [{'t': 0, 'v': 1.0}]},], 77 | } 78 | 79 | reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys), json.dumps(reader_params)) 80 | # create a dict to make the distinct data tensors accessible via keys 81 | data_dict = dict(zip(data_tensors_keys,reader_tensors[2])) 82 | 83 | gpu_options = tf.GPUOptions() 84 | gpu_options.per_process_gpu_memory_fraction=0.8 # leave some memory to other processes 85 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) 86 | 87 | result = session.run(data_dict) 88 | 89 | # show the depth ground truth. 90 | # Note that the data reader op replaces invalid depth values with nan. 91 | plt.imshow(result['DEPTH'].squeeze(), cmap='Greys') 92 | plt.show() 93 | 94 | # visualize the data as point cloud if vtk is available 95 | try: 96 | from depthmotionnet.vis import * 97 | visualize_prediction( 98 | inverse_depth=result['DEPTH'], 99 | image=result['IMAGE_PAIR'][0,0:3], 100 | rotation=result['MOTION'][0,0:3], 101 | translation=result['MOTION'][0,3:]) 102 | except ImportError as err: 103 | print("Cannot visualize as pointcloud.", err) 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /examples/evaluation.py: -------------------------------------------------------------------------------- 1 | # 2 | # This script computes the depth and motion errors for the network predictions. 3 | # 4 | # Note that numbers are not identical to the values reported in the paper, due 5 | # to implementation differences between the caffe and tensorflow version. 6 | # 7 | # Running this script requires about 4gb of disk space. 8 | # 9 | # This script expects the test datasets in the folder ../datasets 10 | # Use the provided script in ../datasets for downloading the data. 
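# A typical invocation (assuming the test sets were fetched with
# ../datasets/download_testdata.sh and the data reader op has been built) is simply:
#
#   python3 evaluation.py
#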
11 | # 12 | import os 13 | import sys 14 | import json 15 | import h5py 16 | import xarray 17 | import numpy as np 18 | import lmbspecialops as sops 19 | import tensorflow as tf 20 | 21 | examples_dir = os.path.dirname(__file__) 22 | weights_dir = os.path.join(examples_dir,'..','weights') 23 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python')) 24 | 25 | from depthmotionnet.datareader import * 26 | from depthmotionnet.networks_original import * 27 | from depthmotionnet.helpers import convert_NCHW_to_NHWC, convert_NHWC_to_NCHW 28 | from depthmotionnet.evaluation import * 29 | 30 | 31 | def create_ground_truth_file(dataset, dataset_dir): 32 | """Creates a hdf5 file with the ground truth test data 33 | 34 | dataset: str 35 | name of the dataset 36 | dataset_dir: str 37 | path to the directory containing the datasets 38 | 39 | Returns the path to the created file 40 | """ 41 | ds = dataset 42 | # destination file 43 | ground_truth_file = '{0}_ground_truth.h5'.format(ds) 44 | 45 | if os.path.isfile(ground_truth_file): 46 | return ground_truth_file # skip existing files 47 | 48 | print('creating {0}'.format(ground_truth_file)) 49 | 50 | # data types requested from the reader op 51 | data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS') 52 | 53 | reader_params = { 54 | 'batch_size': 1, 55 | 'test_phase': True, # deactivates randomization 56 | 'builder_threads': 1, # must be 1 in test phase 57 | 'inverse_depth': True, 58 | 'motion_format': 'ANGLEAXIS6', 59 | # True is also possible here. If set to True we store ground truth with 60 | # precomputed normalization. False keeps the original information. 61 | 'norm_trans_scale_depth': False, 62 | # original data resolution 63 | 'scaled_height': 480, 64 | 'scaled_width': 640, 65 | 'scene_pool_size': 5, 66 | # no augmentation 67 | 'augment_rot180': 0, 68 | 'augment_mirror_x': 0, 69 | 'top_output': data_tensors_keys, 70 | 'source': [{'path': os.path.join(dataset_dir,'{0}_test.h5'.format(ds))}], 71 | } 72 | 73 | reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys), json.dumps(reader_params)) 74 | 75 | # create a dict to make the distinct data tensors accessible via keys 76 | data_dict = dict(zip(data_tensors_keys,reader_tensors[2])) 77 | info_tensor = reader_tensors[0] 78 | sample_ids_tensor = reader_tensors[1] 79 | rotation_tensor, translation_tensor = tf.split(data_dict['MOTION'], 2, axis=1) 80 | 81 | flow_tensor = sops.depth_to_flow(data_dict['DEPTH'], data_dict['INTRINSICS'], rotation_tensor, translation_tensor, inverse_depth=True, normalize_flow=True) 82 | 83 | gpu_options = tf.GPUOptions() 84 | gpu_options.per_process_gpu_memory_fraction=0.8 # leave some memory to other processes 85 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) 86 | 87 | 88 | fetch_dict = {'INFO': info_tensor, 'SAMPLE_IDS': sample_ids_tensor, 'FLOW': flow_tensor} 89 | fetch_dict.update(data_dict) 90 | 91 | with h5py.File(ground_truth_file) as f: 92 | 93 | number_of_test_iterations = 1 # will be set to the correct value in the while loop 94 | iteration = 0 95 | while iteration < number_of_test_iterations: 96 | 97 | data = session.run(fetch_dict) 98 | 99 | # get number of iterations from the info vector 100 | number_of_test_iterations = int(data['INFO'][0]) 101 | 102 | # write ground truth data to the file 103 | group = f.require_group(str(iteration)) 104 | group['image_pair'] = data['IMAGE_PAIR'][0] 105 | group['depth'] = data['DEPTH'][0] 106 | group['motion'] = data['MOTION'][0] 107 | 
group['flow'] = data['FLOW'][0] 108 | group['intrinsics'] = data['INTRINSICS'][0] 109 | 110 | # save sample id as attribute of the group. 111 | # the evaluation code will use this to check if prediction and ground truth match. 112 | sample_id = (''.join(map(chr, data['SAMPLE_IDS']))).strip() 113 | group.attrs['sample_id'] = np.string_(sample_id) 114 | iteration += 1 115 | 116 | del session 117 | tf.reset_default_graph() 118 | return ground_truth_file 119 | 120 | 121 | 122 | def create_prediction_file(dataset, dataset_dir): 123 | """Creates a hdf5 file with the predictions 124 | 125 | dataset: str 126 | name of the dataset 127 | dataset_dir: str 128 | path to the directory containing the datasets 129 | 130 | Returns the path to the created file 131 | """ 132 | 133 | if tf.test.is_gpu_available(True): 134 | data_format='channels_first' 135 | else: # running on cpu requires channels_last data format 136 | data_format='channels_last' 137 | print('Using data_format "{0}"'.format(data_format)) 138 | 139 | ds = dataset 140 | # destination file 141 | prediction_file = '{0}_prediction.h5'.format(ds) 142 | 143 | # data types requested from the reader op 144 | data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS') 145 | 146 | reader_params = { 147 | 'batch_size': 1, 148 | 'test_phase': True, # deactivates randomization 149 | 'builder_threads': 1, # must be 1 in test phase 150 | 'inverse_depth': True, 151 | 'motion_format': 'ANGLEAXIS6', 152 | 'norm_trans_scale_depth': True, 153 | # inpu resolution for demon 154 | 'scaled_height': 192, 155 | 'scaled_width': 256, 156 | 'scene_pool_size': 5, 157 | # no augmentation 158 | 'augment_rot180': 0, 159 | 'augment_mirror_x': 0, 160 | 'top_output': data_tensors_keys, 161 | 'source': [{'path': os.path.join(dataset_dir,'{0}_test.h5'.format(ds))}], 162 | } 163 | 164 | reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys), json.dumps(reader_params)) 165 | 166 | # create a dict to make the distinct data tensors accessible via keys 167 | data_dict = dict(zip(data_tensors_keys,reader_tensors[2])) 168 | info_tensor = reader_tensors[0] 169 | sample_ids_tensor = reader_tensors[1] 170 | image1, image2 = tf.split(data_dict['IMAGE_PAIR'],2,axis=1) 171 | 172 | # downsample second image 173 | image2_2 = sops.median3x3_downsample(sops.median3x3_downsample(image2)) 174 | 175 | gpu_options = tf.GPUOptions() 176 | gpu_options.per_process_gpu_memory_fraction=0.8 # leave some memory to other processes 177 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) 178 | 179 | 180 | # init networks 181 | bootstrap_net = BootstrapNet(session, data_format) 182 | iterative_net = IterativeNet(session, data_format) 183 | refine_net = RefinementNet(session, data_format) 184 | 185 | session.run(tf.global_variables_initializer()) 186 | 187 | # load weights 188 | saver = tf.train.Saver() 189 | saver.restore(session,os.path.join(weights_dir,'demon_original')) 190 | 191 | fetch_dict = { 192 | 'INFO': info_tensor, 193 | 'SAMPLE_IDS': sample_ids_tensor, 194 | 'image1': image1, 195 | 'image2_2': image2_2, 196 | } 197 | fetch_dict.update(data_dict) 198 | 199 | if data_format == 'channels_last': 200 | for k in ('image1', 'image2_2', 'IMAGE_PAIR',): 201 | fetch_dict[k] = convert_NCHW_to_NHWC(fetch_dict[k]) 202 | 203 | with h5py.File(prediction_file, 'w') as f: 204 | 205 | number_of_test_iterations = 1 # will be set to the correct value in the while loop 206 | test_iteration = 0 207 | while test_iteration < 
number_of_test_iterations: 208 | 209 | data = session.run(fetch_dict) 210 | 211 | # get number of iterations from the info vector 212 | number_of_test_iterations = int(data['INFO'][0]) 213 | 214 | # create group for the current test sample and save the sample id. 215 | group = f.require_group('snapshot_1/{0}'.format(test_iteration)) 216 | sample_id = (''.join(map(chr, data['SAMPLE_IDS']))).strip() 217 | group.attrs['sample_id'] = np.string_(sample_id) 218 | 219 | # save intrinsics 220 | group['intrinsics'] = data['INTRINSICS'] 221 | 222 | # run the network and save outputs for each network iteration 'i'. 223 | # iteration 0 corresponds to the bootstrap network. 224 | # we also store the refined depth for each iteration. 225 | for i in range(4): 226 | if i == 0: 227 | result = bootstrap_net.eval(data['IMAGE_PAIR'], data['image2_2']) 228 | else: 229 | result = iterative_net.eval( 230 | data['IMAGE_PAIR'], 231 | data['image2_2'], 232 | result['predict_depth2'], 233 | result['predict_normal2'], 234 | result['predict_rotation'], 235 | result['predict_translation'] 236 | ) 237 | # write predictions 238 | if data_format == 'channels_last': 239 | group['predicted_flow/{0}'.format(i)] = result['predict_flow2'][0].transpose([2,0,1]) 240 | group['predicted_depth/{0}'.format(i)] = result['predict_depth2'][0,:,:,0] 241 | else: 242 | group['predicted_flow/{0}'.format(i)] = result['predict_flow2'][0] 243 | group['predicted_depth/{0}'.format(i)] = result['predict_depth2'][0,0] 244 | 245 | predict_motion = np.concatenate((result['predict_rotation'],result['predict_translation']),axis=1) 246 | group['predicted_motion/{0}'.format(i)] = predict_motion[0] 247 | 248 | # run refinement network 249 | result_refined = refine_net.eval(data['image1'],result['predict_depth2']) 250 | 251 | # write refined depth prediction 252 | if data_format == 'channels_last': 253 | group['predicted_depth/{0}_refined'.format(i)] = result_refined['predict_depth0'][0,:,:,0] 254 | else: 255 | group['predicted_depth/{0}_refined'.format(i)] = result_refined['predict_depth0'][0,0] 256 | 257 | test_iteration += 1 258 | 259 | del session 260 | tf.reset_default_graph() 261 | return prediction_file 262 | 263 | def main(): 264 | 265 | # list the test datasets names for evaluation 266 | datasets = ('mvs', 'scenes11', 'rgbd', 'sun3d', 'nyu2') 267 | dataset_dir = os.path.join('..', 'datasets') 268 | 269 | 270 | 271 | # creating the ground truth and prediction files requires about 11gb of disk space 272 | for dataset in datasets: 273 | gt_file = create_ground_truth_file(dataset, dataset_dir) 274 | 275 | print('creating predictions for', dataset) 276 | pr_file = create_prediction_file(dataset, dataset_dir) 277 | 278 | # compute errors 279 | # the evaluate function expects the path to a prediction and the corresponding 280 | # ground truth file. 281 | print('computing errors for', dataset) 282 | 283 | # compute errors for comparison with single image depth methods 284 | eval_result = evaluate(pr_file, gt_file, depthmask=False, eigen_crop_gt_and_pred=True) 285 | # save evaluation results to disk 286 | write_xarray_json(eval_result, '{0}_eval_crop_allpix.json'.format(dataset)) 287 | 288 | if dataset != 'nyu2': 289 | # depthmask=True will compute depth errors only for pixels visible in both images. 
290 | eval_result = evaluate(pr_file, gt_file, depthmask=True) 291 | # save evaluation results to disk 292 | write_xarray_json(eval_result, '{0}_eval.json'.format(dataset)) 293 | 294 | 295 | 296 | # print errors 297 | for dataset in datasets: 298 | 299 | # In the following eval_result is a 5D array with the following dimensions: 300 | # - snapshots: stores results of different network training states 301 | # - iteration: network iterations '0' stores the result of the bootstrap network. 302 | # '3' stores the results after bootstrap + 3 times iterative network. 303 | # '3_refined' stores the result after the refinement network. 304 | # - sample: the sample number. 305 | # - errors: stores the different error metrics. 306 | # - scaled: is a boolean dimension used for storing errors after optimal scaling 307 | # the prediction with a scalar factor. This was meant as an alternative 308 | # to scale invariant error measures. Just set this to False and ignore. 309 | # 310 | # The following prints the error metrics as used in the paper. 311 | 312 | depth_errors = ['depth_l1_inverse','depth_scale_invariant','depth_abs_relative'] 313 | motion_errors = ['rot_err','tran_angle_err'] 314 | print('======================================') 315 | print('dataset: ', dataset) 316 | if dataset != 'nyu2': 317 | eval_result = read_xarray_json('{0}_eval.json'.format(dataset)) 318 | print(' depth', eval_result[0].loc['3_refined',:,depth_errors,False].mean('sample').to_pandas().to_string()) 319 | print(' motion', eval_result[0].loc['3',:,motion_errors,False].mean('sample').to_pandas().to_string()) 320 | eval_result = read_xarray_json('{0}_eval_crop_allpix.json'.format(dataset)) 321 | print(' depth cropped+all pixels', eval_result[0].loc['3_refined',:,['depth_scale_invariant'],False].mean('sample').to_pandas().to_string()) 322 | 323 | 324 | if __name__ == "__main__": 325 | main() 326 | 327 | 328 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from PIL import Image 4 | from matplotlib import pyplot as plt 5 | import os 6 | import sys 7 | 8 | examples_dir = os.path.dirname(__file__) 9 | weights_dir = os.path.join(examples_dir,'..','weights') 10 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python')) 11 | 12 | from depthmotionnet.networks_original import * 13 | 14 | 15 | def prepare_input_data(img1, img2, data_format): 16 | """Creates the arrays used as input from the two images.""" 17 | # scale images if necessary 18 | if img1.size[0] != 256 or img1.size[1] != 192: 19 | img1 = img1.resize((256,192)) 20 | if img2.size[0] != 256 or img2.size[1] != 192: 21 | img2 = img2.resize((256,192)) 22 | img2_2 = img2.resize((64,48)) 23 | 24 | # transform range from [0,255] to [-0.5,0.5] 25 | img1_arr = np.array(img1).astype(np.float32)/255 -0.5 26 | img2_arr = np.array(img2).astype(np.float32)/255 -0.5 27 | img2_2_arr = np.array(img2_2).astype(np.float32)/255 -0.5 28 | 29 | if data_format == 'channels_first': 30 | img1_arr = img1_arr.transpose([2,0,1]) 31 | img2_arr = img2_arr.transpose([2,0,1]) 32 | img2_2_arr = img2_2_arr.transpose([2,0,1]) 33 | image_pair = np.concatenate((img1_arr,img2_arr), axis=0) 34 | else: 35 | image_pair = np.concatenate((img1_arr,img2_arr),axis=-1) 36 | 37 | result = { 38 | 'image_pair': image_pair[np.newaxis,:], 39 | 'image1': img1_arr[np.newaxis,:], # first image 40 | 'image2_2': 
img2_2_arr[np.newaxis,:], # second image with (w=64,h=48) 41 | } 42 | return result 43 | 44 | 45 | if tf.test.is_gpu_available(True): 46 | data_format='channels_first' 47 | else: # running on cpu requires channels_last data format 48 | data_format='channels_last' 49 | 50 | # 51 | # DeMoN has been trained for specific internal camera parameters. 52 | # 53 | # If you use your own images try to adapt the intrinsics by cropping 54 | # to match the following normalized intrinsics: 55 | # 56 | # K = (0.89115971 0 0.5) 57 | # (0 1.18821287 0.5) 58 | # (0 0 1 ), 59 | # where K(1,1), K(2,2) are the focal lengths for x and y direction. 60 | # and (K(1,3), K(2,3)) is the principal point. 61 | # The parameters are normalized such that the image height and width is 1. 62 | # 63 | 64 | # read data 65 | img1 = Image.open(os.path.join(examples_dir,'sculpture1.png')) 66 | img2 = Image.open(os.path.join(examples_dir,'sculpture2.png')) 67 | 68 | input_data = prepare_input_data(img1,img2,data_format) 69 | 70 | gpu_options = tf.GPUOptions() 71 | gpu_options.per_process_gpu_memory_fraction=0.8 72 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) 73 | 74 | # init networks 75 | bootstrap_net = BootstrapNet(session, data_format) 76 | iterative_net = IterativeNet(session, data_format) 77 | refine_net = RefinementNet(session, data_format) 78 | 79 | session.run(tf.global_variables_initializer()) 80 | 81 | # load weights 82 | saver = tf.train.Saver() 83 | saver.restore(session,os.path.join(weights_dir,'demon_original')) 84 | 85 | 86 | # run the network 87 | result = bootstrap_net.eval(input_data['image_pair'], input_data['image2_2']) 88 | for i in range(3): 89 | result = iterative_net.eval( 90 | input_data['image_pair'], 91 | input_data['image2_2'], 92 | result['predict_depth2'], 93 | result['predict_normal2'], 94 | result['predict_rotation'], 95 | result['predict_translation'] 96 | ) 97 | rotation = result['predict_rotation'] 98 | translation = result['predict_translation'] 99 | result = refine_net.eval(input_data['image1'],result['predict_depth2']) 100 | 101 | 102 | plt.imshow(result['predict_depth0'].squeeze(), cmap='Greys') 103 | plt.show() 104 | 105 | # try to visualize the point cloud 106 | try: 107 | from depthmotionnet.vis import * 108 | visualize_prediction( 109 | inverse_depth=result['predict_depth0'], 110 | image=input_data['image_pair'][0,0:3] if data_format=='channels_first' else input_data['image_pair'].transpose([0,3,1,2])[0,0:3], 111 | rotation=rotation, 112 | translation=translation) 113 | except ImportError as err: 114 | print("Cannot visualize as pointcloud.", err) 115 | 116 | -------------------------------------------------------------------------------- /examples/example_v2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from PIL import Image 4 | from matplotlib import pyplot as plt 5 | import os 6 | import sys 7 | import argparse 8 | 9 | examples_dir = os.path.dirname(__file__) 10 | weights_dir = os.path.join(examples_dir,'..','weights') 11 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python')) 12 | 13 | from depthmotionnet.v2.networks import * 14 | 15 | parser = argparse.ArgumentParser(description="Runs the v2 network on the example image pair.") 16 | parser.add_argument("--checkpoint", type=str, required=True, help="Path to the checkpoint without the file extension") 17 | args = parser.parse_args() 18 | 19 | 20 | def 
prepare_input_data(img1, img2, data_format): 21 | """Creates the arrays used as input from the two images.""" 22 | # scale images if necessary 23 | if img1.size[0] != 256 or img1.size[1] != 192: 24 | img1 = img1.resize((256,192)) 25 | if img2.size[0] != 256 or img2.size[1] != 192: 26 | img2 = img2.resize((256,192)) 27 | img2_2 = img2.resize((64,48)) 28 | 29 | # transform range from [0,255] to [-0.5,0.5] 30 | img1_arr = np.array(img1).astype(np.float32)/255 -0.5 31 | img2_arr = np.array(img2).astype(np.float32)/255 -0.5 32 | img2_2_arr = np.array(img2_2).astype(np.float32)/255 -0.5 33 | 34 | if data_format == 'channels_first': 35 | img1_arr = img1_arr.transpose([2,0,1]) 36 | img2_arr = img2_arr.transpose([2,0,1]) 37 | img2_2_arr = img2_2_arr.transpose([2,0,1]) 38 | image_pair = np.concatenate((img1_arr,img2_arr), axis=0) 39 | else: 40 | image_pair = np.concatenate((img1_arr,img2_arr),axis=-1) 41 | 42 | result = { 43 | 'image_pair': image_pair[np.newaxis,:], 44 | 'image1': img1_arr[np.newaxis,:], # first image 45 | 'image2_2': img2_2_arr[np.newaxis,:], # second image with (w=64,h=48) 46 | } 47 | return result 48 | 49 | 50 | if tf.test.is_gpu_available(True): 51 | data_format='channels_first' 52 | else: # running on cpu requires channels_last data format 53 | print('Running this example requires a GPU') 54 | sys.exit(1) 55 | 56 | # 57 | # DeMoN has been trained for specific internal camera parameters. 58 | # 59 | # If you use your own images try to adapt the intrinsics by cropping 60 | # to match the following normalized intrinsics: 61 | # 62 | # K = (0.89115971 0 0.5) 63 | # (0 1.18821287 0.5) 64 | # (0 0 1 ), 65 | # where K(1,1), K(2,2) are the focal lengths for x and y direction. 66 | # and (K(1,3), K(2,3)) is the principal point. 67 | # The parameters are normalized such that the image height and width is 1. 
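#
# For illustration only (a sketch, not used by this script): the pixel-space
# intrinsics for the 256x192 network input follow by scaling the normalized values
# with the image size, as done in create_dataset_and_use_readerop.py:
#
#   K = np.eye(3)
#   K[0,0] = 0.89115971 * 256   # fx in pixels
#   K[1,1] = 1.18821287 * 192   # fy in pixels
#   K[0,2] = 0.5 * 256          # cx in pixels
#   K[1,2] = 0.5 * 192          # cy in pixels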
68 | # 69 | 70 | # read data 71 | img1 = Image.open(os.path.join(examples_dir,'sculpture1.png')) 72 | img2 = Image.open(os.path.join(examples_dir,'sculpture2.png')) 73 | 74 | input_data = prepare_input_data(img1,img2,data_format) 75 | 76 | gpu_options = tf.GPUOptions() 77 | gpu_options.per_process_gpu_memory_fraction=0.8 78 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) 79 | 80 | # init networks 81 | bootstrap_net = BootstrapNet(session) 82 | iterative_net = IterativeNet(session) 83 | refine_net = RefinementNet(session) 84 | 85 | session.run(tf.global_variables_initializer()) 86 | 87 | # load weights 88 | saver = tf.train.Saver() 89 | saver.restore(session, args.checkpoint) 90 | 91 | 92 | # run the network 93 | result = bootstrap_net.eval(input_data['image_pair'], input_data['image2_2']) 94 | for i in range(3): 95 | result = iterative_net.eval( 96 | input_data['image_pair'], 97 | input_data['image2_2'], 98 | result['predict_depth2'], 99 | result['predict_normal2'], 100 | result['predict_rotation'], 101 | result['predict_translation'] 102 | ) 103 | rotation = result['predict_rotation'] 104 | translation = result['predict_translation'] 105 | result = refine_net.eval(input_data['image1'],result['predict_depth2'], result['predict_normal2']) 106 | 107 | 108 | plt.imshow(result['predict_depth0'].squeeze(), cmap='Greys') 109 | plt.show() 110 | 111 | # try to visualize the point cloud 112 | try: 113 | from depthmotionnet.vis import * 114 | visualize_prediction( 115 | inverse_depth=result['predict_depth0'], 116 | image=input_data['image_pair'][0,0:3] if data_format=='channels_first' else input_data['image_pair'].transpose([0,3,1,2])[0,0:3], 117 | rotation=rotation, 118 | translation=translation) 119 | except ImportError as err: 120 | print("Cannot visualize as pointcloud.", err) 121 | 122 | -------------------------------------------------------------------------------- /examples/sculpture1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture1.png -------------------------------------------------------------------------------- /examples/sculpture2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture2.png -------------------------------------------------------------------------------- /examples/sculpture_Rt1.txt: -------------------------------------------------------------------------------- 1 | 1 0 0 0 2 | 0 1 0 0 3 | 0 0 1 0 4 | -------------------------------------------------------------------------------- /examples/sculpture_Rt2.txt: -------------------------------------------------------------------------------- 1 | 9.634469799073742680e-01 -7.059393257190359205e-02 2.584306746332428184e-01 -9.577744007110595703e-01 2 | 6.105031979073534398e-02 9.971296798444659881e-01 4.478012982028589661e-02 -1.465892046689987183e-01 3 | -2.608501013241291688e-01 -2.736600550511965324e-02 9.649913607809581517e-01 2.473454177379608154e-01 4 | -------------------------------------------------------------------------------- /examples/sculpture_depth1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture_depth1.npy 
-------------------------------------------------------------------------------- /examples/sculpture_depth2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture_depth2.npy -------------------------------------------------------------------------------- /multivih5datareaderop/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package( HDF5 REQUIRED ) 2 | find_package( OpenCV QUIET COMPONENTS core highgui imgproc) 3 | find_package( OpenMP REQUIRED ) 4 | set(Python_ADDITIONAL_VERSIONS 3) 5 | find_package( PythonInterp REQUIRED ) 6 | 7 | include(ExternalProject) 8 | 9 | # 10 | # Download some not so common dependencies 11 | # 12 | 13 | # webp 14 | ExternalProject_Add( 15 | webp 16 | PREFIX "${CMAKE_BINARY_DIR}/webp" 17 | URL "http://downloads.webmproject.org/releases/webp/libwebp-0.5.1.tar.gz" 18 | # do not update 19 | UPDATE_COMMAND "" 20 | CONFIGURE_COMMAND CFLAGS=-fPIC ../webp/configure --disable-gl --disable-png --disable-jpeg --disable-tiff --disable-gif --disable-wic 21 | BUILD_COMMAND "make" 22 | # do not install 23 | INSTALL_COMMAND "" 24 | ) 25 | ExternalProject_Get_Property( webp SOURCE_DIR BINARY_DIR ) 26 | set( webp_INCLUDE_DIR "${SOURCE_DIR}/src" ) 27 | set( webp_LIBRARY "${BINARY_DIR}/src/.libs/libwebp.a" ) 28 | 29 | 30 | # lz4 31 | ExternalProject_Add( 32 | lz4 33 | PREFIX "${CMAKE_BINARY_DIR}/lz4" 34 | URL "https://github.com/lz4/lz4/archive/v1.7.5.tar.gz" 35 | # do not update 36 | UPDATE_COMMAND "" 37 | # copy SOURCE_DIR CMakeLists.txt 38 | PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/multivih5datareaderop/lz4_cmakelists.txt /CMakeLists.txt 39 | CMAKE_CACHE_ARGS "-DLZ4_BUNDLED_MODE:string=off" "-DBUILD_STATIC_LIBS:string=on" "-DBUILD_SHARED_LIBS:string=on" 40 | # do not install 41 | INSTALL_COMMAND "" 42 | ) 43 | ExternalProject_Get_Property( lz4 SOURCE_DIR BINARY_DIR ) 44 | set( lz4_INCLUDE_DIR "${SOURCE_DIR}/lib" ) 45 | set( lz4_STATIC_LIB "${BINARY_DIR}/contrib/cmake_unofficial/liblz4.a" ) 46 | 47 | 48 | # json for modern c++ 49 | message( STATUS "Downloading json" ) 50 | file( DOWNLOAD "https://github.com/nlohmann/json/releases/download/v2.1.1/json.hpp" 51 | "${CMAKE_BINARY_DIR}/json_for_modern_cpp/json.hpp" SHOW_PROGRESS ) 52 | set( json_for_modern_cpp_INCLUDE_DIR "${CMAKE_BINARY_DIR}/json_for_modern_cpp/" ) 53 | 54 | 55 | # half 56 | message( STATUS "Downloading half" ) 57 | file( DOWNLOAD "https://sourceforge.net/p/half/code/HEAD/tree/tags/release-1.12.0/include/half.hpp?format=raw" 58 | "${CMAKE_BINARY_DIR}/half/include/half.hpp" SHOW_PROGRESS ) 59 | set( half_INCLUDE_DIR "${CMAKE_BINARY_DIR}/half/include" ) 60 | 61 | 62 | # retrieve tensorflow include dir and lib dir 63 | execute_process( COMMAND ${PYTHON_EXECUTABLE} "-c" "from __future__ import print_function; import tensorflow as tf; print(tf.sysconfig.get_include(), end='')" 64 | OUTPUT_VARIABLE TENSORFLOW_INCLUDE_DIR ) 65 | execute_process( COMMAND ${PYTHON_EXECUTABLE} "-c" "from __future__ import print_function; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='')" 66 | OUTPUT_VARIABLE TENSORFLOW_LIB_DIR ) 67 | find_library( TENSORFLOW_FRAMEWORK_LIB tensorflow_framework PATHS "${TENSORFLOW_LIB_DIR}" NO_DEFAULT_PATH ) 68 | message( STATUS "${TENSORFLOW_INCLUDE_DIR}" ) 69 | message( STATUS "${TENSORFLOW_LIB_DIR}" ) 70 | message( STATUS "${TENSORFLOW_FRAMEWORK_LIB}" ) 71 | 72 | file( GLOB 
multivih5datareaderop_SOURCES *.cpp *.cc ) 73 | 74 | 75 | add_library( multivih5datareaderop SHARED ${multivih5datareaderop_SOURCES} ) 76 | # do not add 'lib' prefix 77 | set_target_properties( multivih5datareaderop PROPERTIES PREFIX "" ) 78 | set_target_properties( multivih5datareaderop PROPERTIES DEBUG_POSTFIX "_debug" ) 79 | set_target_properties( multivih5datareaderop PROPERTIES COMPILE_FLAGS "-std=c++11 ${OpenMP_CXX_FLAGS} -Wabi-tag" ) 80 | set_target_properties( multivih5datareaderop PROPERTIES LINK_FLAGS "${OpenMP_CXX_FLAGS}" ) 81 | # use old ABI with gcc 5.x 82 | set_target_properties( multivih5datareaderop PROPERTIES COMPILE_DEFINITIONS "_GLIBCXX_USE_CXX11_ABI=0" ) 83 | add_dependencies( multivih5datareaderop lz4 webp ) 84 | 85 | target_include_directories( multivih5datareaderop PRIVATE 86 | ${TENSORFLOW_INCLUDE_DIR} 87 | ${TENSORFLOW_INCLUDE_DIR}/external/nsync/public/ 88 | ${lz4_INCLUDE_DIR} 89 | ${webp_INCLUDE_DIR} 90 | ${HDF5_INCLUDE_DIRS} 91 | ${OpenCV_INCLUDE_DIRS} 92 | ${json_for_modern_cpp_INCLUDE_DIR} 93 | ${half_INCLUDE_DIR} 94 | ) 95 | 96 | target_link_libraries( multivih5datareaderop 97 | ${lz4_STATIC_LIB} 98 | ${webp_LIBRARY} 99 | ${HDF5_LIBRARIES} 100 | ${OpenCV_LIBS} 101 | ) 102 | if( TENSORFLOW_FRAMEWORK_LIB ) 103 | target_link_libraries( multivih5datareaderop ${TENSORFLOW_FRAMEWORK_LIB} ) 104 | endif() 105 | 106 | -------------------------------------------------------------------------------- /multivih5datareaderop/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Multi View H5 Data Reader 3 | 4 | This document describes the op and the data format used for training DeMoN. 5 | 6 | ## Building the op 7 | 8 | To build the op, create a ```build``` directory inside the demon root directory. 9 | The location of the ```build``` directory is important, because the python package 10 | ```depthmotionnet.datareader``` will search this path for the data reader library. 11 | 12 | Then run cmake inside the build folder to configure and generate the build 13 | files. 14 | If you use a virtualenv make sure to activate it before running cmake. 15 | 16 | Assuming the virtualenv is managed with ```pew``` and named ```demon_venv``` 17 | and the demon root directory is stored in the variable ```DEMON_DIR``` we can 18 | build the data reader op with: 19 | ```bash 20 | cd $DEMON_DIR # change to the demon root directory 21 | 22 | mkdir build 23 | cd build 24 | 25 | pew in demon_venv 26 | cmake .. 27 | make 28 | ``` 29 | 30 | ### Dependencies 31 | The op depends on the following libraries: 32 | ``` 33 | cmake 3.5.1 34 | tensorflow 1.0.0 35 | hdf5 1.8.16 36 | OpenCV 3.2.0 37 | ``` 38 | The versions match the configuration we have tested on an ubuntu 16.04 system. 39 | 40 | In addition, the cmake build script will download and build ```lz4```, ```webp```, [```json```](https://github.com/nlohmann/json) and [```half```](http://half.sourceforge.net/) 41 | 42 | 43 | ## `multi_vi_h5_data_reader` Op 44 | 45 | ```multi_vi_h5_data_reader(num_outputs, param_json)``` 46 | 47 | Reads data in the multi view h5 format. 48 | 49 | This op reads one or more hdf5 files and generates data samples. 50 | The data is returned in NCHW format. 51 | 52 | #### Args 53 | 54 | **num_outputs**: The number of data tensors to return. This number depends on the 55 | values passed in `param_json`. 56 | 57 | **param_json**: 58 | The parameters passed to the reader in JSON format as a string. 
59 | It is recommended to create a python dict with all parameters first and then convert 60 | the dict to str with json.dumps(). 61 | Here is an example python dict with comments and good values for training: 62 | 63 | ```python 64 | { 65 | 'batch_size': 32, # the batch size 66 | 'test_phase': False, # If True enables testing mode which disables randomization. 67 | 68 | # the number of threads used for building batches. For testing set this to 1. 69 | 'builder_threads': 4, 70 | 71 | 'inverse_depth': True, # return depth with inverse depth values (1/z) 72 | 73 | # return the motion as one of 'ANGLEAXIS6', 'ANGLEAXIS7', 'QUATERNION', 'FMATRIX'. 74 | # The translation is always stored in the last 3 elements. 75 | # 76 | # ANGLEAXIS6: uses 3 elements for the rotation as angle axis [aa0, aa1, aa2, tx, ty, tz] 77 | # ANGLEAXIS7: uses 4 elements for the rotation as angle axis [angle, ax, ay, az, tx, ty, tz] 78 | # QUATERNION: uses 4 elements for the rotation as quaternion [qw, qx, qy, qz, tx, ty, tz] 79 | # FMATRIX: returns a fundamental matrix in column major order without the last element 80 | # which is defined as 1. [f11, f21, f31, f12, f22, f32, f13, f23] 81 | 'motion_format': 'ANGLEAXIS6', 82 | 83 | # if True normalizes the translation to ||t||=1 and scales the depth values accordingly. 84 | 'norm_trans_scale_depth': True, 85 | 86 | # the output image/depth height and width. 87 | # Downsampling is supported. 88 | 'scaled_height': 192, 89 | 'scaled_width': 256, 90 | 91 | # the number of scenes to keep in memory. A bigger pool improves variety when 92 | # generating a new batch item, but requires more main memory. 93 | # For testing a small value like 5 is sufficient. 94 | 'scene_pool_size': 500, 95 | 96 | # The requested output tensors. 97 | 'top_output': ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS'), 98 | 99 | # probabilities for geometric augmentations. 100 | # There is a 50% chance of rotating the image and cameras by 180 deg followed 101 | # by a 50% chance of mirroring the x-axis. 102 | # Set this to 0 for testing. 103 | 'augment_rot180': 0.5, 104 | 'augment_mirror_x': 0.5, 105 | 106 | # source is a list of dicts, which define the paths to the hdf5 files and the 107 | # importance of each file. 108 | # In the example below the reader will sample from data2.h5 twice as often as 109 | # from data1.h5. 110 | 'source': [ 111 | {'path': '/path/to/data1.h5', 'weight': [{'t': 0, 'v': 1.0}]}, 112 | {'path': '/path/to/data2.h5', 'weight': [{'t': 0, 'v': 2.0}]}, 113 | ], 114 | # for testing only 1 source must be used. Multiple files can be concatenated with ';'. 115 | #'source': [ {'path': '/path/to/test1.h5;/path/to/test2.h5'}, ], 116 | } 117 | ``` 118 | 119 | #### Outputs 120 | 121 | **info**: The info tensor stores information about the internal buffers. 122 | It stores the following information: 123 | - required number of test iterations 124 | - current batch buffer size 125 | - maximum batch buffer size 126 | - current reader buffer size 127 | - maximum reader buffer size 128 | 129 | **sample_id**: A tensor storing a string with the id for each batch item. 130 | A newline symbol is used to separate the individual id strings. 131 | 132 | **output**: A list of tensors with the requested data. 133 | 134 | The order of tensors is always: 135 | ['IMAGE_PAIR', 'MOTION', 'FLOW', 'DEPTH', 'INTRINSICS', 'DEPTHMASKS','SAMPLE_IDS']. 136 | Depending on the 'top_output' parameter in 'param_json' not all tensors 137 | may be present.
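A rough, hedged sketch of wiring the op up with a parameter dict like the one above and unpacking the outputs described in this section. It assumes the op returns the three outputs in the order listed (`info`, `sample_id`, `output`) and that `num_outputs` equals the number of entries in `'top_output'`; the shipped example [```examples/create_dataset_and_use_readerop.py```](../examples/create_dataset_and_use_readerop.py) is the authoritative reference.

```python
import json
import tensorflow as tf
from depthmotionnet.datareader import multi_vi_h5_data_reader

params = {
    'batch_size': 32,
    'test_phase': False,
    'top_output': ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS'),
    'source': [{'path': '/path/to/data1.h5', 'weight': [{'t': 0, 'v': 1.0}]}],
}
# num_outputs is assumed here to be the number of tensors requested in 'top_output'
info, sample_id, output = multi_vi_h5_data_reader(len(params['top_output']), json.dumps(params))
image_pair, motion, depth, intrinsics = output

with tf.Session() as sess:
    image_pair_val, motion_val = sess.run([image_pair, motion])
```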
138 | 139 | The 'IMAGE_PAIR' tensor stores the image pair as a 6 channel RGBRGB image. 140 | 141 | The 'MOTION' tensor stores the motion from the first to the second camera in 142 | the requested format specified by the 'motion_format' parameter in 'param_json'. 143 | 144 | The 'FLOW' tensor stores the optical flow from the first to the second image 145 | with 2 channels. The first channel stores the x component of the flow vector. 146 | 147 | The 'DEPTH' tensor stores the depth map for the first image. 148 | 149 | The 'INTRINSICS' tensor stores the normalized intrinsics as [fx fy cx cy]. 150 | fx,fy are the x and y components of the normalized focal length. 151 | cx,cy are the x and y components of the normalized principal point. 152 | 153 | The 'DEPTHMASKS' tensor masks the points where it is possible to compute a depth value. 154 | 155 | 156 | 157 | **See also the example [```examples/create_dataset_and_use_readerop.py```](../examples/create_dataset_and_use_readerop.py) in the examples folder for using this op.** 158 | 159 | 160 | 161 | ## HDF5 Data Format 162 | 163 | Datasets are stored as objects in HDF5 files. 164 | To minimize data IO, we group images that show the same scene. 165 | A valid group with a unique name "group" stores the following datasets: 166 | 167 | ``` 168 | /group/frames/t0/v0/image 169 | /group/frames/t0/v0/depth 170 | /group/frames/t0/v0/camera 171 | /group/frames/t0/v1/image 172 | /group/frames/t0/v1/depth 173 | /group/frames/t0/v1/camera 174 | ... 175 | ``` 176 | 177 | `t0/v0` means viewpoint 0 at time 0. The time is always `t0`. The number of 178 | viewpoints must always be >= 2. 179 | For test datasets the number of viewpoints is always 2. 180 | 181 | 182 | ### Reserved groups 183 | All groups starting with a '.' e.g. `/.config` are reserved and are not treated as data samples. 184 | 185 | ### `image` dataset 186 | 187 | Images are stored in webp format as 1D char arrays. 188 | 189 | Attributes: 190 | * format : scalar string attribute with value `"webp"` 191 | 192 | 193 | 194 | 195 | ### `depth` dataset 196 | 197 | Depth maps are stored as half precision floats (16-bit) with LZ4 compression. 198 | 199 | Attributes: 200 | 201 | * format : scalar string attribute with value `"lz4half"` 202 | * depth_metric : scalar string attribute with value `"camera_z"` or `"ray_length"` 203 | * extents : 1D int array with [height, width] 204 | 205 | 206 | 207 | ### `camera` dataset 208 | 209 | The camera dataset stores the intrinsic and extrinsic parameters for the viewpoint. 210 | Camera parameters are stored as 1D double data sets. 211 | 212 | Attributes: 213 | 214 | * format : scalar string attribute with value `"pinhole"` 215 | 216 | Interpretation: 217 | 218 | `[fx fy skew cx cy r11 r21 r31 r12 r22 r32 r13 r23 r33 tx ty tz]` 219 | 220 | The internal parameters fx, fy, cx, cy are compatible with the image dimensions of the image data set. 221 | 222 | 223 | ### `t0` group 224 | 225 | The time group `t0` stores an attribute `viewpoint_pairs` which enumerates all 226 | valid image pair combinations. 227 | 228 | Attribute: 229 | * viewpoint_pairs : 1D int vector. Used by the multiviewh5datareader to generate image pairs. 230 | Two consecutive values describe a pair. E.g. the vector `[0 1 0 2]` describes the pairs (0,1) and (0,2). 231 | 232 | For test datasets the value of the `viewpoint_pairs` attribute must be `[0 1]`.
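As a minimal, hedged sketch (assuming the helpers from `depthmotionnet.dataset_tools` are used for the webp and lz4half encoding), a conforming two-view group can be written like this; `img1`, `img2` (PIL images), the float32 depth maps `depth1`, `depth2` and the camera parameters `K`, `R1`, `t1`, `R2`, `t2` are placeholders for your own data:

```python
import h5py
import numpy as np
from depthmotionnet.dataset_tools.view import View
from depthmotionnet.dataset_tools.view_io import write_view

views = [
    View(R=R1, t=t1, K=K, image=img1, depth=depth1, depth_metric='camera_z'),
    View(R=R2, t=t2, K=K, image=img2, depth=depth2, depth_metric='camera_z'),
]

with h5py.File('mydataset.h5', 'w') as f:
    for i, v in enumerate(views):
        write_view(f.require_group('scene0/frames/t0/v{0}'.format(i)), v)
    # a test dataset must define exactly the pair (0,1)
    f['scene0/frames/t0'].attrs['viewpoint_pairs'] = np.array([0, 1], dtype=np.int32)
```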
233 | 234 | 235 | 236 | **See also the [```examples/create_dataset_and_use_readerop.py```](../examples/create_dataset_and_use_readerop.py) in the examples folder.** 237 | -------------------------------------------------------------------------------- /multivih5datareaderop/lz4_cmakelists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required( VERSION 3.5.1 ) 2 | add_subdirectory( contrib/cmake_unofficial ) 3 | -------------------------------------------------------------------------------- /multivih5datareaderop/simpleh5file.h: -------------------------------------------------------------------------------- 1 | // 2 | // DeMoN - Depth Motion Network 3 | // Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see . 17 | // 18 | #ifndef SIMPLEH5FILE_H_ 19 | #define SIMPLEH5FILE_H_ 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | /*! 26 | * This class provides basic functions to manipulate hdf5 files 27 | */ 28 | class SimpleH5File 29 | { 30 | public: 31 | //! File modes 32 | enum FileMode { TRUNCATE, //!< Create or overwrite file 33 | READ, //!< Read only file access 34 | READ_WRITE //!< Read write access 35 | }; 36 | //! Compression settings 37 | enum Compression { UNCOMPRESSED, /*GZIP_1, GZIP_2,...*/ }; 38 | 39 | /*! 40 | * ctor. 41 | * \param use_locking If true uses mutexes to allow accessing the object 42 | * from multiple threads. 43 | */ 44 | SimpleH5File(bool use_locking=false); 45 | 46 | 47 | /*! 48 | * Creates a SimpleH5File object and opens the specified hdf5 file. 49 | * 50 | * \param filename Filename of the hdf5 file. E.g. 'myfile.h5' 51 | * \param mode The mode for opening the file. 52 | * \param use_locking If true uses mutexes to allow accessing the object 53 | * from multiple threads. 54 | */ 55 | SimpleH5File( const std::string& filename, FileMode mode=READ, bool use_locking=false ); 56 | 57 | 58 | /*! 59 | * dtor. Closes the opened file. 60 | */ 61 | virtual ~SimpleH5File(); 62 | 63 | 64 | /*! 65 | * Opens the specified hdf5 file. If a hdf5 file was already opened it is 66 | * closed before opening the new file. 67 | * 68 | * \param filename Filename of the hdf5 file. E.g. 'myfile.h5' 69 | * \param mode The mode for opening the file. 70 | */ 71 | void open( const std::string& filename, FileMode mode=READ ); 72 | 73 | 74 | /*! 75 | * Returns whether a file is open. 76 | */ 77 | bool isOpen() const; 78 | 79 | 80 | /*! 81 | * Returns whether the object uses locking to allow multiple thread using 82 | * this object simultaneously. 83 | */ 84 | bool useLocking() const; 85 | 86 | 87 | /*! 88 | * Closes the file. Has no effect if no file is open. 89 | */ 90 | void close(); 91 | 92 | 93 | /*! 94 | * Creates a new group and creates parent directories if necessary. 95 | * 96 | * \param path Path of the new group. E.g. 
'/group1/group2/newGroup' 97 | * creates 'newGroup' and 'group1', 'group2' if they dont exist. 98 | */ 99 | void makeGroup( const std::string& path ); 100 | 101 | 102 | /*! 103 | * Removes a group or dataset 104 | * 105 | * \param path The path of the group or dataset to be removed. 106 | * E.g. '/group/mydataset' removes 'mydataset' 107 | */ 108 | void remove( const std::string& path ); 109 | 110 | 111 | /*! 112 | * Returns whether the object with the specified path is a group. 113 | */ 114 | bool isGroup( const std::string& path ); 115 | 116 | 117 | /*! 118 | * Returns whether the object with the specified path is a dataset. 119 | */ 120 | bool isDataset( const std::string& path ); 121 | 122 | /*! 123 | * Returns true if the native type and the dataset type match. 124 | * If 'path' is not a dataset then false is returned. 125 | */ 126 | template 127 | bool datasetDataType( const std::string& path ); 128 | 129 | /*! 130 | * Returns whether the path points to a dataset or group 131 | */ 132 | bool exists( const std::string& path ); 133 | 134 | 135 | /*! 136 | * Lists all objects (datasets and groups) with the parent specified by path. 137 | * 138 | * \param path E.g. '/' lists all groups and datasets of the root group. 139 | */ 140 | std::vector listObjects( const std::string& path ); 141 | 142 | 143 | /*! 144 | * Lists all datasets with the parent specified by path. 145 | * 146 | * \param path E.g. '/' lists all datasets of the root group. 147 | */ 148 | std::vector listDatasets( const std::string& path ); 149 | 150 | 151 | /*! 152 | * Lists all groups with the parent specified by path. 153 | * 154 | * \param path E.g. '/' lists all groups of the root group. 155 | */ 156 | std::vector listGroups( const std::string& path ); 157 | 158 | 159 | /*! 160 | * Lists all attributes of a dataset or group specified by path. 161 | * 162 | * \param path E.g. '/mydataset' lists all attributes of 'mydataset'. 163 | */ 164 | std::vector listAttributes( const std::string& path ); 165 | 166 | 167 | 168 | /*! 169 | * Writes a dataset. Any existing dataset will be overwritten. 170 | * This command will also create parent groups if necessary. 171 | * 172 | * \param data Pointer to the data 173 | * \param dims Dimensions of the dataset to write. The extent of each 174 | * dimension is defined in elements. 175 | * \param path Path to the dataset e.g. '/group/dataset'. 176 | * \param compress Reserved for future use to specify the compression filter 177 | */ 178 | template 179 | void writeDataset( const T* data, const std::vector& dims, 180 | const std::string& path, 181 | Compression compress = UNCOMPRESSED ); 182 | 183 | /*! 184 | * Reads the dataset to the given buffer. 185 | * 186 | * \param data The buffer for reading the dataset. The buffer must be 187 | * allocated by the user. Use getDatasetExtents() to retrieve 188 | * the size of the dataset. 189 | * \param path Path to the dataset e.g. '/group/dataset'. 190 | */ 191 | template 192 | void readDataset( T* data, const std::string& path ); 193 | 194 | 195 | /*! 196 | * Returns the byte offset of the dataset in the file and the number 197 | * of elements. 198 | * 199 | * \param path Path to the dataset e.g. '/group/dataset'. 200 | * \return Returns the byte offset of the dataset in the file and the number 201 | * of elements 202 | */ 203 | std::pair getDatasetOffsetAndSize( const std::string& path ); 204 | 205 | 206 | /*! 207 | * Returns whether the dataset is contiguous or not. 208 | * 209 | * \param path Path to the dataset e.g. '/group/dataset'. 
210 | * \return Returns true if the dataset is contiguous 211 | */ 212 | bool isDatasetContiguous( const std::string& path ); 213 | 214 | 215 | /*! 216 | * Returns the extents of the dataset. 217 | * 218 | * \param path Path to the dataset e.g. '/group/dataset'. 219 | * \return Returns a vector containing the extents. The size of the vector 220 | * corresponds to the number of dimensions of the dataset 221 | */ 222 | std::vector getDatasetExtents( const std::string& path ); 223 | 224 | 225 | /*! 226 | * Writes an attribute. An attribute is attached to a group or a dataset. 227 | * Overwrites existing attributes. 228 | * 229 | * \param value The value of the attribute. 230 | * \param attr_name The name of the attribute e.g. 'my_int_attribute' 231 | * \param path The path of the group or dataset e.g. '/group' 232 | */ 233 | template 234 | void writeAttribute( const T& value, 235 | const std::string& attr_name, const std::string& path ); 236 | 237 | 238 | //! \sa writeAttribute(const T& value, const std::string&, const std::string&) 239 | template 240 | void writeAttribute( const std::vector& value, 241 | const std::string& attr_name, const std::string& path ); 242 | 243 | //! \sa writeAttribute(const T& value, const std::string&, const std::string&) 244 | void writeAttribute( const char str[], 245 | const std::string& attr_name, const std::string& path ); 246 | 247 | //! \sa writeAttribute(const T& value, const std::string&, const std::string&) 248 | void writeAttribute( const std::string& str, 249 | const std::string& attr_name, const std::string& path ); 250 | 251 | 252 | /*! 253 | * Reads an attribute. An attribute is attached to a group or a dataset. 254 | * 255 | * \param value The value that is written to the attribute 256 | * \param attr_name The name of the attribute e.g. 'my_int_attribute' 257 | * \param path The path of the group or dataset e.g. '/group' 258 | */ 259 | template 260 | void readAttribute( T& value, 261 | const std::string& attr_name, const std::string& path ); 262 | 263 | //! \sa readAttribute(T& value, const std::string&, const std::string&) 264 | template 265 | void readAttribute( std::vector& value, 266 | const std::string& attr_name, const std::string& path ); 267 | 268 | //! \sa readAttribute(T& value, const std::string&, const std::string&) 269 | void readAttribute( std::string& str, 270 | const std::string& attr_name, const std::string& path ); 271 | 272 | /*! 273 | * Removes an attribute. 274 | * 275 | * \param attr_name Name of the attribute. 276 | * \param path The path of the group or dataset e.g. '/group' 277 | */ 278 | void removeAttribute( const std::string& attr_name, const std::string& path ); 279 | 280 | 281 | /*! 282 | * Checks the existence of an attribute. 283 | * 284 | * \param attr_name Name of the attribute. 285 | * \param path The path of the group or dataset e.g. '/group' 286 | * \return Returns true if the attribute exists. 287 | */ 288 | bool existsAttribute( const std::string& attr_name, const std::string& path ); 289 | 290 | 291 | /*! 292 | * Returns the H5O_info_t struct for the object with the specified path. 293 | * 294 | * \param path Path to an object (group or dataset) e.g. '/mydataset' 295 | * \return The H5O_info_t struct of the object. 296 | */ 297 | H5O_info_t getObjectInfo( const std::string& path ); 298 | 299 | 300 | /*! 301 | * Checks if a file is a hdf5 file 302 | * 303 | * \param filename path to the file 304 | * \return Returns true if the file is a hdf5 file. 305 | * Returns false if the file is not a hdf5 file. 
306 | * Returns false if the file does not exist or reading fails. 307 | */ 308 | static bool isHDF5( const std::string& filename ); 309 | 310 | 311 | /*! 312 | * Simplifies a hdf5 path. This function removes leading and trailing 313 | * whitespaces and removes rendundant multiple '/'. 314 | * 315 | * \return The simplified path. 316 | */ 317 | static std::string simplifyPath( const std::string& path ); 318 | 319 | protected: 320 | 321 | FileMode mode; 322 | 323 | hid_t file_id; //! hdf5 file identifier 324 | 325 | private: 326 | SimpleH5File( const SimpleH5File& other ):use_locking(false) {} 327 | SimpleH5File& operator=( const SimpleH5File& other ) { return *this; } 328 | 329 | 330 | /*! 331 | * Creates a dataset. This command will also create parent groups if 332 | * necessary. 333 | * 334 | * \param dataset_path Path to the dataset e.g. '/group/dataset'. 335 | * \param dims Dimensions of the dataset to write. The extent of 336 | * each dimension is defined in elements. 337 | * \param compress Reserved for future use to specify the compression 338 | * filter 339 | */ 340 | template 341 | void createDataset( const std::string& dataset_path, 342 | const std::vector& dims, 343 | Compression compress = UNCOMPRESSED ); 344 | 345 | bool is_open; 346 | const bool use_locking; 347 | 348 | 349 | }; 350 | 351 | 352 | 353 | 354 | 355 | #endif /* SIMPLEH5FILE_H_ */ 356 | -------------------------------------------------------------------------------- /python/depthmotionnet/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | -------------------------------------------------------------------------------- /python/depthmotionnet/datareader/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # 18 | import os 19 | import tensorflow 20 | from .helpers import * 21 | 22 | # try to import the multivih5datareaderop from the 'build' directory 23 | if 'MULTIVIH5DATAREADEROP_LIB' in os.environ: 24 | _readerlib_path = os.environ['MULTIVIH5DATAREADEROP_LIB'] 25 | else: 26 | _readerlib_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], '..', '..', '..', 'build','multivih5datareaderop', 'multivih5datareaderop.so')) 27 | 28 | readerlib = None 29 | multi_vi_h5_data_reader = None 30 | if os.path.isfile(_readerlib_path): 31 | readerlib = tensorflow.load_op_library(_readerlib_path) 32 | print('Using {0}'.format(_readerlib_path)) 33 | multi_vi_h5_data_reader = readerlib.multi_vi_h5_data_reader 34 | 35 | -------------------------------------------------------------------------------- /python/depthmotionnet/datareader/helpers.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | 19 | def add_sources(params, dataset_files, weight, normalize=True, concatenate=False): 20 | """Add sources to the parameters for the multi_vi_h5_data_reader op. 21 | 22 | params: dict 23 | dict with the parameters for the multi_vi_h5_data_reader op. 24 | 25 | dataset_files: list of str 26 | List of h5 file paths to be added as sources. 27 | 28 | weight: float 29 | The sampling importance. 30 | Higher values means the reader op samples more often from these files. 31 | 32 | normalize: bool 33 | If True the weight for each file will be divided by the number of files. 34 | If concatenate is True this parameter has no effect. 35 | 36 | concatenate: bool 37 | If True adds only a single source that contains all files. 38 | 39 | """ 40 | if not 'source' in params: 41 | params['source'] = [] 42 | 43 | if concatenate: 44 | # generate a single source with all paths 45 | source = {'path': ';'.join(dataset_files)} 46 | params['source'].append(source) 47 | 48 | else: 49 | # generate for each path a new source 50 | for item in dataset_files: 51 | w = weight 52 | if normalize: 53 | w /= len(dataset_files) 54 | 55 | source = {'path': item, 'weight': w} 56 | params['source'].append(source) 57 | 58 | return params 59 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | from .view import View 19 | from .view_io import * 20 | from .view_tools import * 21 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/helpers.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import numpy as np 19 | from PIL import Image 20 | from scipy.ndimage.filters import laplace 21 | 22 | 23 | def measure_sharpness(img): 24 | """Measures the sharpeness of an image using the variance of the laplacian 25 | 26 | img: PIL.Image 27 | 28 | Returns the variance of the laplacian. Higher values mean a sharper image 29 | """ 30 | img_gray = np.array(img.convert('L'), dtype=np.float32) 31 | return np.var(laplace(img_gray)) 32 | 33 | 34 | def concat_images_vertical(images): 35 | """Concatenates a list of PIL.Image in vertical direction 36 | 37 | images: list of PIL.Image 38 | 39 | Returns the concatenated image 40 | """ 41 | total_height = 0 42 | total_width = 0 43 | for img in images: 44 | total_width = max(total_width, img.size[0]) 45 | total_height += img.size[1] 46 | result = Image.new('RGB',(total_width,total_height)) 47 | ypos = 0 48 | for img in images: 49 | result.paste(img,(0,ypos)) 50 | ypos += img.size[1] 51 | return result 52 | 53 | 54 | def concat_images_horizontal(images): 55 | """Concatenates a list of PIL.Image in horizontal direction 56 | 57 | images: list of PIL.Image 58 | 59 | Returns the concatenated image 60 | """ 61 | total_height = 0 62 | total_width = 0 63 | for img in images: 64 | total_height = max(total_height, img.size[1]) 65 | total_width += img.size[0] 66 | result = Image.new('RGB',(total_width,total_height)) 67 | xpos = 0 68 | for img in images: 69 | result.paste(img,(xpos,0)) 70 | xpos += img.size[0] 71 | return result 72 | 73 | 74 | def safe_crop_image(image, box, fill_value): 75 | """crops an image and adds a border if necessary 76 | 77 | image: PIL.Image 78 | 79 | box: 4 tuple 80 | (x0,y0,x1,y1) tuple 81 | 82 | fill_value: color value, scalar or tuple 83 | 84 | Returns the cropped image 85 | """ 86 | x0, y0, x1, y1 = box 87 | if x0 >=0 and y0 >= 0 and x1 < image.width and y1 < image.height: 88 | return image.crop(box) 89 | else: 90 | crop_width = x1-x0 91 | crop_height = y1-y0 92 | tmp = Image.new(image.mode, (crop_width, crop_height), fill_value) 93 | safe_box = ( 94 | max(0,min(x0,image.width-1)), 
95 | max(0,min(y0,image.height-1)), 96 | max(0,min(x1,image.width)), 97 | max(0,min(y1,image.height)), 98 | ) 99 | img_crop = image.crop(safe_box) 100 | x = -x0 if x0 < 0 else 0 101 | y = -y0 if y0 < 0 else 0 102 | tmp.paste(image, (x,y)) 103 | return tmp 104 | 105 | 106 | def safe_crop_array2d(arr, box, fill_value): 107 | """crops an array and adds a border if necessary 108 | 109 | arr: numpy.ndarray with 2 dims 110 | 111 | box: 4 tuple 112 | (x0,y0,x1,y1) tuple. x is the column and y is the row! 113 | 114 | fill_value: scalar 115 | 116 | Returns the cropped array 117 | """ 118 | x0, y0, x1, y1 = box 119 | if x0 >=0 and y0 >= 0 and x1 < arr.shape[1] and y1 < arr.shape[0]: 120 | return arr[y0:y1,x0:x1] 121 | else: 122 | crop_width = x1-x0 123 | crop_height = y1-y0 124 | tmp = np.full((crop_height, crop_width), fill_value, dtype=arr.dtype) 125 | safe_box = ( 126 | max(0,min(x0,arr.shape[1]-1)), 127 | max(0,min(y0,arr.shape[0]-1)), 128 | max(0,min(x1,arr.shape[1])), 129 | max(0,min(y1,arr.shape[0])), 130 | ) 131 | x = -x0 if x0 < 0 else 0 132 | y = -y0 if y0 < 0 else 0 133 | safe_width = safe_box[2]-safe_box[0] 134 | safe_height = safe_box[3]-safe_box[1] 135 | tmp[y:y+safe_height,x:x+safe_width] = arr[safe_box[1]:safe_box[3],safe_box[0]:safe_box[2]] 136 | return tmp 137 | 138 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/lz4.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # 18 | from ctypes import * 19 | import os 20 | 21 | # try the version used by the multivih5datareaderop first 22 | try: 23 | _lib_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], '..', '..', '..', 'build','lz4','src','lz4-build','contrib', 'cmake_unofficial', 'liblz4.so')) 24 | liblz4 = CDLL(_lib_path) 25 | except: 26 | # try system version 27 | try: 28 | liblz4 = CDLL('liblz4.so') 29 | except: 30 | raise RuntimeError('Cannot load liblz4.so') 31 | 32 | 33 | def lz4_uncompress(input_data, expected_decompressed_size): 34 | """decompresses the LZ4 compressed data 35 | 36 | input_data: bytes 37 | byte string of the input data 38 | 39 | expected_decompressed_size: int 40 | size of the decompressed output data 41 | 42 | returns the decompressed data as bytes or None on error 43 | """ 44 | assert isinstance(input_data,bytes), "input_data must be of type bytes" 45 | assert isinstance(expected_decompressed_size,int), "expected_decompressed_size must be of type int" 46 | 47 | dst_buf = create_string_buffer(expected_decompressed_size) 48 | status = liblz4.LZ4_decompress_safe(input_data,dst_buf,len(input_data),expected_decompressed_size) 49 | if status != expected_decompressed_size: 50 | return None 51 | else: 52 | return dst_buf.raw 53 | 54 | 55 | 56 | def lz4_compress_bound(input_size): 57 | """Returns the maximum size needed for compressing data with the given input_size""" 58 | assert isinstance(input_size,int), "input_size must be of type int" 59 | 60 | result = liblz4.LZ4_compressBound(c_int(input_size)) 61 | return result 62 | 63 | 64 | 65 | def lz4_compress_HC(src): 66 | """Compresses the input bytes with LZ4 high compression algorithm. 67 | 68 | Returns the compressed bytes array or an empty array on error 69 | """ 70 | assert isinstance(src,bytes), "src must be of type bytes" 71 | max_compressed_size = lz4_compress_bound(len(src)) 72 | dst_buf = create_string_buffer(max_compressed_size) 73 | # written_size = liblz4.LZ4_compress_HC(src, dst_buf, len(src), max_compressed_size, c_int(0)) # new signature. TODO update liblz4 74 | written_size = liblz4.LZ4_compressHC(src, dst_buf, len(src)) 75 | return dst_buf.raw[:written_size] 76 | 77 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/sun3d_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # 18 | import os 19 | import math 20 | import itertools 21 | import h5py 22 | import numpy as np 23 | from PIL import Image 24 | from scipy.ndimage import maximum_filter1d, minimum_filter1d 25 | 26 | from .view import View 27 | from .view_io import * 28 | from .view_tools import * 29 | from .helpers import measure_sharpness 30 | 31 | 32 | def read_frameid_timestamp(files): 33 | """Get frameids and timestamps from the sun3d filenames 34 | 35 | files: list of str 36 | a list of the filenames 37 | 38 | Returns the frameid and timestamp as numpy.array 39 | """ 40 | frameids = [] 41 | timestamps = [] 42 | for f in files: 43 | id_timestamp = f[:-4].split('-') 44 | frameids.append( int(id_timestamp[0]) ) 45 | timestamps.append( int(id_timestamp[1]) ) 46 | return np.asarray(frameids), np.asarray(timestamps) 47 | 48 | 49 | def read_image(filename): 50 | """Read image from a file 51 | 52 | filename: str 53 | 54 | Returns image as PIL Image 55 | """ 56 | image = Image.open(filename) 57 | image.load() 58 | return image 59 | 60 | def read_depth(filename): 61 | """Read depth from a sun3d depth file 62 | 63 | filename: str 64 | 65 | Returns depth as np.float32 array 66 | """ 67 | depth_pil = Image.open(filename) 68 | depth_arr = np.array(depth_pil) 69 | depth_uint16 = depth_arr.astype(np.uint16) 70 | depth_shifted = (depth_uint16 >> 3) | (depth_uint16 << 13) 71 | depth_float = (depth_shifted/1000).astype(np.float32) 72 | return depth_float 73 | 74 | def read_Rt(extrinsics, frame): 75 | """Read camera extrinsics at certain frame 76 | 77 | extrinsics: np array with size (totalframe*3, 4) 78 | 79 | frame: int (starts from 0) 80 | 81 | Returns the rotation and translation 82 | """ 83 | Rt = extrinsics[3*frame:3*frame+3] 84 | R_arr = Rt[0:3,0:3] 85 | t_arr = Rt[0:3,3] 86 | R = R_arr.transpose() 87 | t = -np.dot(R,t_arr) 88 | return R, t 89 | 90 | 91 | def compute_sharpness(sun3d_data_path, seq_name): 92 | """Returns a numpy array with the sharpness score of all images in the sequence. 93 | 94 | sun3d_data_path: str 95 | base path to the sun3d data 96 | 97 | seq_name: str 98 | the name of the sequence e.g. "mit_32_d463/d463_1" 99 | 100 | """ 101 | seq_path = os.path.join(sun3d_data_path,seq_name) 102 | image_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'image'))) if f.endswith('.jpg')] 103 | 104 | sharpness = [] 105 | for img_file in image_files: 106 | img = read_image(os.path.join(seq_path,'image',img_file)) 107 | sharpness.append(measure_sharpness(img)) 108 | 109 | return np.asarray(sharpness) 110 | 111 | 112 | def create_samples_from_sequence(h5file, sun3d_data_path, seq_name, baseline_range, sharpness, sharpness_window=30, max_views_num=10): 113 | """Read a sun3d sequence and write samples to the h5file 114 | 115 | h5file: h5py.File handle 116 | 117 | sun3d_data_path: str 118 | base path to the sun3d data 119 | 120 | seq_name: str 121 | the name of the sequence e.g. 
"mit_32_d463/d463_1" 122 | 123 | baseline_range: tuple(float,float) 124 | The allowed baseline range 125 | 126 | sharpness: numpy.ndarray 1D 127 | Array with the sharpness score for each image 128 | 129 | sharpness_window: int 130 | Window for detecting sharp images 131 | 132 | Returns the number of generated groups 133 | """ 134 | generated_groups = 0 135 | seq_path = os.path.join(sun3d_data_path,seq_name) 136 | group_prefix = seq_name.replace('/','.') 137 | if not os.path.exists(os.path.join(seq_path, 'extrinsics')): 138 | return 0 139 | 140 | # file list 141 | image_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'image'))) if f.endswith('.jpg')] 142 | depth_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'depthTSDF'))) if f.endswith('.png')] 143 | extrinsics_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'extrinsics'))) if f.endswith('.txt')] 144 | 145 | # read intrinsics 146 | intrinsics = np.loadtxt(os.path.join(seq_path,'intrinsics.txt')) 147 | 148 | # read extrinsics params 149 | extrinsics = np.loadtxt(os.path.join(seq_path,'extrinsics',extrinsics_files[-1])) 150 | 151 | # read time stamp 152 | img_ids, img_timestamps = read_frameid_timestamp(image_files) 153 | _, depth_timestamps = read_frameid_timestamp(depth_files) 154 | 155 | # find a depth for each image 156 | idx_img2depth = [] 157 | for img_timestamp in img_timestamps: 158 | idx_img2depth.append(np.argmin(abs(depth_timestamps[:] - img_timestamp))) 159 | 160 | 161 | # find sharp images with nonmaximum suppression 162 | assert sharpness.size == len(image_files) 163 | sharpness_maxfilter = maximum_filter1d(np.asarray(sharpness), size=sharpness_window, mode='constant', cval=0) 164 | sharp_images_index = np.where( sharpness == sharpness_maxfilter )[0] 165 | 166 | used_views = set() 167 | for i1, frame_idx1 in enumerate(sharp_images_index): 168 | if i1 in used_views: 169 | continue 170 | 171 | R1, t1 = read_Rt(extrinsics, frame_idx1) 172 | i2 = i1+1 173 | 174 | depth_file = os.path.join(seq_path,'depthTSDF', depth_files[idx_img2depth[frame_idx1]]) 175 | depth1 = read_depth(depth_file) 176 | 177 | if np.count_nonzero(np.isfinite(depth1) & (depth1 > 0)) < 0.5*depth1.size: 178 | continue 179 | 180 | image1 = read_image(os.path.join(seq_path,'image',image_files[frame_idx1])) 181 | view1 = View(R=R1, t=t1, K=intrinsics, image=image1, depth=depth1, depth_metric='camera_z') 182 | 183 | views = [view1] 184 | used_views.add(i1) 185 | 186 | for i2 in range(i1+1, sharp_images_index.size): 187 | frame_idx2 = sharp_images_index[i2] 188 | R2, t2 = read_Rt(extrinsics, frame_idx2) 189 | baseline = np.linalg.norm( (-R1.transpose().dot(t1)) - (-R2.transpose().dot(t2))) # unit is meters 190 | if baseline < baseline_range[0] or baseline > baseline_range[1]: 191 | continue 192 | 193 | cosine = np.dot(R1[2,:],R2[2,:]) 194 | if cosine < math.cos(math.radians(70)): 195 | continue 196 | 197 | depth_file = os.path.join(seq_path,'depthTSDF', depth_files[idx_img2depth[frame_idx2]]) 198 | depth2 = read_depth(depth_file) 199 | 200 | if np.count_nonzero(np.isfinite(depth2) & (depth2 > 0)) < 0.5*depth2.size: 201 | continue 202 | 203 | view2 = View(R=R2, t=t2, K=intrinsics, image=None, depth=depth2, depth_metric='camera_z') 204 | check_params = {'min_valid_threshold': 0.4, 'min_depth_consistent': 0.7 } 205 | if check_depth_consistency(view1, [view2],**check_params) and check_depth_consistency(view2, [view1], **check_params): 206 | image2 = read_image(os.path.join(seq_path,'image',image_files[frame_idx2])) 207 | view2 = 
view2._replace(image=image2) 208 | views.append(view2) 209 | used_views.add(i2) 210 | # print(baseline, cosine) 211 | if len(views) > max_views_num: 212 | break 213 | 214 | if len(views) > 1: 215 | group_name = group_prefix+'-{:07d}'.format(img_ids[i1]) 216 | print('writing', group_name) 217 | 218 | view_pairs = [] 219 | for pair in itertools.product(range(len(views)),repeat=2): 220 | if pair[0] != pair[1]: 221 | baseline = np.linalg.norm(views[pair[0]].t-views[pair[1]].t) 222 | if baseline >= baseline_range[0] or baseline <= baseline_range[1]: 223 | view_pairs.extend(pair) 224 | for i, v in enumerate(views): 225 | view_group = h5file.require_group(group_name+'/frames/t0/v{0}'.format(i)) 226 | write_view(view_group, v) 227 | 228 | # write valid image pair combinations to the group t0 229 | viewpoint_pairs = np.array(view_pairs, dtype=np.int32) 230 | time_group = h5file[group_name]['frames/t0'] 231 | time_group.attrs['viewpoint_pairs'] = viewpoint_pairs 232 | generated_groups += 1 233 | 234 | return generated_groups 235 | 236 | 237 | 238 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/view.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | from collections import namedtuple 19 | 20 | # depth always stores the absolute depth values (not inverse depth) 21 | # image is a PIL.Image with the same dimensions as depth 22 | # depth_metric should always be 'camera_z' 23 | # K corresponds to the width and height of image/depth 24 | # R, t is the world to camera transform 25 | View = namedtuple('View',['R','t','K','image','depth','depth_metric']) 26 | 27 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/view_io.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # 18 | import numpy as np 19 | from PIL import Image 20 | from io import BytesIO 21 | from .lz4 import lz4_uncompress, lz4_compress_HC 22 | from .webp import webp_encode_array, webp_encode_image 23 | 24 | from .view import View 25 | 26 | 27 | def read_webp_image(h5_dataset): 28 | """Reads a dataset that stores an image compressed as webp 29 | 30 | h5_dataset : hdf5 dataset object 31 | 32 | Returns the image as PIL Image 33 | """ 34 | data = h5_dataset[:].tobytes() 35 | img_bytesio = BytesIO(data) 36 | pil_img = Image.open(img_bytesio,'r') 37 | return pil_img 38 | 39 | 40 | def write_webp_image(h5_group, image, dsname="image"): 41 | """Writes the image as webp to a new dataset 42 | 43 | h5_group: hdf5 group 44 | The group that shall contain the newly created dataset 45 | 46 | image: PIL.Image or rgb numpy array 47 | The image 48 | """ 49 | if isinstance(image,np.ndarray): 50 | compressed_data = webp_encode_array(image) 51 | else: 52 | compressed_data = webp_encode_image(image) 53 | image_compressed = np.frombuffer(compressed_data,dtype=np.int8) 54 | ds = h5_group.create_dataset(dsname, data=image_compressed) 55 | ds.attrs['format'] = np.string_("webp") 56 | 57 | 58 | 59 | def read_lz4half_depth(h5_dataset): 60 | """Reads a dataset that stores a depth map in lz4 compressed float16 format 61 | 62 | h5_dataset : hdf5 dataset object 63 | 64 | Returns the depth map as numpy array with float32 65 | """ 66 | extents = h5_dataset.attrs['extents'] 67 | num_pixel = extents[0]*extents[1] 68 | expected_size = 2*num_pixel 69 | data = h5_dataset[:].tobytes() 70 | depth_raw_data = lz4_uncompress(data,int(expected_size)) 71 | depth = np.fromstring(depth_raw_data,dtype=np.float16) 72 | depth = depth.astype(np.float32) 73 | depth = depth.reshape((extents[0],extents[1])) 74 | return depth 75 | 76 | 77 | def write_lz4half_depth(h5_group, depth, depth_metric, dsname="depth"): 78 | """Writes the depth as 16bit lz4 compressed char array to the given path 79 | 80 | h5_group: hdf5 group 81 | The group that shall contain the newly created dataset 82 | 83 | depth: numpy array with float32 84 | """ 85 | assert isinstance(depth, np.ndarray), "depth must be a numpy array" 86 | assert depth.dtype == np.float32, "depth must be a float32 array" 87 | assert len(depth.shape) == 2, "depth must be a 2d array" 88 | assert depth_metric in ('camera_z', 'ray_length'), "depth metric must be either 'camera_z' or 'ray_length'" 89 | height = depth.shape[0] 90 | width = depth.shape[1] 91 | depth16 = depth.astype(np.float16) 92 | depth_raw_data = depth16.tobytes() 93 | compressed_data = lz4_compress_HC(depth_raw_data) 94 | depth_compressed = np.frombuffer(compressed_data,dtype=np.int8) 95 | ds = h5_group.create_dataset(dsname, data=depth_compressed) 96 | ds.attrs['format'] = np.string_("lz4half") 97 | ds.attrs['extents'] = np.array([height, width], dtype=np.int32) 98 | ds.attrs['depth_metric'] = np.string_(depth_metric) 99 | 100 | 101 | def read_camera_params(h5_dataset): 102 | """Reads a dataset that stores camera params in float64 103 | 104 | h5_dataset : hdf5 dataset object 105 | 106 | Returns K,R,t as numpy array with float64 107 | """ 108 | fx = h5_dataset[0] 109 | fy = h5_dataset[1] 110 | skew = h5_dataset[2] 111 | cx = h5_dataset[3] 112 | cy = h5_dataset[4] 113 | K = np.array([[fx, skew, cx], 114 | [0, fy, cy], 115 | [0, 0, 1]], dtype=np.float64) 116 | R = np.array([[h5_dataset[5], h5_dataset[8], h5_dataset[11]], 117 | [h5_dataset[6], h5_dataset[9], h5_dataset[12]], 118 | [h5_dataset[7], h5_dataset[10], h5_dataset[13]]], 
dtype=np.float64) 119 | t = np.array([h5_dataset[14], h5_dataset[15], h5_dataset[16]], dtype=np.float64) 120 | return K,R,t 121 | 122 | 123 | def write_camera_params(h5_group, K, R, t, dsname="camera"): 124 | """Writes the camera params as float64 to the given path 125 | 126 | h5_group: hdf5 group 127 | The group that shall contain the newly created dataset 128 | 129 | K, R, t: numpy array with float64 130 | """ 131 | data = np.array([K[0,0], K[1,1], K[0,1], K[0,2], K[1,2], 132 | R[0,0], R[1,0], R[2,0], R[0,1], R[1,1], R[2,1], R[0,2], R[1,2], R[2,2], 133 | t[0], t[1], t[2]], dtype=np.float64) 134 | ds = h5_group.create_dataset(dsname, data=data) 135 | ds.attrs['format'] = "pinhole".encode('ascii') 136 | 137 | 138 | def read_view(h5_group): 139 | """Reads the view group and returns it as a View tuple 140 | 141 | h5_group: hdf5 group 142 | The group for reading the view 143 | 144 | Returns the View tuple 145 | """ 146 | img = read_webp_image(h5_group['image']) 147 | depth = read_lz4half_depth(h5_group['depth']) 148 | depth_metric = h5_group['depth'].attrs['depth_metric'].decode('ascii') 149 | K_arr,R_arr,t_arr = read_camera_params(h5_group['camera']) 150 | return View(image=img, depth=depth, depth_metric=depth_metric, K=K_arr, R=R_arr, t=t_arr) 151 | 152 | 153 | def write_view(h5_group, view): 154 | """Writes the View tuple to the group 155 | 156 | h5_group: hdf5 group 157 | The group for storing the view 158 | 159 | view: View namedtuple 160 | The tuple storing the view 161 | 162 | """ 163 | for ds in ('image', 'depth', 'camera'): 164 | if ds in h5_group: 165 | del h5_group[ds] 166 | 167 | write_webp_image(h5_group, view.image) 168 | write_lz4half_depth(h5_group, view.depth, view.depth_metric) 169 | write_camera_params(h5_group, view.K, view.R, view.t) 170 | 171 | 172 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/view_tools.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import pyximport; pyximport.install() 19 | import numpy as np 20 | 21 | from .view import View 22 | 23 | def compute_visible_points_mask( view1, view2, borderx=0, bordery=0 ): 24 | """Computes a mask of the pixels in view1 that are visible in view2 25 | 26 | view1: View namedtuple 27 | First view 28 | 29 | view2: View namedtuple 30 | Second view 31 | 32 | borderx: int 33 | border in x direction. Points in the border are considered invalid 34 | 35 | bordery: int 36 | border in y direction. 
Points in the border are considered invalid 37 | 38 | Returns a mask of valid points 39 | """ 40 | from .view_tools_cython import compute_visible_points_mask as _compute_visible_points_mask 41 | assert view1.depth_metric == 'camera_z', "Depth metric must be 'camera_z'" 42 | return _compute_visible_points_mask( view1, view2, borderx, bordery ) 43 | 44 | 45 | def compute_depth_ratios( view1, view2 ): 46 | """Projects each point defined in view1 to view2 and computes the ratio of 47 | the depth value of the projected point and the stored depth value in view2. 48 | 49 | 50 | view1: View namedtuple 51 | First view 52 | 53 | view2: View namedtuple 54 | Second view 55 | 56 | Returns the scale value for view2 relative to view1 57 | """ 58 | from .view_tools_cython import compute_depth_ratios as _compute_depth_ratios 59 | return _compute_depth_ratios(view1, view2) 60 | 61 | 62 | def check_depth_consistency( view, rest_of_the_views, depth_ratio_threshold=0.9, min_valid_threshold=0.5, min_depth_consistent=0.7 ): 63 | """Checks if the depth of view is consistent with the rest_of_the_views 64 | 65 | view: View namedtuple 66 | Reference view 67 | 68 | rest_of_the_views: list of View namedtuple 69 | List of the rest of the views 70 | 71 | depth_ratio_threshold: float 72 | The allowed minimum depth ratio 73 | 74 | min_valid_threshold: float 75 | ratio of pixels that should have consistent depth values with the rest_of_the_views 76 | 77 | min_depth_consistent: float 78 | ratio of depth consistent pixels with respect to the number of valid depth ratios 79 | 80 | Returns True if the depth is consistent 81 | """ 82 | min_ratio_threshold = min(depth_ratio_threshold, 1/depth_ratio_threshold) 83 | max_ratio_threshold = max(depth_ratio_threshold, 1/depth_ratio_threshold) 84 | for v in rest_of_the_views: 85 | dr = compute_depth_ratios(view, v) 86 | valid_dr = dr[np.isfinite(dr)] 87 | if valid_dr.size / dr.size < min_valid_threshold: 88 | return False 89 | 90 | num_consistent = np.count_nonzero((valid_dr > min_ratio_threshold) & (valid_dr < max_ratio_threshold)) 91 | if num_consistent / valid_dr.size < min_depth_consistent: 92 | return False 93 | 94 | return True 95 | 96 | 97 | def adjust_intrinsics(view, K_new, width_new, height_new): 98 | """Creates a new View with the specified intrinsics and image dimensions. 99 | The skew parameter K[0,1] will be ignored. 
100 | 101 | view: View namedtuple 102 | The view tuple 103 | 104 | K_new: numpy.ndarray 105 | 3x3 calibration matrix with the new intrinsics 106 | 107 | width_new: int 108 | The new image width 109 | 110 | height_new: int 111 | The new image height 112 | 113 | Returns a View tuple with adjusted image, depth and intrinsics 114 | """ 115 | from PIL import Image 116 | from skimage.transform import resize 117 | from .helpers import safe_crop_image, safe_crop_array2d 118 | 119 | #original parameters 120 | fx = view.K[0,0] 121 | fy = view.K[1,1] 122 | cx = view.K[0,2] 123 | cy = view.K[1,2] 124 | width = view.image.width 125 | height = view.image.height 126 | 127 | #target param 128 | fx_new = K_new[0,0] 129 | fy_new = K_new[1,1] 130 | cx_new = K_new[0,2] 131 | cy_new = K_new[1,2] 132 | 133 | scale_x = fx_new/fx 134 | scale_y = fy_new/fy 135 | 136 | #resize to get the right focal length 137 | width_resize = int(width*scale_x) 138 | height_resize = int(height*scale_y) 139 | # principal point position in the resized image 140 | cx_resize = cx*scale_x 141 | cy_resize = cy*scale_y 142 | 143 | img_resize = view.image.resize((width_resize, height_resize), Image.BILINEAR if scale_x > 1 else Image.LANCZOS) 144 | if not view.depth is None: 145 | max_depth = np.max(view.depth) 146 | depth_resize = view.depth / max_depth 147 | depth_resize[depth_resize < 0.] = 0. 148 | depth_resize = resize(depth_resize, (height_resize,width_resize), 0,mode='constant') * max_depth 149 | else: 150 | depth_resize = None 151 | 152 | #crop to get the right principle point and resolution 153 | x0 = int(round(cx_resize - cx_new)) 154 | y0 = int(round(cy_resize - cy_new)) 155 | x1 = x0 + int(width_new) 156 | y1 = y0 + int(height_new) 157 | 158 | if x0 < 0 or y0 < 0 or x1 > width_resize or y1 > height_resize: 159 | print('Warning: Adjusting intrinsics adds a border to the image') 160 | img_new = safe_crop_image(img_resize,(x0,y0,x1,y1),(127,127,127)) 161 | if not depth_resize is None: 162 | depth_new = safe_crop_array2d(depth_resize,(x0,y0,x1,y1),0).astype(np.float32) 163 | else: 164 | depth_new = None 165 | else: 166 | img_new = img_resize.crop((x0,y0,x1,y1)) 167 | if not depth_resize is None: 168 | depth_new = depth_resize[y0:y1,x0:x1].astype(np.float32) 169 | else: 170 | depth_new = None 171 | 172 | return View(R=view.R, t=view.t, K=K_new, image=img_new, depth=depth_new, depth_metric=view.depth_metric) 173 | 174 | 175 | def resize_view(view, width_new, height_new): 176 | """Creates a new View with the new size. 
177 | The intrinsics will be adjusted to match the new image size 178 | 179 | view: View namedtuple 180 | The view tuple 181 | 182 | width_new: int 183 | The new image width 184 | 185 | height_new: int 186 | The new image height 187 | 188 | Returns a View tuple with adjusted image, depth and intrinsics 189 | """ 190 | from PIL import Image 191 | from skimage.transform import resize 192 | 193 | if view.image.width == width_new and view.image.height == height_new: 194 | return View(*view) 195 | 196 | #original param 197 | fx = view.K[0,0] 198 | fy = view.K[1,1] 199 | cx = view.K[0,2] 200 | cy = view.K[1,2] 201 | width = view.image.width 202 | height = view.image.height 203 | 204 | #target param 205 | fx_new = width_new*fx/width 206 | fy_new = height_new*fy/height 207 | cx_new = width_new*cx/width 208 | cy_new = height_new*cy/height 209 | 210 | K_new = np.array([fx_new, 0, cx_new, 0, fy_new, cy_new, 0, 0, 1],dtype=np.float64).reshape((3,3)) 211 | 212 | img_resize = view.image.resize((width_new, height_new), Image.BILINEAR if width_new > width else Image.LANCZOS) 213 | max_depth = view.depth.max() 214 | depth_resize = max_depth*resize(view.depth/max_depth, (height_new, width_new), order=0, mode='constant') 215 | depth_resize = depth_resize.astype(view.depth.dtype) 216 | return View(R=view.R, t=view.t, K=K_new, image=img_resize, depth=depth_resize, depth_metric=view.depth_metric) 217 | 218 | 219 | def compute_view_distances( views ): 220 | """Computes the spatial distances between views 221 | 222 | views: List of View namedtuple 223 | 224 | Returns the spatial distance as distance matrix 225 | """ 226 | from scipy.spatial.distance import pdist, squareform 227 | positions = np.empty((len(views),3)) 228 | for i, view in enumerate(views): 229 | C = -view.R.transpose().dot(view.t) 230 | positions[i] = C 231 | return squareform(pdist(positions,'euclidean')) 232 | 233 | 234 | def compute_view_angle( view1, view2 ): 235 | """Computes the viewing direction angle between two views 236 | 237 | view1: View namedtuple 238 | First view 239 | 240 | view2: View namedtuple 241 | Second view 242 | 243 | Returns the angle in radians 244 | """ 245 | dot = np.clip(view1.R[2,:].dot(view2.R[2,:]), -1, 1) 246 | return np.arccos(dot) 247 | 248 | 249 | def create_image_overview( views ): 250 | """Creates a small overview image showing the RGB images of all views 251 | 252 | views: list of View or list of list of View 253 | 254 | Returns a PIL.Image 255 | """ 256 | assert isinstance(views, list) 257 | from .helpers import concat_images_vertical, concat_images_horizontal 258 | max_height = 100 # maximum height of individual images 259 | 260 | def resize_image(img): 261 | if img.size[1] > max_height: 262 | new_width = int(img.size[0]*(max_height/img.size[1])) 263 | return img.resize((new_width,max_height)) 264 | else: 265 | return img 266 | 267 | column_images = [] 268 | for col in views: 269 | if isinstance(col,list): 270 | tmp_images = [] 271 | for row in col: 272 | tmp_images.append(resize_image(row.image)) 273 | col_img = concat_images_vertical(tmp_images) 274 | column_images.append(col_img) 275 | elif isinstance(col,View): 276 | column_images.append(resize_image(col.image)) 277 | return concat_images_horizontal(column_images) 278 | 279 | 280 | def visualize_views( views ): 281 | """Visualizes views 282 | 283 | views: list of View namedtuple 284 | 285 | Opens a vtk window with the visualization 286 | """ 287 | import vtk 288 | from .. 
import vis 289 | 290 | 291 | renderer = vtk.vtkRenderer() 292 | renderer.SetBackground(0, 0, 0) 293 | 294 | axes = vtk.vtkAxesActor() 295 | axes.GetXAxisCaptionActor2D().SetHeight(0.05) 296 | axes.GetYAxisCaptionActor2D().SetHeight(0.05) 297 | axes.GetZAxisCaptionActor2D().SetHeight(0.05) 298 | axes.SetCylinderRadius(0.03) 299 | axes.SetShaftTypeToCylinder() 300 | renderer.AddActor(axes) 301 | 302 | renwin = vtk.vtkRenderWindow() 303 | renwin.SetWindowName("Viewer (press 'm' to change colors, use '.' and ',' to adjust opacity)") 304 | renwin.SetSize(800,600) 305 | renwin.AddRenderer(renderer) 306 | 307 | 308 | # An interactor 309 | interactor = vtk.vtkRenderWindowInteractor() 310 | interstyle = vtk.vtkInteractorStyleTrackballCamera() 311 | interactor.SetInteractorStyle(interstyle) 312 | interactor.SetRenderWindow(renwin) 313 | 314 | colors = ((1,0,0), (0,0,1), (0,1,1), (1,0,1), (1,1,0), (1,1,1), (0,1,0)) 315 | 316 | pointcloud_polydatas = [] 317 | pointcloud_actors = [] 318 | for idx, view in enumerate(views): 319 | 320 | img_arr = None 321 | if not view.image is None: 322 | img_arr = np.array(view.image).transpose([2,0,1]) 323 | 324 | 325 | pointcloud = vis.compute_point_cloud_from_depthmap(view.depth, view.K, view.R, view.t, colors=img_arr) 326 | pointcloud_polydata = vis.create_pointcloud_polydata( 327 | points=pointcloud['points'], 328 | colors=pointcloud['colors'] if 'colors' in pointcloud else None, 329 | ) 330 | pointcloud_polydatas.append(pointcloud_polydata) 331 | 332 | pc_mapper = vtk.vtkPolyDataMapper() 333 | pc_mapper.SetInputData(pointcloud_polydata) 334 | 335 | pc_actor = vtk.vtkActor() 336 | pointcloud_actors.append(pc_actor) 337 | pc_actor.SetMapper(pc_mapper) 338 | pc_actor.GetProperty().SetPointSize(2) 339 | 340 | 341 | color = colors[idx%len(colors)] 342 | 343 | pc_actor.GetProperty().SetColor(*color) 344 | renderer.AddActor(pc_actor) 345 | 346 | cam_actor = vis.create_camera_actor(view.R,view.t) 347 | cam_actor.GetProperty().SetColor(*color) 348 | renderer.AddActor(cam_actor) 349 | 350 | 351 | 352 | def change_point_properties(obj, ev): 353 | # toggle between per-point colors and uniform colors only when 'm' is pressed 354 | if "m" == obj.GetKeySym(): 355 | if change_point_properties.current_active_scalars == "Colors": 356 | change_point_properties.current_active_scalars = "" 357 | else: 358 | change_point_properties.current_active_scalars = "Colors" 359 | for polydata in pointcloud_polydatas: 360 | polydata.GetPointData().SetActiveScalars(change_point_properties.current_active_scalars) 361 | 362 | if "period" == obj.GetKeySym(): 363 | for actor in pointcloud_actors: 364 | opacity = actor.GetProperty().GetOpacity() 365 | opacity = max(0.0, opacity - 0.1) 366 | 367 | actor.GetProperty().SetOpacity(opacity) 368 | if "comma" == obj.GetKeySym(): 369 | for actor in pointcloud_actors: 370 | opacity = actor.GetProperty().GetOpacity() 371 | opacity = min(1.0, opacity + 0.1) 372 | actor.GetProperty().SetOpacity(opacity) 373 | renwin.Render() 374 | 375 | change_point_properties.current_active_scalars = "Colors" 376 | 377 | interactor.AddObserver('KeyReleaseEvent', change_point_properties) 378 | 379 | # Start 380 | interactor.Initialize() 381 | interactor.Start() 382 | 383 | interactor.RemoveAllObservers() 384 | del change_point_properties 385 | 386 | 387 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/view_tools_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | from
libc.math cimport isfinite 5 | 6 | 7 | 8 | @cython.boundscheck(False) 9 | cdef _compute_visible_points_mask( 10 | np.ndarray[np.float32_t, ndim=2] depth, 11 | np.ndarray[np.float32_t, ndim=2] K1, 12 | np.ndarray[np.float32_t, ndim=2] R1, 13 | np.ndarray[np.float32_t, ndim=1] t1, 14 | np.ndarray[np.float32_t, ndim=2] P2, 15 | int width2, 16 | int height2, 17 | int borderx, 18 | int bordery): 19 | 20 | cdef np.float32_t point3d[3] 21 | cdef np.float32_t point4d[4] 22 | point4d[3] = 1.0 23 | cdef np.float32_t point_proj[3] 24 | cdef int x, y 25 | cdef np.float32_t px, py 26 | cdef np.float32_t d 27 | cdef np.ndarray[np.float32_t,ndim=2] RT = R1.transpose() 28 | 29 | cdef np.ndarray[np.uint8_t,ndim=2] mask = np.zeros((depth.shape[0],depth.shape[1]), dtype=np.uint8) 30 | 31 | for y in range(depth.shape[0]): 32 | for x in range(depth.shape[1]): 33 | 34 | d = depth[y,x] 35 | if np.isfinite(d) and d > 0.0: 36 | px = x + 0.5 37 | py = y + 0.5 38 | 39 | point3d[0] = d*(px - K1[0,2])/K1[0,0] 40 | point3d[1] = d*(py - K1[1,2])/K1[1,1] 41 | point3d[2] = d 42 | point3d[0] -= t1[0] 43 | point3d[1] -= t1[1] 44 | point3d[2] -= t1[2] 45 | point4d[0] = RT[0,0]*point3d[0] + RT[0,1]*point3d[1] + RT[0,2]*point3d[2] 46 | point4d[1] = RT[1,0]*point3d[0] + RT[1,1]*point3d[1] + RT[1,2]*point3d[2] 47 | point4d[2] = RT[2,0]*point3d[0] + RT[2,1]*point3d[1] + RT[2,2]*point3d[2] 48 | 49 | point_proj[0] = P2[0,0]*point4d[0] + P2[0,1]*point4d[1] + P2[0,2]*point4d[2] + P2[0,3]*point4d[3] 50 | point_proj[1] = P2[1,0]*point4d[0] + P2[1,1]*point4d[1] + P2[1,2]*point4d[2] + P2[1,3]*point4d[3] 51 | point_proj[2] = P2[2,0]*point4d[0] + P2[2,1]*point4d[1] + P2[2,2]*point4d[2] + P2[2,3]*point4d[3] 52 | if point_proj[2] > 0.0: 53 | point_proj[0] /= point_proj[2] 54 | point_proj[1] /= point_proj[2] 55 | if point_proj[0] > borderx and point_proj[1] > bordery and point_proj[0] < width2-borderx and point_proj[1] < height2-bordery: 56 | mask[y,x] = 1 57 | 58 | return mask 59 | 60 | 61 | 62 | def compute_visible_points_mask( view1, view2, borderx=0, bordery=0 ): 63 | """Computes a mask of the pixels in view1 that are visible in view2 64 | 65 | view1: View namedtuple 66 | First view 67 | 68 | view2: View namedtuple 69 | Second view 70 | 71 | borderx: int 72 | border in x direction. Points in the border are considered invalid 73 | 74 | bordery: int 75 | border in y direction. 
Points in the border are considered invalid 76 | 77 | Returns a mask of valid points 78 | """ 79 | assert view1.depth_metric == 'camera_z', "Depth metric must be 'camera_z'" 80 | 81 | P2 = np.empty((3,4), dtype=np.float32) 82 | P2[:,0:3] = view2.R 83 | P2[:,3:4] = view2.t.reshape((3,1)) 84 | P2 = view2.K.dot(P2) 85 | 86 | if view2.depth is None: 87 | width2 = view1.depth.shape[1] 88 | height2 = view1.depth.shape[0] 89 | else: 90 | width2 = view2.depth.shape[1] 91 | height2 = view2.depth.shape[0] 92 | 93 | return _compute_visible_points_mask( 94 | view1.depth, 95 | view1.K.astype(np.float32), 96 | view1.R.astype(np.float32), 97 | view1.t.astype(np.float32), 98 | P2.astype(np.float32), 99 | width2, 100 | height2, 101 | borderx, 102 | bordery) 103 | 104 | 105 | 106 | 107 | @cython.boundscheck(False) 108 | cdef _compute_depth_ratios( 109 | np.ndarray[np.float32_t, ndim=2] depth1, 110 | np.ndarray[np.float32_t, ndim=2] depth2, 111 | np.ndarray[np.float32_t, ndim=2] K1, 112 | np.ndarray[np.float32_t, ndim=2] R1, 113 | np.ndarray[np.float32_t, ndim=1] t1, 114 | np.ndarray[np.float32_t, ndim=2] P2 ): 115 | cdef np.float32_t point3d[3] 116 | cdef np.float32_t point4d[4] 117 | point4d[3] = 1.0 118 | cdef np.float32_t point_proj[3] 119 | cdef int x, y, x2, y2 120 | cdef np.float32_t px, py 121 | cdef np.float32_t d, d2 122 | cdef np.ndarray[np.float32_t,ndim=2] RT = R1.transpose() 123 | 124 | cdef np.ndarray[np.float32_t,ndim=2] result = np.full((depth1.shape[0],depth1.shape[1]), np.nan, dtype=np.float32) 125 | 126 | for y in range(depth1.shape[0]): 127 | for x in range(depth1.shape[1]): 128 | 129 | d = depth1[y,x] 130 | if np.isfinite(d) and d > 0.0: 131 | px = x + 0.5 132 | py = y + 0.5 133 | 134 | point3d[0] = d*(px - K1[0,2])/K1[0,0] 135 | point3d[1] = d*(py - K1[1,2])/K1[1,1] 136 | point3d[2] = d 137 | point3d[0] -= t1[0] 138 | point3d[1] -= t1[1] 139 | point3d[2] -= t1[2] 140 | point4d[0] = RT[0,0]*point3d[0] + RT[0,1]*point3d[1] + RT[0,2]*point3d[2] 141 | point4d[1] = RT[1,0]*point3d[0] + RT[1,1]*point3d[1] + RT[1,2]*point3d[2] 142 | point4d[2] = RT[2,0]*point3d[0] + RT[2,1]*point3d[1] + RT[2,2]*point3d[2] 143 | 144 | point_proj[0] = P2[0,0]*point4d[0] + P2[0,1]*point4d[1] + P2[0,2]*point4d[2] + P2[0,3]*point4d[3] 145 | point_proj[1] = P2[1,0]*point4d[0] + P2[1,1]*point4d[1] + P2[1,2]*point4d[2] + P2[1,3]*point4d[3] 146 | point_proj[2] = P2[2,0]*point4d[0] + P2[2,1]*point4d[1] + P2[2,2]*point4d[2] + P2[2,3]*point4d[3] 147 | if point_proj[2] > 0.0: 148 | point_proj[0] /= point_proj[2] 149 | point_proj[1] /= point_proj[2] 150 | if point_proj[0] > 0 and point_proj[1] > 0 and point_proj[0] < depth2.shape[1] and point_proj[1] < depth2.shape[0]: 151 | # lookup the depth value 152 | x2 = max(0,min(depth2.shape[1],int(round(point_proj[0])))) 153 | y2 = max(0,min(depth2.shape[0],int(round(point_proj[1])))) 154 | d2 = depth2[y2,x2] 155 | if d2 > 0.0 and isfinite(d2): 156 | s = point_proj[2]/d2 157 | result[y,x] = s 158 | 159 | return result 160 | 161 | 162 | 163 | 164 | def compute_depth_ratios( view1, view2 ): 165 | """Projects each point defined in view1 to view2 and computes the ratio of 166 | the depth value of the projected point and the stored depth value in view2. 
167 | 168 | 169 | view1: View namedtuple 170 | First view 171 | 172 | view2: View namedtuple 173 | Second view 174 | 175 | Returns the scale value for view2 relative to view1 176 | """ 177 | assert view1.depth_metric == 'camera_z', "Depth metric must be 'camera_z'" 178 | assert view2.depth_metric == 'camera_z', "Depth metric must be 'camera_z'" 179 | 180 | P2 = np.empty((3,4), dtype=np.float32) 181 | P2[:,0:3] = view2.R 182 | P2[:,3:4] = view2.t.reshape((3,1)) 183 | P2 = view2.K.dot(P2) 184 | 185 | return _compute_depth_ratios( 186 | view1.depth, 187 | view2.depth, 188 | view1.K.astype(np.float32), 189 | view1.R.astype(np.float32), 190 | view1.t.astype(np.float32), 191 | P2.astype(np.float32) ) 192 | 193 | 194 | 195 | @cython.boundscheck(False) 196 | cdef _compute_flow( 197 | np.ndarray[np.float32_t, ndim=2] depth1, 198 | np.ndarray[np.float32_t, ndim=2] K1, 199 | np.ndarray[np.float32_t, ndim=2] R1, 200 | np.ndarray[np.float32_t, ndim=1] t1, 201 | np.ndarray[np.float32_t, ndim=2] P2 ): 202 | cdef np.float32_t point3d[3] 203 | cdef np.float32_t point4d[4] 204 | point4d[3] = 1.0 205 | cdef np.float32_t point_proj[3] 206 | cdef int x, y, x2, y2 207 | cdef np.float32_t px, py 208 | cdef np.float32_t d, d2 209 | cdef np.ndarray[np.float32_t,ndim=2] RT = R1.transpose() 210 | 211 | cdef np.ndarray[np.float32_t,ndim=3] result = np.full((2,depth1.shape[0],depth1.shape[1]), np.nan, dtype=np.float32) 212 | 213 | for y in range(depth1.shape[0]): 214 | for x in range(depth1.shape[1]): 215 | 216 | d = depth1[y,x] 217 | if np.isfinite(d) and d > 0.0: 218 | px = x + 0.5 219 | py = y + 0.5 220 | 221 | point3d[0] = d*(px - K1[0,2])/K1[0,0] 222 | point3d[1] = d*(py - K1[1,2])/K1[1,1] 223 | point3d[2] = d 224 | point3d[0] -= t1[0] 225 | point3d[1] -= t1[1] 226 | point3d[2] -= t1[2] 227 | point4d[0] = RT[0,0]*point3d[0] + RT[0,1]*point3d[1] + RT[0,2]*point3d[2] 228 | point4d[1] = RT[1,0]*point3d[0] + RT[1,1]*point3d[1] + RT[1,2]*point3d[2] 229 | point4d[2] = RT[2,0]*point3d[0] + RT[2,1]*point3d[1] + RT[2,2]*point3d[2] 230 | 231 | point_proj[0] = P2[0,0]*point4d[0] + P2[0,1]*point4d[1] + P2[0,2]*point4d[2] + P2[0,3]*point4d[3] 232 | point_proj[1] = P2[1,0]*point4d[0] + P2[1,1]*point4d[1] + P2[1,2]*point4d[2] + P2[1,3]*point4d[3] 233 | point_proj[2] = P2[2,0]*point4d[0] + P2[2,1]*point4d[1] + P2[2,2]*point4d[2] + P2[2,3]*point4d[3] 234 | 235 | point_proj[0] /= point_proj[2] 236 | point_proj[1] /= point_proj[2] 237 | result[0,y,x] = point_proj[0]-px 238 | result[1,y,x] = point_proj[1]-py 239 | 240 | return result 241 | 242 | -------------------------------------------------------------------------------- /python/depthmotionnet/dataset_tools/webp.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
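#
# [Editor's note] The Cython kernels in view_tools_cython.pyx (above) all share
# the same per-pixel projection step. A hedged NumPy sketch of that step, kept
# as a comment for illustration only; view1/view2 are View tuples and (x, y) is
# a pixel with valid depth in view1:
#
#   d = view1.depth[y, x]                                # absolute depth ('camera_z')
#   p1 = d * np.array([(x + 0.5 - view1.K[0, 2]) / view1.K[0, 0],
#                      (y + 0.5 - view1.K[1, 2]) / view1.K[1, 1],
#                      1.0])                             # 3d point in camera 1
#   X = view1.R.T.dot(p1 - view1.t)                      # to world coords (R,t: world->camera)
#   p2 = view2.K.dot(view2.R.dot(X) + view2.t)           # project into camera 2
#   u2, v2 = p2[0] / p2[2], p2[1] / p2[2]                # pixel coords in view2 (if p2[2] > 0)
#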
17 | # 18 | from ctypes import * 19 | from PIL import Image 20 | import numpy as np 21 | import os 22 | 23 | # try the version used by the multivih5datareaderop first 24 | try: 25 | _lib_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], '..', '..', '..', 'build','webp','src','webp-build', 'src', '.libs', 'libwebp.so')) 26 | libwebp = CDLL(_lib_path) 27 | except: 28 | # try system version 29 | try: 30 | libwebp = CDLL('libwebp.so') 31 | except: 32 | raise RuntimeError('Cannot load libwebp.so') 33 | 34 | def webp_encode_array(array, quality=90.0): 35 | """encode the array as webp and return as bytes. 36 | 37 | array: uint8 numpy array 38 | array with the following shape [height,width,3] or [3,height,width] 39 | 40 | Returns the compressed bytes array or None on error 41 | """ 42 | assert isinstance(array, np.ndarray), "array must be a numpy array" 43 | assert array.dtype == np.uint8, "array must be a uint8 array" 44 | assert len(array.shape) == 3, "array must be a 3d array" 45 | assert array.shape[0] == 3 or array.shape[-1] == 3, "array must have 3 color channels" 46 | 47 | if array.shape[0] != array.shape[-1] and array.shape[0] == 3: 48 | array_rgb = array.transpose([1,2,0]) # convert [3,height,width] to [height,width,3] 49 | else: 50 | array_rgb = array 51 | data = array_rgb.tobytes() 52 | 53 | width = c_int(array_rgb.shape[1]) 54 | height = c_int(array_rgb.shape[0]) 55 | stride = c_int(array_rgb.shape[1]*3) 56 | output = POINTER(c_char)() 57 | size = libwebp.WebPEncodeRGB(data, width, height, stride, c_float(quality), pointer(output)) 58 | if size == 0: 59 | return None 60 | 61 | webp_img = output[:size] 62 | libwebp.WebPFree(output) 63 | # libc.free(output) 64 | return webp_img 65 | 66 | 67 | 68 | 69 | def webp_encode_image(image): 70 | """encode the image as webp and return as bytes 71 | 72 | image: PIL.Image 73 | Image to encode 74 | """ 75 | arr = np.array(image) 76 | return webp_encode_array(arr) 77 | -------------------------------------------------------------------------------- /python/depthmotionnet/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see .
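#
# [Editor's example] A small, hypothetical round trip through the webp and hdf5
# helpers in dataset_tools (webp.py / view_io.py above), kept as a comment; the
# file name 'tmp.h5' and the group path are made up for illustration.
#
#   import h5py
#   import numpy as np
#   from depthmotionnet.dataset_tools.view_io import write_webp_image, read_webp_image
#
#   rgb = np.zeros((192, 256, 3), dtype=np.uint8)        # [height, width, 3] uint8 image
#   with h5py.File('tmp.h5', 'w') as f:
#       grp = f.require_group('frames/t0/v0')
#       write_webp_image(grp, rgb)                       # creates dataset 'image' with format 'webp'
#       img = read_webp_image(grp['image'])              # back as a PIL.Image
#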
17 | # 18 | from .evaluate_to_xarray import evaluate, write_xarray_json, read_xarray_json 19 | 20 | -------------------------------------------------------------------------------- /python/depthmotionnet/evaluation/evaluate_to_xarray.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | from .metrics import compute_motion_errors,evaluate_depth,compute_flow_epe 19 | import h5py 20 | import xarray 21 | import numpy as np 22 | import re 23 | import math 24 | import json 25 | import scipy.misc 26 | import time 27 | import skimage.transform 28 | 29 | ''' 30 | Functions to evaluate DeMoN results stored as hdf5 files. The results are stored as xarray DataArray converted to json 31 | ''' 32 | 33 | def write_xarray_json(data, out_file): 34 | """Writes xarray as json to a file""" 35 | with open(out_file, 'w') as f: 36 | json.dump(data.to_dict(), f) 37 | 38 | def read_xarray_json(in_file): 39 | """Reads xarray from a json file""" 40 | with open(in_file, 'r') as f: 41 | return xarray.DataArray.from_dict(json.load(f)) 42 | 43 | def get_metainfo(data_file): 44 | """Checks a hdf5 data file for its format and dimensions. 45 | 46 | data_file: str 47 | Path to the hdf5 file generated with the test_iterative.py script. 
48 | 49 | returns a dictionary with the following keys: 50 | iterative: bool, if the file is from an iterative net 51 | snapshots: list of str, names od snapshots in the file contain 52 | iterations: list of net_iterations 53 | samples: list of samples 54 | """ 55 | 56 | re_iteration = re.compile('.*_(\d+)(\.caffemodel\.h5)?') 57 | with h5py.File(data_file,'r') as f: 58 | group_name = list(f.keys())[0] 59 | iterative_net = bool(re_iteration.match(group_name)) 60 | if iterative_net: 61 | snapshots = list(f.keys()) 62 | snapshots.sort(key=lambda x: int(re_iteration.match(x).group(1))) 63 | snapshot_iters = [int(re_iteration.match(x).group(1)) for x in snapshots] 64 | snapshot_group = f[snapshots[0]] 65 | samples = list(snapshot_group.keys()) 66 | samples.sort(key=int) 67 | sample_group = snapshot_group[samples[0]] 68 | # collect iterations from all prediction datasets 69 | iterations = set() 70 | for prediction in ('predicted_depth', 'predicted_normal', 'predicted_motion', 'predicted_flow', 'predicted_conf'): 71 | if prediction in sample_group: 72 | iterations.update( list(sample_group[prediction]) ) 73 | iterations = list(iterations) 74 | iterations.sort(key=lambda x: (int(x.split('_')[0]),len(x.split('_')))) 75 | else: 76 | snapshots = ['snapshot'] 77 | snapshot_iters = [-1] 78 | iterations = ['0'] 79 | samples = list(f.keys()) 80 | samples.sort(key=int) 81 | 82 | metainfo = { 83 | 'iterative':iterative_net, 84 | 'snapshots': snapshots, 85 | 'iterations': iterations, 86 | 'samples':samples, 87 | 'snapshot_iters': snapshot_iters, 88 | 'input_file': data_file, 89 | } 90 | return metainfo 91 | 92 | 93 | def invalidate_points_not_visible_in_second_image(depth, motion, intrinsics): 94 | """Sets the depth values for the points not visible in the second view to nan 95 | 96 | depth: numpy.ndarray 97 | array with inverse depth values as stored in the test output h5 files 98 | 99 | motion: numpy.ndarray 100 | the 6 element motion vector (ANGLEAXIS6) 101 | 102 | intrinsics: numpy.ndarray or None 103 | the normalized intrinsics vector 104 | If None we assume intrinsics as in sun3d 105 | """ 106 | from .helpers import motion_vector_to_Rt, intrinsics_vector_to_K 107 | from ..dataset_tools.view import View 108 | from ..dataset_tools.view_tools import compute_visible_points_mask 109 | #from matplotlib import pyplot as plt 110 | abs_depth = 1/depth 111 | R, t = motion_vector_to_Rt(motion.squeeze()) 112 | 113 | if intrinsics is None: 114 | intrinsics = np.array([[0.891, 1.188, 0.5, 0.5]], dtype=np.float32) # sun3d intrinsics 115 | intrinsics = intrinsics.squeeze() 116 | K = intrinsics_vector_to_K(intrinsics, depth.shape[-1], depth.shape[-2]) 117 | view1 = View(R=np.eye(3), t=np.zeros((3,)), K=K, image=None, depth=abs_depth, depth_metric='camera_z') 118 | view2 = View(R=R, t=t, K=K, image=None, depth=abs_depth, depth_metric='camera_z') 119 | invalid_points = compute_visible_points_mask(view1, view2) == 0 120 | # tmp = depth.copy() 121 | depth[invalid_points] = np.nan 122 | # plt.imshow(np.concatenate((tmp,depth),axis=1)) 123 | # plt.show(block=True) 124 | 125 | 126 | 127 | 128 | 129 | def get_data(iterative, results_h5_file, snap, sample, net_iter, gt_h5_file=None, depthmask=False, eigen_crop_gt_and_pred=False): 130 | """Helper function to read data from the h5 files 131 | 132 | iterative: bool 133 | If true the hdf5 file stores results from multiple iterations. 
134 | 135 | results_h5_file: h5py.File 136 | The file with the network predictions 137 | 138 | snap: str 139 | Name of the snapshot 140 | 141 | sample: str 142 | Sample number as string 143 | 144 | net_iter: int 145 | network iteration 146 | 147 | gt_h5_file: h5py.File 148 | ground truth h5 file. 149 | 150 | depthmask: bool 151 | If True the depth values for points not visible in the second image will be masked out 152 | 153 | eigen_crop_gt_and_pred: bool 154 | If true crops images and depth maps to match the evaluation for NYU in Eigen's paper. 155 | 156 | Returns a dictionary with ground truth and predictions for depth, motion and flow. 157 | """ 158 | data_types = ['motion', 'depth', 'flow', 'normals', 'intrinsics'] 159 | data = {} 160 | # get ground truth 161 | if iterative and (gt_h5_file is None): 162 | sample_group = results_h5_file[snap][sample] 163 | else: 164 | if gt_h5_file is None: 165 | sample_group = results_h5_file[sample] 166 | else: 167 | sample_group = gt_h5_file[sample] 168 | gt_sample_id = sample_group.attrs['sample_id'] 169 | 170 | for dt in data_types: 171 | if dt in sample_group: 172 | data[dt + '_gt'] = sample_group[dt][:] 173 | 174 | # get predictions 175 | if iterative: 176 | sample_group = results_h5_file[snap][sample] 177 | pr_sample_id = sample_group.attrs['sample_id'] 178 | assert gt_sample_id == pr_sample_id, "sample ids do not match: prediction id='{0}', ground truth id='{1}'".format(pr_sample_id,gt_sample_id) 179 | for dt in data_types: 180 | if 'predicted_{0}/{1}'.format(dt,net_iter) in sample_group: 181 | data[dt + '_pred'] = sample_group['predicted_'+dt][net_iter][:] 182 | else: 183 | sample_group = results_h5_file[sample] 184 | for dt in data_types: 185 | if ('predicted_'+dt) in sample_group: 186 | data[dt + '_pred'] = sample_group['predicted_'+dt][:] 187 | 188 | for key in data: 189 | data[key] = np.squeeze(data[key]) 190 | 191 | if ('depth_pred' in data) and (data['depth_pred'].shape == (109,147)): 192 | print('\n >>> Eigen and Fergus detected, cropping the ground truth <<<\n') 193 | assert(data['depth_gt'].shape == (480,640)) 194 | data['depth_gt'] = data['depth_gt'][23:23+436,27:27+588] 195 | 196 | if depthmask and ('motion_gt' in data) and ('depth_gt' in data): 197 | intrinsics = data['intrinsics'] if 'intrinsics' in data else None 198 | invalidate_points_not_visible_in_second_image(data['depth_gt'], data['motion_gt'], intrinsics) 199 | 200 | # reshape the predictions to GT size if necessary 201 | if ('depth_gt' in data) and ('depth_pred' in data) and (not (data['depth_gt'].shape == data['depth_pred'].shape)): 202 | data['depth_pred'] = skimage.transform.resize(data['depth_pred'], data['depth_gt'].shape, order=0, mode='constant', preserve_range=True) 203 | if ('flow_gt' in data) and ('flow_pred' in data) and (not (data['flow_gt'].shape == data['flow_pred'].shape)): 204 | data['flow_pred'] = np.transpose(skimage.transform.resize(\ 205 | np.transpose(data['flow_pred'],(1,2,0)), data['depth_gt'].shape, order=0, mode='constant', preserve_range=True),(2,0,1)) 206 | 207 | if eigen_crop_gt_and_pred and data['depth_gt'].shape != (436,588): 208 | assert(data['depth_gt'].shape == (480,640)) 209 | assert(data['depth_pred'].shape == (480,640)) 210 | data['depth_gt'] = data['depth_gt'][23:23+436,27:27+588] 211 | data['depth_pred'] = data['depth_pred'][23:23+436,27:27+588] 212 | 213 | return data 214 | 215 | 216 | def evaluate(results_file, gt_file, depthmask=False, eigen_crop_gt_and_pred=False, depth_scaling='abs'): 217 | ''' 218 | Compute different error 
measures given a hdf5 result (prediction) file, and output them as an xarray. 219 | results_file: str 220 | Path to the network results (prediction) in hdf5 format. 221 | 222 | gt_file: str 223 | Path to the hdf5 file with ground truth data stored in the simple test output format 224 | 225 | depthmask: bool 226 | If True the depth values for points not visible in the second image will be masked out 227 | 228 | eigen_crop_gt_and_pred: bool 229 | If true crops images and depth maps to match the evaluation for NYU in Eigen's paper. 230 | 231 | depth_scaling: str 232 | selects a scaling method for the scaled results. E.g. 'abs' scales such that the 233 | least squares error for the absolute depth values is minimized. 234 | 235 | ''' 236 | depth_pred_max=np.inf 237 | 238 | depth_errors_to_compute = ['l1', 239 | 'l1_inverse', 240 | 'scale_invariant', 241 | 'abs_relative', 242 | 'sq_relative', 243 | 'avg_log10', 244 | 'rmse_log', 245 | 'rmse', 246 | 'ratio_threshold_1.25', 247 | 'ratio_threshold_1.5625', 248 | 'ratio_threshold_1.953125'] 249 | 250 | errors_to_compute = ['rot_err', 'tran_err', 'tran_angle_err'] + \ 251 | ['depth_' + e for e in depth_errors_to_compute] + \ 252 | ['flow_epe', 'camera_baseline'] 253 | 254 | metainfo = get_metainfo(results_file) 255 | results = xarray.DataArray(np.zeros((len(metainfo['snapshots']), len(metainfo['iterations']), len(metainfo['samples']), len(errors_to_compute), 2)), 256 | [('snapshot', metainfo['snapshots']), 257 | ('iteration', metainfo['iterations']), 258 | ('sample', metainfo['samples']), 259 | ('errors', errors_to_compute), 260 | ('scaled', [False,True])]) 261 | results[:] = np.nan 262 | 263 | # save metainfo and evaluation options 264 | for key,val in metainfo.items(): 265 | results.attrs[key] = val 266 | results.attrs['gt_file'] = gt_file 267 | results.attrs['depthmask'] = depthmask 268 | results.attrs['depth_scaling'] = depth_scaling 269 | results.attrs['depth_pred_max'] = str(depth_pred_max) 270 | 271 | 272 | with h5py.File(results_file,'r') as results_f: 273 | if gt_file: 274 | gt_f = h5py.File(gt_file,'r') 275 | else: 276 | gt_f = None 277 | 278 | t0 = 0 279 | for nsnap,snap in enumerate(metainfo['snapshots']): 280 | for nsample,sample in enumerate(metainfo['samples']): 281 | for niter,net_iter in enumerate(metainfo['iterations']): 282 | if time.time() - t0 > 5: 283 | t0 = time.time() 284 | print('Processing snapshot %d/%d. 
sample %d/%d' % \ 285 | (nsnap+1, len(metainfo['snapshots']), nsample+1, len(metainfo['samples']))) 286 | data = get_data(metainfo['iterative'], results_f, snap, sample, net_iter, gt_h5_file=gt_f, depthmask=depthmask, eigen_crop_gt_and_pred=eigen_crop_gt_and_pred) 287 | 288 | if ('depth_gt' in data) and ('depth_pred' in data): 289 | #print(data['depth_pred'].dtype, data['depth_pred'][:3,:3], data['depth_gt'].dtype, data['depth_gt'][:3,:3]) 290 | if 'motion_gt' in data and (not np.any(np.isnan(data['motion_gt']))): 291 | translation_gt = data['motion_gt'][-3:] 292 | results.loc[snap,net_iter,sample,'camera_baseline'] = np.linalg.norm(translation_gt) 293 | else: 294 | translation_gt = np.array([1.,0.,0.]) 295 | depth_errs, depth_errs_pred_scaled = evaluate_depth(translation_gt, data['depth_gt'], data['depth_pred'], 296 | distances_to_compute=depth_errors_to_compute, inverse_gt=True, inverse_pred=True, 297 | depth_scaling=depth_scaling, depth_pred_max=depth_pred_max) 298 | 299 | for dist in depth_errors_to_compute: 300 | results.loc[snap,net_iter,sample,'depth_' + dist,False] = depth_errs[dist] 301 | results.loc[snap,net_iter,sample,'depth_' + dist,True] = depth_errs_pred_scaled[dist] 302 | 303 | if ('motion_gt' in data) and ('motion_pred' in data): 304 | normalize_translation = True 305 | rot_err, tran_err, tran_angle_err = compute_motion_errors(data['motion_pred'], data['motion_gt'], normalize_translation) 306 | results.loc[snap,net_iter,sample,'rot_err'] = rot_err 307 | results.loc[snap,net_iter,sample,'tran_err'] = tran_err 308 | results.loc[snap,net_iter,sample,'tran_angle_err'] = tran_angle_err 309 | 310 | if ('flow_gt' in data) and ('flow_pred' in data): 311 | flow_epe = compute_flow_epe(data['flow_pred'],data['flow_gt']) 312 | results.loc[snap,net_iter,sample,'flow_epe'] = flow_epe 313 | if gt_file: 314 | gt_f.close() 315 | 316 | return results 317 | 318 | 319 | 320 | 321 | -------------------------------------------------------------------------------- /python/depthmotionnet/evaluation/helpers.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import numpy as np 19 | from minieigen import Matrix3, Vector3, Vector2, Quaternion 20 | 21 | 22 | def angleaxis_to_angle_axis(aa, epsilon=1e-6): 23 | """Converts the angle axis vector with angle encoded as magnitude to 24 | the angle axis representation with seperate angle and axis. 
25 | 26 | aa: minieigen.Vector3 27 | axis angle with angle as vector magnitude 28 | 29 | epsilon: minimum angle in rad 30 | If the angle is smaller than epsilon 31 | then 0,(1,0,0) will be returned 32 | 33 | returns the tuple (angle,axis) 34 | """ 35 | angle = aa.norm() 36 | if angle < epsilon: 37 | angle = 0 38 | axis = Vector3(1,0,0) 39 | else: 40 | axis = aa.normalized() 41 | return angle, axis 42 | 43 | 44 | def angleaxis_to_quaternion(aa, epsilon=1e-6): 45 | """Converts the angle axis vector with angle encoded as magnitude to 46 | the quaternion representation. 47 | 48 | aa: minieigen.Vector3 49 | axis angle with angle as vector magnitude 50 | 51 | epsilon: minimum angle in rad 52 | If the angle is smaller than epsilon 53 | then 0,(1,0,0) will be returned 54 | 55 | returns the unit quaternion 56 | """ 57 | angle, axis = angleaxis_to_angle_axis(aa,epsilon) 58 | return Quaternion(angle,axis) 59 | 60 | 61 | 62 | def angleaxis_to_rotation_matrix(aa, epsilon=1e-6): 63 | """Converts the angle axis vector with angle encoded as magnitude to 64 | the rotation matrix representation. 65 | 66 | aa: minieigen.Vector3 67 | axis angle with angle as vector magnitude 68 | 69 | epsilon: minimum angle in rad 70 | If the angle is smaller than epsilon 71 | then 0,(1,0,0) will be returned 72 | 73 | returns the 3x3 rotation matrix as numpy.ndarray 74 | """ 75 | q = angleaxis_to_quaternion(aa,epsilon) 76 | tmp = q.toRotationMatrix() 77 | return np.array(tmp) 78 | 79 | 80 | 81 | def motion_vector_to_Rt(motion, epsilon=1e-6): 82 | """Converts the motion vector to the rotation matrix R and translation t 83 | 84 | motion: np.ndarray 85 | array with 6 elements. The motions is given as [aa1, aa2, aa3, tx, ty, tz]. 86 | aa1,aa2,aa3 is an angle axis representation. The angle is the norm of the axis. 87 | [tx, ty, tz] is a 3d translation. 88 | 89 | 90 | epsilon: minimum angle in rad 91 | If the angle is smaller than epsilon 92 | then 0,(1,0,0) will be returned 93 | 94 | returns the 3x3 rotation matrix and the 3d translation vector 95 | """ 96 | pass 97 | tmp = motion.squeeze().astype(np.float64) 98 | t = tmp[3:].copy() 99 | R = angleaxis_to_rotation_matrix(Vector3(tmp[0:3]),epsilon) 100 | return R, t 101 | 102 | 103 | def intrinsics_vector_to_K(intrinsics, width, height): 104 | """Converts the normalized intrinsics vector to the calibration matrix K 105 | 106 | intrinsics: np.ndarray 107 | 4 element vector with normalized intrinsics [fx, fy, cx, cy] 108 | 109 | width: int 110 | image width in pixels 111 | 112 | height: int 113 | image height in pixels 114 | 115 | returns the calibration matrix K as numpy.ndarray 116 | """ 117 | tmp = intrinsics.squeeze().astype(np.float64) 118 | K = np.array([tmp[0]*width, 0, tmp[2]*width, 0, tmp[1]*height, tmp[3]*height, 0, 0, 1], dtype=np.float64).reshape((3,3)) 119 | 120 | return K 121 | -------------------------------------------------------------------------------- /python/depthmotionnet/helpers.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 
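#
# [Editor's example] A worked example (comment only) for intrinsics_vector_to_K
# from evaluation/helpers.py above, using the normalized sun3d intrinsics that
# appear elsewhere in this repository and a 256x192 image:
#
#   intrinsics = np.array([0.89115971, 1.18821287, 0.5, 0.5])   # [fx, fy, cx, cy], normalized
#   K = intrinsics_vector_to_K(intrinsics, width=256, height=192)
#   # K ~ [[228.1, 0, 128], [0, 228.1, 96], [0, 0, 1]]
#   # i.e. fx = 0.89115971*256, fy = 1.18821287*192, cx = 0.5*256, cy = 0.5*192
#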
9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import tensorflow as tf 19 | import lmbspecialops as sops 20 | import numpy as np 21 | 22 | def convert_NCHW_to_NHWC(inp): 23 | """Convert the tensor from caffe format NCHW into tensorflow format NHWC 24 | 25 | inp: tensor 26 | """ 27 | return tf.transpose(inp,[0,2,3,1]) 28 | 29 | def convert_NHWC_to_NCHW(inp): 30 | """Convert the tensor from tensorflow format NHWC into caffe format NCHW 31 | 32 | inp: tensor 33 | """ 34 | return tf.transpose(inp,[0,3,1,2]) 35 | 36 | 37 | def angleaxis_to_rotation_matrix(aa): 38 | """Converts the 3 element angle axis representation to a 3x3 rotation matrix 39 | 40 | aa: numpy.ndarray with 1 dimension and 3 elements 41 | 42 | Returns a 3x3 numpy.ndarray 43 | """ 44 | angle = np.sqrt(aa.dot(aa)) 45 | 46 | if angle > 1e-6: 47 | c = np.cos(angle); 48 | s = np.sin(angle); 49 | u = np.array([aa[0]/angle, aa[1]/angle, aa[2]/angle]); 50 | 51 | R = np.empty((3,3)) 52 | R[0,0] = c+u[0]*u[0]*(1-c); R[0,1] = u[0]*u[1]*(1-c)-u[2]*s; R[0,2] = u[0]*u[2]*(1-c)+u[1]*s; 53 | R[1,0] = u[1]*u[0]*(1-c)+u[2]*s; R[1,1] = c+u[1]*u[1]*(1-c); R[1,2] = u[1]*u[2]*(1-c)-u[0]*s; 54 | R[2,0] = u[2]*u[0]*(1-c)-u[1]*s; R[2,1] = u[2]*u[1]*(1-c)+u[0]*s; R[2,2] = c+u[2]*u[2]*(1-c); 55 | else: 56 | R = np.eye(3) 57 | return R 58 | 59 | 60 | def myLeakyRelu(x): 61 | """Leaky ReLU with leak factor 0.1""" 62 | # return tf.maximum(0.1*x,x) 63 | return sops.leaky_relu(x, leak=0.1) 64 | 65 | 66 | def default_weights_initializer(): 67 | return tf.contrib.layers.variance_scaling_initializer() 68 | 69 | 70 | def conv2d_caffe_padding(inputs, num_outputs, kernel_size, data_format, **kwargs): 71 | """Convolution with 'same' padding as in caffe""" 72 | if isinstance(kernel_size,(tuple,list)): 73 | kernel_ysize = kernel_size[0] 74 | kernel_xsize = kernel_size[1] 75 | else: 76 | kernel_ysize = kernel_size 77 | kernel_xsize = kernel_size 78 | pad_y = kernel_ysize//2 79 | pad_x = kernel_xsize//2 80 | 81 | if data_format=='channels_first': 82 | paddings = [[0,0], [0,0], [pad_y, pad_y], [pad_x,pad_x]] 83 | else: 84 | paddings = [[0,0], [pad_y, pad_y], [pad_x,pad_x], [0,0]] 85 | padded_input = tf.pad(inputs, paddings=paddings) 86 | return tf.layers.conv2d( 87 | inputs=padded_input, 88 | filters=num_outputs, 89 | kernel_size=kernel_size, 90 | kernel_initializer=default_weights_initializer(), 91 | padding='valid', 92 | data_format=data_format, 93 | **kwargs, 94 | ) 95 | 96 | 97 | def convrelu_caffe_padding(inputs, num_outputs, kernel_size, data_format, **kwargs): 98 | """Shortcut for a single convolution+relu 99 | 100 | See tf.layers.conv2d for a description of remaining parameters 101 | """ 102 | return conv2d_caffe_padding(inputs, num_outputs, kernel_size, data_format, activation=myLeakyRelu, **kwargs) 103 | 104 | 105 | def convrelu2_caffe_padding(inputs, num_outputs, kernel_size, name, stride, data_format, **kwargs): 106 | """Shortcut for two convolution+relu with 1D filter kernels and 'same' padding as in caffe 107 | 108 | num_outputs: int or (int,int) 109 | If num_outputs is a tuple then the first element is the number of 110 | outputs for the 1d filter in y direction and the second element is 111 | the final 
number of outputs. 112 | """ 113 | if isinstance(num_outputs,(tuple,list)): 114 | num_outputs_y = num_outputs[0] 115 | num_outputs_x = num_outputs[1] 116 | else: 117 | num_outputs_y = num_outputs 118 | num_outputs_x = num_outputs 119 | 120 | pad = kernel_size//2 121 | 122 | if data_format=='channels_first': 123 | paddings_y = [[0,0], [0,0], [pad, pad], [0,0]] 124 | paddings_x = [[0,0], [0,0], [0,0], [pad, pad]] 125 | else: 126 | paddings_y = [[0,0], [pad, pad], [0,0], [0,0]] 127 | paddings_x = [[0,0], [0,0], [pad, pad], [0,0]] 128 | padded_input = tf.pad(inputs, paddings=paddings_y) 129 | 130 | tmp_y = tf.layers.conv2d( 131 | inputs=padded_input, 132 | filters=num_outputs_y, 133 | kernel_size=[kernel_size,1], 134 | strides=[stride,1], 135 | padding='valid', 136 | activation=myLeakyRelu, 137 | kernel_initializer=default_weights_initializer(), 138 | data_format=data_format, 139 | name=name+'y', 140 | **kwargs, 141 | ) 142 | return tf.layers.conv2d( 143 | inputs=tf.pad(tmp_y, paddings=paddings_x), 144 | filters=num_outputs_x, 145 | kernel_size=[1,kernel_size], 146 | strides=[1,stride], 147 | padding='valid', 148 | activation=myLeakyRelu, 149 | kernel_initializer=default_weights_initializer(), 150 | data_format=data_format, 151 | name=name+'x', 152 | **kwargs, 153 | ) 154 | 155 | -------------------------------------------------------------------------------- /python/depthmotionnet/networks_original.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | from .blocks_original import * 19 | 20 | 21 | 22 | class BootstrapNet: 23 | def __init__(self, session, data_format='channels_first', batch_size=1): 24 | """Creates the network 25 | 26 | session: tf.Session 27 | Tensorflow session 28 | 29 | data_format: str 30 | Either 'channels_first' or 'channels_last'. 31 | Running on the cpu requires 'channels_last'. 
32 | 33 | batch_size: int 34 | The batch size 35 | """ 36 | self.session = session 37 | if data_format=='channels_first': 38 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,6,192,256)) 39 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,48,64)) 40 | else: 41 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,192,256,6)) 42 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,3)) 43 | 44 | with tf.variable_scope('netFlow1'): 45 | netFlow1_result = flow_block_demon_original(self.placeholder_image_pair, data_format=data_format ) 46 | self.netFlow1_result = netFlow1_result 47 | self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow1_result['predict_flowconf5'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3) 48 | self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow1_result['predict_flowconf2'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3) 49 | 50 | with tf.variable_scope('netDM1'): 51 | self.netDM1_result = depthmotion_block_demon_original( 52 | image_pair=self.placeholder_image_pair, 53 | image2_2=self.placeholder_image2_2, 54 | prev_flow2=self.predict_flow2, 55 | prev_flowconf2=self.netFlow1_result['predict_flowconf2'], 56 | data_format=data_format 57 | ) 58 | 59 | 60 | def eval(self, image_pair, image2_2): 61 | """Runs the bootstrap network 62 | 63 | image_pair: numpy.ndarray 64 | Array with shape [N,6,192,256] if data_format=='channels_first' 65 | 66 | Image pair in the range [-0.5, 0.5] 67 | 68 | image2_2: numpy.ndarray 69 | Second image at resolution level 2 (downsampled two times) 70 | 71 | The shape for data_format=='channels_first' is [1,3,48,64] 72 | 73 | Returns a dict with the preditions of the bootstrap net 74 | """ 75 | 76 | fetches = { 77 | 'predict_flow5': self.predict_flow5, 78 | 'predict_flow2': self.predict_flow2, 79 | 'predict_depth2': self.netDM1_result['predict_depth2'], 80 | 'predict_normal2': self.netDM1_result['predict_normal2'], 81 | 'predict_rotation': self.netDM1_result['predict_rotation'], 82 | 'predict_translation': self.netDM1_result['predict_translation'], 83 | } 84 | feed_dict = { 85 | self.placeholder_image_pair: image_pair, 86 | self.placeholder_image2_2: image2_2, 87 | } 88 | return self.session.run(fetches, feed_dict=feed_dict) 89 | 90 | 91 | 92 | class IterativeNet: 93 | def __init__(self, session, data_format='channels_first', batch_size=1): 94 | """Creates the network 95 | 96 | session: tf.Session 97 | Tensorflow session 98 | 99 | data_format: str 100 | Either 'channels_first' or 'channels_last'. 101 | Running on the cpu requires 'channels_last'. 
102 | 103 | batch_size: int 104 | The batch size 105 | """ 106 | self.session = session 107 | 108 | intrinsics = np.broadcast_to(np.array([[0.89115971, 1.18821287, 0.5, 0.5]]),(batch_size,4)) 109 | self.intrinsics = tf.constant(intrinsics, dtype=tf.float32) 110 | 111 | if data_format == 'channels_first': 112 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,6,192,256)) 113 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,48,64)) 114 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,1,48,64)) 115 | self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,48,64)) 116 | else: 117 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,192,256,6)) 118 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,3)) 119 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,1)) 120 | self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,3)) 121 | 122 | self.placeholder_rotation = tf.placeholder(dtype=tf.float32, shape=(batch_size,3)) 123 | self.placeholder_translation = tf.placeholder(dtype=tf.float32, shape=(batch_size,3)) 124 | 125 | with tf.variable_scope('netFlow2'): 126 | netFlow2_result = flow_block_demon_original( 127 | image_pair=self.placeholder_image_pair, 128 | image2_2=self.placeholder_image2_2, 129 | intrinsics=self.intrinsics, 130 | prev_predictions={ 131 | 'predict_depth2': self.placeholder_depth2, 132 | 'predict_normal2': self.placeholder_normal2, 133 | 'predict_rotation': self.placeholder_rotation, 134 | 'predict_translation': self.placeholder_translation, 135 | }, 136 | data_format=data_format, 137 | ) 138 | self.netFlow2_result = netFlow2_result 139 | self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow2_result['predict_flowconf5'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3) 140 | self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow2_result['predict_flowconf2'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3) 141 | 142 | with tf.variable_scope('netDM2'): 143 | self.netDM2_result = depthmotion_block_demon_original( 144 | image_pair=self.placeholder_image_pair, 145 | image2_2=self.placeholder_image2_2, 146 | prev_flow2=self.predict_flow2, 147 | prev_flowconf2=self.netFlow2_result['predict_flowconf2'], 148 | prev_rotation=self.placeholder_rotation, 149 | prev_translation=self.placeholder_translation, 150 | intrinsics=self.intrinsics, 151 | data_format=data_format, 152 | ) 153 | 154 | def eval(self, image_pair, image2_2, depth2, normal2, rotation, translation ): 155 | """Runs the iterative network 156 | 157 | image_pair: numpy.ndarray 158 | Array with shape [N,6,192,256] if data_format=='channels_first' 159 | 160 | Image pair in the range [-0.5, 0.5] 161 | 162 | image2_2: numpy.ndarray 163 | Second image at resolution level 2 (downsampled two times) 164 | 165 | The shape for data_format=='channels_first' is [1,3,48,64] 166 | 167 | depth2: numpy.ndarray 168 | Depth prediction at resolution level 2 169 | 170 | normal2: numpy.ndarray 171 | Normal prediction at resolution level 2 172 | 173 | rotation: numpy.ndarray 174 | Rotation prediction in 3 element angle axis format 175 | 176 | translation: numpy.ndarray 177 | Translation prediction 178 | 179 | Returns a dict with the preditions of the iterative net 180 | """ 181 | 182 | fetches = { 183 | 'predict_flow5': self.predict_flow5, 184 | 
185 |             'predict_depth2': self.netDM2_result['predict_depth2'],
186 |             'predict_normal2': self.netDM2_result['predict_normal2'],
187 |             'predict_rotation': self.netDM2_result['predict_rotation'],
188 |             'predict_translation': self.netDM2_result['predict_translation'],
189 |         }
190 |         feed_dict = {
191 |             self.placeholder_image_pair: image_pair,
192 |             self.placeholder_image2_2: image2_2,
193 |             self.placeholder_depth2: depth2,
194 |             self.placeholder_normal2: normal2,
195 |             self.placeholder_rotation: rotation,
196 |             self.placeholder_translation: translation,
197 |         }
198 |         return self.session.run(fetches, feed_dict=feed_dict)
199 | 
200 | 
201 | 
202 | class RefinementNet:
203 | 
204 |     def __init__(self, session, data_format='channels_first', batch_size=1):
205 |         """Creates the network
206 | 
207 |         session: tf.Session
208 |             Tensorflow session
209 | 
210 |         data_format: str
211 |             Either 'channels_first' or 'channels_last'.
212 |             Running on the cpu requires 'channels_last'.
213 | 
214 |         batch_size: int
215 |             The batch size
216 |         """
217 |         self.session = session
218 | 
219 |         if data_format == 'channels_first':
220 |             self.placeholder_image1 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,192,256))
221 |             self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,1,48,64))
222 |         else:
223 |             self.placeholder_image1 = tf.placeholder(dtype=tf.float32, shape=(batch_size,192,256,3))
224 |             self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,1))
225 | 
226 | 
227 |         with tf.variable_scope('netRefine'):
228 |             self.netRefine_result = depth_refine_block_demon_original(
229 |                 image1=self.placeholder_image1,
230 |                 depthmotion_predictions={
231 |                     'predict_depth2': self.placeholder_depth2,
232 |                 },
233 |                 data_format=data_format,
234 |             )
235 | 
236 |     def eval(self, image1, depth2):
237 |         """Runs the refinement network
238 | 
239 |         image1: numpy.ndarray
240 |             Array with the first image with shape [N,3,192,256] if data_format=='channels_first'
241 | 
242 |         depth2: numpy.ndarray
243 |             Depth prediction at resolution level 2
244 | 
245 |         Returns a dict with the predictions of the refinement net
246 |         """
247 | 
248 |         fetches = {
249 |             'predict_depth0': self.netRefine_result['predict_depth0'],
250 |         }
251 |         feed_dict = {
252 |             self.placeholder_image1: image1,
253 |             self.placeholder_depth2: depth2,
254 |         }
255 |         return self.session.run(fetches, feed_dict=feed_dict)
256 | 
257 | 
--------------------------------------------------------------------------------
/python/depthmotionnet/v2/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see .
17 | # 18 | 19 | -------------------------------------------------------------------------------- /python/depthmotionnet/v2/helpers.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import tensorflow as tf 19 | import lmbspecialops as sops 20 | import numpy as np 21 | 22 | from depthmotionnet.helpers import * 23 | 24 | def conv2d(inputs, num_outputs, kernel_size, data_format, **kwargs): 25 | """Convolution with 'same' padding""" 26 | 27 | return tf.layers.conv2d( 28 | inputs=inputs, 29 | filters=num_outputs, 30 | kernel_size=kernel_size, 31 | kernel_initializer=default_weights_initializer(), 32 | padding='same', 33 | data_format=data_format, 34 | **kwargs, 35 | ) 36 | 37 | 38 | def convrelu(inputs, num_outputs, kernel_size, data_format, **kwargs): 39 | """Shortcut for a single convolution+relu 40 | 41 | See tf.layers.conv2d for a description of remaining parameters 42 | """ 43 | return conv2d(inputs, num_outputs, kernel_size, data_format, activation=myLeakyRelu, **kwargs) 44 | 45 | 46 | def convrelu2(inputs, num_outputs, kernel_size, name, stride, data_format, **kwargs): 47 | """Shortcut for two convolution+relu with 1D filter kernels 48 | 49 | num_outputs: int or (int,int) 50 | If num_outputs is a tuple then the first element is the number of 51 | outputs for the 1d filter in y direction and the second element is 52 | the final number of outputs. 
53 | """ 54 | if isinstance(num_outputs,(tuple,list)): 55 | num_outputs_y = num_outputs[0] 56 | num_outputs_x = num_outputs[1] 57 | else: 58 | num_outputs_y = num_outputs 59 | num_outputs_x = num_outputs 60 | 61 | if isinstance(kernel_size,(tuple,list)): 62 | kernel_size_y = kernel_size[0] 63 | kernel_size_x = kernel_size[1] 64 | else: 65 | kernel_size_y = kernel_size 66 | kernel_size_x = kernel_size 67 | 68 | tmp_y = tf.layers.conv2d( 69 | inputs=inputs, 70 | filters=num_outputs_y, 71 | kernel_size=[kernel_size_y,1], 72 | strides=[stride,1], 73 | padding='same', 74 | activation=myLeakyRelu, 75 | kernel_initializer=default_weights_initializer(), 76 | data_format=data_format, 77 | name=name+'y', 78 | **kwargs, 79 | ) 80 | return tf.layers.conv2d( 81 | inputs=tmp_y, 82 | filters=num_outputs_x, 83 | kernel_size=[1,kernel_size_x], 84 | strides=[1,stride], 85 | padding='same', 86 | activation=myLeakyRelu, 87 | kernel_initializer=default_weights_initializer(), 88 | data_format=data_format, 89 | name=name+'x', 90 | **kwargs, 91 | ) 92 | 93 | 94 | def recursive_median_downsample(inp, iterations): 95 | """Recursively downsamples the input using a 3x3 median filter""" 96 | result = [] 97 | for i in range(iterations): 98 | if not result: 99 | tmp_inp = inp 100 | else: 101 | tmp_inp = result[-1] 102 | result.append(sops.median3x3_downsample(tmp_inp)) 103 | return tuple(result) 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /python/depthmotionnet/v2/networks.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | #
18 | from .blocks import *
19 | 
20 | class BootstrapNet:
21 |     def __init__(self, session):
22 |         """Creates the bootstrap network
23 | 
24 |         session: tf.Session
25 |             Tensorflow session
26 | 
27 |         """
28 |         self.session = session
29 |         self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(1,6,192,256))
30 |         self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
31 | 
32 | 
33 |         with tf.variable_scope('netFlow1'):
34 |             netFlow1_result = flow_block(self.placeholder_image_pair )
35 |             self.netFlow1_result = netFlow1_result
36 |             self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow1_result['predict_flowconf5'], num_or_size_splits=2, axis=1)
37 |             self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow1_result['predict_flowconf2'], num_or_size_splits=2, axis=1)
38 | 
39 |         with tf.variable_scope('netDM1'):
40 |             self.netDM1_result = depthmotion_block(
41 |                 image_pair=self.placeholder_image_pair,
42 |                 image2_2=self.placeholder_image2_2,
43 |                 prev_flow2=self.predict_flow2,
44 |                 prev_flowconf2=self.netFlow1_result['predict_flowconf2'],
45 |             )
46 | 
47 | 
48 |     def eval(self, image_pair, image2_2):
49 |         """Runs the bootstrap network
50 | 
51 |         image_pair: numpy.ndarray
52 |             Array with shape [1,6,192,256]
53 | 
54 |             Image pair in the range [-0.5, 0.5]
55 | 
56 |         image2_2: numpy.ndarray
57 |             Second image at resolution level 2 (downsampled two times)
58 | 
59 |             The shape is [1,3,48,64]
60 | 
61 |         Returns a dict with the predictions of the bootstrap net
62 |         """
63 |         fetches = {
64 |             'predict_flow5': self.predict_flow5,
65 |             'predict_flow2': self.predict_flow2,
66 |             'predict_depth2': self.netDM1_result['predict_depth2'],
67 |             'predict_normal2': self.netDM1_result['predict_normal2'],
68 |             'predict_rotation': self.netDM1_result['predict_rotation'],
69 |             'predict_translation': self.netDM1_result['predict_translation'],
70 |         }
71 |         feed_dict = {
72 |             self.placeholder_image_pair: image_pair,
73 |             self.placeholder_image2_2: image2_2,
74 |         }
75 |         return self.session.run(fetches, feed_dict=feed_dict)
76 | 
77 | 
78 | 
79 | 
80 | class IterativeNet:
81 |     def __init__(self, session):
82 |         """Creates the iterative network
83 | 
84 |         session: tf.Session
85 |             Tensorflow session
86 | 
87 |         """
88 |         self.session = session
89 | 
90 |         self.intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]], dtype=tf.float32)
91 | 
92 |         self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(1,6,192,256))
93 |         self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
94 | 
95 |         self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(1,1,48,64))
96 |         self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
97 |         self.placeholder_rotation = tf.placeholder(dtype=tf.float32, shape=(1,3))
98 |         self.placeholder_translation = tf.placeholder(dtype=tf.float32, shape=(1,3))
99 | 
100 |         with tf.variable_scope('netFlow2'):
101 |             netFlow2_result = flow_block(
102 |                 image_pair=self.placeholder_image_pair,
103 |                 image2_2=self.placeholder_image2_2,
104 |                 intrinsics=self.intrinsics,
105 |                 prev_predictions={
106 |                     'predict_depth2': self.placeholder_depth2,
107 |                     'predict_normal2': self.placeholder_normal2,
108 |                     'predict_rotation': self.placeholder_rotation,
109 |                     'predict_translation': self.placeholder_translation,
110 |                 },
111 |             )
112 |             self.netFlow2_result = netFlow2_result
113 |             self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow2_result['predict_flowconf5'], num_or_size_splits=2, axis=1)
114 |             self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow2_result['predict_flowconf2'], num_or_size_splits=2, axis=1)
115 | 
116 |         with tf.variable_scope('netDM2'):
117 |             self.netDM2_result = depthmotion_block(
118 |                 image_pair=self.placeholder_image_pair,
119 |                 image2_2=self.placeholder_image2_2,
120 |                 prev_flow2=self.predict_flow2,
121 |                 prev_flowconf2=self.netFlow2_result['predict_flowconf2'],
122 |                 intrinsics=self.intrinsics,
123 |                 prev_rotation=self.placeholder_rotation,
124 |                 prev_translation=self.placeholder_translation,
125 |             )
126 | 
127 |     def eval(self, image_pair, image2_2, depth2, normal2, rotation, translation ):
128 |         """Runs the iterative network
129 | 
130 |         image_pair: numpy.ndarray
131 |             Array with shape [1,6,192,256]
132 | 
133 |             Image pair in the range [-0.5, 0.5]
134 | 
135 |         image2_2: numpy.ndarray
136 |             Second image at resolution level 2 (downsampled two times)
137 | 
138 |             The shape is [1,3,48,64]
139 | 
140 |         depth2: numpy.ndarray
141 |             Depth prediction at resolution level 2
142 | 
143 |         normal2: numpy.ndarray
144 |             Normal prediction at resolution level 2
145 | 
146 |         rotation: numpy.ndarray
147 |             Rotation prediction in 3 element angle axis format
148 | 
149 |         translation: numpy.ndarray
150 |             Translation prediction
151 | 
152 |         Returns a dict with the predictions of the iterative net
153 |         """
154 | 
155 | 
156 |         fetches = {
157 |             'predict_flow5': self.predict_flow5,
158 |             'predict_flow2': self.predict_flow2,
159 |             'predict_depth2': self.netDM2_result['predict_depth2'],
160 |             'predict_normal2': self.netDM2_result['predict_normal2'],
161 |             'predict_rotation': self.netDM2_result['predict_rotation'],
162 |             'predict_translation': self.netDM2_result['predict_translation'],
163 |         }
164 |         feed_dict = {
165 |             self.placeholder_image_pair: image_pair,
166 |             self.placeholder_image2_2: image2_2,
167 |             self.placeholder_depth2: depth2,
168 |             self.placeholder_normal2: normal2,
169 |             self.placeholder_rotation: rotation,
170 |             self.placeholder_translation: translation,
171 |         }
172 |         return self.session.run(fetches, feed_dict=feed_dict)
173 | 
174 | 
175 | 
176 | 
177 | class RefinementNet:
178 | 
179 |     def __init__(self, session):
180 |         """Creates the network
181 | 
182 |         session: tf.Session
183 |             Tensorflow session
184 | 
185 |         """
186 | 
187 |         self.session = session
188 |         self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(1,6,192,256))
189 |         self.placeholder_image1 = tf.placeholder(dtype=tf.float32, shape=(1,3,192,256))
190 |         self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(1,1,48,64))
191 |         self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
192 |         self.placeholder_rotation = tf.placeholder(dtype=tf.float32, shape=(1,3))
193 |         self.placeholder_translation = tf.placeholder(dtype=tf.float32, shape=(1,3))
194 | 
195 |         with tf.variable_scope('netRefine'):
196 |             self.netRefine_result = depth_refine_block(
197 |                 image1=self.placeholder_image1,
198 |                 depthmotion_predictions={
199 |                     'predict_depth2': self.placeholder_depth2,
200 |                     'predict_normal2': self.placeholder_normal2,
201 |                 },
202 |             )
203 | 
204 |     def eval(self, image1, depth2, normal2):
205 |         """Runs the refinement network
206 | 
207 |         image1: numpy.ndarray
208 |             Array with the first image with shape [1,3,192,256]
209 | 
210 |         depth2: numpy.ndarray
211 |             Depth prediction at resolution level 2
212 | 
213 |         normal2: numpy.ndarray
214 |             Normal prediction at resolution level 2
215 | 
216 |         Returns a dict with the predictions of the refinement net
217 |         """
218 | 219 | fetches = { 220 | 'predict_depth0': self.netRefine_result['predict_depth0'], 221 | 'predict_normal0': self.netRefine_result['predict_normal0'], 222 | } 223 | feed_dict = { 224 | self.placeholder_image1: image1, 225 | self.placeholder_depth2: depth2, 226 | self.placeholder_normal2: normal2, 227 | } 228 | return self.session.run(fetches, feed_dict=feed_dict) 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /python/depthmotionnet/vis.py: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 17 | # 18 | import pyximport; pyximport.install() 19 | import numpy as np 20 | from .helpers import angleaxis_to_rotation_matrix 21 | 22 | 23 | def compute_point_cloud_from_depthmap( depth, K, R, t, normals=None, colors=None ): 24 | """Creates a point cloud numpy array and optional normals and colors arrays 25 | 26 | depth: numpy.ndarray 27 | 2d array with depth values 28 | 29 | K: numpy.ndarray 30 | 3x3 matrix with internal camera parameters 31 | 32 | R: numpy.ndarray 33 | 3x3 rotation matrix 34 | 35 | t: numpy.ndarray 36 | 3d translation vector 37 | 38 | normals: numpy.ndarray 39 | optional array with normal vectors 40 | 41 | colors: numpy.ndarray 42 | optional RGB image with the same dimensions as the depth map. 
43 | The shape is (3,h,w) with type uint8 44 | 45 | """ 46 | from .vis_cython import compute_point_cloud_from_depthmap as _compute_point_cloud_from_depthmap 47 | return _compute_point_cloud_from_depthmap(depth, K, R, t, normals, colors) 48 | 49 | 50 | def create_camera_polydata(R, t, only_polys=False): 51 | """Creates a vtkPolyData object with a camera mesh""" 52 | import vtk 53 | cam_points = np.array([ 54 | [0, 0, 0], 55 | [-1,-1, 1.5], 56 | [ 1,-1, 1.5], 57 | [ 1, 1, 1.5], 58 | [-1, 1, 1.5], 59 | [-0.5, 1, 1.5], 60 | [ 0.5, 1, 1.5], 61 | [ 0,1.2,1.5], 62 | [ 1,-0.5,1.5], 63 | [ 1, 0.5,1.5], 64 | [ 1.2, 0, 1.5]] 65 | ) 66 | cam_points = (0.25*cam_points - t).dot(R) 67 | 68 | vpoints = vtk.vtkPoints() 69 | vpoints.SetNumberOfPoints(cam_points.shape[0]) 70 | for i in range(cam_points.shape[0]): 71 | vpoints.SetPoint(i, cam_points[i]) 72 | vpoly = vtk.vtkPolyData() 73 | vpoly.SetPoints(vpoints) 74 | 75 | poly_cells = vtk.vtkCellArray() 76 | 77 | if not only_polys: 78 | line_cells = vtk.vtkCellArray() 79 | 80 | line_cells.InsertNextCell( 5 ); 81 | line_cells.InsertCellPoint( 1 ); 82 | line_cells.InsertCellPoint( 2 ); 83 | line_cells.InsertCellPoint( 3 ); 84 | line_cells.InsertCellPoint( 4 ); 85 | line_cells.InsertCellPoint( 1 ); 86 | 87 | line_cells.InsertNextCell( 3 ); 88 | line_cells.InsertCellPoint( 1 ); 89 | line_cells.InsertCellPoint( 0 ); 90 | line_cells.InsertCellPoint( 2 ); 91 | 92 | line_cells.InsertNextCell( 3 ); 93 | line_cells.InsertCellPoint( 3 ); 94 | line_cells.InsertCellPoint( 0 ); 95 | line_cells.InsertCellPoint( 4 ); 96 | 97 | # x-axis indicator 98 | line_cells.InsertNextCell( 3 ); 99 | line_cells.InsertCellPoint( 8 ); 100 | line_cells.InsertCellPoint( 10 ); 101 | line_cells.InsertCellPoint( 9 ); 102 | vpoly.SetLines(line_cells) 103 | else: 104 | # left 105 | poly_cells.InsertNextCell( 3 ); 106 | poly_cells.InsertCellPoint( 0 ); 107 | poly_cells.InsertCellPoint( 1 ); 108 | poly_cells.InsertCellPoint( 4 ); 109 | 110 | # right 111 | poly_cells.InsertNextCell( 3 ); 112 | poly_cells.InsertCellPoint( 0 ); 113 | poly_cells.InsertCellPoint( 3 ); 114 | poly_cells.InsertCellPoint( 2 ); 115 | 116 | # top 117 | poly_cells.InsertNextCell( 3 ); 118 | poly_cells.InsertCellPoint( 0 ); 119 | poly_cells.InsertCellPoint( 4 ); 120 | poly_cells.InsertCellPoint( 3 ); 121 | 122 | # bottom 123 | poly_cells.InsertNextCell( 3 ); 124 | poly_cells.InsertCellPoint( 0 ); 125 | poly_cells.InsertCellPoint( 2 ); 126 | poly_cells.InsertCellPoint( 1 ); 127 | 128 | # x-axis indicator 129 | poly_cells.InsertNextCell( 3 ); 130 | poly_cells.InsertCellPoint( 8 ); 131 | poly_cells.InsertCellPoint( 10 ); 132 | poly_cells.InsertCellPoint( 9 ); 133 | 134 | # up vector (y-axis) 135 | poly_cells.InsertNextCell( 3 ); 136 | poly_cells.InsertCellPoint( 5 ); 137 | poly_cells.InsertCellPoint( 6 ); 138 | poly_cells.InsertCellPoint( 7 ); 139 | 140 | vpoly.SetPolys(poly_cells) 141 | 142 | return vpoly 143 | 144 | 145 | def create_camera_actor(R, t): 146 | """Creates a vtkActor object with a camera mesh""" 147 | import vtk 148 | vpoly = create_camera_polydata(R, t) 149 | mapper = vtk.vtkPolyDataMapper() 150 | mapper.SetInputData(vpoly) 151 | 152 | actor = vtk.vtkActor() 153 | actor.SetMapper(mapper) 154 | actor.GetProperty().LightingOff() 155 | actor.GetProperty().SetLineWidth(2) 156 | 157 | return actor 158 | 159 | 160 | def create_pointcloud_polydata(points, colors=None): 161 | """Creates a vtkPolyData object with the point cloud from numpy arrays 162 | 163 | points: numpy.ndarray 164 | pointcloud with shape (n,3) 165 | 166 
| colors: numpy.ndarray 167 | uint8 array with colors for each point. shape is (n,3) 168 | 169 | Returns vtkPolyData object 170 | """ 171 | import vtk 172 | vpoints = vtk.vtkPoints() 173 | vpoints.SetNumberOfPoints(points.shape[0]) 174 | for i in range(points.shape[0]): 175 | vpoints.SetPoint(i, points[i]) 176 | vpoly = vtk.vtkPolyData() 177 | vpoly.SetPoints(vpoints) 178 | 179 | if not colors is None: 180 | vcolors = vtk.vtkUnsignedCharArray() 181 | vcolors.SetNumberOfComponents(3) 182 | vcolors.SetName("Colors") 183 | vcolors.SetNumberOfTuples(points.shape[0]) 184 | for i in range(points.shape[0]): 185 | vcolors.SetTuple3(i ,colors[i,0],colors[i,1], colors[i,2]) 186 | vpoly.GetPointData().SetScalars(vcolors) 187 | 188 | vcells = vtk.vtkCellArray() 189 | 190 | for i in range(points.shape[0]): 191 | vcells.InsertNextCell(1) 192 | vcells.InsertCellPoint(i) 193 | 194 | vpoly.SetVerts(vcells) 195 | 196 | return vpoly 197 | 198 | 199 | 200 | def create_pointcloud_actor(points, colors=None): 201 | """Creates a vtkActor with the point cloud from numpy arrays 202 | 203 | points: numpy.ndarray 204 | pointcloud with shape (n,3) 205 | 206 | colors: numpy.ndarray 207 | uint8 array with colors for each point. shape is (n,3) 208 | 209 | Returns vtkActor object 210 | """ 211 | import vtk 212 | vpoly = create_pointcloud_polydata(points, colors) 213 | mapper = vtk.vtkPolyDataMapper() 214 | mapper.SetInputData(vpoly) 215 | 216 | actor = vtk.vtkActor() 217 | actor.SetMapper(mapper) 218 | actor.GetProperty().SetPointSize(3) 219 | 220 | return actor 221 | 222 | 223 | def visualize_prediction( inverse_depth, intrinsics=None, normals=None, rotation=None, translation=None, image=None ): 224 | """Visualizes the network predictions 225 | 226 | inverse_depth: numpy.ndarray 227 | 2d array with the inverse depth values with shape (h,w) 228 | 229 | intrinsics: numpy.ndarray 230 | 4 element vector with the normalized intrinsic parameters with shape 231 | (4,) 232 | 233 | normals: numpy.ndarray 234 | normal map with shape (3,h,w) 235 | 236 | rotation: numpy.ndarray 237 | rotation in axis angle format with 3 elements with shape (3,) 238 | 239 | translation: numpy.ndarray 240 | translation vector with shape (3,) 241 | 242 | image: numpy.ndarray 243 | Image with shape (3,h,w) in the range [-0.5,0.5]. 
244 | """ 245 | import vtk 246 | depth = (1/inverse_depth).squeeze() 247 | 248 | w = depth.shape[-1] 249 | h = depth.shape[-2] 250 | 251 | if intrinsics is None: 252 | intrinsics = np.array([0.89115971, 1.18821287, 0.5, 0.5]) # sun3d intrinsics 253 | 254 | K = np.eye(3) 255 | K[0,0] = intrinsics[0]*w 256 | K[1,1] = intrinsics[1]*h 257 | K[0,2] = intrinsics[2]*w 258 | K[1,2] = intrinsics[3]*h 259 | 260 | R1 = np.eye(3) 261 | t1 = np.zeros((3,)) 262 | 263 | if not rotation is None and not translation is None: 264 | R2 = angleaxis_to_rotation_matrix(rotation.squeeze()) 265 | t2 = translation.squeeze() 266 | else: 267 | R2 = np.eye(3) 268 | t2 = np.zeros((3,)) 269 | 270 | if not normals is None: 271 | n = normals.squeeze() 272 | else: 273 | n = None 274 | 275 | if not image is None: 276 | img = ((image+0.5)*255).astype(np.uint8) 277 | else: 278 | img = None 279 | 280 | pointcloud = compute_point_cloud_from_depthmap(depth, K, R1, t1, n, img) 281 | 282 | renderer = vtk.vtkRenderer() 283 | renderer.SetBackground(0, 0, 0) 284 | 285 | pointcloud_actor = create_pointcloud_actor( 286 | points=pointcloud['points'], 287 | colors=pointcloud['colors'] if 'colors' in pointcloud else None, 288 | ) 289 | renderer.AddActor(pointcloud_actor) 290 | 291 | cam1_actor = create_camera_actor(R1,t1) 292 | renderer.AddActor(cam1_actor) 293 | 294 | cam2_actor = create_camera_actor(R2,t2) 295 | renderer.AddActor(cam2_actor) 296 | 297 | axes = vtk.vtkAxesActor() 298 | axes.GetXAxisCaptionActor2D().SetHeight(0.05) 299 | axes.GetYAxisCaptionActor2D().SetHeight(0.05) 300 | axes.GetZAxisCaptionActor2D().SetHeight(0.05) 301 | axes.SetCylinderRadius(0.03) 302 | axes.SetShaftTypeToCylinder() 303 | renderer.AddActor(axes) 304 | 305 | renwin = vtk.vtkRenderWindow() 306 | renwin.SetWindowName("Point Cloud Viewer") 307 | renwin.SetSize(800,600) 308 | renwin.AddRenderer(renderer) 309 | 310 | 311 | # An interactor 312 | interactor = vtk.vtkRenderWindowInteractor() 313 | interstyle = vtk.vtkInteractorStyleTrackballCamera() 314 | interactor.SetInteractorStyle(interstyle) 315 | interactor.SetRenderWindow(renwin) 316 | 317 | # Start 318 | interactor.Initialize() 319 | interactor.Start() 320 | 321 | 322 | def export_prediction_to_ply( output_prefix, inverse_depth, intrinsics=None, normals=None, rotation=None, translation=None, image=None ): 323 | """Exports the network predictions to ply files meant for external visualization 324 | 325 | inverse_depth: numpy.ndarray 326 | 2d array with the inverse depth values with shape (h,w) 327 | 328 | intrinsics: numpy.ndarray 329 | 4 element vector with the normalized intrinsic parameters with shape 330 | (4,) 331 | 332 | normals: numpy.ndarray 333 | normal map with shape (3,h,w) 334 | 335 | rotation: numpy.ndarray 336 | rotation in axis angle format with 3 elements with shape (3,) 337 | 338 | translation: numpy.ndarray 339 | translation vector with shape (3,) 340 | 341 | image: numpy.ndarray 342 | Image with shape (3,h,w) in the range [-0.5,0.5]. 
343 | """ 344 | import vtk 345 | depth = (1/inverse_depth).squeeze() 346 | 347 | w = depth.shape[-1] 348 | h = depth.shape[-2] 349 | 350 | if intrinsics is None: 351 | intrinsics = np.array([0.89115971, 1.18821287, 0.5, 0.5]) # sun3d intrinsics 352 | 353 | K = np.eye(3) 354 | K[0,0] = intrinsics[0]*w 355 | K[1,1] = intrinsics[1]*h 356 | K[0,2] = intrinsics[2]*w 357 | K[1,2] = intrinsics[3]*h 358 | 359 | R1 = np.eye(3) 360 | t1 = np.zeros((3,)) 361 | 362 | if not rotation is None and not translation is None: 363 | R2 = angleaxis_to_rotation_matrix(rotation.squeeze()) 364 | t2 = translation.squeeze() 365 | else: 366 | R2 = np.eye(3) 367 | t2 = np.zeros((3,)) 368 | 369 | if not normals is None: 370 | n = normals.squeeze() 371 | else: 372 | n = None 373 | 374 | if not image is None: 375 | img = ((image+0.5)*255).astype(np.uint8) 376 | else: 377 | img = None 378 | 379 | pointcloud = compute_point_cloud_from_depthmap(depth, K, R1, t1, n, img) 380 | 381 | pointcloud_polydata = create_pointcloud_polydata( 382 | points=pointcloud['points'], 383 | colors=pointcloud['colors'] if 'colors' in pointcloud else None, 384 | ) 385 | plywriter = vtk.vtkPLYWriter() 386 | plywriter.SetFileName(output_prefix + 'points.ply') 387 | plywriter.SetInputData(pointcloud_polydata) 388 | plywriter.SetArrayName('Colors') 389 | plywriter.Write() 390 | 391 | cam1_polydata = create_camera_polydata(R1,t1, True) 392 | plywriter = vtk.vtkPLYWriter() 393 | plywriter.SetFileName(output_prefix + 'cam1.ply') 394 | plywriter.SetInputData(cam1_polydata) 395 | plywriter.Write() 396 | 397 | cam2_polydata = create_camera_polydata(R2,t2, True) 398 | plywriter = vtk.vtkPLYWriter() 399 | plywriter.SetFileName(output_prefix + 'cam2.ply') 400 | plywriter.SetInputData(cam2_polydata) 401 | plywriter.Write() 402 | 403 | 404 | 405 | def transform_pointcloud_points(points, T): 406 | """Transforms the pointcloud with T 407 | 408 | points: numpy.ndarray 409 | pointcloud with shape (n,3) 410 | 411 | T: numpy.ndarray 412 | The 4x4 transformation 413 | 414 | Returns the transformed points 415 | """ 416 | tmp = np.empty((points.shape[0],points.shape[1]+1),dtype=points.dtype) 417 | tmp[:,0:3] = points 418 | tmp[:,3] = 1 419 | return T.dot(tmp.transpose())[0:3].transpose() 420 | 421 | -------------------------------------------------------------------------------- /python/depthmotionnet/vis_cython.pyx: -------------------------------------------------------------------------------- 1 | # 2 | # DeMoN - Depth Motion Network 3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see . 
17 | # 18 | import numpy as np 19 | cimport numpy as np 20 | cimport cython 21 | from libc.math cimport isfinite 22 | 23 | 24 | @cython.boundscheck(False) 25 | cdef _compute_point_cloud_from_depthmap( 26 | np.ndarray[np.float32_t, ndim=2] depth, 27 | np.ndarray[np.float32_t, ndim=3] normals, 28 | np.ndarray[np.uint8_t, ndim=3] colors, 29 | np.ndarray[np.float32_t, ndim=2] K, 30 | np.ndarray[np.float32_t, ndim=2] R_arr, 31 | np.ndarray[np.float32_t, ndim=1] t_arr ): 32 | 33 | cdef int valid_count 34 | valid_count = 0 35 | cdef int x, y 36 | cdef int index 37 | cdef int width, height 38 | cdef float d 39 | cdef float tmp[3] 40 | cdef float X[3] 41 | cdef float inv_fx = 1/K[0,0] 42 | cdef float inv_fy = 1/K[1,1] 43 | cdef float cx = K[0,2] 44 | cdef float cy = K[1,2] 45 | cdef float h = 0.5 46 | cdef float [:,:] R = R_arr 47 | cdef float [:] t = t_arr 48 | 49 | width = depth.shape[1] 50 | height = depth.shape[0] 51 | 52 | for y in range(height): 53 | for x in range(width): 54 | d = depth[y,x] 55 | if isfinite(d) and d > 0: 56 | valid_count += 1 57 | 58 | cdef np.ndarray[np.float32_t, ndim=2] points_arr = np.empty((valid_count,3), dtype=np.float32) 59 | cdef float [:,:] points = points_arr 60 | cdef np.ndarray[np.float32_t,ndim=2] normals_attr_arr = np.empty((valid_count,3), dtype=np.float32) 61 | cdef float [:,:] normals_attr = normals_attr_arr 62 | cdef np.ndarray[np.uint8_t,ndim=2] colors_attr_arr = np.empty((valid_count,3), dtype=np.uint8) 63 | cdef unsigned char [:,:] colors_attr = colors_attr_arr 64 | 65 | index = 0 66 | for y in range(height): 67 | for x in range(width): 68 | d = depth[y,x] 69 | if isfinite(d) and d > 0: 70 | tmp[0] = d*((x+h) - cx)*inv_fx - t[0] 71 | tmp[1] = d*((y+h) - cy)*inv_fy - t[1] 72 | tmp[2] = d - t[2] 73 | X[0] = R[0,0]*tmp[0] + R[1,0]*tmp[1] + R[2,0]*tmp[2] 74 | X[1] = R[0,1]*tmp[0] + R[1,1]*tmp[1] + R[2,1]*tmp[2] 75 | X[2] = R[0,2]*tmp[0] + R[1,2]*tmp[1] + R[2,2]*tmp[2] 76 | points[index,0] = X[0] 77 | points[index,1] = X[1] 78 | points[index,2] = X[2] 79 | index += 1 80 | 81 | result = {'points':points_arr} 82 | 83 | if normals.shape[0] > 0: 84 | index = 0 85 | for y in range(height): 86 | for x in range(width): 87 | d = depth[y,x] 88 | if np.isfinite(d) and d > 0.0: 89 | tmp[0] = normals[0,y,x] 90 | tmp[1] = normals[1,y,x] 91 | tmp[2] = normals[2,y,x] 92 | X[0] = R[0,0]*tmp[0] + R[1,0]*tmp[1] + R[2,0]*tmp[2] 93 | X[1] = R[0,1]*tmp[0] + R[1,1]*tmp[1] + R[2,1]*tmp[2] 94 | X[2] = R[0,2]*tmp[0] + R[1,2]*tmp[1] + R[2,2]*tmp[2] 95 | normals_attr[index,0] = X[0] 96 | normals_attr[index,1] = X[1] 97 | normals_attr[index,2] = X[2] 98 | index += 1 99 | 100 | result['normals'] = normals_attr_arr 101 | 102 | if colors.shape[0] > 0: 103 | index = 0 104 | for y in range(height): 105 | for x in range(width): 106 | d = depth[y,x] 107 | if np.isfinite(d) and d > 0.0: 108 | colors_attr[index,0] = colors[0,y,x] 109 | colors_attr[index,1] = colors[1,y,x] 110 | colors_attr[index,2] = colors[2,y,x] 111 | index += 1 112 | 113 | result['colors'] = colors_attr_arr 114 | 115 | return result 116 | 117 | 118 | 119 | def compute_point_cloud_from_depthmap( depth, K, R, t, normals=None, colors=None ): 120 | """Creates a point cloud numpy array and optional normals and colors arrays 121 | 122 | depth: numpy.ndarray 123 | 2d array with depth values 124 | 125 | K: numpy.ndarray 126 | 3x3 matrix with internal camera parameters 127 | 128 | R: numpy.ndarray 129 | 3x3 rotation matrix 130 | 131 | t: numpy.ndarray 132 | 3d translation vector 133 | 134 | normals: numpy.ndarray 135 | optional 
array with normal vectors 136 | 137 | colors: numpy.ndarray 138 | optional RGB image with the same dimensions as the depth map. 139 | The shape is (3,h,w) with type uint8 140 | 141 | """ 142 | assert colors.dtype == np.uint8 if not colors is None else True 143 | 144 | # make sure the dims and type are ok for the depth 145 | if depth.dtype != np.float32: 146 | _depth = depth.astype(np.float32) 147 | else: 148 | _depth = depth 149 | 150 | if len(_depth.shape) > 2: 151 | _depth = _depth.squeeze() 152 | if len(_depth.shape) > 2: 153 | raise ValueError("wrong number of dimensions for depth") 154 | 155 | # sanity checks 156 | if normals is None: 157 | normals = np.empty((0,0,0),dtype=np.float32) 158 | elif normals.shape[1:] != _depth.shape: 159 | raise ValueError("shape mismatch: normals {0}, depth {1}".format(normals.shape, depth.shape)) 160 | 161 | if normals.dtype != np.float32: 162 | _normals = normals.astype(np.float32) 163 | else: 164 | _normals = normals 165 | 166 | if colors is None: 167 | colors_arr = np.empty((0,0,0),dtype=np.uint8) 168 | else: 169 | colors_arr = colors 170 | if colors_arr.shape[1:] != _depth.shape: 171 | raise ValueError("shape mismatch: colors {0}, depth {1}".format(colors_arr.shape, depth.shape)) 172 | 173 | return _compute_point_cloud_from_depthmap(_depth, _normals, colors_arr, K.astype(np.float32), R.astype(np.float32), t.astype(np.float32)) 174 | 175 | -------------------------------------------------------------------------------- /teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/teaser.png -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- 1 | # Network Training 2 | 3 | Code for training the network can be found in the following directories: 4 | 5 | ``` 6 | training/v2 7 | python/depthmotionnet/v2 8 | ``` 9 | 10 | * ```training/v2``` contains the training script ```training.py```. 11 | * ```python/depthmotionnet/v2``` contains the definition of the network parts for version _v2_ (```blocks.py```) and loss functions (```losses.py```) as well as code for easy instantiating of the network. 12 | 13 | 14 | ## Training Evolutions 15 | 16 | The training process is made up of several stages called _evolutions_. 17 | To train DeMoN we use 6 evolutions (```0_flow1, 1_dm1, 2_flow2, 3_flow2, 4_iterative, 5_refine```). 18 | The instantiated and actively training network parts are visualized below: 19 | ![Training Evolutions](network_training_evolutions.gif) 20 | 21 | 22 | ## Prerequisites 23 | 24 | * The python library **tfutils** for managing the training evolutions must be downloaded and added to the python path (https://github.com/lmb-freiburg/tfutils.git) 25 | * The ```python``` directory in the demon root is added to the python path 26 | * **lmbspecialops** is built and added to the python path. 27 | * **multivih5datareaderop** is built (see [readme](../multivih5datareaderop/README.md)) 28 | * Training data sets are available in the folder ```datasets/training_data```. 29 | The script ```datasets/download_traindata.sh``` can be used to download the data sets. 
30 | 31 | 32 | The following script sets up all required libraries from scratch in a virtualenv ```demon_venv``` managed with ```pew```: 33 | 34 | ```bash 35 | pew new demon_venv # create new virtualenv 36 | # the following commands are executed within the demon_venv virtualenv 37 | 38 | # install python module dependencies 39 | pip install tensorflow-gpu # or 'tensorflow' without gpu support 40 | pip install pillow # for reading images 41 | pip install matplotlib # required for visualizing depth maps 42 | pip install Cython # required for visualizing point clouds 43 | pip install h5py 44 | pip install minieigen 45 | pip install pandas 46 | pip install scipy 47 | pip install scikit-image 48 | pip install xarray 49 | 50 | # install tfutils library 51 | git clone https://github.com/lmb-freiburg/tfutils.git 52 | pew add $PWD/tfutils/python # add to python path 53 | 54 | # clone demon repo with submodules 55 | git clone --recursive https://github.com/lmb-freiburg/demon.git 56 | DEMON_DIR=$PWD/demon 57 | pew add $DEMON_DIR/python # add to python path 58 | 59 | # build lmbspecialops 60 | mkdir $DEMON_DIR/lmbspecialops/build 61 | cd $DEMON_DIR/lmbspecialops/build 62 | cmake .. # add '-DBUILD_WITH_CUDA=OFF' to build without gpu support 63 | # (optional) run 'ccmake .' here to adjust settings for gpu code generation 64 | make 65 | pew add $DEMON_DIR/lmbspecialops/python # add to python path 66 | 67 | 68 | # build multivih5datareaderop (requires OpenCV) 69 | mkdir $DEMON_DIR/build 70 | cd $DEMON_DIR/build 71 | cmake .. 72 | make 73 | 74 | # download training data 75 | cd $DEMON_DIR/datasets 76 | ./download_traindata.sh 77 | 78 | ``` 79 | 80 | ## Training Script 81 | 82 | ```bash 83 | cd $DEMON_DIR/training/v2 84 | pew in demon_venv 85 | python training.py 86 | ``` 87 | The training script creates the folder ```$DEMON_DIR/training/v2/training```. 88 | Once training is complete the last snapshot can be found as ```$DEMON_DIR/training/v2/training/5_refine/checkpoints/snapshot-250000.*``` 89 | 90 | The location of the training data can be adjusted in the file ```training.py```. 91 | 92 | -------------------------------------------------------------------------------- /training/network_training_evolutions.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/training/network_training_evolutions.gif -------------------------------------------------------------------------------- /weights/.gitignore: -------------------------------------------------------------------------------- 1 | *.data-*-of-* 2 | *.index 3 | *.tgz 4 | -------------------------------------------------------------------------------- /weights/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget https://lmb.informatik.uni-freiburg.de/people/ummenhof/depthmotionnet/demon_original_weights.tgz 3 | tar -xvf demon_original_weights.tgz 4 | --------------------------------------------------------------------------------
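
The classes in `python/depthmotionnet/v2/networks.py` shown above are meant to be chained at inference time: the bootstrap net produces an initial low-resolution depth, normal and motion estimate from an image pair, the iterative net refines that estimate over several passes, and the refinement net upsamples the depth prediction to full resolution. The sketch below illustrates one possible way to wire them together; the checkpoint path, the number of iterative passes (three) and the use of `tf.train.Saver` for restoring weights are assumptions for illustration, not code from the repository.

```python
# Hypothetical inference sketch for the v2 networks (assumptions: checkpoint
# path, three iterative passes, channels_first input layout).
import numpy as np
import tensorflow as tf
from depthmotionnet.v2.networks import BootstrapNet, IterativeNet, RefinementNet

session = tf.InteractiveSession()
bootstrap_net = BootstrapNet(session)
iterative_net = IterativeNet(session)
refine_net = RefinementNet(session)

# Restore trained weights; the path below is a placeholder, not a file shipped with the repo.
tf.train.Saver().restore(session, '/path/to/checkpoint/snapshot')

# Dummy inputs matching the placeholder shapes defined in the network classes.
image_pair = np.zeros((1, 6, 192, 256), dtype=np.float32)  # two images stacked along channels, values in [-0.5, 0.5]
image2_2 = np.zeros((1, 3, 48, 64), dtype=np.float32)      # second image downsampled two times
image1 = image_pair[:, 0:3]                                # first image of the pair

# Bootstrap, then refine the estimate iteratively, then upsample the depth.
result = bootstrap_net.eval(image_pair, image2_2)
for _ in range(3):  # assumed number of iterative passes
    result = iterative_net.eval(
        image_pair, image2_2,
        result['predict_depth2'], result['predict_normal2'],
        result['predict_rotation'], result['predict_translation'])
refined = refine_net.eval(image1, result['predict_depth2'], result['predict_normal2'])
# refined['predict_depth0'] and refined['predict_normal0'] hold the full-resolution predictions.
```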