├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE.txt
├── README.md
├── datasets
│   ├── .gitignore
│   ├── download_testdata.sh
│   ├── download_traindata.sh
│   ├── generate_sun3d_train_datasets.py
│   ├── sun3d_train_sequences.txt
│   ├── testdata.md5
│   └── traindata
│       └── traindata.md5
├── examples
│   ├── .gitignore
│   ├── create_dataset_and_use_readerop.py
│   ├── evaluation.py
│   ├── example.py
│   ├── example_v2.py
│   ├── sculpture1.png
│   ├── sculpture2.png
│   ├── sculpture_Rt1.txt
│   ├── sculpture_Rt2.txt
│   ├── sculpture_depth1.npy
│   └── sculpture_depth2.npy
├── multivih5datareaderop
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── lz4_cmakelists.txt
│   ├── multivih5datareader.cpp
│   ├── multivih5datareader.h
│   ├── multivih5datareaderop.cc
│   ├── simpleh5file.cpp
│   └── simpleh5file.h
├── python
│   └── depthmotionnet
│       ├── __init__.py
│       ├── blocks_original.py
│       ├── datareader
│       │   ├── __init__.py
│       │   └── helpers.py
│       ├── dataset_tools
│       │   ├── __init__.py
│       │   ├── helpers.py
│       │   ├── lz4.py
│       │   ├── sun3d_utils.py
│       │   ├── view.py
│       │   ├── view_io.py
│       │   ├── view_tools.py
│       │   ├── view_tools_cython.pyx
│       │   └── webp.py
│       ├── evaluation
│       │   ├── __init__.py
│       │   ├── evaluate_to_xarray.py
│       │   ├── helpers.py
│       │   └── metrics.py
│       ├── helpers.py
│       ├── networks_original.py
│       ├── v2
│       │   ├── __init__.py
│       │   ├── blocks.py
│       │   ├── helpers.py
│       │   ├── losses.py
│       │   └── networks.py
│       ├── vis.py
│       └── vis_cython.pyx
├── teaser.png
├── training
│   ├── README.md
│   ├── network_training_evolutions.gif
│   └── v2
│       └── training.py
└── weights
    ├── .gitignore
    └── download_weights.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.swp
3 | build
4 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lmbspecialops"]
2 | path = lmbspecialops
3 | url = https://github.com/lmb-freiburg/lmbspecialops.git
4 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required( VERSION 3.5.1 )
2 |
3 | project(depthmotionnet)
4 |
5 | if( NOT CMAKE_BUILD_TYPE )
6 | set( CMAKE_BUILD_TYPE "Release" CACHE STRING "Build configuration 'Release' or 'Debug'." FORCE )
7 | endif()
8 |
9 | # enable all warnings
10 | set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" )
11 |
12 | add_subdirectory( multivih5datareaderop )
13 |
14 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
2 |
3 | RUN apt update && apt install -y python3-pip \
4 | libhdf5-dev \
5 | libopencv-dev \
6 | python3-tk \
7 | cmake \
8 | gcc-4.8 \
9 | g++-4.8 \
10 | cmake \
11 | x11-apps
12 |
13 | RUN python3 -m pip install numpy==1.12.1 \
14 | tensorflow-gpu===1.4.0 \
15 | pillow==2.0.0 \
16 | pyparsing===2.1.4 \
17 | cycler===0.10.0 \
18 | matplotlib===2.1.2
19 | ADD . /home/demon
20 | RUN mkdir /home/demon/lmbspecialops/build
21 | WORKDIR /home/demon/lmbspecialops/build
22 | ENV CC=/usr/bin/gcc-4.8
23 | ENV CXX=/usr/bin/g++-4.8
24 | RUN cmake -DCMAKE_BUILD_TYPE=Release ..
25 | RUN make
26 | ENV PYTHONPATH=/home/demon/lmbspecialops/python
27 | ENV LMBSPECIALOPS_LIB=/home/demon/lmbspecialops/build/lib/lmbspecialops.so
28 | WORKDIR /home/demon/examples
29 | CMD ["python3", "example.py"]
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeMoN: Depth and Motion Network
2 |
3 | [License](LICENSE)
4 |
5 | DeMoN is "A computer algorithm for reconstructing a scene from two projections"<sup>1</sup>.
6 | The network estimates the depth and relative camera motion for pairs of images; it addresses the important two view case in structure from motion.
7 |
8 | ![teaser](teaser.png)
9 |
10 | If you use this code for research please cite:
11 |
12 | @InProceedings{UZUMIDB17,
13 | author = "B. Ummenhofer and H. Zhou and J. Uhrig and N. Mayer and E. Ilg and A. Dosovitskiy and T. Brox",
14 | title = "DeMoN: Depth and Motion Network for Learning Monocular Stereo",
15 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
16 | month = " ",
17 | year = "2017",
18 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/UZUMIDB17"
19 | }
20 |
21 | See the [project website](https://lmb.informatik.uni-freiburg.de/people/ummenhof/depthmotionnet) for the paper and other material.
22 |
23 | <sup>1</sup> This is the title of H. C. Longuet-Higgins' paper from 1981, which perfectly describes what our method does. DeMoN shows that complex geometric relations can be learnt by a ConvNet.
24 |
25 | ## Requirements
26 |
27 | Building and using requires the following libraries and programs
28 |
29 | tensorflow 1.4.0
30 | cmake 3.7.1
31 | python 3.5
32 | cuda 8.0.61 (required for gpu support)
33 | VTK 7.1 with python3 interface (required for visualizing point clouds)
34 |
35 | The versions match the configuration we have tested on an ubuntu 16.04 system.
36 | DeMoN can work with other versions of the aforementioned dependencies, e.g. tensorflow 1.3, but this is not well tested.
37 |
38 | The binary package from [vtk.org](http://www.vtk.org) does not come with a python3 interface.
39 | To enable python3 support VTK needs to be built from source.
40 | Alternatively, there are also VTK packages with python3 support available in Anaconda via the conda package manager.
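A minimal conda-based setup could look like the following (assuming the ```conda-forge``` channel provides a python3-enabled VTK build):

```bash
# install VTK with python3 bindings from conda-forge
conda install -c conda-forge vtk
```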
41 |
42 | The network also depends on our [lmbspecialops](https://github.com/lmb-freiburg/lmbspecialops) library which is included as a submodule.
43 |
44 |
45 |
46 | ## Build instructions
47 |
48 | The following describes how to install tensorflow and demon into a new virtualenv and run the inference example.
49 | We will use ```pew``` (```pip3 install pew```) to manage a new virtualenv named ```demon_venv``` in the following:
50 |
51 | ```bash
52 | # create virtualenv
53 | pew new demon_venv
54 | ```
55 |
56 | The following commands all run inside the virtualenv:
57 |
58 | ```bash
59 | # install python module dependencies
60 | pip3 install tensorflow-gpu # or 'tensorflow' without gpu support
61 | pip3 install pillow # for reading images
62 | pip3 install matplotlib # required for visualizing depth maps
63 | pip3 install Cython # required for visualizing point clouds
64 | ```
65 |
66 | ```bash
67 | # clone repo with submodules
68 | git clone --recursive https://github.com/lmb-freiburg/demon.git
69 |
70 | # build lmbspecialops
71 | DEMON_DIR=$PWD/demon
72 | mkdir $DEMON_DIR/lmbspecialops/build
73 | cd $DEMON_DIR/lmbspecialops/build
74 | cmake .. # add '-DBUILD_WITH_CUDA=OFF' to build without gpu support
75 | # (optional) run 'ccmake .' here to adjust settings for gpu code generation
76 | make
77 | pew add $DEMON_DIR/lmbspecialops/python # add to python path
78 |
79 | # download weights
80 | cd $DEMON_DIR/weights
81 | ./download_weights.sh
82 |
83 | # run example
84 | cd $DEMON_DIR/examples
85 | python3 example.py # opens a window with the depth map (and the point cloud if vtk is available)
86 | ```
87 |
88 | ## Data reader op & evaluation
89 |
90 | The data reader op and the evaluation code have additional dependencies.
91 | The code for the data reader is in the ```multivih5datareaderop``` directory.
92 | See the corresponding [readme](multivih5datareaderop/README.md) for more details.
93 |
94 | For the evaluation see the example [```examples/evaluation.py```](examples/evaluation.py).
95 | The evaluation code requires the following additional python3 packages, which can be installed with ```pip```:
96 |
97 | ```
98 | h5py
99 | minieigen
100 | pandas
101 | scipy
102 | scikit-image
103 | xarray
104 | ```
105 | Note that the evaluation code also depends on the data reader op.
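For example, a single ```pip``` call covering the list above (a sketch; adapt to your environment):

```bash
pip3 install h5py minieigen pandas scipy scikit-image xarray
```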
106 |
107 |
108 | ## Training code
109 |
110 | Instructions for training a clean tensorflow version of DeMoN are [here](training/README.md).
111 | Note that the tensorflow training code and model are work in progress and are not identical to the original Caffe version.
112 |
113 |
114 | ## Datasets
115 |
116 | Download scripts for training and testing are located in the ```datasets``` subdirectory.
117 | Note that, due to a bug, some of the dataset files with the prefix ```rgbd``` contained samples from the test set.
118 | The affected files have been replaced and now have the prefix ```rgbd_bugfix```.
119 | MD5 checksums for all files can be found in the file ```traindata.md5```.
120 |
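To check the downloaded files against these checksums, something along the following lines should work (assuming GNU ```md5sum``` is available; run each command in the directory that holds the corresponding files):

```bash
# test data (run inside datasets/)
md5sum -c testdata.md5
# train data (run inside datasets/traindata/)
md5sum -c traindata.md5
```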
121 | ## Docker build
122 | Ensure Docker is installed on your system and that the default Docker runtime
123 | is Nvidia, e.g. by setting the following in ```/etc/docker/daemon.json``` (restart the Docker daemon afterwards):
124 |
125 | ```
126 | {
127 | "runtimes": {
128 | "nvidia": {
129 | "path": "/usr/bin/nvidia-container-runtime",
130 | "runtimeArgs": []
131 | }
132 | },
133 | "default-runtime": "nvidia"
134 | }
135 | ```
136 |
137 | Then issue the Docker build command:
138 |
139 | ```
140 | $ docker build . -t demon
141 | ```
142 |
143 | To visualize the example:
144 |
145 | ```
146 | $ docker run --gpus all -it -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix:ro demon
147 | ```
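If no window appears, the X server may be rejecting connections from the container. On a typical local setup (an assumption about your configuration) access can be granted with:

```
$ xhost +local:
```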
148 |
149 |
150 | ## License
151 |
152 | DeMoN is under the [GNU General Public License v3.0](LICENSE.txt)
153 |
--------------------------------------------------------------------------------
/datasets/.gitignore:
--------------------------------------------------------------------------------
1 | *.h5
2 | *.tgz
3 |
--------------------------------------------------------------------------------
/datasets/download_testdata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | clear
3 | cat << EOF
4 |
5 | ================================================================================
6 |
7 |
8 | The test datasets are provided for research purposes only.
9 |
10 | Some of the test datasets build upon other publicly available data.
11 | Make sure to cite the respective original source of the data if you use the
12 | provided files for your research.
13 |
14 | * sun3d_test.h5 is based on the SUN3D dataset http://sun3d.cs.princeton.edu/
15 |
16 | J. Xiao, A. Owens, and A. Torralba, “SUN3D: A Database of Big Spaces Reconstructed Using SfM and Object Labels,” in 2013 IEEE International Conference on Computer Vision (ICCV), 2013, pp. 1625–1632.
17 |
18 |
19 |
20 |
21 | * rgbd_test.h5 is based on the RGBD SLAM benchmark http://vision.in.tum.de/data/datasets/rgbd-dataset (licensed under CC-BY 3.0)
22 |
23 | J. Sturm, N. Engelhard, F. Endres, W. Burgard, and D. Cremers, “A benchmark for the evaluation of RGB-D SLAM systems,” in 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 2012, pp. 573–580.
24 |
25 |
26 |
27 | * scenes11_test.h5 uses objects from shapenet https://www.shapenet.org/
28 |
29 | A. X. Chang et al., “ShapeNet: An Information-Rich 3D Model Repository,” arXiv:1512.03012 [cs], Dec. 2015.
30 |
31 |
32 |
33 | * mvs_test.h5 contains scenes from https://colmap.github.io/datasets.html
34 |
35 | J. L. Schönberger and J. M. Frahm, “Structure-from-Motion Revisited,” in 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016, pp. 4104–4113.
36 | J. L. Schönberger, E. Zheng, J.-M. Frahm, and M. Pollefeys, “Pixelwise View Selection for Unstructured Multi-View Stereo,” in Computer Vision – ECCV 2016, 2016, pp. 501–518.
37 |
38 |
39 |
40 | * nyu2_test.h5 is based on http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
41 |
42 | N. Silberman, D. Hoiem, P. Kohli, and R. Fergus, “Indoor Segmentation and Support Inference from RGBD Images,” in Computer Vision – ECCV 2012, 2012, pp. 746–760.
43 |
44 |
45 | ================================================================================
46 |
47 | type Y to start the download.
48 |
49 | EOF
50 |
51 | read -s -n 1 answer
52 | if [ "$answer" != "Y" -a "$answer" != "y" ]; then
53 | exit 0
54 | fi
55 | echo
56 |
57 | datasets=(sun3d rgbd mvs scenes11 nyu2)
58 |
59 | for ds in ${datasets[@]}; do
60 | if [ -e "${ds}_test.h5" ]; then
61 | echo "${ds}_test.h5 already exists, skipping ${ds}"
62 | else
63 | wget --no-check-certificate "https://lmb.informatik.uni-freiburg.de/data/demon/testdata/${ds}_test.tgz"
64 | tar -xvf "${ds}_test.tgz"
65 | fi
66 | done
67 |
--------------------------------------------------------------------------------
/datasets/download_traindata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | clear
3 | cat << EOF
4 |
5 | ================================================================================
6 |
7 |
8 | The train datasets are provided for research purposes only.
9 |
10 | Some of the train datasets build upon other publicly available data.
11 | Make sure to cite the respective original source of the data if you use the
12 | provided files for your research.
13 |
14 | * sun3d_train.h5 is based on the SUN3D dataset http://sun3d.cs.princeton.edu/
15 |
16 | J. Xiao, A. Owens, and A. Torralba, “SUN3D: A Database of Big Spaces Reconstructed Using SfM and Object Labels,” in 2013 IEEE International Conference on Computer Vision (ICCV), 2013, pp. 1625–1632.
17 |
18 |
19 |
20 |
21 | * rgbd_bugfix_train.h5 is based on the RGBD SLAM benchmark http://vision.in.tum.de/data/datasets/rgbd-dataset (licensed under CC-BY 3.0)
22 |
23 | J. Sturm, N. Engelhard, F. Endres, W. Burgard, and D. Cremers, “A benchmark for the evaluation of RGB-D SLAM systems,” in 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 2012, pp. 573–580.
24 |
25 |
26 |
27 | * scenes11_train.h5 uses objects from shapenet https://www.shapenet.org/
28 |
29 | A. X. Chang et al., “ShapeNet: An Information-Rich 3D Model Repository,” arXiv:1512.03012 [cs], Dec. 2015.
30 |
31 |
32 |
33 | * mvs_train.h5 contains the Citywall and Achteck-Turm scenes from MVE (Multi-View Environment) http://www.gcc.tu-darmstadt.de/home/proj/mve/
34 |
35 | S. Fuhrmann, F. Langguth, and M. Goesele, “MVE: A Multi-view Reconstruction Environment,” in Proceedings of the Eurographics Workshop on Graphics and Cultural Heritage, Aire-la-Ville, Switzerland, Switzerland, 2014, pp. 11–18.
36 |
37 |
38 |
39 | ================================================================================
40 |
41 | type Y to start the download.
42 |
43 | EOF
44 |
45 | read -s -n 1 answer
46 | if [ "$answer" != "Y" -a "$answer" != "y" ]; then
47 | exit 0
48 | fi
49 | echo
50 |
51 | datasets=(sun3d rgbd_bugfix mvs scenes11)
52 |
53 | OLD_PWD="$PWD"
54 | DESTINATION=traindata
55 | mkdir $DESTINATION
56 | cd $DESTINATION
57 |
58 | if [ ! -e "README_traindata" ]; then
59 | wget --no-check-certificate "https://lmb.informatik.uni-freiburg.de/data/demon/traindata/README_traindata"
60 | fi
61 |
62 | for ds in ${datasets[@]}; do
63 | if [ -e "${ds}_train.h5" ]; then
64 | echo "${ds}_train.h5 already exists, skipping ${ds}"
65 | else
66 | wget --no-check-certificate "https://lmb.informatik.uni-freiburg.de/data/demon/traindata/${ds}_train.tgz"
67 | tar -xvf "${ds}_train.tgz"
68 | fi
69 | done
70 |
71 | cd "$OLD_PWD"
72 |
--------------------------------------------------------------------------------
/datasets/generate_sun3d_train_datasets.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import os
19 | import sys
20 | import math
21 | import pickle
22 | import argparse
23 | import itertools
24 | import h5py
25 | from multiprocessing import Pool
26 | datasets_dir = os.path.dirname(__file__)
27 | sys.path.insert(0, os.path.join(datasets_dir, '..', 'python'))
28 |
29 | from depthmotionnet.dataset_tools.sun3d_utils import *
30 | from depthmotionnet.dataset_tools.view_tools import *
31 | from depthmotionnet.dataset_tools.view_io import *
32 |
33 |
34 | def create_train_file(outfile, sun3d_data_path, seq_name, baseline_range, seq_sharpness_dict):
35 | """Creates a h5 file with training samples with a specific baseline range
36 |
37 | outfile: str
38 | Output file
39 |
40 | sun3d_data_path: str
41 | The path to the sun3d data directory
42 |
43 | seq_name: str
44 | sequence name
45 |
46 | baseline_range: tuple(float, float)
47 | Minimum and maximum baseline
48 |
49 | seq_sharpness_dict: dict
50 | Dictionary with the sharpness score of all sequences.
51 | key: str with sequence name
52 | value: numpy.ndarray with sharpness scores
53 |
54 | """
55 | created_groups = 0
56 | with h5py.File(outfile,'w') as f:
57 | created_groups += create_samples_from_sequence(f, sun3d_data_path, seq_name, baseline_range, seq_sharpness_dict[seq_name])
58 | return created_groups
59 |
60 |
61 | def merge_h5files(outfile, files):
62 | """Merges multiple h5 files into a new file and removes the input files afterwards.
63 |
64 | outfile: str
65 | Output file
66 |
67 | files: list of str
68 | List of files to merge
69 | """
70 | with h5py.File(outfile,'w') as dst:
71 | for f in files:
72 | print('copy', f, 'to', outfile)
73 | with h5py.File(f,'r') as src:
74 | for group_name in src:
75 | src.copy(source=group_name, dest=dst)
76 | for f in files:
77 | os.remove(f)
78 |
79 |
80 |
81 |
82 | def main():
83 |
84 | print(
85 | """================================================================================
86 |
87 | This script runs for about 1 day on a computer with 16 threads and requires
88 | up to 50GB of disk space in the output directory!
89 |
90 | ================================================================================""")
91 |
92 | parser = argparse.ArgumentParser(description="Generates the sun3d training datasets.")
93 | parser.add_argument("--sun3d_path", type=str, required=True, help="The path to the sun3d data directory")
94 | parser.add_argument("--outputdir", type=str, default='training_data', help="Output directory for the generated h5 files")
95 | parser.add_argument("--threads", type=int, default=16, help="Number of threads")
96 |
97 | args = None
98 | try:
99 | args = parser.parse_args()
100 | print(args)
101 | except:
102 | return 1
103 |
104 | sun3d_data_path = args.sun3d_path
105 | outputdir = args.outputdir
106 | os.makedirs(outputdir, exist_ok=True)
107 | threads = args.threads
108 |
109 | # read txt file with the train sequence names
110 | with open('sun3d_train_sequences.txt', 'r') as f:
111 | sequences = f.read().splitlines()
112 |
113 | # compute the sharpness scores for all sequences and images
114 | if os.path.isfile('sun3d_seq_sharpness_dict.pkl'):
115 |         print('Reading sequence sharpness file sun3d_seq_sharpness_dict.pkl')
116 | with open('sun3d_seq_sharpness_dict.pkl','rb') as f:
117 | seq_sharpness_dict = pickle.load(f)
118 | else:
119 | print('Computing sharpness for all images. This could take a while.')
120 | with Pool(threads) as pool:
121 | args = [(sun3d_data_path, seq,) for seq in sequences]
122 | sequence_sharpness = pool.starmap(compute_sharpness, args, chunksize=1)
123 |
124 | seq_sharpness_dict = dict(zip(sequences, sequence_sharpness))
125 |
126 | with open('sun3d_seq_sharpness_dict.pkl','wb') as f:
127 | pickle.dump(seq_sharpness_dict, f)
128 |
129 |
130 | # baseline ranges from 1cm-10cm to 1.6m-inf
131 | baseline_ranges = [(0.01,0.10), (0.10,0.20), (0.20,0.40), (0.40,0.80), (0.80,1.60), (1.60, float('inf'))]
132 |
133 | with Pool(threads) as pool:
134 |
135 | # create temporary h5 files for each baseline and sequence combination
136 | baseline_range_files_dict = {b:[] for b in baseline_ranges}
137 | args = []
138 | for i, base_range_seq_name in enumerate(itertools.product(baseline_ranges, sequences)):
139 | base_range, seq_name = base_range_seq_name
140 | #print(base_range, seq_name)
141 | outfile = os.path.join(outputdir,"{0}.h5".format(i))
142 | args.append((outfile, sun3d_data_path, seq_name, base_range, seq_sharpness_dict))
143 | baseline_range_files_dict[base_range].append(outfile)
144 |
145 | created_groups = pool.starmap(create_train_file, args, chunksize=1)
146 |
147 | # merge temporary files by creating one file per baseline range
148 | for base_range in baseline_ranges:
149 | outfile = os.path.join(outputdir, 'sun3d_train_{0}m_to_{1}m.h5'.format(*base_range))
150 | merge_h5files(outfile, baseline_range_files_dict[base_range])
151 |
152 |
153 | print('created', sum(created_groups), 'groups')
154 |
155 | return 0
156 |
157 |
158 |
159 |
160 |
161 | if __name__ == "__main__":
162 | sys.exit(main())
163 |
164 |
--------------------------------------------------------------------------------
/datasets/sun3d_train_sequences.txt:
--------------------------------------------------------------------------------
1 | brown_bm_1/brown_bm_1
2 | brown_bm_2/brown_bm_2
3 | brown_bm_3/brown_bm_3
4 | brown_bm_4/brown_bm_4
5 | brown_bm_5/brown_bm_5
6 | brown_bm_6/brown_bm_6
7 | brown_cogsci_1/brown_cogsci_1
8 | brown_cogsci_3/brown_cogsci_3
9 | brown_cogsci_4/brown_cogsci_4
10 | brown_cogsci_5/brown_cogsci_5
11 | brown_cogsci_7/brown_cogsci_7
12 | brown_cs_1/brown_cs1
13 | brown_cs_2/brown_cs2
14 | brown_cs_4/brown_cs4
15 | brown_cs_5/brown_cs5
16 | brown_cs_6/brown_cs6
17 | brown_cs_8/brown_cs8
18 | brown_cs_9/brown_cs9
19 | brown_cs_10/brown_cs10
20 | harvard_computer_lab/hv_c1_1
21 | harvard_computer_lab/hv_c1_2
22 | harvard_c2/hv_c2_1
23 | harvard_c2/hv_c2_2
24 | harvard_c3/hv_c3_1
25 | harvard_c3/hv_c3_2
26 | harvard_c5/hv_c5_1
27 | harvard_c5/hv_c5_2
28 | harvard_c6/hv_c6_1
29 | harvard_c6/hv_c6_2
30 | harvard_c7/hv_c7_1
31 | harvard_c7/hv_c7_2
32 | harvard_c7/hv_c7_3
33 | harvard_c8/hv_c8_1
34 | harvard_c8/hv_c8_2
35 | harvard_c8/hv_c8_3
36 | harvard_c9/hv_c9_1
37 | harvard_c9/hv_c9_2
38 | harvard_c11/hv_c11_1
39 | harvard_c11/hv_c11_2
40 | harvard_conf_big/hv_conf_big_1
41 | harvard_conf_big/hv_conf_big_2
42 | harvard_conf_big/hv_conf_big_3
43 | harvard_dq_office/hv_dq_office_1
44 | harvard_dq_office/hv_dq_office_2
45 | harvard_corridor_lounge/hv_lounge_corridor2_1
46 | harvard_corridor_lounge/hv_lounge_corridor3_whole_floor
47 | harvard_corridor_lounge/hv_corridor1_1
48 | harvard_corridor_lounge/hv_corridor1_2
49 | harvard_corridor_lounge/hv_lounge_corridor_1
50 | harvard_corridor_lounge/hv_lounge2_1
51 | harvard_corridor_lounge/hv_lounge2_2
52 | harvard_corridor_lounge/hv_lounge3_1
53 | harvard_corridor_lounge/hv_lounge3_2
54 | harvard_printroom1/hv_printroom1_1
55 | harvard_printroom1/hv_printroom1_2
56 | harvard_restroom_1/hv_restroom1_1
57 | harvard_restroom_1/hv_restroom1_2
58 | harvard_restroom_2/hv_restroom4_1
59 | harvard_restroom_2/hv_restroom4_2
60 | harvard_tea_1/hv_tea1_1
61 | harvard_tea_1/hv_tea1_2
62 | harvard_tea_3/hv_tea3_1
63 | home_ac/home_ac_scan1_2012_aug_22
64 | home_ac/home_ac_scan2_2012_aug_22
65 | home_ac/home_ac_scan3_2012_aug_24
66 | home_ag/apartment_ag_nov_7_2012_scan1_erika
67 | home_amo/amo-apt-floor1-1
68 | home_amo/floor1v2
69 | home_amo/floor2-1
70 | home_at/home_at_scan1_2013_jan_1
71 | home_bksh/home_bksh_2012_oct_19_scan1
72 | home_bksh/home_bksh_oct_30_2012_scan2_erika
73 | home_han/apartment_han_oct_31_2012_scan1_erika
74 | home_md/home_md_scan1_2012_july_9
75 | home_md/home_md_scan1_lowres_2012_july_14
76 | home_md/home_md_scan2_2012_july_9
77 | home_md/home_md_scan3_2012_july_9
78 | home_md/home_md_scan4_2012_july_29
79 | home_md/home_md_scan5_2012_aug_20
80 | home_md/home_md_scan6_2012_aug_21
81 | home_md/home_md_scan7_2012_aug_27
82 | home_md/home_md_scan8_2012_aug_28
83 | home_md/home_md_scan9_2012_sep_30
84 | home_ph_cz/ph_cz_1
85 | home_ph_cz/ph_cz_2
86 | home_ph_cz/pk_cz_3
87 | home_pt/home_pt_scan1_2012_oct_19
88 | home_puigpunyen/home_puigpunyent_scan1_2012_aug_22
89 | home_puigpunyen/home_puigpunyent_scan2_2012_aug_23
90 | home_puigpunyen/home_puigpunyent_scan3_2012_aug_26
91 | home_puigpunyen/home_puigpunyent_scan4_2012_dec_23
92 | home_rz/home_rz_scan1_2012_oct_31
93 | home_ts/apartment_ts_oct_31_2012_scan1_erika
94 | hotel_barcelona/scan1_2012_july_23
95 | hotel_beijing/beijing_hotel_1
96 | hotel_beijing/beijing_hotel_2
97 | hotel_beijing/beijing_hotel_3
98 | hotel_beijing/beijing_hotel_4
99 | hotel_casarural/casarural_scan1_2012_july_22
100 | hotel_florence_ant/hotel_room_florence_scan1_2012_oct_09
101 | hotel_florence_jx/florence_hotel_another_room
102 | hotel_florence_jx/florence_hotel_kitchen
103 | hotel_florence_jx/myhotel
104 | hotel_graz/scan1_2012_aug_29
105 | hotel_grenoble/scan1_2012_july_11
106 | hotel_grenoble/scan1_2012_july_11_another
107 | hotel_grenoble/scan2_2012_july_11
108 | hotel_hkust/hk_hotel_1
109 | hotel_m/scan1
110 | hotel_mr/scan1
111 | hotel_nips2012/nips_1
112 | hotel_ny/hotel_ny_2012_oct_21_scan1
113 | hotel_pedraza/hotel_room_pedraza_2012_nov_25
114 | hotel_pittsburg/hotel_pittsburg_scan1_2012_dec_12
115 | hotel_sf/scan1
116 | hotel_singapore_2012/singapore_hotel1
117 | hotel_stb/scan1
118 | hotel_uc/scan1
119 | hotel_ucla_ant/hotel_room_ucla_scan1_2012_oct_05
120 | hotel_ucla_jx/ucla_hotel_1
121 | hotel_ucsd/la1
122 | hotel_umd/maryland_hotel1
123 | mit_1_242/classroom_1242_oct_30_2012_scan1_erika
124 | mit_1_375/classroom_1375_nov_6_2012_scan1_erika
125 | mit_13_xh_lab1/xh_lab1_1
126 | mit_13_xh_lab2/xh_lab2_1
127 | mit_26_100/classroom_26100_nov_2_2012_scan1_erika
128 | mit_3_133/classroom_3133_nov_6_2012_scan1_erika
129 | mit_3_270/classroom_3270_oct_30_2012_scan1_erika
130 | mit_3_huge_office/cl_1
131 | mit_32_123/123_1
132 | mit_32_124/124_1
133 | mit_32_144/classroom_32144_nov_2_2012_scan1_erika
134 | mit_32_261/261_1
135 | mit_32_262/262_1
136 | mit_32_397/397_1
137 | mit_32_bar/bar_1
138 | mit_32_bikeroom/bikeroom_1
139 | mit_32_lounge_d429/d4_lounge_1
140 | mit_32_lounge_d4_bill/d4_lounge_vision_1
141 | mit_32_d407/d407-1
142 | mit_32_d428/bs4j179mmv
143 | mit_32_d446/d446_1
144 | mit_32_d451/d451-1
145 | mit_32_d460/d460-1
146 | mit_32_d463/d463_1
147 | mit_32_d466/d466-1
148 | mit_32_d475a/d475a-1
149 | mit_32_d507/d507_1
150 | mit_32_d530/d530_scan_oldest
151 | mit_32_d6_lounge/d6_lounge_1
152 | mit_32_g431/g431_1
153 | mit_32_g442/g442_1
154 | mit_32_g449/g449_1
155 | mit_32_g451/g451_1
156 | mit_32_g460/g460_1
157 | mit_32_g5_lounge/g5_lounge_1
158 | mit_32_g531/g531_1
159 | mit_32_g631/g631_1
160 | mit_32_g660/g660_1
161 | mit_32_g7_lounge/g7_lounge_1
162 | mit_32_g825/g825_1
163 | mit_32_g882/g882_1
164 | mit_32_g9_lounge/g9_lounge_1
165 | mit_32_pingpong/pingpong_1
166 | mit_32_pool/pool_1
167 | mit_34_302/classroom_34302_oct_30_2012_scan1_erika
168 | mit_35_225/classroom_35225_nov_2_2012_scan1_erika
169 | mit_36_144/classroom_36144_nov_2_2012_scan1_erika
170 | mit_36_ti_lab/tian_lab_1
171 | mit_36_ti_lab2/tian_lab_a
172 | mit_36_ti_office/tian_office_1
173 | mit_46_5bathroom/bcs_floor5_bathroom
174 | mit_46_6bathroom/bcs_floor6_restroom
175 | mit_46_4conf_1/bcs_floor4_conf_1
176 | mit_46_4conf_2/bcs_floor4_conf_a
177 | mit_46_5conf_1/bcs_floor5_conf_1
178 | mit_46_5conf_2/bcs_floor5_conf_a
179 | mit_46_5lounge/bcs_floor5_lounge
180 | mit_46_ted_lab1/ted_lab_1
181 | mit_5_234/classroom_5234_nov_6_2012_scan1_erika
182 | mit_6_120/classroom_6120_nov_2_2012_scan1_erika
183 | mit_76_417/76-417a
184 | mit_76_458/76-458a
185 | mit_76_459/76-459a
186 | mit_76_4kitchen/76-4kitchen1
187 | mit_76_4lounge/76-4lounge
188 | mit_76_4lounge/76-4lounge2
189 | mit_76_4printroom/76-4printroom1
190 | mit_76_studyroom/76-1studyroom1
191 | mit_dorm_baker_cj/dorm_baker_cj_oct_29_2012_scan1_erika
192 | mit_dorm_ec_tam/dorm_ec_tam_oct_30_2012_scan1_erika
193 | mit_dorm_masseeh_flr4a/dorm_masseeh_flr4_scan1_oct_26_2012_erika
194 | mit_dorm_masseeh_md/dorm_masseeh_md_scan1_oct_26_2012_erika
195 | mit_dorm_masseeh_sr/dorm_masseeh_sr_scan1_oct_26_2012_erika
196 | mit_dorm_masseeh_wm/dorm_masseeh_wm_oct_29_2012_scan1_erika
197 | mit_dorm_masseeh_xc/dorm_masseeh_xc_oct_29_2012_scan1_erika
198 | mit_dorm_mcc_313/dorm_mcc_313_oct_31_2012_scan1_erika
199 | mit_dorm_mcc_flr1/dorm_mcc_flr1_oct_31_2012_scan1_erika
200 | mit_dorm_mcc_nk/dorm_mcc_nk_oct_31_2012_scan1_erika
201 | mit_dorm_mcc_wflr7/dorm_mcc_wflr7_oct_31_2012_scan1_erika
202 | mit_dorm_mcc_wph/dorm_mcc_wph_oct_31_2012_scan1_erika
203 | mit_dorm_next_536/dorm_next_536_oct_29_2012_scan1_erika
204 | mit_dorm_next_552/dorm_next_552_oct_30_2012_scan1_erika
205 | mit_dorm_next_dn/dorm_next_dn_oct_30_2012_scan1_erika
206 | mit_dorm_next_jc/dorm_next_jc_oct_30_2012_scan1_erika
207 | mit_dorm_next_sj/dorm_next_sj_oct_30_2012_scan1_erika
208 | mit_dorm_sh_basement/dorm_sh_basement_oct_28_2012_scan1_erika
209 | mit_gym_dupont/gym_dupont_nov_1_2012_scan1.erika
210 | mit_gym_tohoku/gym_tohoku_oct_28_2012_scan1_erika
211 | mit_gym_wrestling/gym_wrestling_nov_1_2012_scan1_erika
212 | mit_gym_z_squash/gym_z_squash_scan1_oct_26_2012_erika
213 | mit_dorm3/corridor
214 | mit_lab_16/lab_16_nov_2_2012_scan1_erika
215 | mit_lab_hj/lab_hj_hall5_nov_2_2012_scan1_erika
216 | mit_lab_koch/lab_koch_bench_nov_2_2012_scan1_erika
217 | mit_lab_pdl/lab_pdl_nov_2_2012_scan1_erika
218 | mit_ne47_2corridor/ne47_floor2_corridor
219 | mit_ne47_2biolab/ne47_floor2
220 | mit_ne47_2conf/ne47_floor2_conf_room
221 | mit_ne47_huge_office/ne47_floor2_office_1
222 | mit_ne47_huge_office/ne47_floor2_office_2
223 | mit_w16/kresge_back
224 | mit_w20_athena/sc_athena_oct_29_2012_scan1_erika
225 | mit_w20_flr4/sc_flr4_oct_29_2012_scan1_erika
226 | mit_w59_conference/conference_w59_nov_6_2012_scan1_erika
227 | mit_w85_2floor/2_stair_4
228 | mit_w85_4/4_1
229 | mit_w85_5/5_1
230 | mit_w85_8/8_1
231 | mit_w85_10/10_1
232 | mit_w85_11/11_1_1
233 | mit_w85_12/12_1_1
234 | mit_w85_16/16_1
235 | mit_w85_basement/wg_big_lounge_1
236 | mit_w85_basement/wg_laundary_1
237 | mit_w85_basement/wg_small_lounge_1
238 | mit_w85_lounge1/wg_gym_lounge_1
239 | mit_w85_lounge1/wg_lounge1_1
240 | mit_w85_playroom/westgate_playroom_1
241 | mit_w85a/a2
242 | mit_w85d/d1
243 | mit_w85h/h4_1
244 | mit_w85j/j1
245 | mit_w85k2/k1
246 | mit_w85k1/whole_apartment
247 | providence_station/providence_station
248 |
--------------------------------------------------------------------------------
/datasets/testdata.md5:
--------------------------------------------------------------------------------
1 | f42e0ffd1243274a2fcee1ea5137176c *mvs_test.h5
2 | 25f8f3749341b193ce507889bde122b7 *nyu2_test.h5
3 | 56393d4d56acb189c5b5e698dc8ec0ae *rgbd_test.h5
4 | cf9314cb4229967bb9978e7e900c1a50 *scenes11_test.h5
5 | 8d9d742497b6c3928bcab2f1ff14f32c *sun3d_test.h5
6 | 7b427e0b45c7e8c5a74a3ce8c96de407 *mvs_test.tgz
7 | 6d67223a13a013dff2730612e950d191 *nyu2_test.tgz
8 | c94d735e99fa2c9e8f83d357db96f347 *rgbd_test.tgz
9 | d8d3df17400b7e763c8aaa0f3b81963d *scenes11_test.tgz
10 | 30ee9c8861c4439e04fec7405a4e0c8d *sun3d_test.tgz
11 |
--------------------------------------------------------------------------------
/datasets/traindata/traindata.md5:
--------------------------------------------------------------------------------
1 | d968ca0fa1785d57559469fdcb476121 *mvs_achteck_turm.h5
2 | 34818891c3b6ae66c90badc9a24ed015 *mvs_breisach.h5
3 | b3af67b01725f3cd99d6cebad08857e9 *mvs_citywall.h5
4 | # 2543a4b1a2cc8385d342958b2557d25e *rgbd_10_to_20_3d_train.h5
5 | # ee6c5805c0244b01268e394e33b4e455 *rgbd_10_to_20_handheld_train.h5
6 | a316f02a84abe42ccb6a1d5450ac4bf1 *rgbd_10_to_20_simple_train.h5
7 | # 5be8f60f0e1d9f5afaa3ee0afcb5e3be *rgbd_20_to_inf_3d_train.h5
8 | # 3e6c78142bfd7a92d647abf443e98e3c *rgbd_20_to_inf_handheld_train.h5
9 | 0ee80f4b7450bbaa4df59854b73ca770 *rgbd_20_to_inf_simple_train.h5
10 | 2e07e7b76a4e39254dd046cfe6754fe7 *scenes11_train.h5
11 | ab1190b58350ed3f39b4d5e2966fb45c *sun3d_train_0.01m_to_0.1m.h5
12 | d72b646f474559e7338a65c2199da544 *sun3d_train_0.1m_to_0.2m.h5
13 | d31bbefcffbfd795d2a19193ab61478f *sun3d_train_0.2m_to_0.4m.h5
14 | 8aeaae77e38f6493e52a0a0a7bcac8e3 *sun3d_train_0.4m_to_0.8m.h5
15 | 6e872b0fec5cd5a404e25c034e1c9322 *sun3d_train_0.8m_to_1.6m.h5
16 | 0d306ed5e6e4e4dcb5371b4c0c132e6a *sun3d_train_1.6m_to_infm.h5
17 | a981ec421da35df09225e875ce7531f8 *mvs_train.tgz
18 | # cd11f27d9c5d170030ad46dc1e032cf8 *rgbd_train.tgz
19 | 2cc58de179d11a5fa88f3a67edb0a5d2 *scenes11_train.tgz
20 | a77e1a5159258fadc0216e647fa21f33 *sun3d_train.tgz
21 | 3c8111feb0eebe50b5068c4a71ac6a28 *rgbd_bugfix_10_to_20_3d_train.h5
22 | 2c02bd33e6e5a7550ef5abe8eee1609e *rgbd_bugfix_10_to_20_handheld_train.h5
23 | 286c2b2bee520a427f347e848660636a *rgbd_bugfix_20_to_inf_3d_train.h5
24 | 9a1907607af8d7543b99df33ae4d4733 *rgbd_bugfix_20_to_inf_handheld_train.h5
25 | 3975c81540462d7ba009190381f2d88b *rgbd_bugfix_train.tgz
26 |
--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | *.h5
2 | *.json
3 |
4 |
--------------------------------------------------------------------------------
/examples/create_dataset_and_use_readerop.py:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Create a new dataset h5 file that could be used for training
3 | #
4 | import os
5 | import sys
6 | import numpy as np
7 | from PIL import Image
8 | import h5py
9 |
10 | examples_dir = os.path.dirname(__file__)
11 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python'))
12 | from depthmotionnet.dataset_tools import *
13 |
14 |
15 | # intrinsics supported by DeMoN
16 | normalized_intrinsics = [0.89115971, 1.18821287, 0.5, 0.5]
17 |
18 | # unique group name not starting with '.'
19 | group_name = 'sculpture-0001'
20 |
21 | # write a new dataset with a single group and two views
22 | with h5py.File('dataset.h5','w') as f:
23 |
24 | for i in range(2):
25 | img = Image.open('sculpture{0}.png'.format(i+1))
26 | Rt = np.loadtxt('sculpture_Rt{0}.txt'.format(i+1))
27 | depth = np.load('sculpture_depth{0}.npy'.format(i+1))
28 | K = np.eye(3)
29 | K[0,0] = normalized_intrinsics[0] * img.size[0]
30 | K[1,1] = normalized_intrinsics[1] * img.size[1]
31 | K[0,2] = normalized_intrinsics[2] * img.size[0]
32 |         K[1,2] = normalized_intrinsics[3] * img.size[1]
33 |
34 | # create a View tuple
35 | view = View(R=Rt[:,:3], t=Rt[:,3], K=K, image=img, depth=depth, depth_metric='camera_z')
36 |
37 | # write view to the h5 file
38 | # view enumeration must start with 0 ('v0')
39 | view_group = f.require_group(group_name+'/frames/t0/v{0}'.format(i))
40 | write_view(view_group, view)
41 |
42 |     # write valid image pair combinations to the group t0; the flat array lists index pairs, here (0,1) and (1,0)
43 | viewpoint_pairs = np.array([0, 1, 1, 0], dtype=np.int32)
44 | time_group = f[group_name]['frames/t0']
45 | time_group.attrs['viewpoint_pairs'] = viewpoint_pairs
46 |
47 |
48 | ################################################################################
49 | # Use the reader op to read the created h5 file
50 | #
51 | from depthmotionnet.datareader import *
52 | import json
53 | import tensorflow as tf
54 | from matplotlib import pyplot as plt
55 |
56 |
57 | # keys for the requested output tensors.
58 | # These keys will be passed to the data reader op.
59 | data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS')
60 |
61 | # the following parameters are just an example and are not optimized for training
62 | reader_params = {
63 | 'batch_size': 1,
64 | 'test_phase': False,
65 | 'builder_threads': 1,
66 | 'inverse_depth': True,
67 | 'motion_format': 'ANGLEAXIS6',
68 | 'norm_trans_scale_depth': True,
69 | # downsampling of image and depth is supported
70 | 'scaled_height': 96,
71 | 'scaled_width': 128,
72 | 'scene_pool_size': 5, # for actual training this should be around 500
73 | 'augment_rot180': 0,
74 | 'augment_mirror_x': 0,
75 | 'top_output': data_tensors_keys, # request data tensors
76 | 'source': [{'path': 'dataset.h5', 'weight': [{'t': 0, 'v': 1.0}]},],
77 | }
78 |
79 | reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys), json.dumps(reader_params))
80 | # create a dict to make the distinct data tensors accessible via keys
81 | data_dict = dict(zip(data_tensors_keys,reader_tensors[2]))
82 |
83 | gpu_options = tf.GPUOptions()
84 | gpu_options.per_process_gpu_memory_fraction=0.8 # leave some memory to other processes
85 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
86 |
87 | result = session.run(data_dict)
88 |
89 | # show the depth ground truth.
90 | # Note that the data reader op replaces invalid depth values with nan.
91 | plt.imshow(result['DEPTH'].squeeze(), cmap='Greys')
92 | plt.show()
93 |
94 | # visualize the data as point cloud if vtk is available
95 | try:
96 | from depthmotionnet.vis import *
97 | visualize_prediction(
98 | inverse_depth=result['DEPTH'],
99 | image=result['IMAGE_PAIR'][0,0:3],
100 | rotation=result['MOTION'][0,0:3],
101 | translation=result['MOTION'][0,3:])
102 | except ImportError as err:
103 | print("Cannot visualize as pointcloud.", err)
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/examples/evaluation.py:
--------------------------------------------------------------------------------
1 | #
2 | # This script computes the depth and motion errors for the network predictions.
3 | #
4 | # Note that numbers are not identical to the values reported in the paper, due
5 | # to implementation differences between the caffe and tensorflow versions.
6 | #
7 | # Running this script requires about 4gb of disk space.
8 | #
9 | # This script expects the test datasets in the folder ../datasets
10 | # Use the provided script in ../datasets for downloading the data.
11 | #
12 | import os
13 | import sys
14 | import json
15 | import h5py
16 | import xarray
17 | import numpy as np
18 | import lmbspecialops as sops
19 | import tensorflow as tf
20 |
21 | examples_dir = os.path.dirname(__file__)
22 | weights_dir = os.path.join(examples_dir,'..','weights')
23 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python'))
24 |
25 | from depthmotionnet.datareader import *
26 | from depthmotionnet.networks_original import *
27 | from depthmotionnet.helpers import convert_NCHW_to_NHWC, convert_NHWC_to_NCHW
28 | from depthmotionnet.evaluation import *
29 |
30 |
31 | def create_ground_truth_file(dataset, dataset_dir):
32 | """Creates a hdf5 file with the ground truth test data
33 |
34 | dataset: str
35 | name of the dataset
36 | dataset_dir: str
37 | path to the directory containing the datasets
38 |
39 | Returns the path to the created file
40 | """
41 | ds = dataset
42 | # destination file
43 | ground_truth_file = '{0}_ground_truth.h5'.format(ds)
44 |
45 | if os.path.isfile(ground_truth_file):
46 | return ground_truth_file # skip existing files
47 |
48 | print('creating {0}'.format(ground_truth_file))
49 |
50 | # data types requested from the reader op
51 | data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS')
52 |
53 | reader_params = {
54 | 'batch_size': 1,
55 | 'test_phase': True, # deactivates randomization
56 | 'builder_threads': 1, # must be 1 in test phase
57 | 'inverse_depth': True,
58 | 'motion_format': 'ANGLEAXIS6',
59 | # True is also possible here. If set to True we store ground truth with
60 | # precomputed normalization. False keeps the original information.
61 | 'norm_trans_scale_depth': False,
62 | # original data resolution
63 | 'scaled_height': 480,
64 | 'scaled_width': 640,
65 | 'scene_pool_size': 5,
66 | # no augmentation
67 | 'augment_rot180': 0,
68 | 'augment_mirror_x': 0,
69 | 'top_output': data_tensors_keys,
70 | 'source': [{'path': os.path.join(dataset_dir,'{0}_test.h5'.format(ds))}],
71 | }
72 |
73 | reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys), json.dumps(reader_params))
74 |
75 | # create a dict to make the distinct data tensors accessible via keys
76 | data_dict = dict(zip(data_tensors_keys,reader_tensors[2]))
77 | info_tensor = reader_tensors[0]
78 | sample_ids_tensor = reader_tensors[1]
79 | rotation_tensor, translation_tensor = tf.split(data_dict['MOTION'], 2, axis=1)
80 |
81 | flow_tensor = sops.depth_to_flow(data_dict['DEPTH'], data_dict['INTRINSICS'], rotation_tensor, translation_tensor, inverse_depth=True, normalize_flow=True)
82 |
83 | gpu_options = tf.GPUOptions()
84 | gpu_options.per_process_gpu_memory_fraction=0.8 # leave some memory to other processes
85 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
86 |
87 |
88 | fetch_dict = {'INFO': info_tensor, 'SAMPLE_IDS': sample_ids_tensor, 'FLOW': flow_tensor}
89 | fetch_dict.update(data_dict)
90 |
91 |     with h5py.File(ground_truth_file, 'w') as f:
92 |
93 | number_of_test_iterations = 1 # will be set to the correct value in the while loop
94 | iteration = 0
95 | while iteration < number_of_test_iterations:
96 |
97 | data = session.run(fetch_dict)
98 |
99 | # get number of iterations from the info vector
100 | number_of_test_iterations = int(data['INFO'][0])
101 |
102 | # write ground truth data to the file
103 | group = f.require_group(str(iteration))
104 | group['image_pair'] = data['IMAGE_PAIR'][0]
105 | group['depth'] = data['DEPTH'][0]
106 | group['motion'] = data['MOTION'][0]
107 | group['flow'] = data['FLOW'][0]
108 | group['intrinsics'] = data['INTRINSICS'][0]
109 |
110 | # save sample id as attribute of the group.
111 | # the evaluation code will use this to check if prediction and ground truth match.
112 | sample_id = (''.join(map(chr, data['SAMPLE_IDS']))).strip()
113 | group.attrs['sample_id'] = np.string_(sample_id)
114 | iteration += 1
115 |
116 | del session
117 | tf.reset_default_graph()
118 | return ground_truth_file
119 |
120 |
121 |
122 | def create_prediction_file(dataset, dataset_dir):
123 | """Creates a hdf5 file with the predictions
124 |
125 | dataset: str
126 | name of the dataset
127 | dataset_dir: str
128 | path to the directory containing the datasets
129 |
130 | Returns the path to the created file
131 | """
132 |
133 | if tf.test.is_gpu_available(True):
134 | data_format='channels_first'
135 | else: # running on cpu requires channels_last data format
136 | data_format='channels_last'
137 | print('Using data_format "{0}"'.format(data_format))
138 |
139 | ds = dataset
140 | # destination file
141 | prediction_file = '{0}_prediction.h5'.format(ds)
142 |
143 | # data types requested from the reader op
144 | data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS')
145 |
146 | reader_params = {
147 | 'batch_size': 1,
148 | 'test_phase': True, # deactivates randomization
149 | 'builder_threads': 1, # must be 1 in test phase
150 | 'inverse_depth': True,
151 | 'motion_format': 'ANGLEAXIS6',
152 | 'norm_trans_scale_depth': True,
153 |         # input resolution for demon
154 | 'scaled_height': 192,
155 | 'scaled_width': 256,
156 | 'scene_pool_size': 5,
157 | # no augmentation
158 | 'augment_rot180': 0,
159 | 'augment_mirror_x': 0,
160 | 'top_output': data_tensors_keys,
161 | 'source': [{'path': os.path.join(dataset_dir,'{0}_test.h5'.format(ds))}],
162 | }
163 |
164 | reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys), json.dumps(reader_params))
165 |
166 | # create a dict to make the distinct data tensors accessible via keys
167 | data_dict = dict(zip(data_tensors_keys,reader_tensors[2]))
168 | info_tensor = reader_tensors[0]
169 | sample_ids_tensor = reader_tensors[1]
170 | image1, image2 = tf.split(data_dict['IMAGE_PAIR'],2,axis=1)
171 |
172 | # downsample second image
173 | image2_2 = sops.median3x3_downsample(sops.median3x3_downsample(image2))
174 |
175 | gpu_options = tf.GPUOptions()
176 | gpu_options.per_process_gpu_memory_fraction=0.8 # leave some memory to other processes
177 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
178 |
179 |
180 | # init networks
181 | bootstrap_net = BootstrapNet(session, data_format)
182 | iterative_net = IterativeNet(session, data_format)
183 | refine_net = RefinementNet(session, data_format)
184 |
185 | session.run(tf.global_variables_initializer())
186 |
187 | # load weights
188 | saver = tf.train.Saver()
189 | saver.restore(session,os.path.join(weights_dir,'demon_original'))
190 |
191 | fetch_dict = {
192 | 'INFO': info_tensor,
193 | 'SAMPLE_IDS': sample_ids_tensor,
194 | 'image1': image1,
195 | 'image2_2': image2_2,
196 | }
197 | fetch_dict.update(data_dict)
198 |
199 | if data_format == 'channels_last':
200 | for k in ('image1', 'image2_2', 'IMAGE_PAIR',):
201 | fetch_dict[k] = convert_NCHW_to_NHWC(fetch_dict[k])
202 |
203 | with h5py.File(prediction_file, 'w') as f:
204 |
205 | number_of_test_iterations = 1 # will be set to the correct value in the while loop
206 | test_iteration = 0
207 | while test_iteration < number_of_test_iterations:
208 |
209 | data = session.run(fetch_dict)
210 |
211 | # get number of iterations from the info vector
212 | number_of_test_iterations = int(data['INFO'][0])
213 |
214 | # create group for the current test sample and save the sample id.
215 | group = f.require_group('snapshot_1/{0}'.format(test_iteration))
216 | sample_id = (''.join(map(chr, data['SAMPLE_IDS']))).strip()
217 | group.attrs['sample_id'] = np.string_(sample_id)
218 |
219 | # save intrinsics
220 | group['intrinsics'] = data['INTRINSICS']
221 |
222 | # run the network and save outputs for each network iteration 'i'.
223 | # iteration 0 corresponds to the bootstrap network.
224 | # we also store the refined depth for each iteration.
225 | for i in range(4):
226 | if i == 0:
227 | result = bootstrap_net.eval(data['IMAGE_PAIR'], data['image2_2'])
228 | else:
229 | result = iterative_net.eval(
230 | data['IMAGE_PAIR'],
231 | data['image2_2'],
232 | result['predict_depth2'],
233 | result['predict_normal2'],
234 | result['predict_rotation'],
235 | result['predict_translation']
236 | )
237 | # write predictions
238 | if data_format == 'channels_last':
239 | group['predicted_flow/{0}'.format(i)] = result['predict_flow2'][0].transpose([2,0,1])
240 | group['predicted_depth/{0}'.format(i)] = result['predict_depth2'][0,:,:,0]
241 | else:
242 | group['predicted_flow/{0}'.format(i)] = result['predict_flow2'][0]
243 | group['predicted_depth/{0}'.format(i)] = result['predict_depth2'][0,0]
244 |
245 | predict_motion = np.concatenate((result['predict_rotation'],result['predict_translation']),axis=1)
246 | group['predicted_motion/{0}'.format(i)] = predict_motion[0]
247 |
248 | # run refinement network
249 | result_refined = refine_net.eval(data['image1'],result['predict_depth2'])
250 |
251 | # write refined depth prediction
252 | if data_format == 'channels_last':
253 | group['predicted_depth/{0}_refined'.format(i)] = result_refined['predict_depth0'][0,:,:,0]
254 | else:
255 | group['predicted_depth/{0}_refined'.format(i)] = result_refined['predict_depth0'][0,0]
256 |
257 | test_iteration += 1
258 |
259 | del session
260 | tf.reset_default_graph()
261 | return prediction_file
262 |
263 | def main():
264 |
265 | # list the test datasets names for evaluation
266 | datasets = ('mvs', 'scenes11', 'rgbd', 'sun3d', 'nyu2')
267 | dataset_dir = os.path.join('..', 'datasets')
268 |
269 |
270 |
271 | # creating the ground truth and prediction files requires about 11gb of disk space
272 | for dataset in datasets:
273 | gt_file = create_ground_truth_file(dataset, dataset_dir)
274 |
275 | print('creating predictions for', dataset)
276 | pr_file = create_prediction_file(dataset, dataset_dir)
277 |
278 | # compute errors
279 | # the evaluate function expects the path to a prediction and the corresponding
280 | # ground truth file.
281 | print('computing errors for', dataset)
282 |
283 | # compute errors for comparison with single image depth methods
284 | eval_result = evaluate(pr_file, gt_file, depthmask=False, eigen_crop_gt_and_pred=True)
285 | # save evaluation results to disk
286 | write_xarray_json(eval_result, '{0}_eval_crop_allpix.json'.format(dataset))
287 |
288 | if dataset != 'nyu2':
289 | # depthmask=True will compute depth errors only for pixels visible in both images.
290 | eval_result = evaluate(pr_file, gt_file, depthmask=True)
291 | # save evaluation results to disk
292 | write_xarray_json(eval_result, '{0}_eval.json'.format(dataset))
293 |
294 |
295 |
296 | # print errors
297 | for dataset in datasets:
298 |
299 | # In the following eval_result is a 5D array with the following dimensions:
300 | # - snapshots: stores results of different network training states
301 | # - iteration: network iterations '0' stores the result of the bootstrap network.
302 | # '3' stores the results after bootstrap + 3 times iterative network.
303 | # '3_refined' stores the result after the refinement network.
304 | # - sample: the sample number.
305 | # - errors: stores the different error metrics.
306 | # - scaled: is a boolean dimension used for storing errors after optimal scaling
307 | # the prediction with a scalar factor. This was meant as an alternative
308 | # to scale invariant error measures. Just set this to False and ignore.
309 | #
310 | # The following prints the error metrics as used in the paper.
311 |
312 | depth_errors = ['depth_l1_inverse','depth_scale_invariant','depth_abs_relative']
313 | motion_errors = ['rot_err','tran_angle_err']
314 | print('======================================')
315 | print('dataset: ', dataset)
316 | if dataset != 'nyu2':
317 | eval_result = read_xarray_json('{0}_eval.json'.format(dataset))
318 | print(' depth', eval_result[0].loc['3_refined',:,depth_errors,False].mean('sample').to_pandas().to_string())
319 | print(' motion', eval_result[0].loc['3',:,motion_errors,False].mean('sample').to_pandas().to_string())
320 | eval_result = read_xarray_json('{0}_eval_crop_allpix.json'.format(dataset))
321 | print(' depth cropped+all pixels', eval_result[0].loc['3_refined',:,['depth_scale_invariant'],False].mean('sample').to_pandas().to_string())
322 |
323 |
324 | if __name__ == "__main__":
325 | main()
326 |
327 |
328 |
--------------------------------------------------------------------------------
/examples/example.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from PIL import Image
4 | from matplotlib import pyplot as plt
5 | import os
6 | import sys
7 |
8 | examples_dir = os.path.dirname(__file__)
9 | weights_dir = os.path.join(examples_dir,'..','weights')
10 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python'))
11 |
12 | from depthmotionnet.networks_original import *
13 |
14 |
15 | def prepare_input_data(img1, img2, data_format):
16 | """Creates the arrays used as input from the two images."""
17 | # scale images if necessary
18 | if img1.size[0] != 256 or img1.size[1] != 192:
19 | img1 = img1.resize((256,192))
20 | if img2.size[0] != 256 or img2.size[1] != 192:
21 | img2 = img2.resize((256,192))
22 | img2_2 = img2.resize((64,48))
23 |
24 | # transform range from [0,255] to [-0.5,0.5]
25 | img1_arr = np.array(img1).astype(np.float32)/255 -0.5
26 | img2_arr = np.array(img2).astype(np.float32)/255 -0.5
27 | img2_2_arr = np.array(img2_2).astype(np.float32)/255 -0.5
28 |
29 | if data_format == 'channels_first':
30 | img1_arr = img1_arr.transpose([2,0,1])
31 | img2_arr = img2_arr.transpose([2,0,1])
32 | img2_2_arr = img2_2_arr.transpose([2,0,1])
33 | image_pair = np.concatenate((img1_arr,img2_arr), axis=0)
34 | else:
35 | image_pair = np.concatenate((img1_arr,img2_arr),axis=-1)
36 |
37 | result = {
38 | 'image_pair': image_pair[np.newaxis,:],
39 | 'image1': img1_arr[np.newaxis,:], # first image
40 | 'image2_2': img2_2_arr[np.newaxis,:], # second image with (w=64,h=48)
41 | }
42 | return result
43 |
44 |
45 | if tf.test.is_gpu_available(True):
46 | data_format='channels_first'
47 | else: # running on cpu requires channels_last data format
48 | data_format='channels_last'
49 |
50 | #
51 | # DeMoN has been trained for specific internal camera parameters.
52 | #
53 | # If you use your own images try to adapt the intrinsics by cropping
54 | # to match the following normalized intrinsics:
55 | #
56 | # K = (0.89115971 0 0.5)
57 | # (0 1.18821287 0.5)
58 | # (0 0 1 ),
59 | # where K(1,1), K(2,2) are the focal lengths for x and y direction,
60 | # and (K(1,3), K(2,3)) is the principal point.
61 | # The parameters are normalized such that the image height and width is 1.
62 | #
63 |
64 | # read data
65 | img1 = Image.open(os.path.join(examples_dir,'sculpture1.png'))
66 | img2 = Image.open(os.path.join(examples_dir,'sculpture2.png'))
67 |
68 | input_data = prepare_input_data(img1,img2,data_format)
69 |
70 | gpu_options = tf.GPUOptions()
71 | gpu_options.per_process_gpu_memory_fraction=0.8
72 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
73 |
74 | # init networks
75 | bootstrap_net = BootstrapNet(session, data_format)
76 | iterative_net = IterativeNet(session, data_format)
77 | refine_net = RefinementNet(session, data_format)
78 |
79 | session.run(tf.global_variables_initializer())
80 |
81 | # load weights
82 | saver = tf.train.Saver()
83 | saver.restore(session,os.path.join(weights_dir,'demon_original'))
84 |
85 |
86 | # run the network
87 | result = bootstrap_net.eval(input_data['image_pair'], input_data['image2_2'])
88 | for i in range(3):
89 | result = iterative_net.eval(
90 | input_data['image_pair'],
91 | input_data['image2_2'],
92 | result['predict_depth2'],
93 | result['predict_normal2'],
94 | result['predict_rotation'],
95 | result['predict_translation']
96 | )
97 | rotation = result['predict_rotation']
98 | translation = result['predict_translation']
99 | result = refine_net.eval(input_data['image1'],result['predict_depth2'])
100 |
101 |
102 | plt.imshow(result['predict_depth0'].squeeze(), cmap='Greys')
103 | plt.show()
104 |
105 | # try to visualize the point cloud
106 | try:
107 | from depthmotionnet.vis import *
108 | visualize_prediction(
109 | inverse_depth=result['predict_depth0'],
110 | image=input_data['image_pair'][0,0:3] if data_format=='channels_first' else input_data['image_pair'].transpose([0,3,1,2])[0,0:3],
111 | rotation=rotation,
112 | translation=translation)
113 | except ImportError as err:
114 | print("Cannot visualize as pointcloud.", err)
115 |
116 |
--------------------------------------------------------------------------------
/examples/example_v2.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from PIL import Image
4 | from matplotlib import pyplot as plt
5 | import os
6 | import sys
7 | import argparse
8 |
9 | examples_dir = os.path.dirname(__file__)
10 | weights_dir = os.path.join(examples_dir,'..','weights')
11 | sys.path.insert(0, os.path.join(examples_dir, '..', 'python'))
12 |
13 | from depthmotionnet.v2.networks import *
14 |
15 | parser = argparse.ArgumentParser(description="Runs the v2 network on the example image pair.")
16 | parser.add_argument("--checkpoint", type=str, required=True, help="Path to the checkpoint without the file extension")
17 | args = parser.parse_args()
18 |
19 |
20 | def prepare_input_data(img1, img2, data_format):
21 | """Creates the arrays used as input from the two images."""
22 | # scale images if necessary
23 | if img1.size[0] != 256 or img1.size[1] != 192:
24 | img1 = img1.resize((256,192))
25 | if img2.size[0] != 256 or img2.size[1] != 192:
26 | img2 = img2.resize((256,192))
27 | img2_2 = img2.resize((64,48))
28 |
29 | # transform range from [0,255] to [-0.5,0.5]
30 | img1_arr = np.array(img1).astype(np.float32)/255 -0.5
31 | img2_arr = np.array(img2).astype(np.float32)/255 -0.5
32 | img2_2_arr = np.array(img2_2).astype(np.float32)/255 -0.5
33 |
34 | if data_format == 'channels_first':
35 | img1_arr = img1_arr.transpose([2,0,1])
36 | img2_arr = img2_arr.transpose([2,0,1])
37 | img2_2_arr = img2_2_arr.transpose([2,0,1])
38 | image_pair = np.concatenate((img1_arr,img2_arr), axis=0)
39 | else:
40 | image_pair = np.concatenate((img1_arr,img2_arr),axis=-1)
41 |
42 | result = {
43 | 'image_pair': image_pair[np.newaxis,:],
44 | 'image1': img1_arr[np.newaxis,:], # first image
45 | 'image2_2': img2_2_arr[np.newaxis,:], # second image with (w=64,h=48)
46 | }
47 | return result
48 |
49 |
50 | if tf.test.is_gpu_available(True):
51 | data_format='channels_first'
52 | else: # running on cpu requires channels_last data format
53 | print('Running this example requires a GPU')
54 | sys.exit(1)
55 |
56 | #
57 | # DeMoN has been trained for specific internal camera parameters.
58 | #
59 | # If you use your own images, try to adapt the intrinsics by cropping
60 | # to match the following normalized intrinsics:
61 | #
62 | # K = (0.89115971 0          0.5)
63 | #     (0          1.18821287 0.5)
64 | #     (0          0          1  ),
65 | # where K(1,1), K(2,2) are the focal lengths for the x and y direction
66 | # and (K(1,3), K(2,3)) is the principal point.
67 | # The parameters are normalized such that the image width and height are 1.
68 | #
69 |
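# For illustration: the normalized intrinsics above correspond to the
# following pixel intrinsics for the 256x192 network input
#   K_px = np.array([[0.89115971*256, 0.0,            0.5*256],
#                    [0.0,            1.18821287*192, 0.5*192],
#                    [0.0,            0.0,            1.0    ]])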
70 | # read data
71 | img1 = Image.open(os.path.join(examples_dir,'sculpture1.png'))
72 | img2 = Image.open(os.path.join(examples_dir,'sculpture2.png'))
73 |
74 | input_data = prepare_input_data(img1,img2,data_format)
75 |
76 | gpu_options = tf.GPUOptions()
77 | gpu_options.per_process_gpu_memory_fraction=0.8
78 | session = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
79 |
80 | # init networks
81 | bootstrap_net = BootstrapNet(session)
82 | iterative_net = IterativeNet(session)
83 | refine_net = RefinementNet(session)
84 |
85 | session.run(tf.global_variables_initializer())
86 |
87 | # load weights
88 | saver = tf.train.Saver()
89 | saver.restore(session, args.checkpoint)
90 |
91 |
92 | # run the network
93 | result = bootstrap_net.eval(input_data['image_pair'], input_data['image2_2'])
94 | for i in range(3):
95 | result = iterative_net.eval(
96 | input_data['image_pair'],
97 | input_data['image2_2'],
98 | result['predict_depth2'],
99 | result['predict_normal2'],
100 | result['predict_rotation'],
101 | result['predict_translation']
102 | )
103 | rotation = result['predict_rotation']
104 | translation = result['predict_translation']
105 | result = refine_net.eval(input_data['image1'],result['predict_depth2'], result['predict_normal2'])
106 |
107 |
108 | plt.imshow(result['predict_depth0'].squeeze(), cmap='Greys')
109 | plt.show()
110 |
111 | # try to visualize the point cloud
112 | try:
113 | from depthmotionnet.vis import *
114 | visualize_prediction(
115 | inverse_depth=result['predict_depth0'],
116 | image=input_data['image_pair'][0,0:3] if data_format=='channels_first' else input_data['image_pair'].transpose([0,3,1,2])[0,0:3],
117 | rotation=rotation,
118 | translation=translation)
119 | except ImportError as err:
120 | print("Cannot visualize as pointcloud.", err)
121 |
122 |
--------------------------------------------------------------------------------
/examples/sculpture1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture1.png
--------------------------------------------------------------------------------
/examples/sculpture2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture2.png
--------------------------------------------------------------------------------
/examples/sculpture_Rt1.txt:
--------------------------------------------------------------------------------
1 | 1 0 0 0
2 | 0 1 0 0
3 | 0 0 1 0
4 |
--------------------------------------------------------------------------------
/examples/sculpture_Rt2.txt:
--------------------------------------------------------------------------------
1 | 9.634469799073742680e-01 -7.059393257190359205e-02 2.584306746332428184e-01 -9.577744007110595703e-01
2 | 6.105031979073534398e-02 9.971296798444659881e-01 4.478012982028589661e-02 -1.465892046689987183e-01
3 | -2.608501013241291688e-01 -2.736600550511965324e-02 9.649913607809581517e-01 2.473454177379608154e-01
4 |
--------------------------------------------------------------------------------
/examples/sculpture_depth1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture_depth1.npy
--------------------------------------------------------------------------------
/examples/sculpture_depth2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/examples/sculpture_depth2.npy
--------------------------------------------------------------------------------
/multivih5datareaderop/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | find_package( HDF5 REQUIRED )
2 | find_package( OpenCV QUIET COMPONENTS core highgui imgproc)
3 | find_package( OpenMP REQUIRED )
4 | set(Python_ADDITIONAL_VERSIONS 3)
5 | find_package( PythonInterp REQUIRED )
6 |
7 | include(ExternalProject)
8 |
9 | #
10 | # Download some not so common dependencies
11 | #
12 |
13 | # webp
14 | ExternalProject_Add(
15 | webp
16 | PREFIX "${CMAKE_BINARY_DIR}/webp"
17 | URL "http://downloads.webmproject.org/releases/webp/libwebp-0.5.1.tar.gz"
18 | # do not update
19 | UPDATE_COMMAND ""
20 | CONFIGURE_COMMAND CFLAGS=-fPIC ../webp/configure --disable-gl --disable-png --disable-jpeg --disable-tiff --disable-gif --disable-wic
21 | BUILD_COMMAND "make"
22 | # do not install
23 | INSTALL_COMMAND ""
24 | )
25 | ExternalProject_Get_Property( webp SOURCE_DIR BINARY_DIR )
26 | set( webp_INCLUDE_DIR "${SOURCE_DIR}/src" )
27 | set( webp_LIBRARY "${BINARY_DIR}/src/.libs/libwebp.a" )
28 |
29 |
30 | # lz4
31 | ExternalProject_Add(
32 | lz4
33 | PREFIX "${CMAKE_BINARY_DIR}/lz4"
34 | URL "https://github.com/lz4/lz4/archive/v1.7.5.tar.gz"
35 | # do not update
36 | UPDATE_COMMAND ""
37 | # copy our lz4_cmakelists.txt to <SOURCE_DIR>/CMakeLists.txt
38 | PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/multivih5datareaderop/lz4_cmakelists.txt <SOURCE_DIR>/CMakeLists.txt
39 | CMAKE_CACHE_ARGS "-DLZ4_BUNDLED_MODE:string=off" "-DBUILD_STATIC_LIBS:string=on" "-DBUILD_SHARED_LIBS:string=on"
40 | # do not install
41 | INSTALL_COMMAND ""
42 | )
43 | ExternalProject_Get_Property( lz4 SOURCE_DIR BINARY_DIR )
44 | set( lz4_INCLUDE_DIR "${SOURCE_DIR}/lib" )
45 | set( lz4_STATIC_LIB "${BINARY_DIR}/contrib/cmake_unofficial/liblz4.a" )
46 |
47 |
48 | # json for modern c++
49 | message( STATUS "Downloading json" )
50 | file( DOWNLOAD "https://github.com/nlohmann/json/releases/download/v2.1.1/json.hpp"
51 | "${CMAKE_BINARY_DIR}/json_for_modern_cpp/json.hpp" SHOW_PROGRESS )
52 | set( json_for_modern_cpp_INCLUDE_DIR "${CMAKE_BINARY_DIR}/json_for_modern_cpp/" )
53 |
54 |
55 | # half
56 | message( STATUS "Downloading half" )
57 | file( DOWNLOAD "https://sourceforge.net/p/half/code/HEAD/tree/tags/release-1.12.0/include/half.hpp?format=raw"
58 | "${CMAKE_BINARY_DIR}/half/include/half.hpp" SHOW_PROGRESS )
59 | set( half_INCLUDE_DIR "${CMAKE_BINARY_DIR}/half/include" )
60 |
61 |
62 | # retrieve tensorflow include dir and lib dir
63 | execute_process( COMMAND ${PYTHON_EXECUTABLE} "-c" "from __future__ import print_function; import tensorflow as tf; print(tf.sysconfig.get_include(), end='')"
64 | OUTPUT_VARIABLE TENSORFLOW_INCLUDE_DIR )
65 | execute_process( COMMAND ${PYTHON_EXECUTABLE} "-c" "from __future__ import print_function; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='')"
66 | OUTPUT_VARIABLE TENSORFLOW_LIB_DIR )
67 | find_library( TENSORFLOW_FRAMEWORK_LIB tensorflow_framework PATHS "${TENSORFLOW_LIB_DIR}" NO_DEFAULT_PATH )
68 | message( STATUS "${TENSORFLOW_INCLUDE_DIR}" )
69 | message( STATUS "${TENSORFLOW_LIB_DIR}" )
70 | message( STATUS "${TENSORFLOW_FRAMEWORK_LIB}" )
71 |
72 | file( GLOB multivih5datareaderop_SOURCES *.cpp *.cc )
73 |
74 |
75 | add_library( multivih5datareaderop SHARED ${multivih5datareaderop_SOURCES} )
76 | # do not add 'lib' prefix
77 | set_target_properties( multivih5datareaderop PROPERTIES PREFIX "" )
78 | set_target_properties( multivih5datareaderop PROPERTIES DEBUG_POSTFIX "_debug" )
79 | set_target_properties( multivih5datareaderop PROPERTIES COMPILE_FLAGS "-std=c++11 ${OpenMP_CXX_FLAGS} -Wabi-tag" )
80 | set_target_properties( multivih5datareaderop PROPERTIES LINK_FLAGS "${OpenMP_CXX_FLAGS}" )
81 | # use old ABI with gcc 5.x
82 | set_target_properties( multivih5datareaderop PROPERTIES COMPILE_DEFINITIONS "_GLIBCXX_USE_CXX11_ABI=0" )
83 | add_dependencies( multivih5datareaderop lz4 webp )
84 |
85 | target_include_directories( multivih5datareaderop PRIVATE
86 | ${TENSORFLOW_INCLUDE_DIR}
87 | ${TENSORFLOW_INCLUDE_DIR}/external/nsync/public/
88 | ${lz4_INCLUDE_DIR}
89 | ${webp_INCLUDE_DIR}
90 | ${HDF5_INCLUDE_DIRS}
91 | ${OpenCV_INCLUDE_DIRS}
92 | ${json_for_modern_cpp_INCLUDE_DIR}
93 | ${half_INCLUDE_DIR}
94 | )
95 |
96 | target_link_libraries( multivih5datareaderop
97 | ${lz4_STATIC_LIB}
98 | ${webp_LIBRARY}
99 | ${HDF5_LIBRARIES}
100 | ${OpenCV_LIBS}
101 | )
102 | if( TENSORFLOW_FRAMEWORK_LIB )
103 | target_link_libraries( multivih5datareaderop ${TENSORFLOW_FRAMEWORK_LIB} )
104 | endif()
105 |
106 |
--------------------------------------------------------------------------------
/multivih5datareaderop/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Multi View H5 Data Reader
3 |
4 | This document describes the op and the data format used for training DeMoN.
5 |
6 | ## Building the op
7 |
8 | To build the op, create a ```build``` directory inside the demon root directory.
9 | The location of the ```build``` directory is important, because the python package
10 | ```depthmotionnet.datareader``` will search this path for the data reader library.
11 |
12 | Then run cmake inside the build folder to configure and generate the build
13 | files.
14 | If you use a virtualenv make sure to activate it before running cmake.
15 |
16 | Assuming the virtualenv is managed with ```pew``` and named ```demon_venv```
17 | and the demon root directory is stored in the variable ```DEMON_DIR``` we can
18 | build the data reader op with:
19 | ```bash
20 | cd $DEMON_DIR # change to the demon root directory
21 |
22 | mkdir build
23 | cd build
24 |
25 | pew in demon_venv
26 | cmake ..
27 | make
28 | ```
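After a successful build the library is written to `build/multivih5datareaderop/multivih5datareaderop.so`, which is the path that `depthmotionnet.datareader` searches; the environment variable `MULTIVIH5DATAREADEROP_LIB` can point it to a different location. A minimal sanity check (a sketch, assuming the repository's `python` directory is on `PYTHONPATH`):

```python
import os
# optional: point the package at a non-default build location
# os.environ['MULTIVIH5DATAREADEROP_LIB'] = '/path/to/multivih5datareaderop.so'
from depthmotionnet.datareader import multi_vi_h5_data_reader
assert multi_vi_h5_data_reader is not None, 'reader op library was not found'
```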
29 |
30 | ### Dependencies
31 | The op depends on the following libraries:
32 | ```
33 | cmake 3.5.1
34 | tensorflow 1.0.0
35 | hdf5 1.8.16
36 | OpenCV 3.2.0
37 | ```
38 | The versions match the configuration we have tested on an Ubuntu 16.04 system.
39 |
40 | In addition, the cmake build script will download and build ```lz4```, ```webp```, [```json```](https://github.com/nlohmann/json) and [```half```](http://half.sourceforge.net/).
41 |
42 |
43 | ## `multi_vi_h5_data_reader` Op
44 |
45 | ```multi_vi_h5_data_reader(num_outputs, param_json)```
46 |
47 | Reads data in the multi view h5 format.
48 |
49 | This op reads one or more hdf5 files and generates data samples.
50 | The data is returned in NCHW format.
51 |
52 | #### Args
53 |
54 | **num_outputs**: The number of data tensors to return. This number depends on the
55 | values passed in `param_json`.
56 |
57 | **param_json**:
58 | The parameters passed to the reader in JSON format as a string.
59 | It is recommended to create a python dict with all parameters first and then convert
60 | the dict to str with json.dumps().
61 | Here is an example python dict with comments and good values for training:
62 |
63 | ```python
64 | {
65 | 'batch_size': 32, # the batch size
66 | 'test_phase': False, # If True enables testing mode which disables randomization.
67 |
68 | # the number of threads used for building batches. For testing set this to 1.
69 | 'builder_threads': 4,
70 |
71 | 'inverse_depth': True, # return depth with inverse depth values (1/z)
72 |
73 | # return the motion as one of 'ANGLEAXIS6', 'ANGLEAXIS7', 'QUATERNION', 'FMATRIX'.
74 | # The translation is stored always in the last 3 elements.
75 | #
76 | # ANGLEAXIS6: uses 3 elements for the rotation as angle axis [aa0, aa1, aa2, tx, ty, tz]
77 | # ANGLEAXIS7: uses 4 elements for the rotation as angle axis [angle, ax, ay, az, tx, ty, tz]
78 | # QUATERNION: uses 4 elements for the rotation as quaternion [qw, qx, qy, qz, tx, ty, tz]
79 | # FMATRIX: returns a fundamental matrix in column major order without the last element
80 | # which is defined as 1. [f11, f21, f31, f12, f22, f32, f13, f23]
81 | 'motion_format': 'ANGLEAXIS6',
82 |
83 | # if True normalizes the translation to ||t||=1 and scales the depth values accordingly.
84 | 'norm_trans_scale_depth': True,
85 |
86 | # the output image/depth height and width.
87 | # Downsampling is supported.
88 | 'scaled_height': 192,
89 | 'scaled_width': 256,
90 |
91 | # the number of scenes to keep in memory. A bigger pool improves the variety of
92 | # generated batch items, but requires more main memory.
93 | # For testing a small value like 5 is sufficient.
94 | 'scene_pool_size': 500,
95 |
96 | # The requested output tensors.
97 | 'top_output': ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS'),
98 |
99 | # probabilities for geometric augmentations.
100 | # There is a 50% chance of rotating the image and cameras by 180 deg followed
101 | # by a 50% chance of mirroring the x-axis.
102 | # Set these to 0 for testing.
103 | 'augment_rot180': 0.5,
104 | 'augment_mirror_x': 0.5,
105 |
106 | # source is a list of dicts, which define the paths to the hdf5 files and the
107 | # importance of each file.
108 | # In the example below the reader will sample from data2.h5 twice as often as
109 | # from data1.h5.
110 | 'source': [
111 | {'path': '/path/to/data1.h5', 'weight': [{'t': 0, 'v': 1.0}]},
112 | {'path': '/path/to/data2.h5', 'weight': [{'t': 0, 'v': 2.0}]},
113 | ],
114 | # for testing, exactly one source entry must be used. Multiple files can be concatenated with ';'.
115 | #'source': [ {'path': '/path/to/test1.h5;/path/to/test2.h5'}, ],
116 | }
117 | ```
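A minimal sketch of calling the op with such a dict (assuming the op library has been built as described above and that `num_outputs` equals the number of entries in `'top_output'`; the h5 path is a placeholder):

```python
import json
from depthmotionnet.datareader import multi_vi_h5_data_reader

reader_params = {
    'batch_size': 8, 'test_phase': True, 'builder_threads': 1,
    'inverse_depth': True, 'motion_format': 'ANGLEAXIS6',
    'norm_trans_scale_depth': True,
    'scaled_height': 192, 'scaled_width': 256, 'scene_pool_size': 5,
    'top_output': ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS'),
    'source': [{'path': '/path/to/test1.h5'}],
}
reader_op_output = multi_vi_h5_data_reader(len(reader_params['top_output']),
                                           json.dumps(reader_params))
```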
118 |
119 | #### Outputs
120 |
121 | **info**: The info tensor stores information about the internal buffers.
122 | It stores the following information:
123 | - required number of test iterations
124 | - current batch buffer size
125 | - maximum batch buffer size
126 | - current reader buffer size
127 | - maximum reader buffer size
128 |
129 | **sample_id**: A tensor storing a string with the id for each batch item.
130 | A newline symbol is used to separate the individual id strings.
131 |
132 | **output**: A list of tensors with the requested data.
133 |
134 | The order of tensors is always:
135 | ['IMAGE_PAIR', 'MOTION', 'FLOW', 'DEPTH', 'INTRINSICS', 'DEPTHMASKS','SAMPLE_IDS'].
136 | Depending on the 'top_output' parameter in 'param_json' not all tensors
137 | may be present.
138 |
139 | The 'IMAGE_PAIR' tensor stores the image pair as 6 channel RGBRGB image.
140 |
141 | The 'MOTION' tensor stores the motion from the first to the second camera in
142 | the requested format specified by the 'motion_format' parameter in 'param_json'.
143 |
144 | The 'FLOW' tensor stores the optical flow from the first to the second image
145 | with 2 channels. The first channel stores the x component of the flow vector.
146 |
147 | The 'DEPTH' tensor stores the depth map for the first image.
148 |
149 | The 'INTRINSICS' tensor stores the normalized intrinsics as [fx fy cx cy].
150 | fx,fy is the x and y component of the normalized focal length.
151 | cx,cy is the x and y component of the normalized principal point.
152 |
153 | The 'DEPTHMASKS' tensor marks the points for which a depth value can be computed.
154 |
155 |
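A sketch of how the `info` and `sample_id` outputs might be consumed with a TF 1.x session as used in the examples (assuming `info` and `sample_id` are the corresponding tensors returned by the op and `session` is a `tf.Session`):

```python
info_val, sample_id_val = session.run([info, sample_id])
test_iterations = int(info_val[0])                      # required number of test iterations
batch_ids = sample_id_val.decode('utf-8').split('\n')   # id strings are newline separated
```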
156 |
157 | **See also the example [```examples/create_dataset_and_use_readerop.py```](../examples/create_dataset_and_use_readerop.py) for using this op in the examples folder.**
158 |
159 |
160 |
161 | ## HDF5 Data Format
162 |
163 | Datasets are stored as objects in HDF5 files.
164 | To minimize data IO, we group images that show the same scene.
165 | A valid group with a unique name "group" stores the following datasets:
166 |
167 | ```
168 | /group/frames/t0/v0/image
169 | /group/frames/t0/v0/depth
170 | /group/frames/t0/v0/camera
171 | /group/frames/t0/v1/image
172 | /group/frames/t0/v1/depth
173 | /group/frames/t0/v1/camera
174 | ...
175 | ```
176 |
177 | `t0/v0` means viewpoint 0 at time 0. The time is always `t0`. The number of
178 | viewpoints must always be >= 2.
179 | For test datasets the number of viewpoints is always 2.
180 |
181 |
182 | ### Reserved groups
183 | All groups starting with a '.' e.g. `/.config` are reserved and are not treated as data samples.
184 |
185 | ### `image` dataset
186 |
187 | Images are stored in webp format as 1D char arrays.
188 |
189 | Attributes:
190 | * format : scalar string attribute with value `"webp"`
191 |
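For reference, `read_webp_image` in `python/depthmotionnet/dataset_tools/view_io.py` decodes such a dataset with PIL, roughly as in this sketch (file and group names are placeholders; Pillow needs webp support):

```python
from io import BytesIO
from PIL import Image
import h5py

with h5py.File('dataset.h5', 'r') as f:
    ds = f['group/frames/t0/v0/image']
    img = Image.open(BytesIO(ds[:].tobytes()))   # decode the 1D webp byte array
```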
192 |
193 |
194 |
195 | ### `depth` dataset
196 |
197 | Depth maps are stored as half precision floats (16-bit) with LZ4 compression.
198 |
199 | Attributes:
200 |
201 | * format : scalar string attribute with value `"lz4half"`
202 | * depth_metric : scalar string attribute with value `"camera_z"` or `"ray_length"`
203 | * extents : 1D int array with [height, width]
204 |
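Decoding mirrors `read_lz4half_depth` in `view_io.py`: LZ4-decompress to raw float16 values and reshape using the `extents` attribute (sketch; file and group names are placeholders):

```python
import h5py
import numpy as np
from depthmotionnet.dataset_tools.lz4 import lz4_uncompress

with h5py.File('dataset.h5', 'r') as f:
    ds = f['group/frames/t0/v0/depth']
    height, width = ds.attrs['extents']
    raw = lz4_uncompress(ds[:].tobytes(), int(2*height*width))   # 2 bytes per float16
    depth = np.frombuffer(raw, np.float16).astype(np.float32).reshape(height, width)
```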
205 |
206 |
207 | ### `camera` dataset
208 |
209 | The camera dataset stores the intrinsic and extrinsic parameters for the viewpoint.
210 | Camera parameters are stored as 1D double data sets.
211 |
212 | Attributes:
213 |
214 | * format : scalar string attribute with value `"pinhole"`
215 |
216 | Interpretation:
217 |
218 | `[fx fy skew cx cy r11 r21 r31 r12 r22 r32 r13 r23 r33 tx ty tz]`
219 |
220 | The internal parameters fx, fy, cx, cy are compatible with the image dimensions of the image dataset.
221 |
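The 17 values map to K, R, t as in `read_camera_params` in `view_io.py` (sketch; file and group names are placeholders):

```python
import h5py
import numpy as np

with h5py.File('dataset.h5', 'r') as f:
    c = f['group/frames/t0/v0/camera'][:]
    K = np.array([[c[0], c[2], c[3]],
                  [0.0,  c[1], c[4]],
                  [0.0,  0.0,  1.0]])
    R = c[5:14].reshape(3, 3).T   # rotation entries are stored column major
    t = c[14:17]
```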
222 |
223 | ### `t0` group
224 |
225 | The time group `t0` stores an attribute `viewpoint_pairs` which enumerates all
226 | valid image pair combinations.
227 |
228 | Attribute:
229 | * viewpoint_pairs : 1D int vector. Used by the multiviewh5datareader to generate image pairs.
230 | Two subsequent values describe a pair. E.g. the vector `[0 1 0 2]` describes the pairs (0,1) and (0,2).
231 |
232 | For test datasets the value of the `viewpoint_pairs` attribute must be `[0 1]`.
233 |
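Writing a minimal valid group with h5py and the repository's `dataset_tools` could look like the following sketch (`h5file` is an open `h5py.File` and `view1`, `view2` are `View` tuples; both are assumptions for illustration):

```python
import numpy as np
from depthmotionnet.dataset_tools.view_io import write_view

for i, v in enumerate((view1, view2)):
    write_view(h5file.require_group('group/frames/t0/v{0}'.format(i)), v)
# enumerate the valid pair combinations; for a test dataset this must be [0 1]
h5file['group/frames/t0'].attrs['viewpoint_pairs'] = np.array([0, 1], dtype=np.int32)
```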
234 |
235 |
236 | **See also the [```examples/create_dataset_and_use_readerop.py```](../examples/create_dataset_and_use_readerop.py) in the examples folder.**
237 |
--------------------------------------------------------------------------------
/multivih5datareaderop/lz4_cmakelists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required( VERSION 3.5.1 )
2 | add_subdirectory( contrib/cmake_unofficial )
3 |
--------------------------------------------------------------------------------
/multivih5datareaderop/simpleh5file.h:
--------------------------------------------------------------------------------
1 | //
2 | // DeMoN - Depth Motion Network
3 | // Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | //
5 | // This program is free software: you can redistribute it and/or modify
6 | // it under the terms of the GNU General Public License as published by
7 | // the Free Software Foundation, either version 3 of the License, or
8 | // (at your option) any later version.
9 | //
10 | // This program is distributed in the hope that it will be useful,
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | // GNU General Public License for more details.
14 | //
15 | // You should have received a copy of the GNU General Public License
16 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | //
18 | #ifndef SIMPLEH5FILE_H_
19 | #define SIMPLEH5FILE_H_
20 | #include <hdf5.h>
21 | #include <string>
22 | #include <vector>
23 |
24 |
25 | /*!
26 | * This class provides basic functions to manipulate hdf5 files
27 | */
28 | class SimpleH5File
29 | {
30 | public:
31 | //! File modes
32 | enum FileMode { TRUNCATE, //!< Create or overwrite file
33 | READ, //!< Read only file access
34 | READ_WRITE //!< Read write access
35 | };
36 | //! Compression settings
37 | enum Compression { UNCOMPRESSED, /*GZIP_1, GZIP_2,...*/ };
38 |
39 | /*!
40 | * ctor.
41 | * \param use_locking If true uses mutexes to allow accessing the object
42 | * from multiple threads.
43 | */
44 | SimpleH5File(bool use_locking=false);
45 |
46 |
47 | /*!
48 | * Creates a SimpleH5File object and opens the specified hdf5 file.
49 | *
50 | * \param filename Filename of the hdf5 file. E.g. 'myfile.h5'
51 | * \param mode The mode for opening the file.
52 | * \param use_locking If true uses mutexes to allow accessing the object
53 | * from multiple threads.
54 | */
55 | SimpleH5File( const std::string& filename, FileMode mode=READ, bool use_locking=false );
56 |
57 |
58 | /*!
59 | * dtor. Closes the opened file.
60 | */
61 | virtual ~SimpleH5File();
62 |
63 |
64 | /*!
65 | * Opens the specified hdf5 file. If a hdf5 file was already opened it is
66 | * closed before opening the new file.
67 | *
68 | * \param filename Filename of the hdf5 file. E.g. 'myfile.h5'
69 | * \param mode The mode for opening the file.
70 | */
71 | void open( const std::string& filename, FileMode mode=READ );
72 |
73 |
74 | /*!
75 | * Returns whether a file is open.
76 | */
77 | bool isOpen() const;
78 |
79 |
80 | /*!
81 | * Returns whether the object uses locking to allow multiple thread using
82 | * this object simultaneously.
83 | */
84 | bool useLocking() const;
85 |
86 |
87 | /*!
88 | * Closes the file. Has no effect if no file is open.
89 | */
90 | void close();
91 |
92 |
93 | /*!
94 | * Creates a new group and creates parent directories if necessary.
95 | *
96 | * \param path Path of the new group. E.g. '/group1/group2/newGroup'
97 | * creates 'newGroup' and 'group1', 'group2' if they dont exist.
98 | */
99 | void makeGroup( const std::string& path );
100 |
101 |
102 | /*!
103 | * Removes a group or dataset
104 | *
105 | * \param path The path of the group or dataset to be removed.
106 | * E.g. '/group/mydataset' removes 'mydataset'
107 | */
108 | void remove( const std::string& path );
109 |
110 |
111 | /*!
112 | * Returns whether the object with the specified path is a group.
113 | */
114 | bool isGroup( const std::string& path );
115 |
116 |
117 | /*!
118 | * Returns whether the object with the specified path is a dataset.
119 | */
120 | bool isDataset( const std::string& path );
121 |
122 | /*!
123 | * Returns true if the native type and the dataset type match.
124 | * If 'path' is not a dataset then false is returned.
125 | */
126 | template <class T>
127 | bool datasetDataType( const std::string& path );
128 |
129 | /*!
130 | * Returns whether the path points to a dataset or group
131 | */
132 | bool exists( const std::string& path );
133 |
134 |
135 | /*!
136 | * Lists all objects (datasets and groups) with the parent specified by path.
137 | *
138 | * \param path E.g. '/' lists all groups and datasets of the root group.
139 | */
140 | std::vector<std::string> listObjects( const std::string& path );
141 |
142 |
143 | /*!
144 | * Lists all datasets with the parent specified by path.
145 | *
146 | * \param path E.g. '/' lists all datasets of the root group.
147 | */
148 | std::vector<std::string> listDatasets( const std::string& path );
149 |
150 |
151 | /*!
152 | * Lists all groups with the parent specified by path.
153 | *
154 | * \param path E.g. '/' lists all groups of the root group.
155 | */
156 | std::vector<std::string> listGroups( const std::string& path );
157 |
158 |
159 | /*!
160 | * Lists all attributes of a dataset or group specified by path.
161 | *
162 | * \param path E.g. '/mydataset' lists all attributes of 'mydataset'.
163 | */
164 | std::vector<std::string> listAttributes( const std::string& path );
165 |
166 |
167 |
168 | /*!
169 | * Writes a dataset. Any existing dataset will be overwritten.
170 | * This command will also create parent groups if necessary.
171 | *
172 | * \param data Pointer to the data
173 | * \param dims Dimensions of the dataset to write. The extent of each
174 | * dimension is defined in elements.
175 | * \param path Path to the dataset e.g. '/group/dataset'.
176 | * \param compress Reserved for future use to specify the compression filter
177 | */
178 | template <class T>
179 | void writeDataset( const T* data, const std::vector<size_t>& dims,
180 | const std::string& path,
181 | Compression compress = UNCOMPRESSED );
182 |
183 | /*!
184 | * Reads the dataset to the given buffer.
185 | *
186 | * \param data The buffer for reading the dataset. The buffer must be
187 | * allocated by the user. Use getDatasetExtents() to retrieve
188 | * the size of the dataset.
189 | * \param path Path to the dataset e.g. '/group/dataset'.
190 | */
191 | template <class T>
192 | void readDataset( T* data, const std::string& path );
193 |
194 |
195 | /*!
196 | * Returns the byte offset of the dataset in the file and the number
197 | * of elements.
198 | *
199 | * \param path Path to the dataset e.g. '/group/dataset'.
200 | * \return Returns the byte offset of the dataset in the file and the number
201 | * of elements
202 | */
203 | std::pair<size_t,size_t> getDatasetOffsetAndSize( const std::string& path );
204 |
205 |
206 | /*!
207 | * Returns whether the dataset is contiguous or not.
208 | *
209 | * \param path Path to the dataset e.g. '/group/dataset'.
210 | * \return Returns true if the dataset is contiguous
211 | */
212 | bool isDatasetContiguous( const std::string& path );
213 |
214 |
215 | /*!
216 | * Returns the extents of the dataset.
217 | *
218 | * \param path Path to the dataset e.g. '/group/dataset'.
219 | * \return Returns a vector containing the extents. The size of the vector
220 | * corresponds to the number of dimensions of the dataset
221 | */
222 | std::vector<size_t> getDatasetExtents( const std::string& path );
223 |
224 |
225 | /*!
226 | * Writes an attribute. An attribute is attached to a group or a dataset.
227 | * Overwrites existing attributes.
228 | *
229 | * \param value The value of the attribute.
230 | * \param attr_name The name of the attribute e.g. 'my_int_attribute'
231 | * \param path The path of the group or dataset e.g. '/group'
232 | */
233 | template <class T>
234 | void writeAttribute( const T& value,
235 | const std::string& attr_name, const std::string& path );
236 |
237 |
238 | //! \sa writeAttribute(const T& value, const std::string&, const std::string&)
239 | template <class T>
240 | void writeAttribute( const std::vector<T>& value,
241 | const std::string& attr_name, const std::string& path );
242 |
243 | //! \sa writeAttribute(const T& value, const std::string&, const std::string&)
244 | void writeAttribute( const char str[],
245 | const std::string& attr_name, const std::string& path );
246 |
247 | //! \sa writeAttribute(const T& value, const std::string&, const std::string&)
248 | void writeAttribute( const std::string& str,
249 | const std::string& attr_name, const std::string& path );
250 |
251 |
252 | /*!
253 | * Reads an attribute. An attribute is attached to a group or a dataset.
254 | *
255 | * \param value The value that is written to the attribute
256 | * \param attr_name The name of the attribute e.g. 'my_int_attribute'
257 | * \param path The path of the group or dataset e.g. '/group'
258 | */
259 | template <class T>
260 | void readAttribute( T& value,
261 | const std::string& attr_name, const std::string& path );
262 |
263 | //! \sa readAttribute(T& value, const std::string&, const std::string&)
264 | template <class T>
265 | void readAttribute( std::vector<T>& value,
266 | const std::string& attr_name, const std::string& path );
267 |
268 | //! \sa readAttribute(T& value, const std::string&, const std::string&)
269 | void readAttribute( std::string& str,
270 | const std::string& attr_name, const std::string& path );
271 |
272 | /*!
273 | * Removes an attribute.
274 | *
275 | * \param attr_name Name of the attribute.
276 | * \param path The path of the group or dataset e.g. '/group'
277 | */
278 | void removeAttribute( const std::string& attr_name, const std::string& path );
279 |
280 |
281 | /*!
282 | * Checks the existence of an attribute.
283 | *
284 | * \param attr_name Name of the attribute.
285 | * \param path The path of the group or dataset e.g. '/group'
286 | * \return Returns true if the attribute exists.
287 | */
288 | bool existsAttribute( const std::string& attr_name, const std::string& path );
289 |
290 |
291 | /*!
292 | * Returns the H5O_info_t struct for the object with the specified path.
293 | *
294 | * \param path Path to an object (group or dataset) e.g. '/mydataset'
295 | * \return The H5O_info_t struct of the object.
296 | */
297 | H5O_info_t getObjectInfo( const std::string& path );
298 |
299 |
300 | /*!
301 | * Checks if a file is a hdf5 file
302 | *
303 | * \param filename path to the file
304 | * \return Returns true if the file is a hdf5 file.
305 | * Returns false if the file is not a hdf5 file.
306 | * Returns false if the file does not exist or reading fails.
307 | */
308 | static bool isHDF5( const std::string& filename );
309 |
310 |
311 | /*!
312 | * Simplifies a hdf5 path. This function removes leading and trailing
313 | * whitespace and removes redundant multiple '/'.
314 | *
315 | * \return The simplified path.
316 | */
317 | static std::string simplifyPath( const std::string& path );
318 |
319 | protected:
320 |
321 | FileMode mode;
322 |
323 | hid_t file_id; //! hdf5 file identifier
324 |
325 | private:
326 | SimpleH5File( const SimpleH5File& other ):use_locking(false) {}
327 | SimpleH5File& operator=( const SimpleH5File& other ) { return *this; }
328 |
329 |
330 | /*!
331 | * Creates a dataset. This command will also create parent groups if
332 | * necessary.
333 | *
334 | * \param dataset_path Path to the dataset e.g. '/group/dataset'.
335 | * \param dims Dimensions of the dataset to write. The extent of
336 | * each dimension is defined in elements.
337 | * \param compress Reserved for future use to specify the compression
338 | * filter
339 | */
340 | template <class T>
341 | void createDataset( const std::string& dataset_path,
342 | const std::vector<size_t>& dims,
343 | Compression compress = UNCOMPRESSED );
344 |
345 | bool is_open;
346 | const bool use_locking;
347 |
348 |
349 | };
350 |
351 |
352 |
353 |
354 |
355 | #endif /* SIMPLEH5FILE_H_ */
356 |
--------------------------------------------------------------------------------
/python/depthmotionnet/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 |
--------------------------------------------------------------------------------
/python/depthmotionnet/datareader/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import os
19 | import tensorflow
20 | from .helpers import *
21 |
22 | # try to import the multivih5datareaderop from the 'build' directory
23 | if 'MULTIVIH5DATAREADEROP_LIB' in os.environ:
24 | _readerlib_path = os.environ['MULTIVIH5DATAREADEROP_LIB']
25 | else:
26 | _readerlib_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], '..', '..', '..', 'build','multivih5datareaderop', 'multivih5datareaderop.so'))
27 |
28 | readerlib = None
29 | multi_vi_h5_data_reader = None
30 | if os.path.isfile(_readerlib_path):
31 | readerlib = tensorflow.load_op_library(_readerlib_path)
32 | print('Using {0}'.format(_readerlib_path))
33 | multi_vi_h5_data_reader = readerlib.multi_vi_h5_data_reader
34 |
35 |
--------------------------------------------------------------------------------
/python/depthmotionnet/datareader/helpers.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 |
19 | def add_sources(params, dataset_files, weight, normalize=True, concatenate=False):
20 | """Add sources to the parameters for the multi_vi_h5_data_reader op.
21 |
22 | params: dict
23 | dict with the parameters for the multi_vi_h5_data_reader op.
24 |
25 | dataset_files: list of str
26 | List of h5 file paths to be added as sources.
27 |
28 | weight: float
29 | The sampling importance.
30 | Higher values means the reader op samples more often from these files.
31 |
32 | normalize: bool
33 | If True the weight for each file will be divided by the number of files.
34 | If concatenate is True this parameter has no effect.
35 |
36 | concatenate: bool
37 | If True adds only a single source that contains all files.
38 |
39 | """
40 | if not 'source' in params:
41 | params['source'] = []
42 |
43 | if concatenate:
44 | # generate a single source with all paths
45 | source = {'path': ';'.join(dataset_files)}
46 | params['source'].append(source)
47 |
48 | else:
49 | # generate for each path a new source
50 | for item in dataset_files:
51 | w = weight
52 | if normalize:
53 | w /= len(dataset_files)
54 |
55 | source = {'path': item, 'weight': w}
56 | params['source'].append(source)
57 |
58 | return params
59 |
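# Example usage (illustration only; the h5 paths are placeholders):
#
#   params = {'batch_size': 32, 'top_output': ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS')}
#   add_sources(params, ['/path/to/data1.h5', '/path/to/data2.h5'], weight=1.0)
#   # with normalize=True (the default) each of the two sources gets weight 0.5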
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from .view import View
19 | from .view_io import *
20 | from .view_tools import *
21 |
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/helpers.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import numpy as np
19 | from PIL import Image
20 | from scipy.ndimage.filters import laplace
21 |
22 |
23 | def measure_sharpness(img):
24 | """Measures the sharpeness of an image using the variance of the laplacian
25 |
26 | img: PIL.Image
27 |
28 | Returns the variance of the laplacian. Higher values mean a sharper image
29 | """
30 | img_gray = np.array(img.convert('L'), dtype=np.float32)
31 | return np.var(laplace(img_gray))
32 |
33 |
34 | def concat_images_vertical(images):
35 | """Concatenates a list of PIL.Image in vertical direction
36 |
37 | images: list of PIL.Image
38 |
39 | Returns the concatenated image
40 | """
41 | total_height = 0
42 | total_width = 0
43 | for img in images:
44 | total_width = max(total_width, img.size[0])
45 | total_height += img.size[1]
46 | result = Image.new('RGB',(total_width,total_height))
47 | ypos = 0
48 | for img in images:
49 | result.paste(img,(0,ypos))
50 | ypos += img.size[1]
51 | return result
52 |
53 |
54 | def concat_images_horizontal(images):
55 | """Concatenates a list of PIL.Image in horizontal direction
56 |
57 | images: list of PIL.Image
58 |
59 | Returns the concatenated image
60 | """
61 | total_height = 0
62 | total_width = 0
63 | for img in images:
64 | total_height = max(total_height, img.size[1])
65 | total_width += img.size[0]
66 | result = Image.new('RGB',(total_width,total_height))
67 | xpos = 0
68 | for img in images:
69 | result.paste(img,(xpos,0))
70 | xpos += img.size[0]
71 | return result
72 |
73 |
74 | def safe_crop_image(image, box, fill_value):
75 | """crops an image and adds a border if necessary
76 |
77 | image: PIL.Image
78 |
79 | box: 4 tuple
80 | (x0,y0,x1,y1) tuple
81 |
82 | fill_value: color value, scalar or tuple
83 |
84 | Returns the cropped image
85 | """
86 | x0, y0, x1, y1 = box
87 | if x0 >=0 and y0 >= 0 and x1 < image.width and y1 < image.height:
88 | return image.crop(box)
89 | else:
90 | crop_width = x1-x0
91 | crop_height = y1-y0
92 | tmp = Image.new(image.mode, (crop_width, crop_height), fill_value)
93 | safe_box = (
94 | max(0,min(x0,image.width-1)),
95 | max(0,min(y0,image.height-1)),
96 | max(0,min(x1,image.width)),
97 | max(0,min(y1,image.height)),
98 | )
99 | img_crop = image.crop(safe_box)
100 | x = -x0 if x0 < 0 else 0
101 | y = -y0 if y0 < 0 else 0
102 | tmp.paste(img_crop, (x,y)) # paste the safely cropped region at its offset inside the padded image
103 | return tmp
104 |
105 |
106 | def safe_crop_array2d(arr, box, fill_value):
107 | """crops an array and adds a border if necessary
108 |
109 | arr: numpy.ndarray with 2 dims
110 |
111 | box: 4 tuple
112 | (x0,y0,x1,y1) tuple. x is the column and y is the row!
113 |
114 | fill_value: scalar
115 |
116 | Returns the cropped array
117 | """
118 | x0, y0, x1, y1 = box
119 | if x0 >=0 and y0 >= 0 and x1 < arr.shape[1] and y1 < arr.shape[0]:
120 | return arr[y0:y1,x0:x1]
121 | else:
122 | crop_width = x1-x0
123 | crop_height = y1-y0
124 | tmp = np.full((crop_height, crop_width), fill_value, dtype=arr.dtype)
125 | safe_box = (
126 | max(0,min(x0,arr.shape[1]-1)),
127 | max(0,min(y0,arr.shape[0]-1)),
128 | max(0,min(x1,arr.shape[1])),
129 | max(0,min(y1,arr.shape[0])),
130 | )
131 | x = -x0 if x0 < 0 else 0
132 | y = -y0 if y0 < 0 else 0
133 | safe_width = safe_box[2]-safe_box[0]
134 | safe_height = safe_box[3]-safe_box[1]
135 | tmp[y:y+safe_height,x:x+safe_width] = arr[safe_box[1]:safe_box[3],safe_box[0]:safe_box[2]]
136 | return tmp
137 |
138 |
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/lz4.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from ctypes import *
19 | import os
20 |
21 | # try the version used by the multivih5datareaderop first
22 | try:
23 | _lib_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], '..', '..', '..', 'build','lz4','src','lz4-build','contrib', 'cmake_unofficial', 'liblz4.so'))
24 | liblz4 = CDLL(_lib_path)
25 | except:
26 | # try system version
27 | try:
28 | liblz4 = CDLL('liblz4.so')
29 | except:
30 | raise RuntimeError('Cannot load liblz4.so')
31 |
32 |
33 | def lz4_uncompress(input_data, expected_decompressed_size):
34 | """decompresses the LZ4 compressed data
35 |
36 | input_data: bytes
37 | byte string of the input data
38 |
39 | expected_decompressed_size: int
40 | size of the decompressed output data
41 |
42 | returns the decompressed data as bytes or None on error
43 | """
44 | assert isinstance(input_data,bytes), "input_data must be of type bytes"
45 | assert isinstance(expected_decompressed_size,int), "expected_decompressed_size must be of type int"
46 |
47 | dst_buf = create_string_buffer(expected_decompressed_size)
48 | status = liblz4.LZ4_decompress_safe(input_data,dst_buf,len(input_data),expected_decompressed_size)
49 | if status != expected_decompressed_size:
50 | return None
51 | else:
52 | return dst_buf.raw
53 |
54 |
55 |
56 | def lz4_compress_bound(input_size):
57 | """Returns the maximum size needed for compressing data with the given input_size"""
58 | assert isinstance(input_size,int), "input_size must be of type int"
59 |
60 | result = liblz4.LZ4_compressBound(c_int(input_size))
61 | return result
62 |
63 |
64 |
65 | def lz4_compress_HC(src):
66 | """Compresses the input bytes with LZ4 high compression algorithm.
67 |
68 | Returns the compressed bytes array or an empty array on error
69 | """
70 | assert isinstance(src,bytes), "src must be of type bytes"
71 | max_compressed_size = lz4_compress_bound(len(src))
72 | dst_buf = create_string_buffer(max_compressed_size)
73 | # written_size = liblz4.LZ4_compress_HC(src, dst_buf, len(src), max_compressed_size, c_int(0)) # new signature. TODO update liblz4
74 | written_size = liblz4.LZ4_compressHC(src, dst_buf, len(src))
75 | return dst_buf.raw[:written_size]
76 |
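# Example round trip (illustration only):
#
#   data = bytes(range(256)) * 16
#   compressed = lz4_compress_HC(data)
#   restored = lz4_uncompress(compressed, len(data))
#   assert restored == data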
77 |
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/sun3d_utils.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import os
19 | import math
20 | import itertools
21 | import h5py
22 | import numpy as np
23 | from PIL import Image
24 | from scipy.ndimage import maximum_filter1d, minimum_filter1d
25 |
26 | from .view import View
27 | from .view_io import *
28 | from .view_tools import *
29 | from .helpers import measure_sharpness
30 |
31 |
32 | def read_frameid_timestamp(files):
33 | """Get frameids and timestamps from the sun3d filenames
34 |
35 | files: list of str
36 | a list of the filenames
37 |
38 | Returns the frameid and timestamp as numpy.array
39 | """
40 | frameids = []
41 | timestamps = []
42 | for f in files:
43 | id_timestamp = f[:-4].split('-')
44 | frameids.append( int(id_timestamp[0]) )
45 | timestamps.append( int(id_timestamp[1]) )
46 | return np.asarray(frameids), np.asarray(timestamps)
47 |
48 |
49 | def read_image(filename):
50 | """Read image from a file
51 |
52 | filename: str
53 |
54 | Returns image as PIL Image
55 | """
56 | image = Image.open(filename)
57 | image.load()
58 | return image
59 |
60 | def read_depth(filename):
61 | """Read depth from a sun3d depth file
62 |
63 | filename: str
64 |
65 | Returns depth as np.float32 array
66 | """
67 | depth_pil = Image.open(filename)
68 | depth_arr = np.array(depth_pil)
69 | depth_uint16 = depth_arr.astype(np.uint16)
70 | depth_shifted = (depth_uint16 >> 3) | (depth_uint16 << 13)
71 | depth_float = (depth_shifted/1000).astype(np.float32)
72 | return depth_float
73 |
74 | def read_Rt(extrinsics, frame):
75 | """Read camera extrinsics at certain frame
76 |
77 | extrinsics: np array with size (totalframe*3, 4)
78 |
79 | frame: int (starts from 0)
80 |
81 | Returns the world-to-camera rotation R and translation t
82 | """
83 | Rt = extrinsics[3*frame:3*frame+3]
84 | R_arr = Rt[0:3,0:3]
85 | t_arr = Rt[0:3,3]
86 | R = R_arr.transpose() # sun3d extrinsics are camera-to-world; invert to get the world-to-camera transform used by View
87 | t = -np.dot(R,t_arr)
88 | return R, t
89 |
90 |
91 | def compute_sharpness(sun3d_data_path, seq_name):
92 | """Returns a numpy array with the sharpness score of all images in the sequence.
93 |
94 | sun3d_data_path: str
95 | base path to the sun3d data
96 |
97 | seq_name: str
98 | the name of the sequence e.g. "mit_32_d463/d463_1"
99 |
100 | """
101 | seq_path = os.path.join(sun3d_data_path,seq_name)
102 | image_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'image'))) if f.endswith('.jpg')]
103 |
104 | sharpness = []
105 | for img_file in image_files:
106 | img = read_image(os.path.join(seq_path,'image',img_file))
107 | sharpness.append(measure_sharpness(img))
108 |
109 | return np.asarray(sharpness)
110 |
111 |
112 | def create_samples_from_sequence(h5file, sun3d_data_path, seq_name, baseline_range, sharpness, sharpness_window=30, max_views_num=10):
113 | """Read a sun3d sequence and write samples to the h5file
114 |
115 | h5file: h5py.File handle
116 |
117 | sun3d_data_path: str
118 | base path to the sun3d data
119 |
120 | seq_name: str
121 | the name of the sequence e.g. "mit_32_d463/d463_1"
122 |
123 | baseline_range: tuple(float,float)
124 | The allowed baseline range
125 |
126 | sharpness: numpy.ndarray 1D
127 | Array with the sharpness score for each image
128 |
129 | sharpness_window: int
130 | Window for detecting sharp images
131 |
132 | Returns the number of generated groups
133 | """
134 | generated_groups = 0
135 | seq_path = os.path.join(sun3d_data_path,seq_name)
136 | group_prefix = seq_name.replace('/','.')
137 | if not os.path.exists(os.path.join(seq_path, 'extrinsics')):
138 | return 0
139 |
140 | # file list
141 | image_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'image'))) if f.endswith('.jpg')]
142 | depth_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'depthTSDF'))) if f.endswith('.png')]
143 | extrinsics_files = [f for f in sorted(os.listdir(os.path.join(seq_path,'extrinsics'))) if f.endswith('.txt')]
144 |
145 | # read intrinsics
146 | intrinsics = np.loadtxt(os.path.join(seq_path,'intrinsics.txt'))
147 |
148 | # read extrinsics params
149 | extrinsics = np.loadtxt(os.path.join(seq_path,'extrinsics',extrinsics_files[-1]))
150 |
151 | # read time stamp
152 | img_ids, img_timestamps = read_frameid_timestamp(image_files)
153 | _, depth_timestamps = read_frameid_timestamp(depth_files)
154 |
155 | # find a depth for each image
156 | idx_img2depth = []
157 | for img_timestamp in img_timestamps:
158 | idx_img2depth.append(np.argmin(abs(depth_timestamps[:] - img_timestamp)))
159 |
160 |
161 | # find sharp images with nonmaximum suppression
162 | assert sharpness.size == len(image_files)
163 | sharpness_maxfilter = maximum_filter1d(np.asarray(sharpness), size=sharpness_window, mode='constant', cval=0)
164 | sharp_images_index = np.where( sharpness == sharpness_maxfilter )[0]
165 |
166 | used_views = set()
167 | for i1, frame_idx1 in enumerate(sharp_images_index):
168 | if i1 in used_views:
169 | continue
170 |
171 | R1, t1 = read_Rt(extrinsics, frame_idx1)
172 | i2 = i1+1
173 |
174 | depth_file = os.path.join(seq_path,'depthTSDF', depth_files[idx_img2depth[frame_idx1]])
175 | depth1 = read_depth(depth_file)
176 |
177 | if np.count_nonzero(np.isfinite(depth1) & (depth1 > 0)) < 0.5*depth1.size:
178 | continue
179 |
180 | image1 = read_image(os.path.join(seq_path,'image',image_files[frame_idx1]))
181 | view1 = View(R=R1, t=t1, K=intrinsics, image=image1, depth=depth1, depth_metric='camera_z')
182 |
183 | views = [view1]
184 | used_views.add(i1)
185 |
186 | for i2 in range(i1+1, sharp_images_index.size):
187 | frame_idx2 = sharp_images_index[i2]
188 | R2, t2 = read_Rt(extrinsics, frame_idx2)
189 | baseline = np.linalg.norm( (-R1.transpose().dot(t1)) - (-R2.transpose().dot(t2))) # unit is meters
190 | if baseline < baseline_range[0] or baseline > baseline_range[1]:
191 | continue
192 |
193 | cosine = np.dot(R1[2,:],R2[2,:])
194 | if cosine < math.cos(math.radians(70)):
195 | continue
196 |
197 | depth_file = os.path.join(seq_path,'depthTSDF', depth_files[idx_img2depth[frame_idx2]])
198 | depth2 = read_depth(depth_file)
199 |
200 | if np.count_nonzero(np.isfinite(depth2) & (depth2 > 0)) < 0.5*depth2.size:
201 | continue
202 |
203 | view2 = View(R=R2, t=t2, K=intrinsics, image=None, depth=depth2, depth_metric='camera_z')
204 | check_params = {'min_valid_threshold': 0.4, 'min_depth_consistent': 0.7 }
205 | if check_depth_consistency(view1, [view2],**check_params) and check_depth_consistency(view2, [view1], **check_params):
206 | image2 = read_image(os.path.join(seq_path,'image',image_files[frame_idx2]))
207 | view2 = view2._replace(image=image2)
208 | views.append(view2)
209 | used_views.add(i2)
210 | # print(baseline, cosine)
211 | if len(views) > max_views_num:
212 | break
213 |
214 | if len(views) > 1:
215 | group_name = group_prefix+'-{:07d}'.format(img_ids[i1])
216 | print('writing', group_name)
217 |
218 | view_pairs = []
219 | for pair in itertools.product(range(len(views)),repeat=2):
220 | if pair[0] != pair[1]:
221 | baseline = np.linalg.norm(views[pair[0]].t-views[pair[1]].t)
222 | if baseline >= baseline_range[0] and baseline <= baseline_range[1]: # keep only pairs within the allowed baseline range
223 | view_pairs.extend(pair)
224 | for i, v in enumerate(views):
225 | view_group = h5file.require_group(group_name+'/frames/t0/v{0}'.format(i))
226 | write_view(view_group, v)
227 |
228 | # write valid image pair combinations to the group t0
229 | viewpoint_pairs = np.array(view_pairs, dtype=np.int32)
230 | time_group = h5file[group_name]['frames/t0']
231 | time_group.attrs['viewpoint_pairs'] = viewpoint_pairs
232 | generated_groups += 1
233 |
234 | return generated_groups
235 |
236 |
237 |
238 |
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/view.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from collections import namedtuple
19 |
20 | # depth always stores the absolute depth values (not inverse depth)
21 | # image is a PIL.Image with the same dimensions as depth
22 | # depth_metric should always be 'camera_z'
23 | # K corresponds to the width and height of image/depth
24 | # R, t is the world to camera transform
25 | View = namedtuple('View',['R','t','K','image','depth','depth_metric'])
26 |
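# Example (illustration only): constructing a view as done in
# dataset_tools/sun3d_utils.py, where img is a PIL.Image, depth is a float32
# array of matching size and K is the 3x3 intrinsic matrix:
#
#   view = View(R=np.eye(3), t=np.zeros(3), K=K, image=img,
#               depth=depth, depth_metric='camera_z')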
27 |
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/view_io.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import numpy as np
19 | from PIL import Image
20 | from io import BytesIO
21 | from .lz4 import lz4_uncompress, lz4_compress_HC
22 | from .webp import webp_encode_array, webp_encode_image
23 |
24 | from .view import View
25 |
26 |
27 | def read_webp_image(h5_dataset):
28 | """Reads a dataset that stores an image compressed as webp
29 |
30 | h5_dataset : hdf5 dataset object
31 |
32 | Returns the image as PIL Image
33 | """
34 | data = h5_dataset[:].tobytes()
35 | img_bytesio = BytesIO(data)
36 | pil_img = Image.open(img_bytesio,'r')
37 | return pil_img
38 |
39 |
40 | def write_webp_image(h5_group, image, dsname="image"):
41 | """Writes the image as webp to a new dataset
42 |
43 | h5_group: hdf5 group
44 | The group that shall contain the newly created dataset
45 |
46 | image: PIL.Image or rgb numpy array
47 | The image
48 | """
49 | if isinstance(image,np.ndarray):
50 | compressed_data = webp_encode_array(image)
51 | else:
52 | compressed_data = webp_encode_image(image)
53 | image_compressed = np.frombuffer(compressed_data,dtype=np.int8)
54 | ds = h5_group.create_dataset(dsname, data=image_compressed)
55 | ds.attrs['format'] = np.string_("webp")
56 |
57 |
58 |
59 | def read_lz4half_depth(h5_dataset):
60 | """Reads a dataset that stores a depth map in lz4 compressed float16 format
61 |
62 | h5_dataset : hdf5 dataset object
63 |
64 | Returns the depth map as numpy array with float32
65 | """
66 | extents = h5_dataset.attrs['extents']
67 | num_pixel = extents[0]*extents[1]
68 | expected_size = 2*num_pixel
69 | data = h5_dataset[:].tobytes()
70 | depth_raw_data = lz4_uncompress(data,int(expected_size))
71 | depth = np.frombuffer(depth_raw_data,dtype=np.float16)
72 | depth = depth.astype(np.float32)
73 | depth = depth.reshape((extents[0],extents[1]))
74 | return depth
75 |
76 |
77 | def write_lz4half_depth(h5_group, depth, depth_metric, dsname="depth"):
78 | """Writes the depth as 16bit lz4 compressed char array to the given path
79 |
80 | h5_group: hdf5 group
81 | The group that shall contain the newly created dataset
82 |
83 | depth: numpy array with float32
84 | """
85 | assert isinstance(depth, np.ndarray), "depth must be a numpy array"
86 | assert depth.dtype == np.float32, "depth must be a float32 array"
87 | assert len(depth.shape) == 2, "depth must be a 2d array"
88 | assert depth_metric in ('camera_z', 'ray_length'), "depth metric must be either 'camera_z' or 'ray_length'"
89 | height = depth.shape[0]
90 | width = depth.shape[1]
91 | depth16 = depth.astype(np.float16)
92 | depth_raw_data = depth16.tobytes()
93 | compressed_data = lz4_compress_HC(depth_raw_data)
94 | depth_compressed = np.frombuffer(compressed_data,dtype=np.int8)
95 | ds = h5_group.create_dataset(dsname, data=depth_compressed)
96 | ds.attrs['format'] = np.string_("lz4half")
97 | ds.attrs['extents'] = np.array([height, width], dtype=np.int32)
98 | ds.attrs['depth_metric'] = np.string_(depth_metric)
99 |
100 |
101 | def read_camera_params(h5_dataset):
102 | """Reads a dataset that stores camera params in float64
103 |
104 | h5_dataset : hdf5 dataset object
105 |
106 | Returns K,R,t as numpy array with float64
107 | """
108 | fx = h5_dataset[0]
109 | fy = h5_dataset[1]
110 | skew = h5_dataset[2]
111 | cx = h5_dataset[3]
112 | cy = h5_dataset[4]
113 | K = np.array([[fx, skew, cx],
114 | [0, fy, cy],
115 | [0, 0, 1]], dtype=np.float64)
116 | R = np.array([[h5_dataset[5], h5_dataset[8], h5_dataset[11]],
117 | [h5_dataset[6], h5_dataset[9], h5_dataset[12]],
118 | [h5_dataset[7], h5_dataset[10], h5_dataset[13]]], dtype=np.float64)
119 | t = np.array([h5_dataset[14], h5_dataset[15], h5_dataset[16]], dtype=np.float64)
120 | return K,R,t
121 |
122 |
123 | def write_camera_params(h5_group, K, R, t, dsname="camera"):
124 | """Writes the camera params as float64 to the given path
125 |
126 | h5_group: hdf5 group
127 | The group that shall contain the newly created dataset
128 |
129 | K, R, t: numpy array with float64
130 | """
131 | data = np.array([K[0,0], K[1,1], K[0,1], K[0,2], K[1,2],
132 | R[0,0], R[1,0], R[2,0], R[0,1], R[1,1], R[2,1], R[0,2], R[1,2], R[2,2],
133 | t[0], t[1], t[2]], dtype=np.float64)
134 | ds = h5_group.create_dataset(dsname, data=data)
135 | ds.attrs['format'] = "pinhole".encode('ascii')
136 |
137 |
138 | def read_view(h5_group):
139 | """Reads the view group and returns it as a View tuple
140 |
141 | h5_group: hdf5 group
142 | The group for reading the view
143 |
144 | Returns the View tuple
145 | """
146 | img = read_webp_image(h5_group['image'])
147 | depth = read_lz4half_depth(h5_group['depth'])
148 | depth_metric = h5_group['depth'].attrs['depth_metric'].decode('ascii')
149 | K_arr,R_arr,t_arr = read_camera_params(h5_group['camera'])
150 | return View(image=img, depth=depth, depth_metric=depth_metric, K=K_arr, R=R_arr, t=t_arr)
151 |
152 |
153 | def write_view(h5_group, view):
154 | """Writes the View tuple to the group
155 |
156 | h5_group: hdf5 group
157 | The group for storing the view
158 |
159 | view: View namedtuple
160 | The tuple storing the view
161 |
162 | """
163 | for ds in ('image', 'depth', 'camera'):
164 | if ds in h5_group:
165 | del h5_group[ds]
166 |
167 | write_webp_image(h5_group, view.image)
168 | write_lz4half_depth(h5_group, view.depth, view.depth_metric)
169 | write_camera_params(h5_group, view.K, view.R, view.t)
170 |
171 |
172 |
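173 | 
174 | # -----------------------------------------------------------------------------
175 | # Usage sketch: round-trip a synthetic View through an hdf5 group. The file
176 | # name 'example_views.h5' and group name 'view0' are placeholders; running
177 | # this requires h5py plus the libwebp/lz4 helpers imported above.
178 | if __name__ == '__main__':
179 |     import h5py
180 |     img = Image.fromarray(np.random.randint(0, 255, (48, 64, 3), dtype=np.uint8))
181 |     depth = np.random.uniform(1.0, 5.0, (48, 64)).astype(np.float32)
182 |     K = np.array([[64., 0., 32.], [0., 64., 24.], [0., 0., 1.]], dtype=np.float64)
183 |     view = View(image=img, depth=depth, depth_metric='camera_z', K=K, R=np.eye(3), t=np.zeros((3,)))
184 |     with h5py.File('example_views.h5', 'w') as f:
185 |         write_view(f.require_group('view0'), view)
186 |     with h5py.File('example_views.h5', 'r') as f:
187 |         restored = read_view(f['view0'])
188 |     print(restored.depth.shape, restored.depth_metric)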
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/view_tools.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import pyximport; pyximport.install()
19 | import numpy as np
20 |
21 | from .view import View
22 |
23 | def compute_visible_points_mask( view1, view2, borderx=0, bordery=0 ):
24 | """Computes a mask of the pixels in view1 that are visible in view2
25 |
26 | view1: View namedtuple
27 | First view
28 |
29 | view2: View namedtuple
30 | Second view
31 |
32 | borderx: int
33 | border in x direction. Points in the border are considered invalid
34 |
35 | bordery: int
36 | border in y direction. Points in the border are considered invalid
37 |
38 | Returns a mask of valid points
39 | """
40 | from .view_tools_cython import compute_visible_points_mask as _compute_visible_points_mask
41 | assert view1.depth_metric == 'camera_z', "Depth metric must be 'camera_z'"
42 | return _compute_visible_points_mask( view1, view2, borderx, bordery )
43 |
44 |
45 | def compute_depth_ratios( view1, view2 ):
46 | """Projects each point defined in view1 to view2 and computes the ratio of
47 | the depth value of the projected point and the stored depth value in view2.
48 |
49 |
50 | view1: View namedtuple
51 | First view
52 |
53 | view2: View namedtuple
54 | Second view
55 |
56 | Returns the scale value for view2 relative to view1
57 | """
58 | from .view_tools_cython import compute_depth_ratios as _compute_depth_ratios
59 | return _compute_depth_ratios(view1, view2)
60 |
61 |
62 | def check_depth_consistency( view, rest_of_the_views, depth_ratio_threshold=0.9, min_valid_threshold=0.5, min_depth_consistent=0.7 ):
63 | """Checks if the depth of view is consistent with the rest_of_the_views
64 |
65 | view: View namedtuple
66 | Reference view
67 |
68 | rest_of_the_views: list of View namedtuple
69 | List of the rest of the views
70 |
71 | depth_ratio_threshold: float
72 | The allowed minimum depth ratio
73 |
74 | min_valid_threshold: float
75 | ratio of pixels that should have consistent depth values with the rest_of_the_views
76 |
77 | min_depth_consistent: float
78 | ratio of depth consistent pixels with respect to the number of valid depth ratios
79 |
80 | Returns True if the depth is consistent
81 | """
82 | min_ratio_threshold = min(depth_ratio_threshold, 1/depth_ratio_threshold)
83 | max_ratio_threshold = max(depth_ratio_threshold, 1/depth_ratio_threshold)
84 | for v in rest_of_the_views:
85 | dr = compute_depth_ratios(view, v)
86 | valid_dr = dr[np.isfinite(dr)]
87 | if valid_dr.size / dr.size < min_valid_threshold:
88 | return False
89 |
90 | num_consistent = np.count_nonzero((valid_dr > min_ratio_threshold) & (valid_dr < max_ratio_threshold))
91 | if num_consistent / valid_dr.size < min_depth_consistent:
92 | return False
93 |
94 | return True
95 |
96 |
97 | def adjust_intrinsics(view, K_new, width_new, height_new):
98 | """Creates a new View with the specified intrinsics and image dimensions.
99 | The skew parameter K[0,1] will be ignored.
100 |
101 | view: View namedtuple
102 | The view tuple
103 |
104 | K_new: numpy.ndarray
105 | 3x3 calibration matrix with the new intrinsics
106 |
107 | width_new: int
108 | The new image width
109 |
110 | height_new: int
111 | The new image height
112 |
113 | Returns a View tuple with adjusted image, depth and intrinsics
114 | """
115 | from PIL import Image
116 | from skimage.transform import resize
117 | from .helpers import safe_crop_image, safe_crop_array2d
118 |
119 | #original parameters
120 | fx = view.K[0,0]
121 | fy = view.K[1,1]
122 | cx = view.K[0,2]
123 | cy = view.K[1,2]
124 | width = view.image.width
125 | height = view.image.height
126 |
127 | #target param
128 | fx_new = K_new[0,0]
129 | fy_new = K_new[1,1]
130 | cx_new = K_new[0,2]
131 | cy_new = K_new[1,2]
132 |
133 | scale_x = fx_new/fx
134 | scale_y = fy_new/fy
135 |
136 | #resize to get the right focal length
137 | width_resize = int(width*scale_x)
138 | height_resize = int(height*scale_y)
139 | # principal point position in the resized image
140 | cx_resize = cx*scale_x
141 | cy_resize = cy*scale_y
142 |
143 | img_resize = view.image.resize((width_resize, height_resize), Image.BILINEAR if scale_x > 1 else Image.LANCZOS)
144 | if not view.depth is None:
145 | max_depth = np.max(view.depth)
146 | depth_resize = view.depth / max_depth
147 | depth_resize[depth_resize < 0.] = 0.
148 | depth_resize = resize(depth_resize, (height_resize,width_resize), 0,mode='constant') * max_depth
149 | else:
150 | depth_resize = None
151 |
152 | #crop to get the right principal point and resolution
153 | x0 = int(round(cx_resize - cx_new))
154 | y0 = int(round(cy_resize - cy_new))
155 | x1 = x0 + int(width_new)
156 | y1 = y0 + int(height_new)
157 |
158 | if x0 < 0 or y0 < 0 or x1 > width_resize or y1 > height_resize:
159 | print('Warning: Adjusting intrinsics adds a border to the image')
160 | img_new = safe_crop_image(img_resize,(x0,y0,x1,y1),(127,127,127))
161 | if not depth_resize is None:
162 | depth_new = safe_crop_array2d(depth_resize,(x0,y0,x1,y1),0).astype(np.float32)
163 | else:
164 | depth_new = None
165 | else:
166 | img_new = img_resize.crop((x0,y0,x1,y1))
167 | if not depth_resize is None:
168 | depth_new = depth_resize[y0:y1,x0:x1].astype(np.float32)
169 | else:
170 | depth_new = None
171 |
172 | return View(R=view.R, t=view.t, K=K_new, image=img_new, depth=depth_new, depth_metric=view.depth_metric)
173 |
174 |
175 | def resize_view(view, width_new, height_new):
176 | """Creates a new View with the new size.
177 | The intrinsics will be adjusted to match the new image size
178 |
179 | view: View namedtuple
180 | The view tuple
181 |
182 | width_new: int
183 | The new image width
184 |
185 | height_new: int
186 | The new image height
187 |
188 | Returns a View tuple with adjusted image, depth and intrinsics
189 | """
190 | from PIL import Image
191 | from skimage.transform import resize
192 |
193 | if view.image.width == width_new and view.image.height == height_new:
194 | return View(*view)
195 |
196 | #original param
197 | fx = view.K[0,0]
198 | fy = view.K[1,1]
199 | cx = view.K[0,2]
200 | cy = view.K[1,2]
201 | width = view.image.width
202 | height = view.image.height
203 |
204 | #target param
205 | fx_new = width_new*fx/width
206 | fy_new = height_new*fy/height
207 | cx_new = width_new*cx/width
208 | cy_new = height_new*cy/height
209 |
210 | K_new = np.array([fx_new, 0, cx_new, 0, fy_new, cy_new, 0, 0, 1],dtype=np.float64).reshape((3,3))
211 |
212 | img_resize = view.image.resize((width_new, height_new), Image.BILINEAR if width_new > width else Image.LANCZOS)
213 | max_depth = view.depth.max()
214 | depth_resize = max_depth*resize(view.depth/max_depth, (height_new, width_new), order=0, mode='constant')
215 | depth_resize = depth_resize.astype(view.depth.dtype)
216 | return View(R=view.R, t=view.t, K=K_new, image=img_resize, depth=depth_resize, depth_metric=view.depth_metric)
217 |
218 |
219 | def compute_view_distances( views ):
220 | """Computes the spatial distances between views
221 |
222 | views: List of View namedtuple
223 |
224 | Returns the spatial distance as distance matrix
225 | """
226 | from scipy.spatial.distance import pdist, squareform
227 | positions = np.empty((len(views),3))
228 | for i, view in enumerate(views):
229 | C = -view.R.transpose().dot(view.t)
230 | positions[i] = C
231 | return squareform(pdist(positions,'euclidean'))
232 |
233 |
234 | def compute_view_angle( view1, view2 ):
235 | """Computes the viewing direction angle between two views
236 |
237 | view1: View namedtuple
238 | First view
239 |
240 | view2: View namedtuple
241 | Second view
242 |
243 | Returns the angle in radians
244 | """
245 | dot = np.clip(view1.R[2,:].dot(view2.R[2,:]), -1, 1)
246 | return np.arccos(dot)
247 |
248 |
249 | def create_image_overview( views ):
250 | """Creates a small overview image showing the RGB images of all views
251 |
252 | views: list of View or list of list of View
253 |
254 | Returns a PIL.Image
255 | """
256 | assert isinstance(views, list)
257 | from .helpers import concat_images_vertical, concat_images_horizontal
258 | max_height = 100 # maximum height of individual images
259 |
260 | def resize_image(img):
261 | if img.size[1] > max_height:
262 | new_width = int(img.size[0]*(max_height/img.size[1]))
263 | return img.resize((new_width,max_height))
264 | else:
265 | return img
266 |
267 | column_images = []
268 | for col in views:
269 | if isinstance(col,list):
270 | tmp_images = []
271 | for row in col:
272 | tmp_images.append(resize_image(row.image))
273 | col_img = concat_images_vertical(tmp_images)
274 | column_images.append(col_img)
275 | elif isinstance(col,View):
276 | column_images.append(resize_image(col.image))
277 | return concat_images_horizontal(column_images)
278 |
279 |
280 | def visualize_views( views ):
281 | """Visualizes views
282 |
283 | views: list of View namedtuple
284 |
285 | Opens a vtk window with the visualization
286 | """
287 | import vtk
288 | from .. import vis
289 |
290 |
291 | renderer = vtk.vtkRenderer()
292 | renderer.SetBackground(0, 0, 0)
293 |
294 | axes = vtk.vtkAxesActor()
295 | axes.GetXAxisCaptionActor2D().SetHeight(0.05)
296 | axes.GetYAxisCaptionActor2D().SetHeight(0.05)
297 | axes.GetZAxisCaptionActor2D().SetHeight(0.05)
298 | axes.SetCylinderRadius(0.03)
299 | axes.SetShaftTypeToCylinder()
300 | renderer.AddActor(axes)
301 |
302 | renwin = vtk.vtkRenderWindow()
303 | renwin.SetWindowName("Viewer (press 'm' to change colors, use '.' and ',' to adjust opacity)")
304 | renwin.SetSize(800,600)
305 | renwin.AddRenderer(renderer)
306 |
307 |
308 | # An interactor
309 | interactor = vtk.vtkRenderWindowInteractor()
310 | interstyle = vtk.vtkInteractorStyleTrackballCamera()
311 | interactor.SetInteractorStyle(interstyle)
312 | interactor.SetRenderWindow(renwin)
313 |
314 | colors = ((1,0,0), (0,0,1), (0,1,1), (1,0,1), (1,1,0), (1,1,1), (0,1,0))
315 |
316 | pointcloud_polydatas = []
317 | pointcloud_actors = []
318 | for idx, view in enumerate(views):
319 |
320 | img_arr = None
321 | if not view.image is None:
322 | img_arr = np.array(view.image).transpose([2,0,1])
323 |
324 |
325 | pointcloud = vis.compute_point_cloud_from_depthmap(view.depth, view.K, view.R, view.t, colors=img_arr)
326 | pointcloud_polydata = vis.create_pointcloud_polydata(
327 | points=pointcloud['points'],
328 | colors=pointcloud['colors'] if 'colors' in pointcloud else None,
329 | )
330 | pointcloud_polydatas.append(pointcloud_polydata)
331 |
332 | pc_mapper = vtk.vtkPolyDataMapper()
333 | pc_mapper.SetInputData(pointcloud_polydata)
334 |
335 | pc_actor = vtk.vtkActor()
336 | pointcloud_actors.append(pc_actor)
337 | pc_actor.SetMapper(pc_mapper)
338 | pc_actor.GetProperty().SetPointSize(2)
339 |
340 |
341 | color = colors[idx%len(colors)]
342 |
343 | pc_actor.GetProperty().SetColor(*color)
344 | renderer.AddActor(pc_actor)
345 |
346 | cam_actor = vis.create_camera_actor(view.R,view.t)
347 | cam_actor.GetProperty().SetColor(*color)
348 | renderer.AddActor(cam_actor)
349 |
350 |
351 |
352 | def change_point_properties(obj, ev):
353 | # toggle between per-point colors and the uniform actor color only when 'm' is pressed
354 | if "m" == obj.GetKeySym():
355 | if change_point_properties.current_active_scalars == "Colors":
356 | change_point_properties.current_active_scalars = ""
357 | else:
358 | change_point_properties.current_active_scalars = "Colors"
359 | for polydata in pointcloud_polydatas:
360 | polydata.GetPointData().SetActiveScalars(change_point_properties.current_active_scalars)
361 |
362 | if "period" == obj.GetKeySym():
363 | for actor in pointcloud_actors:
364 | opacity = actor.GetProperty().GetOpacity()
365 | opacity = max(0.0, opacity - 0.1)
366 |
367 | actor.GetProperty().SetOpacity(opacity)
368 | if "comma" == obj.GetKeySym():
369 | for actor in pointcloud_actors:
370 | opacity = actor.GetProperty().GetOpacity()
371 | opacity = min(1.0, opacity + 0.1)
372 | actor.GetProperty().SetOpacity(opacity)
373 | renwin.Render()
374 |
375 | change_point_properties.current_active_scalars = "Colors"
376 |
377 | interactor.AddObserver('KeyReleaseEvent', change_point_properties)
378 |
379 | # Start
380 | interactor.Initialize()
381 | interactor.Start()
382 |
383 | interactor.RemoveAllObservers()
384 | del change_point_properties
385 |
386 |
387 |
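388 | 
389 | # -----------------------------------------------------------------------------
390 | # Usage sketch: bring a view to DeMoN's 256x192 input resolution with
391 | # sun3d-like normalized intrinsics. All numbers below are illustrative
392 | # placeholders; requires PIL and scikit-image as used by adjust_intrinsics.
393 | if __name__ == '__main__':
394 |     from PIL import Image
395 |     img = Image.new('RGB', (640, 480), (128, 128, 128))
396 |     depth = np.ones((480, 640), dtype=np.float32)
397 |     K = np.array([[525., 0., 320.], [0., 525., 240.], [0., 0., 1.]])
398 |     v = View(image=img, depth=depth, depth_metric='camera_z', K=K, R=np.eye(3), t=np.zeros((3,)))
399 |     K_new = np.array([[0.89115971*256, 0., 128.], [0., 1.18821287*192, 96.], [0., 0., 1.]])
400 |     v_small = adjust_intrinsics(v, K_new, 256, 192)
401 |     print(v_small.image.size, v_small.depth.shape)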
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/view_tools_cython.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 | from libc.math cimport isfinite
5 |
6 |
7 |
8 | @cython.boundscheck(False)
9 | cdef _compute_visible_points_mask(
10 | np.ndarray[np.float32_t, ndim=2] depth,
11 | np.ndarray[np.float32_t, ndim=2] K1,
12 | np.ndarray[np.float32_t, ndim=2] R1,
13 | np.ndarray[np.float32_t, ndim=1] t1,
14 | np.ndarray[np.float32_t, ndim=2] P2,
15 | int width2,
16 | int height2,
17 | int borderx,
18 | int bordery):
19 |
20 | cdef np.float32_t point3d[3]
21 | cdef np.float32_t point4d[4]
22 | point4d[3] = 1.0
23 | cdef np.float32_t point_proj[3]
24 | cdef int x, y
25 | cdef np.float32_t px, py
26 | cdef np.float32_t d
27 | cdef np.ndarray[np.float32_t,ndim=2] RT = R1.transpose()
28 |
29 | cdef np.ndarray[np.uint8_t,ndim=2] mask = np.zeros((depth.shape[0],depth.shape[1]), dtype=np.uint8)
30 |
31 | for y in range(depth.shape[0]):
32 | for x in range(depth.shape[1]):
33 |
34 | d = depth[y,x]
35 | if np.isfinite(d) and d > 0.0:
36 | px = x + 0.5
37 | py = y + 0.5
38 |
39 | point3d[0] = d*(px - K1[0,2])/K1[0,0]
40 | point3d[1] = d*(py - K1[1,2])/K1[1,1]
41 | point3d[2] = d
42 | point3d[0] -= t1[0]
43 | point3d[1] -= t1[1]
44 | point3d[2] -= t1[2]
45 | point4d[0] = RT[0,0]*point3d[0] + RT[0,1]*point3d[1] + RT[0,2]*point3d[2]
46 | point4d[1] = RT[1,0]*point3d[0] + RT[1,1]*point3d[1] + RT[1,2]*point3d[2]
47 | point4d[2] = RT[2,0]*point3d[0] + RT[2,1]*point3d[1] + RT[2,2]*point3d[2]
48 |
49 | point_proj[0] = P2[0,0]*point4d[0] + P2[0,1]*point4d[1] + P2[0,2]*point4d[2] + P2[0,3]*point4d[3]
50 | point_proj[1] = P2[1,0]*point4d[0] + P2[1,1]*point4d[1] + P2[1,2]*point4d[2] + P2[1,3]*point4d[3]
51 | point_proj[2] = P2[2,0]*point4d[0] + P2[2,1]*point4d[1] + P2[2,2]*point4d[2] + P2[2,3]*point4d[3]
52 | if point_proj[2] > 0.0:
53 | point_proj[0] /= point_proj[2]
54 | point_proj[1] /= point_proj[2]
55 | if point_proj[0] > borderx and point_proj[1] > bordery and point_proj[0] < width2-borderx and point_proj[1] < height2-bordery:
56 | mask[y,x] = 1
57 |
58 | return mask
59 |
60 |
61 |
62 | def compute_visible_points_mask( view1, view2, borderx=0, bordery=0 ):
63 | """Computes a mask of the pixels in view1 that are visible in view2
64 |
65 | view1: View namedtuple
66 | First view
67 |
68 | view2: View namedtuple
69 | Second view
70 |
71 | borderx: int
72 | border in x direction. Points in the border are considered invalid
73 |
74 | bordery: int
75 | border in y direction. Points in the border are considered invalid
76 |
77 | Returns a mask of valid points
78 | """
79 | assert view1.depth_metric == 'camera_z', "Depth metric must be 'camera_z'"
80 |
81 | P2 = np.empty((3,4), dtype=np.float32)
82 | P2[:,0:3] = view2.R
83 | P2[:,3:4] = view2.t.reshape((3,1))
84 | P2 = view2.K.dot(P2)
85 |
86 | if view2.depth is None:
87 | width2 = view1.depth.shape[1]
88 | height2 = view1.depth.shape[0]
89 | else:
90 | width2 = view2.depth.shape[1]
91 | height2 = view2.depth.shape[0]
92 |
93 | return _compute_visible_points_mask(
94 | view1.depth,
95 | view1.K.astype(np.float32),
96 | view1.R.astype(np.float32),
97 | view1.t.astype(np.float32),
98 | P2.astype(np.float32),
99 | width2,
100 | height2,
101 | borderx,
102 | bordery)
103 |
104 |
105 |
106 |
107 | @cython.boundscheck(False)
108 | cdef _compute_depth_ratios(
109 | np.ndarray[np.float32_t, ndim=2] depth1,
110 | np.ndarray[np.float32_t, ndim=2] depth2,
111 | np.ndarray[np.float32_t, ndim=2] K1,
112 | np.ndarray[np.float32_t, ndim=2] R1,
113 | np.ndarray[np.float32_t, ndim=1] t1,
114 | np.ndarray[np.float32_t, ndim=2] P2 ):
115 | cdef np.float32_t point3d[3]
116 | cdef np.float32_t point4d[4]
117 | point4d[3] = 1.0
118 | cdef np.float32_t point_proj[3]
119 | cdef int x, y, x2, y2
120 | cdef np.float32_t px, py
121 | cdef np.float32_t d, d2
122 | cdef np.ndarray[np.float32_t,ndim=2] RT = R1.transpose()
123 |
124 | cdef np.ndarray[np.float32_t,ndim=2] result = np.full((depth1.shape[0],depth1.shape[1]), np.nan, dtype=np.float32)
125 |
126 | for y in range(depth1.shape[0]):
127 | for x in range(depth1.shape[1]):
128 |
129 | d = depth1[y,x]
130 | if np.isfinite(d) and d > 0.0:
131 | px = x + 0.5
132 | py = y + 0.5
133 |
134 | point3d[0] = d*(px - K1[0,2])/K1[0,0]
135 | point3d[1] = d*(py - K1[1,2])/K1[1,1]
136 | point3d[2] = d
137 | point3d[0] -= t1[0]
138 | point3d[1] -= t1[1]
139 | point3d[2] -= t1[2]
140 | point4d[0] = RT[0,0]*point3d[0] + RT[0,1]*point3d[1] + RT[0,2]*point3d[2]
141 | point4d[1] = RT[1,0]*point3d[0] + RT[1,1]*point3d[1] + RT[1,2]*point3d[2]
142 | point4d[2] = RT[2,0]*point3d[0] + RT[2,1]*point3d[1] + RT[2,2]*point3d[2]
143 |
144 | point_proj[0] = P2[0,0]*point4d[0] + P2[0,1]*point4d[1] + P2[0,2]*point4d[2] + P2[0,3]*point4d[3]
145 | point_proj[1] = P2[1,0]*point4d[0] + P2[1,1]*point4d[1] + P2[1,2]*point4d[2] + P2[1,3]*point4d[3]
146 | point_proj[2] = P2[2,0]*point4d[0] + P2[2,1]*point4d[1] + P2[2,2]*point4d[2] + P2[2,3]*point4d[3]
147 | if point_proj[2] > 0.0:
148 | point_proj[0] /= point_proj[2]
149 | point_proj[1] /= point_proj[2]
150 | if point_proj[0] > 0 and point_proj[1] > 0 and point_proj[0] < depth2.shape[1] and point_proj[1] < depth2.shape[0]:
151 | # lookup the depth value
152 | x2 = max(0,min(depth2.shape[1]-1,int(round(point_proj[0]))))
153 | y2 = max(0,min(depth2.shape[0]-1,int(round(point_proj[1]))))
154 | d2 = depth2[y2,x2]
155 | if d2 > 0.0 and isfinite(d2):
156 | s = point_proj[2]/d2
157 | result[y,x] = s
158 |
159 | return result
160 |
161 |
162 |
163 |
164 | def compute_depth_ratios( view1, view2 ):
165 | """Projects each point defined in view1 to view2 and computes the ratio of
166 | the depth value of the projected point and the stored depth value in view2.
167 |
168 |
169 | view1: View namedtuple
170 | First view
171 |
172 | view2: View namedtuple
173 | Second view
174 |
175 | Returns the scale value for view2 relative to view1
176 | """
177 | assert view1.depth_metric == 'camera_z', "Depth metric must be 'camera_z'"
178 | assert view2.depth_metric == 'camera_z', "Depth metric must be 'camera_z'"
179 |
180 | P2 = np.empty((3,4), dtype=np.float32)
181 | P2[:,0:3] = view2.R
182 | P2[:,3:4] = view2.t.reshape((3,1))
183 | P2 = view2.K.dot(P2)
184 |
185 | return _compute_depth_ratios(
186 | view1.depth,
187 | view2.depth,
188 | view1.K.astype(np.float32),
189 | view1.R.astype(np.float32),
190 | view1.t.astype(np.float32),
191 | P2.astype(np.float32) )
192 |
193 |
194 |
195 | @cython.boundscheck(False)
196 | cdef _compute_flow(
197 | np.ndarray[np.float32_t, ndim=2] depth1,
198 | np.ndarray[np.float32_t, ndim=2] K1,
199 | np.ndarray[np.float32_t, ndim=2] R1,
200 | np.ndarray[np.float32_t, ndim=1] t1,
201 | np.ndarray[np.float32_t, ndim=2] P2 ):
202 | cdef np.float32_t point3d[3]
203 | cdef np.float32_t point4d[4]
204 | point4d[3] = 1.0
205 | cdef np.float32_t point_proj[3]
206 | cdef int x, y, x2, y2
207 | cdef np.float32_t px, py
208 | cdef np.float32_t d, d2
209 | cdef np.ndarray[np.float32_t,ndim=2] RT = R1.transpose()
210 |
211 | cdef np.ndarray[np.float32_t,ndim=3] result = np.full((2,depth1.shape[0],depth1.shape[1]), np.nan, dtype=np.float32)
212 |
213 | for y in range(depth1.shape[0]):
214 | for x in range(depth1.shape[1]):
215 |
216 | d = depth1[y,x]
217 | if np.isfinite(d) and d > 0.0:
218 | px = x + 0.5
219 | py = y + 0.5
220 |
221 | point3d[0] = d*(px - K1[0,2])/K1[0,0]
222 | point3d[1] = d*(py - K1[1,2])/K1[1,1]
223 | point3d[2] = d
224 | point3d[0] -= t1[0]
225 | point3d[1] -= t1[1]
226 | point3d[2] -= t1[2]
227 | point4d[0] = RT[0,0]*point3d[0] + RT[0,1]*point3d[1] + RT[0,2]*point3d[2]
228 | point4d[1] = RT[1,0]*point3d[0] + RT[1,1]*point3d[1] + RT[1,2]*point3d[2]
229 | point4d[2] = RT[2,0]*point3d[0] + RT[2,1]*point3d[1] + RT[2,2]*point3d[2]
230 |
231 | point_proj[0] = P2[0,0]*point4d[0] + P2[0,1]*point4d[1] + P2[0,2]*point4d[2] + P2[0,3]*point4d[3]
232 | point_proj[1] = P2[1,0]*point4d[0] + P2[1,1]*point4d[1] + P2[1,2]*point4d[2] + P2[1,3]*point4d[3]
233 | point_proj[2] = P2[2,0]*point4d[0] + P2[2,1]*point4d[1] + P2[2,2]*point4d[2] + P2[2,3]*point4d[3]
234 |
235 | point_proj[0] /= point_proj[2]
236 | point_proj[1] /= point_proj[2]
237 | result[0,y,x] = point_proj[0]-px
238 | result[1,y,x] = point_proj[1]-py
239 |
240 | return result
241 |
242 |
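243 | # Note on the camera model used by the kernels above: a pixel (x, y) in view1
244 | # with depth d back-projects to the camera point
245 | #     X_cam = d * K1^-1 * (x+0.5, y+0.5, 1)^T,
246 | # is transformed to world coordinates via X_world = R1^T * (X_cam - t1), and is
247 | # re-projected into view2 with the 3x4 matrix P2 = K2 * [R2 | t2] that the
248 | # Python wrappers assemble before calling into the cdef functions.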
--------------------------------------------------------------------------------
/python/depthmotionnet/dataset_tools/webp.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from ctypes import *
19 | from PIL import Image
20 | import numpy as np
21 | import os
22 |
23 | # try the version used by the multivih5datareaderop first
24 | try:
25 | _lib_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], '..', '..', '..', 'build','webp','src','webp-build', 'src', '.libs', 'libwebp.so'))
26 | libwebp = CDLL(_lib_path)
27 | except:
28 | # try system version
29 | try:
30 | libwebp = CDLL('libwebp.so')
31 | except:
32 | raise RuntimeError('Cannot load libwebp.so')
33 |
34 | def webp_encode_array(array, quality=90.0):
35 | """encode the array as webp and return as bytes.
36 |
37 | array: uint8 numpy array
38 | array with the following shape [height,width,3] or [3,height,width]
39 |
40 | Returns the compressed bytes array or None on error
41 | """
42 | assert isinstance(array, np.ndarray), "array must be a numpy array"
43 | assert array.dtype == np.uint8, "array must be a uint8 array"
44 | assert len(array.shape) == 3, "array must be a 3d array"
45 | assert array.shape[0] == 3 or array.shape[-1] == 3, "array must have 3 color channels"
46 |
47 | if array.shape[0] != array.shape[-1] and array.shape[0] == 3:
48 | array_rgb = array.transpose([1,2,0])  # convert [3,height,width] to [height,width,3]
49 | else:
50 | array_rgb = array
51 | data = array_rgb.tobytes()
52 |
53 | width = c_int(array_rgb.shape[1])
54 | height = c_int(array_rgb.shape[0])
55 | stride = c_int(array_rgb.shape[1]*3)
56 | output = POINTER(c_char)()
57 | size = libwebp.WebPEncodeRGB(data, width, height, stride, c_float(quality), pointer(output))
58 | if size == 0:
59 | return None
60 |
61 | webp_img = output[:size]
62 | libwebp.WebPFree(output)
63 | # libc.free(output)
64 | return webp_img
65 |
66 |
67 |
68 |
69 | def webp_encode_image(image):
70 | """encode the image as webp and return as bytes
71 |
72 | image: PIL.Image
73 | Image to encode
74 | """
75 | arr = np.array(image)
76 | return webp_encode_array(arr)
77 |
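78 | 
79 | # -----------------------------------------------------------------------------
80 | # Usage sketch: encode a synthetic image and write the raw webp stream to a
81 | # placeholder file name. Requires a loadable libwebp.so as handled above.
82 | if __name__ == '__main__':
83 |     img = Image.fromarray(np.zeros((48, 64, 3), dtype=np.uint8))
84 |     data = webp_encode_image(img)
85 |     if data is not None:
86 |         with open('example.webp', 'wb') as f:
87 |             f.write(data)
88 |         print('wrote %d bytes' % len(data))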
--------------------------------------------------------------------------------
/python/depthmotionnet/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from .evaluate_to_xarray import evaluate, write_xarray_json, read_xarray_json
19 |
20 |
--------------------------------------------------------------------------------
/python/depthmotionnet/evaluation/evaluate_to_xarray.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from .metrics import compute_motion_errors,evaluate_depth,compute_flow_epe
19 | import h5py
20 | import xarray
21 | import numpy as np
22 | import re
23 | import math
24 | import json
25 | import scipy.misc
26 | import time
27 | import skimage.transform
28 |
29 | '''
30 | Functions to evaluate DeMoN results stored as hdf5 files. The computed error measures are stored as an xarray DataArray, which can be converted to json.
31 | '''
32 |
33 | def write_xarray_json(data, out_file):
34 | """Writes xarray as json to a file"""
35 | with open(out_file, 'w') as f:
36 | json.dump(data.to_dict(), f)
37 |
38 | def read_xarray_json(in_file):
39 | """Reads xarray from a json file"""
40 | with open(in_file, 'r') as f:
41 | return xarray.DataArray.from_dict(json.load(f))
42 |
43 | def get_metainfo(data_file):
44 | """Checks a hdf5 data file for its format and dimensions.
45 |
46 | data_file: str
47 | Path to the hdf5 file generated with the test_iterative.py script.
48 |
49 | returns a dictionary with the following keys:
50 | iterative: bool, if the file is from an iterative net
51 | snapshots: list of str, names of the snapshots contained in the file
52 | iterations: list of net_iterations
53 | samples: list of samples
54 | """
55 |
56 | re_iteration = re.compile(r'.*_(\d+)(\.caffemodel\.h5)?')
57 | with h5py.File(data_file,'r') as f:
58 | group_name = list(f.keys())[0]
59 | iterative_net = bool(re_iteration.match(group_name))
60 | if iterative_net:
61 | snapshots = list(f.keys())
62 | snapshots.sort(key=lambda x: int(re_iteration.match(x).group(1)))
63 | snapshot_iters = [int(re_iteration.match(x).group(1)) for x in snapshots]
64 | snapshot_group = f[snapshots[0]]
65 | samples = list(snapshot_group.keys())
66 | samples.sort(key=int)
67 | sample_group = snapshot_group[samples[0]]
68 | # collect iterations from all prediction datasets
69 | iterations = set()
70 | for prediction in ('predicted_depth', 'predicted_normal', 'predicted_motion', 'predicted_flow', 'predicted_conf'):
71 | if prediction in sample_group:
72 | iterations.update( list(sample_group[prediction]) )
73 | iterations = list(iterations)
74 | iterations.sort(key=lambda x: (int(x.split('_')[0]),len(x.split('_'))))
75 | else:
76 | snapshots = ['snapshot']
77 | snapshot_iters = [-1]
78 | iterations = ['0']
79 | samples = list(f.keys())
80 | samples.sort(key=int)
81 |
82 | metainfo = {
83 | 'iterative':iterative_net,
84 | 'snapshots': snapshots,
85 | 'iterations': iterations,
86 | 'samples':samples,
87 | 'snapshot_iters': snapshot_iters,
88 | 'input_file': data_file,
89 | }
90 | return metainfo
91 |
92 |
93 | def invalidate_points_not_visible_in_second_image(depth, motion, intrinsics):
94 | """Sets the depth values for the points not visible in the second view to nan
95 |
96 | depth: numpy.ndarray
97 | array with inverse depth values as stored in the test output h5 files
98 |
99 | motion: numpy.ndarray
100 | the 6 element motion vector (ANGLEAXIS6)
101 |
102 | intrinsics: numpy.ndarray or None
103 | the normalized intrinsics vector
104 | If None we assume intrinsics as in sun3d
105 | """
106 | from .helpers import motion_vector_to_Rt, intrinsics_vector_to_K
107 | from ..dataset_tools.view import View
108 | from ..dataset_tools.view_tools import compute_visible_points_mask
109 | #from matplotlib import pyplot as plt
110 | abs_depth = 1/depth
111 | R, t = motion_vector_to_Rt(motion.squeeze())
112 |
113 | if intrinsics is None:
114 | intrinsics = np.array([[0.891, 1.188, 0.5, 0.5]], dtype=np.float32) # sun3d intrinsics
115 | intrinsics = intrinsics.squeeze()
116 | K = intrinsics_vector_to_K(intrinsics, depth.shape[-1], depth.shape[-2])
117 | view1 = View(R=np.eye(3), t=np.zeros((3,)), K=K, image=None, depth=abs_depth, depth_metric='camera_z')
118 | view2 = View(R=R, t=t, K=K, image=None, depth=abs_depth, depth_metric='camera_z')
119 | invalid_points = compute_visible_points_mask(view1, view2) == 0
120 | # tmp = depth.copy()
121 | depth[invalid_points] = np.nan
122 | # plt.imshow(np.concatenate((tmp,depth),axis=1))
123 | # plt.show(block=True)
124 |
125 |
126 |
127 |
128 |
129 | def get_data(iterative, results_h5_file, snap, sample, net_iter, gt_h5_file=None, depthmask=False, eigen_crop_gt_and_pred=False):
130 | """Helper function to read data from the h5 files
131 |
132 | iterative: bool
133 | If true the hdf5 file stores results from multiple iterations.
134 |
135 | results_h5_file: h5py.File
136 | The file with the network predictions
137 |
138 | snap: str
139 | Name of the snapshot
140 |
141 | sample: str
142 | Sample number as string
143 |
144 | net_iter: int
145 | network iteration
146 |
147 | gt_h5_file: h5py.File
148 | ground truth h5 file.
149 |
150 | depthmask: bool
151 | If True the depth values for points not visible in the second image will be masked out
152 |
153 | eigen_crop_gt_and_pred: bool
154 | If true crops images and depth maps to match the evaluation for NYU in Eigen's paper.
155 |
156 | Returns a dictionary with ground truth and predictions for depth, motion and flow.
157 | """
158 | data_types = ['motion', 'depth', 'flow', 'normals', 'intrinsics']
159 | data = {}
160 | # get ground truth
161 | if iterative and (gt_h5_file is None):
162 | sample_group = results_h5_file[snap][sample]
163 | else:
164 | if gt_h5_file is None:
165 | sample_group = results_h5_file[sample]
166 | else:
167 | sample_group = gt_h5_file[sample]
168 | gt_sample_id = sample_group.attrs['sample_id']
169 |
170 | for dt in data_types:
171 | if dt in sample_group:
172 | data[dt + '_gt'] = sample_group[dt][:]
173 |
174 | # get predictions
175 | if iterative:
176 | sample_group = results_h5_file[snap][sample]
177 | pr_sample_id = sample_group.attrs['sample_id']
178 | assert gt_sample_id == pr_sample_id, "sample ids do not match: prediction id='{0}', ground truth id='{1}'".format(pr_sample_id,gt_sample_id)
179 | for dt in data_types:
180 | if 'predicted_{0}/{1}'.format(dt,net_iter) in sample_group:
181 | data[dt + '_pred'] = sample_group['predicted_'+dt][net_iter][:]
182 | else:
183 | sample_group = results_h5_file[sample]
184 | for dt in data_types:
185 | if ('predicted_'+dt) in sample_group:
186 | data[dt + '_pred'] = sample_group['predicted_'+dt][:]
187 |
188 | for key in data:
189 | data[key] = np.squeeze(data[key])
190 |
191 | if ('depth_pred' in data) and (data['depth_pred'].shape == (109,147)):
192 | print('\n >>> Eigen and Fergus detected, cropping the ground truth <<<\n')
193 | assert(data['depth_gt'].shape == (480,640))
194 | data['depth_gt'] = data['depth_gt'][23:23+436,27:27+588]
195 |
196 | if depthmask and ('motion_gt' in data) and ('depth_gt' in data):
197 | intrinsics = data['intrinsics'] if 'intrinsics' in data else None
198 | invalidate_points_not_visible_in_second_image(data['depth_gt'], data['motion_gt'], intrinsics)
199 |
200 | # reshape the predictions to GT size if necessary
201 | if ('depth_gt' in data) and ('depth_pred' in data) and (not (data['depth_gt'].shape == data['depth_pred'].shape)):
202 | data['depth_pred'] = skimage.transform.resize(data['depth_pred'], data['depth_gt'].shape, order=0, mode='constant', preserve_range=True)
203 | if ('flow_gt' in data) and ('flow_pred' in data) and (not (data['flow_gt'].shape == data['flow_pred'].shape)):
204 | data['flow_pred'] = np.transpose(skimage.transform.resize(\
205 | np.transpose(data['flow_pred'],(1,2,0)), data['depth_gt'].shape, order=0, mode='constant', preserve_range=True),(2,0,1))
206 |
207 | if eigen_crop_gt_and_pred and data['depth_gt'].shape != (436,588):
208 | assert(data['depth_gt'].shape == (480,640))
209 | assert(data['depth_pred'].shape == (480,640))
210 | data['depth_gt'] = data['depth_gt'][23:23+436,27:27+588]
211 | data['depth_pred'] = data['depth_pred'][23:23+436,27:27+588]
212 |
213 | return data
214 |
215 |
216 | def evaluate(results_file, gt_file, depthmask=False, eigen_crop_gt_and_pred=False, depth_scaling='abs'):
217 | '''
218 | Compute different error measures given a hdf5 result (prediction) file, and output them as an xarray.
219 | results_file: str
220 | Path to the network results (prediction) in hdf5 format.
221 |
222 | gt_file: str
223 | Path to the hdf5 file with ground truth data stored in the simple test output format
224 |
225 | depthmask: bool
226 | If True the depth values for points not visible in the second image will be masked out
227 |
228 | eigen_crop_gt_and_pred: bool
229 | If true crops images and depth maps to match the evaluation for NYU in Eigen's paper.
230 |
231 | depth_scaling: str
232 | selects a scaling method for the scaled results. E.g. 'abs' scales such that the
233 | least squares error for the absolute depth values is minimized.
234 |
235 | '''
236 | depth_pred_max=np.inf
237 |
238 | depth_errors_to_compute = ['l1',
239 | 'l1_inverse',
240 | 'scale_invariant',
241 | 'abs_relative',
242 | 'sq_relative',
243 | 'avg_log10',
244 | 'rmse_log',
245 | 'rmse',
246 | 'ratio_threshold_1.25',
247 | 'ratio_threshold_1.5625',
248 | 'ratio_threshold_1.953125']
249 |
250 | errors_to_compute = ['rot_err', 'tran_err', 'tran_angle_err'] + \
251 | ['depth_' + e for e in depth_errors_to_compute] + \
252 | ['flow_epe', 'camera_baseline']
253 |
254 | metainfo = get_metainfo(results_file)
255 | results = xarray.DataArray(np.zeros((len(metainfo['snapshots']), len(metainfo['iterations']), len(metainfo['samples']), len(errors_to_compute), 2)),
256 | [('snapshot', metainfo['snapshots']),
257 | ('iteration', metainfo['iterations']),
258 | ('sample', metainfo['samples']),
259 | ('errors', errors_to_compute),
260 | ('scaled', [False,True])])
261 | results[:] = np.nan
262 |
263 | # save metainfo and evaluation options
264 | for key,val in metainfo.items():
265 | results.attrs[key] = val
266 | results.attrs['gt_file'] = gt_file
267 | results.attrs['depthmask'] = depthmask
268 | results.attrs['depth_scaling'] = depth_scaling
269 | results.attrs['depth_pred_max'] = str(depth_pred_max)
270 |
271 |
272 | with h5py.File(results_file,'r') as results_f:
273 | if gt_file:
274 | gt_f = h5py.File(gt_file,'r')
275 | else:
276 | gt_f = None
277 |
278 | t0 = 0
279 | for nsnap,snap in enumerate(metainfo['snapshots']):
280 | for nsample,sample in enumerate(metainfo['samples']):
281 | for niter,net_iter in enumerate(metainfo['iterations']):
282 | if time.time() - t0 > 5:
283 | t0 = time.time()
284 | print('Processing snapshot %d/%d. sample %d/%d' % \
285 | (nsnap+1, len(metainfo['snapshots']), nsample+1, len(metainfo['samples'])))
286 | data = get_data(metainfo['iterative'], results_f, snap, sample, net_iter, gt_h5_file=gt_f, depthmask=depthmask, eigen_crop_gt_and_pred=eigen_crop_gt_and_pred)
287 |
288 | if ('depth_gt' in data) and ('depth_pred' in data):
289 | #print(data['depth_pred'].dtype, data['depth_pred'][:3,:3], data['depth_gt'].dtype, data['depth_gt'][:3,:3])
290 | if 'motion_gt' in data and (not np.any(np.isnan(data['motion_gt']))):
291 | translation_gt = data['motion_gt'][-3:]
292 | results.loc[snap,net_iter,sample,'camera_baseline'] = np.linalg.norm(translation_gt)
293 | else:
294 | translation_gt = np.array([1.,0.,0.])
295 | depth_errs, depth_errs_pred_scaled = evaluate_depth(translation_gt, data['depth_gt'], data['depth_pred'],
296 | distances_to_compute=depth_errors_to_compute, inverse_gt=True, inverse_pred=True,
297 | depth_scaling=depth_scaling, depth_pred_max=depth_pred_max)
298 |
299 | for dist in depth_errors_to_compute:
300 | results.loc[snap,net_iter,sample,'depth_' + dist,False] = depth_errs[dist]
301 | results.loc[snap,net_iter,sample,'depth_' + dist,True] = depth_errs_pred_scaled[dist]
302 |
303 | if ('motion_gt' in data) and ('motion_pred' in data):
304 | normalize_translation = True
305 | rot_err, tran_err, tran_angle_err = compute_motion_errors(data['motion_pred'], data['motion_gt'], normalize_translation)
306 | results.loc[snap,net_iter,sample,'rot_err'] = rot_err
307 | results.loc[snap,net_iter,sample,'tran_err'] = tran_err
308 | results.loc[snap,net_iter,sample,'tran_angle_err'] = tran_angle_err
309 |
310 | if ('flow_gt' in data) and ('flow_pred' in data):
311 | flow_epe = compute_flow_epe(data['flow_pred'],data['flow_gt'])
312 | results.loc[snap,net_iter,sample,'flow_epe'] = flow_epe
313 | if gt_file:
314 | gt_f.close()
315 |
316 | return results
317 |
318 |
319 |
320 |
321 |
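322 | 
323 | # -----------------------------------------------------------------------------
324 | # Usage sketch: evaluate a prediction file against ground truth and store the
325 | # result. 'predictions.h5', 'ground_truth.h5' and 'errors.json' are placeholder
326 | # file names, not files shipped with the repository.
327 | if __name__ == '__main__':
328 |     results = evaluate('predictions.h5', 'ground_truth.h5', depthmask=True, depth_scaling='abs')
329 |     write_xarray_json(results, 'errors.json')
330 |     # e.g. mean depth L1 error (with optimal scaling) over all snapshots and samples
331 |     print(float(results.sel(errors='depth_l1', scaled=True).mean()))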
--------------------------------------------------------------------------------
/python/depthmotionnet/evaluation/helpers.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import numpy as np
19 | from minieigen import Matrix3, Vector3, Vector2, Quaternion
20 |
21 |
22 | def angleaxis_to_angle_axis(aa, epsilon=1e-6):
23 | """Converts the angle axis vector with angle encoded as magnitude to
24 | the angle axis representation with separate angle and axis.
25 |
26 | aa: minieigen.Vector3
27 | axis angle with angle as vector magnitude
28 |
29 | epsilon: minimum angle in rad
30 | If the angle is smaller than epsilon
31 | then 0,(1,0,0) will be returned
32 |
33 | returns the tuple (angle,axis)
34 | """
35 | angle = aa.norm()
36 | if angle < epsilon:
37 | angle = 0
38 | axis = Vector3(1,0,0)
39 | else:
40 | axis = aa.normalized()
41 | return angle, axis
42 |
43 |
44 | def angleaxis_to_quaternion(aa, epsilon=1e-6):
45 | """Converts the angle axis vector with angle encoded as magnitude to
46 | the quaternion representation.
47 |
48 | aa: minieigen.Vector3
49 | axis angle with angle as vector magnitude
50 |
51 | epsilon: minimum angle in rad
52 | If the angle is smaller than epsilon
53 | then 0,(1,0,0) will be returned
54 |
55 | returns the unit quaternion
56 | """
57 | angle, axis = angleaxis_to_angle_axis(aa,epsilon)
58 | return Quaternion(angle,axis)
59 |
60 |
61 |
62 | def angleaxis_to_rotation_matrix(aa, epsilon=1e-6):
63 | """Converts the angle axis vector with angle encoded as magnitude to
64 | the rotation matrix representation.
65 |
66 | aa: minieigen.Vector3
67 | axis angle with angle as vector magnitude
68 |
69 | epsilon: minimum angle in rad
70 | If the angle is smaller than epsilon
71 | then 0,(1,0,0) will be returned
72 |
73 | returns the 3x3 rotation matrix as numpy.ndarray
74 | """
75 | q = angleaxis_to_quaternion(aa,epsilon)
76 | tmp = q.toRotationMatrix()
77 | return np.array(tmp)
78 |
79 |
80 |
81 | def motion_vector_to_Rt(motion, epsilon=1e-6):
82 | """Converts the motion vector to the rotation matrix R and translation t
83 |
84 | motion: np.ndarray
85 | array with 6 elements. The motions is given as [aa1, aa2, aa3, tx, ty, tz].
86 | aa1,aa2,aa3 is an angle axis representation. The angle is the norm of the axis.
87 | [tx, ty, tz] is a 3d translation.
88 |
89 |
90 | epsilon: minimum angle in rad
91 | If the angle is smaller than epsilon
92 | then 0,(1,0,0) will be returned
93 |
94 | returns the 3x3 rotation matrix and the 3d translation vector
95 | """
96 |
97 | tmp = motion.squeeze().astype(np.float64)
98 | t = tmp[3:].copy()
99 | R = angleaxis_to_rotation_matrix(Vector3(tmp[0:3]),epsilon)
100 | return R, t
101 |
102 |
103 | def intrinsics_vector_to_K(intrinsics, width, height):
104 | """Converts the normalized intrinsics vector to the calibration matrix K
105 |
106 | intrinsics: np.ndarray
107 | 4 element vector with normalized intrinsics [fx, fy, cx, cy]
108 |
109 | width: int
110 | image width in pixels
111 |
112 | height: int
113 | image height in pixels
114 |
115 | returns the calibration matrix K as numpy.ndarray
116 | """
117 | tmp = intrinsics.squeeze().astype(np.float64)
118 | K = np.array([tmp[0]*width, 0, tmp[2]*width, 0, tmp[1]*height, tmp[3]*height, 0, 0, 1], dtype=np.float64).reshape((3,3))
119 |
120 | return K
121 |
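122 | 
123 | # -----------------------------------------------------------------------------
124 | # Usage sketch: convert a 6 element motion vector (angle axis + translation)
125 | # and a normalized intrinsics vector as they appear in the test hdf5 files.
126 | # The numbers below are illustrative placeholders.
127 | if __name__ == '__main__':
128 |     motion = np.array([0.0, 0.1, 0.0, 0.2, 0.0, 0.0])
129 |     R, t = motion_vector_to_Rt(motion)
130 |     K = intrinsics_vector_to_K(np.array([0.89115971, 1.18821287, 0.5, 0.5]), 256, 192)
131 |     print(R, t, K, sep='\n')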
--------------------------------------------------------------------------------
/python/depthmotionnet/helpers.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import tensorflow as tf
19 | import lmbspecialops as sops
20 | import numpy as np
21 |
22 | def convert_NCHW_to_NHWC(inp):
23 | """Convert the tensor from caffe format NCHW into tensorflow format NHWC
24 |
25 | inp: tensor
26 | """
27 | return tf.transpose(inp,[0,2,3,1])
28 |
29 | def convert_NHWC_to_NCHW(inp):
30 | """Convert the tensor from tensorflow format NHWC into caffe format NCHW
31 |
32 | inp: tensor
33 | """
34 | return tf.transpose(inp,[0,3,1,2])
35 |
36 |
37 | def angleaxis_to_rotation_matrix(aa):
38 | """Converts the 3 element angle axis representation to a 3x3 rotation matrix
39 |
40 | aa: numpy.ndarray with 1 dimension and 3 elements
41 |
42 | Returns a 3x3 numpy.ndarray
43 | """
44 | angle = np.sqrt(aa.dot(aa))
45 |
46 | if angle > 1e-6:
47 | c = np.cos(angle);
48 | s = np.sin(angle);
49 | u = np.array([aa[0]/angle, aa[1]/angle, aa[2]/angle]);
50 |
51 | R = np.empty((3,3))
52 | R[0,0] = c+u[0]*u[0]*(1-c); R[0,1] = u[0]*u[1]*(1-c)-u[2]*s; R[0,2] = u[0]*u[2]*(1-c)+u[1]*s;
53 | R[1,0] = u[1]*u[0]*(1-c)+u[2]*s; R[1,1] = c+u[1]*u[1]*(1-c); R[1,2] = u[1]*u[2]*(1-c)-u[0]*s;
54 | R[2,0] = u[2]*u[0]*(1-c)-u[1]*s; R[2,1] = u[2]*u[1]*(1-c)+u[0]*s; R[2,2] = c+u[2]*u[2]*(1-c);
55 | else:
56 | R = np.eye(3)
57 | return R
58 |
59 |
60 | def myLeakyRelu(x):
61 | """Leaky ReLU with leak factor 0.1"""
62 | # return tf.maximum(0.1*x,x)
63 | return sops.leaky_relu(x, leak=0.1)
64 |
65 |
66 | def default_weights_initializer():
67 | return tf.contrib.layers.variance_scaling_initializer()
68 |
69 |
70 | def conv2d_caffe_padding(inputs, num_outputs, kernel_size, data_format, **kwargs):
71 | """Convolution with 'same' padding as in caffe"""
72 | if isinstance(kernel_size,(tuple,list)):
73 | kernel_ysize = kernel_size[0]
74 | kernel_xsize = kernel_size[1]
75 | else:
76 | kernel_ysize = kernel_size
77 | kernel_xsize = kernel_size
78 | pad_y = kernel_ysize//2
79 | pad_x = kernel_xsize//2
80 |
81 | if data_format=='channels_first':
82 | paddings = [[0,0], [0,0], [pad_y, pad_y], [pad_x,pad_x]]
83 | else:
84 | paddings = [[0,0], [pad_y, pad_y], [pad_x,pad_x], [0,0]]
85 | padded_input = tf.pad(inputs, paddings=paddings)
86 | return tf.layers.conv2d(
87 | inputs=padded_input,
88 | filters=num_outputs,
89 | kernel_size=kernel_size,
90 | kernel_initializer=default_weights_initializer(),
91 | padding='valid',
92 | data_format=data_format,
93 | **kwargs,
94 | )
95 |
96 |
97 | def convrelu_caffe_padding(inputs, num_outputs, kernel_size, data_format, **kwargs):
98 | """Shortcut for a single convolution+relu
99 |
100 | See tf.layers.conv2d for a description of remaining parameters
101 | """
102 | return conv2d_caffe_padding(inputs, num_outputs, kernel_size, data_format, activation=myLeakyRelu, **kwargs)
103 |
104 |
105 | def convrelu2_caffe_padding(inputs, num_outputs, kernel_size, name, stride, data_format, **kwargs):
106 | """Shortcut for two convolution+relu with 1D filter kernels and 'same' padding as in caffe
107 |
108 | num_outputs: int or (int,int)
109 | If num_outputs is a tuple then the first element is the number of
110 | outputs for the 1d filter in y direction and the second element is
111 | the final number of outputs.
112 | """
113 | if isinstance(num_outputs,(tuple,list)):
114 | num_outputs_y = num_outputs[0]
115 | num_outputs_x = num_outputs[1]
116 | else:
117 | num_outputs_y = num_outputs
118 | num_outputs_x = num_outputs
119 |
120 | pad = kernel_size//2
121 |
122 | if data_format=='channels_first':
123 | paddings_y = [[0,0], [0,0], [pad, pad], [0,0]]
124 | paddings_x = [[0,0], [0,0], [0,0], [pad, pad]]
125 | else:
126 | paddings_y = [[0,0], [pad, pad], [0,0], [0,0]]
127 | paddings_x = [[0,0], [0,0], [pad, pad], [0,0]]
128 | padded_input = tf.pad(inputs, paddings=paddings_y)
129 |
130 | tmp_y = tf.layers.conv2d(
131 | inputs=padded_input,
132 | filters=num_outputs_y,
133 | kernel_size=[kernel_size,1],
134 | strides=[stride,1],
135 | padding='valid',
136 | activation=myLeakyRelu,
137 | kernel_initializer=default_weights_initializer(),
138 | data_format=data_format,
139 | name=name+'y',
140 | **kwargs,
141 | )
142 | return tf.layers.conv2d(
143 | inputs=tf.pad(tmp_y, paddings=paddings_x),
144 | filters=num_outputs_x,
145 | kernel_size=[1,kernel_size],
146 | strides=[1,stride],
147 | padding='valid',
148 | activation=myLeakyRelu,
149 | kernel_initializer=default_weights_initializer(),
150 | data_format=data_format,
151 | name=name+'x',
152 | **kwargs,
153 | )
154 |
155 |
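156 | 
157 | # -----------------------------------------------------------------------------
158 | # Usage sketch (TF1 style, matching the rest of this file): apply the caffe
159 | # padded conv pair to a dummy NCHW tensor. The layer name and sizes below are
160 | # illustrative placeholders.
161 | if __name__ == '__main__':
162 |     inp = tf.placeholder(tf.float32, shape=(1, 6, 192, 256))
163 |     out = convrelu2_caffe_padding(inp, num_outputs=32, kernel_size=9, name='conv1', stride=2, data_format='channels_first')
164 |     print(out)  # expected shape: (1, 32, 96, 128)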
--------------------------------------------------------------------------------
/python/depthmotionnet/networks_original.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from .blocks_original import *
19 |
20 |
21 |
22 | class BootstrapNet:
23 | def __init__(self, session, data_format='channels_first', batch_size=1):
24 | """Creates the network
25 |
26 | session: tf.Session
27 | Tensorflow session
28 |
29 | data_format: str
30 | Either 'channels_first' or 'channels_last'.
31 | Running on the cpu requires 'channels_last'.
32 |
33 | batch_size: int
34 | The batch size
35 | """
36 | self.session = session
37 | if data_format=='channels_first':
38 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,6,192,256))
39 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,48,64))
40 | else:
41 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,192,256,6))
42 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,3))
43 |
44 | with tf.variable_scope('netFlow1'):
45 | netFlow1_result = flow_block_demon_original(self.placeholder_image_pair, data_format=data_format )
46 | self.netFlow1_result = netFlow1_result
47 | self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow1_result['predict_flowconf5'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3)
48 | self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow1_result['predict_flowconf2'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3)
49 |
50 | with tf.variable_scope('netDM1'):
51 | self.netDM1_result = depthmotion_block_demon_original(
52 | image_pair=self.placeholder_image_pair,
53 | image2_2=self.placeholder_image2_2,
54 | prev_flow2=self.predict_flow2,
55 | prev_flowconf2=self.netFlow1_result['predict_flowconf2'],
56 | data_format=data_format
57 | )
58 |
59 |
60 | def eval(self, image_pair, image2_2):
61 | """Runs the bootstrap network
62 |
63 | image_pair: numpy.ndarray
64 | Array with shape [N,6,192,256] if data_format=='channels_first'
65 |
66 | Image pair in the range [-0.5, 0.5]
67 |
68 | image2_2: numpy.ndarray
69 | Second image at resolution level 2 (downsampled two times)
70 |
71 | The shape for data_format=='channels_first' is [1,3,48,64]
72 |
73 | Returns a dict with the predictions of the bootstrap net
74 | """
75 |
76 | fetches = {
77 | 'predict_flow5': self.predict_flow5,
78 | 'predict_flow2': self.predict_flow2,
79 | 'predict_depth2': self.netDM1_result['predict_depth2'],
80 | 'predict_normal2': self.netDM1_result['predict_normal2'],
81 | 'predict_rotation': self.netDM1_result['predict_rotation'],
82 | 'predict_translation': self.netDM1_result['predict_translation'],
83 | }
84 | feed_dict = {
85 | self.placeholder_image_pair: image_pair,
86 | self.placeholder_image2_2: image2_2,
87 | }
88 | return self.session.run(fetches, feed_dict=feed_dict)
89 |
90 |
91 |
92 | class IterativeNet:
93 | def __init__(self, session, data_format='channels_first', batch_size=1):
94 | """Creates the network
95 |
96 | session: tf.Session
97 | Tensorflow session
98 |
99 | data_format: str
100 | Either 'channels_first' or 'channels_last'.
101 | Running on the cpu requires 'channels_last'.
102 |
103 | batch_size: int
104 | The batch size
105 | """
106 | self.session = session
107 |
108 | intrinsics = np.broadcast_to(np.array([[0.89115971, 1.18821287, 0.5, 0.5]]),(batch_size,4))
109 | self.intrinsics = tf.constant(intrinsics, dtype=tf.float32)
110 |
111 | if data_format == 'channels_first':
112 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,6,192,256))
113 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,48,64))
114 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,1,48,64))
115 | self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,48,64))
116 | else:
117 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(batch_size,192,256,6))
118 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,3))
119 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,1))
120 | self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,3))
121 |
122 | self.placeholder_rotation = tf.placeholder(dtype=tf.float32, shape=(batch_size,3))
123 | self.placeholder_translation = tf.placeholder(dtype=tf.float32, shape=(batch_size,3))
124 |
125 | with tf.variable_scope('netFlow2'):
126 | netFlow2_result = flow_block_demon_original(
127 | image_pair=self.placeholder_image_pair,
128 | image2_2=self.placeholder_image2_2,
129 | intrinsics=self.intrinsics,
130 | prev_predictions={
131 | 'predict_depth2': self.placeholder_depth2,
132 | 'predict_normal2': self.placeholder_normal2,
133 | 'predict_rotation': self.placeholder_rotation,
134 | 'predict_translation': self.placeholder_translation,
135 | },
136 | data_format=data_format,
137 | )
138 | self.netFlow2_result = netFlow2_result
139 | self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow2_result['predict_flowconf5'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3)
140 | self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow2_result['predict_flowconf2'], num_or_size_splits=2, axis=1 if data_format=='channels_first' else 3)
141 |
142 | with tf.variable_scope('netDM2'):
143 | self.netDM2_result = depthmotion_block_demon_original(
144 | image_pair=self.placeholder_image_pair,
145 | image2_2=self.placeholder_image2_2,
146 | prev_flow2=self.predict_flow2,
147 | prev_flowconf2=self.netFlow2_result['predict_flowconf2'],
148 | prev_rotation=self.placeholder_rotation,
149 | prev_translation=self.placeholder_translation,
150 | intrinsics=self.intrinsics,
151 | data_format=data_format,
152 | )
153 |
154 | def eval(self, image_pair, image2_2, depth2, normal2, rotation, translation ):
155 | """Runs the iterative network
156 |
157 | image_pair: numpy.ndarray
158 | Array with shape [N,6,192,256] if data_format=='channels_first'
159 |
160 | Image pair in the range [-0.5, 0.5]
161 |
162 | image2_2: numpy.ndarray
163 | Second image at resolution level 2 (downsampled two times)
164 |
165 | The shape for data_format=='channels_first' is [N,3,48,64]
166 |
167 | depth2: numpy.ndarray
168 | Depth prediction at resolution level 2
169 |
170 | normal2: numpy.ndarray
171 | Normal prediction at resolution level 2
172 |
173 | rotation: numpy.ndarray
174 | Rotation prediction in 3 element angle axis format
175 |
176 | translation: numpy.ndarray
177 | Translation prediction
178 |
179 | Returns a dict with the predictions of the iterative net
180 | """
181 |
182 | fetches = {
183 | 'predict_flow5': self.predict_flow5,
184 | 'predict_flow2': self.predict_flow2,
185 | 'predict_depth2': self.netDM2_result['predict_depth2'],
186 | 'predict_normal2': self.netDM2_result['predict_normal2'],
187 | 'predict_rotation': self.netDM2_result['predict_rotation'],
188 | 'predict_translation': self.netDM2_result['predict_translation'],
189 | }
190 | feed_dict = {
191 | self.placeholder_image_pair: image_pair,
192 | self.placeholder_image2_2: image2_2,
193 | self.placeholder_depth2: depth2,
194 | self.placeholder_normal2: normal2,
195 | self.placeholder_rotation: rotation,
196 | self.placeholder_translation: translation,
197 | }
198 | return self.session.run(fetches, feed_dict=feed_dict)
199 |
200 |
201 |
202 | class RefinementNet:
203 |
204 | def __init__(self, session, data_format='channels_first', batch_size=1):
205 | """Creates the network
206 |
207 | session: tf.Session
208 | Tensorflow session
209 |
210 | data_format: str
211 | Either 'channels_first' or 'channels_last'.
212 | Running on the cpu requires 'channels_last'.
213 |
214 | batch_size: int
215 | The batch size
216 | """
217 | self.session = session
218 |
219 | if data_format == 'channels_first':
220 | self.placeholder_image1 = tf.placeholder(dtype=tf.float32, shape=(batch_size,3,192,256))
221 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,1,48,64))
222 | else:
223 | self.placeholder_image1 = tf.placeholder(dtype=tf.float32, shape=(batch_size,192,256,3))
224 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(batch_size,48,64,1))
225 |
226 |
227 | with tf.variable_scope('netRefine'):
228 | self.netRefine_result = depth_refine_block_demon_original(
229 | image1=self.placeholder_image1,
230 | depthmotion_predictions={
231 | 'predict_depth2': self.placeholder_depth2,
232 | },
233 | data_format=data_format,
234 | )
235 |
236 | def eval(self, image1, depth2):
237 | """Runs the refinement network
238 |
239 | image1: numpy.ndarray
240 | Array with the first image with shape [N,3,192,256] if data_format=='channels_first'
241 |
242 | depth2: numpy.ndarray
243 | Depth prediction at resolution level 2
244 |
245 | Returns a dict with the predictions of the refinement net
246 | """
247 |
248 | fetches = {
249 | 'predict_depth0': self.netRefine_result['predict_depth0'],
250 | }
251 | feed_dict = {
252 | self.placeholder_image1: image1,
253 | self.placeholder_depth2: depth2,
254 | }
255 | return self.session.run(fetches, feed_dict=feed_dict)
256 |
257 |
--------------------------------------------------------------------------------
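The three classes above make up the original DeMoN prediction pipeline: the bootstrap net produces an initial estimate from the image pair, the iterative net refines that estimate over a few passes, and the refinement net upsamples the depth to the input resolution. The sketch below is a minimal usage outline, not the reference implementation (see ```examples/example.py``` for the tested pipeline). It assumes that the first class in this file is ```BootstrapNet``` with the same ```(session, data_format, batch_size)``` constructor as ```IterativeNet```, that the first three channels of the image pair hold the first image, and that ```weights_path``` points at the downloaded checkpoint; all of these are assumptions, not statements about the repository.

```python
import numpy as np
import tensorflow as tf
from depthmotionnet.networks_original import BootstrapNet, IterativeNet, RefinementNet

session = tf.Session()
bootstrap_net = BootstrapNet(session, data_format='channels_first')   # 'channels_first' needs a GPU
iterative_net = IterativeNet(session, data_format='channels_first')
refinement_net = RefinementNet(session, data_format='channels_first')

# hypothetical checkpoint prefix; the real file comes from weights/download_weights.sh
weights_path = 'weights/demon_original'
tf.train.Saver().restore(session, weights_path)

# dummy inputs with the shapes documented in the docstrings above (batch size 1, channels_first)
image_pair = np.zeros((1, 6, 192, 256), dtype=np.float32)  # both images, values in [-0.5, 0.5]
image2_2 = np.zeros((1, 3, 48, 64), dtype=np.float32)      # second image at resolution level 2
image1 = image_pair[:, 0:3, :, :]                          # assumes channels 0..2 are the first image

result = bootstrap_net.eval(image_pair, image2_2)
for _ in range(3):  # a small, fixed number of refinement passes
    result = iterative_net.eval(
        image_pair, image2_2,
        result['predict_depth2'], result['predict_normal2'],
        result['predict_rotation'], result['predict_translation'])

refined = refinement_net.eval(image1, result['predict_depth2'])
depth0 = refined['predict_depth0']  # refined depth prediction at the input resolution
```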
/python/depthmotionnet/v2/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 |
19 |
--------------------------------------------------------------------------------
/python/depthmotionnet/v2/helpers.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import tensorflow as tf
19 | import lmbspecialops as sops
20 | import numpy as np
21 |
22 | from depthmotionnet.helpers import *
23 |
24 | def conv2d(inputs, num_outputs, kernel_size, data_format, **kwargs):
25 | """Convolution with 'same' padding"""
26 |
27 | return tf.layers.conv2d(
28 | inputs=inputs,
29 | filters=num_outputs,
30 | kernel_size=kernel_size,
31 | kernel_initializer=default_weights_initializer(),
32 | padding='same',
33 | data_format=data_format,
34 | **kwargs
35 | )
36 |
37 |
38 | def convrelu(inputs, num_outputs, kernel_size, data_format, **kwargs):
39 | """Shortcut for a single convolution+relu
40 |
41 | See tf.layers.conv2d for a description of remaining parameters
42 | """
43 | return conv2d(inputs, num_outputs, kernel_size, data_format, activation=myLeakyRelu, **kwargs)
44 |
45 |
46 | def convrelu2(inputs, num_outputs, kernel_size, name, stride, data_format, **kwargs):
47 | """Shortcut for two convolution+relu with 1D filter kernels
48 |
49 | num_outputs: int or (int,int)
50 | If num_outputs is a tuple then the first element is the number of
51 | outputs for the 1d filter in y direction and the second element is
52 | the final number of outputs.
53 | """
54 | if isinstance(num_outputs,(tuple,list)):
55 | num_outputs_y = num_outputs[0]
56 | num_outputs_x = num_outputs[1]
57 | else:
58 | num_outputs_y = num_outputs
59 | num_outputs_x = num_outputs
60 |
61 | if isinstance(kernel_size,(tuple,list)):
62 | kernel_size_y = kernel_size[0]
63 | kernel_size_x = kernel_size[1]
64 | else:
65 | kernel_size_y = kernel_size
66 | kernel_size_x = kernel_size
67 |
68 | tmp_y = tf.layers.conv2d(
69 | inputs=inputs,
70 | filters=num_outputs_y,
71 | kernel_size=[kernel_size_y,1],
72 | strides=[stride,1],
73 | padding='same',
74 | activation=myLeakyRelu,
75 | kernel_initializer=default_weights_initializer(),
76 | data_format=data_format,
77 | name=name+'y',
78 | **kwargs
79 | )
80 | return tf.layers.conv2d(
81 | inputs=tmp_y,
82 | filters=num_outputs_x,
83 | kernel_size=[1,kernel_size_x],
84 | strides=[1,stride],
85 | padding='same',
86 | activation=myLeakyRelu,
87 | kernel_initializer=default_weights_initializer(),
88 | data_format=data_format,
89 | name=name+'x',
90 | **kwargs
91 | )
92 |
93 |
94 | def recursive_median_downsample(inp, iterations):
95 | """Recursively downsamples the input using a 3x3 median filter"""
96 | result = []
97 | for i in range(iterations):
98 | if not result:
99 | tmp_inp = inp
100 | else:
101 | tmp_inp = result[-1]
102 | result.append(sops.median3x3_downsample(tmp_inp))
103 | return tuple(result)
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
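For readers unfamiliar with the factorized convolutions used above: ```convrelu2``` replaces a single k x k convolution with a ky x 1 convolution followed by a 1 x kx convolution, each followed by a leaky ReLU. The snippet below is a standalone sketch of that expansion using plain ```tf.layers``` calls; it uses a standard ReLU instead of the module's ```myLeakyRelu```, and the tensor and layer names are made up for illustration.

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(1, 3, 192, 256))  # NCHW input

# convrelu2(x, num_outputs=(24, 32), kernel_size=3, name='conv1', stride=2, data_format='channels_first')
# expands to roughly the following two layers:
tmp_y = tf.layers.conv2d(x, filters=24, kernel_size=[3, 1], strides=[2, 1],
                         padding='same', activation=tf.nn.relu,
                         data_format='channels_first', name='conv1y')
out = tf.layers.conv2d(tmp_y, filters=32, kernel_size=[1, 3], strides=[1, 2],
                       padding='same', activation=tf.nn.relu,
                       data_format='channels_first', name='conv1x')
```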
/python/depthmotionnet/v2/networks.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | from .blocks import *
19 |
20 | class BootstrapNet:
21 | def __init__(self, session):
22 | """Creates the bootstrap network
23 |
24 | session: tf.Session
25 | Tensorflow session
26 |
27 | """
28 | self.session = session
29 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(1,6,192,256))
30 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
31 |
32 |
33 | with tf.variable_scope('netFlow1'):
34 | netFlow1_result = flow_block(self.placeholder_image_pair )
35 | self.netFlow1_result = netFlow1_result
36 | self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow1_result['predict_flowconf5'], num_or_size_splits=2, axis=1)
37 | self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow1_result['predict_flowconf2'], num_or_size_splits=2, axis=1)
38 |
39 | with tf.variable_scope('netDM1'):
40 | self.netDM1_result = depthmotion_block(
41 | image_pair=self.placeholder_image_pair,
42 | image2_2=self.placeholder_image2_2,
43 | prev_flow2=self.predict_flow2,
44 | prev_flowconf2=self.netFlow1_result['predict_flowconf2'],
45 | )
46 |
47 |
48 | def eval(self, image_pair, image2_2):
49 | """Runs the bootstrap network
50 |
51 | image_pair: numpy.ndarray
52 | Array with shape [1,6,192,256] (channels_first layout)
53 |
54 | Image pair in the range [-0.5, 0.5]
55 |
56 | image2_2: numpy.ndarray
57 | Second image at resolution level 2 (downsampled two times)
58 |
59 | The shape is [1,3,48,64] (channels_first layout)
60 |
61 | Returns a dict with the predictions of the bootstrap net
62 | """
63 | fetches = {
64 | 'predict_flow5': self.predict_flow5,
65 | 'predict_flow2': self.predict_flow2,
66 | 'predict_depth2': self.netDM1_result['predict_depth2'],
67 | 'predict_normal2': self.netDM1_result['predict_normal2'],
68 | 'predict_rotation': self.netDM1_result['predict_rotation'],
69 | 'predict_translation': self.netDM1_result['predict_translation'],
70 | }
71 | feed_dict = {
72 | self.placeholder_image_pair: image_pair,
73 | self.placeholder_image2_2: image2_2,
74 | }
75 | return self.session.run(fetches, feed_dict=feed_dict)
76 |
77 |
78 |
79 |
80 | class IterativeNet:
81 | def __init__(self, session):
82 | """Creates the bootstrap network
83 |
84 | session: tf.Session
85 | Tensorflow session
86 |
87 | """
88 | self.session = session
89 |
90 | self.intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]], dtype=tf.float32)
91 |
92 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(1,6,192,256))
93 | self.placeholder_image2_2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
94 |
95 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(1,1,48,64))
96 | self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
97 | self.placeholder_rotation = tf.placeholder(dtype=tf.float32, shape=(1,3))
98 | self.placeholder_translation = tf.placeholder(dtype=tf.float32, shape=(1,3))
99 |
100 | with tf.variable_scope('netFlow2'):
101 | netFlow2_result = flow_block(
102 | image_pair=self.placeholder_image_pair,
103 | image2_2=self.placeholder_image2_2,
104 | intrinsics=self.intrinsics,
105 | prev_predictions={
106 | 'predict_depth2': self.placeholder_depth2,
107 | 'predict_normal2': self.placeholder_normal2,
108 | 'predict_rotation': self.placeholder_rotation,
109 | 'predict_translation': self.placeholder_translation,
110 | },
111 | )
112 | self.netFlow2_result = netFlow2_result
113 | self.predict_flow5, self.predict_conf5 = tf.split(value=netFlow2_result['predict_flowconf5'], num_or_size_splits=2, axis=1)
114 | self.predict_flow2, self.predict_conf2 = tf.split(value=netFlow2_result['predict_flowconf2'], num_or_size_splits=2, axis=1)
115 |
116 | with tf.variable_scope('netDM2'):
117 | self.netDM2_result = depthmotion_block(
118 | image_pair=self.placeholder_image_pair,
119 | image2_2=self.placeholder_image2_2,
120 | prev_flow2=self.predict_flow2,
121 | prev_flowconf2=self.netFlow2_result['predict_flowconf2'],
122 | intrinsics=self.intrinsics,
123 | prev_rotation=self.placeholder_rotation,
124 | prev_translation=self.placeholder_translation,
125 | )
126 |
127 | def eval(self, image_pair, image2_2, depth2, normal2, rotation, translation ):
128 | """Runs the iterative network
129 |
130 | image_pair: numpy.ndarray
131 | Array with shape [1,6,192,256]
132 |
133 | Image pair in the range [-0.5, 0.5]
134 |
135 | image2_2: numpy.ndarray
136 | Second image at resolution level 2 (downsampled two times)
137 |
138 | The shape is [1,3,48,64]
139 |
140 | depth2: numpy.ndarray
141 | Depth prediction at resolution level 2
142 |
143 | normal2: numpy.ndarray
144 | Normal prediction at resolution level 2
145 |
146 | rotation: numpy.ndarray
147 | Rotation prediction in 3 element angle axis format
148 |
149 | translation: numpy.ndarray
150 | Translation prediction
151 |
152 | Returns a dict with the predictions of the iterative net
153 | """
154 |
155 |
156 | fetches = {
157 | 'predict_flow5': self.predict_flow5,
158 | 'predict_flow2': self.predict_flow2,
159 | 'predict_depth2': self.netDM2_result['predict_depth2'],
160 | 'predict_normal2': self.netDM2_result['predict_normal2'],
161 | 'predict_rotation': self.netDM2_result['predict_rotation'],
162 | 'predict_translation': self.netDM2_result['predict_translation'],
163 | }
164 | feed_dict = {
165 | self.placeholder_image_pair: image_pair,
166 | self.placeholder_image2_2: image2_2,
167 | self.placeholder_depth2: depth2,
168 | self.placeholder_normal2: normal2,
169 | self.placeholder_rotation: rotation,
170 | self.placeholder_translation: translation,
171 | }
172 | return self.session.run(fetches, feed_dict=feed_dict)
173 |
174 |
175 |
176 |
177 | class RefinementNet:
178 |
179 | def __init__(self, session):
180 | """Creates the network
181 |
182 | session: tf.Session
183 | Tensorflow session
184 |
185 | """
186 |
187 | self.session = session
188 | self.placeholder_image_pair = tf.placeholder(dtype=tf.float32, shape=(1,6,192,256))
189 | self.placeholder_image1 = tf.placeholder(dtype=tf.float32, shape=(1,3,192,256))
190 | self.placeholder_depth2 = tf.placeholder(dtype=tf.float32, shape=(1,1,48,64))
191 | self.placeholder_normal2 = tf.placeholder(dtype=tf.float32, shape=(1,3,48,64))
192 | self.placeholder_rotation = tf.placeholder(dtype=tf.float32, shape=(1,3))
193 | self.placeholder_translation = tf.placeholder(dtype=tf.float32, shape=(1,3))
194 |
195 | with tf.variable_scope('netRefine'):
196 | self.netRefine_result = depth_refine_block(
197 | image1=self.placeholder_image1,
198 | depthmotion_predictions={
199 | 'predict_depth2': self.placeholder_depth2,
200 | 'predict_normal2': self.placeholder_normal2,
201 | },
202 | )
203 |
204 | def eval(self, image1, depth2, normal2):
205 | """Runs the refinement network
206 |
207 | image1: numpy.ndarray
208 | Array with the first image with shape [1,3,192,256]
209 |
210 | depth2: numpy.ndarray
211 | Depth prediction at resolution level 2
212 |
213 | normal2: numpy.ndarray
214 | Normal prediction at resolution level 2
215 |
216 | Returns a dict with the predictions of the refinement net
217 | """
218 |
219 | fetches = {
220 | 'predict_depth0': self.netRefine_result['predict_depth0'],
221 | 'predict_normal0': self.netRefine_result['predict_normal0'],
222 | }
223 | feed_dict = {
224 | self.placeholder_image1: image1,
225 | self.placeholder_depth2: depth2,
226 | self.placeholder_normal2: normal2,
227 | }
228 | return self.session.run(fetches, feed_dict=feed_dict)
229 |
230 |
231 |
232 |
233 |
--------------------------------------------------------------------------------
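The v2 classes mirror the original ones but are fixed to batch size 1 and channels_first, and the v2 ```RefinementNet``` additionally consumes and returns normals. Below is a minimal sketch of the changed refinement call with dummy inputs taken from the docstring shapes; it assumes ```lmbspecialops``` is importable, and it initializes variables randomly in place of restoring trained weights.

```python
import numpy as np
import tensorflow as tf
from depthmotionnet.v2.networks import RefinementNet

session = tf.Session()
refinement_net = RefinementNet(session)
session.run(tf.global_variables_initializer())  # stand-in; restore trained 'netRefine' weights in practice

image1 = np.zeros((1, 3, 192, 256), dtype=np.float32)   # first image, values in [-0.5, 0.5]
depth2 = np.zeros((1, 1, 48, 64), dtype=np.float32)     # depth prediction at level 2
normal2 = np.zeros((1, 3, 48, 64), dtype=np.float32)    # normal prediction at level 2

result = refinement_net.eval(image1, depth2, normal2)
depth0, normal0 = result['predict_depth0'], result['predict_normal0']
```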
/python/depthmotionnet/vis.py:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import pyximport; pyximport.install()
19 | import numpy as np
20 | from .helpers import angleaxis_to_rotation_matrix
21 |
22 |
23 | def compute_point_cloud_from_depthmap( depth, K, R, t, normals=None, colors=None ):
24 | """Creates a point cloud numpy array and optional normals and colors arrays
25 |
26 | depth: numpy.ndarray
27 | 2d array with depth values
28 |
29 | K: numpy.ndarray
30 | 3x3 matrix with internal camera parameters
31 |
32 | R: numpy.ndarray
33 | 3x3 rotation matrix
34 |
35 | t: numpy.ndarray
36 | 3d translation vector
37 |
38 | normals: numpy.ndarray
39 | optional array with normal vectors
40 |
41 | colors: numpy.ndarray
42 | optional RGB image with the same dimensions as the depth map.
43 | The shape is (3,h,w) with type uint8
44 |
45 | """
46 | from .vis_cython import compute_point_cloud_from_depthmap as _compute_point_cloud_from_depthmap
47 | return _compute_point_cloud_from_depthmap(depth, K, R, t, normals, colors)
48 |
49 |
50 | def create_camera_polydata(R, t, only_polys=False):
51 | """Creates a vtkPolyData object with a camera mesh"""
52 | import vtk
53 | cam_points = np.array([
54 | [0, 0, 0],
55 | [-1,-1, 1.5],
56 | [ 1,-1, 1.5],
57 | [ 1, 1, 1.5],
58 | [-1, 1, 1.5],
59 | [-0.5, 1, 1.5],
60 | [ 0.5, 1, 1.5],
61 | [ 0,1.2,1.5],
62 | [ 1,-0.5,1.5],
63 | [ 1, 0.5,1.5],
64 | [ 1.2, 0, 1.5]]
65 | )
66 | cam_points = (0.25*cam_points - t).dot(R)
67 |
68 | vpoints = vtk.vtkPoints()
69 | vpoints.SetNumberOfPoints(cam_points.shape[0])
70 | for i in range(cam_points.shape[0]):
71 | vpoints.SetPoint(i, cam_points[i])
72 | vpoly = vtk.vtkPolyData()
73 | vpoly.SetPoints(vpoints)
74 |
75 | poly_cells = vtk.vtkCellArray()
76 |
77 | if not only_polys:
78 | line_cells = vtk.vtkCellArray()
79 |
80 | line_cells.InsertNextCell( 5 );
81 | line_cells.InsertCellPoint( 1 );
82 | line_cells.InsertCellPoint( 2 );
83 | line_cells.InsertCellPoint( 3 );
84 | line_cells.InsertCellPoint( 4 );
85 | line_cells.InsertCellPoint( 1 );
86 |
87 | line_cells.InsertNextCell( 3 );
88 | line_cells.InsertCellPoint( 1 );
89 | line_cells.InsertCellPoint( 0 );
90 | line_cells.InsertCellPoint( 2 );
91 |
92 | line_cells.InsertNextCell( 3 );
93 | line_cells.InsertCellPoint( 3 );
94 | line_cells.InsertCellPoint( 0 );
95 | line_cells.InsertCellPoint( 4 );
96 |
97 | # x-axis indicator
98 | line_cells.InsertNextCell( 3 );
99 | line_cells.InsertCellPoint( 8 );
100 | line_cells.InsertCellPoint( 10 );
101 | line_cells.InsertCellPoint( 9 );
102 | vpoly.SetLines(line_cells)
103 | else:
104 | # left
105 | poly_cells.InsertNextCell( 3 );
106 | poly_cells.InsertCellPoint( 0 );
107 | poly_cells.InsertCellPoint( 1 );
108 | poly_cells.InsertCellPoint( 4 );
109 |
110 | # right
111 | poly_cells.InsertNextCell( 3 );
112 | poly_cells.InsertCellPoint( 0 );
113 | poly_cells.InsertCellPoint( 3 );
114 | poly_cells.InsertCellPoint( 2 );
115 |
116 | # top
117 | poly_cells.InsertNextCell( 3 );
118 | poly_cells.InsertCellPoint( 0 );
119 | poly_cells.InsertCellPoint( 4 );
120 | poly_cells.InsertCellPoint( 3 );
121 |
122 | # bottom
123 | poly_cells.InsertNextCell( 3 );
124 | poly_cells.InsertCellPoint( 0 );
125 | poly_cells.InsertCellPoint( 2 );
126 | poly_cells.InsertCellPoint( 1 );
127 |
128 | # x-axis indicator
129 | poly_cells.InsertNextCell( 3 );
130 | poly_cells.InsertCellPoint( 8 );
131 | poly_cells.InsertCellPoint( 10 );
132 | poly_cells.InsertCellPoint( 9 );
133 |
134 | # up vector (y-axis)
135 | poly_cells.InsertNextCell( 3 );
136 | poly_cells.InsertCellPoint( 5 );
137 | poly_cells.InsertCellPoint( 6 );
138 | poly_cells.InsertCellPoint( 7 );
139 |
140 | vpoly.SetPolys(poly_cells)
141 |
142 | return vpoly
143 |
144 |
145 | def create_camera_actor(R, t):
146 | """Creates a vtkActor object with a camera mesh"""
147 | import vtk
148 | vpoly = create_camera_polydata(R, t)
149 | mapper = vtk.vtkPolyDataMapper()
150 | mapper.SetInputData(vpoly)
151 |
152 | actor = vtk.vtkActor()
153 | actor.SetMapper(mapper)
154 | actor.GetProperty().LightingOff()
155 | actor.GetProperty().SetLineWidth(2)
156 |
157 | return actor
158 |
159 |
160 | def create_pointcloud_polydata(points, colors=None):
161 | """Creates a vtkPolyData object with the point cloud from numpy arrays
162 |
163 | points: numpy.ndarray
164 | pointcloud with shape (n,3)
165 |
166 | colors: numpy.ndarray
167 | uint8 array with colors for each point. shape is (n,3)
168 |
169 | Returns vtkPolyData object
170 | """
171 | import vtk
172 | vpoints = vtk.vtkPoints()
173 | vpoints.SetNumberOfPoints(points.shape[0])
174 | for i in range(points.shape[0]):
175 | vpoints.SetPoint(i, points[i])
176 | vpoly = vtk.vtkPolyData()
177 | vpoly.SetPoints(vpoints)
178 |
179 | if not colors is None:
180 | vcolors = vtk.vtkUnsignedCharArray()
181 | vcolors.SetNumberOfComponents(3)
182 | vcolors.SetName("Colors")
183 | vcolors.SetNumberOfTuples(points.shape[0])
184 | for i in range(points.shape[0]):
185 | vcolors.SetTuple3(i ,colors[i,0],colors[i,1], colors[i,2])
186 | vpoly.GetPointData().SetScalars(vcolors)
187 |
188 | vcells = vtk.vtkCellArray()
189 |
190 | for i in range(points.shape[0]):
191 | vcells.InsertNextCell(1)
192 | vcells.InsertCellPoint(i)
193 |
194 | vpoly.SetVerts(vcells)
195 |
196 | return vpoly
197 |
198 |
199 |
200 | def create_pointcloud_actor(points, colors=None):
201 | """Creates a vtkActor with the point cloud from numpy arrays
202 |
203 | points: numpy.ndarray
204 | pointcloud with shape (n,3)
205 |
206 | colors: numpy.ndarray
207 | uint8 array with colors for each point. shape is (n,3)
208 |
209 | Returns vtkActor object
210 | """
211 | import vtk
212 | vpoly = create_pointcloud_polydata(points, colors)
213 | mapper = vtk.vtkPolyDataMapper()
214 | mapper.SetInputData(vpoly)
215 |
216 | actor = vtk.vtkActor()
217 | actor.SetMapper(mapper)
218 | actor.GetProperty().SetPointSize(3)
219 |
220 | return actor
221 |
222 |
223 | def visualize_prediction( inverse_depth, intrinsics=None, normals=None, rotation=None, translation=None, image=None ):
224 | """Visualizes the network predictions
225 |
226 | inverse_depth: numpy.ndarray
227 | 2d array with the inverse depth values with shape (h,w)
228 |
229 | intrinsics: numpy.ndarray
230 | 4 element vector with the normalized intrinsic parameters with shape
231 | (4,)
232 |
233 | normals: numpy.ndarray
234 | normal map with shape (3,h,w)
235 |
236 | rotation: numpy.ndarray
237 | rotation in axis angle format with 3 elements with shape (3,)
238 |
239 | translation: numpy.ndarray
240 | translation vector with shape (3,)
241 |
242 | image: numpy.ndarray
243 | Image with shape (3,h,w) in the range [-0.5,0.5].
244 | """
245 | import vtk
246 | depth = (1/inverse_depth).squeeze()
247 |
248 | w = depth.shape[-1]
249 | h = depth.shape[-2]
250 |
251 | if intrinsics is None:
252 | intrinsics = np.array([0.89115971, 1.18821287, 0.5, 0.5]) # sun3d intrinsics
253 |
254 | K = np.eye(3)
255 | K[0,0] = intrinsics[0]*w
256 | K[1,1] = intrinsics[1]*h
257 | K[0,2] = intrinsics[2]*w
258 | K[1,2] = intrinsics[3]*h
259 |
260 | R1 = np.eye(3)
261 | t1 = np.zeros((3,))
262 |
263 | if not rotation is None and not translation is None:
264 | R2 = angleaxis_to_rotation_matrix(rotation.squeeze())
265 | t2 = translation.squeeze()
266 | else:
267 | R2 = np.eye(3)
268 | t2 = np.zeros((3,))
269 |
270 | if not normals is None:
271 | n = normals.squeeze()
272 | else:
273 | n = None
274 |
275 | if not image is None:
276 | img = ((image+0.5)*255).astype(np.uint8)
277 | else:
278 | img = None
279 |
280 | pointcloud = compute_point_cloud_from_depthmap(depth, K, R1, t1, n, img)
281 |
282 | renderer = vtk.vtkRenderer()
283 | renderer.SetBackground(0, 0, 0)
284 |
285 | pointcloud_actor = create_pointcloud_actor(
286 | points=pointcloud['points'],
287 | colors=pointcloud['colors'] if 'colors' in pointcloud else None,
288 | )
289 | renderer.AddActor(pointcloud_actor)
290 |
291 | cam1_actor = create_camera_actor(R1,t1)
292 | renderer.AddActor(cam1_actor)
293 |
294 | cam2_actor = create_camera_actor(R2,t2)
295 | renderer.AddActor(cam2_actor)
296 |
297 | axes = vtk.vtkAxesActor()
298 | axes.GetXAxisCaptionActor2D().SetHeight(0.05)
299 | axes.GetYAxisCaptionActor2D().SetHeight(0.05)
300 | axes.GetZAxisCaptionActor2D().SetHeight(0.05)
301 | axes.SetCylinderRadius(0.03)
302 | axes.SetShaftTypeToCylinder()
303 | renderer.AddActor(axes)
304 |
305 | renwin = vtk.vtkRenderWindow()
306 | renwin.SetWindowName("Point Cloud Viewer")
307 | renwin.SetSize(800,600)
308 | renwin.AddRenderer(renderer)
309 |
310 |
311 | # An interactor
312 | interactor = vtk.vtkRenderWindowInteractor()
313 | interstyle = vtk.vtkInteractorStyleTrackballCamera()
314 | interactor.SetInteractorStyle(interstyle)
315 | interactor.SetRenderWindow(renwin)
316 |
317 | # Start
318 | interactor.Initialize()
319 | interactor.Start()
320 |
321 |
322 | def export_prediction_to_ply( output_prefix, inverse_depth, intrinsics=None, normals=None, rotation=None, translation=None, image=None ):
323 | """Exports the network predictions to ply files meant for external visualization
324 |
325 | inverse_depth: numpy.ndarray
326 | 2d array with the inverse depth values with shape (h,w)
327 |
328 | intrinsics: numpy.ndarray
329 | 4 element vector with the normalized intrinsic parameters with shape
330 | (4,)
331 |
332 | normals: numpy.ndarray
333 | normal map with shape (3,h,w)
334 |
335 | rotation: numpy.ndarray
336 | rotation in axis angle format with 3 elements with shape (3,)
337 |
338 | translation: numpy.ndarray
339 | translation vector with shape (3,)
340 |
341 | image: numpy.ndarray
342 | Image with shape (3,h,w) in the range [-0.5,0.5].
343 | """
344 | import vtk
345 | depth = (1/inverse_depth).squeeze()
346 |
347 | w = depth.shape[-1]
348 | h = depth.shape[-2]
349 |
350 | if intrinsics is None:
351 | intrinsics = np.array([0.89115971, 1.18821287, 0.5, 0.5]) # sun3d intrinsics
352 |
353 | K = np.eye(3)
354 | K[0,0] = intrinsics[0]*w
355 | K[1,1] = intrinsics[1]*h
356 | K[0,2] = intrinsics[2]*w
357 | K[1,2] = intrinsics[3]*h
358 |
359 | R1 = np.eye(3)
360 | t1 = np.zeros((3,))
361 |
362 | if not rotation is None and not translation is None:
363 | R2 = angleaxis_to_rotation_matrix(rotation.squeeze())
364 | t2 = translation.squeeze()
365 | else:
366 | R2 = np.eye(3)
367 | t2 = np.zeros((3,))
368 |
369 | if not normals is None:
370 | n = normals.squeeze()
371 | else:
372 | n = None
373 |
374 | if not image is None:
375 | img = ((image+0.5)*255).astype(np.uint8)
376 | else:
377 | img = None
378 |
379 | pointcloud = compute_point_cloud_from_depthmap(depth, K, R1, t1, n, img)
380 |
381 | pointcloud_polydata = create_pointcloud_polydata(
382 | points=pointcloud['points'],
383 | colors=pointcloud['colors'] if 'colors' in pointcloud else None,
384 | )
385 | plywriter = vtk.vtkPLYWriter()
386 | plywriter.SetFileName(output_prefix + 'points.ply')
387 | plywriter.SetInputData(pointcloud_polydata)
388 | plywriter.SetArrayName('Colors')
389 | plywriter.Write()
390 |
391 | cam1_polydata = create_camera_polydata(R1,t1, True)
392 | plywriter = vtk.vtkPLYWriter()
393 | plywriter.SetFileName(output_prefix + 'cam1.ply')
394 | plywriter.SetInputData(cam1_polydata)
395 | plywriter.Write()
396 |
397 | cam2_polydata = create_camera_polydata(R2,t2, True)
398 | plywriter = vtk.vtkPLYWriter()
399 | plywriter.SetFileName(output_prefix + 'cam2.ply')
400 | plywriter.SetInputData(cam2_polydata)
401 | plywriter.Write()
402 |
403 |
404 |
405 | def transform_pointcloud_points(points, T):
406 | """Transforms the pointcloud with T
407 |
408 | points: numpy.ndarray
409 | pointcloud with shape (n,3)
410 |
411 | T: numpy.ndarray
412 | The 4x4 transformation
413 |
414 | Returns the transformed points
415 | """
416 | tmp = np.empty((points.shape[0],points.shape[1]+1),dtype=points.dtype)
417 | tmp[:,0:3] = points
418 | tmp[:,3] = 1
419 | return T.dot(tmp.transpose())[0:3].transpose()
420 |
421 |
--------------------------------------------------------------------------------
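As a usage illustration for the helpers above, the sketch below exports a prediction to PLY files for external viewers. It requires ```vtk``` and a working Cython toolchain (the module compiles ```vis_cython``` via pyximport on import); all inputs are made-up placeholders with the shapes given in the docstrings.

```python
import numpy as np
from depthmotionnet.vis import export_prediction_to_ply

# dummy network outputs at resolution level 2 (48x64)
inverse_depth = np.full((48, 64), 0.5, dtype=np.float32)
rotation = np.zeros((3,), dtype=np.float32)              # angle-axis rotation of the second camera
translation = np.array([1.0, 0.0, 0.0], dtype=np.float32)
image = np.zeros((3, 48, 64), dtype=np.float32)          # RGB image in [-0.5, 0.5]

# writes /tmp/demon_points.ply, /tmp/demon_cam1.ply and /tmp/demon_cam2.ply
export_prediction_to_ply('/tmp/demon_', inverse_depth,
                         rotation=rotation, translation=translation, image=image)
```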
/python/depthmotionnet/vis_cython.pyx:
--------------------------------------------------------------------------------
1 | #
2 | # DeMoN - Depth Motion Network
3 | # Copyright (C) 2017 Benjamin Ummenhofer, Huizhong Zhou
4 | #
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU General Public License as published by
7 | # the Free Software Foundation, either version 3 of the License, or
8 | # (at your option) any later version.
9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | import numpy as np
19 | cimport numpy as np
20 | cimport cython
21 | from libc.math cimport isfinite
22 |
23 |
24 | @cython.boundscheck(False)
25 | cdef _compute_point_cloud_from_depthmap(
26 | np.ndarray[np.float32_t, ndim=2] depth,
27 | np.ndarray[np.float32_t, ndim=3] normals,
28 | np.ndarray[np.uint8_t, ndim=3] colors,
29 | np.ndarray[np.float32_t, ndim=2] K,
30 | np.ndarray[np.float32_t, ndim=2] R_arr,
31 | np.ndarray[np.float32_t, ndim=1] t_arr ):
32 |
33 | cdef int valid_count
34 | valid_count = 0
35 | cdef int x, y
36 | cdef int index
37 | cdef int width, height
38 | cdef float d
39 | cdef float tmp[3]
40 | cdef float X[3]
41 | cdef float inv_fx = 1/K[0,0]
42 | cdef float inv_fy = 1/K[1,1]
43 | cdef float cx = K[0,2]
44 | cdef float cy = K[1,2]
45 | cdef float h = 0.5
46 | cdef float [:,:] R = R_arr
47 | cdef float [:] t = t_arr
48 |
49 | width = depth.shape[1]
50 | height = depth.shape[0]
51 |
52 | for y in range(height):
53 | for x in range(width):
54 | d = depth[y,x]
55 | if isfinite(d) and d > 0:
56 | valid_count += 1
57 |
58 | cdef np.ndarray[np.float32_t, ndim=2] points_arr = np.empty((valid_count,3), dtype=np.float32)
59 | cdef float [:,:] points = points_arr
60 | cdef np.ndarray[np.float32_t,ndim=2] normals_attr_arr = np.empty((valid_count,3), dtype=np.float32)
61 | cdef float [:,:] normals_attr = normals_attr_arr
62 | cdef np.ndarray[np.uint8_t,ndim=2] colors_attr_arr = np.empty((valid_count,3), dtype=np.uint8)
63 | cdef unsigned char [:,:] colors_attr = colors_attr_arr
64 |
65 | index = 0
66 | for y in range(height):
67 | for x in range(width):
68 | d = depth[y,x]
69 | if isfinite(d) and d > 0:
70 | tmp[0] = d*((x+h) - cx)*inv_fx - t[0]
71 | tmp[1] = d*((y+h) - cy)*inv_fy - t[1]
72 | tmp[2] = d - t[2]
73 | X[0] = R[0,0]*tmp[0] + R[1,0]*tmp[1] + R[2,0]*tmp[2]
74 | X[1] = R[0,1]*tmp[0] + R[1,1]*tmp[1] + R[2,1]*tmp[2]
75 | X[2] = R[0,2]*tmp[0] + R[1,2]*tmp[1] + R[2,2]*tmp[2]
76 | points[index,0] = X[0]
77 | points[index,1] = X[1]
78 | points[index,2] = X[2]
79 | index += 1
80 |
81 | result = {'points':points_arr}
82 |
83 | if normals.shape[0] > 0:
84 | index = 0
85 | for y in range(height):
86 | for x in range(width):
87 | d = depth[y,x]
88 | if np.isfinite(d) and d > 0.0:
89 | tmp[0] = normals[0,y,x]
90 | tmp[1] = normals[1,y,x]
91 | tmp[2] = normals[2,y,x]
92 | X[0] = R[0,0]*tmp[0] + R[1,0]*tmp[1] + R[2,0]*tmp[2]
93 | X[1] = R[0,1]*tmp[0] + R[1,1]*tmp[1] + R[2,1]*tmp[2]
94 | X[2] = R[0,2]*tmp[0] + R[1,2]*tmp[1] + R[2,2]*tmp[2]
95 | normals_attr[index,0] = X[0]
96 | normals_attr[index,1] = X[1]
97 | normals_attr[index,2] = X[2]
98 | index += 1
99 |
100 | result['normals'] = normals_attr_arr
101 |
102 | if colors.shape[0] > 0:
103 | index = 0
104 | for y in range(height):
105 | for x in range(width):
106 | d = depth[y,x]
107 | if np.isfinite(d) and d > 0.0:
108 | colors_attr[index,0] = colors[0,y,x]
109 | colors_attr[index,1] = colors[1,y,x]
110 | colors_attr[index,2] = colors[2,y,x]
111 | index += 1
112 |
113 | result['colors'] = colors_attr_arr
114 |
115 | return result
116 |
117 |
118 |
119 | def compute_point_cloud_from_depthmap( depth, K, R, t, normals=None, colors=None ):
120 | """Creates a point cloud numpy array and optional normals and colors arrays
121 |
122 | depth: numpy.ndarray
123 | 2d array with depth values
124 |
125 | K: numpy.ndarray
126 | 3x3 matrix with internal camera parameters
127 |
128 | R: numpy.ndarray
129 | 3x3 rotation matrix
130 |
131 | t: numpy.ndarray
132 | 3d translation vector
133 |
134 | normals: numpy.ndarray
135 | optional array with normal vectors
136 |
137 | colors: numpy.ndarray
138 | optional RGB image with the same dimensions as the depth map.
139 | The shape is (3,h,w) with type uint8
140 |
141 | """
142 | assert colors.dtype == np.uint8 if not colors is None else True
143 |
144 | # make sure the dims and type are ok for the depth
145 | if depth.dtype != np.float32:
146 | _depth = depth.astype(np.float32)
147 | else:
148 | _depth = depth
149 |
150 | if len(_depth.shape) > 2:
151 | _depth = _depth.squeeze()
152 | if len(_depth.shape) > 2:
153 | raise ValueError("wrong number of dimensions for depth")
154 |
155 | # sanity checks
156 | if normals is None:
157 | normals = np.empty((0,0,0),dtype=np.float32)
158 | elif normals.shape[1:] != _depth.shape:
159 | raise ValueError("shape mismatch: normals {0}, depth {1}".format(normals.shape, depth.shape))
160 |
161 | if normals.dtype != np.float32:
162 | _normals = normals.astype(np.float32)
163 | else:
164 | _normals = normals
165 |
166 | if colors is None:
167 | colors_arr = np.empty((0,0,0),dtype=np.uint8)
168 | else:
169 | colors_arr = colors
170 | if colors_arr.shape[1:] != _depth.shape:
171 | raise ValueError("shape mismatch: colors {0}, depth {1}".format(colors_arr.shape, depth.shape))
172 |
173 | return _compute_point_cloud_from_depthmap(_depth, _normals, colors_arr, K.astype(np.float32), R.astype(np.float32), t.astype(np.float32))
174 |
175 |
--------------------------------------------------------------------------------
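The public wrapper above expects a denormalized 3x3 camera matrix, while the networks work with normalized intrinsics; the sketch below mirrors how ```vis.py``` builds ```K``` from the normalized SUN3D intrinsics before calling the kernel. The depth map here is a dummy array, and no normals or colors are passed.

```python
import numpy as np
from depthmotionnet.vis import compute_point_cloud_from_depthmap  # forwards to the Cython kernel

h, w = 48, 64
depth = np.ones((h, w), dtype=np.float32)  # dummy depth map, all values valid

# denormalize the sun3d intrinsics [fx/w, fy/h, cx/w, cy/h] into pixel units
fx, fy, cx, cy = 0.89115971, 1.18821287, 0.5, 0.5
K = np.array([[fx * w, 0.0,    cx * w],
              [0.0,    fy * h, cy * h],
              [0.0,    0.0,    1.0   ]], dtype=np.float32)

R = np.eye(3, dtype=np.float32)        # camera at the origin
t = np.zeros((3,), dtype=np.float32)

cloud = compute_point_cloud_from_depthmap(depth, K, R, t)
print(cloud['points'].shape)           # (h*w, 3) because every depth value is finite and positive
```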
/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/teaser.png
--------------------------------------------------------------------------------
/training/README.md:
--------------------------------------------------------------------------------
1 | # Network Training
2 |
3 | Code for training the network can be found in the following directories:
4 |
5 | ```
6 | training/v2
7 | python/depthmotionnet/v2
8 | ```
9 |
10 | * ```training/v2``` contains the training script ```training.py```.
11 | * ```python/depthmotionnet/v2``` contains the definition of the network parts for version _v2_ (```blocks.py```) and the loss functions (```losses.py```), as well as code for easily instantiating the network.
12 |
13 |
14 | ## Training Evolutions
15 |
16 | The training process is made up of several stages called _evolutions_.
17 | To train DeMoN we use 6 evolutions (```0_flow1, 1_dm1, 2_flow2, 3_dm2, 4_iterative, 5_refine```).
18 | The instantiated and actively training network parts are visualized below:
19 | ![network training evolutions](network_training_evolutions.gif)
20 |
21 |
22 | ## Prerequisites
23 |
24 | * The python library **tfutils** for managing the training evolutions must be downloaded and added to the python path (https://github.com/lmb-freiburg/tfutils.git)
25 | * The ```python``` directory in the demon root must be added to the python path
26 | * **lmbspecialops** must be built and added to the python path.
27 | * **multivih5datareaderop** must be built (see [readme](../multivih5datareaderop/README.md))
28 | * Training data sets must be available in the folder ```datasets/traindata```.
29 | The script ```datasets/download_traindata.sh``` can be used to download the data sets.
30 |
31 |
32 | The following script sets up all required libraries from scratch in a virtualenv ```demon_venv``` managed with ```pew```:
33 |
34 | ```bash
35 | pew new demon_venv # create new virtualenv
36 | # the following commands are executed within the demon_venv virtualenv
37 |
38 | # install python module dependencies
39 | pip install tensorflow-gpu # or 'tensorflow' without gpu support
40 | pip install pillow # for reading images
41 | pip install matplotlib # required for visualizing depth maps
42 | pip install Cython # required for visualizing point clouds
43 | pip install h5py
44 | pip install minieigen
45 | pip install pandas
46 | pip install scipy
47 | pip install scikit-image
48 | pip install xarray
49 |
50 | # install tfutils library
51 | git clone https://github.com/lmb-freiburg/tfutils.git
52 | pew add $PWD/tfutils/python # add to python path
53 |
54 | # clone demon repo with submodules
55 | git clone --recursive https://github.com/lmb-freiburg/demon.git
56 | DEMON_DIR=$PWD/demon
57 | pew add $DEMON_DIR/python # add to python path
58 |
59 | # build lmbspecialops
60 | mkdir $DEMON_DIR/lmbspecialops/build
61 | cd $DEMON_DIR/lmbspecialops/build
62 | cmake .. # add '-DBUILD_WITH_CUDA=OFF' to build without gpu support
63 | # (optional) run 'ccmake .' here to adjust settings for gpu code generation
64 | make
65 | pew add $DEMON_DIR/lmbspecialops/python # add to python path
66 |
67 |
68 | # build multivih5datareaderop (requires OpenCV)
69 | mkdir $DEMON_DIR/build
70 | cd $DEMON_DIR/build
71 | cmake ..
72 | make
73 |
74 | # download training data
75 | cd $DEMON_DIR/datasets
76 | ./download_traindata.sh
77 |
78 | ```
79 |
80 | ## Training Script
81 |
82 | ```bash
83 | cd $DEMON_DIR/training/v2
84 | pew in demon_venv
85 | python training.py
86 | ```
87 | The training script creates the folder ```$DEMON_DIR/training/v2/training```.
88 | Once training is complete, the last snapshot can be found at ```$DEMON_DIR/training/v2/training/5_refine/checkpoints/snapshot-250000.*```.
89 |
90 | The location of the training data can be adjusted in the file ```training.py```.
91 |
92 |
--------------------------------------------------------------------------------
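To sanity-check the final snapshot mentioned in the README above, the variables stored in it can be listed with a few lines of TensorFlow 1.x. This snippet is not part of the repository; the path is the checkpoint prefix produced by the training run, relative to ```training/v2```.

```python
import tensorflow as tf

# checkpoint prefix written at the end of the last evolution
ckpt = 'training/5_refine/checkpoints/snapshot-250000'

reader = tf.train.NewCheckpointReader(ckpt)
for name, shape in sorted(reader.get_variable_to_shape_map().items()):
    print(name, shape)
```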
/training/network_training_evolutions.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmb-freiburg/demon/ea57419e6fc37583ef4c66d709fabed92815a41f/training/network_training_evolutions.gif
--------------------------------------------------------------------------------
/weights/.gitignore:
--------------------------------------------------------------------------------
1 | *.data-*-of-*
2 | *.index
3 | *.tgz
4 |
--------------------------------------------------------------------------------
/weights/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | wget https://lmb.informatik.uni-freiburg.de/people/ummenhof/depthmotionnet/demon_original_weights.tgz
3 | tar -xvf demon_original_weights.tgz
4 |
--------------------------------------------------------------------------------