├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── configs ├── fusion │ ├── corbs.yaml │ ├── replica.yaml │ └── scene3d.yaml └── routing │ └── replica.yaml ├── data ├── mvs_depth_estimation │ ├── downsample_dataset.py │ ├── move_data.py │ ├── reconstruct_colmap_slurm_copyroom.sh │ ├── reconstruct_colmap_slurm_stonewall.sh │ ├── setup_colmap.py │ └── setup_colmap_corbs.py └── save_every_tenth_frame.py ├── dataset ├── __init__.py ├── associate.py ├── colmap.py ├── corbs.py ├── replica.py └── scene3d.py ├── images └── architecture.png ├── lists ├── corbs │ ├── desk.txt │ └── human.txt ├── replica │ ├── test_hotel_0.txt │ ├── test_office_0.txt │ ├── test_office_4.txt │ ├── test_office_4_hotel_0_office_0.txt │ ├── train.txt │ └── val.txt └── scene3d │ ├── copyroom.txt │ └── stonewall.txt ├── models ├── fusion │ ├── sgm_psmnet │ │ └── model │ │ │ └── best.pth.tar │ ├── sgm_psmnet_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ ├── sgm_psmnet_routing │ │ └── model │ │ │ └── best.pth.tar │ ├── sgm_psmnet_routing_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_mvs_corbs │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_mvs_scene3d │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet_routing │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet_routing_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ └── tof_tof_scene3d_collab_rec │ │ └── model │ │ └── best.pth.tar └── routing │ ├── psmnet │ └── model │ │ └── best.pth.tar │ ├── sgm │ └── model │ │ └── best.pth.tar │ ├── sgm_psmnet │ └── model │ │ └── best.pth.tar │ ├── tof │ └── model │ │ └── best.pth.tar │ └── tof_psmnet │ └── model │ └── best.pth.tar ├── modules ├── __init__.py ├── database.py ├── extractor.py ├── filter_pipeline.py ├── filtering_net.py ├── fuse_pipeline.py ├── integrator.py ├── model.py ├── model_features.py ├── pipeline.py ├── routing.py └── voxelgrid.py ├── requirements.txt ├── test_fusion.py ├── test_routing.py ├── train_fusion.py ├── train_routing.py ├── utils ├── __init__.py ├── loading.py ├── loss.py ├── metrics.py ├── saving.py ├── setup.py ├── transform.py └── visualize_sensor_weighting.py └── videos ├── create_depth_video.py └── render_option.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # ignore folders and files 132 | videos 133 | .idea 134 | .vscode 135 | utils/invert_colormap.py 136 | compute_attention_similarity.py 137 | scripts/log 138 | models 139 | debug_mc.py 140 | wandb 141 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "deps/evaluate_3d_reconstruction"] 3 | path = deps/evaluate_3d_reconstruction 4 | url = https://github.com/tfy14esa/evaluate_3d_reconstruction.git 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, Erik Sandström 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Before commercial usage of source code, the copyright holder must be contacted. 8 | 9 | 2. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 3. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 4. Neither the name of ETH Zurich nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /configs/fusion/corbs.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True # run on cpu or gpu 3 | experiment_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion # path where the logging is done and the models are saved. 4 | eval_freq: 442 # how many global steps before evaluation and saving the model 5 | log_freq: 442 # how many global steps before logging the training loss 6 | seed: 52 # seed for shuffling operations 7 | FUSION_MODEL: 8 | use_fusion_net: False # use learned fusion net as done by RoutedFusion 9 | fixed: True # use fixed or finetune weights when use_fusion_net is true 10 | output_scale: 1.0 # output scale from fusion net (same as RoutedFusion) 11 | n_points: 11 # extraction band samples 12 | n_tail_points: 9 # samples along the ray which update the grid 13 | n_points_tof: 11 # tof specific extraction band samples 14 | n_tail_points_tof: 9 15 | n_points_stereo: 11 # stereo specific extraction band samples 16 | n_tail_points_stereo: 9 17 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 18 | n_empty_space_voting: 0 # samples with free space update 19 | max_weight: 500 # max weight 20 | extraction_strategy: 'nearest_neighbor' # nearest_neighbor or trilinear_interpolation 21 | FEATURE_MODEL: 22 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 23 | stereo_warp_right: False # concatenate the right stereo view warped to the left view using the left stereo view depth as input to the feature net 24 | network: resnet # anything else but "resnet" will give a standard network 25 | use_feature_net: True # use learned feature net. 
When false, yields the depth as feature 26 | append_depth: True # append depth to feature vector 27 | w_rgb: True # concatenate rgb to stereo or mvs depth sensors as input to feature net 28 | w_rgb_tof: False # concatenate rgb to tof sensor as input to the feature net 29 | w_intensity_gradient: False # concatenate rgb intensity and gradient as input to the feature net 30 | normalize: True # normalize the feature vector 31 | fixed: False # fix weights of feature net - when true does not declare an optimzer 32 | n_features: 4 # output dimension from feature net 33 | n_layers: 6 # layers 34 | enc_activation: torch.nn.Tanh() 35 | dec_activation: torch.nn.Tanh() 36 | depth: True # concatenate depth as input to feature net 37 | layernorm: False 38 | ROUTING_MODEL: 39 | contraction: 64 # hidden dimension of routing network 40 | normalization: False # apply batch normalization 41 | FILTERING_MODEL: 42 | do: True # whether to do sensor fusion or not 43 | model: '3dconv' # 3dconv, tsdf_early_fusion, tsdf_middle_fusion, routedfusion 44 | CONV3D_MODEL: 45 | fixed: False # fix network weights 46 | outlier_channel: False # if True, outputs another channel from the filtering network to be used with the single sensor outlier loss. 47 | features_to_weight_head: True # feed 2D features directly to alpha head 48 | sdf_to_weight_head: False # feed sdf values directly wo encoding to alpha head (not implemented when weighting_complexity: unet_style) 49 | weights_to_weight_head: True # feed the tsdf weights to the alpha head (not implemented when weighting_complexity: unet_style) 50 | tanh_weight: True # apply tanh-transform to weight counter 51 | inverted_weight: False # when tanh_weight: true, we make 0 to 1 and 1 to 0. Only relevant when weights_to_weight_head: true 52 | bias: True # bias in alpha head 53 | chunk_size: 64 # determines the size of the window used during training and testing that is fed to the 3D convnet 54 | activation: torch.nn.ReLU() 55 | weighting_complexity: '3layer' # Xlayer 56 | LOSS: 57 | alpha_single_sensor_supervision: True # supervise voxels where only one sensor integrates 58 | alpha_supervision: False # supervise directly with proxy alpha in 3D. Not available on the corbs dataset. 59 | fusion_weight: 6.0 # l1 weight of fusion net 60 | grid_weight: 6 61 | alpha_weight: 0.01 # weight of single sensor alpha supervision and proxy supervision 62 | TRAINING: 63 | reset_strategy: True # May not make any difference 64 | reset_prob: 0.01 # in percent (used if reset_strategy: True) 65 | pretrain_filtering_net: False 66 | pretrain_fusion_net: False # if True, provide a path called pretrain_fusion_SENSORNAME_model_path. Used to load pretrained and/or fixed fusion nets 67 | train_batch_size: 1 68 | train_shuffle: True 69 | val_batch_size: 1 70 | val_shuffle: False 71 | n_epochs: 1000 72 | gradient_clipping: True 73 | TESTING: 74 | mc: 'skimage' # use skimage marching cubes implementation 75 | routedfusion_nn: True # using nearest neighbor mask or trilinear interpolation mask. When true, requires specifying the path to the model containing the nearest neighbor weight grid in the variable routedfusion_nn_model. 
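The CONV3D_MODEL and TESTING options above revolve around a per-voxel sensor weighting ("alpha") that decides how much to trust each sensor's TSDF grid. The snippet below is a minimal sketch of that idea under stated assumptions: the blending formula, the single-sensor fallback and the tanh weight transform are illustrative readings of the config comments (`tanh_weight`, `inverted_weight`, `alpha_single_sensor_supervision`), not the repository's exact implementation.

```python
import numpy as np

def tanh_weight_transform(weight_counter, inverted=False):
    # tanh_weight: True squashes the unbounded per-voxel weight counter to [0, 1)
    # before it is fed to the weighting ("alpha") head; inverted_weight flips it.
    squashed = np.tanh(weight_counter)
    return 1.0 - squashed if inverted else squashed

def blend_sensor_tsdfs(tsdf_tof, tsdf_stereo, alpha, w_tof, w_stereo):
    # alpha close to 1 trusts the ToF grid, alpha close to 0 the stereo grid.
    fused = alpha * tsdf_tof + (1.0 - alpha) * tsdf_stereo
    # Voxels observed by only one sensor fall back to that sensor
    # (the case targeted by alpha_single_sensor_supervision).
    only_tof = (w_tof > 0) & (w_stereo == 0)
    only_stereo = (w_stereo > 0) & (w_tof == 0)
    fused[only_tof] = tsdf_tof[only_tof]
    fused[only_stereo] = tsdf_stereo[only_stereo]
    return fused
```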
76 | routedfusion_nn_model: 210929-165610 # specify from what tsdf fusion model (or SenFuNet model) to use the nearest neighbor weight grids 77 | use_outlier_filter: True # only true when FILTERING_MODEL.model: '3dconv' 78 | eval_single_sensors: True # not applicable when evaluating routedfusion 79 | visualize_sensor_weighting: False 80 | test_batch_size: 1 81 | test_shuffle: False 82 | fusion_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion/tof_mvs_corbs/model/best.pth.tar # used for conv3d, routedfusion as filtering models 83 | weight_thresholds: [0.0] 84 | ROUTING: 85 | do: False # needs to be false at all times 86 | dont_smooth_where_uncertain: False # if True, replaces the routing output with the input depth if the confidence is below the threshold 87 | threshold: 0.15 88 | intensity_grad: False # feed grayscale image and its gradient to routing network 89 | OPTIMIZATION: 90 | scheduler: 91 | step_size_filtering: 500 92 | step_size_fusion: 100 93 | gamma_filtering: 0.1 94 | gamma_fusion: 0.5 95 | lr_filtering: 1.e-04 96 | lr_fusion: 1.e-04 97 | rho: 0.95 # rmsprop fusion net 98 | eps: 1.e-08 # rmsprop fusion net 99 | momentum: 0.5 # rmsprop fusion net 100 | weight_decay: 0.00 # rmsprop fusion net 101 | accumulation_steps: 20 # note that this is normally 8 102 | DATA: 103 | collaborative_reconstruction: False # multi-agent reconstruction 104 | frames_per_chunk: 100 # used when colaborative_reconstruction: true 105 | mask_stereo_height: 10 # 35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 106 | # compared to the color camera of the azure kinect 107 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 108 | mask_tof_height: 10 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 109 | mask_tof_width: 10 #35 # 35 # in pixels. With resolution 256 this would be 35 110 | mask_width: 10 # general sensor 111 | mask_height: 10 # general sensor 112 | pad: 2 # pad ground truth grid (not needed, but all results are using it) 113 | min_depth_stereo: 0.0 # 0.5 (in meters) 114 | max_depth_stereo: 12.3 # 2.5 (in meters) 115 | min_depth_tof: 0.0 # 0.5 (in meters) 116 | max_depth_tof: 12.3 # 3.86 (in meters) 117 | min_depth: 0.0 # general sensor (in meters) 118 | max_depth: 12.3 # general sensor (in meters) 119 | root_dir: /cluster/work/cvl/esandstroem/data/corbs # training on data from work folder or on local scratch of compute node 120 | dataset: CoRBS # dataset 121 | input: [tof, stereo] # list of sensors to fuse. 
When FILTERING_MODEL.do: False, this list can consist of only one sensor 122 | target: gt # ground truth depth label 123 | resx_stereo: 256 # I assume square input images 124 | resy_stereo: 256 125 | resx_tof: 256 126 | resy_tof: 256 127 | resx: 256 # default settings 128 | resy: 256 129 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/corbs/desk.txt 130 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/corbs/desk.txt 131 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/corbs/human.txt 132 | init_value: 0.0 # init value of tsdf grids 133 | trunc_value: 0.05 # truncation distance 134 | -------------------------------------------------------------------------------- /configs/fusion/replica.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True # run on cpu or gpu 3 | experiment_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion # path where the logging is done and the models are saved. 4 | eval_freq: 500 # how many global steps before evaluation and saving the model 5 | log_freq: 500 # how many global steps before logging the training loss 6 | seed: 52 # seed for shuffling operations 7 | FUSION_MODEL: 8 | use_fusion_net: False # use learned fusion net as done by RoutedFusion 9 | fixed: True # use fixed or finetune weights when use_fusion_net is true 10 | output_scale: 1.0 # output scale from fusion net (same as RoutedFusion) 11 | n_points: 11 # extraction band samples 12 | n_tail_points: 9 # samples along the ray which update the grid 13 | n_points_tof: 11 # tof specific extraction band samples 14 | n_tail_points_tof: 9 15 | n_points_stereo: 11 # stereo specific extraction band samples 16 | n_tail_points_stereo: 9 17 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 18 | n_empty_space_voting: 0 # samples with free space update 19 | max_weight: 500 # max weight 20 | extraction_strategy: 'nearest_neighbor' # nearest_neighbor or trilinear_interpolation 21 | FEATURE_MODEL: 22 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 23 | stereo_warp_right: False # concatenate the right stereo view warped to the left view using the left stereo view depth as input to the feature net 24 | network: resnet # anything else but "resnet" will give a standard network 25 | use_feature_net: True # use learned feature net. 
When false, yields the depth as feature 26 | append_depth: True # append depth to feature vector 27 | w_rgb: True # concatenate rgb to stereo or mvs depth sensors as input to feature net 28 | w_rgb_tof: False # concatenate rgb to tof sensor as input to the feature net 29 | w_intensity_gradient: False # concatenate rgb intensity and gradient as input to the feature net 30 | normalize: True # normalize the feature vector 31 | fixed: False # fix weights of feature net - when true does not declare an optimzer 32 | n_features: 4 # output dimension from feature net 33 | n_layers: 6 # layers 34 | enc_activation: torch.nn.Tanh() 35 | dec_activation: torch.nn.Tanh() 36 | depth: True # concatenate depth as input to feature net 37 | layernorm: False 38 | ROUTING_MODEL: 39 | contraction: 64 # hidden dimension of routing network 40 | normalization: False # apply batch normalization 41 | FILTERING_MODEL: 42 | do: True # whether to do sensor fusion or not 43 | model: '3dconv' # 3dconv, tsdf_early_fusion, tsdf_middle_fusion, routedfusion 44 | CONV3D_MODEL: 45 | fixed: False # fix network weights 46 | outlier_channel: False # if True, outputs another channel from the filtering network to be used with the single sensor outlier loss. 47 | features_to_weight_head: True # feed 2D features directly to alpha head 48 | sdf_to_weight_head: False # feed sdf values directly wo encoding to alpha head (not implemented when weighting_complexity: unet_style) 49 | weights_to_weight_head: True # feed the tsdf weights to the alpha head (not implemented when weighting_complexity: unet_style) 50 | tanh_weight: True # apply tanh-transform to weight counter 51 | inverted_weight: False # when tanh_weight: true, we make 0 to 1 and 1 to 0. Only relevant when weights_to_weight_head: true 52 | bias: True # bias in alpha head 53 | chunk_size: 64 # determines the size of the window used during training and testing that is fed to the 3D convnet 54 | activation: torch.nn.ReLU() 55 | weighting_complexity: '3layer' # Xlayer 56 | LOSS: 57 | alpha_single_sensor_supervision: True # supervise voxels where only one sensor integrates 58 | alpha_supervision: False # supervise directly with proxy alpha in 3D. Only available on some scenes e.g. office 0, hotel 0 59 | fusion_weight: 6.0 # l1 weight of fusion net 60 | grid_weight: 6 61 | alpha_weight: 0.01 # weight of single sensor alpha supervision and proxy supervision 62 | TRAINING: 63 | reset_strategy: True # May not make any difference 64 | reset_prob: 0.01 # in percent (used if reset_strategy: True) 65 | pretrain_filtering_net: False 66 | pretrain_fusion_net: False # if True, provide a path called pretrain_fusion_SENSORNAME_model_path. 
Used to load pretrained and/or fixed fusion nets 67 | routing_stereo_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/psmnet/model/best.pth.tar 68 | routing_tof_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/tof/model/best.pth.tar 69 | routing_tof_2_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/tof/model/best.pth.tar 70 | routing_sgm_stereo_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/sgm/model/best.pth.tar 71 | train_batch_size: 1 72 | train_shuffle: True 73 | val_batch_size: 1 74 | val_shuffle: False 75 | n_epochs: 1000 76 | gradient_clipping: True 77 | TESTING: 78 | mc: 'skimage' # 'skimage' or 'Open3D' (requires local library installation) 79 | routedfusion_nn: True # using nearest neighbor mask or trilinear interpolation mask. When true, requires specifying the path to the model containing the nearest neighbor weight grid in the variable routedfusion_nn_model. 80 | routedfusion_nn_model: 210929-165610 # specify from what tsdf fusion model (or SenFuNet model) to use the nearest neighbor weight grids 81 | use_outlier_filter: True # only true when FILTERING_MODEL.model: '3dconv' 82 | eval_single_sensors: False # not applicable when evaluating routedfusion 83 | visualize_sensor_weighting: False 84 | test_batch_size: 1 85 | test_shuffle: False 86 | routing_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/tof_psmnet/model/best.pth.tar # Only used for tsdf_early_fusion. 87 | fusion_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion/tof_psmnet/model/best.pth.tar # used for conv3d, routedfusion as filtering models. 88 | weight_thresholds: [0.0] 89 | ROUTING: 90 | do: False # use routing network 91 | dont_smooth_where_uncertain: False # if True, replaces the routing output with the input depth if the confidence is below the threshold 92 | threshold: 0.15 93 | intensity_grad: False # feed grayscale image and its gradient to routing network 94 | OPTIMIZATION: 95 | scheduler: 96 | step_size_filtering: 500 97 | step_size_fusion: 100 98 | gamma_filtering: 0.1 99 | gamma_fusion: 0.5 100 | lr_filtering: 1.e-04 101 | lr_fusion: 1.e-04 102 | rho: 0.95 # rmsprop fusion net 103 | eps: 1.e-08 # rmsprop fusion net 104 | momentum: 0.5 # rmsprop fusion net 105 | weight_decay: 0.00 # rmsprop fusion net 106 | accumulation_steps: 20 107 | DATA: 108 | early_fusion_asynch: False # asynchronous early fusion experiment 109 | collaborative_reconstruction: False # multi-agent reconstruction 110 | frames_per_chunk: 100 # used when colaborative_reconstruction: true 111 | downsampling: [1, 1] # first entry is the downsampling rate of the 1st sensor 112 | # 2nd entry is the downsampling rate of the 2nd sensor 113 | mask_stereo_height: 10 # 35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 114 | # compared to the color camera of the azure kinect 115 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 116 | mask_tof_height: 10 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 117 | mask_tof_width: 10 #35 # 35 # in pixels. 
With resolution 256 this would be 35 118 | mask_width: 10 # general sensor 119 | mask_height: 10 # general sensor 120 | pad: 2 # pad grid (not needed, but all results are using it) 121 | min_depth_stereo: 0.0 # 0.5 (in meters) 122 | max_depth_stereo: 12.3 # 2.5 (in meters) 123 | min_depth_tof: 0.0 # 0.5 (in meters) 124 | max_depth_tof: 12.3 # 3.86 (in meters) 125 | min_depth: 0.0 # general sensor (in meters) 126 | max_depth: 12.3 # general sensor (in meters) 127 | root_dir: TMPDIR #/cluster/work/cvl/esandstroem/data/replica/manual #TMPDIR # use TMPDIR for the euler cluster. Path to data folder 128 | dataset: Replica # dataset 129 | input: [tof, stereo] # list of sensors to fuse. When FILTERING_MODEL.do: False, this list can consist of only one sensor 130 | target: gt # ground truth depth label 131 | resx_stereo: 256 # I assume square input images 132 | resy_stereo: 256 133 | resx_tof: 256 134 | resy_tof: 256 135 | resx: 256 # default settings 136 | resy: 256 137 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt 138 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt 139 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt #4_hotel_0_office_0.txt 140 | init_value: 0.0 # init value of tsdf grids 141 | trunc_value: 0.05 # truncation distance 142 | -------------------------------------------------------------------------------- /configs/fusion/scene3d.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True # run on cpu or gpu 3 | experiment_path: /cluster/work/cvl/esandstroem/src/late_fusion_3dconvnet/workspace/fusion/ #/cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion # path where the logging is done and the models are saved. 4 | eval_freq: 442 # how many global steps before evaluation and saving the model 5 | log_freq: 442 #957 383 # how many global steps before logging the training loss 6 | seed: 52 # seed for shuffling operations 7 | FUSION_MODEL: 8 | use_fusion_net: False # use learned fusion net as done by RoutedFusion 9 | fixed: True # use fixed or finetune weights when use_fusion_net is true 10 | output_scale: 1.0 # output scale from fusion net (same as RoutedFusion) 11 | n_points: 11 # extraction band samples 12 | n_tail_points: 9 # samples along the ray which update the grid 13 | n_points_tof: 11 # tof specific extraction band samples 14 | n_tail_points_tof: 9 15 | n_points_stereo: 11 # stereo specific extraction band samples 16 | n_tail_points_stereo: 9 17 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 18 | n_empty_space_voting: 0 # samples with free space update 19 | max_weight: 500 # max weight 20 | extraction_strategy: 'nearest_neighbor' # nearest_neighbor or trilinear_interpolation 21 | FEATURE_MODEL: 22 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 23 | stereo_warp_right: False # concatenate the right stereo view warped to the left view using the left stereo view depth as input to the feature net 24 | network: resnet # anything else but "resnet" will give a standard network 25 | use_feature_net: True # use learned feature net. 
When false, yields the depth as feature 26 | append_depth: True # append depth to feature vector 27 | w_rgb: True # concatenate rgb to stereo or mvs depth sensors as input to feature net 28 | w_rgb_tof: False # concatenate rgb to tof sensor as input to the feature net 29 | w_intensity_gradient: False # concatenate rgb intensity and gradient as input to the feature net 30 | normalize: True # normalize the feature vector 31 | fixed: False # fix weights of feature net - when true does not declare an optimzer 32 | n_features: 4 # output dimension from feature net 33 | n_layers: 6 # layers 34 | enc_activation: torch.nn.Tanh() 35 | dec_activation: torch.nn.Tanh() 36 | depth: True # concatenate depth as input to feature net 37 | layernorm: False 38 | ROUTING_MODEL: 39 | contraction: 64 # hidden dimension of routing network 40 | normalization: False # apply batch normalization 41 | FILTERING_MODEL: 42 | do: True # whether to do sensor fusion or not 43 | model: '3dconv' # 3dconv, tsdf_early_fusion, tsdf_middle_fusion, routedfusion 44 | CONV3D_MODEL: 45 | fixed: False # fix network weights 46 | outlier_channel: False # if True, outputs another channel from the filtering network to be used with the single sensor outlier loss. 47 | features_to_weight_head: True # feed 2D features directly to alpha head 48 | sdf_to_weight_head: False # feed sdf values directly wo encoding to alpha head (not implemented when weighting_complexity: unet_style) 49 | weights_to_weight_head: True # feed the tsdf weights to the alpha head (not implemented when weighting_complexity: unet_style) 50 | tanh_weight: True # apply tanh-transform to weight counter 51 | inverted_weight: False # when tanh_weight: true, we make 0 to 1 and 1 to 0. Only relevant when weights_to_weight_head: true 52 | bias: True # bias in alpha head 53 | chunk_size: 64 # determines the size of the window used during training and testing that is fed to the 3D convnet 54 | activation: torch.nn.ReLU() 55 | weighting_complexity: '3layer' # Xlayer 56 | LOSS: 57 | alpha_single_sensor_supervision: True # supervise voxels where only one sensor integrates 58 | alpha_supervision: False # supervise directly with proxy alpha in 3D. Not available on scene3d dataset. 59 | fusion_weight: 6.0 # l1 weight of fusion net 60 | grid_weight: 6 61 | alpha_weight: 0.01 # weight of single sensor alpha supervision and proxy supervision 62 | TRAINING: 63 | reset_strategy: True # May not make any difference 64 | reset_prob: 0.01 # in percent (used if reset_strategy: True) 65 | pretrain_filtering_net: False 66 | pretrain_fusion_net: False # if True, provide a path called pretrain_fusion_SENSORNAME_model_path. Used to load pretrained and/or fixed fusion nets 67 | train_batch_size: 1 68 | train_shuffle: True 69 | val_batch_size: 1 70 | val_shuffle: False 71 | n_epochs: 1000 72 | gradient_clipping: True 73 | TESTING: 74 | mc: 'skimage' 75 | routedfusion_nn: True # using nearest neighbor mask or trilinear interpolation mask. When true, requires specifying the path to the model containing the nearest neighbor weight grid in the variable routedfusion_nn_model. 
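FUSION_MODEL.max_weight, DATA.init_value and DATA.trunc_value above parameterise the standard weighted running-average TSDF update used in volumetric fusion: grids start at init_value with zero weight, observations are clipped to the truncation band, and the weight counter is capped at max_weight. The sketch below shows that textbook rule with unit per-observation weights; it is an illustration of what the settings refer to, not the repository's integrator.

```python
import numpy as np

def integrate(tsdf, weights, sdf_obs, obs_mask, max_weight=500, trunc=0.05):
    """One weighted running-average TSDF update step.

    tsdf, weights : (X, Y, Z) grids (tsdf initialised to init_value, weights to 0)
    sdf_obs       : observed signed distances for the current depth frame
    obs_mask      : boolean mask of voxels touched by the current frame
    """
    sdf_obs = np.clip(sdf_obs, -trunc, trunc)          # truncation band
    w_old = weights[obs_mask]
    tsdf[obs_mask] = (w_old * tsdf[obs_mask] + sdf_obs[obs_mask]) / (w_old + 1.0)
    weights[obs_mask] = np.minimum(w_old + 1.0, max_weight)  # cap weight counter
    return tsdf, weights
```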
76 | routedfusion_nn_model: 210929-165610 # specify from what tsdf fusion model (or SenFuNet model) to use the nearest neighbor weight grids 77 | use_outlier_filter: True # only true when FILTERING_MODEL.model: '3dconv' 78 | eval_single_sensors: False # not applicable when evaluating routedfusion 79 | visualize_sensor_weighting: False 80 | test_batch_size: 1 81 | test_shuffle: False 82 | fusion_model_path: /cluster/work/cvl/esandstroem/src/late_fusion_3dconvnet/workspace/fusion/220526-124631/model/best.pth.tar #/cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion/tof_mvs_scene3d/model/best.pth.tar # used for conv3d, routedfusion as filtering models. 83 | weight_thresholds: [0.0] 84 | ROUTING: 85 | do: False # needs to be false at all times 86 | dont_smooth_where_uncertain: False # if True, replaces the routing output with the input depth if the confidence is below the threshold 87 | threshold: 0.15 88 | intensity_grad: False # feed grayscale image and its gradient to routing network 89 | OPTIMIZATION: 90 | scheduler: 91 | step_size_filtering: 500 92 | step_size_fusion: 100 93 | gamma_filtering: 0.1 94 | gamma_fusion: 0.5 95 | lr_filtering: 1.e-04 96 | lr_fusion: 1.e-04 97 | rho: 0.95 # rmsprop fusion net 98 | eps: 1.e-08 # rmsprop fusion net 99 | momentum: 0.5 # rmsprop fusion net 100 | weight_decay: 0.00 # rmsprop fusion net 101 | accumulation_steps: 20 # note that this is normally 8 102 | DATA: 103 | collaborative_reconstruction: False # multi-agent reconstruction 104 | frames_per_chunk: 100 # used when colaborative_reconstruction: true 105 | mask_stereo_height: 10 # 35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 106 | # compared to the color camera of the azure kinect 107 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 108 | mask_tof_height: 10 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 109 | mask_tof_width: 10 #35 # 35 # in pixels. With resolution 256 this would be 35 110 | mask_width: 10 # general sensor 111 | mask_height: 10 # general sensor 112 | pad: 0 # pad ground truth grid (not needed, but all results are using it) 113 | min_depth_stereo: 0.5 # 0.5 (in meters) 114 | max_depth_stereo: 3.0 # 2.5 (in meters) 115 | min_depth_tof: 0.0 # 0.5 (in meters) 116 | max_depth_tof: 12.3 # 3.86 (in meters) 117 | min_depth: 0.0 # general sensor (in meters) 118 | max_depth: 12.3 # general sensor (in meters) 119 | root_dir: /cluster/work/cvl/esandstroem/data/scene3d # Path to data folder 120 | dataset: Scene3D # dataset 121 | input: [tof, stereo] # list of sensors to fuse. 
When FILTERING_MODEL.do: False, this list can consist of only one sensor 122 | target: gt # ground truth depth label 123 | resx_stereo: 256 # I assume square input images 124 | resy_stereo: 256 125 | resx_tof: 256 126 | resy_tof: 256 127 | resx: 256 # default settings 128 | resy: 256 129 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/scene3d/stonewall.txt 130 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/scene3d/stonewall.txt 131 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/scene3d/copyroom.txt 132 | init_value: 0.0 # init value of tsdf grids 133 | trunc_value: 0.05 # truncation distance 134 | -------------------------------------------------------------------------------- /configs/routing/replica.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True 3 | experiment_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing # path where the logging is done and the models are saved. OLD: /cluster/work/cvl/esandstroem/src/late_fusion_3dconvnet/workspace/routing 4 | log_freq: 500 # how many batch steps before logging the training loss 5 | seed: 52 # seed for shuffling operations 6 | MODEL: 7 | contraction: 64 8 | normalization: False 9 | LOSS: 10 | name: gradweighted + uncertainty # gradweighteduncertainty or gradweighted + uncertainty or uncertainty or VNL + gradweighted + uncertainty or VNL + gradweighteduncertainty 11 | crop_fraction: 0. 12 | vmin: 0.05 13 | vmax: 12.3 14 | weight_scale: 10. # only relevant if the gradweighted term is used 15 | lmbda: 0.06 16 | completion: False # If completion is True, all pixels incur a loss, while, if it is false, on those with a valid input value incur a loss. 17 | TRAINING: 18 | train_batch_size: 2 19 | train_shuffle: True 20 | val_batch_size: 2 21 | val_shuffle: False 22 | n_epochs: 1000 23 | TESTING: 24 | test_batch_size: 1 25 | test_shuffle: False 26 | model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/psmnet/model/best.pth.tar 27 | output_path: 28 | OPTIMIZATION: 29 | lr: 1.e-05 30 | rho: 0.95 31 | eps: 1.e-07 32 | momentum: 0.9 33 | weight_decay: 0.01 34 | accumulate: True 35 | accumulation_steps: 16 36 | alternate: False 37 | alternate_steps: 5 38 | ROUTING: 39 | intensity_grad: False # weather to load the grayscale image and its gradient and feed to the routing network 40 | DATA: 41 | early_fusion_asynch: False # asynchronous early fusion experiment 42 | downsampling: [1, 1] # first entry is the downsampling rate of the 1st sensor 43 | # 2nd entry is the downsampling rate of the 2nd sensor 44 | mask_stereo_height: 10 #35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 45 | # compared to the color camera of the azure kinect 46 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 47 | mask_tof_height: 10 # 52 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 48 | mask_tof_width: 10 #35 # 35 # in pixels. With resolution 256 this would be 35 49 | mask_width: 10 50 | mask_height: 10 51 | pad: 2 52 | min_depth_stereo: 0.0 # 0.5 53 | max_depth_stereo: 12.3 # 2.5 54 | min_depth_tof: 0.0 # 0.5 55 | max_depth_tof: 12.3 # 3.86 56 | min_depth: 0.0 57 | max_depth: 12.3 58 | root_dir: TMPDIR # use TMPDIR for the euler cluster. 
Path to data folder 59 | dataset: Replica 60 | input: [tof, stereo] # 61 | target: depth_gt 62 | resx: 512 63 | resy: 512 64 | resx_stereo: 512 65 | resy_stereo: 512 66 | resx_tof: 512 # a tof camera has typically half the resolution of an rgb camera 67 | resy_tof: 512 68 | focalx: 256 # focal length of intrinsic matrix - only used when virtual normal loss is applied 69 | focaly: 256 70 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/train.txt 71 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/val.txt 72 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt 73 | init_value: 0.05 # truncation distance -------------------------------------------------------------------------------- /data/mvs_depth_estimation/downsample_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | scene = 'copyroom' 4 | 5 | data_path = '/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' + '/' + scene + '_downsampled' 6 | 7 | remove_list = [] 8 | image_list = sorted(os.listdir(data_path + '/images')) 9 | cfg = data_path + '/dense/stereo/patch-match_new.cfg' 10 | images = data_path + '/sparse/images_new.txt' 11 | tof_list = sorted(os.listdir(data_path + '/' + scene + '_png/depth')) 12 | 13 | with open(data_path + '/dense/stereo/patch-match.cfg', 'r') as cfg_file, \ 14 | open(cfg, 'w') as cfg_file_new, \ 15 | open(data_path + '/sparse/images.txt', 'r') as traj_file, \ 16 | open(images, 'w') as traj_file_new: 17 | 18 | cfg_file = cfg_file.readlines() 19 | traj_file = traj_file.readlines() 20 | 21 | for k, frame in enumerate(image_list): 22 | if k % 10 != 0: 23 | # pass 24 | remove_list.append(data_path + '/images/' + image_list[k]) 25 | remove_list.append(data_path + '/' + scene + '_png/depth/' + tof_list[k]) 26 | else: 27 | traj_file_new.write(str(k//10 + 1) + ' ' + ' '.join(traj_file[2*k].split(' ')[1:])) 28 | traj_file_new.write('\n') 29 | cfg_file_new.write(cfg_file[2*k]) 30 | cfg_file_new.write(cfg_file[2*k + 1]) 31 | 32 | 33 | for path in remove_list: 34 | os.system('rm ' + path) 35 | 36 | # remove old patch-match.cfg and images.txt 37 | os.system('rm ' + data_path + '/dense/stereo/patch-match.cfg') 38 | os.system('rm ' + data_path + '/sparse/images.txt') 39 | 40 | # rename new files to old names 41 | os.system('mv ' + data_path + '/sparse/images_new.txt' + ' ' + data_path + '/sparse/images.txt') 42 | os.system('mv ' + data_path + '/dense/stereo/patch-match_new.cfg' + ' ' + data_path + '/dense/stereo/patch-match.cfg') -------------------------------------------------------------------------------- /data/mvs_depth_estimation/move_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | scene = 'cactusgarden' 4 | 5 | data_target_path = '/home/esandstroem/scratch-second/euler_work/data/scene3d' + '/' + scene 6 | data_source_path = '/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' + '/' + scene 7 | 8 | copy_list = dict() 9 | 10 | image_list = sorted(os.listdir(data_source_path + '/images')) 11 | stereo_list = sorted(os.listdir(data_source_path + '/dense/stereo/depth_maps')) 12 | 13 | # remove all entries containing 'photometric' from the stereo_list 14 | stereo_list = stereo_list[::2] 15 | 16 | tof_list = sorted(os.listdir(data_source_path + '/' + scene + '_png/depth')) 17 | 18 | for k, frame in 
enumerate(sorted(os.listdir(data_source_path + '/images'))): 19 | if k % 10 == 0: 20 | copy_list[data_source_path + '/images/' + image_list[k]] = data_target_path + '/images/' + image_list[k] 21 | copy_list[data_source_path + '/dense/stereo/depth_maps/' + stereo_list[k]] = data_target_path + '/dense/stereo/depth_maps/' + stereo_list[k] 22 | copy_list[data_source_path + '/' + scene + '_png/depth/' + tof_list[k]] = data_target_path + '/' + scene + '_png/depth/' + tof_list[k] 23 | 24 | for path in copy_list.keys(): 25 | os.system('cp ' + path + ' ' + copy_list[path]) -------------------------------------------------------------------------------- /data/mvs_depth_estimation/reconstruct_colmap_slurm_copyroom.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --output=/scratch_net/nudel/colmap-test/log/%j.out # could not get it to work on nudel_second... #/scratch_ned/nudel_second/opportunistic_3d_capture/data/scene3d/log/%j.out 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --mem=50G 6 | 7 | 8 | PROJECT_PATH='/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' 9 | 10 | scene_string='copyroom_downsampled' 11 | 12 | for SCENE in $scene_string 13 | do 14 | # extract features 15 | colmap feature_extractor --image_path $PROJECT_PATH/$SCENE/images \ 16 | --database_path $PROJECT_PATH/$SCENE/database.db \ 17 | --ImageReader.camera_model PINHOLE \ 18 | --ImageReader.single_camera 1 \ 19 | --ImageReader.camera_params "525.0, 525.0, 319.5, 239.5" 20 | 21 | # # sequential matching along trajectory 22 | colmap sequential_matcher --database_path $PROJECT_PATH/$SCENE/database.db \ 23 | --SequentialMatching.overlap 10 24 | 25 | ## dense reconstruction 26 | mkdir -p $PROJECT_PATH/$SCENE/dense/sparse 27 | 28 | # build sparse model 29 | colmap point_triangulator --database_path $PROJECT_PATH/$SCENE/database.db \ 30 | --image_path $PROJECT_PATH/$SCENE/images \ 31 | --input_path $PROJECT_PATH/$SCENE/sparse \ 32 | --output_path $PROJECT_PATH/$SCENE/dense/sparse \ 33 | --Mapper.ba_refine_focal_length 0 \ 34 | --Mapper.ba_refine_extra_param 0 35 | 36 | # # create dense workspace folders 37 | cp -r $PROJECT_PATH/$SCENE/images $PROJECT_PATH/$SCENE/dense/ 38 | mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/depth_maps 39 | mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/normal_maps 40 | 41 | 42 | # # compute dense depth maps 43 | colmap patch_match_stereo --workspace_path $PROJECT_PATH/$SCENE/dense \ 44 | --PatchMatchStereo.depth_min 0.5 \ 45 | --PatchMatchStereo.depth_max 10.0 46 | 47 | # # # fuse stereo depth maps 48 | # colmap stereo_fusion --workspace_path PROJECT_PATH/${SCENE}/dense \ 49 | # --output_path PROJECT_PATH/${SCENE}/dense/fused.ply 50 | done 51 | 52 | 53 | -------------------------------------------------------------------------------- /data/mvs_depth_estimation/reconstruct_colmap_slurm_stonewall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --output=/scratch_net/nudel/colmap-test/log/%j.out # could not get it to work on nudel_second... 
#/scratch_ned/nudel_second/opportunistic_3d_capture/data/scene3d/log/%j.out 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --mem=50G 6 | 7 | 8 | PROJECT_PATH='/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' 9 | 10 | scene_string='stonewall_downsampled_limit_depth' 11 | 12 | for SCENE in $scene_string 13 | do 14 | # # extract features 15 | # colmap feature_extractor --image_path $PROJECT_PATH/$SCENE/images \ 16 | # --database_path $PROJECT_PATH/$SCENE/database.db \ 17 | # --ImageReader.camera_model PINHOLE \ 18 | # --ImageReader.single_camera 1 \ 19 | # --ImageReader.camera_params "525.0, 525.0, 319.5, 239.5" 20 | 21 | # sequential matching along trajectory 22 | # colmap sequential_matcher --database_path $PROJECT_PATH/$SCENE/database.db \ 23 | # --SequentialMatching.overlap 10 24 | 25 | # ## dense reconstruction 26 | # mkdir -p $PROJECT_PATH/$SCENE/dense/sparse 27 | 28 | # # build sparse model 29 | # colmap point_triangulator --database_path $PROJECT_PATH/$SCENE/database.db \ 30 | # --image_path $PROJECT_PATH/$SCENE/images \ 31 | # --input_path $PROJECT_PATH/$SCENE/sparse \ 32 | # --output_path $PROJECT_PATH/$SCENE/dense/sparse \ 33 | # --Mapper.ba_refine_focal_length 0 \ 34 | # --Mapper.ba_refine_extra_param 0 35 | 36 | # # create dense workspace folders 37 | # cp -r $PROJECT_PATH/$SCENE/images $PROJECT_PATH/$SCENE/dense/ 38 | # mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/depth_maps 39 | # mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/normal_maps 40 | 41 | 42 | # # compute dense depth maps 43 | colmap patch_match_stereo --workspace_path $PROJECT_PATH/$SCENE/dense \ 44 | --PatchMatchStereo.depth_min 0.5 \ 45 | --PatchMatchStereo.depth_max 10.0 46 | 47 | # # # fuse stereo depth maps 48 | # colmap stereo_fusion --workspace_path PROJECT_PATH/${SCENE}/dense \ 49 | # --output_path PROJECT_PATH/${SCENE}/dense/fused.ply 50 | done 51 | 52 | 53 | -------------------------------------------------------------------------------- /data/mvs_depth_estimation/setup_colmap.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | 6 | from pyquaternion import Quaternion 7 | 8 | def arg_parse(): 9 | 10 | parser = argparse.ArgumentParser() 11 | 12 | parser.add_argument('--source_path', default='/scratch-second/esandstroem/opportunistic_3d_capture/data/scene3d', type=str) 13 | parser.add_argument('--colmap_path', default='/scratch-second/esandstroem/opportunistic_3d_capture/data/scene3d', type=str) 14 | 15 | # camera options 16 | parser.add_argument('--fx', default=525.00, type=float) 17 | parser.add_argument('--fy', default=525.00, type=float) 18 | parser.add_argument('--cx', default=319.5, type=float) 19 | parser.add_argument('--cy', default=239.5, type=float) 20 | parser.add_argument('--width', default=640, type=int) 21 | parser.add_argument('--height', default=480, type=int) 22 | 23 | args = parser.parse_args() 24 | return vars(args) 25 | 26 | def main(args): 27 | scenes = ['cactusgarden', 'lounge', 'copyroom'] 28 | for scene in scenes: 29 | IMAGE_PATH = os.path.join(args['colmap_path'], scene, 'images') 30 | TRAJECTORY_PATH = os.path.join(args['colmap_path'], scene, scene + '_trajectory.log') 31 | SPARSE_PATH = os.path.join(args['colmap_path'], scene, 'sparse') 32 | DENSE_PATH = os.path.join(args['colmap_path'], scene, 'dense') 33 | STEREO_PATH = os.path.join(DENSE_PATH, 'stereo') 34 | # setup colmap workspace 35 | if not os.path.exists(args['colmap_path']): 36 | os.makedirs(args['colmap_path']) 37 | if 
not os.path.exists(IMAGE_PATH): 38 | os.makedirs(IMAGE_PATH) 39 | if not os.path.exists(SPARSE_PATH): 40 | os.makedirs(SPARSE_PATH) 41 | if not os.path.exists(DENSE_PATH): 42 | os.makedirs(DENSE_PATH) 43 | if not os.path.exists(STEREO_PATH): 44 | os.makedirs(STEREO_PATH) 45 | 46 | # write camera file 47 | with open(os.path.join(SPARSE_PATH, 'cameras.txt'), 'w') as file: 48 | file.write('1 PINHOLE {} {} {} {} {} {}'.format(args['width'], args['height'], args['fx'], args['fy'], args['cx'], args['cy'])) 49 | 50 | # write points file 51 | with open(os.path.join(SPARSE_PATH, 'points3D.txt'), 'w') as file: 52 | pass 53 | 54 | poses = dict() 55 | # retrieve pose dictionary 56 | with open(TRAJECTORY_PATH, 'r') as file: 57 | 58 | for rgb_name in sorted(os.listdir(IMAGE_PATH)): 59 | # extract the camera extrinsics by reading 5 lines 60 | metadata = next(file) 61 | 62 | first = np.fromstring(next(file), count=4, sep=' ', dtype=float) #[:-1].split(' ') 63 | second = np.fromstring(next(file), count=4, sep=' ', dtype=float) 64 | third = np.fromstring(next(file), count=4, sep=' ', dtype=float) 65 | fourth = np.fromstring(next(file), count=4, sep=' ', dtype=float) 66 | 67 | extrinsics = np.zeros((4,4)) 68 | extrinsics[0, :] = first 69 | extrinsics[1, :] = second 70 | extrinsics[2, :] = third 71 | extrinsics[3, :] = fourth 72 | 73 | # print(np.matmul(extrinsics[:3, :3] , np.transpose(extrinsics[:3, :3]))) 74 | # invert for colmap 75 | extrinsics = np.linalg.inv(extrinsics) 76 | 77 | rotation = Quaternion(matrix=extrinsics[:3, :3], rtol=1e-04, atol=1e-04) 78 | rotation = [rotation.elements[0], rotation.elements[1], rotation.elements[2], rotation.elements[3]] 79 | translation = list(extrinsics[:3, 3]) 80 | 81 | pose = rotation + translation 82 | pose = [str(p) for p in pose] 83 | pose = " ".join(pose) 84 | 85 | # check correct length of pose 86 | assert len(pose.split(' ')) == 7 87 | # print(rgb_name) 88 | poses[rgb_name] = pose 89 | 90 | # write and copy images 91 | with open(os.path.join(SPARSE_PATH, 'images.txt'), 'w') as file, open(os.path.join(STEREO_PATH, 'patch-match.cfg'), 'w') as cfg: 92 | 93 | for i, rgb_name in enumerate(sorted(os.listdir(IMAGE_PATH))): 94 | 95 | # add rgb name to patch-match.cfg file 96 | cfg.write(rgb_name + '\n') 97 | # limit the number of source images during reconstruction to 20 to reduce memory requirement 98 | cfg.write('__auto__, 20\n') 99 | # if specifying source images manually 100 | # get source images 101 | # start_indx = max(0, i - 10) 102 | # end_indx = min(len(matches), i + 10) 103 | # source_images = [] 104 | # for j in range(start_indx, end_indx): 105 | # if j == i: 106 | # continue 107 | # source_images.append(timestamp_mapping[matches[j][1]].replace('rgb/', '')) 108 | # source_images = ", ".join(source_images) 109 | # cfg.write('{}\n'.format(source_images)) 110 | 111 | # retrieve pose for the rgb frame 112 | image_line = '{} '.format(i + 1) + poses[rgb_name] + ' {} '.format(1) + rgb_name + '\n' + '\n' 113 | file.write(image_line) 114 | 115 | 116 | if __name__ == '__main__': 117 | args = arg_parse() 118 | main(args) -------------------------------------------------------------------------------- /data/mvs_depth_estimation/setup_colmap_corbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | 6 | from data3d.utils.associate import associate 7 | from pyquaternion import Quaternion 8 | 9 | def arg_parse(): 10 | 11 | parser = argparse.ArgumentParser() 12 | 13 | 
parser.add_argument('--source_path') 14 | parser.add_argument('--colmap_path') 15 | 16 | # dataset options 17 | parser.add_argument('--sequence_id', default='H1') 18 | 19 | # camera options 20 | parser.add_argument('--fx', default=468.60, type=float) 21 | parser.add_argument('--fy', default=468.61, type=float) 22 | parser.add_argument('--cx', default=318.27, type=float) 23 | parser.add_argument('--cy', default=243.99, type=float) 24 | parser.add_argument('--width', default=640, type=int) 25 | parser.add_argument('--height', default=480, type=int) 26 | 27 | args = parser.parse_args() 28 | return vars(args) 29 | 30 | def main(args): 31 | 32 | IMAGE_PATH = os.path.join(args['colmap_path'], 'images') 33 | SPARSE_PATH = os.path.join(args['colmap_path'], 'sparse') 34 | DENSE_PATH = os.path.join(args['colmap_path'], 'dense') 35 | STEREO_PATH = os.path.join(DENSE_PATH, 'stereo') 36 | # setup colmap workspace 37 | if not os.path.exists(args['colmap_path']): 38 | os.makedirs(args['colmap_path']) 39 | if not os.path.exists(IMAGE_PATH): 40 | os.makedirs(IMAGE_PATH) 41 | if not os.path.exists(SPARSE_PATH): 42 | os.makedirs(SPARSE_PATH) 43 | if not os.path.exists(DENSE_PATH): 44 | os.makedirs(DENSE_PATH) 45 | if not os.path.exists(STEREO_PATH): 46 | os.makedirs(STEREO_PATH) 47 | 48 | # write camera file 49 | with open(os.path.join(SPARSE_PATH, 'cameras.txt'), 'w') as file: 50 | file.write('1 PINHOLE {} {} {} {} {} {}'.format(args['width'], args['height'], args['fx'], args['fy'], args['cx'], args['cy'])) 51 | 52 | # write points file 53 | with open(os.path.join(SPARSE_PATH, 'points3D.txt'), 'w') as file: 54 | pass 55 | 56 | # copy images as build images file 57 | 58 | # build dictionary timestamp -> path 59 | timestamp_mapping = {} 60 | with open(os.path.join(args['source_path'], '{}_pre_registereddata/rgb.txt'.format(args['sequence_id'])), 'r') as file: 61 | for line in file: 62 | 63 | # skip comments 64 | if line[0] == '#': 65 | continue 66 | 67 | line = line.rstrip() 68 | timestamp, file_path = line.split(' ') 69 | timestamp_mapping[float(timestamp)] = file_path.replace('\\', '/') 70 | 71 | # iterate through trajectory 72 | poses = {} 73 | 74 | with open(os.path.join(args['source_path'], '{}_Trajectory/groundtruth.txt'.format(args['sequence_id'])), 'r') as file: 75 | for line in file: 76 | # skip comments 77 | if line[0] == '#': 78 | continue 79 | 80 | # parse and reformat data 81 | line = line.rstrip() 82 | elem = line.split(' ') 83 | timestamp = float(elem[0]) 84 | 85 | # transform pose 86 | rotation = [float(e) for e in elem[4:]] 87 | rotation = Quaternion(rotation[-1], rotation[0], rotation[1], rotation[2]) 88 | rotation = rotation.rotation_matrix 89 | translation = [float(e) for e in elem[1:4]] 90 | 91 | extrinsics = np.eye(4) 92 | extrinsics[:3, :3] = rotation 93 | extrinsics[:3, 3] = translation 94 | 95 | # # invert for colmap 96 | extrinsics = np.linalg.inv(extrinsics) 97 | 98 | rotation = Quaternion(matrix=extrinsics[:3, :3]) 99 | rotation = [rotation.elements[0], rotation.elements[1], rotation.elements[2], rotation.elements[3]] 100 | translation = list(extrinsics[:3, 3]) 101 | 102 | pose = rotation + translation 103 | pose = [str(p) for p in pose] 104 | pose = " ".join(pose) 105 | 106 | # check correct length of pose 107 | assert len(pose.split(' ')) == 7 108 | 109 | poses[timestamp] = pose 110 | 111 | matches = associate(poses, timestamp_mapping, offset=0.0, max_difference=0.02) 112 | 113 | # write and copy images 114 | with open(os.path.join(SPARSE_PATH, 'images.txt'), 'w') as file, 
open(os.path.join(STEREO_PATH, 'patch-match.cfg'), 'w') as cfg: 115 | for i, (t_p, t_f) in enumerate(matches): 116 | 117 | # get data 118 | try: 119 | pose = poses[t_p] 120 | file_path = timestamp_mapping[t_f] 121 | except KeyError: 122 | continue 123 | 124 | image_line = '{} '.format(i + 1) + pose + ' {} '.format(1) + file_path.replace('rgb/', '') + '\n' + '\n' 125 | file.write(image_line) 126 | 127 | source_image = os.path.join(args['source_path'], '{}_pre_registereddata'.format(args['sequence_id']), file_path) 128 | target_image = os.path.join(IMAGE_PATH, file_path.replace('rgb/', '')) 129 | os.system('cp -p {} {}'.format(source_image, target_image)) 130 | 131 | # write patch match config file 132 | cfg.write(file_path.replace('rgb/', '') + '\n') 133 | 134 | # get source images 135 | start_indx = max(0, i - 10) 136 | end_indx = min(len(matches), i + 10) 137 | source_images = [] 138 | for j in range(start_indx, end_indx): 139 | if j == i: 140 | continue 141 | source_images.append(timestamp_mapping[matches[j][1]].replace('rgb/', '')) 142 | 143 | # source_images = ", ".join(source_images) 144 | # cfg.write('{}\n'.format(source_images)) 145 | cfg.write('__auto__, 20\n') 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | args = arg_parse() 151 | main(args) -------------------------------------------------------------------------------- /data/save_every_tenth_frame.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | scene = 'lounge' 4 | 5 | data_path = '/home/esandstroem/scratch-second/euler_work/data/scene3d' + '/' + scene 6 | 7 | remove_list = [] 8 | 9 | image_list = sorted(os.listdir(data_path + '/images')) 10 | tof_list = sorted(os.listdir(data_path + '/' + scene + '_png/depth')) 11 | 12 | for k, frame in enumerate(sorted(os.listdir(data_path + '/images'))): 13 | if k % 10 != 0: 14 | remove_list.append(data_path + '/images/' + image_list[k]) 15 | remove_list.append(data_path + '/' + scene + '_png/depth/' + tof_list[k]) 16 | 17 | 18 | for path in remove_list: 19 | os.system('rm ' + path) 20 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .replica import Replica 2 | from .corbs import CoRBS 3 | from .scene3d import Scene3D 4 | -------------------------------------------------------------------------------- /dataset/associate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Software License Agreement (BSD License) 3 | # 4 | # Copyright (c) 2013, Juergen Sturm, TUM 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above 14 | # copyright notice, this list of conditions and the following 15 | # disclaimer in the documentation and/or other materials provided 16 | # with the distribution. 17 | # * Neither the name of TUM nor the names of its 18 | # contributors may be used to endorse or promote products derived 19 | # from this software without specific prior written permission. 
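Both COLMAP setup scripts above cap the number of source images per reference view by writing `__auto__, 20` into `patch-match.cfg`, which keeps patch-match memory in check. A condensed sketch of that pattern follows; the helper name and image list are placeholders, not part of the repository.

```python
import os

def write_patch_match_cfg(stereo_path, image_names, max_src_images=20):
    # One reference image per pair of lines; "__auto__, N" lets COLMAP pick
    # up to N source images automatically for that reference image.
    with open(os.path.join(stereo_path, "patch-match.cfg"), "w") as cfg:
        for name in image_names:
            cfg.write(name + "\n")
            cfg.write("__auto__, {}\n".format(max_src_images))
```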
20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 | # POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # Requirements: 35 | # sudo apt-get install python-argparse 36 | 37 | """ 38 | The Kinect provides the color and depth images in an un-synchronized way. This means that the set of time stamps from the color images do not intersect with those of the depth images. Therefore, we need some way of associating color images to depth images. 39 | For this purpose, you can use the ''associate.py'' script. It reads the time stamps from the rgb.txt file and the depth.txt file, and joins them by finding the best matches. 40 | """ 41 | 42 | import argparse 43 | 44 | 45 | def read_file_list(filename): 46 | """ 47 | Reads a trajectory from a text file. 48 | 49 | File format: 50 | The file format is "stamp d1 d2 d3 ...", where stamp denotes the time stamp (to be matched) 51 | and "d1 d2 d3.." is arbitary data (e.g., a 3D position and 3D orientation) associated to this timestamp. 52 | 53 | Input: 54 | filename -- File name 55 | 56 | Output: 57 | dict -- dictionary of (stamp,data) tuples 58 | 59 | """ 60 | file = open(filename) 61 | data = file.read() 62 | lines = data.replace(",", " ").replace("\t", " ").split("\n") 63 | list = [ 64 | [v.strip() for v in line.split(" ") if v.strip() != ""] 65 | for line in lines 66 | if len(line) > 0 and line[0] != "#" 67 | ] 68 | list = [(float(l[0]), l[1:]) for l in list if len(l) > 1] 69 | return dict(list) 70 | 71 | 72 | def associate(first_list, second_list, offset, max_difference): 73 | """ 74 | Associate two dictionaries of (stamp,data). As the time stamps never match exactly, we aim 75 | to find the closest match for every input tuple. 
76 | 77 | Input: 78 | first_list -- first dictionary of (stamp,data) tuples 79 | second_list -- second dictionary of (stamp,data) tuples 80 | offset -- time offset between both dictionaries (e.g., to model the delay between the sensors) 81 | max_difference -- search radius for candidate generation 82 | Output: 83 | matches -- list of matched tuples ((stamp1,data1),(stamp2,data2)) 84 | 85 | """ 86 | first_keys = list(first_list.keys()) 87 | second_keys = list(second_list.keys()) 88 | potential_matches = [ 89 | (abs(a - (b + offset)), a, b) 90 | for a in first_keys 91 | for b in second_keys 92 | if abs(a - (b + offset)) < max_difference 93 | ] 94 | potential_matches.sort() 95 | matches = [] 96 | for diff, a, b in potential_matches: 97 | if a in first_keys and b in second_keys: 98 | first_keys.remove(a) 99 | second_keys.remove(b) 100 | matches.append((a, b)) 101 | 102 | matches.sort() 103 | return matches 104 | 105 | 106 | if __name__ == "__main__": 107 | 108 | # parse command line 109 | parser = argparse.ArgumentParser( 110 | description=""" 111 | This script takes two data files with timestamps and associates them 112 | """ 113 | ) 114 | parser.add_argument("first_file", help="first text file (format: timestamp data)") 115 | parser.add_argument("second_file", help="second text file (format: timestamp data)") 116 | parser.add_argument( 117 | "--first_only", 118 | help="only output associated lines from first file", 119 | action="store_true", 120 | ) 121 | parser.add_argument( 122 | "--offset", 123 | help="time offset added to the timestamps of the second file (default: 0.0)", 124 | default=0.0, 125 | ) 126 | parser.add_argument( 127 | "--max_difference", 128 | help="maximally allowed time difference for matching entries (default: 0.02)", 129 | default=0.02, 130 | ) 131 | args = parser.parse_args() 132 | 133 | first_list = read_file_list(args.first_file) 134 | second_list = read_file_list(args.second_file) 135 | 136 | matches = associate( 137 | first_list, second_list, float(args.offset), float(args.max_difference) 138 | ) 139 | 140 | if args.first_only: 141 | for a, b in matches: 142 | print("%f %s" % (a, " ".join(first_list[a]))) 143 | else: 144 | for a, b in matches: 145 | print( 146 | "%f %s %f %s" 147 | % ( 148 | a, 149 | " ".join(first_list[a]), 150 | b - float(args.offset), 151 | " ".join(second_list[b]), 152 | ) 153 | ) 154 | -------------------------------------------------------------------------------- /dataset/colmap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 
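# A minimal usage sketch of read_file_list and associate from
# dataset/associate.py above; the CoRBS desk paths come from
# lists/corbs/desk.txt and are placeholders for wherever the data lives.
# This mirrors how dataset/corbs.py further below pairs ground-truth pose
# time stamps with RGB frames.
from dataset.associate import read_file_list, associate

poses = read_file_list("desk/data/D1_Trajectory/groundtruth.txt")
rgb_frames = read_file_list("desk/data/D1_pre_registereddata/rgb.txt")

# each match is a (pose_timestamp, rgb_timestamp) pair at most 20 ms apart
matches = associate(poses, rgb_frames, offset=0.0, max_difference=0.02)
pose_to_rgb = {t_pose: t_rgb for t_pose, t_rgb in matches}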
17 | 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de) 31 | 32 | import numpy as np 33 | import struct 34 | import collections 35 | 36 | 37 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 38 | """Read and unpack the next bytes from a binary file. 39 | :param fid: 40 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 41 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 42 | :param endian_character: Any of {@, =, <, >, !} 43 | :return: Tuple of read and unpacked values. 44 | """ 45 | data = fid.read(num_bytes) 46 | return struct.unpack(endian_character + format_char_sequence, data) 47 | 48 | 49 | def read_cameras_binary(path_to_model_file): 50 | """ 51 | see: src/base/reconstruction.cc 52 | void Reconstruction::WriteCamerasBinary(const std::string& path) 53 | void Reconstruction::ReadCamerasBinary(const std::string& path) 54 | """ 55 | cameras = {} 56 | with open(path_to_model_file, "rb") as fid: 57 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 58 | for camera_line_index in range(num_cameras): 59 | camera_properties = read_next_bytes( 60 | fid, num_bytes=24, format_char_sequence="iiQQ" 61 | ) 62 | camera_id = camera_properties[0] 63 | model_id = camera_properties[1] # not used 64 | width = camera_properties[2] # not used 65 | height = camera_properties[3] # not used 66 | num_params = 4 67 | params = read_next_bytes( 68 | fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params 69 | ) 70 | 71 | cameras[camera_id] = params 72 | 73 | return cameras 74 | 75 | 76 | def read_array(path): 77 | with open(path, "rb") as fid: 78 | width, height, channels = np.genfromtxt( 79 | fid, delimiter="&", max_rows=1, usecols=(0, 1, 2), dtype=int 80 | ) 81 | fid.seek(0) 82 | num_delimiter = 0 83 | byte = fid.read(1) 84 | while True: 85 | if byte == b"&": 86 | num_delimiter += 1 87 | if num_delimiter >= 3: 88 | break 89 | byte = fid.read(1) 90 | array = np.fromfile(fid, np.float32) 91 | 92 | array = array.reshape((width, height, channels), order="F") 93 | return np.transpose(array, (1, 0, 2)).squeeze() 94 | 95 | 96 | BaseImage = collections.namedtuple( 97 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"] 98 | ) 99 | Point3D = collections.namedtuple( 100 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"] 101 | ) 102 | 103 | 104 | class Image(BaseImage): 105 | def qvec2rotmat(self): 106 | return qvec2rotmat(self.qvec) 107 | 108 | 109 | def qvec2rotmat(qvec): 110 | return np.array( 111 | [ 112 | [ 113 | 1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2, 114 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 115 | 2 * qvec[3] * qvec[1] + 2 * 
qvec[0] * qvec[2], 116 | ], 117 | [ 118 | 2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 119 | 1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2, 120 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1], 121 | ], 122 | [ 123 | 2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 124 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 125 | 1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2, 126 | ], 127 | ] 128 | ) 129 | 130 | 131 | def read_images_binary(path_to_model_file): 132 | """ 133 | see: src/base/reconstruction.cc 134 | void Reconstruction::ReadImagesBinary(const std::string& path) 135 | void Reconstruction::WriteImagesBinary(const std::string& path) 136 | """ 137 | images = {} 138 | with open(path_to_model_file, "rb") as fid: 139 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 140 | for image_index in range(num_reg_images): 141 | binary_image_properties = read_next_bytes( 142 | fid, num_bytes=64, format_char_sequence="idddddddi" 143 | ) 144 | image_id = binary_image_properties[0] 145 | qvec = np.array(binary_image_properties[1:5]) 146 | tvec = np.array(binary_image_properties[5:8]) 147 | camera_id = binary_image_properties[8] 148 | image_name = "" 149 | current_char = read_next_bytes(fid, 1, "c")[0] 150 | while current_char != b"\x00": # look for the ASCII 0 entry 151 | image_name += current_char.decode("utf-8") 152 | current_char = read_next_bytes(fid, 1, "c")[0] 153 | num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ 154 | 0 155 | ] 156 | x_y_id_s = read_next_bytes( 157 | fid, 158 | num_bytes=24 * num_points2D, 159 | format_char_sequence="ddq" * num_points2D, 160 | ) 161 | xys = np.column_stack( 162 | [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))] 163 | ) 164 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 165 | images[image_id] = Image( 166 | id=image_id, 167 | qvec=qvec, 168 | tvec=tvec, 169 | camera_id=camera_id, 170 | name=image_name, 171 | xys=xys, 172 | point3D_ids=point3D_ids, 173 | ) 174 | return images 175 | 176 | 177 | def read_images(path): 178 | 179 | images = {} 180 | 181 | with open(path, "r") as file: 182 | for i, line in enumerate(file): 183 | if i % 2 == 0: 184 | if line[0] == "#": 185 | continue 186 | 187 | elements = line.rstrip().split(" ") 188 | 189 | image_id = elements[0] 190 | 191 | qw = elements[1] 192 | qx = elements[2] 193 | qy = elements[3] 194 | qz = elements[4] 195 | 196 | tx = elements[5] 197 | ty = elements[6] 198 | tz = elements[7] 199 | 200 | camera_id = elements[8] 201 | 202 | name = elements[9] 203 | 204 | quaternion = np.asarray([float(qw), float(qx), float(qy), float(qz)]) 205 | translation = np.asarray([float(tx), float(ty), float(tz)]) 206 | 207 | images[str(image_id)] = {} 208 | images[image_id]["camera_id"] = camera_id 209 | images[image_id]["name"] = name 210 | images[image_id]["quaternion"] = quaternion 211 | images[image_id]["translation"] = translation 212 | 213 | return images 214 | 215 | 216 | def read_cameras(path): 217 | cameras = {} 218 | 219 | with open(path, "r") as file: 220 | for line in file: 221 | if line[0] == "#": 222 | continue 223 | 224 | # parse camera line 225 | elements = line.rstrip().split(" ") 226 | camera_id = elements[0] 227 | model = elements[1] 228 | width = float(elements[2]) 229 | height = float(elements[3]) 230 | fx = float(elements[4]) 231 | fy = float(elements[5]) 232 | px = float(elements[6]) 233 | py = float(elements[7]) 234 | 235 | # create camera entry 236 | cameras[camera_id] = {} 237 | cameras[camera_id]["model"] = model 238 | cameras[camera_id]["width"] = width 239 | 
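            # Note on conventions (assumed from COLMAP): the quaternion and
            # translation stored per image describe the world-to-camera
            # transform, so a camera-to-world pose is R.T and -R.T @ t with
            # R = qvec2rotmat(qvec). read_array above returns the per-pixel
            # depth of a *.geometric.bin depth map as a float32 H x W array,
            # which the dataset classes below resample to their configured
            # resolution.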
cameras[camera_id]["height"] = height 240 | cameras[camera_id]["fx"] = fx 241 | cameras[camera_id]["fy"] = fy 242 | cameras[camera_id]["px"] = px 243 | cameras[camera_id]["py"] = py 244 | 245 | return cameras 246 | -------------------------------------------------------------------------------- /dataset/corbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sys 4 | import numpy as np 5 | 6 | from skimage import io 7 | from torch.utils.data import Dataset 8 | 9 | import h5py 10 | import matplotlib.pyplot as plt 11 | 12 | # uncomment to run train_fusion and test_fusion 13 | from dataset.associate import associate 14 | from dataset.colmap import read_array 15 | 16 | 17 | from pyquaternion import Quaternion 18 | 19 | 20 | class CoRBS(Dataset): 21 | 22 | # NOTE: For now, the dataset class can only load one scene at a time 23 | def __init__(self, config_data): 24 | super(CoRBS, self).__init__() 25 | 26 | self.root_dir = os.getenv( 27 | config_data.root_dir 28 | ) # when training on local scratch 29 | 30 | # os.getenv returns none when the input does not exist. When 31 | # it returns none, we want to train on the work folder 32 | if not self.root_dir: 33 | self.root_dir = config_data.root_dir 34 | 35 | self.resolution_stereo = (config_data.resy_stereo, config_data.resx_stereo) 36 | 37 | self.resolution_tof = (config_data.resy_tof, config_data.resx_tof) 38 | 39 | self.mask_stereo_width = config_data.mask_stereo_width 40 | self.mask_stereo_height = config_data.mask_stereo_height 41 | self.mask_tof_width = config_data.mask_tof_width 42 | self.mask_tof_height = config_data.mask_tof_height 43 | 44 | self.min_depth_stereo = config_data.min_depth_stereo 45 | self.max_depth_stereo = config_data.max_depth_stereo 46 | self.min_depth_tof = config_data.min_depth_tof 47 | self.max_depth_tof = config_data.max_depth_tof 48 | 49 | self.transform = config_data.transform 50 | self.pad = config_data.pad 51 | 52 | self.scene_list = config_data.scene_list 53 | self.input = config_data.input 54 | self.target = config_data.target 55 | self.mode = config_data.mode 56 | 57 | self._scenes = [] 58 | 59 | self.__init_dataset() 60 | 61 | def __init_dataset(self): 62 | 63 | # read paths to data from scene list file 64 | with open(os.path.join(self.root_dir, self.scene_list), "r") as file: 65 | for ( 66 | line 67 | ) in ( 68 | file 69 | ): # only contains one line now since we only load one scene at a time 70 | line = line.split(" ") 71 | self._scenes.append( 72 | line[0].split("/")[0] 73 | ) # change this into append when we use more scenes 74 | trajectory_file = os.path.join( 75 | self.root_dir, line[4][:-1] 76 | ) # make this into a directory when we use more scenes 77 | rgb_file = os.path.join(self.root_dir, line[2]) 78 | depth_file = os.path.join(self.root_dir, line[3]) 79 | self.stereo_path = os.path.join(self.root_dir, line[0]) 80 | self.tof_path = os.path.join(self.root_dir, line[1]) 81 | self.rgb_path = os.path.join(self.root_dir, line[1]) 82 | 83 | # read all files for pose, rgb, and depth 84 | self.poses = {} 85 | with open(trajectory_file, "r") as file: 86 | for line in file: 87 | # skip comment lines 88 | if line[0] == "#": 89 | continue 90 | elems = line.rstrip().split(" ") 91 | timestamp = float(elems[0]) 92 | pose = [float(e) for e in elems[1:]] 93 | self.poses[timestamp] = pose 94 | 95 | self.rgb_frames = {} 96 | with open(rgb_file, "r") as file: 97 | for line in file: 98 | # skip comment lines 99 | if line[0] == "#": 100 | continue 101 | 
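                # The files parsed in this block are assumed to follow the
                # TUM RGB-D text layout: groundtruth.txt rows above are
                # "timestamp tx ty tz qx qy qz qw", while rgb.txt and
                # depth.txt rows are "timestamp relative/path", e.g.
                # "1316787079.66 rgb/1316787079.66.png" (illustrative values).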
timestamp, file_path = line.rstrip().split(" ") 102 | timestamp = float(timestamp) 103 | self.rgb_frames[timestamp] = file_path 104 | 105 | self.depth_frames = {} 106 | with open(depth_file, "r") as file: 107 | for line in file: 108 | # skip comment lines 109 | if line[0] == "#": 110 | continue 111 | timestamp, file_path = line.rstrip().split(" ") 112 | timestamp = float(timestamp) 113 | self.depth_frames[timestamp] = file_path 114 | 115 | # match pose to rgb timestamp 116 | rgb_matches = associate( 117 | self.poses, self.rgb_frames, offset=0.0, max_difference=0.02 118 | ) 119 | # build mapping databases to get matches from pose timestamp to frame timestamp 120 | self.pose_to_rgb = {t_p: t_r for (t_p, t_r) in rgb_matches} 121 | 122 | # match poses that are matched with rgb to a corresponding depth timestamp 123 | depth_matches = associate( 124 | self.pose_to_rgb, self.depth_frames, offset=0.0, max_difference=0.02 125 | ) 126 | # build mapping databases to get matches from pose timestamp to frame timestamp 127 | self.pose_to_depth = {t_p: t_d for (t_p, t_d) in depth_matches} 128 | self.poses_matched = {t_p: self.poses[t_p] for (t_p, t_r) in rgb_matches} 129 | 130 | @property 131 | def scenes(self): 132 | return self._scenes 133 | 134 | def __len__(self): 135 | return len(self.poses_matched) 136 | 137 | def __getitem__(self, item): 138 | 139 | sample = dict() 140 | sample["item_id"] = item 141 | 142 | timestamp_pose = list(self.poses_matched.keys())[item] 143 | timestamp_rgb = self.pose_to_rgb[timestamp_pose] 144 | timestamp_depth = self.pose_to_depth[timestamp_pose] 145 | 146 | # read RGB frame 147 | rgb_file = os.path.join( 148 | self.rgb_path, self.rgb_frames[timestamp_rgb].replace("\\", "/") 149 | ) 150 | rgb_image = io.imread(rgb_file).astype(np.float32) 151 | 152 | step_x = rgb_image.shape[0] / self.resolution_tof[0] 153 | step_y = rgb_image.shape[1] / self.resolution_tof[1] 154 | 155 | index_y = [int(step_y * i) for i in range(0, int(rgb_image.shape[1] / step_y))] 156 | index_x = [int(step_x * i) for i in range(0, int(rgb_image.shape[0] / step_x))] 157 | 158 | rgb_image = rgb_image[:, index_y] 159 | rgb_image = rgb_image[index_x, :] 160 | sample["image"] = np.asarray(rgb_image) / 255 161 | 162 | frame_id = "{}/{}".format(self._scenes[0], str(timestamp_pose)) 163 | sample["frame_id"] = frame_id 164 | 165 | # read kinect depth file 166 | depth_file = os.path.join( 167 | self.tof_path, self.depth_frames[timestamp_depth].replace("\\", "/") 168 | ) 169 | depth_tof = io.imread(depth_file).astype(np.float32) 170 | depth_tof /= 5000.0 171 | 172 | step_x = depth_tof.shape[0] / self.resolution_tof[0] 173 | step_y = depth_tof.shape[1] / self.resolution_tof[1] 174 | 175 | index_y = [int(step_y * i) for i in range(0, int(depth_tof.shape[1] / step_y))] 176 | index_x = [int(step_x * i) for i in range(0, int(depth_tof.shape[0] / step_x))] 177 | 178 | depth_tof = depth_tof[:, index_y] 179 | depth_tof = depth_tof[index_x, :] 180 | sample["tof_depth"] = np.asarray(depth_tof) 181 | 182 | # read colmap stereo depth file 183 | try: 184 | stereo_file = os.path.join( 185 | self.stereo_path, 186 | self.rgb_frames[timestamp_rgb].replace("rgb\\", "") + ".geometric.bin", 187 | ) 188 | depth_stereo = read_array(stereo_file) 189 | except FileNotFoundError: 190 | print("stereo frame not found") 191 | return None 192 | 193 | step_x = depth_stereo.shape[0] / self.resolution_stereo[0] 194 | step_y = depth_stereo.shape[1] / self.resolution_stereo[1] 195 | 196 | index_y = [ 197 | int(step_y * i) for i in range(0, 
int(depth_stereo.shape[1] / step_y)) 198 | ] 199 | index_x = [ 200 | int(step_x * i) for i in range(0, int(depth_stereo.shape[0] / step_x)) 201 | ] 202 | 203 | depth_stereo = depth_stereo[:, index_y] 204 | depth_stereo = depth_stereo[index_x, :] 205 | sample["stereo_depth"] = np.asarray(depth_stereo) 206 | 207 | # define mask 208 | mask = depth_stereo > self.min_depth_stereo 209 | mask = np.logical_and(mask, depth_stereo < self.max_depth_stereo) 210 | 211 | # do not integrate depth values close to the image boundary 212 | mask[0 : self.mask_stereo_height, :] = 0 213 | mask[-self.mask_stereo_height : -1, :] = 0 214 | mask[:, 0 : self.mask_stereo_width] = 0 215 | mask[:, -self.mask_stereo_width : -1] = 0 216 | sample["stereo_mask"] = mask 217 | 218 | mask = depth_tof > self.min_depth_tof 219 | mask = np.logical_and(mask, depth_tof < self.max_depth_tof) 220 | 221 | # do not integrate depth values close to the image boundary 222 | mask[0 : self.mask_tof_height, :] = 0 223 | mask[-self.mask_tof_height : -1, :] = 0 224 | mask[:, 0 : self.mask_tof_width] = 0 225 | mask[:, -self.mask_tof_width : -1] = 0 226 | sample["tof_mask"] = mask 227 | 228 | # load extrinsics 229 | rotation = self.poses_matched[timestamp_pose][3:] 230 | rotation = Quaternion(rotation[-1], rotation[0], rotation[1], rotation[2]) 231 | rotation = rotation.rotation_matrix 232 | translation = self.poses_matched[timestamp_pose][:3] 233 | 234 | extrinsics = np.eye(4) 235 | extrinsics[:3, :3] = rotation 236 | extrinsics[:3, 3] = translation 237 | sample["extrinsics"] = extrinsics 238 | 239 | # load intrinsics 240 | intrinsics_stereo = np.asarray( 241 | [ 242 | [ 243 | 468.60 * self.resolution_stereo[1] / 640, 244 | 0.0, 245 | 318.27 * self.resolution_stereo[1] / 640, 246 | ], 247 | [ 248 | 0.0, 249 | 468.61 * self.resolution_stereo[0] / 480, 250 | 243.99 * self.resolution_stereo[0] / 480, 251 | ], 252 | [0.0, 0.0, 1.0], 253 | ] 254 | ) 255 | 256 | sample["intrinsics_stereo"] = intrinsics_stereo 257 | 258 | intrinsics_tof = np.asarray( 259 | [ 260 | [ 261 | 468.60 * self.resolution_tof[1] / 640, 262 | 0.0, 263 | 318.27 * self.resolution_tof[1] / 640, 264 | ], 265 | [ 266 | 0.0, 267 | 468.61 * self.resolution_tof[0] / 480, 268 | 243.99 * self.resolution_tof[0] / 480, 269 | ], 270 | [0.0, 0.0, 1.0], 271 | ] 272 | ) 273 | 274 | sample["intrinsics_tof"] = intrinsics_tof 275 | 276 | # convert key image ndarray to compatible pytorch tensor shape. The function also converts the ndarrays to tensors, but this is not necessary as the pytorch dataloader does this anyway in a step later. 277 | if self.transform: 278 | sample = self.transform(sample) 279 | 280 | return sample 281 | 282 | def get_grid(self, scene, truncation): 283 | file = os.path.join(self.root_dir, scene, "sdf_" + scene + ".hdf") 284 | 285 | # read from hdf file! 
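        # The HDF file is assumed to hold a dense SDF volume under the key
        # "sdf" plus attributes "bbox" (3x2 world-space bounds) and
        # "voxel_size". After the truncation and padding below, a world point
        # p is meant to fall into voxel index ((p - bbox[:, 0]) / voxel_size),
        # rounded down, which is how the returned (voxels, bbox, voxel_size)
        # triple is interpreted downstream.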
286 | f = h5py.File(file, "r") 287 | voxels = np.array(f["sdf"]).astype(np.float16) 288 | 289 | voxels[voxels > truncation] = truncation 290 | voxels[voxels < -truncation] = -truncation 291 | # Add padding to grid to give more room to fusion net 292 | voxels = np.pad(voxels, self.pad, "constant", constant_values=-truncation) 293 | 294 | print(scene, voxels.shape) 295 | bbox = np.zeros((3, 2)) 296 | bbox[:, 0] = f.attrs["bbox"][:, 0] - self.pad * f.attrs["voxel_size"] * np.ones( 297 | (1, 1, 1) 298 | ) 299 | bbox[:, 1] = bbox[:, 0] + f.attrs["voxel_size"] * np.array(voxels.shape) 300 | 301 | return voxels, bbox, f.attrs["voxel_size"] 302 | -------------------------------------------------------------------------------- /dataset/scene3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | import numpy as np 5 | import re 6 | 7 | from skimage import io 8 | from skimage.color import rgb2gray 9 | from skimage import filters 10 | from torch.utils.data import Dataset 11 | 12 | # used for debugging 13 | # import matplotlib.pyplot as plt 14 | from dataset.colmap import read_array 15 | import itertools 16 | 17 | import h5py 18 | 19 | 20 | class Scene3D(Dataset): 21 | def __init__(self, config_data): 22 | self.root_dir = os.getenv(config_data.root_dir) 23 | if self.root_dir: 24 | self.root_dir += "/cluster/work/cvl/esandstroem/data/scene3D" # when training on local scratch 25 | # os.getenv returns none when the input does not exist. When 26 | # it returns none, we want to train on the work folder 27 | else: 28 | self.root_dir = config_data.root_dir 29 | 30 | self.resolution_stereo = (config_data.resy_stereo, config_data.resx_stereo) 31 | 32 | self.resolution_tof = (config_data.resy_tof, config_data.resx_tof) 33 | 34 | self.resolution = (config_data.resy, config_data.resx) 35 | 36 | self.mask_stereo_width = config_data.mask_stereo_width 37 | self.mask_stereo_height = config_data.mask_stereo_height 38 | self.mask_tof_width = config_data.mask_tof_width 39 | self.mask_tof_height = config_data.mask_tof_height 40 | self.mask_height = config_data.mask_height 41 | self.mask_width = config_data.mask_width 42 | 43 | self.min_depth_stereo = config_data.min_depth_stereo 44 | self.max_depth_stereo = config_data.max_depth_stereo 45 | self.min_depth_tof = config_data.min_depth_tof 46 | self.max_depth_tof = config_data.max_depth_tof 47 | self.min_depth = config_data.min_depth 48 | self.max_depth = config_data.max_depth 49 | 50 | self.transform = config_data.transform 51 | self.pad = config_data.pad 52 | 53 | self.scene_list = config_data.scene_list 54 | self.input = config_data.input 55 | self.target = config_data.target 56 | self.mode = config_data.mode 57 | 58 | self._scenes = [] 59 | 60 | self.sensor_line_mapping = { 61 | "rgb": 0, 62 | "camera_matrix": -1, 63 | "tof": 1, 64 | "tof_2": 1, 65 | "stereo": 2, 66 | } 67 | 68 | self._load_color() 69 | self._load_cameras() 70 | self._load_depths() 71 | 72 | def _load_depths(self): # loads the paths of the noisy depth images to a list 73 | 74 | # reading files from list 75 | self.depth_images = dict() 76 | for sensor_ in self.input: # initialize empty lists 77 | self.depth_images[sensor_] = [] 78 | 79 | with open(os.path.join(self.root_dir, self.scene_list), "r") as scene_list: 80 | for line in scene_list: 81 | if len(line) > 1: # avoid parsing empty line only containing \n 82 | line = line.split(" ") 83 | for sensor_ in self.input: 84 | if sensor_ == "tof": 85 | files = glob.glob( 86 | os.path.join( 
87 | self.root_dir, 88 | line[self.sensor_line_mapping[sensor_]], 89 | "*.png", 90 | ) 91 | ) 92 | elif sensor_ == "stereo": 93 | files = glob.glob( 94 | os.path.join( 95 | self.root_dir, 96 | line[self.sensor_line_mapping[sensor_]], 97 | "*.geometric.bin", 98 | ) 99 | ) 100 | for file in files: 101 | self.depth_images[sensor_].append(file) 102 | 103 | for sensor_ in self.depth_images.keys(): 104 | self.depth_images[sensor_] = sorted( 105 | self.depth_images[sensor_], 106 | key=lambda x: os.path.splitext(x.split("/")[-1])[0], 107 | ) 108 | 109 | def _load_color(self): 110 | self.color_images = [] 111 | 112 | # reading files from list 113 | with open(os.path.join(self.root_dir, self.scene_list), "r") as file: 114 | for line in file: 115 | if len(line) > 1: # avoid parsing empty line only containing \n 116 | line = line.split(" ") 117 | self._scenes.append(line[0].split("/")[0]) 118 | files = glob.glob( 119 | os.path.join( 120 | self.root_dir, 121 | line[self.sensor_line_mapping["rgb"]], 122 | "*.png", 123 | ) 124 | ) 125 | for file in files: 126 | self.color_images.append(file) 127 | 128 | self.color_images = sorted( 129 | self.color_images, key=lambda x: os.path.splitext(x.split("/")[-1])[0] 130 | ) 131 | 132 | def _load_cameras(self): 133 | def grouper_it(n, iterable): 134 | it = iter(iterable) 135 | while True: 136 | chunk_it = itertools.islice(it, n) 137 | try: 138 | first_el = next(chunk_it) 139 | except StopIteration: 140 | return 141 | yield itertools.chain((first_el,), chunk_it) 142 | 143 | self.cameras = dict() 144 | 145 | with open(os.path.join(self.root_dir, self.scene_list), "r") as file: 146 | for line in file: 147 | line = line.split(" ") 148 | if len(line) > 1: # avoid parsing empty line only containing \n 149 | with open( 150 | os.path.join(self.root_dir, line[-1][:-1]), "r" 151 | ) as traj_file: 152 | chunk_iterable = grouper_it(5, traj_file) 153 | for frame in chunk_iterable: 154 | frame_id = next(frame)[:-1] 155 | frame_id = re.split(r"\t+", frame_id.rstrip("\t"))[-1] 156 | first = np.fromstring( 157 | next(frame), count=4, sep=" ", dtype=float 158 | ) 159 | second = np.fromstring( 160 | next(frame), count=4, sep=" ", dtype=float 161 | ) 162 | third = np.fromstring( 163 | next(frame), count=4, sep=" ", dtype=float 164 | ) 165 | fourth = np.fromstring( 166 | next(frame), count=4, sep=" ", dtype=float 167 | ) 168 | 169 | extrinsics = np.zeros((4, 4)) 170 | extrinsics[0, :] = first 171 | extrinsics[1, :] = second 172 | extrinsics[2, :] = third 173 | extrinsics[3, :] = fourth 174 | 175 | self.cameras[ 176 | line[0].split("/")[0] + "/" + frame_id 177 | ] = extrinsics 178 | 179 | @property 180 | def scenes(self): 181 | return self._scenes 182 | 183 | def __len__(self): 184 | return len(self.color_images) 185 | 186 | def __getitem__(self, item): 187 | 188 | sample = dict() 189 | sample["item_id"] = item 190 | 191 | # load rgb image 192 | file = self.color_images[item] 193 | pathsplit = file.split("/") 194 | scene = pathsplit[-3] 195 | frame = os.path.splitext(pathsplit[-1])[0] 196 | 197 | frame_id = "{}/{}".format(scene, frame) 198 | 199 | image = io.imread(file) 200 | 201 | step_x = image.shape[0] / self.resolution[0] 202 | step_y = image.shape[1] / self.resolution[0] 203 | 204 | index_y = [int(step_y * i) for i in range(0, int(image.shape[1] / step_y))] 205 | index_x = [int(step_x * i) for i in range(0, int(image.shape[0] / step_x))] 206 | 207 | image = image[:, index_y] 208 | image = image[index_x, :] 209 | sample["image"] = np.asarray(image).astype(np.float32) / 255 210 | 
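        # The index_x/index_y construction above (and for the depth maps
        # below) is a nearest-neighbour subsampling: a stride is derived from
        # the target resolution (resy, resx) for each axis and every
        # stride-th row/column is kept, so no interpolation is applied to the
        # images or depth maps.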
211 | intensity = rgb2gray(image) # seems to be in range 0 - 1 212 | sample["intensity"] = np.asarray(intensity).astype(np.float32) 213 | grad_y = filters.sobel_h(intensity) 214 | grad_x = filters.sobel_v(intensity) 215 | grad = (grad_x ** 2 + grad_y ** 2) ** (1 / 2) 216 | sample["gradient"] = np.asarray(grad).astype(np.float32) 217 | 218 | # load noisy depth maps 219 | for sensor_ in self.input: 220 | file = self.depth_images[sensor_][item] 221 | if sensor_ == "tof": 222 | depth = io.imread(file).astype(np.float32) 223 | depth /= 1000.0 224 | elif sensor_ == "stereo": 225 | depth = read_array(file) 226 | 227 | try: 228 | step_x = depth.shape[0] / eval("self.resolution_" + sensor_ + "[0]") 229 | step_y = depth.shape[1] / eval("self.resolution_" + sensor_ + "[1]") 230 | except AttributeError: # default values used in case sensor specific parameters do not exist 231 | step_x = depth.shape[0] / self.resolution[0] 232 | step_y = depth.shape[1] / self.resolution[1] 233 | 234 | index_y = [int(step_y * i) for i in range(0, int(depth.shape[1] / step_y))] 235 | index_x = [int(step_x * i) for i in range(0, int(depth.shape[0] / step_x))] 236 | 237 | depth = depth[:, index_y] 238 | depth = depth[index_x, :] 239 | 240 | sample[sensor_ + "_depth"] = np.asarray(depth) 241 | 242 | # plt.imsave('left' +frame +'.png', sample['image']) 243 | # plt.imsave(sensor_ + '_depth' +frame +'.png', sample[sensor_ + '_depth']) 244 | 245 | # define mask 246 | try: 247 | mask = depth > eval("self.min_depth_" + sensor_) 248 | mask = np.logical_and(mask, depth < eval("self.max_depth_" + sensor_)) 249 | 250 | # do not integrate depth values close to the image boundary 251 | mask[0 : eval("self.mask_" + sensor_ + "_height"), :] = 0 252 | mask[-eval("self.mask_" + sensor_ + "_height") : -1, :] = 0 253 | mask[:, 0 : eval("self.mask_" + sensor_ + "_width")] = 0 254 | mask[:, -eval("self.mask_" + sensor_ + "_width") : -1] = 0 255 | sample[sensor_ + "_mask"] = mask 256 | except AttributeError: 257 | mask = depth > self.min_depth 258 | mask = np.logical_and(mask, depth < self.max_depth) 259 | 260 | # do not integrate depth values close to the image boundary 261 | mask[0 : self.mask_height, :] = 0 262 | mask[-self.mask_height : -1, :] = 0 263 | mask[:, 0 : self.mask_width] = 0 264 | mask[:, -self.mask_width : -1] = 0 265 | sample[sensor_ + "_mask"] = mask 266 | 267 | # load extrinsics 268 | extrinsics = self.cameras[scene + "/" + str(int(frame))] 269 | 270 | sample["extrinsics"] = extrinsics 271 | 272 | intrinsics_tof = np.asarray( 273 | [ 274 | [ 275 | 525.0 * self.resolution_tof[1] / 640, 276 | 0.0, 277 | 319.5 * self.resolution_tof[1] / 640, 278 | ], 279 | [ 280 | 0.0, 281 | 525.0 * self.resolution_tof[0] / 480, 282 | 239.5 * self.resolution_tof[0] / 480, 283 | ], 284 | [0.0, 0.0, 1.0], 285 | ] 286 | ) 287 | 288 | sample["intrinsics_tof"] = intrinsics_tof 289 | 290 | sample["intrinsics_tof_2"] = intrinsics_tof 291 | 292 | intrinsics_stereo = np.asarray( 293 | [ 294 | [ 295 | 525.0 * self.resolution_stereo[1] / 640, 296 | 0.0, 297 | 319.5 * self.resolution_stereo[1] / 640, 298 | ], 299 | [ 300 | 0.0, 301 | 525.0 * self.resolution_stereo[0] / 480, 302 | 239.5 * self.resolution_stereo[0] / 480, 303 | ], 304 | [0.0, 0.0, 1.0], 305 | ] 306 | ) 307 | 308 | sample["intrinsics_stereo"] = intrinsics_stereo 309 | 310 | sample["frame_id"] = frame_id 311 | 312 | if self.transform: 313 | sample = self.transform(sample) 314 | 315 | return sample 316 | 317 | def get_grid(self, scene, truncation): 318 | file = os.path.join(self.root_dir, scene, 
"sdf_" + scene + ".hdf") 319 | 320 | # read from hdf file! 321 | f = h5py.File(file, "r") 322 | voxels = np.array(f["sdf"]).astype(np.float16) 323 | 324 | voxels[voxels > truncation] = truncation 325 | voxels[voxels < -truncation] = -truncation 326 | # Add padding to grid to give more room to fusion net 327 | voxels = np.pad(voxels, self.pad, "constant", constant_values=-truncation) 328 | 329 | print(scene, voxels.shape) 330 | bbox = np.zeros((3, 2)) 331 | bbox[:, 0] = f.attrs["bbox"][:, 0] - self.pad * f.attrs["voxel_size"] * np.ones( 332 | (1, 1, 1) 333 | ) 334 | bbox[:, 1] = bbox[:, 0] + f.attrs["voxel_size"] * np.array(voxels.shape) 335 | 336 | return voxels, bbox, f.attrs["voxel_size"] 337 | -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/images/architecture.png -------------------------------------------------------------------------------- /lists/corbs/desk.txt: -------------------------------------------------------------------------------- 1 | desk/colmap/dense/stereo/depth_maps desk/data/D1_pre_registereddata desk/data/D1_pre_registereddata/rgb.txt desk/data/D1_pre_registereddata/depth.txt desk/data/D1_Trajectory/groundtruth.txt 2 | -------------------------------------------------------------------------------- /lists/corbs/human.txt: -------------------------------------------------------------------------------- 1 | human/colmap/dense/stereo/depth_maps human/data/H1_pre_registereddata human/data/H1_pre_registereddata/rgb.txt human/data/H1_pre_registereddata/depth.txt human/data/H1_Trajectory/groundtruth.txt 2 | -------------------------------------------------------------------------------- /lists/replica/test_hotel_0.txt: -------------------------------------------------------------------------------- 1 | hotel_0/3/left_depth_gt hotel_0/3/left_depth_noise_5.0 hotel_0/3/left_psmnet_depth hotel_0/3/left_sgm_depth hotel_0/3/left_rgb hotel_0/3/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/replica/test_office_0.txt: -------------------------------------------------------------------------------- 1 | office_0/1/left_depth_gt office_0/1/left_depth_noise_5.0 office_0/1/left_psmnet_depth office_0/1/left_sgm_depth office_0/1/left_rgb office_0/1/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/replica/test_office_4.txt: -------------------------------------------------------------------------------- 1 | office_4/3/left_depth_gt office_4/3/left_depth_noise_5.0 office_4/3/left_psmnet_depth office_4/3/left_sgm_depth office_4/3/left_rgb office_4/3/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/replica/test_office_4_hotel_0_office_0.txt: -------------------------------------------------------------------------------- 1 | hotel_0/3/left_depth_gt hotel_0/3/left_depth_noise_5.0 hotel_0/3/left_psmnet_depth hotel_0/3/left_sgm_depth hotel_0/3/left_rgb hotel_0/3/left_camera_matrix 2 | office_4/3/left_depth_gt office_4/3/left_depth_noise_5.0 office_4/3/left_psmnet_depth office_4/3/left_sgm_depth office_4/3/left_rgb office_4/3/left_camera_matrix 3 | office_0/1/left_depth_gt office_0/1/left_depth_noise_5.0 office_0/1/left_psmnet_depth office_0/1/left_sgm_depth office_0/1/left_rgb 
office_0/1/left_camera_matrix 4 | -------------------------------------------------------------------------------- /lists/replica/train.txt: -------------------------------------------------------------------------------- 1 | apartment_1/2/left_depth_gt apartment_1/2/left_depth_noise_5.0 apartment_1/2/left_psmnet_depth apartment_1/2/left_sgm_depth apartment_1/2/left_rgb apartment_1/2/left_camera_matrix 2 | apartment_1/1/left_depth_gt apartment_1/1/left_depth_noise_5.0 apartment_1/1/left_psmnet_depth apartment_1/1/left_sgm_depth apartment_1/1/left_rgb apartment_1/1/left_camera_matrix 3 | apartment_1/3/left_depth_gt apartment_1/3/left_depth_noise_5.0 apartment_1/3/left_psmnet_depth apartment_1/3/left_sgm_depth apartment_1/3/left_rgb apartment_1/3/left_camera_matrix 4 | frl_apartment_0/2/left_depth_gt frl_apartment_0/2/left_depth_noise_5.0 frl_apartment_0/2/left_psmnet_depth frl_apartment_0/2/left_sgm_depth frl_apartment_0/2/left_rgb frl_apartment_0/2/left_camera_matrix 5 | frl_apartment_0/1/left_depth_gt frl_apartment_0/1/left_depth_noise_5.0 frl_apartment_0/1/left_psmnet_depth frl_apartment_0/1/left_sgm_depth frl_apartment_0/1/left_rgb frl_apartment_0/1/left_camera_matrix 6 | frl_apartment_0/3/left_depth_gt frl_apartment_0/3/left_depth_noise_5.0 frl_apartment_0/3/left_psmnet_depth frl_apartment_0/3/left_sgm_depth frl_apartment_0/3/left_rgb frl_apartment_0/3/left_camera_matrix 7 | office_1/2/left_depth_gt office_1/2/left_depth_noise_5.0 office_1/2/left_psmnet_depth office_1/2/left_sgm_depth office_1/2/left_rgb office_1/2/left_camera_matrix 8 | office_1/1/left_depth_gt office_1/1/left_depth_noise_5.0 office_1/1/left_psmnet_depth office_1/1/left_sgm_depth office_1/1/left_rgb office_1/1/left_camera_matrix 9 | office_1/3/left_depth_gt office_1/3/left_depth_noise_5.0 office_1/3/left_psmnet_depth office_1/3/left_sgm_depth office_1/3/left_rgb office_1/3/left_camera_matrix 10 | room_2/2/left_depth_gt room_2/2/left_depth_noise_5.0 room_2/2/left_psmnet_depth room_2/2/left_sgm_depth room_2/2/left_rgb room_2/2/left_camera_matrix 11 | room_2/1/left_depth_gt room_2/1/left_depth_noise_5.0 room_2/1/left_psmnet_depth room_2/1/left_sgm_depth room_2/1/left_rgb room_2/1/left_camera_matrix 12 | room_2/3/left_depth_gt room_2/3/left_depth_noise_5.0 room_2/3/left_psmnet_depth room_2/3/left_sgm_depth room_2/3/left_rgb room_2/3/left_camera_matrix 13 | office_3/2/left_depth_gt office_3/2/left_depth_noise_5.0 office_3/2/left_psmnet_depth office_3/2/left_sgm_depth office_3/2/left_rgb office_3/2/left_camera_matrix 14 | office_3/1/left_depth_gt office_3/1/left_depth_noise_5.0 office_3/1/left_psmnet_depth office_3/1/left_sgm_depth office_3/1/left_rgb office_3/1/left_camera_matrix 15 | office_3/3/left_depth_gt office_3/3/left_depth_noise_5.0 office_3/3/left_psmnet_depth office_3/3/left_sgm_depth office_3/3/left_rgb office_3/3/left_camera_matrix 16 | room_0/2/left_depth_gt room_0/2/left_depth_noise_5.0 room_0/2/left_psmnet_depth room_0/2/left_sgm_depth room_0/2/left_rgb room_0/2/left_camera_matrix 17 | room_0/1/left_depth_gt room_0/1/left_depth_noise_5.0 room_0/1/left_psmnet_depth room_0/1/left_sgm_depth room_0/1/left_rgb room_0/1/left_camera_matrix 18 | room_0/3/left_depth_gt room_0/3/left_depth_noise_5.0 room_0/3/left_psmnet_depth room_0/3/left_sgm_depth room_0/3/left_rgb room_0/3/left_camera_matrix 19 | -------------------------------------------------------------------------------- /lists/replica/val.txt: -------------------------------------------------------------------------------- 1 | frl_apartment_1/1/left_depth_gt 
frl_apartment_1/1/left_depth_noise_5.0 frl_apartment_1/1/left_psmnet_depth frl_apartment_1/1/left_sgm_depth frl_apartment_1/1/left_rgb frl_apartment_1/1/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/scene3d/copyroom.txt: -------------------------------------------------------------------------------- 1 | copyroom/images copyroom/copyroom_png/depth copyroom/dense/stereo/depth_maps copyroom/copyroom_trajectory.log 2 | 3 | -------------------------------------------------------------------------------- /lists/scene3d/stonewall.txt: -------------------------------------------------------------------------------- 1 | stonewall/images stonewall/stonewall_png/depth stonewall/dense/stereo/depth_maps stonewall/stonewall_trajectory.log 2 | -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet_routedfusion/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet_routing/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet_routing/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet_routing_routedfusion/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet_routing_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_mvs_corbs/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_mvs_corbs/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_mvs_scene3d/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_mvs_scene3d/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet_routedfusion/model/best.pth.tar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet_routing/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet_routing/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet_routing_routedfusion/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet_routing_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_tof_scene3d_collab_rec/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_tof_scene3d_collab_rec/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/sgm/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/sgm/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/sgm_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/sgm_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/tof/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/tof/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/tof_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/tof_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/modules/__init__.py -------------------------------------------------------------------------------- /modules/database.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from modules.voxelgrid import VoxelGrid, FeatureGrid 8 | 9 | from utils.metrics import evaluation 10 | 11 | 12 | class Database(Dataset): 13 | def __init__(self, dataset, config): 14 | 15 | super(Database, self).__init__() 16 | 17 | self.transform = config.transform 18 | self.initial_value = config.init_value 19 | self.trunc_value = config.trunc_value 20 | self.n_features = config.n_features # this includes the append_depth option 21 | self.sensors = config.input 22 | self.test_mode = config.test_mode 23 | self.alpha_supervision = config.alpha_supervision 24 | self.outlier_channel = config.outlier_channel 25 | 26 | self.scenes_gt = {} 27 | self.tsdf = {} 28 | self.fusion_weights = {} 29 | self.features = {} 30 | 31 | for sensor_ in config.input: 32 | self.tsdf[sensor_] = {} 33 | self.fusion_weights[sensor_] = {} 34 | self.features[sensor_] = {} 35 | 36 | self.filtered = {} # grid to store the fused sdf prediction 37 | if config.test_mode: 38 | self.sensor_weighting = {} 39 | 40 | if self.alpha_supervision: 41 | self.proxy_alpha = {} 42 | 43 | for s in dataset.scenes: 44 | grid, bbox, voxel_size = dataset.get_grid(s, truncation=self.trunc_value) 45 | if self.alpha_supervision: 46 | self.proxy_alpha[s] = dataset.get_proxy_alpha_grid(s) 47 | self.scenes_gt[s] = VoxelGrid(voxel_size, grid, bbox) 48 | 49 | for sensor in config.input: 50 | self.fusion_weights[sensor][s] = np.zeros( 51 | self.scenes_gt[s].shape, dtype=np.float16 52 | ) 53 | 54 | self.features[sensor][s] = FeatureGrid( 55 | voxel_size, self.n_features, bbox 56 | ) 57 | 58 | self.tsdf[sensor][s] = VoxelGrid( 59 | voxel_size, 60 | volume=None, 61 | bbox=bbox, 62 | initial_value=self.initial_value, 63 | ) 64 | 65 | self.filtered[s] = VoxelGrid( 66 | voxel_size, 67 | volume=None, 68 | bbox=bbox, 69 | initial_value=self.initial_value, 70 | ) 71 | if config.test_mode: 72 | if config.outlier_channel: 73 | sensor_weighting_shape = ( 74 | 2, 75 | self.scenes_gt[s].shape[0], 76 | self.scenes_gt[s].shape[1], 77 | self.scenes_gt[s].shape[2], 78 | ) 79 | self.sensor_weighting[s] = -np.ones( 80 | sensor_weighting_shape, dtype=np.float16 81 | ) 82 | else: 83 | # initialize to negative so that we know what values are initialized without needing the mask later in the visualization script 84 | self.sensor_weighting[s] = -np.ones( 85 | self.scenes_gt[s].shape, dtype=np.float16 86 | ) 87 | 88 | def __getitem__(self, item): 89 | 90 | sample = dict() 91 | 92 | sample["gt"] = self.scenes_gt[item].volume 93 | if self.alpha_supervision: 94 | sample["proxy_alpha"] = self.proxy_alpha[item] 95 | sample["origin"] = self.scenes_gt[item].origin 96 | sample["resolution"] = self.scenes_gt[item].resolution 97 | sample["filtered"] = self.filtered[item].volume 98 | if self.test_mode: 99 | sample["sensor_weighting"] = self.sensor_weighting[item] 100 | for sensor_ in self.sensors: 101 | sample["tsdf_" + sensor_] = self.tsdf[sensor_][item].volume 102 | sample["weights_" + sensor_] = self.fusion_weights[sensor_][item] 103 | sample["features_" + sensor_] = self.features[sensor_][item].volume 104 | 105 | if self.transform is not None: 106 | sample = self.transform(sample) 107 | 108 | return sample 109 | 110 | def __len__(self): 111 | return len(self.scenes_gt) 112 | 113 | def save(self, path, scene_id=None): 114 | 115 | for sensor in self.sensors: 116 | filename = scene_id + "_" + sensor 
+ ".tsdf.hf5" 117 | weightname = scene_id + "_" + sensor + ".weights.hf5" 118 | featurename = scene_id + "_" + sensor + ".features.hf5" 119 | 120 | with h5py.File(os.path.join(path, filename), "w") as hf: 121 | hf.create_dataset( 122 | "TSDF", 123 | shape=self.tsdf[sensor][scene_id].volume.shape, 124 | data=self.tsdf[sensor][scene_id].volume, 125 | compression="gzip", 126 | compression_opts=9, 127 | ) 128 | with h5py.File(os.path.join(path, weightname), "w") as hf: 129 | hf.create_dataset( 130 | "weights", 131 | shape=self.fusion_weights[sensor][scene_id].shape, 132 | data=self.fusion_weights[sensor][scene_id], 133 | compression="gzip", 134 | compression_opts=9, 135 | ) 136 | 137 | sdfname = scene_id + ".tsdf_filtered.hf5" 138 | with h5py.File(os.path.join(path, sdfname), "w") as hf: 139 | hf.create_dataset( 140 | "TSDF_filtered", 141 | shape=self.filtered[scene_id].volume.shape, 142 | data=self.filtered[scene_id].volume, 143 | compression="gzip", 144 | compression_opts=9, 145 | ) 146 | 147 | if self.test_mode: 148 | sensor_weighting_name = scene_id + ".sensor_weighting.hf5" 149 | with h5py.File(os.path.join(path, sensor_weighting_name), "w") as hf: 150 | hf.create_dataset( 151 | "sensor_weighting", 152 | shape=self.sensor_weighting[scene_id].shape, 153 | data=self.sensor_weighting[scene_id], 154 | compression="gzip", 155 | compression_opts=9, 156 | ) 157 | 158 | def evaluate(self, mode="train", workspace=None): 159 | 160 | eval_results = {} 161 | eval_results_scene_save = {} 162 | for sensor in self.sensors: 163 | eval_results[sensor] = {} 164 | eval_results_scene_save[sensor] = {} 165 | 166 | eval_results_filt = {} 167 | eval_results_scene_save_filt = {} 168 | if workspace is not None: 169 | workspace.log( 170 | "-------------------------------------------------------", mode 171 | ) 172 | for scene_id in self.scenes_gt.keys(): 173 | if workspace is None: 174 | print("Evaluating ", scene_id, "...") 175 | else: 176 | workspace.log("Evaluating {} ...".format(scene_id), mode) 177 | est = {} 178 | mask, mask_filt = self.get_evaluation_masks(scene_id) 179 | 180 | for sensor in self.sensors: 181 | est[sensor] = self.tsdf[sensor][scene_id].volume 182 | 183 | est_filt = self.filtered[scene_id].volume 184 | gt = self.scenes_gt[scene_id].volume 185 | 186 | eval_results_scene = dict() 187 | for sensor_ in self.sensors: 188 | eval_results_scene[sensor_] = evaluation( 189 | est[sensor_], gt, mask[sensor_] 190 | ) 191 | 192 | eval_results_scene_filt = evaluation(est_filt, gt, mask_filt) 193 | 194 | del est, gt, mask, est_filt, mask_filt 195 | 196 | for sensor in self.sensors: 197 | eval_results_scene_save[sensor][scene_id] = eval_results_scene[sensor] 198 | eval_results_scene_save_filt[scene_id] = eval_results_scene_filt 199 | 200 | for key in eval_results_scene_filt.keys(): 201 | if workspace is None: 202 | for sensor in self.sensors: 203 | print(sensor, " ", key, eval_results_scene[sensor][key]) 204 | print("filtered ", key, eval_results_scene_filt[key]) 205 | else: 206 | for sensor in self.sensors: 207 | workspace.log( 208 | "{} {}".format(key, eval_results_scene[sensor][key]), mode 209 | ) 210 | workspace.log( 211 | "{} {}".format(key, eval_results_scene_filt[key]), mode 212 | ) 213 | 214 | if not eval_results_filt.get(key): # iou, mad, mse, acc as keys 215 | for sensor in self.sensors: 216 | eval_results[sensor][key] = eval_results_scene[sensor][key] 217 | eval_results_filt[key] = eval_results_scene_filt[key] 218 | else: 219 | for sensor in self.sensors: 220 | eval_results[sensor][key] += 
eval_results_scene[sensor][key] 221 | eval_results_filt[key] += eval_results_scene_filt[key] 222 | 223 | # normalizing metrics 224 | for key in eval_results_filt.keys(): 225 | for sensor in self.sensors: 226 | eval_results[sensor][key] /= len(self.scenes_gt.keys()) 227 | eval_results_filt[key] /= len(self.scenes_gt.keys()) 228 | 229 | if mode == "test": 230 | return ( 231 | eval_results, 232 | eval_results_filt, 233 | eval_results_scene_save, 234 | eval_results_scene_save_filt, 235 | ) 236 | else: 237 | return eval_results, eval_results_filt 238 | 239 | def reset(self, scene_id=None): 240 | if scene_id: 241 | for sensor in self.sensors: 242 | self.tsdf[sensor][scene_id].volume = self.initial_value * np.ones( 243 | self.scenes_gt[scene_id].shape, dtype=np.float16 244 | ) 245 | self.fusion_weights[sensor][scene_id] = np.zeros( 246 | self.scenes_gt[scene_id].shape, dtype=np.float16 247 | ) 248 | self.features[sensor][scene_id].volume = np.zeros( 249 | self.features[sensor][scene_id].shape, dtype=np.float16 250 | ) 251 | else: 252 | for scene_id in self.scenes_gt.keys(): 253 | for sensor in self.sensors: 254 | self.tsdf[sensor][scene_id].volume = self.initial_value * np.ones( 255 | self.scenes_gt[scene_id].shape, dtype=np.float16 256 | ) 257 | self.fusion_weights[sensor][scene_id] = np.zeros( 258 | self.scenes_gt[scene_id].shape, dtype=np.float16 259 | ) 260 | self.features[sensor][scene_id].volume = np.zeros( 261 | self.features[sensor][scene_id].shape, dtype=np.float16 262 | ) 263 | 264 | def get_evaluation_masks(self, scene): 265 | sensor_mask = {} 266 | mask = np.zeros_like(self[scene]["gt"]) 267 | and_mask = np.ones_like(self[scene]["gt"]) 268 | filter_mask = np.zeros_like(self[scene]["gt"]) 269 | sensor_mask_filtering = {} 270 | 271 | for sensor_ in self.sensors: 272 | weights = self.fusion_weights[sensor_][scene] 273 | mask = np.logical_or(mask, weights > 0) 274 | and_mask = np.logical_and(and_mask, weights > 0) 275 | sensor_mask[sensor_] = weights > 0 276 | 277 | # load weighting sensor grid 278 | if self.outlier_channel: 279 | sensor_weighting = self.sensor_weighting[scene][1, :, :, :] 280 | else: 281 | sensor_weighting = self.sensor_weighting[scene] 282 | 283 | only_one_sensor_mask = np.logical_xor(mask, and_mask) 284 | for sensor_ in self.sensors: 285 | 286 | only_sensor_mask = np.logical_and( 287 | only_one_sensor_mask, sensor_mask[sensor_] 288 | ) 289 | if sensor_ == self.sensors[0]: 290 | rem_indices = np.logical_and(only_sensor_mask, sensor_weighting < 0.5) 291 | else: 292 | rem_indices = np.logical_and(only_sensor_mask, sensor_weighting > 0.5) 293 | 294 | sensor_mask_filtering[sensor_] = sensor_mask[sensor_].copy() 295 | sensor_mask_filtering[sensor_][rem_indices] = 0 296 | 297 | for sensor_ in self.sensors: 298 | filter_mask = np.logical_or(filter_mask, sensor_mask_filtering[sensor_] > 0) 299 | 300 | return sensor_mask, filter_mask 301 | -------------------------------------------------------------------------------- /modules/filtering_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | 6 | class FilteringNet(nn.Module): 7 | def __init__(self, config): 8 | 9 | super(FilteringNet, self).__init__() 10 | 11 | self.config = config 12 | self.trunc_value = config.DATA.trunc_value 13 | self.sensors = config.DATA.input 14 | self.feature_to_weight_head = ( 15 | config.FILTERING_MODEL.CONV3D_MODEL.features_to_weight_head 16 | ) 17 | self.weight_to_weight_head = ( 18 | 
config.FILTERING_MODEL.CONV3D_MODEL.weights_to_weight_head 19 | ) 20 | self.sdf_to_weight_head = config.FILTERING_MODEL.CONV3D_MODEL.sdf_to_weight_head 21 | self.weighting_complexity = ( 22 | config.FILTERING_MODEL.CONV3D_MODEL.weighting_complexity 23 | ) 24 | self.activation = eval(config.FILTERING_MODEL.CONV3D_MODEL.activation) 25 | self.n_features = config.FEATURE_MODEL.n_features 26 | self.alpha_supervision = config.LOSS.alpha_supervision 27 | self.alpha_single_sensor_supervision = ( 28 | config.LOSS.alpha_single_sensor_supervision 29 | ) 30 | bias_wn = config.FILTERING_MODEL.CONV3D_MODEL.bias 31 | self.outlier_channel = config.FILTERING_MODEL.CONV3D_MODEL.outlier_channel 32 | 33 | # alpha layer 34 | if self.weighting_complexity == "1layer": 35 | self.weight_decoder = nn.Conv3d( 36 | len(self.sensors) 37 | * ( 38 | self.sdf_to_weight_head 39 | + self.n_features * self.feature_to_weight_head 40 | + self.weight_to_weight_head 41 | ), 42 | 1, 43 | 1, 44 | padding=0, 45 | bias=bias_wn, 46 | ) 47 | elif self.weighting_complexity == "2layer": 48 | self.weight_decoder = nn.Sequential( 49 | nn.Conv3d( 50 | len(self.sensors) 51 | * ( 52 | self.sdf_to_weight_head 53 | + self.n_features * self.feature_to_weight_head 54 | + self.weight_to_weight_head 55 | ), 56 | 16, 57 | 3, 58 | padding=1, 59 | padding_mode="replicate", 60 | bias=bias_wn, 61 | ), 62 | self.activation, 63 | nn.Conv3d(16, 1 + self.outlier_channel, 1, padding=0, bias=bias_wn), 64 | ) 65 | elif self.weighting_complexity == "3layer": 66 | self.weight_decoder = nn.Sequential( 67 | nn.Conv3d( 68 | len(self.sensors) 69 | * ( 70 | self.sdf_to_weight_head 71 | + self.n_features * self.feature_to_weight_head 72 | + self.weight_to_weight_head 73 | ), 74 | 32, 75 | 3, 76 | padding=1, 77 | padding_mode="replicate", 78 | bias=bias_wn, 79 | ), 80 | self.activation, 81 | nn.Conv3d(32, 16, 3, padding=1, padding_mode="replicate", bias=bias_wn), 82 | self.activation, 83 | nn.Conv3d(16, 1, 1, padding=0, bias=bias_wn), 84 | ) 85 | 86 | elif self.weighting_complexity == "4layer": 87 | self.weight_decoder = nn.Sequential( 88 | nn.Conv3d( 89 | len(self.sensors) 90 | * ( 91 | self.sdf_to_weight_head 92 | + self.n_features * self.feature_to_weight_head 93 | + self.weight_to_weight_head 94 | ), 95 | 32, 96 | 3, 97 | padding=1, 98 | padding_mode="replicate", 99 | bias=bias_wn, 100 | ), 101 | self.activation, 102 | nn.Conv3d(32, 32, 3, padding=1, padding_mode="replicate", bias=bias_wn), 103 | self.activation, 104 | nn.Conv3d(32, 16, 3, padding=1, padding_mode="replicate", bias=bias_wn), 105 | self.activation, 106 | nn.Conv3d(16, 1 + self.outlier_channel, 1, padding=0, bias=bias_wn), 107 | ) 108 | elif self.weighting_complexity == "5layer": 109 | self.weight_decoder = nn.Sequential( 110 | nn.Conv3d( 111 | len(self.sensors) 112 | * ( 113 | self.sdf_to_weight_head 114 | + self.n_features * self.feature_to_weight_head 115 | + self.weight_to_weight_head 116 | ), 117 | 32, 118 | 3, 119 | padding=1, 120 | padding_mode="replicate", 121 | bias=bias_wn, 122 | ), 123 | self.activation, 124 | nn.Conv3d(32, 32, 3, padding=1, padding_mode="replicate", bias=bias_wn), 125 | self.activation, 126 | nn.Conv3d(32, 32, 3, padding=1, padding_mode="replicate", bias=bias_wn), 127 | self.activation, 128 | nn.Conv3d(32, 16, 3, padding=1, padding_mode="replicate", bias=bias_wn), 129 | self.activation, 130 | nn.Conv3d(16, 1 + self.outlier_channel, 1, padding=0, bias=bias_wn), 131 | ) 132 | 133 | self.tanh = nn.Tanh() 134 | self.sigmoid = nn.Sigmoid() 135 | self.softmax = 
nn.Softmax(dim=1) 136 | 137 | def forward(self, neighborhood): 138 | weight = dict() 139 | sdf = dict() 140 | enc = dict() 141 | output = dict() 142 | 143 | for sensor_ in self.sensors: 144 | sdf[sensor_] = neighborhood[sensor_][:, 0, :, :, :] 145 | 146 | weight[sensor_] = neighborhood[sensor_][:, 1, :, :, :].unsqueeze(1) 147 | 148 | for sensor_ in self.sensors: 149 | output["tsdf_" + sensor_] = sdf[sensor_].squeeze() 150 | output[sensor_ + "_init"] = weight[sensor_].squeeze() > 0 151 | 152 | input_ = None 153 | alpha_val = dict() 154 | 155 | for k, sensor_ in enumerate(self.config.DATA.input): 156 | inp = None 157 | if self.sdf_to_weight_head: 158 | if inp is None: 159 | inp = neighborhood[sensor_][:, 0, :, :, :].unsqueeze(1) 160 | else: 161 | inp = torch.cat( 162 | (inp, neighborhood[sensor_][:, 0, :, :, :].unsqueeze(1)), 163 | dim=1, 164 | ) 165 | if self.feature_to_weight_head: 166 | if inp is None: 167 | inp = neighborhood[sensor_][:, 2:, :, :, :] 168 | else: 169 | inp = torch.cat((inp, neighborhood[sensor_][:, 2:, :, :, :]), dim=1) 170 | if self.weight_to_weight_head: 171 | if self.config.FILTERING_MODEL.CONV3D_MODEL.tanh_weight: 172 | if self.config.FILTERING_MODEL.CONV3D_MODEL.inverted_weight: 173 | weights = torch.ones_like( 174 | neighborhood[sensor_][:, 1, :, :, :].unsqueeze(1) 175 | ) - self.tanh(neighborhood[sensor_][:, 1, :, :, :]) 176 | else: 177 | weights = self.tanh( 178 | neighborhood[sensor_][:, 1, :, :, :] 179 | ).unsqueeze(1) 180 | else: 181 | weights = neighborhood[sensor_][:, 1, :, :, :].unsqueeze(1) 182 | 183 | if inp is None: 184 | inp = weights 185 | else: 186 | inp = torch.cat((inp, weights), dim=1) 187 | 188 | if input_ is None: 189 | input_ = inp 190 | else: 191 | input_ = torch.cat((input_, inp), dim=1) 192 | 193 | if k == 0: 194 | alpha_val[sensor_] = torch.zeros_like(sdf[sensor_]) 195 | else: 196 | alpha_val[sensor_] = torch.ones_like(sdf[sensor_]) 197 | 198 | if input_.isnan().sum() > 0: 199 | print("Input isnan: ", input_.isnan().sum()) 200 | 201 | alpha = self.sigmoid(self.weight_decoder(input_)) 202 | 203 | if alpha.isnan().sum() > 0 or alpha.isinf().sum() > 0: 204 | print("alpha nan: ", alpha.isnan().sum()) 205 | print("alpha inf: ", alpha.isinf().sum()) 206 | return None 207 | 208 | if ( 209 | neighborhood["test_mode"] 210 | or self.alpha_supervision 211 | or self.alpha_single_sensor_supervision 212 | ): 213 | output["sensor_weighting"] = alpha.squeeze() 214 | 215 | if self.outlier_channel: 216 | alpha_sdf = alpha[:, 0, :, :, :] 217 | else: 218 | alpha_sdf = alpha 219 | 220 | # this step is to not filter the voxels where we only have one sensor observation. 221 | # Note that we save the variable alpha and not alpha_sdf so we can still 222 | # use the outlier filter as usual. 
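# Illustrative sketch (not part of the forward pass) of the two-sensor blend
# performed below, assuming self.sensors == ["tof", "stereo"] with a per-voxel
# weight alpha in [0, 1] predicted for the first sensor:
#
#     alpha = torch.where(weight["tof"] == 0,    torch.zeros_like(alpha), alpha)
#     alpha = torch.where(weight["stereo"] == 0, torch.ones_like(alpha),  alpha)
#     fused = alpha * sdf["tof"] + (1.0 - alpha) * sdf["stereo"]
#
# i.e. where only one sensor has observations, alpha is overridden so that the
# fused TSDF falls back to that sensor alone; everywhere else the learned alpha
# interpolates between the two per-sensor TSDF values.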
223 | for sensor_ in self.config.DATA.input: 224 | alpha_sdf = torch.where(weight[sensor_] == 0, alpha_val[sensor_], alpha_sdf) 225 | 226 | sdf_final = None 227 | 228 | for k, sensor_ in enumerate(self.config.DATA.input): 229 | if k == 0: 230 | sdf_final = alpha_sdf * sdf[sensor_] 231 | else: 232 | sdf_final += (1 - alpha_sdf) * sdf[sensor_] 233 | 234 | output["tsdf"] = sdf_final.squeeze() 235 | 236 | return output 237 | -------------------------------------------------------------------------------- /modules/integrator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Integrator(torch.nn.Module): 5 | def __init__(self, config): 6 | 7 | super(Integrator, self).__init__() 8 | 9 | self.device = config.device 10 | self.max_weight = config.max_weight 11 | self.extraction_strategy = config.extraction_strategy 12 | self.n_empty_space_voting = config.n_empty_space_voting 13 | self.trunc_value = config.trunc_value 14 | 15 | def forward( 16 | self, 17 | integrator_input, 18 | values_volume, 19 | features_volume, 20 | weights_volume, 21 | ): 22 | xs, ys, zs = values_volume.shape 23 | 24 | # unpack data 25 | values = integrator_input["update_values"].to(self.device) 26 | features = integrator_input["update_features"].to(self.device) 27 | indices = integrator_input["update_indices"].to(self.device) 28 | weights = integrator_input["update_weights"].to( 29 | self.device 30 | ) # update weights. When using nearest neighbor interpolation these are all ones. 31 | 32 | if self.n_empty_space_voting > 0: 33 | indices_empty = integrator_input["update_indices_empty"].to(self.device) 34 | weights_empty = integrator_input["update_weights_empty"].to(self.device) 35 | 36 | ( 37 | n1, 38 | n2, 39 | n3, 40 | f4, 41 | ) = features.shape 42 | 43 | # reshape tensors 44 | features = features.contiguous().view(-1, f4).float() 45 | values = values.contiguous().view(-1, 1).float() 46 | 47 | if self.extraction_strategy == "trilinear_interpolation": 48 | features = features.repeat(8, 1) 49 | values = values.repeat(1, 8) 50 | indices = indices.contiguous().view(-1, 8, 3).long() 51 | weights = weights.contiguous().view(-1, 8) 52 | if self.n_empty_space_voting > 0: 53 | indices_empty = indices_empty.contiguous().view(-1, 8, 3).long() 54 | weights_empty = weights_empty.contiguous().view(-1, 8) 55 | elif self.extraction_strategy == "nearest_neighbor": 56 | values = values.repeat(1, 1) 57 | indices = indices.contiguous().view(-1, 1, 3).long() 58 | weights = weights.contiguous().view(-1, 1) 59 | if self.n_empty_space_voting > 0: 60 | indices_empty = indices_empty.contiguous().view(-1, 1, 3).long() 61 | weights_empty = weights_empty.contiguous().view(-1, 1) 62 | 63 | values = values.contiguous().view(-1, 1).float() 64 | indices = indices.contiguous().view(-1, 3).long() 65 | 66 | if self.n_empty_space_voting > 0: 67 | indices_empty = indices_empty.contiguous().view(-1, 3).long() 68 | weights_empty = weights_empty.contiguous().view(-1, 1).float() 69 | 70 | weights = weights.contiguous().view(-1, 1).float() 71 | 72 | # get valid indices 73 | valid = get_index_mask(indices, values_volume.shape) 74 | indices = extract_indices(indices, mask=valid) 75 | if self.n_empty_space_voting > 0: 76 | valid_empty = get_index_mask(indices_empty, values_volume.shape) 77 | indices_empty = extract_indices(indices_empty, mask=valid_empty) 78 | 79 | feature_indices = indices.clone() 80 | 81 | # remove the invalid entries from the values, features and weights 82 | valid_features = 
valid.clone().unsqueeze_(-1) 83 | features = torch.masked_select(features, valid_features.repeat(1, f4)) 84 | features = features.view(int(features.shape[0] / f4), f4) 85 | 86 | values = torch.masked_select(values[:, 0], valid) 87 | weights = torch.masked_select(weights[:, 0], valid) 88 | if self.n_empty_space_voting > 0: 89 | weights_empty = torch.masked_select(weights_empty[:, 0], valid_empty) 90 | 91 | update_feat = weights.repeat(f4, 1).permute(1, 0) * features 92 | del features 93 | 94 | update = weights * values 95 | del values 96 | 97 | # aggregate updates to the same index 98 | 99 | # tsdf 100 | index = ys * zs * indices[:, 0] + zs * indices[:, 1] + indices[:, 2] 101 | indices_insert = torch.unique_consecutive(indices[index.sort()[1]], dim=0) 102 | vcache = torch.sparse.FloatTensor( 103 | index.unsqueeze_(0), update, torch.Size([xs * ys * zs]) 104 | ).coalesce() 105 | update = vcache.values() 106 | 107 | if indices_insert.shape[0] != update.shape[0]: 108 | print("wrong dim!") 109 | del vcache 110 | 111 | # if using the same extraction procedure for fusion and feature updates 112 | update_feat_weights = weights 113 | 114 | # weights for tsdf 115 | wcache = torch.sparse.FloatTensor( 116 | index, weights, torch.Size([xs * ys * zs]) 117 | ).coalesce() # this line adds the values at the same index together 118 | indices = wcache.indices().squeeze() 119 | weights = wcache.values() 120 | 121 | del wcache 122 | 123 | if self.n_empty_space_voting > 0: 124 | # weights for empty indices 125 | index_empty = ( 126 | ys * zs * indices_empty[:, 0] 127 | + zs * indices_empty[:, 1] 128 | + indices_empty[:, 2] 129 | ) 130 | indices_empty_insert = torch.unique_consecutive( 131 | indices_empty[index_empty.sort()[1]], dim=0 132 | ) 133 | wcache_empty = torch.sparse.FloatTensor( 134 | index_empty.unsqueeze_(0), weights_empty, torch.Size([xs * ys * zs]) 135 | ).coalesce() # this line adds the values at the same index together 136 | indices_empty = wcache_empty.indices().squeeze() 137 | weights_empty = wcache_empty.values() 138 | del wcache_empty 139 | 140 | # features 141 | feature_index = ( 142 | ys * zs * feature_indices[:, 0] 143 | + zs * feature_indices[:, 1] 144 | + feature_indices[:, 2] 145 | ) 146 | feature_indices_insert = torch.unique_consecutive( 147 | feature_indices[feature_index.sort()[1]], dim=0 148 | ) 149 | fcache = torch.sparse.FloatTensor( 150 | feature_index.unsqueeze_(0), update_feat, torch.Size([xs * ys * zs, f4]) 151 | ).coalesce() 152 | 153 | feature_indices = fcache.indices().squeeze() 154 | update_feat = fcache.values() 155 | if feature_indices_insert.shape[0] != update_feat.shape[0]: 156 | print("wrong dim feat!") 157 | del fcache 158 | 159 | # feature weights 160 | wcache_feat = torch.sparse.FloatTensor( 161 | feature_index, update_feat_weights, torch.Size([xs * ys * zs]) 162 | ).coalesce() 163 | weights_feat = wcache_feat.values().unsqueeze_(-1).repeat(1, f4).float() 164 | del wcache_feat 165 | 166 | # tsdf and weights update 167 | values_old = values_volume.view(xs * ys * zs)[indices] 168 | weights_old = weights_volume.view(xs * ys * zs)[indices] 169 | value_update = (weights_old * values_old + update) / (weights_old + weights) 170 | weight_update = weights_old + weights 171 | weight_update = torch.clamp(weight_update, 0, self.max_weight) 172 | 173 | if self.n_empty_space_voting > 0: 174 | # empty space update 175 | values_old_empty = values_volume.view(xs * ys * zs)[indices_empty] 176 | weights_old_empty = weights_volume.view(xs * ys * zs)[indices_empty] 177 | 
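# The empty-space update below is the same weighted running average as the
# TSDF update above, except that the "observed" value is the truncation
# distance itself, which pulls free-space voxels towards +trunc_value.
# Worked example (hypothetical numbers): with v_old = 0.02, w_old = 3,
# trunc_value = 0.05 and an empty-space weight of 1, the voxel becomes
# (3 * 0.02 + 0.05 * 1) / (3 + 1) = 0.0275 and its weight becomes 4.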
value_update_empty = torch.add( 178 | weights_old_empty * values_old_empty, self.trunc_value * weights_empty 179 | ) / (weights_old_empty + weights_empty) 180 | weight_update_empty = weights_old_empty + weights_empty 181 | weight_update_empty = torch.clamp(weight_update_empty, 0, self.max_weight) 182 | 183 | # feature update 184 | feature_weights_old = ( 185 | weights_volume.view(xs * ys * zs)[feature_indices] 186 | .unsqueeze_(-1) 187 | .repeat(1, f4) 188 | .float() 189 | ) 190 | 191 | features_old = features_volume.view(xs * ys * zs, f4)[feature_indices] 192 | 193 | # here we should not multiply the update_feat with weights_feat in the nominator since we already have that baked in 194 | feature_update = (feature_weights_old * features_old + update_feat) / ( 195 | feature_weights_old + weights_feat 196 | ) 197 | 198 | del update_feat, feature_weights_old, weights_feat 199 | 200 | # inser tsdf and tsdf weights 201 | insert_values(value_update, indices_insert, values_volume) 202 | insert_values(weight_update, indices_insert, weights_volume) 203 | 204 | # insert features 205 | insert_values(feature_update, feature_indices_insert, features_volume) 206 | 207 | if self.n_empty_space_voting > 0: 208 | # insert empty tsdf and weights 209 | insert_values(value_update_empty, indices_empty_insert, values_volume) 210 | insert_values(weight_update_empty, indices_empty_insert, weights_volume) 211 | 212 | return ( 213 | values_volume, 214 | features_volume, 215 | weights_volume, 216 | indices_insert, 217 | ) 218 | 219 | 220 | def get_index_mask(indices, shape): 221 | """Method to check whether indices are valid. 222 | 223 | Args: 224 | indices: indices to check 225 | shape: constraints for indices 226 | 227 | Returns: 228 | mask 229 | """ 230 | 231 | xs, ys, zs = shape 232 | 233 | valid = ( 234 | (indices[:, 0] >= 0) 235 | & (indices[:, 0] < xs) 236 | & (indices[:, 1] >= 0) 237 | & (indices[:, 1] < ys) 238 | & (indices[:, 2] >= 0) 239 | & (indices[:, 2] < zs) 240 | ) 241 | 242 | return valid 243 | 244 | 245 | def extract_indices(indices, mask): 246 | """Method to extract indices according to mask.""" 247 | 248 | x = torch.masked_select(indices[:, 0], mask) 249 | y = torch.masked_select(indices[:, 1], mask) 250 | z = torch.masked_select(indices[:, 2], mask) 251 | 252 | masked_indices = torch.cat( 253 | (x.unsqueeze_(1), y.unsqueeze_(1), z.unsqueeze_(1)), dim=1 254 | ) 255 | return masked_indices 256 | 257 | 258 | def insert_values(values, indices, volume): 259 | """Method to insert values back into volume.""" 260 | 261 | if volume.dim() == 3: 262 | volume = volume.half() 263 | volume[indices[:, 0], indices[:, 1], indices[:, 2]] = values.half() 264 | else: 265 | volume = volume.half() 266 | volume[indices[:, 0], indices[:, 1], indices[:, 2], :] = values.half() 267 | -------------------------------------------------------------------------------- /modules/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | 6 | class FusionNet(nn.Module): 7 | def __init__(self, config, sensor): 8 | 9 | super(FusionNet, self).__init__() 10 | 11 | self.scale = config.output_scale 12 | self.conf = config.confidence 13 | 14 | try: 15 | self.n_channels = ( 16 | 2 * eval("config.n_points_" + sensor) + 1 + int(config.confidence) 17 | ) 18 | self.n_points = eval("config.n_points_" + sensor) 19 | except AttributeError: 20 | self.n_channels = 2 * config.n_points + 1 + int(config.confidence) 21 | self.n_points = config.n_points 22 | 23 | 
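# Channel bookkeeping (hypothetical numbers for illustration): with
# n_points = 9 and confidence = False, n_channels = 2 * 9 + 1 = 19; enabling
# the confidence input adds one more plane. The dense blocks below keep this
# channel count and concatenate their inputs, so block k consumes
# k * n_channels channels and pred1 receives 5 * n_channels.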
self.block1 = nn.Sequential( 24 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 25 | nn.BatchNorm2d(self.n_channels), 26 | nn.LeakyReLU(), 27 | nn.Dropout2d(p=0.2), 28 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 29 | nn.BatchNorm2d(self.n_channels), 30 | nn.LeakyReLU(), 31 | nn.Dropout2d(p=0.2), 32 | ) 33 | 34 | self.block2 = nn.Sequential( 35 | nn.Conv2d(2 * self.n_channels, self.n_channels, (3, 3), padding=1), 36 | nn.BatchNorm2d(self.n_channels), 37 | nn.LeakyReLU(), 38 | nn.Dropout2d(p=0.2), 39 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 40 | nn.BatchNorm2d(self.n_channels), 41 | nn.LeakyReLU(), 42 | nn.Dropout2d(p=0.2), 43 | ) 44 | 45 | self.block3 = nn.Sequential( 46 | nn.Conv2d(3 * self.n_channels, self.n_channels, (3, 3), padding=1), 47 | nn.BatchNorm2d(self.n_channels), 48 | nn.LeakyReLU(), 49 | nn.Dropout2d(p=0.2), 50 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 51 | nn.BatchNorm2d(self.n_channels), 52 | nn.LeakyReLU(), 53 | nn.Dropout2d(p=0.2), 54 | ) 55 | 56 | self.block4 = nn.Sequential( 57 | nn.Conv2d(4 * self.n_channels, self.n_channels, (3, 3), padding=1), 58 | nn.BatchNorm2d(self.n_channels), 59 | nn.LeakyReLU(), 60 | nn.Dropout2d(p=0.2), 61 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 62 | nn.BatchNorm2d(self.n_channels), 63 | nn.LeakyReLU(), 64 | nn.Dropout2d(p=0.2), 65 | ) 66 | 67 | self.pred1 = nn.Sequential( 68 | nn.Conv2d(5 * self.n_channels, 4 * self.n_channels, (1, 1), padding=0), 69 | nn.BatchNorm2d(4 * self.n_channels), 70 | nn.LeakyReLU(), 71 | nn.Dropout2d(p=0.2), 72 | nn.Conv2d(4 * self.n_channels, 4 * self.n_channels, (1, 1), padding=0), 73 | nn.BatchNorm2d(4 * self.n_channels), 74 | nn.LeakyReLU(), 75 | nn.Dropout2d(p=0.2), 76 | ) 77 | 78 | self.pred2 = nn.Sequential( 79 | nn.Conv2d(4 * self.n_channels, 3 * self.n_channels, (1, 1), padding=0), 80 | nn.BatchNorm2d(3 * self.n_channels), 81 | nn.LeakyReLU(), 82 | nn.Dropout2d(p=0.2), 83 | nn.Conv2d(3 * self.n_channels, 3 * self.n_channels, (1, 1), padding=0), 84 | nn.BatchNorm2d(3 * self.n_channels), 85 | nn.LeakyReLU(), 86 | nn.Dropout2d(p=0.2), 87 | ) 88 | 89 | self.pred3 = nn.Sequential( 90 | nn.Conv2d(3 * self.n_channels, 2 * self.n_channels, (1, 1), padding=0), 91 | nn.BatchNorm2d(2 * self.n_channels), 92 | nn.LeakyReLU(), 93 | nn.Dropout2d(p=0.2), 94 | nn.Conv2d(2 * self.n_channels, 2 * self.n_channels, (1, 1), padding=0), 95 | nn.BatchNorm2d(2 * self.n_channels), 96 | nn.LeakyReLU(), 97 | nn.Dropout2d(p=0.2), 98 | ) 99 | 100 | self.pred4 = nn.Sequential( 101 | nn.Conv2d(2 * self.n_channels, 1 * self.n_channels, (1, 1), padding=0), 102 | nn.BatchNorm2d(self.n_channels), 103 | nn.LeakyReLU(), 104 | nn.Dropout2d(p=0.2), 105 | nn.Conv2d(1 * self.n_channels, 1 * self.n_channels, (1, 1), padding=0), 106 | nn.LeakyReLU(), 107 | nn.Conv2d(1 * self.n_channels, self.n_points, (1, 1), padding=0), 108 | ) 109 | 110 | self.tanh = nn.Tanh() 111 | self.relu = nn.ReLU() 112 | 113 | def forward(self, x): 114 | x1 = self.block1.forward(x) 115 | 116 | x1 = torch.cat([x, x1], dim=1) 117 | x2 = self.block2.forward(x1) 118 | x2 = torch.cat([x1, x2], dim=1) 119 | x3 = self.block3.forward(x2) 120 | x3 = torch.cat([x2, x3], dim=1) 121 | x4 = self.block4.forward(x3) 122 | x4 = torch.cat([x3, x4], dim=1) 123 | 124 | y = self.pred1.forward(x4) 125 | y = self.pred2.forward(y) 126 | y = self.pred3.forward(y) 127 | y = self.pred4.forward(y) 128 | 129 | tsdf = self.scale * self.tanh.forward(y) 130 | 131 | return tsdf 132 | 
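# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original file): the config values
# below are assumptions chosen only to illustrate the expected tensor shapes;
# the real settings live in the yaml files under configs/fusion.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        output_scale=0.05,  # assumed TSDF truncation scale
        confidence=False,   # no extra confidence input plane
        n_points=9,         # no per-sensor override -> falls back to n_points
    )

    net = FusionNet(cfg, sensor="tof").eval()

    # input: 2 * n_points + 1 channels (19 here) on a small dummy frame
    x = torch.rand(1, 2 * cfg.n_points + 1, 240, 320)
    with torch.no_grad():
        tsdf = net(x)

    print(tsdf.shape)  # torch.Size([1, 9, 240, 320]), values in (-0.05, 0.05)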
-------------------------------------------------------------------------------- /modules/model_features.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from torch.nn.functional import normalize 5 | 6 | 7 | class EncoderBlock(nn.Module): 8 | """Encoder block for the fusion network in NeuralFusion""" 9 | 10 | def __init__(self, c_in, c_out, activation, resolution, layernorm): 11 | 12 | super(EncoderBlock, self).__init__() 13 | 14 | if layernorm: 15 | self.block = nn.Sequential( 16 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 17 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 18 | activation, 19 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 20 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 21 | activation, 22 | ) 23 | else: 24 | self.block = nn.Sequential( 25 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 26 | activation, 27 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 28 | activation, 29 | ) 30 | 31 | def forward(self, x): 32 | return self.block(x) 33 | 34 | 35 | class DecoderBlock(nn.Module): 36 | """Decoder block for the fusion network in NeuralFusion""" 37 | 38 | def __init__(self, c_in, c_out, activation, resolution, layernorm): 39 | 40 | super(DecoderBlock, self).__init__() 41 | 42 | if layernorm: 43 | self.block = nn.Sequential( 44 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 45 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 46 | activation, 47 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 48 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 49 | activation, 50 | ) 51 | else: 52 | self.block = nn.Sequential( 53 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 54 | activation, 55 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 56 | activation, 57 | ) 58 | 59 | def forward(self, x): 60 | return self.block(x) 61 | 62 | 63 | class FeatureNet(nn.Module): 64 | """Network used in NeuralFusion""" 65 | 66 | def __init__(self, config, sensor): 67 | 68 | super(FeatureNet, self).__init__() 69 | 70 | try: 71 | self.n_points = eval("config.n_points_" + sensor) 72 | except AttributeError: 73 | self.n_points = config.n_points 74 | 75 | self.n_features = config.n_features - config.append_depth 76 | 77 | self.normalize = config.normalize 78 | self.w_rgb = config.w_rgb 79 | self.w_stereo_warp_right = config.stereo_warp_right 80 | self.w_intensity_gradient = config.w_intensity_gradient 81 | self.confidence = config.confidence 82 | 83 | # layer settings 84 | n_channels_input = self.n_features 85 | n_channels_output = self.n_features 86 | self.n_layers = config.n_layers 87 | self.height = config.resy 88 | self.width = config.resx 89 | resolution = (self.height, self.width) 90 | enc_activation = eval(config.enc_activation) 91 | dec_activation = eval(config.dec_activation) 92 | self.tsdf_out = self.n_points 93 | layernorm = config.layernorm 94 | self.append_depth = config.append_depth 95 | 96 | # define network submodules (encoder/decoder) 97 | self.encoder = nn.ModuleList() 98 | self.decoder = nn.ModuleList() 99 | 100 | if sensor == "tof": 101 | n_channels_first = ( 102 | config.depth 103 | + 3 * int(self.w_rgb) * config.w_rgb_tof 104 | + 2 * int(self.w_intensity_gradient) 105 | + int(self.confidence) 106 | ) 107 | elif sensor == "stereo": 108 | n_channels_first = ( 109 | config.depth 110 | + 3 * int(self.w_rgb) 111 | + 2 * int(self.w_intensity_gradient) 112 | + 3 * int(self.w_stereo_warp_right) 113 | + int(self.confidence) 114 | 
) 115 | else: 116 | n_channels_first = ( 117 | config.depth 118 | + 3 * int(self.w_rgb) 119 | + 2 * int(self.w_intensity_gradient) 120 | + int(self.confidence) 121 | ) 122 | 123 | # add first encoder block 124 | self.encoder.append( 125 | EncoderBlock( 126 | n_channels_first, 127 | n_channels_input, 128 | enc_activation, 129 | resolution, 130 | layernorm, 131 | ) 132 | ) 133 | # add first decoder block 134 | if sensor == "stereo": 135 | self.decoder.append( 136 | DecoderBlock( 137 | (self.n_layers) * n_channels_input 138 | + config.depth 139 | + 3 * int(self.w_rgb) 140 | + 2 * int(self.w_intensity_gradient) 141 | + 3 * int(self.w_stereo_warp_right) 142 | + int(self.confidence), 143 | self.n_layers * n_channels_output, 144 | dec_activation, 145 | resolution, 146 | layernorm, 147 | ) 148 | ) 149 | elif sensor == "tof": 150 | self.decoder.append( 151 | DecoderBlock( 152 | (self.n_layers) * n_channels_input 153 | + config.depth 154 | + 3 * int(self.w_rgb) * config.w_rgb_tof 155 | + 2 * int(self.w_intensity_gradient) 156 | + int(self.confidence), 157 | self.n_layers * n_channels_output, 158 | dec_activation, 159 | resolution, 160 | layernorm, 161 | ) 162 | ) 163 | else: 164 | self.decoder.append( 165 | DecoderBlock( 166 | (self.n_layers) * n_channels_input 167 | + config.depth 168 | + 3 * int(self.w_rgb) 169 | + 2 * int(self.w_intensity_gradient) 170 | + int(self.confidence), 171 | self.n_layers * n_channels_output, 172 | dec_activation, 173 | resolution, 174 | layernorm, 175 | ) 176 | ) 177 | 178 | # adding model layers 179 | for l in range(1, self.n_layers): 180 | self.encoder.append( 181 | EncoderBlock( 182 | n_channels_first + l * n_channels_input, 183 | n_channels_input, 184 | enc_activation, 185 | resolution, 186 | layernorm, 187 | ) 188 | ) 189 | 190 | self.decoder.append( 191 | DecoderBlock( 192 | ((self.n_layers + 1) - l) * n_channels_output, 193 | ((self.n_layers + 1) - (l + 1)) * n_channels_output, 194 | dec_activation, 195 | resolution, 196 | layernorm, 197 | ) 198 | ) 199 | 200 | self.tanh = nn.Tanh() 201 | 202 | def forward(self, x): 203 | if self.append_depth: 204 | if self.w_rgb: 205 | d = x[:, 0, :, :].unsqueeze(1) 206 | else: 207 | d = x 208 | 209 | # encoding 210 | 211 | for enc in self.encoder: 212 | xmid = enc(x) 213 | if xmid.isnan().sum() > 0 or xmid.isinf().sum() > 0: 214 | print("xmid nan: ", xmid.isnan().sum()) 215 | print("xmid inf: ", xmid.isinf().sum()) 216 | x = torch.cat([x, xmid], dim=1) 217 | 218 | # decoding 219 | for dec in self.decoder: 220 | x = dec(x) 221 | 222 | if self.normalize: 223 | x = normalize(x, p=2, dim=1) 224 | 225 | if self.append_depth: 226 | x = torch.cat([x, d], dim=1) 227 | 228 | output = dict() 229 | 230 | output["feature"] = x 231 | 232 | return output 233 | 234 | 235 | class FeatureResNet(nn.Module): 236 | """Residual Network""" 237 | 238 | def __init__(self, config, sensor): 239 | 240 | super(FeatureResNet, self).__init__() 241 | 242 | try: 243 | self.n_points = eval("config.n_points_" + sensor) 244 | except AttributeError: 245 | self.n_points = config.n_points 246 | 247 | self.n_features = config.n_features - config.append_depth 248 | 249 | self.normalize = config.normalize 250 | self.w_rgb = config.w_rgb 251 | self.w_stereo_warp_right = config.stereo_warp_right 252 | self.w_intensity_gradient = config.w_intensity_gradient 253 | self.confidence = config.confidence 254 | 255 | # layer settings 256 | n_channels_input = self.n_features 257 | self.n_layers = config.n_layers 258 | self.height = config.resy 259 | self.width = config.resx 260 
| resolution = (self.height, self.width) 261 | enc_activation = eval(config.enc_activation) 262 | self.tsdf_out = self.n_points 263 | layernorm = config.layernorm 264 | self.append_depth = config.append_depth 265 | 266 | # define network submodules (encoder/decoder) 267 | self.encoder = nn.ModuleList() 268 | 269 | if sensor == "tof": 270 | n_channels_first = ( 271 | config.depth 272 | + 3 * int(self.w_rgb) * config.w_rgb_tof 273 | + 2 * int(self.w_intensity_gradient) 274 | + int(self.confidence) 275 | ) 276 | elif ( 277 | sensor == "stereo" 278 | ): # I did not feed rgb to sgm_stereo. This line should have been sensor.endswith("stereo"): 279 | n_channels_first = ( 280 | config.depth 281 | + 3 * int(self.w_rgb) 282 | + 2 * int(self.w_intensity_gradient) 283 | + 3 * int(self.w_stereo_warp_right) 284 | + int(self.confidence) 285 | ) 286 | else: 287 | n_channels_first = ( 288 | config.depth 289 | + 3 * int(self.w_rgb) 290 | + 2 * int(self.w_intensity_gradient) 291 | + int(self.confidence) 292 | ) 293 | 294 | # add first encoder block 295 | self.encoder.append( 296 | EncoderBlock( 297 | n_channels_first, 298 | n_channels_input, 299 | enc_activation, 300 | resolution, 301 | layernorm, 302 | ) 303 | ) 304 | 305 | # adding model layers 306 | for l in range(1, self.n_layers): 307 | self.encoder.append( 308 | EncoderBlock( 309 | n_channels_input, 310 | n_channels_input, 311 | enc_activation, 312 | resolution, 313 | layernorm, 314 | ) 315 | ) 316 | 317 | self.tanh = nn.Tanh() 318 | 319 | def forward(self, x): 320 | if self.append_depth: 321 | if self.w_rgb: 322 | d = x[:, 0, :, :].unsqueeze(1) 323 | else: 324 | d = x 325 | 326 | # encoding 327 | 328 | for k, enc in enumerate(self.encoder): 329 | xmid = enc(x) 330 | if xmid.isnan().sum() > 0 or xmid.isinf().sum() > 0: 331 | print("xmid nan: ", xmid.isnan().sum()) 332 | print("xmid inf: ", xmid.isinf().sum()) 333 | 334 | if k > 0: 335 | x = x + xmid 336 | else: 337 | x = xmid 338 | 339 | if self.normalize: 340 | x = normalize(x, p=2, dim=1) 341 | 342 | if self.append_depth: 343 | x = torch.cat([x, d], dim=1) 344 | 345 | output = dict() 346 | 347 | output["feature"] = x 348 | 349 | return output 350 | -------------------------------------------------------------------------------- /modules/pipeline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import tqdm 3 | import math 4 | 5 | from modules.fuse_pipeline import Fuse_Pipeline 6 | from modules.filter_pipeline import Filter_Pipeline 7 | 8 | import numpy as np 9 | 10 | 11 | class Pipeline(torch.nn.Module): 12 | def __init__(self, config): 13 | 14 | super(Pipeline, self).__init__() 15 | 16 | self.config = config 17 | 18 | # setup pipeline 19 | self.fuse_pipeline = Fuse_Pipeline(config) 20 | if config.FILTERING_MODEL.do: 21 | if config.FILTERING_MODEL.model == "3dconv": 22 | self.filter_pipeline = Filter_Pipeline(config) 23 | else: 24 | self.filter_pipeline = ( 25 | None # used when we run the tsdf fusion or routedfusion 26 | ) 27 | else: 28 | self.filter_pipeline = None 29 | 30 | def forward(self, batch, database, epoch, device): # train step 31 | scene_id = batch["frame_id"][0].split("/")[0] 32 | 33 | frame = batch["frame_id"][0].split("/")[-1] 34 | 35 | fused_output = self.fuse_pipeline.fuse_training(batch, database, device) 36 | 37 | if self.config.FILTERING_MODEL.do: 38 | if self.filter_pipeline is not None: 39 | filtered_output = self.filter_pipeline.filter_training( 40 | fused_output, 41 | database, 42 | epoch, 43 | frame, 44 | 
scene_id, 45 | batch["sensor"], 46 | device, 47 | ) 48 | else: 49 | filtered_output = None 50 | 51 | if filtered_output == "save_and_exit": 52 | return "save_and_exit" 53 | 54 | if filtered_output is not None: 55 | fused_output["filtered_output"] = filtered_output 56 | else: 57 | if not self.config.FILTERING_MODEL.model == "routedfusion": 58 | return None 59 | 60 | return fused_output 61 | 62 | def test(self, loader, dataset, database, sensors, device): 63 | for k, batch in tqdm(enumerate(loader), total=len(dataset)): 64 | if self.config.DATA.collaborative_reconstruction: 65 | if ( 66 | math.ceil( 67 | int(batch["frame_id"][0].split("/")[-1]) 68 | / self.config.DATA.frames_per_chunk 69 | ) 70 | % 2 71 | == 0 72 | ): 73 | sensor_ = sensors[0] 74 | else: 75 | sensor_ = sensors[1] 76 | 77 | batch["depth"] = batch[sensor_ + "_depth"] 78 | batch["routing_net"] = "self._routing_network_" + sensor_ 79 | batch["mask"] = batch[sensor_ + "_mask"] 80 | if self.config.FILTERING_MODEL.model == "routedfusion": 81 | batch["sensor"] = self.config.DATA.input[0] 82 | else: 83 | batch["sensor"] = sensor_ 84 | 85 | batch["routingNet"] = sensor_ # used to be able to train routedfusion 86 | batch["fusionNet"] = sensor_ # used to be able to train routedfusion 87 | self.fuse_pipeline.fuse(batch, database, device) 88 | else: 89 | for sensor_ in sensors: 90 | if ( 91 | sensor_ + "_depth" 92 | ) in batch: # None on the Replica dataset when simulating sensors of different frame rates 93 | batch["depth"] = batch[sensor_ + "_depth"] 94 | batch["routing_net"] = "self._routing_network_" + sensor_ 95 | batch["mask"] = batch[sensor_ + "_mask"] 96 | if self.config.FILTERING_MODEL.model == "routedfusion": 97 | batch["sensor"] = self.config.DATA.input[0] 98 | else: 99 | batch["sensor"] = sensor_ 100 | 101 | batch[ 102 | "routingNet" 103 | ] = sensor_ # used to be able to train routedfusion 104 | batch[ 105 | "fusionNet" 106 | ] = sensor_ # used to be able to train routedfusion 107 | self.fuse_pipeline.fuse(batch, database, device) 108 | 109 | if self.filter_pipeline is not None: 110 | # run filtering network on all voxels which have a non-zero weight 111 | for scene in database.filtered.keys(): 112 | self.filter_pipeline.filter(scene, database, device) 113 | 114 | def test_tsdf(self, val_loader, val_dataset, val_database, sensors, device): 115 | 116 | for k, batch in tqdm(enumerate(val_loader), total=len(val_dataset)): 117 | 118 | if ( 119 | self.config.ROUTING.do 120 | and self.config.FILTERING_MODEL.model == "tsdf_early_fusion" 121 | ): 122 | batch["routing_net"] = "self._routing_network" 123 | batch["sensor"] = self.config.DATA.input[0] 124 | batch[ 125 | "fusionNet" 126 | ] = None # We don't use a fusion net during early fusion 127 | self.fuse_pipeline.fuse(batch, val_database, device) 128 | else: 129 | for sensor_ in sensors: 130 | batch["depth"] = batch[sensor_ + "_depth"] 131 | batch["routing_net"] = "self._routing_network_" + sensor_ 132 | batch["mask"] = batch[sensor_ + "_mask"] 133 | batch["sensor"] = sensor_ 134 | batch[ 135 | "routingNet" 136 | ] = sensor_ # used to be able to train routedfusion 137 | batch[ 138 | "fusionNet" 139 | ] = sensor_ # used to be able to train routedfusion 140 | self.fuse_pipeline.fuse(batch, val_database, device) 141 | 142 | if self.config.FILTERING_MODEL.do: 143 | # perform the fusion of the grids 144 | if self.config.FILTERING_MODEL.model == "tsdf_early_fusion": 145 | for scene in val_database.filtered.keys(): 146 | val_database.filtered[scene].volume = val_database.tsdf[ 147 | 
self.config.DATA.input[0] 148 | ][scene].volume 149 | 150 | elif ( 151 | self.config.FILTERING_MODEL.model == "tsdf_middle_fusion" 152 | ): # this is weighted average fusion 153 | for scene in val_database.filtered.keys(): 154 | weight_sum = np.zeros_like(val_database.filtered[scene].volume) 155 | for sensor_ in sensors: 156 | weight_sum += val_database.fusion_weights[sensor_][scene] 157 | val_database.filtered[scene].volume += ( 158 | val_database.tsdf[sensor_][scene].volume 159 | * val_database.fusion_weights[sensor_][scene] 160 | ) 161 | val_database.filtered[scene].volume = np.divide( 162 | val_database.filtered[scene].volume, 163 | weight_sum, 164 | out=np.zeros_like(weight_sum), 165 | where=weight_sum != 0.0, 166 | ) 167 | 168 | val_database.sensor_weighting[scene] = np.divide( 169 | val_database.fusion_weights[sensors[0]][scene], 170 | weight_sum, 171 | out=np.zeros_like(weight_sum), 172 | where=weight_sum != 0.0, 173 | ) 174 | -------------------------------------------------------------------------------- /modules/routing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ConfidenceRouting(torch.nn.Module): 5 | """ 6 | Confidence Routing Network 7 | """ 8 | 9 | def __init__(self, Cin, F, batchnorms=True): 10 | 11 | super().__init__() 12 | self.F = F 13 | 14 | Cout = 1 15 | 16 | if batchnorms: 17 | self.pre = torch.nn.Sequential( 18 | torch.nn.ReflectionPad2d(1), 19 | torch.nn.Conv2d(Cin, F, kernel_size=3, stride=1, padding=0), 20 | torch.nn.BatchNorm2d(F), 21 | torch.nn.ReLU(), 22 | torch.nn.ReflectionPad2d(1), 23 | torch.nn.Conv2d(F, F, kernel_size=3, stride=1, padding=0), 24 | torch.nn.BatchNorm2d(F), 25 | torch.nn.ReLU(), 26 | ) 27 | 28 | self.post = torch.nn.Sequential( 29 | torch.nn.ReflectionPad2d(1), 30 | torch.nn.Conv2d(3 * F, F, kernel_size=3, stride=1, padding=0), 31 | torch.nn.BatchNorm2d(F), 32 | torch.nn.ReLU(), 33 | torch.nn.ReflectionPad2d(1), 34 | torch.nn.Conv2d(F, Cout, kernel_size=3, stride=1, padding=0), 35 | torch.nn.BatchNorm2d(Cout), 36 | torch.nn.ReLU(), 37 | ) 38 | 39 | self.process = torch.nn.Sequential( 40 | torch.nn.ReflectionPad2d(1), 41 | torch.nn.Conv2d(F, 2 * F, kernel_size=3, stride=1, padding=0), 42 | torch.nn.BatchNorm2d(2 * F), 43 | torch.nn.ReLU(), 44 | torch.nn.ReflectionPad2d(1), 45 | torch.nn.Conv2d(2 * F, 2 * F, kernel_size=3, stride=1, padding=0), 46 | torch.nn.BatchNorm2d(2 * F), 47 | torch.nn.ReLU(), 48 | ) 49 | else: 50 | self.pre = torch.nn.Sequential( 51 | torch.nn.ReflectionPad2d(1), 52 | torch.nn.Conv2d(Cin, F, kernel_size=3, stride=1, padding=0), 53 | torch.nn.ReLU(), 54 | torch.nn.ReflectionPad2d(1), 55 | torch.nn.Conv2d(F, F, kernel_size=3, stride=1, padding=0), 56 | torch.nn.ReLU(), 57 | ) 58 | 59 | self.post = torch.nn.Sequential( 60 | torch.nn.ReflectionPad2d(1), 61 | torch.nn.Conv2d(3 * F, F, kernel_size=3, stride=1, padding=0), 62 | torch.nn.ReLU(), 63 | torch.nn.ReflectionPad2d(1), 64 | torch.nn.Conv2d(F, Cout, kernel_size=3, stride=1, padding=0), 65 | torch.nn.ReLU(), 66 | ) 67 | 68 | self.process = torch.nn.Sequential( 69 | torch.nn.ReflectionPad2d(1), 70 | torch.nn.Conv2d(F, 2 * F, kernel_size=3, stride=1, padding=0), 71 | torch.nn.ReLU(), 72 | torch.nn.ReflectionPad2d(1), 73 | torch.nn.Conv2d(2 * F, 2 * F, kernel_size=3, stride=1, padding=0), 74 | torch.nn.ReLU(), 75 | ) 76 | 77 | self.uncertainty = torch.nn.Sequential( 78 | torch.nn.ReflectionPad2d(1), 79 | torch.nn.Conv2d(3 * F, F, kernel_size=3, stride=1, padding=0), 80 | torch.nn.ReLU(), 81 | 
torch.nn.ReflectionPad2d(1), 82 | torch.nn.Conv2d(F, Cout, kernel_size=3, stride=1, padding=0), 83 | torch.nn.ReLU(), 84 | ) 85 | 86 | self.maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 87 | 88 | def forward(self, data): 89 | features = self.pre(data) 90 | lower_scale = self.maxpool(features) 91 | lower_features = self.process(lower_scale) 92 | upsampled = torch.nn.functional.interpolate( 93 | lower_features, scale_factor=2, mode="bilinear", align_corners=False 94 | ) 95 | H = data.shape[2] 96 | W = data.shape[3] 97 | upsampled = upsampled[:, :, :H, :W] 98 | output = self.post(torch.cat((features, upsampled), dim=1)) 99 | 100 | uncertainty = self.uncertainty(torch.cat((features, upsampled), dim=1)) 101 | 102 | return torch.cat((output, uncertainty), dim=1) 103 | -------------------------------------------------------------------------------- /modules/voxelgrid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | class FeatureGrid(object): 6 | def __init__(self, voxel_size, n_features, bbox=None): 7 | 8 | self._resolution = voxel_size 9 | self._bbox = bbox 10 | self._n_features = n_features 11 | self._volume = None 12 | 13 | if bbox is not None: 14 | self._origin = bbox[:, 0] 15 | 16 | volume_shape = np.diff(self._bbox, axis=1).ravel() / self.resolution 17 | # float16 conversion critical - otherwise, numerical 18 | # instabilies will cause wrong voxel grid size 19 | volume_shape = volume_shape.astype(np.float16) 20 | self._shape = ( 21 | np.ceil([volume_shape[0], volume_shape[1], volume_shape[2], n_features]) 22 | .astype(np.int32) 23 | .tolist() 24 | ) # round up 25 | 26 | self._volume = np.zeros(self._shape, dtype=np.float16) 27 | 28 | @property 29 | def resolution(self): 30 | return self._resolution 31 | 32 | @property 33 | def bbox(self): 34 | assert self._bbox is not None 35 | return self._bbox 36 | 37 | @property 38 | def volume(self): 39 | assert self._volume is not None 40 | return self._volume 41 | 42 | @volume.setter 43 | def volume(self, volume): 44 | self._volume = volume 45 | 46 | @property 47 | def origin(self): 48 | assert self._origin is not None 49 | return self._origin 50 | 51 | @property 52 | def shape(self): 53 | assert self._volume is not None 54 | return self._volume.shape 55 | 56 | def __getattr__(self, x, y, z): 57 | return self._volume[x, y, z, :] 58 | 59 | 60 | class VoxelGrid(object): 61 | def __init__(self, voxel_size, volume=None, bbox=None, initial_value=0.0): 62 | 63 | self._resolution = voxel_size 64 | 65 | self._volume = volume 66 | self._bbox = bbox 67 | 68 | if bbox is not None: 69 | self._origin = bbox[:, 0] 70 | 71 | if volume is None and bbox is not None: 72 | volume_shape = np.diff(self._bbox, axis=1).ravel() / self.resolution 73 | # float16 conversion critical - otherwise, numerical 74 | # instabilies will cause wrong voxel grid size 75 | volume_shape = volume_shape.astype(np.float16) 76 | 77 | volume_shape = np.ceil(volume_shape).astype(np.int32).tolist() # round up 78 | # float 16 conversion is critical 79 | self._volume = initial_value * np.ones(volume_shape).astype("float16") 80 | 81 | def from_array(self, array, bbox): 82 | 83 | self._volume = array 84 | self._bbox = bbox 85 | self._origin = bbox[:, 0] 86 | 87 | @property 88 | def resolution(self): 89 | return self._resolution 90 | 91 | @property 92 | def bbox(self): 93 | assert self._bbox is not None 94 | return self._bbox 95 | 96 | @property 97 | def volume(self): 98 | assert self._volume is 
not None 99 | return self._volume 100 | 101 | @volume.setter 102 | def volume(self, volume): 103 | self._volume = volume 104 | 105 | @property 106 | def origin(self): 107 | assert self._origin is not None 108 | return self._origin 109 | 110 | @property 111 | def shape(self): 112 | assert self._volume is not None 113 | return self._volume.shape 114 | 115 | def __getattr__(self, x, y, z): 116 | return self._volume[x, y, z] 117 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.0.0 2 | argon2-cffi==21.3.0 3 | argon2-cffi-bindings==21.2.0 4 | attrs==21.4.0 5 | backcall==0.2.0 6 | bleach==4.1.0 7 | cachetools==4.2.4 8 | certifi==2021.10.8 9 | cffi==1.15.0 10 | charset-normalizer==2.0.9 11 | cycler==0.11.0 12 | debugpy==1.5.1 13 | decorator==5.1.0 14 | defusedxml==0.7.1 15 | easydict==1.9 16 | entrypoints==0.3 17 | evaluate-3d-reconstruction @ file:///cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/deps/evaluate_3d_reconstruction 18 | fonttools==4.28.5 19 | google-auth==1.35.0 20 | google-auth-oauthlib==0.4.6 21 | grpcio==1.43.0 22 | h5py==2.10.0 23 | idna==3.3 24 | imageio==2.13.5 25 | importlib-metadata==4.10.0 26 | importlib-resources==5.4.0 27 | ipykernel==6.6.0 28 | ipython==7.30.1 29 | ipython-genutils==0.2.0 30 | ipywidgets==7.6.5 31 | jedi==0.18.1 32 | Jinja2==3.0.3 33 | joblib==1.1.0 34 | jsonschema==4.3.2 35 | jupyter-client==7.1.0 36 | jupyter-core==4.9.1 37 | jupyterlab-pygments==0.1.2 38 | jupyterlab-widgets==1.0.2 39 | kiwisolver==1.3.2 40 | Markdown==3.3.6 41 | MarkupSafe==2.0.1 42 | matplotlib==3.5.1 43 | matplotlib-inline==0.1.3 44 | mistune==0.8.4 45 | nbclient==0.5.9 46 | nbconvert==6.3.0 47 | nbformat==5.1.3 48 | nest-asyncio==1.5.4 49 | networkx==2.6.3 50 | notebook==6.4.6 51 | numpy==1.21.5 52 | oauthlib==3.1.1 53 | open3d @ file:///cluster/work/cvl/esandstroem/programs/Open3D/build/lib/python_package/pip_package/open3d-0.9.0.0-cp38-cp38-linux_x86_64.whl 54 | openTSNE==0.6.0 55 | packaging==21.3 56 | pandocfilters==1.5.0 57 | parso==0.8.3 58 | pexpect==4.8.0 59 | pickleshare==0.7.5 60 | Pillow==8.4.0 61 | prometheus-client==0.12.0 62 | prompt-toolkit==3.0.24 63 | protobuf==3.19.1 64 | ptyprocess==0.7.0 65 | pyasn1==0.4.8 66 | pyasn1-modules==0.2.8 67 | pycparser==2.21 68 | Pygments==2.10.0 69 | pyparsing==3.0.6 70 | pyquaternion==0.9.9 71 | pyrsistent==0.18.0 72 | python-dateutil==2.8.2 73 | PyWavelets==1.2.0 74 | PyYAML==5.3 75 | pyzmq==22.3.0 76 | requests==2.26.0 77 | requests-oauthlib==1.3.0 78 | rsa==4.8 79 | scikit-image==0.17.2 80 | scikit-learn==1.0.2 81 | scipy==1.7.3 82 | Send2Trash==1.8.0 83 | six==1.16.0 84 | tensorboard==2.2.1 85 | tensorboard-plugin-wit==1.8.0 86 | terminado==0.12.1 87 | testpath==0.5.0 88 | threadpoolctl==3.0.0 89 | tifffile==2021.11.2 90 | torch==1.7.1 91 | tornado==6.1 92 | tqdm==4.43.0 93 | traitlets==5.1.1 94 | trimesh==3.7.6 95 | typing-extensions==4.0.1 96 | urllib3==1.26.7 97 | wandb==0.12.9 98 | wcwidth==0.2.5 99 | webencodings==0.5.1 100 | Werkzeug==2.0.2 101 | widgetsnbextension==3.5.2 102 | zipp==3.6.0 103 | -------------------------------------------------------------------------------- /test_routing.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | from skimage import io 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from utils.loading import load_config 9 | from utils.setup import * 10 
| from modules.routing import ConfidenceRouting 11 | 12 | 13 | def arg_parser(): 14 | 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument("--config", required=False) 18 | 19 | args = parser.parse_args() 20 | 21 | return vars(args) 22 | 23 | 24 | def prepare_input_data(batch, config, device): 25 | 26 | for k, sensor_ in enumerate(config.DATA.input): 27 | if k == 0: 28 | inputs = batch[sensor_ + "_depth"].unsqueeze_(1) 29 | else: 30 | inputs = torch.cat((batch[sensor_ + "_depth"].unsqueeze_(1), inputs), 1) 31 | inputs = inputs.to(device) 32 | 33 | if config.ROUTING.intensity_grad: 34 | intensity = batch["intensity"].unsqueeze_(1) 35 | grad = batch["gradient"].unsqueeze_(1) 36 | inputs = torch.cat((intensity, grad, inputs), 1) 37 | inputs = inputs.to(device) 38 | 39 | target = batch[config.DATA.target] # (batch size, height, width) 40 | target = target.to(device) 41 | target = target.unsqueeze_(1) # (batch size, channels, height, width) 42 | return inputs, target 43 | 44 | 45 | def test(config): 46 | 47 | if config.SETTINGS.gpu: 48 | device = torch.device("cuda:0") 49 | else: 50 | device = torch.device("cpu") 51 | 52 | # get test dataset 53 | test_data_config = get_data_config(config, mode="test") 54 | test_dataset = get_data(config.DATA.dataset, test_data_config) 55 | test_loader = torch.utils.data.DataLoader( 56 | test_dataset, config.TESTING.test_batch_size, config.TESTING.test_shuffle 57 | ) 58 | 59 | # define model 60 | Cin = len(config.DATA.input) 61 | 62 | if config.ROUTING.intensity_grad: 63 | Cin += 2 64 | 65 | model = ConfidenceRouting( 66 | Cin=Cin, F=config.MODEL.contraction, batchnorms=config.MODEL.normalization 67 | ) 68 | # load model 69 | checkpoint = torch.load(config.TESTING.model_path) 70 | 71 | model.load_state_dict(checkpoint["pipeline_state_dict"]) 72 | 73 | model = model.to(device) 74 | 75 | n_test_batches = int(len(test_dataset) / config.TESTING.test_batch_size) 76 | 77 | for i, batch in enumerate(tqdm(test_loader, total=n_test_batches)): 78 | inputs, target = prepare_input_data(batch, config, device) 79 | 80 | output = model.forward(inputs) 81 | 82 | est = output[:, 0, :, :].unsqueeze_(1) 83 | unc = output[:, 1, :, :].unsqueeze_(1) 84 | 85 | est = est.detach().cpu().numpy() 86 | est = est.squeeze() 87 | estplot = est 88 | est = est * 1000 89 | est = est.astype("uint16") 90 | 91 | unc = unc.detach().cpu().numpy() 92 | unc = ( 93 | unc.squeeze() 94 | ) # there is a relu activation function as the last step of the confidence decoder s.t. 
we always get non-negative numbers 95 | confidence = np.exp(-1.0 * unc) 96 | confidence *= 10000 97 | confidence = confidence.astype("uint16") 98 | 99 | output_dir_refined = ( 100 | config.DATA.root_dir 101 | + "/" 102 | + batch["frame_id"][0].split("/")[0] 103 | + "/" 104 | + batch["frame_id"][0].split("/")[1] 105 | + "/left_routing_refined_" 106 | + config.TESTING.model_path.split("/")[-3] 107 | ) 108 | output_dir_confidence = ( 109 | config.DATA.root_dir 110 | + "/" 111 | + batch["frame_id"][0].split("/")[0] 112 | + "/" 113 | + batch["frame_id"][0].split("/")[1] 114 | + "/left_routing_confidence_" 115 | + config.TESTING.model_path.split("/")[-3] 116 | ) 117 | 118 | if not os.path.exists(output_dir_refined): 119 | os.makedirs(output_dir_refined) 120 | 121 | if not os.path.exists(output_dir_confidence): 122 | os.makedirs(output_dir_confidence) 123 | 124 | io.imsave( 125 | output_dir_refined + "/" + batch["frame_id"][0].split("/")[-1] + ".png", est 126 | ) 127 | io.imsave( 128 | output_dir_confidence + "/" + batch["frame_id"][0].split("/")[-1] + ".png", 129 | confidence, 130 | ) 131 | 132 | 133 | if __name__ == "__main__": 134 | 135 | # get arguments 136 | args = arg_parser() 137 | 138 | # get configs 139 | # load config 140 | if args["config"]: 141 | config = load_config(args["config"]) 142 | else: 143 | raise ValueError("Missing configuration: Please specify config.") 144 | 145 | # train 146 | test(config) 147 | -------------------------------------------------------------------------------- /train_routing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import datetime 4 | import random 5 | 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | from utils.loading import load_config_from_yaml 11 | from utils.setup import * 12 | 13 | from utils.loss import RoutingLoss 14 | from modules.routing import ConfidenceRouting 15 | import wandb 16 | 17 | 18 | def arg_parser(): 19 | 20 | parser = argparse.ArgumentParser() 21 | 22 | parser.add_argument("--config", required=True) 23 | 24 | args = parser.parse_args() 25 | 26 | return vars(args) 27 | 28 | 29 | def prepare_input_data(batch, config, device): 30 | 31 | for k, sensor_ in enumerate(config.DATA.input): 32 | if k == 0: 33 | inputs = batch[sensor_ + "_depth"].unsqueeze_(1) 34 | else: 35 | inputs = torch.cat((batch[sensor_ + "_depth"].unsqueeze_(1), inputs), 1) 36 | inputs = inputs.to(device) 37 | 38 | if config.ROUTING.intensity_grad: 39 | intensity = batch["intensity"].unsqueeze_(1) 40 | grad = batch["gradient"].unsqueeze_(1) 41 | inputs = torch.cat((intensity, grad, inputs), 1) 42 | inputs = inputs.to(device) 43 | 44 | target = batch[config.DATA.target] # (batch size, height, width) 45 | target = target.to(device) 46 | target = target.unsqueeze_(1) # (batch size, channels, height, width) 47 | return inputs, target 48 | 49 | 50 | def train(args, config): 51 | # set seed for reproducibility 52 | if config.SETTINGS.seed: 53 | random.seed(config.SETTINGS.seed) 54 | np.random.seed(config.SETTINGS.seed) 55 | torch.manual_seed(config.SETTINGS.seed) 56 | torch.cuda.manual_seed_all(config.SETTINGS.seed) 57 | torch.backends.cudnn.deterministic = True 58 | torch.cuda.manual_seed_all(config.SETTINGS.seed) 59 | torch.backends.cudnn.benchmark = False 60 | 61 | if config.SETTINGS.gpu: 62 | device = torch.device("cuda:0") 63 | else: 64 | device = torch.device("cpu") 65 | 66 | config.TIMESTAMP = datetime.datetime.now().strftime("%y%m%d-%H%M%S") 67 | print("model time stamp: 
", config.TIMESTAMP) 68 | 69 | # initialize weights and biases logging 70 | wandb.init( 71 | config=config, 72 | entity="esandstroem", 73 | project="senfunet-routing", 74 | name=config.TIMESTAMP, 75 | notes="put comment here", 76 | ) 77 | # change run name of wandb 78 | wandb.run.name = config.TIMESTAMP 79 | wandb.run.save() 80 | 81 | workspace = get_workspace(config) 82 | workspace.save_config(config) 83 | 84 | # get train dataset 85 | train_data_config = get_data_config(config, mode="train") 86 | train_dataset = get_data(config.DATA.dataset, train_data_config) 87 | train_loader = torch.utils.data.DataLoader( 88 | train_dataset, config.TRAINING.train_batch_size, config.TRAINING.train_shuffle 89 | ) 90 | 91 | # get val dataset 92 | val_data_config = get_data_config(config, mode="val") 93 | val_dataset = get_data(config.DATA.dataset, val_data_config) 94 | 95 | val_loader = torch.utils.data.DataLoader( 96 | val_dataset, config.TRAINING.val_batch_size, config.TRAINING.val_shuffle 97 | ) 98 | 99 | # define model 100 | Cin = len(config.DATA.input) 101 | 102 | if config.ROUTING.intensity_grad: 103 | Cin += 2 104 | 105 | model = ConfidenceRouting( 106 | Cin=Cin, F=config.MODEL.contraction, batchnorms=config.MODEL.normalization 107 | ) 108 | model = model.to(device) 109 | 110 | # define loss function 111 | criterion = RoutingLoss(config) 112 | criterion = criterion.to(device) 113 | 114 | # add weight and gradient tracking in wandb 115 | wandb.watch(model, criterion, log="all", log_freq=1000) 116 | 117 | # define optimizer 118 | optimizer = torch.optim.RMSprop( 119 | model.parameters(), 120 | config.OPTIMIZATION.lr, 121 | config.OPTIMIZATION.rho, 122 | config.OPTIMIZATION.eps, 123 | momentum=config.OPTIMIZATION.momentum, 124 | weight_decay=config.OPTIMIZATION.weight_decay, 125 | ) 126 | 127 | n_train_batches = int(len(train_dataset) / config.TRAINING.train_batch_size) 128 | n_val_batches = int(len(val_dataset) / config.TRAINING.val_batch_size) 129 | 130 | val_loss_best = np.infty 131 | 132 | # sample validation visualization frames 133 | val_vis_ids = np.random.choice(np.arange(0, n_val_batches), 5, replace=False) 134 | 135 | # # define metrics 136 | l1_criterion = torch.nn.L1Loss() 137 | l2_criterion = torch.nn.MSELoss() 138 | 139 | for epoch in range(0, config.TRAINING.n_epochs): 140 | print("epoch: ", epoch) 141 | 142 | val_loss_t = 0.0 143 | val_loss_l1 = 0.0 144 | val_loss_l2 = 0.0 145 | 146 | train_loss_t = 0.0 147 | train_loss_l1 = 0.0 148 | train_loss_l2 = 0.0 149 | 150 | train_epoch_loss_t = 0.0 151 | train_epoch_loss_l1 = 0.0 152 | train_epoch_loss_l2 = 0.0 153 | 154 | # make ready for training and clear optimizer 155 | model.train() 156 | optimizer.zero_grad() 157 | 158 | for i, batch in enumerate(tqdm(train_loader, total=n_train_batches)): 159 | inputs, target = prepare_input_data(batch, config, device) 160 | 161 | output = model(inputs) 162 | 163 | est = output[:, 0, :, :].unsqueeze_(1) 164 | unc = output[:, 1, :, :].unsqueeze_(1) 165 | 166 | if not config.LOSS.completion: 167 | if len(config.DATA.input) == 1: 168 | mask = ( 169 | batch[config.DATA.input[0] + "_mask"].to(device).unsqueeze_(1) 170 | ) 171 | else: 172 | mask = batch["mask"].to(device).unsqueeze_(1) 173 | target = torch.where(mask == 0.0, torch.zeros_like(target), target) 174 | 175 | # compute training loss 176 | loss = criterion.forward(est, unc, target) 177 | loss.backward() 178 | 179 | # compute metrics for analysis 180 | loss_l1 = l1_criterion.forward(est, target) 181 | loss_l2 = l2_criterion.forward(est, target) 182 
| 183 | train_loss_t += loss.item() 184 | train_loss_l1 += loss_l1.item() 185 | train_loss_l2 += loss_l2.item() 186 | 187 | train_epoch_loss_t += loss.item() 188 | train_epoch_loss_l1 += loss_l1.item() 189 | train_epoch_loss_l2 += loss_l2.item() 190 | 191 | if i % config.OPTIMIZATION.accumulation_steps == 0: 192 | optimizer.step() 193 | optimizer.zero_grad() 194 | 195 | if i % config.SETTINGS.log_freq == 0 and i > 0: 196 | # compute avg. loss per frame 197 | train_loss_t /= ( 198 | config.SETTINGS.log_freq * config.TRAINING.train_batch_size 199 | ) 200 | train_loss_l1 /= ( 201 | config.SETTINGS.log_freq * config.TRAINING.train_batch_size 202 | ) 203 | train_loss_l2 /= ( 204 | config.SETTINGS.log_freq * config.TRAINING.train_batch_size 205 | ) 206 | 207 | wandb.log( 208 | { 209 | "Train/total loss": train_loss_t, 210 | "Train/l1 loss": train_loss_l1, 211 | "Train/l2 loss": train_loss_l2, 212 | "Train/nbr_frames": (epoch * n_train_batches + i) 213 | * config.TRAINING.train_batch_size, 214 | } 215 | ) 216 | train_loss_t = 0 217 | train_loss_l1 = 0 218 | train_loss_l2 = 0 219 | 220 | train_epoch_loss_t /= n_train_batches * config.TRAINING.train_batch_size 221 | train_epoch_loss_l1 /= n_train_batches * config.TRAINING.train_batch_size 222 | train_epoch_loss_l2 /= n_train_batches * config.TRAINING.train_batch_size 223 | 224 | # log training metrics 225 | workspace.log("Epoch {} Loss {}".format(epoch, train_epoch_loss_t)) 226 | workspace.log("Epoch {} L1 Loss {}".format(epoch, train_epoch_loss_l1)) 227 | workspace.log("Epoch {} L2 Loss {}".format(epoch, train_epoch_loss_l2)) 228 | 229 | model.eval() 230 | 231 | for i, batch in enumerate(tqdm(val_loader, total=n_val_batches)): 232 | inputs, target = prepare_input_data(batch, config, device) 233 | 234 | output = model(inputs) 235 | 236 | est = output[:, 0, :, :].unsqueeze_(1) 237 | unc = output[:, 1, :, :].unsqueeze_(1) 238 | # visualize frames 239 | if i in val_vis_ids: 240 | # parse frames and normalize to range 0-1 241 | frame_est = est[0, :, :, :].cpu().detach().numpy().reshape(512, 512, 1) 242 | frame_est /= np.amax(frame_est) 243 | frame_gt = ( 244 | target[0, :, :, :].cpu().detach().numpy().reshape(512, 512, 1) 245 | ) 246 | frame_gt /= np.amax(frame_gt) 247 | frame_unc = unc[0, :, :, :].cpu().detach().numpy().reshape(512, 512, 1) 248 | frame_conf = np.exp(-1.0 * frame_unc) 249 | frame_unc /= np.amax(frame_unc) 250 | frame_l1 = np.abs(frame_est - frame_gt).reshape(512, 512, 1) 251 | frame_l1 /= np.amax(frame_l1) 252 | 253 | wandb.log( 254 | { 255 | "Val/images": [ 256 | wandb.Image( 257 | frame_est, 258 | caption="depth estimate {}".format(i), 259 | ), 260 | wandb.Image(frame_gt, caption="gt depth {}".format(i)), 261 | wandb.Image( 262 | frame_unc, 263 | caption="uncertainty estimate {}".format(i), 264 | ), 265 | wandb.Image( 266 | frame_conf, 267 | caption="confidence estimate {}".format(i), 268 | ), 269 | wandb.Image( 270 | frame_l1, 271 | caption="l1 depth error {}".format(i), 272 | ), 273 | ] 274 | } 275 | ) 276 | 277 | if not config.LOSS.completion: 278 | if len(config.DATA.input) == 1: 279 | mask = ( 280 | batch[config.DATA.input[0] + "_mask"].to(device).unsqueeze_(1) 281 | ) 282 | else: 283 | mask = batch["mask"].to(device).unsqueeze_(1) 284 | target = torch.where(mask == 0.0, torch.zeros_like(target), target) 285 | 286 | loss_t = criterion.forward(est, unc, target) 287 | loss_l1 = l1_criterion.forward(est, target) 288 | loss_l2 = l2_criterion.forward(est, target) 289 | 290 | val_loss_t += loss_t.item() 291 | val_loss_l1 += 
loss_l1.item() 292 | val_loss_l2 += loss_l2.item() 293 | 294 | val_loss_t /= n_val_batches * config.TRAINING.train_batch_size 295 | val_loss_l1 /= n_val_batches * config.TRAINING.train_batch_size 296 | val_loss_l2 /= n_val_batches * config.TRAINING.train_batch_size 297 | 298 | # log validation metrics 299 | workspace.log( 300 | "Epoch {} Validation Loss {}".format(epoch, val_loss_t), mode="val" 301 | ) 302 | workspace.log( 303 | "Epoch {} Validation L1 Loss {}".format(epoch, val_loss_l1), mode="val" 304 | ) 305 | workspace.log( 306 | "Epoch {} Validation L2 Loss {}".format(epoch, val_loss_l2), mode="val" 307 | ) 308 | 309 | wandb.log( 310 | { 311 | "Val/total loss": val_loss_t, 312 | "Val/l1 loss": val_loss_l1, 313 | "Val/l2 loss": val_loss_l2, 314 | "Val/epoch": epoch, 315 | } 316 | ) 317 | 318 | # define model state for storing 319 | model_state = { 320 | "epoch": epoch, 321 | "pipeline_state_dict": model.state_dict(), 322 | "optimizer_state_dict": optimizer.state_dict(), 323 | } 324 | 325 | if val_loss_t <= val_loss_best: 326 | val_loss_best = val_loss_t 327 | workspace.log( 328 | "Found new best model with loss {} at epoch {}".format( 329 | val_loss_best, epoch 330 | ), 331 | mode="val", 332 | ) 333 | workspace.save_model_state(model_state, is_best=True) 334 | else: 335 | workspace.save_model_state(model_state, is_best=False) 336 | 337 | 338 | if __name__ == "__main__": 339 | 340 | # get arguments 341 | args = arg_parser() 342 | 343 | # get configs 344 | config = load_config_from_yaml(args["config"]) 345 | 346 | # train 347 | train(args, config) 348 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/utils/__init__.py -------------------------------------------------------------------------------- /utils/loading.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import json 3 | import os 4 | import torch 5 | 6 | from easydict import EasyDict 7 | 8 | 9 | def load_config_from_yaml(path): 10 | """ 11 | Method to load the config file for 12 | neural network training 13 | :param path: yaml-filepath with configs stored 14 | :return: easydict containing config 15 | """ 16 | c = yaml.safe_load(open(path)) 17 | config = EasyDict(c) 18 | 19 | return config 20 | 21 | 22 | def load_config_from_json(path): 23 | """ 24 | Method to load the config file 25 | from json files. 26 | :param path: path to json file 27 | :return: easydict containing config 28 | """ 29 | with open(path, "r") as file: 30 | data = json.load(file) 31 | config = EasyDict(data) 32 | return config 33 | 34 | 35 | def load_config(path): 36 | """ 37 | Wrapper method around different methods 38 | loading config file based on file ending. 39 | """ 40 | 41 | if path[-4:] == "yaml": 42 | return load_config_from_yaml(path) 43 | elif path[-4:] == "json": 44 | return load_config_from_json(path) 45 | else: 46 | raise ValueError("Unsupported file format for config") 47 | 48 | 49 | def load_pipeline( 50 | file, model 51 | ): # loads all paramters that can be loaded in the checkpoint! 
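# Behaviour sketch: the function first tries to restore the full
# "pipeline_state_dict"; if the architectures do not match exactly, it falls
# back to loading only the intersecting keys with strict=False and prints the
# keys that could not be matched. Hypothetical usage (path and F value are
# placeholders, not taken from the configs):
#
#     model = ConfidenceRouting(Cin=1, F=64)
#     load_pipeline("path/to/best.pth.tar", model)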
52 | 53 | checkpoint = file 54 | 55 | if not os.path.exists(checkpoint): 56 | raise FileNotFoundError("File doesn't exist {}".format(checkpoint)) 57 | try: 58 | if torch.cuda.is_available(): 59 | checkpoint = torch.load(checkpoint) 60 | else: 61 | checkpoint = torch.load(checkpoint, map_location=torch.device("cpu")) 62 | 63 | model.load_state_dict(checkpoint["pipeline_state_dict"]) 64 | print("loading full model") 65 | except Exception: 66 | print("loading model partially") 67 | 68 | print( 69 | "nbr of entries in checkpoint model: ", 70 | len(checkpoint["pipeline_state_dict"].keys()), 71 | ) 72 | pretrained_dict = { 73 | k: v 74 | for k, v in checkpoint["pipeline_state_dict"].items() 75 | if k in model.state_dict() 76 | } 77 | print("nbr of entries found in created model: ", len(model.state_dict().keys())) 78 | print( 79 | "nbr of entries found in created model and checkpoint model: ", 80 | len(pretrained_dict.keys()), 81 | ) 82 | print("Keys in created model but not in checkpoint:") 83 | for key in model.state_dict().keys(): 84 | if key not in checkpoint["pipeline_state_dict"].keys(): 85 | print(key) 86 | print("...") 87 | print("Keys in checkpoint but not in created model:") 88 | for key in checkpoint["pipeline_state_dict"].keys(): 89 | if key not in model.state_dict().keys(): 90 | print(key) 91 | 92 | model.load_state_dict(pretrained_dict, False) 93 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def evaluation(est, target, mask=None): 6 | 7 | mse = mse_fn(est, target, mask) 8 | mad = mad_fn(est, target, mask) 9 | iou = iou_fn(est, target, mask) 10 | acc = acc_fn(est, target, mask) 11 | 12 | return {"mse": mse, "mad": mad, "iou": iou, "acc": acc} 13 | 14 | 15 | def mse_fn(est, target, mask=None): 16 | 17 | if mask is not None: 18 | grid = mask * np.power(est - target, 2) 19 | grid = grid.astype( 20 | np.float32 21 | ) # cast required to avoid inf values since the input grids are float16 22 | metric = np.sum(grid) / np.sum(mask) 23 | else: 24 | metric = np.mean(np.power(est - target, 2)) 25 | 26 | return metric 27 | 28 | 29 | def mad_fn(est, target, mask=None): 30 | 31 | if mask is not None: 32 | grid = mask * np.abs(est - target) 33 | grid = grid.astype( 34 | np.float32 35 | ) # cast required to avoid inf values since the input grids are float16 36 | metric = np.sum(grid) / np.sum(mask) 37 | else: 38 | metric = np.mean(np.abs(est - target)) 39 | 40 | return metric 41 | 42 | 43 | def iou_fn(est, target, mask=None): 44 | 45 | est = est.astype( 46 | np.float32 47 | ) # cast required to avoid inf values since the input grids are float16 48 | target = target.astype(np.float32) 49 | if mask is not None: 50 | tp = (est < 0) & (target < 0) & (mask > 0) 51 | fp = (est < 0) & (target >= 0) & (mask > 0) 52 | fn = (est >= 0) & (target < 0) & (mask > 0) 53 | else: 54 | tp = (est < 0) & (target < 0) 55 | fp = (est < 0) & (target >= 0) 56 | fn = (est >= 0) & (target < 0) 57 | 58 | intersection = tp.sum() 59 | union = tp.sum() + fp.sum() + fn.sum() 60 | 61 | del tp, fp, fn 62 | metric = intersection / union 63 | return metric 64 | 65 | 66 | def acc_fn(est, target, mask=None): 67 | 68 | est = est.astype( 69 | np.float32 70 | ) # cast required to avoid inf values since the input grids are float16 71 | target = target.astype(np.float32) 72 | if mask is not None: 73 | tp = (est < 0) & (target < 0) & (mask > 0) 74 | tn =
(est >= 0) & (target >= 0) & (mask > 0) 75 | else: 76 | tp = (est < 0) & (target < 0) 77 | tn = (est >= 0) & (target >= 0) 78 | 79 | acc = (tp.sum() + tn.sum()) / (mask.sum() if mask is not None else est.size) 80 | 81 | del tp, tn 82 | metric = acc 83 | return metric 84 | -------------------------------------------------------------------------------- /utils/saving.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import shutil 4 | import torch 5 | 6 | 7 | def save_config_to_json(path, config): 8 | """Saves config to json file""" 9 | with open(os.path.join(path, "config.json"), "w") as file: 10 | json.dump(config, file) 11 | 12 | 13 | def save_checkpoint(state, is_best, checkpoint, is_best_filt=None): 14 | """Saves model and training parameters 15 | at checkpoint + 'last.pth.tar'. 16 | If is_best is True (or True for a sensor), also saves 17 | checkpoint + 'best.pth.tar' (or 'best_<sensor>.pth.tar') 18 | Args: 19 | state: (dict) contains model's state_dict, may contain other keys such as epoch, optimizer state_dict 20 | is_best: (dict or bool) Dict of bools, one per sensor. True for a sensor if it is the best model seen until now 21 | checkpoint: (string) folder where parameters are to be saved 22 | is_best_filt: (bool) True if it is the best filtered model seen until now 23 | """ 24 | if not os.path.exists(checkpoint): 25 | print( 26 | "Checkpoint Directory does not exist! Making directory {}".format( 27 | checkpoint 28 | ) 29 | ) 30 | os.mkdir(checkpoint) 31 | 32 | filepath = os.path.join(checkpoint, "last.pth.tar") 33 | torch.save(state, filepath) 34 | if is_best_filt: 35 | shutil.copyfile(filepath, os.path.join(checkpoint, "best.pth.tar")) 36 | 37 | if isinstance(is_best, dict): 38 | for sensor in is_best.keys(): 39 | if is_best[sensor]: 40 | shutil.copyfile( 41 | filepath, os.path.join(checkpoint, "best_" + sensor + ".pth.tar") 42 | ) 43 | else: 44 | if is_best: 45 | shutil.copyfile( 46 | filepath, os.path.join(checkpoint, "best.pth.tar") 47 | ) # train routing network with multiple sensor inputs 48 | -------------------------------------------------------------------------------- /utils/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from dataset import Replica 5 | from dataset import CoRBS 6 | from dataset import Scene3D 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import matplotlib 11 | 12 | matplotlib.use("Agg") 13 | 14 | import trimesh 15 | import skimage.measure 16 | 17 | from modules.database import Database 18 | 19 | from utils import transform 20 | 21 | from copy import copy 22 | 23 | from utils.saving import * 24 | 25 | 26 | def get_data_config(config, mode): 27 | data_config = copy(config.DATA) 28 | try: 29 | data_config.filtering_model = config.FILTERING_MODEL.model 30 | except AttributeError: 31 | data_config.filtering_model = len( 32 | config.DATA.input 33 | ) # used when training routing network 34 | 35 | if mode == "train": 36 | data_config.mode = "train" 37 | data_config.scene_list = data_config.train_scene_list 38 | elif mode == "val": 39 | data_config.mode = "val" 40 | data_config.scene_list = data_config.val_scene_list 41 | elif mode == "test": 42 | data_config.mode = "test" 43 | data_config.scene_list = data_config.test_scene_list 44 | 45 | data_config.transform = transform.ToTensor() 46 | 47 | return data_config 48 | 49 | 50 | def get_data(dataset, config): 51 | try: 52 | return eval(dataset)(config.DATA) 53 | except AttributeError: 54 | return eval(dataset)(config) 55 | 56 | 57 | def
get_database(dataset, config, mode="train"): 58 | 59 | # TODO: make this better 60 | database_config = copy(config.DATA) 61 | database_config.transform = transform.ToTensor() 62 | database_config.n_features = config.FEATURE_MODEL.n_features 63 | 64 | database_config.test_mode = mode == "val" or mode == "test" 65 | database_config.alpha_supervision = config.LOSS.alpha_supervision 66 | database_config.outlier_channel = ( 67 | config.FILTERING_MODEL.CONV3D_MODEL.outlier_channel 68 | ) 69 | database_config.scene_list = eval("config.DATA.{}_scene_list".format(mode)) 70 | 71 | return Database(dataset, database_config) 72 | 73 | 74 | def get_workspace(config): 75 | workspace_path = os.path.join(config.SETTINGS.experiment_path, config.TIMESTAMP) 76 | workspace = Workspace(workspace_path) 77 | workspace.save_config(config) 78 | return workspace 79 | 80 | 81 | def get_logger(path, name="training"): 82 | 83 | filehandler = logging.FileHandler(os.path.join(path, "{}.logs".format(name)), "a") 84 | consolehandler = logging.StreamHandler() 85 | 86 | formatter = logging.Formatter( 87 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 88 | ) 89 | 90 | filehandler.setFormatter(formatter) 91 | consolehandler.setFormatter(formatter) 92 | 93 | logger = logging.getLogger(name) 94 | 95 | for hdlr in logger.handlers[:]: # remove all old handlers 96 | logger.removeHandler(hdlr) 97 | 98 | logger.addHandler(filehandler) # set the new handler 99 | logger.addHandler(consolehandler) 100 | 101 | logger.setLevel(logging.DEBUG) 102 | 103 | return logger 104 | 105 | 106 | class Workspace(object): 107 | def __init__(self, path): 108 | 109 | self.workspace_path = path 110 | self.model_path = os.path.join(path, "model") 111 | self.log_path = os.path.join(path, "logs") 112 | self.output_path = os.path.join(path, "output") 113 | 114 | os.makedirs(self.workspace_path) 115 | os.makedirs(self.model_path) 116 | os.makedirs(self.log_path) 117 | os.makedirs(self.output_path) 118 | 119 | self._init_logger() 120 | 121 | def _init_logger(self): 122 | self.train_logger = get_logger(self.log_path, "training") 123 | self.val_logger = get_logger(self.log_path, "validation") 124 | 125 | def save_config(self, config): 126 | print("Saving config to ", self.workspace_path) 127 | save_config_to_json(self.workspace_path, config) 128 | 129 | def save_model_state(self, state, is_best, is_best_filt=None): 130 | save_checkpoint(state, is_best, self.model_path, is_best_filt) 131 | 132 | def save_alpha_histogram(self, database, sensors, epoch): 133 | 134 | for scene in database.scenes_gt.keys(): 135 | mask = np.zeros_like(database.sensor_weighting[scene], dtype=bool) 136 | for sensor_ in sensors: 137 | mask = np.logical_or( 138 | mask, (database.fusion_weights[sensor_][scene] > 0) 139 | ) 140 | 141 | hist = database.sensor_weighting[scene][mask].flatten().astype(np.float32) 142 | plt.hist(hist, bins=100) 143 | plt.savefig( 144 | self.output_path 145 | + "/sensor_weighting_grid_histogram_" 146 | + scene 147 | + "_epoch_" 148 | + str(epoch) 149 | + ".png" 150 | ) 151 | plt.clf() 152 | 153 | def log(self, message, mode="train"): 154 | if mode == "train": 155 | self.train_logger.info(message) 156 | elif mode == "val": 157 | self.val_logger.info(message) 158 | -------------------------------------------------------------------------------- /utils/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class ToTensor(object): 6 | 7 | """Convert ndarrays in sample 
to Tensors.""" 8 | 9 | def __call__(self, sample): 10 | 11 | result = {} 12 | 13 | for key in sample.keys(): 14 | if type(sample[key]) is np.ndarray: 15 | 16 | if key == "image": 17 | # swap color axis because 18 | # numpy image: H x W x C 19 | # torch image: C X H X W 20 | image = sample[key].transpose((2, 0, 1)) 21 | image = torch.from_numpy(image) 22 | result[key] = image 23 | continue 24 | 25 | result[key] = torch.from_numpy(sample[key]) 26 | 27 | else: 28 | result[key] = sample[key] 29 | 30 | return result 31 | -------------------------------------------------------------------------------- /utils/visualize_sensor_weighting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | import trimesh 6 | import skimage.measure 7 | 8 | matplotlib.use("Agg") 9 | 10 | 11 | def visualize_sensor_weighting( 12 | tsdf, 13 | sensor_weighting, 14 | test_dir, 15 | mask, 16 | truncation, 17 | length, 18 | max_resolution, 19 | resolution, 20 | voxel_size, 21 | outlier_channel, 22 | mc, 23 | ): 24 | cmap = plt.get_cmap("inferno") 25 | 26 | if outlier_channel: 27 | sensor_weighting = sensor_weighting[0, :, :, :] 28 | 29 | hist = sensor_weighting[mask].flatten() 30 | plt.clf() # clear plot (important) 31 | cm = plt.get_cmap("inferno") 32 | n, bins, patches = plt.hist(hist, bins=100) 33 | for c, p in zip(bins, patches): 34 | plt.setp(p, "facecolor", cm(c)) 35 | plt.savefig(test_dir + "/sensor_weighting_grid_histogram_no_outlier_filter.png") 36 | plt.clf() 37 | 38 | if mc == "Open3D": 39 | # Create the mesh using the given mask 40 | tsdf_cube = np.zeros((max_resolution, max_resolution, max_resolution)) 41 | tsdf_cube[: resolution[0], : resolution[1], : resolution[2]] = tsdf 42 | 43 | indices_x = mask.nonzero()[0] 44 | indices_y = mask.nonzero()[1] 45 | indices_z = mask.nonzero()[2] 46 | 47 | volume = o3d.integration.UniformTSDFVolume( 48 | length=length, 49 | resolution=max_resolution, 50 | sdf_trunc=truncation, 51 | color_type=o3d.integration.TSDFVolumeColorType.RGB8, 52 | ) 53 | 54 | for i in range(indices_x.shape[0]): 55 | volume.set_tsdf_at( 56 | tsdf_cube[indices_x[i], indices_y[i], indices_z[i]], 57 | indices_x[i], 58 | indices_y[i], 59 | indices_z[i], 60 | ) 61 | volume.set_weight_at(1, indices_x[i], indices_y[i], indices_z[i]) 62 | 63 | print("Extract a triangle mesh from the volume and visualize it.") 64 | mesh = volume.extract_triangle_mesh() 65 | 66 | del volume 67 | mesh.compute_vertex_normals() 68 | 69 | # read vertices from mesh 70 | vertices = mesh.vertices 71 | 72 | # we need to subtract half a voxel size from the vertices to get to the voxel points 73 | # since the marching cubes algorithm of open3d thinks that the tsdf voxel vertices are 74 | # always located at the mid point between the metric space resolution i.e. if we have a tsdf 75 | # grid of shape 2,2,2, a voxel size of 1 and -0.5 at the first voxel and 0.5 at the next, the marching cubes algorithm will generate a surface at 1.5 and not at 1.0. 
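        # Worked example of the offset (illustrative, values assumed): with voxel_size = 1.0,
        # a mesh vertex returned by Open3D at metric coordinate 1.5 maps to voxel index
        # round((1.5 - 0.5) / 1.0) = 1, i.e. it is attributed to the voxel with grid index 1,
        # matching the mid-point convention described in the comment above.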
76 | voxel_points = np.round( 77 | np.asarray(vertices - voxel_size / 2) * 1 / voxel_size 78 | ).astype(int) 79 | elif mc == "skimage": 80 | # Skimage marching cubes 81 | # --------------------------------------------- 82 | (verts, faces, normals, values,) = skimage.measure.marching_cubes_lewiner( 83 | tsdf, 84 | level=0, 85 | spacing=(voxel_size, voxel_size, voxel_size), 86 | mask=preprocess_weight_grid(mask), 87 | ) 88 | 89 | voxel_points = np.round(np.asarray(verts) * 1 / voxel_size).astype(int) 90 | 91 | # add 0.5 * voxel_size to vertices to match Open3D marching cubes output 92 | mesh = o3d.geometry.TriangleMesh( 93 | vertices=o3d.utility.Vector3dVector(verts + voxel_size / 2), 94 | triangles=o3d.utility.Vector3iVector(faces), 95 | ) 96 | mesh.compute_vertex_normals() 97 | 98 | # remove voxels if they are outside of the voxelgrid - these are treated as uninitialized. 99 | valid_points = ( 100 | (voxel_points[:, 0] >= 0) 101 | * (voxel_points[:, 0] < sensor_weighting.shape[0]) 102 | * (voxel_points[:, 1] >= 0) 103 | * (voxel_points[:, 1] < sensor_weighting.shape[1]) 104 | * (voxel_points[:, 2] >= 0) 105 | * (voxel_points[:, 2] < sensor_weighting.shape[2]) 106 | ) 107 | filtered_voxel_points = voxel_points[valid_points, :] 108 | 109 | vals = -np.ones(voxel_points.shape[0]) 110 | vals[valid_points] = sensor_weighting[ 111 | filtered_voxel_points[:, 0], 112 | filtered_voxel_points[:, 1], 113 | filtered_voxel_points[:, 2], 114 | ] 115 | colors = cmap((vals * 255).astype(int))[:, :-1] 116 | 117 | if (vals == -1).sum() > 0: 118 | print("Invalid index or indices found among voxel points!") 119 | 120 | colors[vals == -1] = [0, 1, 0] # make all uninitialized voxels green 121 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors) 122 | o3d.io.write_triangle_mesh( 123 | test_dir + "/sensor_weighting_no_outlier_filter.ply", mesh 124 | ) 125 | 126 | # compute surface histogram 127 | n, bins, patches = plt.hist(vals.flatten(), bins=100) 128 | for c, p in zip(bins, patches): 129 | plt.setp(p, "facecolor", cm(c)) 130 | plt.savefig(test_dir + "/sensor_weighting_surface_histogram_no_outlier_filter.png") 131 | plt.clf() 132 | 133 | 134 | def preprocess_weight_grid(weights): 135 | """Function to compute the weight mask for skimage marching cubes corresponding to how Open3D marching cubes deals with masking. Open3D requires that all 8 corners of the voxel are initialized in order to draw a surface while skimage only requires 1 of the voxels to be initialized e.g. the index (1,1,1) determines if the voxel at (0,0,0) is initialized etc. 
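    Illustrative example (assumed input): if every entry of a 2x2x2 weight grid is non-zero,
    the returned mask is True only at index (1, 1, 1), since that is the only index whose
    eight corner weights (itself and its seven neighbors toward the origin) are all
    initialized.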
136 | 137 | Args: 138 | weights: weight grid 139 | 140 | Returns: 141 | mask: boolean grid to be used as input to skimage marching cubes algorithm 142 | """ 143 | mask = np.zeros_like(weights) 144 | indices = np.array(weights.nonzero()) 145 | indices = indices[:, ~np.any(indices == 0, axis=0)] 146 | for index in range(indices.shape[1]): 147 | i = indices[:, index][0] 148 | j = indices[:, index][1] 149 | k = indices[:, index][2] 150 | mask[i, j, k] = weights[i, j, k] 151 | mask[i, j, k] = mask[i, j, k] and weights[i, j, k - 1] 152 | mask[i, j, k] = mask[i, j, k] and weights[i, j - 1, k] 153 | mask[i, j, k] = mask[i, j, k] and weights[i, j - 1, k - 1] 154 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j, k] 155 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j, k - 1] 156 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j - 1, k] 157 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j - 1, k - 1] 158 | 159 | return mask > 0 160 | -------------------------------------------------------------------------------- /videos/create_depth_video.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import cv2 9 | 10 | 11 | def arg_parse(): 12 | parser = argparse.ArgumentParser( 13 | description="Script for creating a video of the depth." 14 | ) 15 | 16 | parser.add_argument("--scene", required=True) 17 | parser.add_argument("--sensor", required=True) 18 | parser.add_argument("--trajectory", required=True) 19 | parser.add_argument("--dataset", required=True) 20 | 21 | args = parser.parse_args() 22 | 23 | return vars(args) 24 | 25 | 26 | # From Johannes Schoenberger code. 27 | def read_array(path): 28 | with open(path, "rb") as fid: 29 | width, height, channels = np.genfromtxt( 30 | fid, delimiter="&", max_rows=1, usecols=(0, 1, 2), dtype=int 31 | ) 32 | fid.seek(0) 33 | num_delimiter = 0 34 | byte = fid.read(1) 35 | while True: 36 | if byte == b"&": 37 | num_delimiter += 1 38 | if num_delimiter >= 3: 39 | break 40 | byte = fid.read(1) 41 | array = np.fromfile(fid, np.float32) 42 | 43 | array = array.reshape((width, height, channels), order="F") 44 | return np.transpose(array, (1, 0, 2)).squeeze() 45 | 46 | 47 | def get_depth(sensor, scene, trajectory, dataset): 48 | if dataset == "replica": 49 | input_dir = ( 50 | "/cluster/work/cvl/esandstroem/data/replica/manual/" 51 | + scene 52 | + "/" 53 | + trajectory 54 | + "/" 55 | + sensor 56 | ) 57 | else: 58 | if sensor == "tof": 59 | # corbs 60 | # input_dir = '/cluster/work/cvl/esandstroem/data/corbs/human/data/H1_pre_registereddata/depth' 61 | # scene3d 62 | input_dir = ( 63 | "/cluster/work/cvl/esandstroem/data/scene3d/copyroom/copyroom_png/depth" 64 | ) 65 | else: 66 | # corbs 67 | # input_dir = '/cluster/work/cvl/esandstroem/data/corbs/human/colmap/dense/stereo/depth_maps' 68 | # scene3d 69 | input_dir = "/cluster/work/cvl/esandstroem/data/scene3d/copyroom/dense/stereo/depth_maps" 70 | 71 | # define output dir 72 | output_folder = "/cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/videos/" 73 | output_folder += "depth/" + scene + "/" + sensor 74 | 75 | if not os.path.exists(output_folder): 76 | os.makedirs(output_folder) 77 | 78 | images = os.listdir(input_dir) 79 | 80 | if dataset == "replica": 81 | images = sorted(images, key=lambda x: float(x[:-4])) 82 | else: 83 | if sensor == "tof": 84 | images = sorted(images, key=lambda x: float(x[:-4])) 85 | else: 86 | images = [x for x in images if 
x.endswith("geometric.bin")] 87 | images = sorted(images, key=lambda x: float(x[:-18])) 88 | 89 | for k, im in enumerate(images): 90 | # print(im) 91 | if dataset == "replica": 92 | im = cv2.imread(input_dir + "/" + im, -1) 93 | elif sensor == "tof": 94 | im = cv2.imread(input_dir + "/" + im, -1) 95 | else: 96 | im = read_array(input_dir + "/" + im) 97 | 98 | print(k) 99 | # cv2.imwrite(im, input_dir + '/' + im) 100 | print(output_folder) 101 | plt.imsave( 102 | output_folder + "/" + "%04d" % k + ".png", 103 | np.asarray(im), 104 | vmin=0, 105 | vmax=5, 106 | dpi=1, 107 | ) 108 | 109 | # vmin=0, vmax=25000 110 | # if k > 100: 111 | # break 112 | 113 | # create video of the rendered images 114 | os.chdir(output_folder) 115 | os.system( 116 | "ffmpeg -framerate 15 -i %04d.png -vcodec libx264 -preset veryslow -c:a libmp3lame -r 15 -crf 25 -pix_fmt yuv420p " 117 | + "/".join(output_folder.split("/")[:-1]) 118 | + ".mp4" 119 | ) 120 | 121 | # remove the images folder 122 | os.system("rm -r " + output_folder) 123 | 124 | 125 | if __name__ == "__main__": 126 | 127 | # parse commandline arguments 128 | args = arg_parse() 129 | 130 | get_depth(args["sensor"], args["scene"], args["trajectory"], args["dataset"]) 131 | -------------------------------------------------------------------------------- /videos/render_option.json: -------------------------------------------------------------------------------- 1 | { 2 | "background_color" : [ 1, 1, 1 ], 3 | "class_name" : "RenderOption", 4 | "default_mesh_color" : [ 0.69999999999999996, 0.69999999999999996, 0.69999999999999996 ], 5 | "image_max_depth" : 3000, 6 | "image_stretch_option" : 0, 7 | "interpolation_option" : 0, 8 | "light0_color" : [ 1, 1, 1 ], 9 | "light0_diffuse_power" : 0.66000000000000003, 10 | "light0_position" : [ 0, 0, 2 ], 11 | "light0_specular_power" : 0.20000000000000001, 12 | "light0_specular_shininess" : 100, 13 | "light1_color" : [ 1, 1, 1 ], 14 | "light1_diffuse_power" : 0.66000000000000003, 15 | "light1_position" : [ 0, 0, 2 ], 16 | "light1_specular_power" : 0.20000000000000001, 17 | "light1_specular_shininess" : 100, 18 | "light2_color" : [ 1, 1, 1 ], 19 | "light2_diffuse_power" : 0.66000000000000003, 20 | "light2_position" : [ 0, 0, -2 ], 21 | "light2_specular_power" : 0.20000000000000001, 22 | "light2_specular_shininess" : 100, 23 | "light3_color" : [ 1, 1, 1 ], 24 | "light3_diffuse_power" : 0.66000000000000003, 25 | "light3_position" : [ 0, 0, -2 ], 26 | "light3_specular_power" : 0.20000000000000001, 27 | "light3_specular_shininess" : 100, 28 | "light_ambient_color" : [ 0, 0, 0 ], 29 | "light_on" : true, 30 | "mesh_color_option" : 1, 31 | "mesh_shade_option" : 0, 32 | "mesh_show_back_face" : false, 33 | "mesh_show_wireframe" : false, 34 | "point_color_option" : 9, 35 | "point_show_normal" : false, 36 | "point_size" : 5, 37 | "show_coordinate_frame" : false, 38 | "version_major" : 1, 39 | "version_minor" : 0 40 | } 41 | --------------------------------------------------------------------------------