├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── configs ├── fusion │ ├── corbs.yaml │ ├── replica.yaml │ └── scene3d.yaml └── routing │ └── replica.yaml ├── data ├── mvs_depth_estimation │ ├── downsample_dataset.py │ ├── move_data.py │ ├── reconstruct_colmap_slurm_copyroom.sh │ ├── reconstruct_colmap_slurm_stonewall.sh │ ├── setup_colmap.py │ └── setup_colmap_corbs.py └── save_every_tenth_frame.py ├── dataset ├── __init__.py ├── associate.py ├── colmap.py ├── corbs.py ├── replica.py └── scene3d.py ├── images └── architecture.png ├── lists ├── corbs │ ├── desk.txt │ └── human.txt ├── replica │ ├── test_hotel_0.txt │ ├── test_office_0.txt │ ├── test_office_4.txt │ ├── test_office_4_hotel_0_office_0.txt │ ├── train.txt │ └── val.txt └── scene3d │ ├── copyroom.txt │ └── stonewall.txt ├── models ├── fusion │ ├── sgm_psmnet │ │ └── model │ │ │ └── best.pth.tar │ ├── sgm_psmnet_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ ├── sgm_psmnet_routing │ │ └── model │ │ │ └── best.pth.tar │ ├── sgm_psmnet_routing_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_mvs_corbs │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_mvs_scene3d │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet_routing │ │ └── model │ │ │ └── best.pth.tar │ ├── tof_psmnet_routing_routedfusion │ │ └── model │ │ │ └── best.pth.tar │ └── tof_tof_scene3d_collab_rec │ │ └── model │ │ └── best.pth.tar └── routing │ ├── psmnet │ └── model │ │ └── best.pth.tar │ ├── sgm │ └── model │ │ └── best.pth.tar │ ├── sgm_psmnet │ └── model │ │ └── best.pth.tar │ ├── tof │ └── model │ │ └── best.pth.tar │ └── tof_psmnet │ └── model │ └── best.pth.tar ├── modules ├── __init__.py ├── database.py ├── extractor.py ├── filter_pipeline.py ├── filtering_net.py ├── fuse_pipeline.py ├── integrator.py ├── model.py ├── model_features.py ├── pipeline.py ├── routing.py └── voxelgrid.py ├── requirements.txt ├── test_fusion.py ├── test_routing.py ├── train_fusion.py ├── train_routing.py ├── utils ├── __init__.py ├── loading.py ├── loss.py ├── metrics.py ├── saving.py ├── setup.py ├── transform.py └── visualize_sensor_weighting.py └── videos ├── create_depth_video.py └── render_option.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # ignore folders and files 132 | videos 133 | .idea 134 | .vscode 135 | utils/invert_colormap.py 136 | compute_attention_similarity.py 137 | scripts/log 138 | models 139 | debug_mc.py 140 | wandb 141 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | 2 | [submodule "deps/evaluate_3d_reconstruction"] 3 | path = deps/evaluate_3d_reconstruction 4 | url = https://github.com/tfy14esa/evaluate_3d_reconstruction.git 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, Erik Sandström 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Before commercial usage of source code, the copyright holder must be contacted. 8 | 9 | 2. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 3. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 4. Neither the name of ETH Zurich nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /configs/fusion/corbs.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True # run on cpu or gpu 3 | experiment_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion # path where the logging is done and the models are saved. 4 | eval_freq: 442 # how many global steps before evaluation and saving the model 5 | log_freq: 442 # how many global steps before logging the training loss 6 | seed: 52 # seed for shuffling operations 7 | FUSION_MODEL: 8 | use_fusion_net: False # use learned fusion net as done by RoutedFusion 9 | fixed: True # use fixed or finetune weights when use_fusion_net is true 10 | output_scale: 1.0 # output scale from fusion net (same as RoutedFusion) 11 | n_points: 11 # extraction band samples 12 | n_tail_points: 9 # samples along the ray which update the grid 13 | n_points_tof: 11 # tof specific extraction band samples 14 | n_tail_points_tof: 9 15 | n_points_stereo: 11 # stereo specific extraction band samples 16 | n_tail_points_stereo: 9 17 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 18 | n_empty_space_voting: 0 # samples with free space update 19 | max_weight: 500 # max weight 20 | extraction_strategy: 'nearest_neighbor' # nearest_neighbor or trilinear_interpolation 21 | FEATURE_MODEL: 22 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 23 | stereo_warp_right: False # concatenate the right stereo view warped to the left view using the left stereo view depth as input to the feature net 24 | network: resnet # anything else but "resnet" will give a standard network 25 | use_feature_net: True # use learned feature net. 
When false, yields the depth as feature 26 | append_depth: True # append depth to feature vector 27 | w_rgb: True # concatenate rgb to stereo or mvs depth sensors as input to feature net 28 | w_rgb_tof: False # concatenate rgb to tof sensor as input to the feature net 29 | w_intensity_gradient: False # concatenate rgb intensity and gradient as input to the feature net 30 | normalize: True # normalize the feature vector 31 | fixed: False # fix weights of feature net - when true does not declare an optimzer 32 | n_features: 4 # output dimension from feature net 33 | n_layers: 6 # layers 34 | enc_activation: torch.nn.Tanh() 35 | dec_activation: torch.nn.Tanh() 36 | depth: True # concatenate depth as input to feature net 37 | layernorm: False 38 | ROUTING_MODEL: 39 | contraction: 64 # hidden dimension of routing network 40 | normalization: False # apply batch normalization 41 | FILTERING_MODEL: 42 | do: True # whether to do sensor fusion or not 43 | model: '3dconv' # 3dconv, tsdf_early_fusion, tsdf_middle_fusion, routedfusion 44 | CONV3D_MODEL: 45 | fixed: False # fix network weights 46 | outlier_channel: False # if True, outputs another channel from the filtering network to be used with the single sensor outlier loss. 47 | features_to_weight_head: True # feed 2D features directly to alpha head 48 | sdf_to_weight_head: False # feed sdf values directly wo encoding to alpha head (not implemented when weighting_complexity: unet_style) 49 | weights_to_weight_head: True # feed the tsdf weights to the alpha head (not implemented when weighting_complexity: unet_style) 50 | tanh_weight: True # apply tanh-transform to weight counter 51 | inverted_weight: False # when tanh_weight: true, we make 0 to 1 and 1 to 0. Only relevant when weights_to_weight_head: true 52 | bias: True # bias in alpha head 53 | chunk_size: 64 # determines the size of the window used during training and testing that is fed to the 3D convnet 54 | activation: torch.nn.ReLU() 55 | weighting_complexity: '3layer' # Xlayer 56 | LOSS: 57 | alpha_single_sensor_supervision: True # supervise voxels where only one sensor integrates 58 | alpha_supervision: False # supervise directly with proxy alpha in 3D. Not available on the corbs dataset. 59 | fusion_weight: 6.0 # l1 weight of fusion net 60 | grid_weight: 6 61 | alpha_weight: 0.01 # weight of single sensor alpha supervision and proxy supervision 62 | TRAINING: 63 | reset_strategy: True # May not make any difference 64 | reset_prob: 0.01 # in percent (used if reset_strategy: True) 65 | pretrain_filtering_net: False 66 | pretrain_fusion_net: False # if True, provide a path called pretrain_fusion_SENSORNAME_model_path. Used to load pretrained and/or fixed fusion nets 67 | train_batch_size: 1 68 | train_shuffle: True 69 | val_batch_size: 1 70 | val_shuffle: False 71 | n_epochs: 1000 72 | gradient_clipping: True 73 | TESTING: 74 | mc: 'skimage' # use skimage marching cubes implementation 75 | routedfusion_nn: True # using nearest neighbor mask or trilinear interpolation mask. When true, requires specifying the path to the model containing the nearest neighbor weight grid in the variable routedfusion_nn_model. 
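The CONV3D_MODEL and TESTING options above revolve around a per-voxel sensor weighting ("alpha") that decides how much to trust each sensor's TSDF grid. The snippet below is a minimal sketch of that idea under stated assumptions: the blending formula, the single-sensor fallback and the tanh weight transform are illustrative readings of the config comments (`tanh_weight`, `inverted_weight`, `alpha_single_sensor_supervision`), not the repository's exact implementation.

```python
import numpy as np

def tanh_weight_transform(weight_counter, inverted=False):
    # tanh_weight: True squashes the unbounded per-voxel weight counter to [0, 1)
    # before it is fed to the weighting ("alpha") head; inverted_weight flips it.
    squashed = np.tanh(weight_counter)
    return 1.0 - squashed if inverted else squashed

def blend_sensor_tsdfs(tsdf_tof, tsdf_stereo, alpha, w_tof, w_stereo):
    # alpha close to 1 trusts the ToF grid, alpha close to 0 the stereo grid.
    fused = alpha * tsdf_tof + (1.0 - alpha) * tsdf_stereo
    # Voxels observed by only one sensor fall back to that sensor
    # (the case targeted by alpha_single_sensor_supervision).
    only_tof = (w_tof > 0) & (w_stereo == 0)
    only_stereo = (w_stereo > 0) & (w_tof == 0)
    fused[only_tof] = tsdf_tof[only_tof]
    fused[only_stereo] = tsdf_stereo[only_stereo]
    return fused
```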
76 | routedfusion_nn_model: 210929-165610 # specify from what tsdf fusion model (or SenFuNet model) to use the nearest neighbor weight grids 77 | use_outlier_filter: True # only true when FILTERING_MODEL.model: '3dconv' 78 | eval_single_sensors: True # not applicable when evaluating routedfusion 79 | visualize_sensor_weighting: False 80 | test_batch_size: 1 81 | test_shuffle: False 82 | fusion_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion/tof_mvs_corbs/model/best.pth.tar # used for conv3d, routedfusion as filtering models 83 | weight_thresholds: [0.0] 84 | ROUTING: 85 | do: False # needs to be false at all times 86 | dont_smooth_where_uncertain: False # if True, replaces the routing output with the input depth if the confidence is below the threshold 87 | threshold: 0.15 88 | intensity_grad: False # feed grayscale image and its gradient to routing network 89 | OPTIMIZATION: 90 | scheduler: 91 | step_size_filtering: 500 92 | step_size_fusion: 100 93 | gamma_filtering: 0.1 94 | gamma_fusion: 0.5 95 | lr_filtering: 1.e-04 96 | lr_fusion: 1.e-04 97 | rho: 0.95 # rmsprop fusion net 98 | eps: 1.e-08 # rmsprop fusion net 99 | momentum: 0.5 # rmsprop fusion net 100 | weight_decay: 0.00 # rmsprop fusion net 101 | accumulation_steps: 20 # note that this is normally 8 102 | DATA: 103 | collaborative_reconstruction: False # multi-agent reconstruction 104 | frames_per_chunk: 100 # used when colaborative_reconstruction: true 105 | mask_stereo_height: 10 # 35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 106 | # compared to the color camera of the azure kinect 107 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 108 | mask_tof_height: 10 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 109 | mask_tof_width: 10 #35 # 35 # in pixels. With resolution 256 this would be 35 110 | mask_width: 10 # general sensor 111 | mask_height: 10 # general sensor 112 | pad: 2 # pad ground truth grid (not needed, but all results are using it) 113 | min_depth_stereo: 0.0 # 0.5 (in meters) 114 | max_depth_stereo: 12.3 # 2.5 (in meters) 115 | min_depth_tof: 0.0 # 0.5 (in meters) 116 | max_depth_tof: 12.3 # 3.86 (in meters) 117 | min_depth: 0.0 # general sensor (in meters) 118 | max_depth: 12.3 # general sensor (in meters) 119 | root_dir: /cluster/work/cvl/esandstroem/data/corbs # training on data from work folder or on local scratch of compute node 120 | dataset: CoRBS # dataset 121 | input: [tof, stereo] # list of sensors to fuse. 
When FILTERING_MODEL.do: False, this list can consist of only one sensor 122 | target: gt # ground truth depth label 123 | resx_stereo: 256 # I assume square input images 124 | resy_stereo: 256 125 | resx_tof: 256 126 | resy_tof: 256 127 | resx: 256 # default settings 128 | resy: 256 129 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/corbs/desk.txt 130 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/corbs/desk.txt 131 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/corbs/human.txt 132 | init_value: 0.0 # init value of tsdf grids 133 | trunc_value: 0.05 # truncation distance 134 | -------------------------------------------------------------------------------- /configs/fusion/replica.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True # run on cpu or gpu 3 | experiment_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion # path where the logging is done and the models are saved. 4 | eval_freq: 500 # how many global steps before evaluation and saving the model 5 | log_freq: 500 # how many global steps before logging the training loss 6 | seed: 52 # seed for shuffling operations 7 | FUSION_MODEL: 8 | use_fusion_net: False # use learned fusion net as done by RoutedFusion 9 | fixed: True # use fixed or finetune weights when use_fusion_net is true 10 | output_scale: 1.0 # output scale from fusion net (same as RoutedFusion) 11 | n_points: 11 # extraction band samples 12 | n_tail_points: 9 # samples along the ray which update the grid 13 | n_points_tof: 11 # tof specific extraction band samples 14 | n_tail_points_tof: 9 15 | n_points_stereo: 11 # stereo specific extraction band samples 16 | n_tail_points_stereo: 9 17 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 18 | n_empty_space_voting: 0 # samples with free space update 19 | max_weight: 500 # max weight 20 | extraction_strategy: 'nearest_neighbor' # nearest_neighbor or trilinear_interpolation 21 | FEATURE_MODEL: 22 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 23 | stereo_warp_right: False # concatenate the right stereo view warped to the left view using the left stereo view depth as input to the feature net 24 | network: resnet # anything else but "resnet" will give a standard network 25 | use_feature_net: True # use learned feature net. 
When false, yields the depth as feature 26 | append_depth: True # append depth to feature vector 27 | w_rgb: True # concatenate rgb to stereo or mvs depth sensors as input to feature net 28 | w_rgb_tof: False # concatenate rgb to tof sensor as input to the feature net 29 | w_intensity_gradient: False # concatenate rgb intensity and gradient as input to the feature net 30 | normalize: True # normalize the feature vector 31 | fixed: False # fix weights of feature net - when true does not declare an optimzer 32 | n_features: 4 # output dimension from feature net 33 | n_layers: 6 # layers 34 | enc_activation: torch.nn.Tanh() 35 | dec_activation: torch.nn.Tanh() 36 | depth: True # concatenate depth as input to feature net 37 | layernorm: False 38 | ROUTING_MODEL: 39 | contraction: 64 # hidden dimension of routing network 40 | normalization: False # apply batch normalization 41 | FILTERING_MODEL: 42 | do: True # whether to do sensor fusion or not 43 | model: '3dconv' # 3dconv, tsdf_early_fusion, tsdf_middle_fusion, routedfusion 44 | CONV3D_MODEL: 45 | fixed: False # fix network weights 46 | outlier_channel: False # if True, outputs another channel from the filtering network to be used with the single sensor outlier loss. 47 | features_to_weight_head: True # feed 2D features directly to alpha head 48 | sdf_to_weight_head: False # feed sdf values directly wo encoding to alpha head (not implemented when weighting_complexity: unet_style) 49 | weights_to_weight_head: True # feed the tsdf weights to the alpha head (not implemented when weighting_complexity: unet_style) 50 | tanh_weight: True # apply tanh-transform to weight counter 51 | inverted_weight: False # when tanh_weight: true, we make 0 to 1 and 1 to 0. Only relevant when weights_to_weight_head: true 52 | bias: True # bias in alpha head 53 | chunk_size: 64 # determines the size of the window used during training and testing that is fed to the 3D convnet 54 | activation: torch.nn.ReLU() 55 | weighting_complexity: '3layer' # Xlayer 56 | LOSS: 57 | alpha_single_sensor_supervision: True # supervise voxels where only one sensor integrates 58 | alpha_supervision: False # supervise directly with proxy alpha in 3D. Only available on some scenes e.g. office 0, hotel 0 59 | fusion_weight: 6.0 # l1 weight of fusion net 60 | grid_weight: 6 61 | alpha_weight: 0.01 # weight of single sensor alpha supervision and proxy supervision 62 | TRAINING: 63 | reset_strategy: True # May not make any difference 64 | reset_prob: 0.01 # in percent (used if reset_strategy: True) 65 | pretrain_filtering_net: False 66 | pretrain_fusion_net: False # if True, provide a path called pretrain_fusion_SENSORNAME_model_path. 
Used to load pretrained and/or fixed fusion nets 67 | routing_stereo_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/psmnet/model/best.pth.tar 68 | routing_tof_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/tof/model/best.pth.tar 69 | routing_tof_2_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/tof/model/best.pth.tar 70 | routing_sgm_stereo_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/sgm/model/best.pth.tar 71 | train_batch_size: 1 72 | train_shuffle: True 73 | val_batch_size: 1 74 | val_shuffle: False 75 | n_epochs: 1000 76 | gradient_clipping: True 77 | TESTING: 78 | mc: 'skimage' # 'skimage' or 'Open3D' (requires local library installation) 79 | routedfusion_nn: True # using nearest neighbor mask or trilinear interpolation mask. When true, requires specifying the path to the model containing the nearest neighbor weight grid in the variable routedfusion_nn_model. 80 | routedfusion_nn_model: 210929-165610 # specify from what tsdf fusion model (or SenFuNet model) to use the nearest neighbor weight grids 81 | use_outlier_filter: True # only true when FILTERING_MODEL.model: '3dconv' 82 | eval_single_sensors: False # not applicable when evaluating routedfusion 83 | visualize_sensor_weighting: False 84 | test_batch_size: 1 85 | test_shuffle: False 86 | routing_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/tof_psmnet/model/best.pth.tar # Only used for tsdf_early_fusion. 87 | fusion_model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion/tof_psmnet/model/best.pth.tar # used for conv3d, routedfusion as filtering models. 88 | weight_thresholds: [0.0] 89 | ROUTING: 90 | do: False # use routing network 91 | dont_smooth_where_uncertain: False # if True, replaces the routing output with the input depth if the confidence is below the threshold 92 | threshold: 0.15 93 | intensity_grad: False # feed grayscale image and its gradient to routing network 94 | OPTIMIZATION: 95 | scheduler: 96 | step_size_filtering: 500 97 | step_size_fusion: 100 98 | gamma_filtering: 0.1 99 | gamma_fusion: 0.5 100 | lr_filtering: 1.e-04 101 | lr_fusion: 1.e-04 102 | rho: 0.95 # rmsprop fusion net 103 | eps: 1.e-08 # rmsprop fusion net 104 | momentum: 0.5 # rmsprop fusion net 105 | weight_decay: 0.00 # rmsprop fusion net 106 | accumulation_steps: 20 107 | DATA: 108 | early_fusion_asynch: False # asynchronous early fusion experiment 109 | collaborative_reconstruction: False # multi-agent reconstruction 110 | frames_per_chunk: 100 # used when colaborative_reconstruction: true 111 | downsampling: [1, 1] # first entry is the downsampling rate of the 1st sensor 112 | # 2nd entry is the downsampling rate of the 2nd sensor 113 | mask_stereo_height: 10 # 35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 114 | # compared to the color camera of the azure kinect 115 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 116 | mask_tof_height: 10 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 117 | mask_tof_width: 10 #35 # 35 # in pixels. 
With resolution 256 this would be 35 118 | mask_width: 10 # general sensor 119 | mask_height: 10 # general sensor 120 | pad: 2 # pad grid (not needed, but all results are using it) 121 | min_depth_stereo: 0.0 # 0.5 (in meters) 122 | max_depth_stereo: 12.3 # 2.5 (in meters) 123 | min_depth_tof: 0.0 # 0.5 (in meters) 124 | max_depth_tof: 12.3 # 3.86 (in meters) 125 | min_depth: 0.0 # general sensor (in meters) 126 | max_depth: 12.3 # general sensor (in meters) 127 | root_dir: TMPDIR #/cluster/work/cvl/esandstroem/data/replica/manual #TMPDIR # use TMPDIR for the euler cluster. Path to data folder 128 | dataset: Replica # dataset 129 | input: [tof, stereo] # list of sensors to fuse. When FILTERING_MODEL.do: False, this list can consist of only one sensor 130 | target: gt # ground truth depth label 131 | resx_stereo: 256 # I assume square input images 132 | resy_stereo: 256 133 | resx_tof: 256 134 | resy_tof: 256 135 | resx: 256 # default settings 136 | resy: 256 137 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt 138 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt 139 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt #4_hotel_0_office_0.txt 140 | init_value: 0.0 # init value of tsdf grids 141 | trunc_value: 0.05 # truncation distance 142 | -------------------------------------------------------------------------------- /configs/fusion/scene3d.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True # run on cpu or gpu 3 | experiment_path: /cluster/work/cvl/esandstroem/src/late_fusion_3dconvnet/workspace/fusion/ #/cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion # path where the logging is done and the models are saved. 4 | eval_freq: 442 # how many global steps before evaluation and saving the model 5 | log_freq: 442 #957 383 # how many global steps before logging the training loss 6 | seed: 52 # seed for shuffling operations 7 | FUSION_MODEL: 8 | use_fusion_net: False # use learned fusion net as done by RoutedFusion 9 | fixed: True # use fixed or finetune weights when use_fusion_net is true 10 | output_scale: 1.0 # output scale from fusion net (same as RoutedFusion) 11 | n_points: 11 # extraction band samples 12 | n_tail_points: 9 # samples along the ray which update the grid 13 | n_points_tof: 11 # tof specific extraction band samples 14 | n_tail_points_tof: 9 15 | n_points_stereo: 11 # stereo specific extraction band samples 16 | n_tail_points_stereo: 9 17 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 18 | n_empty_space_voting: 0 # samples with free space update 19 | max_weight: 500 # max weight 20 | extraction_strategy: 'nearest_neighbor' # nearest_neighbor or trilinear_interpolation 21 | FEATURE_MODEL: 22 | confidence: False # feed 2D confidence map to learned fusion net (only when using routing) 23 | stereo_warp_right: False # concatenate the right stereo view warped to the left view using the left stereo view depth as input to the feature net 24 | network: resnet # anything else but "resnet" will give a standard network 25 | use_feature_net: True # use learned feature net. 
When false, yields the depth as feature 26 | append_depth: True # append depth to feature vector 27 | w_rgb: True # concatenate rgb to stereo or mvs depth sensors as input to feature net 28 | w_rgb_tof: False # concatenate rgb to tof sensor as input to the feature net 29 | w_intensity_gradient: False # concatenate rgb intensity and gradient as input to the feature net 30 | normalize: True # normalize the feature vector 31 | fixed: False # fix weights of feature net - when true does not declare an optimzer 32 | n_features: 4 # output dimension from feature net 33 | n_layers: 6 # layers 34 | enc_activation: torch.nn.Tanh() 35 | dec_activation: torch.nn.Tanh() 36 | depth: True # concatenate depth as input to feature net 37 | layernorm: False 38 | ROUTING_MODEL: 39 | contraction: 64 # hidden dimension of routing network 40 | normalization: False # apply batch normalization 41 | FILTERING_MODEL: 42 | do: True # whether to do sensor fusion or not 43 | model: '3dconv' # 3dconv, tsdf_early_fusion, tsdf_middle_fusion, routedfusion 44 | CONV3D_MODEL: 45 | fixed: False # fix network weights 46 | outlier_channel: False # if True, outputs another channel from the filtering network to be used with the single sensor outlier loss. 47 | features_to_weight_head: True # feed 2D features directly to alpha head 48 | sdf_to_weight_head: False # feed sdf values directly wo encoding to alpha head (not implemented when weighting_complexity: unet_style) 49 | weights_to_weight_head: True # feed the tsdf weights to the alpha head (not implemented when weighting_complexity: unet_style) 50 | tanh_weight: True # apply tanh-transform to weight counter 51 | inverted_weight: False # when tanh_weight: true, we make 0 to 1 and 1 to 0. Only relevant when weights_to_weight_head: true 52 | bias: True # bias in alpha head 53 | chunk_size: 64 # determines the size of the window used during training and testing that is fed to the 3D convnet 54 | activation: torch.nn.ReLU() 55 | weighting_complexity: '3layer' # Xlayer 56 | LOSS: 57 | alpha_single_sensor_supervision: True # supervise voxels where only one sensor integrates 58 | alpha_supervision: False # supervise directly with proxy alpha in 3D. Not available on scene3d dataset. 59 | fusion_weight: 6.0 # l1 weight of fusion net 60 | grid_weight: 6 61 | alpha_weight: 0.01 # weight of single sensor alpha supervision and proxy supervision 62 | TRAINING: 63 | reset_strategy: True # May not make any difference 64 | reset_prob: 0.01 # in percent (used if reset_strategy: True) 65 | pretrain_filtering_net: False 66 | pretrain_fusion_net: False # if True, provide a path called pretrain_fusion_SENSORNAME_model_path. Used to load pretrained and/or fixed fusion nets 67 | train_batch_size: 1 68 | train_shuffle: True 69 | val_batch_size: 1 70 | val_shuffle: False 71 | n_epochs: 1000 72 | gradient_clipping: True 73 | TESTING: 74 | mc: 'skimage' 75 | routedfusion_nn: True # using nearest neighbor mask or trilinear interpolation mask. When true, requires specifying the path to the model containing the nearest neighbor weight grid in the variable routedfusion_nn_model. 
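FUSION_MODEL.max_weight, DATA.init_value and DATA.trunc_value above parameterise the standard weighted running-average TSDF update used in volumetric fusion: grids start at init_value with zero weight, observations are clipped to the truncation band, and the weight counter is capped at max_weight. The sketch below shows that textbook rule with unit per-observation weights; it is an illustration of what the settings refer to, not the repository's integrator.

```python
import numpy as np

def integrate(tsdf, weights, sdf_obs, obs_mask, max_weight=500, trunc=0.05):
    """One weighted running-average TSDF update step.

    tsdf, weights : (X, Y, Z) grids (tsdf initialised to init_value, weights to 0)
    sdf_obs       : observed signed distances for the current depth frame
    obs_mask      : boolean mask of voxels touched by the current frame
    """
    sdf_obs = np.clip(sdf_obs, -trunc, trunc)          # truncation band
    w_old = weights[obs_mask]
    tsdf[obs_mask] = (w_old * tsdf[obs_mask] + sdf_obs[obs_mask]) / (w_old + 1.0)
    weights[obs_mask] = np.minimum(w_old + 1.0, max_weight)  # cap weight counter
    return tsdf, weights
```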
76 | routedfusion_nn_model: 210929-165610 # specify from what tsdf fusion model (or SenFuNet model) to use the nearest neighbor weight grids 77 | use_outlier_filter: True # only true when FILTERING_MODEL.model: '3dconv' 78 | eval_single_sensors: False # not applicable when evaluating routedfusion 79 | visualize_sensor_weighting: False 80 | test_batch_size: 1 81 | test_shuffle: False 82 | fusion_model_path: /cluster/work/cvl/esandstroem/src/late_fusion_3dconvnet/workspace/fusion/220526-124631/model/best.pth.tar #/cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/fusion/tof_mvs_scene3d/model/best.pth.tar # used for conv3d, routedfusion as filtering models. 83 | weight_thresholds: [0.0] 84 | ROUTING: 85 | do: False # needs to be false at all times 86 | dont_smooth_where_uncertain: False # if True, replaces the routing output with the input depth if the confidence is below the threshold 87 | threshold: 0.15 88 | intensity_grad: False # feed grayscale image and its gradient to routing network 89 | OPTIMIZATION: 90 | scheduler: 91 | step_size_filtering: 500 92 | step_size_fusion: 100 93 | gamma_filtering: 0.1 94 | gamma_fusion: 0.5 95 | lr_filtering: 1.e-04 96 | lr_fusion: 1.e-04 97 | rho: 0.95 # rmsprop fusion net 98 | eps: 1.e-08 # rmsprop fusion net 99 | momentum: 0.5 # rmsprop fusion net 100 | weight_decay: 0.00 # rmsprop fusion net 101 | accumulation_steps: 20 # note that this is normally 8 102 | DATA: 103 | collaborative_reconstruction: False # multi-agent reconstruction 104 | frames_per_chunk: 100 # used when colaborative_reconstruction: true 105 | mask_stereo_height: 10 # 35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 106 | # compared to the color camera of the azure kinect 107 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 108 | mask_tof_height: 10 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 109 | mask_tof_width: 10 #35 # 35 # in pixels. With resolution 256 this would be 35 110 | mask_width: 10 # general sensor 111 | mask_height: 10 # general sensor 112 | pad: 0 # pad ground truth grid (not needed, but all results are using it) 113 | min_depth_stereo: 0.5 # 0.5 (in meters) 114 | max_depth_stereo: 3.0 # 2.5 (in meters) 115 | min_depth_tof: 0.0 # 0.5 (in meters) 116 | max_depth_tof: 12.3 # 3.86 (in meters) 117 | min_depth: 0.0 # general sensor (in meters) 118 | max_depth: 12.3 # general sensor (in meters) 119 | root_dir: /cluster/work/cvl/esandstroem/data/scene3d # Path to data folder 120 | dataset: Scene3D # dataset 121 | input: [tof, stereo] # list of sensors to fuse. 
When FILTERING_MODEL.do: False, this list can consist of only one sensor 122 | target: gt # ground truth depth label 123 | resx_stereo: 256 # I assume square input images 124 | resy_stereo: 256 125 | resx_tof: 256 126 | resy_tof: 256 127 | resx: 256 # default settings 128 | resy: 256 129 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/scene3d/stonewall.txt 130 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/scene3d/stonewall.txt 131 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/scene3d/copyroom.txt 132 | init_value: 0.0 # init value of tsdf grids 133 | trunc_value: 0.05 # truncation distance 134 | -------------------------------------------------------------------------------- /configs/routing/replica.yaml: -------------------------------------------------------------------------------- 1 | SETTINGS: 2 | gpu: True 3 | experiment_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing # path where the logging is done and the models are saved. OLD: /cluster/work/cvl/esandstroem/src/late_fusion_3dconvnet/workspace/routing 4 | log_freq: 500 # how many batch steps before logging the training loss 5 | seed: 52 # seed for shuffling operations 6 | MODEL: 7 | contraction: 64 8 | normalization: False 9 | LOSS: 10 | name: gradweighted + uncertainty # gradweighteduncertainty or gradweighted + uncertainty or uncertainty or VNL + gradweighted + uncertainty or VNL + gradweighteduncertainty 11 | crop_fraction: 0. 12 | vmin: 0.05 13 | vmax: 12.3 14 | weight_scale: 10. # only relevant if the gradweighted term is used 15 | lmbda: 0.06 16 | completion: False # If completion is True, all pixels incur a loss, while, if it is false, on those with a valid input value incur a loss. 17 | TRAINING: 18 | train_batch_size: 2 19 | train_shuffle: True 20 | val_batch_size: 2 21 | val_shuffle: False 22 | n_epochs: 1000 23 | TESTING: 24 | test_batch_size: 1 25 | test_shuffle: False 26 | model_path: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/models/routing/psmnet/model/best.pth.tar 27 | output_path: 28 | OPTIMIZATION: 29 | lr: 1.e-05 30 | rho: 0.95 31 | eps: 1.e-07 32 | momentum: 0.9 33 | weight_decay: 0.01 34 | accumulate: True 35 | accumulation_steps: 16 36 | alternate: False 37 | alternate_steps: 5 38 | ROUTING: 39 | intensity_grad: False # weather to load the grayscale image and its gradient and feed to the routing network 40 | DATA: 41 | early_fusion_asynch: False # asynchronous early fusion experiment 42 | downsampling: [1, 1] # first entry is the downsampling rate of the 1st sensor 43 | # 2nd entry is the downsampling rate of the 2nd sensor 44 | mask_stereo_height: 10 #35 # in pixels (achieves fov 71.11). Together with the width mask this gives the same relationship between the height and width fov 45 | # compared to the color camera of the azure kinect 46 | mask_stereo_width: 10 # in pixels (achieves fov 84.32) 47 | mask_tof_height: 10 # 52 # 52 # in pixels. Note that this value depends on the resolution of the image. With resolution 256 this would be 52 48 | mask_tof_width: 10 #35 # 35 # in pixels. With resolution 256 this would be 35 49 | mask_width: 10 50 | mask_height: 10 51 | pad: 2 52 | min_depth_stereo: 0.0 # 0.5 53 | max_depth_stereo: 12.3 # 2.5 54 | min_depth_tof: 0.0 # 0.5 55 | max_depth_tof: 12.3 # 3.86 56 | min_depth: 0.0 57 | max_depth: 12.3 58 | root_dir: TMPDIR # use TMPDIR for the euler cluster. 
Path to data folder 59 | dataset: Replica 60 | input: [tof, stereo] # 61 | target: depth_gt 62 | resx: 512 63 | resy: 512 64 | resx_stereo: 512 65 | resy_stereo: 512 66 | resx_tof: 512 # a tof camera has typically half the resolution of an rgb camera 67 | resy_tof: 512 68 | focalx: 256 # focal length of intrinsic matrix - only used when virtual normal loss is applied 69 | focaly: 256 70 | train_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/train.txt 71 | val_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/val.txt 72 | test_scene_list: /cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/lists/replica/test_office_0.txt 73 | init_value: 0.05 # truncation distance -------------------------------------------------------------------------------- /data/mvs_depth_estimation/downsample_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | scene = 'copyroom' 4 | 5 | data_path = '/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' + '/' + scene + '_downsampled' 6 | 7 | remove_list = [] 8 | image_list = sorted(os.listdir(data_path + '/images')) 9 | cfg = data_path + '/dense/stereo/patch-match_new.cfg' 10 | images = data_path + '/sparse/images_new.txt' 11 | tof_list = sorted(os.listdir(data_path + '/' + scene + '_png/depth')) 12 | 13 | with open(data_path + '/dense/stereo/patch-match.cfg', 'r') as cfg_file, \ 14 | open(cfg, 'w') as cfg_file_new, \ 15 | open(data_path + '/sparse/images.txt', 'r') as traj_file, \ 16 | open(images, 'w') as traj_file_new: 17 | 18 | cfg_file = cfg_file.readlines() 19 | traj_file = traj_file.readlines() 20 | 21 | for k, frame in enumerate(image_list): 22 | if k % 10 != 0: 23 | # pass 24 | remove_list.append(data_path + '/images/' + image_list[k]) 25 | remove_list.append(data_path + '/' + scene + '_png/depth/' + tof_list[k]) 26 | else: 27 | traj_file_new.write(str(k//10 + 1) + ' ' + ' '.join(traj_file[2*k].split(' ')[1:])) 28 | traj_file_new.write('\n') 29 | cfg_file_new.write(cfg_file[2*k]) 30 | cfg_file_new.write(cfg_file[2*k + 1]) 31 | 32 | 33 | for path in remove_list: 34 | os.system('rm ' + path) 35 | 36 | # remove old patch-match.cfg and images.txt 37 | os.system('rm ' + data_path + '/dense/stereo/patch-match.cfg') 38 | os.system('rm ' + data_path + '/sparse/images.txt') 39 | 40 | # rename new files to old names 41 | os.system('mv ' + data_path + '/sparse/images_new.txt' + ' ' + data_path + '/sparse/images.txt') 42 | os.system('mv ' + data_path + '/dense/stereo/patch-match_new.cfg' + ' ' + data_path + '/dense/stereo/patch-match.cfg') -------------------------------------------------------------------------------- /data/mvs_depth_estimation/move_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | scene = 'cactusgarden' 4 | 5 | data_target_path = '/home/esandstroem/scratch-second/euler_work/data/scene3d' + '/' + scene 6 | data_source_path = '/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' + '/' + scene 7 | 8 | copy_list = dict() 9 | 10 | image_list = sorted(os.listdir(data_source_path + '/images')) 11 | stereo_list = sorted(os.listdir(data_source_path + '/dense/stereo/depth_maps')) 12 | 13 | # remove all entries containing 'photometric' from the stereo_list 14 | stereo_list = stereo_list[::2] 15 | 16 | tof_list = sorted(os.listdir(data_source_path + '/' + scene + '_png/depth')) 17 | 18 | for k, frame in 
enumerate(sorted(os.listdir(data_source_path + '/images'))): 19 | if k % 10 == 0: 20 | copy_list[data_source_path + '/images/' + image_list[k]] = data_target_path + '/images/' + image_list[k] 21 | copy_list[data_source_path + '/dense/stereo/depth_maps/' + stereo_list[k]] = data_target_path + '/dense/stereo/depth_maps/' + stereo_list[k] 22 | copy_list[data_source_path + '/' + scene + '_png/depth/' + tof_list[k]] = data_target_path + '/' + scene + '_png/depth/' + tof_list[k] 23 | 24 | for path in copy_list.keys(): 25 | os.system('cp ' + path + ' ' + copy_list[path]) -------------------------------------------------------------------------------- /data/mvs_depth_estimation/reconstruct_colmap_slurm_copyroom.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --output=/scratch_net/nudel/colmap-test/log/%j.out # could not get it to work on nudel_second... #/scratch_ned/nudel_second/opportunistic_3d_capture/data/scene3d/log/%j.out 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --mem=50G 6 | 7 | 8 | PROJECT_PATH='/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' 9 | 10 | scene_string='copyroom_downsampled' 11 | 12 | for SCENE in $scene_string 13 | do 14 | # extract features 15 | colmap feature_extractor --image_path $PROJECT_PATH/$SCENE/images \ 16 | --database_path $PROJECT_PATH/$SCENE/database.db \ 17 | --ImageReader.camera_model PINHOLE \ 18 | --ImageReader.single_camera 1 \ 19 | --ImageReader.camera_params "525.0, 525.0, 319.5, 239.5" 20 | 21 | # # sequential matching along trajectory 22 | colmap sequential_matcher --database_path $PROJECT_PATH/$SCENE/database.db \ 23 | --SequentialMatching.overlap 10 24 | 25 | ## dense reconstruction 26 | mkdir -p $PROJECT_PATH/$SCENE/dense/sparse 27 | 28 | # build sparse model 29 | colmap point_triangulator --database_path $PROJECT_PATH/$SCENE/database.db \ 30 | --image_path $PROJECT_PATH/$SCENE/images \ 31 | --input_path $PROJECT_PATH/$SCENE/sparse \ 32 | --output_path $PROJECT_PATH/$SCENE/dense/sparse \ 33 | --Mapper.ba_refine_focal_length 0 \ 34 | --Mapper.ba_refine_extra_param 0 35 | 36 | # # create dense workspace folders 37 | cp -r $PROJECT_PATH/$SCENE/images $PROJECT_PATH/$SCENE/dense/ 38 | mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/depth_maps 39 | mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/normal_maps 40 | 41 | 42 | # # compute dense depth maps 43 | colmap patch_match_stereo --workspace_path $PROJECT_PATH/$SCENE/dense \ 44 | --PatchMatchStereo.depth_min 0.5 \ 45 | --PatchMatchStereo.depth_max 10.0 46 | 47 | # # # fuse stereo depth maps 48 | # colmap stereo_fusion --workspace_path PROJECT_PATH/${SCENE}/dense \ 49 | # --output_path PROJECT_PATH/${SCENE}/dense/fused.ply 50 | done 51 | 52 | 53 | -------------------------------------------------------------------------------- /data/mvs_depth_estimation/reconstruct_colmap_slurm_stonewall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --output=/scratch_net/nudel/colmap-test/log/%j.out # could not get it to work on nudel_second... 
#/scratch_ned/nudel_second/opportunistic_3d_capture/data/scene3d/log/%j.out 4 | #SBATCH --gres=gpu:1 5 | #SBATCH --mem=50G 6 | 7 | 8 | PROJECT_PATH='/home/esandstroem/scratch-second/opportunistic_3d_capture/data/scene3d' 9 | 10 | scene_string='stonewall_downsampled_limit_depth' 11 | 12 | for SCENE in $scene_string 13 | do 14 | # # extract features 15 | # colmap feature_extractor --image_path $PROJECT_PATH/$SCENE/images \ 16 | # --database_path $PROJECT_PATH/$SCENE/database.db \ 17 | # --ImageReader.camera_model PINHOLE \ 18 | # --ImageReader.single_camera 1 \ 19 | # --ImageReader.camera_params "525.0, 525.0, 319.5, 239.5" 20 | 21 | # sequential matching along trajectory 22 | # colmap sequential_matcher --database_path $PROJECT_PATH/$SCENE/database.db \ 23 | # --SequentialMatching.overlap 10 24 | 25 | # ## dense reconstruction 26 | # mkdir -p $PROJECT_PATH/$SCENE/dense/sparse 27 | 28 | # # build sparse model 29 | # colmap point_triangulator --database_path $PROJECT_PATH/$SCENE/database.db \ 30 | # --image_path $PROJECT_PATH/$SCENE/images \ 31 | # --input_path $PROJECT_PATH/$SCENE/sparse \ 32 | # --output_path $PROJECT_PATH/$SCENE/dense/sparse \ 33 | # --Mapper.ba_refine_focal_length 0 \ 34 | # --Mapper.ba_refine_extra_param 0 35 | 36 | # # create dense workspace folders 37 | # cp -r $PROJECT_PATH/$SCENE/images $PROJECT_PATH/$SCENE/dense/ 38 | # mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/depth_maps 39 | # mkdir -p $PROJECT_PATH/$SCENE/dense/stereo/normal_maps 40 | 41 | 42 | # # compute dense depth maps 43 | colmap patch_match_stereo --workspace_path $PROJECT_PATH/$SCENE/dense \ 44 | --PatchMatchStereo.depth_min 0.5 \ 45 | --PatchMatchStereo.depth_max 10.0 46 | 47 | # # # fuse stereo depth maps 48 | # colmap stereo_fusion --workspace_path PROJECT_PATH/${SCENE}/dense \ 49 | # --output_path PROJECT_PATH/${SCENE}/dense/fused.ply 50 | done 51 | 52 | 53 | -------------------------------------------------------------------------------- /data/mvs_depth_estimation/setup_colmap.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | 6 | from pyquaternion import Quaternion 7 | 8 | def arg_parse(): 9 | 10 | parser = argparse.ArgumentParser() 11 | 12 | parser.add_argument('--source_path', default='/scratch-second/esandstroem/opportunistic_3d_capture/data/scene3d', type=str) 13 | parser.add_argument('--colmap_path', default='/scratch-second/esandstroem/opportunistic_3d_capture/data/scene3d', type=str) 14 | 15 | # camera options 16 | parser.add_argument('--fx', default=525.00, type=float) 17 | parser.add_argument('--fy', default=525.00, type=float) 18 | parser.add_argument('--cx', default=319.5, type=float) 19 | parser.add_argument('--cy', default=239.5, type=float) 20 | parser.add_argument('--width', default=640, type=int) 21 | parser.add_argument('--height', default=480, type=int) 22 | 23 | args = parser.parse_args() 24 | return vars(args) 25 | 26 | def main(args): 27 | scenes = ['cactusgarden', 'lounge', 'copyroom'] 28 | for scene in scenes: 29 | IMAGE_PATH = os.path.join(args['colmap_path'], scene, 'images') 30 | TRAJECTORY_PATH = os.path.join(args['colmap_path'], scene, scene + '_trajectory.log') 31 | SPARSE_PATH = os.path.join(args['colmap_path'], scene, 'sparse') 32 | DENSE_PATH = os.path.join(args['colmap_path'], scene, 'dense') 33 | STEREO_PATH = os.path.join(DENSE_PATH, 'stereo') 34 | # setup colmap workspace 35 | if not os.path.exists(args['colmap_path']): 36 | os.makedirs(args['colmap_path']) 37 | if 
not os.path.exists(IMAGE_PATH): 38 | os.makedirs(IMAGE_PATH) 39 | if not os.path.exists(SPARSE_PATH): 40 | os.makedirs(SPARSE_PATH) 41 | if not os.path.exists(DENSE_PATH): 42 | os.makedirs(DENSE_PATH) 43 | if not os.path.exists(STEREO_PATH): 44 | os.makedirs(STEREO_PATH) 45 | 46 | # write camera file 47 | with open(os.path.join(SPARSE_PATH, 'cameras.txt'), 'w') as file: 48 | file.write('1 PINHOLE {} {} {} {} {} {}'.format(args['width'], args['height'], args['fx'], args['fy'], args['cx'], args['cy'])) 49 | 50 | # write points file 51 | with open(os.path.join(SPARSE_PATH, 'points3D.txt'), 'w') as file: 52 | pass 53 | 54 | poses = dict() 55 | # retrieve pose dictionary 56 | with open(TRAJECTORY_PATH, 'r') as file: 57 | 58 | for rgb_name in sorted(os.listdir(IMAGE_PATH)): 59 | # extract the camera extrinsics by reading 5 lines 60 | metadata = next(file) 61 | 62 | first = np.fromstring(next(file), count=4, sep=' ', dtype=float) #[:-1].split(' ') 63 | second = np.fromstring(next(file), count=4, sep=' ', dtype=float) 64 | third = np.fromstring(next(file), count=4, sep=' ', dtype=float) 65 | fourth = np.fromstring(next(file), count=4, sep=' ', dtype=float) 66 | 67 | extrinsics = np.zeros((4,4)) 68 | extrinsics[0, :] = first 69 | extrinsics[1, :] = second 70 | extrinsics[2, :] = third 71 | extrinsics[3, :] = fourth 72 | 73 | # print(np.matmul(extrinsics[:3, :3] , np.transpose(extrinsics[:3, :3]))) 74 | # invert for colmap 75 | extrinsics = np.linalg.inv(extrinsics) 76 | 77 | rotation = Quaternion(matrix=extrinsics[:3, :3], rtol=1e-04, atol=1e-04) 78 | rotation = [rotation.elements[0], rotation.elements[1], rotation.elements[2], rotation.elements[3]] 79 | translation = list(extrinsics[:3, 3]) 80 | 81 | pose = rotation + translation 82 | pose = [str(p) for p in pose] 83 | pose = " ".join(pose) 84 | 85 | # check correct length of pose 86 | assert len(pose.split(' ')) == 7 87 | # print(rgb_name) 88 | poses[rgb_name] = pose 89 | 90 | # write and copy images 91 | with open(os.path.join(SPARSE_PATH, 'images.txt'), 'w') as file, open(os.path.join(STEREO_PATH, 'patch-match.cfg'), 'w') as cfg: 92 | 93 | for i, rgb_name in enumerate(sorted(os.listdir(IMAGE_PATH))): 94 | 95 | # add rgb name to patch-match.cfg file 96 | cfg.write(rgb_name + '\n') 97 | # limit the number of source images during reconstruction to 20 to reduce memory requirement 98 | cfg.write('__auto__, 20\n') 99 | # if specifying source images manually 100 | # get source images 101 | # start_indx = max(0, i - 10) 102 | # end_indx = min(len(matches), i + 10) 103 | # source_images = [] 104 | # for j in range(start_indx, end_indx): 105 | # if j == i: 106 | # continue 107 | # source_images.append(timestamp_mapping[matches[j][1]].replace('rgb/', '')) 108 | # source_images = ", ".join(source_images) 109 | # cfg.write('{}\n'.format(source_images)) 110 | 111 | # retrieve pose for the rgb frame 112 | image_line = '{} '.format(i + 1) + poses[rgb_name] + ' {} '.format(1) + rgb_name + '\n' + '\n' 113 | file.write(image_line) 114 | 115 | 116 | if __name__ == '__main__': 117 | args = arg_parse() 118 | main(args) -------------------------------------------------------------------------------- /data/mvs_depth_estimation/setup_colmap_corbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | 6 | from data3d.utils.associate import associate 7 | from pyquaternion import Quaternion 8 | 9 | def arg_parse(): 10 | 11 | parser = argparse.ArgumentParser() 12 | 13 | 
parser.add_argument('--source_path') 14 | parser.add_argument('--colmap_path') 15 | 16 | # dataset options 17 | parser.add_argument('--sequence_id', default='H1') 18 | 19 | # camera options 20 | parser.add_argument('--fx', default=468.60, type=float) 21 | parser.add_argument('--fy', default=468.61, type=float) 22 | parser.add_argument('--cx', default=318.27, type=float) 23 | parser.add_argument('--cy', default=243.99, type=float) 24 | parser.add_argument('--width', default=640, type=int) 25 | parser.add_argument('--height', default=480, type=int) 26 | 27 | args = parser.parse_args() 28 | return vars(args) 29 | 30 | def main(args): 31 | 32 | IMAGE_PATH = os.path.join(args['colmap_path'], 'images') 33 | SPARSE_PATH = os.path.join(args['colmap_path'], 'sparse') 34 | DENSE_PATH = os.path.join(args['colmap_path'], 'dense') 35 | STEREO_PATH = os.path.join(DENSE_PATH, 'stereo') 36 | # setup colmap workspace 37 | if not os.path.exists(args['colmap_path']): 38 | os.makedirs(args['colmap_path']) 39 | if not os.path.exists(IMAGE_PATH): 40 | os.makedirs(IMAGE_PATH) 41 | if not os.path.exists(SPARSE_PATH): 42 | os.makedirs(SPARSE_PATH) 43 | if not os.path.exists(DENSE_PATH): 44 | os.makedirs(DENSE_PATH) 45 | if not os.path.exists(STEREO_PATH): 46 | os.makedirs(STEREO_PATH) 47 | 48 | # write camera file 49 | with open(os.path.join(SPARSE_PATH, 'cameras.txt'), 'w') as file: 50 | file.write('1 PINHOLE {} {} {} {} {} {}'.format(args['width'], args['height'], args['fx'], args['fy'], args['cx'], args['cy'])) 51 | 52 | # write points file 53 | with open(os.path.join(SPARSE_PATH, 'points3D.txt'), 'w') as file: 54 | pass 55 | 56 | # copy images as build images file 57 | 58 | # build dictionary timestamp -> path 59 | timestamp_mapping = {} 60 | with open(os.path.join(args['source_path'], '{}_pre_registereddata/rgb.txt'.format(args['sequence_id'])), 'r') as file: 61 | for line in file: 62 | 63 | # skip comments 64 | if line[0] == '#': 65 | continue 66 | 67 | line = line.rstrip() 68 | timestamp, file_path = line.split(' ') 69 | timestamp_mapping[float(timestamp)] = file_path.replace('\\', '/') 70 | 71 | # iterate through trajectory 72 | poses = {} 73 | 74 | with open(os.path.join(args['source_path'], '{}_Trajectory/groundtruth.txt'.format(args['sequence_id'])), 'r') as file: 75 | for line in file: 76 | # skip comments 77 | if line[0] == '#': 78 | continue 79 | 80 | # parse and reformat data 81 | line = line.rstrip() 82 | elem = line.split(' ') 83 | timestamp = float(elem[0]) 84 | 85 | # transform pose 86 | rotation = [float(e) for e in elem[4:]] 87 | rotation = Quaternion(rotation[-1], rotation[0], rotation[1], rotation[2]) 88 | rotation = rotation.rotation_matrix 89 | translation = [float(e) for e in elem[1:4]] 90 | 91 | extrinsics = np.eye(4) 92 | extrinsics[:3, :3] = rotation 93 | extrinsics[:3, 3] = translation 94 | 95 | # # invert for colmap 96 | extrinsics = np.linalg.inv(extrinsics) 97 | 98 | rotation = Quaternion(matrix=extrinsics[:3, :3]) 99 | rotation = [rotation.elements[0], rotation.elements[1], rotation.elements[2], rotation.elements[3]] 100 | translation = list(extrinsics[:3, 3]) 101 | 102 | pose = rotation + translation 103 | pose = [str(p) for p in pose] 104 | pose = " ".join(pose) 105 | 106 | # check correct length of pose 107 | assert len(pose.split(' ')) == 7 108 | 109 | poses[timestamp] = pose 110 | 111 | matches = associate(poses, timestamp_mapping, offset=0.0, max_difference=0.02) 112 | 113 | # write and copy images 114 | with open(os.path.join(SPARSE_PATH, 'images.txt'), 'w') as file, 
open(os.path.join(STEREO_PATH, 'patch-match.cfg'), 'w') as cfg: 115 | for i, (t_p, t_f) in enumerate(matches): 116 | 117 | # get data 118 | try: 119 | pose = poses[t_p] 120 | file_path = timestamp_mapping[t_f] 121 | except KeyError: 122 | continue 123 | 124 | image_line = '{} '.format(i + 1) + pose + ' {} '.format(1) + file_path.replace('rgb/', '') + '\n' + '\n' 125 | file.write(image_line) 126 | 127 | source_image = os.path.join(args['source_path'], '{}_pre_registereddata'.format(args['sequence_id']), file_path) 128 | target_image = os.path.join(IMAGE_PATH, file_path.replace('rgb/', '')) 129 | os.system('cp -p {} {}'.format(source_image, target_image)) 130 | 131 | # write patch match config file 132 | cfg.write(file_path.replace('rgb/', '') + '\n') 133 | 134 | # get source images 135 | start_indx = max(0, i - 10) 136 | end_indx = min(len(matches), i + 10) 137 | source_images = [] 138 | for j in range(start_indx, end_indx): 139 | if j == i: 140 | continue 141 | source_images.append(timestamp_mapping[matches[j][1]].replace('rgb/', '')) 142 | 143 | # source_images = ", ".join(source_images) 144 | # cfg.write('{}\n'.format(source_images)) 145 | cfg.write('__auto__, 20\n') 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | args = arg_parse() 151 | main(args) -------------------------------------------------------------------------------- /data/save_every_tenth_frame.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | scene = 'lounge' 4 | 5 | data_path = '/home/esandstroem/scratch-second/euler_work/data/scene3d' + '/' + scene 6 | 7 | remove_list = [] 8 | 9 | image_list = sorted(os.listdir(data_path + '/images')) 10 | tof_list = sorted(os.listdir(data_path + '/' + scene + '_png/depth')) 11 | 12 | for k, frame in enumerate(sorted(os.listdir(data_path + '/images'))): 13 | if k % 10 != 0: 14 | remove_list.append(data_path + '/images/' + image_list[k]) 15 | remove_list.append(data_path + '/' + scene + '_png/depth/' + tof_list[k]) 16 | 17 | 18 | for path in remove_list: 19 | os.system('rm ' + path) 20 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .replica import Replica 2 | from .corbs import CoRBS 3 | from .scene3d import Scene3D 4 | -------------------------------------------------------------------------------- /dataset/associate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Software License Agreement (BSD License) 3 | # 4 | # Copyright (c) 2013, Juergen Sturm, TUM 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above 14 | # copyright notice, this list of conditions and the following 15 | # disclaimer in the documentation and/or other materials provided 16 | # with the distribution. 17 | # * Neither the name of TUM nor the names of its 18 | # contributors may be used to endorse or promote products derived 19 | # from this software without specific prior written permission. 
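Both COLMAP setup scripts above cap the number of source images per reference view by writing `__auto__, 20` into `patch-match.cfg`, which keeps patch-match memory in check. A condensed sketch of that pattern follows; the helper name and image list are placeholders, not part of the repository.

```python
import os

def write_patch_match_cfg(stereo_path, image_names, max_src_images=20):
    # One reference image per pair of lines; "__auto__, N" lets COLMAP pick
    # up to N source images automatically for that reference image.
    with open(os.path.join(stereo_path, "patch-match.cfg"), "w") as cfg:
        for name in image_names:
            cfg.write(name + "\n")
            cfg.write("__auto__, {}\n".format(max_src_images))
```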
20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 | # POSSIBILITY OF SUCH DAMAGE. 33 | # 34 | # Requirements: 35 | # sudo apt-get install python-argparse 36 | 37 | """ 38 | The Kinect provides the color and depth images in an un-synchronized way. This means that the set of time stamps from the color images do not intersect with those of the depth images. Therefore, we need some way of associating color images to depth images. 39 | For this purpose, you can use the ''associate.py'' script. It reads the time stamps from the rgb.txt file and the depth.txt file, and joins them by finding the best matches. 40 | """ 41 | 42 | import argparse 43 | 44 | 45 | def read_file_list(filename): 46 | """ 47 | Reads a trajectory from a text file. 48 | 49 | File format: 50 | The file format is "stamp d1 d2 d3 ...", where stamp denotes the time stamp (to be matched) 51 | and "d1 d2 d3.." is arbitary data (e.g., a 3D position and 3D orientation) associated to this timestamp. 52 | 53 | Input: 54 | filename -- File name 55 | 56 | Output: 57 | dict -- dictionary of (stamp,data) tuples 58 | 59 | """ 60 | file = open(filename) 61 | data = file.read() 62 | lines = data.replace(",", " ").replace("\t", " ").split("\n") 63 | list = [ 64 | [v.strip() for v in line.split(" ") if v.strip() != ""] 65 | for line in lines 66 | if len(line) > 0 and line[0] != "#" 67 | ] 68 | list = [(float(l[0]), l[1:]) for l in list if len(l) > 1] 69 | return dict(list) 70 | 71 | 72 | def associate(first_list, second_list, offset, max_difference): 73 | """ 74 | Associate two dictionaries of (stamp,data). As the time stamps never match exactly, we aim 75 | to find the closest match for every input tuple. 
76 | 77 | Input: 78 | first_list -- first dictionary of (stamp,data) tuples 79 | second_list -- second dictionary of (stamp,data) tuples 80 | offset -- time offset between both dictionaries (e.g., to model the delay between the sensors) 81 | max_difference -- search radius for candidate generation 82 | Output: 83 | matches -- list of matched tuples ((stamp1,data1),(stamp2,data2)) 84 | 85 | """ 86 | first_keys = list(first_list.keys()) 87 | second_keys = list(second_list.keys()) 88 | potential_matches = [ 89 | (abs(a - (b + offset)), a, b) 90 | for a in first_keys 91 | for b in second_keys 92 | if abs(a - (b + offset)) < max_difference 93 | ] 94 | potential_matches.sort() 95 | matches = [] 96 | for diff, a, b in potential_matches: 97 | if a in first_keys and b in second_keys: 98 | first_keys.remove(a) 99 | second_keys.remove(b) 100 | matches.append((a, b)) 101 | 102 | matches.sort() 103 | return matches 104 | 105 | 106 | if __name__ == "__main__": 107 | 108 | # parse command line 109 | parser = argparse.ArgumentParser( 110 | description=""" 111 | This script takes two data files with timestamps and associates them 112 | """ 113 | ) 114 | parser.add_argument("first_file", help="first text file (format: timestamp data)") 115 | parser.add_argument("second_file", help="second text file (format: timestamp data)") 116 | parser.add_argument( 117 | "--first_only", 118 | help="only output associated lines from first file", 119 | action="store_true", 120 | ) 121 | parser.add_argument( 122 | "--offset", 123 | help="time offset added to the timestamps of the second file (default: 0.0)", 124 | default=0.0, 125 | ) 126 | parser.add_argument( 127 | "--max_difference", 128 | help="maximally allowed time difference for matching entries (default: 0.02)", 129 | default=0.02, 130 | ) 131 | args = parser.parse_args() 132 | 133 | first_list = read_file_list(args.first_file) 134 | second_list = read_file_list(args.second_file) 135 | 136 | matches = associate( 137 | first_list, second_list, float(args.offset), float(args.max_difference) 138 | ) 139 | 140 | if args.first_only: 141 | for a, b in matches: 142 | print("%f %s" % (a, " ".join(first_list[a]))) 143 | else: 144 | for a, b in matches: 145 | print( 146 | "%f %s %f %s" 147 | % ( 148 | a, 149 | " ".join(first_list[a]), 150 | b - float(args.offset), 151 | " ".join(second_list[b]), 152 | ) 153 | ) 154 | -------------------------------------------------------------------------------- /dataset/colmap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 
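# A minimal usage sketch of read_file_list and associate from
# dataset/associate.py above; the CoRBS desk paths come from
# lists/corbs/desk.txt and are placeholders for wherever the data lives.
# This mirrors how dataset/corbs.py further below pairs ground-truth pose
# time stamps with RGB frames.
from dataset.associate import read_file_list, associate

poses = read_file_list("desk/data/D1_Trajectory/groundtruth.txt")
rgb_frames = read_file_list("desk/data/D1_pre_registereddata/rgb.txt")

# each match is a (pose_timestamp, rgb_timestamp) pair at most 20 ms apart
matches = associate(poses, rgb_frames, offset=0.0, max_difference=0.02)
pose_to_rgb = {t_pose: t_rgb for t_pose, t_rgb in matches}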
17 | 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de) 31 | 32 | import numpy as np 33 | import struct 34 | import collections 35 | 36 | 37 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 38 | """Read and unpack the next bytes from a binary file. 39 | :param fid: 40 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 41 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 42 | :param endian_character: Any of {@, =, <, >, !} 43 | :return: Tuple of read and unpacked values. 44 | """ 45 | data = fid.read(num_bytes) 46 | return struct.unpack(endian_character + format_char_sequence, data) 47 | 48 | 49 | def read_cameras_binary(path_to_model_file): 50 | """ 51 | see: src/base/reconstruction.cc 52 | void Reconstruction::WriteCamerasBinary(const std::string& path) 53 | void Reconstruction::ReadCamerasBinary(const std::string& path) 54 | """ 55 | cameras = {} 56 | with open(path_to_model_file, "rb") as fid: 57 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 58 | for camera_line_index in range(num_cameras): 59 | camera_properties = read_next_bytes( 60 | fid, num_bytes=24, format_char_sequence="iiQQ" 61 | ) 62 | camera_id = camera_properties[0] 63 | model_id = camera_properties[1] # not used 64 | width = camera_properties[2] # not used 65 | height = camera_properties[3] # not used 66 | num_params = 4 67 | params = read_next_bytes( 68 | fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params 69 | ) 70 | 71 | cameras[camera_id] = params 72 | 73 | return cameras 74 | 75 | 76 | def read_array(path): 77 | with open(path, "rb") as fid: 78 | width, height, channels = np.genfromtxt( 79 | fid, delimiter="&", max_rows=1, usecols=(0, 1, 2), dtype=int 80 | ) 81 | fid.seek(0) 82 | num_delimiter = 0 83 | byte = fid.read(1) 84 | while True: 85 | if byte == b"&": 86 | num_delimiter += 1 87 | if num_delimiter >= 3: 88 | break 89 | byte = fid.read(1) 90 | array = np.fromfile(fid, np.float32) 91 | 92 | array = array.reshape((width, height, channels), order="F") 93 | return np.transpose(array, (1, 0, 2)).squeeze() 94 | 95 | 96 | BaseImage = collections.namedtuple( 97 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"] 98 | ) 99 | Point3D = collections.namedtuple( 100 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"] 101 | ) 102 | 103 | 104 | class Image(BaseImage): 105 | def qvec2rotmat(self): 106 | return qvec2rotmat(self.qvec) 107 | 108 | 109 | def qvec2rotmat(qvec): 110 | return np.array( 111 | [ 112 | [ 113 | 1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2, 114 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 115 | 2 * qvec[3] * qvec[1] + 2 * 
qvec[0] * qvec[2], 116 | ], 117 | [ 118 | 2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 119 | 1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2, 120 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1], 121 | ], 122 | [ 123 | 2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 124 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 125 | 1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2, 126 | ], 127 | ] 128 | ) 129 | 130 | 131 | def read_images_binary(path_to_model_file): 132 | """ 133 | see: src/base/reconstruction.cc 134 | void Reconstruction::ReadImagesBinary(const std::string& path) 135 | void Reconstruction::WriteImagesBinary(const std::string& path) 136 | """ 137 | images = {} 138 | with open(path_to_model_file, "rb") as fid: 139 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 140 | for image_index in range(num_reg_images): 141 | binary_image_properties = read_next_bytes( 142 | fid, num_bytes=64, format_char_sequence="idddddddi" 143 | ) 144 | image_id = binary_image_properties[0] 145 | qvec = np.array(binary_image_properties[1:5]) 146 | tvec = np.array(binary_image_properties[5:8]) 147 | camera_id = binary_image_properties[8] 148 | image_name = "" 149 | current_char = read_next_bytes(fid, 1, "c")[0] 150 | while current_char != b"\x00": # look for the ASCII 0 entry 151 | image_name += current_char.decode("utf-8") 152 | current_char = read_next_bytes(fid, 1, "c")[0] 153 | num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ 154 | 0 155 | ] 156 | x_y_id_s = read_next_bytes( 157 | fid, 158 | num_bytes=24 * num_points2D, 159 | format_char_sequence="ddq" * num_points2D, 160 | ) 161 | xys = np.column_stack( 162 | [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))] 163 | ) 164 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 165 | images[image_id] = Image( 166 | id=image_id, 167 | qvec=qvec, 168 | tvec=tvec, 169 | camera_id=camera_id, 170 | name=image_name, 171 | xys=xys, 172 | point3D_ids=point3D_ids, 173 | ) 174 | return images 175 | 176 | 177 | def read_images(path): 178 | 179 | images = {} 180 | 181 | with open(path, "r") as file: 182 | for i, line in enumerate(file): 183 | if i % 2 == 0: 184 | if line[0] == "#": 185 | continue 186 | 187 | elements = line.rstrip().split(" ") 188 | 189 | image_id = elements[0] 190 | 191 | qw = elements[1] 192 | qx = elements[2] 193 | qy = elements[3] 194 | qz = elements[4] 195 | 196 | tx = elements[5] 197 | ty = elements[6] 198 | tz = elements[7] 199 | 200 | camera_id = elements[8] 201 | 202 | name = elements[9] 203 | 204 | quaternion = np.asarray([float(qw), float(qx), float(qy), float(qz)]) 205 | translation = np.asarray([float(tx), float(ty), float(tz)]) 206 | 207 | images[str(image_id)] = {} 208 | images[image_id]["camera_id"] = camera_id 209 | images[image_id]["name"] = name 210 | images[image_id]["quaternion"] = quaternion 211 | images[image_id]["translation"] = translation 212 | 213 | return images 214 | 215 | 216 | def read_cameras(path): 217 | cameras = {} 218 | 219 | with open(path, "r") as file: 220 | for line in file: 221 | if line[0] == "#": 222 | continue 223 | 224 | # parse camera line 225 | elements = line.rstrip().split(" ") 226 | camera_id = elements[0] 227 | model = elements[1] 228 | width = float(elements[2]) 229 | height = float(elements[3]) 230 | fx = float(elements[4]) 231 | fy = float(elements[5]) 232 | px = float(elements[6]) 233 | py = float(elements[7]) 234 | 235 | # create camera entry 236 | cameras[camera_id] = {} 237 | cameras[camera_id]["model"] = model 238 | cameras[camera_id]["width"] = width 239 | 
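            # Note on conventions (assumed from COLMAP): the quaternion and
            # translation stored per image describe the world-to-camera
            # transform, so a camera-to-world pose is R.T and -R.T @ t with
            # R = qvec2rotmat(qvec). read_array above returns the per-pixel
            # depth of a *.geometric.bin depth map as a float32 H x W array,
            # which the dataset classes below resample to their configured
            # resolution.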
cameras[camera_id]["height"] = height 240 | cameras[camera_id]["fx"] = fx 241 | cameras[camera_id]["fy"] = fy 242 | cameras[camera_id]["px"] = px 243 | cameras[camera_id]["py"] = py 244 | 245 | return cameras 246 | -------------------------------------------------------------------------------- /dataset/corbs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sys 4 | import numpy as np 5 | 6 | from skimage import io 7 | from torch.utils.data import Dataset 8 | 9 | import h5py 10 | import matplotlib.pyplot as plt 11 | 12 | # uncomment to run train_fusion and test_fusion 13 | from dataset.associate import associate 14 | from dataset.colmap import read_array 15 | 16 | 17 | from pyquaternion import Quaternion 18 | 19 | 20 | class CoRBS(Dataset): 21 | 22 | # NOTE: For now, the dataset class can only load one scene at a time 23 | def __init__(self, config_data): 24 | super(CoRBS, self).__init__() 25 | 26 | self.root_dir = os.getenv( 27 | config_data.root_dir 28 | ) # when training on local scratch 29 | 30 | # os.getenv returns none when the input does not exist. When 31 | # it returns none, we want to train on the work folder 32 | if not self.root_dir: 33 | self.root_dir = config_data.root_dir 34 | 35 | self.resolution_stereo = (config_data.resy_stereo, config_data.resx_stereo) 36 | 37 | self.resolution_tof = (config_data.resy_tof, config_data.resx_tof) 38 | 39 | self.mask_stereo_width = config_data.mask_stereo_width 40 | self.mask_stereo_height = config_data.mask_stereo_height 41 | self.mask_tof_width = config_data.mask_tof_width 42 | self.mask_tof_height = config_data.mask_tof_height 43 | 44 | self.min_depth_stereo = config_data.min_depth_stereo 45 | self.max_depth_stereo = config_data.max_depth_stereo 46 | self.min_depth_tof = config_data.min_depth_tof 47 | self.max_depth_tof = config_data.max_depth_tof 48 | 49 | self.transform = config_data.transform 50 | self.pad = config_data.pad 51 | 52 | self.scene_list = config_data.scene_list 53 | self.input = config_data.input 54 | self.target = config_data.target 55 | self.mode = config_data.mode 56 | 57 | self._scenes = [] 58 | 59 | self.__init_dataset() 60 | 61 | def __init_dataset(self): 62 | 63 | # read paths to data from scene list file 64 | with open(os.path.join(self.root_dir, self.scene_list), "r") as file: 65 | for ( 66 | line 67 | ) in ( 68 | file 69 | ): # only contains one line now since we only load one scene at a time 70 | line = line.split(" ") 71 | self._scenes.append( 72 | line[0].split("/")[0] 73 | ) # change this into append when we use more scenes 74 | trajectory_file = os.path.join( 75 | self.root_dir, line[4][:-1] 76 | ) # make this into a directory when we use more scenes 77 | rgb_file = os.path.join(self.root_dir, line[2]) 78 | depth_file = os.path.join(self.root_dir, line[3]) 79 | self.stereo_path = os.path.join(self.root_dir, line[0]) 80 | self.tof_path = os.path.join(self.root_dir, line[1]) 81 | self.rgb_path = os.path.join(self.root_dir, line[1]) 82 | 83 | # read all files for pose, rgb, and depth 84 | self.poses = {} 85 | with open(trajectory_file, "r") as file: 86 | for line in file: 87 | # skip comment lines 88 | if line[0] == "#": 89 | continue 90 | elems = line.rstrip().split(" ") 91 | timestamp = float(elems[0]) 92 | pose = [float(e) for e in elems[1:]] 93 | self.poses[timestamp] = pose 94 | 95 | self.rgb_frames = {} 96 | with open(rgb_file, "r") as file: 97 | for line in file: 98 | # skip comment lines 99 | if line[0] == "#": 100 | continue 101 | 
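                # The files parsed in this block are assumed to follow the
                # TUM RGB-D text layout: groundtruth.txt rows above are
                # "timestamp tx ty tz qx qy qz qw", while rgb.txt and
                # depth.txt rows are "timestamp relative/path", e.g.
                # "1316787079.66 rgb/1316787079.66.png" (illustrative values).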
timestamp, file_path = line.rstrip().split(" ") 102 | timestamp = float(timestamp) 103 | self.rgb_frames[timestamp] = file_path 104 | 105 | self.depth_frames = {} 106 | with open(depth_file, "r") as file: 107 | for line in file: 108 | # skip comment lines 109 | if line[0] == "#": 110 | continue 111 | timestamp, file_path = line.rstrip().split(" ") 112 | timestamp = float(timestamp) 113 | self.depth_frames[timestamp] = file_path 114 | 115 | # match pose to rgb timestamp 116 | rgb_matches = associate( 117 | self.poses, self.rgb_frames, offset=0.0, max_difference=0.02 118 | ) 119 | # build mapping databases to get matches from pose timestamp to frame timestamp 120 | self.pose_to_rgb = {t_p: t_r for (t_p, t_r) in rgb_matches} 121 | 122 | # match poses that are matched with rgb to a corresponding depth timestamp 123 | depth_matches = associate( 124 | self.pose_to_rgb, self.depth_frames, offset=0.0, max_difference=0.02 125 | ) 126 | # build mapping databases to get matches from pose timestamp to frame timestamp 127 | self.pose_to_depth = {t_p: t_d for (t_p, t_d) in depth_matches} 128 | self.poses_matched = {t_p: self.poses[t_p] for (t_p, t_r) in rgb_matches} 129 | 130 | @property 131 | def scenes(self): 132 | return self._scenes 133 | 134 | def __len__(self): 135 | return len(self.poses_matched) 136 | 137 | def __getitem__(self, item): 138 | 139 | sample = dict() 140 | sample["item_id"] = item 141 | 142 | timestamp_pose = list(self.poses_matched.keys())[item] 143 | timestamp_rgb = self.pose_to_rgb[timestamp_pose] 144 | timestamp_depth = self.pose_to_depth[timestamp_pose] 145 | 146 | # read RGB frame 147 | rgb_file = os.path.join( 148 | self.rgb_path, self.rgb_frames[timestamp_rgb].replace("\\", "/") 149 | ) 150 | rgb_image = io.imread(rgb_file).astype(np.float32) 151 | 152 | step_x = rgb_image.shape[0] / self.resolution_tof[0] 153 | step_y = rgb_image.shape[1] / self.resolution_tof[1] 154 | 155 | index_y = [int(step_y * i) for i in range(0, int(rgb_image.shape[1] / step_y))] 156 | index_x = [int(step_x * i) for i in range(0, int(rgb_image.shape[0] / step_x))] 157 | 158 | rgb_image = rgb_image[:, index_y] 159 | rgb_image = rgb_image[index_x, :] 160 | sample["image"] = np.asarray(rgb_image) / 255 161 | 162 | frame_id = "{}/{}".format(self._scenes[0], str(timestamp_pose)) 163 | sample["frame_id"] = frame_id 164 | 165 | # read kinect depth file 166 | depth_file = os.path.join( 167 | self.tof_path, self.depth_frames[timestamp_depth].replace("\\", "/") 168 | ) 169 | depth_tof = io.imread(depth_file).astype(np.float32) 170 | depth_tof /= 5000.0 171 | 172 | step_x = depth_tof.shape[0] / self.resolution_tof[0] 173 | step_y = depth_tof.shape[1] / self.resolution_tof[1] 174 | 175 | index_y = [int(step_y * i) for i in range(0, int(depth_tof.shape[1] / step_y))] 176 | index_x = [int(step_x * i) for i in range(0, int(depth_tof.shape[0] / step_x))] 177 | 178 | depth_tof = depth_tof[:, index_y] 179 | depth_tof = depth_tof[index_x, :] 180 | sample["tof_depth"] = np.asarray(depth_tof) 181 | 182 | # read colmap stereo depth file 183 | try: 184 | stereo_file = os.path.join( 185 | self.stereo_path, 186 | self.rgb_frames[timestamp_rgb].replace("rgb\\", "") + ".geometric.bin", 187 | ) 188 | depth_stereo = read_array(stereo_file) 189 | except FileNotFoundError: 190 | print("stereo frame not found") 191 | return None 192 | 193 | step_x = depth_stereo.shape[0] / self.resolution_stereo[0] 194 | step_y = depth_stereo.shape[1] / self.resolution_stereo[1] 195 | 196 | index_y = [ 197 | int(step_y * i) for i in range(0, 
int(depth_stereo.shape[1] / step_y)) 198 | ] 199 | index_x = [ 200 | int(step_x * i) for i in range(0, int(depth_stereo.shape[0] / step_x)) 201 | ] 202 | 203 | depth_stereo = depth_stereo[:, index_y] 204 | depth_stereo = depth_stereo[index_x, :] 205 | sample["stereo_depth"] = np.asarray(depth_stereo) 206 | 207 | # define mask 208 | mask = depth_stereo > self.min_depth_stereo 209 | mask = np.logical_and(mask, depth_stereo < self.max_depth_stereo) 210 | 211 | # do not integrate depth values close to the image boundary 212 | mask[0 : self.mask_stereo_height, :] = 0 213 | mask[-self.mask_stereo_height : -1, :] = 0 214 | mask[:, 0 : self.mask_stereo_width] = 0 215 | mask[:, -self.mask_stereo_width : -1] = 0 216 | sample["stereo_mask"] = mask 217 | 218 | mask = depth_tof > self.min_depth_tof 219 | mask = np.logical_and(mask, depth_tof < self.max_depth_tof) 220 | 221 | # do not integrate depth values close to the image boundary 222 | mask[0 : self.mask_tof_height, :] = 0 223 | mask[-self.mask_tof_height : -1, :] = 0 224 | mask[:, 0 : self.mask_tof_width] = 0 225 | mask[:, -self.mask_tof_width : -1] = 0 226 | sample["tof_mask"] = mask 227 | 228 | # load extrinsics 229 | rotation = self.poses_matched[timestamp_pose][3:] 230 | rotation = Quaternion(rotation[-1], rotation[0], rotation[1], rotation[2]) 231 | rotation = rotation.rotation_matrix 232 | translation = self.poses_matched[timestamp_pose][:3] 233 | 234 | extrinsics = np.eye(4) 235 | extrinsics[:3, :3] = rotation 236 | extrinsics[:3, 3] = translation 237 | sample["extrinsics"] = extrinsics 238 | 239 | # load intrinsics 240 | intrinsics_stereo = np.asarray( 241 | [ 242 | [ 243 | 468.60 * self.resolution_stereo[1] / 640, 244 | 0.0, 245 | 318.27 * self.resolution_stereo[1] / 640, 246 | ], 247 | [ 248 | 0.0, 249 | 468.61 * self.resolution_stereo[0] / 480, 250 | 243.99 * self.resolution_stereo[0] / 480, 251 | ], 252 | [0.0, 0.0, 1.0], 253 | ] 254 | ) 255 | 256 | sample["intrinsics_stereo"] = intrinsics_stereo 257 | 258 | intrinsics_tof = np.asarray( 259 | [ 260 | [ 261 | 468.60 * self.resolution_tof[1] / 640, 262 | 0.0, 263 | 318.27 * self.resolution_tof[1] / 640, 264 | ], 265 | [ 266 | 0.0, 267 | 468.61 * self.resolution_tof[0] / 480, 268 | 243.99 * self.resolution_tof[0] / 480, 269 | ], 270 | [0.0, 0.0, 1.0], 271 | ] 272 | ) 273 | 274 | sample["intrinsics_tof"] = intrinsics_tof 275 | 276 | # convert key image ndarray to compatible pytorch tensor shape. The function also converts the ndarrays to tensors, but this is not necessary as the pytorch dataloader does this anyway in a step later. 277 | if self.transform: 278 | sample = self.transform(sample) 279 | 280 | return sample 281 | 282 | def get_grid(self, scene, truncation): 283 | file = os.path.join(self.root_dir, scene, "sdf_" + scene + ".hdf") 284 | 285 | # read from hdf file! 
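        # The HDF file is assumed to hold a dense SDF volume under the key
        # "sdf" plus attributes "bbox" (3x2 world-space bounds) and
        # "voxel_size". After the truncation and padding below, a world point
        # p is meant to fall into voxel index ((p - bbox[:, 0]) / voxel_size),
        # rounded down, which is how the returned (voxels, bbox, voxel_size)
        # triple is interpreted downstream.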
286 | f = h5py.File(file, "r") 287 | voxels = np.array(f["sdf"]).astype(np.float16) 288 | 289 | voxels[voxels > truncation] = truncation 290 | voxels[voxels < -truncation] = -truncation 291 | # Add padding to grid to give more room to fusion net 292 | voxels = np.pad(voxels, self.pad, "constant", constant_values=-truncation) 293 | 294 | print(scene, voxels.shape) 295 | bbox = np.zeros((3, 2)) 296 | bbox[:, 0] = f.attrs["bbox"][:, 0] - self.pad * f.attrs["voxel_size"] * np.ones( 297 | (1, 1, 1) 298 | ) 299 | bbox[:, 1] = bbox[:, 0] + f.attrs["voxel_size"] * np.array(voxels.shape) 300 | 301 | return voxels, bbox, f.attrs["voxel_size"] 302 | -------------------------------------------------------------------------------- /dataset/scene3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | import numpy as np 5 | import re 6 | 7 | from skimage import io 8 | from skimage.color import rgb2gray 9 | from skimage import filters 10 | from torch.utils.data import Dataset 11 | 12 | # used for debugging 13 | # import matplotlib.pyplot as plt 14 | from dataset.colmap import read_array 15 | import itertools 16 | 17 | import h5py 18 | 19 | 20 | class Scene3D(Dataset): 21 | def __init__(self, config_data): 22 | self.root_dir = os.getenv(config_data.root_dir) 23 | if self.root_dir: 24 | self.root_dir += "/cluster/work/cvl/esandstroem/data/scene3D" # when training on local scratch 25 | # os.getenv returns none when the input does not exist. When 26 | # it returns none, we want to train on the work folder 27 | else: 28 | self.root_dir = config_data.root_dir 29 | 30 | self.resolution_stereo = (config_data.resy_stereo, config_data.resx_stereo) 31 | 32 | self.resolution_tof = (config_data.resy_tof, config_data.resx_tof) 33 | 34 | self.resolution = (config_data.resy, config_data.resx) 35 | 36 | self.mask_stereo_width = config_data.mask_stereo_width 37 | self.mask_stereo_height = config_data.mask_stereo_height 38 | self.mask_tof_width = config_data.mask_tof_width 39 | self.mask_tof_height = config_data.mask_tof_height 40 | self.mask_height = config_data.mask_height 41 | self.mask_width = config_data.mask_width 42 | 43 | self.min_depth_stereo = config_data.min_depth_stereo 44 | self.max_depth_stereo = config_data.max_depth_stereo 45 | self.min_depth_tof = config_data.min_depth_tof 46 | self.max_depth_tof = config_data.max_depth_tof 47 | self.min_depth = config_data.min_depth 48 | self.max_depth = config_data.max_depth 49 | 50 | self.transform = config_data.transform 51 | self.pad = config_data.pad 52 | 53 | self.scene_list = config_data.scene_list 54 | self.input = config_data.input 55 | self.target = config_data.target 56 | self.mode = config_data.mode 57 | 58 | self._scenes = [] 59 | 60 | self.sensor_line_mapping = { 61 | "rgb": 0, 62 | "camera_matrix": -1, 63 | "tof": 1, 64 | "tof_2": 1, 65 | "stereo": 2, 66 | } 67 | 68 | self._load_color() 69 | self._load_cameras() 70 | self._load_depths() 71 | 72 | def _load_depths(self): # loads the paths of the noisy depth images to a list 73 | 74 | # reading files from list 75 | self.depth_images = dict() 76 | for sensor_ in self.input: # initialize empty lists 77 | self.depth_images[sensor_] = [] 78 | 79 | with open(os.path.join(self.root_dir, self.scene_list), "r") as scene_list: 80 | for line in scene_list: 81 | if len(line) > 1: # avoid parsing empty line only containing \n 82 | line = line.split(" ") 83 | for sensor_ in self.input: 84 | if sensor_ == "tof": 85 | files = glob.glob( 86 | os.path.join( 
87 | self.root_dir, 88 | line[self.sensor_line_mapping[sensor_]], 89 | "*.png", 90 | ) 91 | ) 92 | elif sensor_ == "stereo": 93 | files = glob.glob( 94 | os.path.join( 95 | self.root_dir, 96 | line[self.sensor_line_mapping[sensor_]], 97 | "*.geometric.bin", 98 | ) 99 | ) 100 | for file in files: 101 | self.depth_images[sensor_].append(file) 102 | 103 | for sensor_ in self.depth_images.keys(): 104 | self.depth_images[sensor_] = sorted( 105 | self.depth_images[sensor_], 106 | key=lambda x: os.path.splitext(x.split("/")[-1])[0], 107 | ) 108 | 109 | def _load_color(self): 110 | self.color_images = [] 111 | 112 | # reading files from list 113 | with open(os.path.join(self.root_dir, self.scene_list), "r") as file: 114 | for line in file: 115 | if len(line) > 1: # avoid parsing empty line only containing \n 116 | line = line.split(" ") 117 | self._scenes.append(line[0].split("/")[0]) 118 | files = glob.glob( 119 | os.path.join( 120 | self.root_dir, 121 | line[self.sensor_line_mapping["rgb"]], 122 | "*.png", 123 | ) 124 | ) 125 | for file in files: 126 | self.color_images.append(file) 127 | 128 | self.color_images = sorted( 129 | self.color_images, key=lambda x: os.path.splitext(x.split("/")[-1])[0] 130 | ) 131 | 132 | def _load_cameras(self): 133 | def grouper_it(n, iterable): 134 | it = iter(iterable) 135 | while True: 136 | chunk_it = itertools.islice(it, n) 137 | try: 138 | first_el = next(chunk_it) 139 | except StopIteration: 140 | return 141 | yield itertools.chain((first_el,), chunk_it) 142 | 143 | self.cameras = dict() 144 | 145 | with open(os.path.join(self.root_dir, self.scene_list), "r") as file: 146 | for line in file: 147 | line = line.split(" ") 148 | if len(line) > 1: # avoid parsing empty line only containing \n 149 | with open( 150 | os.path.join(self.root_dir, line[-1][:-1]), "r" 151 | ) as traj_file: 152 | chunk_iterable = grouper_it(5, traj_file) 153 | for frame in chunk_iterable: 154 | frame_id = next(frame)[:-1] 155 | frame_id = re.split(r"\t+", frame_id.rstrip("\t"))[-1] 156 | first = np.fromstring( 157 | next(frame), count=4, sep=" ", dtype=float 158 | ) 159 | second = np.fromstring( 160 | next(frame), count=4, sep=" ", dtype=float 161 | ) 162 | third = np.fromstring( 163 | next(frame), count=4, sep=" ", dtype=float 164 | ) 165 | fourth = np.fromstring( 166 | next(frame), count=4, sep=" ", dtype=float 167 | ) 168 | 169 | extrinsics = np.zeros((4, 4)) 170 | extrinsics[0, :] = first 171 | extrinsics[1, :] = second 172 | extrinsics[2, :] = third 173 | extrinsics[3, :] = fourth 174 | 175 | self.cameras[ 176 | line[0].split("/")[0] + "/" + frame_id 177 | ] = extrinsics 178 | 179 | @property 180 | def scenes(self): 181 | return self._scenes 182 | 183 | def __len__(self): 184 | return len(self.color_images) 185 | 186 | def __getitem__(self, item): 187 | 188 | sample = dict() 189 | sample["item_id"] = item 190 | 191 | # load rgb image 192 | file = self.color_images[item] 193 | pathsplit = file.split("/") 194 | scene = pathsplit[-3] 195 | frame = os.path.splitext(pathsplit[-1])[0] 196 | 197 | frame_id = "{}/{}".format(scene, frame) 198 | 199 | image = io.imread(file) 200 | 201 | step_x = image.shape[0] / self.resolution[0] 202 | step_y = image.shape[1] / self.resolution[0] 203 | 204 | index_y = [int(step_y * i) for i in range(0, int(image.shape[1] / step_y))] 205 | index_x = [int(step_x * i) for i in range(0, int(image.shape[0] / step_x))] 206 | 207 | image = image[:, index_y] 208 | image = image[index_x, :] 209 | sample["image"] = np.asarray(image).astype(np.float32) / 255 210 | 
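        # The index_x/index_y construction above (and for the depth maps
        # below) is a nearest-neighbour subsampling: a stride is derived from
        # the target resolution (resy, resx) for each axis and every
        # stride-th row/column is kept, so no interpolation is applied to the
        # images or depth maps.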
211 | intensity = rgb2gray(image) # seems to be in range 0 - 1 212 | sample["intensity"] = np.asarray(intensity).astype(np.float32) 213 | grad_y = filters.sobel_h(intensity) 214 | grad_x = filters.sobel_v(intensity) 215 | grad = (grad_x ** 2 + grad_y ** 2) ** (1 / 2) 216 | sample["gradient"] = np.asarray(grad).astype(np.float32) 217 | 218 | # load noisy depth maps 219 | for sensor_ in self.input: 220 | file = self.depth_images[sensor_][item] 221 | if sensor_ == "tof": 222 | depth = io.imread(file).astype(np.float32) 223 | depth /= 1000.0 224 | elif sensor_ == "stereo": 225 | depth = read_array(file) 226 | 227 | try: 228 | step_x = depth.shape[0] / eval("self.resolution_" + sensor_ + "[0]") 229 | step_y = depth.shape[1] / eval("self.resolution_" + sensor_ + "[1]") 230 | except AttributeError: # default values used in case sensor specific parameters do not exist 231 | step_x = depth.shape[0] / self.resolution[0] 232 | step_y = depth.shape[1] / self.resolution[1] 233 | 234 | index_y = [int(step_y * i) for i in range(0, int(depth.shape[1] / step_y))] 235 | index_x = [int(step_x * i) for i in range(0, int(depth.shape[0] / step_x))] 236 | 237 | depth = depth[:, index_y] 238 | depth = depth[index_x, :] 239 | 240 | sample[sensor_ + "_depth"] = np.asarray(depth) 241 | 242 | # plt.imsave('left' +frame +'.png', sample['image']) 243 | # plt.imsave(sensor_ + '_depth' +frame +'.png', sample[sensor_ + '_depth']) 244 | 245 | # define mask 246 | try: 247 | mask = depth > eval("self.min_depth_" + sensor_) 248 | mask = np.logical_and(mask, depth < eval("self.max_depth_" + sensor_)) 249 | 250 | # do not integrate depth values close to the image boundary 251 | mask[0 : eval("self.mask_" + sensor_ + "_height"), :] = 0 252 | mask[-eval("self.mask_" + sensor_ + "_height") : -1, :] = 0 253 | mask[:, 0 : eval("self.mask_" + sensor_ + "_width")] = 0 254 | mask[:, -eval("self.mask_" + sensor_ + "_width") : -1] = 0 255 | sample[sensor_ + "_mask"] = mask 256 | except AttributeError: 257 | mask = depth > self.min_depth 258 | mask = np.logical_and(mask, depth < self.max_depth) 259 | 260 | # do not integrate depth values close to the image boundary 261 | mask[0 : self.mask_height, :] = 0 262 | mask[-self.mask_height : -1, :] = 0 263 | mask[:, 0 : self.mask_width] = 0 264 | mask[:, -self.mask_width : -1] = 0 265 | sample[sensor_ + "_mask"] = mask 266 | 267 | # load extrinsics 268 | extrinsics = self.cameras[scene + "/" + str(int(frame))] 269 | 270 | sample["extrinsics"] = extrinsics 271 | 272 | intrinsics_tof = np.asarray( 273 | [ 274 | [ 275 | 525.0 * self.resolution_tof[1] / 640, 276 | 0.0, 277 | 319.5 * self.resolution_tof[1] / 640, 278 | ], 279 | [ 280 | 0.0, 281 | 525.0 * self.resolution_tof[0] / 480, 282 | 239.5 * self.resolution_tof[0] / 480, 283 | ], 284 | [0.0, 0.0, 1.0], 285 | ] 286 | ) 287 | 288 | sample["intrinsics_tof"] = intrinsics_tof 289 | 290 | sample["intrinsics_tof_2"] = intrinsics_tof 291 | 292 | intrinsics_stereo = np.asarray( 293 | [ 294 | [ 295 | 525.0 * self.resolution_stereo[1] / 640, 296 | 0.0, 297 | 319.5 * self.resolution_stereo[1] / 640, 298 | ], 299 | [ 300 | 0.0, 301 | 525.0 * self.resolution_stereo[0] / 480, 302 | 239.5 * self.resolution_stereo[0] / 480, 303 | ], 304 | [0.0, 0.0, 1.0], 305 | ] 306 | ) 307 | 308 | sample["intrinsics_stereo"] = intrinsics_stereo 309 | 310 | sample["frame_id"] = frame_id 311 | 312 | if self.transform: 313 | sample = self.transform(sample) 314 | 315 | return sample 316 | 317 | def get_grid(self, scene, truncation): 318 | file = os.path.join(self.root_dir, scene, 
"sdf_" + scene + ".hdf") 319 | 320 | # read from hdf file! 321 | f = h5py.File(file, "r") 322 | voxels = np.array(f["sdf"]).astype(np.float16) 323 | 324 | voxels[voxels > truncation] = truncation 325 | voxels[voxels < -truncation] = -truncation 326 | # Add padding to grid to give more room to fusion net 327 | voxels = np.pad(voxels, self.pad, "constant", constant_values=-truncation) 328 | 329 | print(scene, voxels.shape) 330 | bbox = np.zeros((3, 2)) 331 | bbox[:, 0] = f.attrs["bbox"][:, 0] - self.pad * f.attrs["voxel_size"] * np.ones( 332 | (1, 1, 1) 333 | ) 334 | bbox[:, 1] = bbox[:, 0] + f.attrs["voxel_size"] * np.array(voxels.shape) 335 | 336 | return voxels, bbox, f.attrs["voxel_size"] 337 | -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/images/architecture.png -------------------------------------------------------------------------------- /lists/corbs/desk.txt: -------------------------------------------------------------------------------- 1 | desk/colmap/dense/stereo/depth_maps desk/data/D1_pre_registereddata desk/data/D1_pre_registereddata/rgb.txt desk/data/D1_pre_registereddata/depth.txt desk/data/D1_Trajectory/groundtruth.txt 2 | -------------------------------------------------------------------------------- /lists/corbs/human.txt: -------------------------------------------------------------------------------- 1 | human/colmap/dense/stereo/depth_maps human/data/H1_pre_registereddata human/data/H1_pre_registereddata/rgb.txt human/data/H1_pre_registereddata/depth.txt human/data/H1_Trajectory/groundtruth.txt 2 | -------------------------------------------------------------------------------- /lists/replica/test_hotel_0.txt: -------------------------------------------------------------------------------- 1 | hotel_0/3/left_depth_gt hotel_0/3/left_depth_noise_5.0 hotel_0/3/left_psmnet_depth hotel_0/3/left_sgm_depth hotel_0/3/left_rgb hotel_0/3/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/replica/test_office_0.txt: -------------------------------------------------------------------------------- 1 | office_0/1/left_depth_gt office_0/1/left_depth_noise_5.0 office_0/1/left_psmnet_depth office_0/1/left_sgm_depth office_0/1/left_rgb office_0/1/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/replica/test_office_4.txt: -------------------------------------------------------------------------------- 1 | office_4/3/left_depth_gt office_4/3/left_depth_noise_5.0 office_4/3/left_psmnet_depth office_4/3/left_sgm_depth office_4/3/left_rgb office_4/3/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/replica/test_office_4_hotel_0_office_0.txt: -------------------------------------------------------------------------------- 1 | hotel_0/3/left_depth_gt hotel_0/3/left_depth_noise_5.0 hotel_0/3/left_psmnet_depth hotel_0/3/left_sgm_depth hotel_0/3/left_rgb hotel_0/3/left_camera_matrix 2 | office_4/3/left_depth_gt office_4/3/left_depth_noise_5.0 office_4/3/left_psmnet_depth office_4/3/left_sgm_depth office_4/3/left_rgb office_4/3/left_camera_matrix 3 | office_0/1/left_depth_gt office_0/1/left_depth_noise_5.0 office_0/1/left_psmnet_depth office_0/1/left_sgm_depth office_0/1/left_rgb 
office_0/1/left_camera_matrix 4 | -------------------------------------------------------------------------------- /lists/replica/train.txt: -------------------------------------------------------------------------------- 1 | apartment_1/2/left_depth_gt apartment_1/2/left_depth_noise_5.0 apartment_1/2/left_psmnet_depth apartment_1/2/left_sgm_depth apartment_1/2/left_rgb apartment_1/2/left_camera_matrix 2 | apartment_1/1/left_depth_gt apartment_1/1/left_depth_noise_5.0 apartment_1/1/left_psmnet_depth apartment_1/1/left_sgm_depth apartment_1/1/left_rgb apartment_1/1/left_camera_matrix 3 | apartment_1/3/left_depth_gt apartment_1/3/left_depth_noise_5.0 apartment_1/3/left_psmnet_depth apartment_1/3/left_sgm_depth apartment_1/3/left_rgb apartment_1/3/left_camera_matrix 4 | frl_apartment_0/2/left_depth_gt frl_apartment_0/2/left_depth_noise_5.0 frl_apartment_0/2/left_psmnet_depth frl_apartment_0/2/left_sgm_depth frl_apartment_0/2/left_rgb frl_apartment_0/2/left_camera_matrix 5 | frl_apartment_0/1/left_depth_gt frl_apartment_0/1/left_depth_noise_5.0 frl_apartment_0/1/left_psmnet_depth frl_apartment_0/1/left_sgm_depth frl_apartment_0/1/left_rgb frl_apartment_0/1/left_camera_matrix 6 | frl_apartment_0/3/left_depth_gt frl_apartment_0/3/left_depth_noise_5.0 frl_apartment_0/3/left_psmnet_depth frl_apartment_0/3/left_sgm_depth frl_apartment_0/3/left_rgb frl_apartment_0/3/left_camera_matrix 7 | office_1/2/left_depth_gt office_1/2/left_depth_noise_5.0 office_1/2/left_psmnet_depth office_1/2/left_sgm_depth office_1/2/left_rgb office_1/2/left_camera_matrix 8 | office_1/1/left_depth_gt office_1/1/left_depth_noise_5.0 office_1/1/left_psmnet_depth office_1/1/left_sgm_depth office_1/1/left_rgb office_1/1/left_camera_matrix 9 | office_1/3/left_depth_gt office_1/3/left_depth_noise_5.0 office_1/3/left_psmnet_depth office_1/3/left_sgm_depth office_1/3/left_rgb office_1/3/left_camera_matrix 10 | room_2/2/left_depth_gt room_2/2/left_depth_noise_5.0 room_2/2/left_psmnet_depth room_2/2/left_sgm_depth room_2/2/left_rgb room_2/2/left_camera_matrix 11 | room_2/1/left_depth_gt room_2/1/left_depth_noise_5.0 room_2/1/left_psmnet_depth room_2/1/left_sgm_depth room_2/1/left_rgb room_2/1/left_camera_matrix 12 | room_2/3/left_depth_gt room_2/3/left_depth_noise_5.0 room_2/3/left_psmnet_depth room_2/3/left_sgm_depth room_2/3/left_rgb room_2/3/left_camera_matrix 13 | office_3/2/left_depth_gt office_3/2/left_depth_noise_5.0 office_3/2/left_psmnet_depth office_3/2/left_sgm_depth office_3/2/left_rgb office_3/2/left_camera_matrix 14 | office_3/1/left_depth_gt office_3/1/left_depth_noise_5.0 office_3/1/left_psmnet_depth office_3/1/left_sgm_depth office_3/1/left_rgb office_3/1/left_camera_matrix 15 | office_3/3/left_depth_gt office_3/3/left_depth_noise_5.0 office_3/3/left_psmnet_depth office_3/3/left_sgm_depth office_3/3/left_rgb office_3/3/left_camera_matrix 16 | room_0/2/left_depth_gt room_0/2/left_depth_noise_5.0 room_0/2/left_psmnet_depth room_0/2/left_sgm_depth room_0/2/left_rgb room_0/2/left_camera_matrix 17 | room_0/1/left_depth_gt room_0/1/left_depth_noise_5.0 room_0/1/left_psmnet_depth room_0/1/left_sgm_depth room_0/1/left_rgb room_0/1/left_camera_matrix 18 | room_0/3/left_depth_gt room_0/3/left_depth_noise_5.0 room_0/3/left_psmnet_depth room_0/3/left_sgm_depth room_0/3/left_rgb room_0/3/left_camera_matrix 19 | -------------------------------------------------------------------------------- /lists/replica/val.txt: -------------------------------------------------------------------------------- 1 | frl_apartment_1/1/left_depth_gt 
frl_apartment_1/1/left_depth_noise_5.0 frl_apartment_1/1/left_psmnet_depth frl_apartment_1/1/left_sgm_depth frl_apartment_1/1/left_rgb frl_apartment_1/1/left_camera_matrix 2 | -------------------------------------------------------------------------------- /lists/scene3d/copyroom.txt: -------------------------------------------------------------------------------- 1 | copyroom/images copyroom/copyroom_png/depth copyroom/dense/stereo/depth_maps copyroom/copyroom_trajectory.log 2 | 3 | -------------------------------------------------------------------------------- /lists/scene3d/stonewall.txt: -------------------------------------------------------------------------------- 1 | stonewall/images stonewall/stonewall_png/depth stonewall/dense/stereo/depth_maps stonewall/stonewall_trajectory.log 2 | -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet_routedfusion/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet_routing/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet_routing/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/sgm_psmnet_routing_routedfusion/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/sgm_psmnet_routing_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_mvs_corbs/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_mvs_corbs/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_mvs_scene3d/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_mvs_scene3d/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet_routedfusion/model/best.pth.tar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet_routing/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet_routing/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_psmnet_routing_routedfusion/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_psmnet_routing_routedfusion/model/best.pth.tar -------------------------------------------------------------------------------- /models/fusion/tof_tof_scene3d_collab_rec/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/fusion/tof_tof_scene3d_collab_rec/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/sgm/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/sgm/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/sgm_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/sgm_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/tof/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/tof/model/best.pth.tar -------------------------------------------------------------------------------- /models/routing/tof_psmnet/model/best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/models/routing/tof_psmnet/model/best.pth.tar -------------------------------------------------------------------------------- /modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/modules/__init__.py -------------------------------------------------------------------------------- /modules/database.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | from modules.voxelgrid import VoxelGrid, FeatureGrid 8 | 9 | from utils.metrics import evaluation 10 | 11 | 12 | class Database(Dataset): 13 | def __init__(self, dataset, config): 14 | 15 | super(Database, self).__init__() 16 | 17 | self.transform = config.transform 18 | self.initial_value = config.init_value 19 | self.trunc_value = config.trunc_value 20 | self.n_features = config.n_features # this includes the append_depth option 21 | self.sensors = config.input 22 | self.test_mode = config.test_mode 23 | self.alpha_supervision = config.alpha_supervision 24 | self.outlier_channel = config.outlier_channel 25 | 26 | self.scenes_gt = {} 27 | self.tsdf = {} 28 | self.fusion_weights = {} 29 | self.features = {} 30 | 31 | for sensor_ in config.input: 32 | self.tsdf[sensor_] = {} 33 | self.fusion_weights[sensor_] = {} 34 | self.features[sensor_] = {} 35 | 36 | self.filtered = {} # grid to store the fused sdf prediction 37 | if config.test_mode: 38 | self.sensor_weighting = {} 39 | 40 | if self.alpha_supervision: 41 | self.proxy_alpha = {} 42 | 43 | for s in dataset.scenes: 44 | grid, bbox, voxel_size = dataset.get_grid(s, truncation=self.trunc_value) 45 | if self.alpha_supervision: 46 | self.proxy_alpha[s] = dataset.get_proxy_alpha_grid(s) 47 | self.scenes_gt[s] = VoxelGrid(voxel_size, grid, bbox) 48 | 49 | for sensor in config.input: 50 | self.fusion_weights[sensor][s] = np.zeros( 51 | self.scenes_gt[s].shape, dtype=np.float16 52 | ) 53 | 54 | self.features[sensor][s] = FeatureGrid( 55 | voxel_size, self.n_features, bbox 56 | ) 57 | 58 | self.tsdf[sensor][s] = VoxelGrid( 59 | voxel_size, 60 | volume=None, 61 | bbox=bbox, 62 | initial_value=self.initial_value, 63 | ) 64 | 65 | self.filtered[s] = VoxelGrid( 66 | voxel_size, 67 | volume=None, 68 | bbox=bbox, 69 | initial_value=self.initial_value, 70 | ) 71 | if config.test_mode: 72 | if config.outlier_channel: 73 | sensor_weighting_shape = ( 74 | 2, 75 | self.scenes_gt[s].shape[0], 76 | self.scenes_gt[s].shape[1], 77 | self.scenes_gt[s].shape[2], 78 | ) 79 | self.sensor_weighting[s] = -np.ones( 80 | sensor_weighting_shape, dtype=np.float16 81 | ) 82 | else: 83 | # initialize to negative so that we know what values are initialized without needing the mask later in the visualization script 84 | self.sensor_weighting[s] = -np.ones( 85 | self.scenes_gt[s].shape, dtype=np.float16 86 | ) 87 | 88 | def __getitem__(self, item): 89 | 90 | sample = dict() 91 | 92 | sample["gt"] = self.scenes_gt[item].volume 93 | if self.alpha_supervision: 94 | sample["proxy_alpha"] = self.proxy_alpha[item] 95 | sample["origin"] = self.scenes_gt[item].origin 96 | sample["resolution"] = self.scenes_gt[item].resolution 97 | sample["filtered"] = self.filtered[item].volume 98 | if self.test_mode: 99 | sample["sensor_weighting"] = self.sensor_weighting[item] 100 | for sensor_ in self.sensors: 101 | sample["tsdf_" + sensor_] = self.tsdf[sensor_][item].volume 102 | sample["weights_" + sensor_] = self.fusion_weights[sensor_][item] 103 | sample["features_" + sensor_] = self.features[sensor_][item].volume 104 | 105 | if self.transform is not None: 106 | sample = self.transform(sample) 107 | 108 | return sample 109 | 110 | def __len__(self): 111 | return len(self.scenes_gt) 112 | 113 | def save(self, path, scene_id=None): 114 | 115 | for sensor in self.sensors: 116 | filename = scene_id + "_" + sensor 
+ ".tsdf.hf5" 117 | weightname = scene_id + "_" + sensor + ".weights.hf5" 118 | featurename = scene_id + "_" + sensor + ".features.hf5" 119 | 120 | with h5py.File(os.path.join(path, filename), "w") as hf: 121 | hf.create_dataset( 122 | "TSDF", 123 | shape=self.tsdf[sensor][scene_id].volume.shape, 124 | data=self.tsdf[sensor][scene_id].volume, 125 | compression="gzip", 126 | compression_opts=9, 127 | ) 128 | with h5py.File(os.path.join(path, weightname), "w") as hf: 129 | hf.create_dataset( 130 | "weights", 131 | shape=self.fusion_weights[sensor][scene_id].shape, 132 | data=self.fusion_weights[sensor][scene_id], 133 | compression="gzip", 134 | compression_opts=9, 135 | ) 136 | 137 | sdfname = scene_id + ".tsdf_filtered.hf5" 138 | with h5py.File(os.path.join(path, sdfname), "w") as hf: 139 | hf.create_dataset( 140 | "TSDF_filtered", 141 | shape=self.filtered[scene_id].volume.shape, 142 | data=self.filtered[scene_id].volume, 143 | compression="gzip", 144 | compression_opts=9, 145 | ) 146 | 147 | if self.test_mode: 148 | sensor_weighting_name = scene_id + ".sensor_weighting.hf5" 149 | with h5py.File(os.path.join(path, sensor_weighting_name), "w") as hf: 150 | hf.create_dataset( 151 | "sensor_weighting", 152 | shape=self.sensor_weighting[scene_id].shape, 153 | data=self.sensor_weighting[scene_id], 154 | compression="gzip", 155 | compression_opts=9, 156 | ) 157 | 158 | def evaluate(self, mode="train", workspace=None): 159 | 160 | eval_results = {} 161 | eval_results_scene_save = {} 162 | for sensor in self.sensors: 163 | eval_results[sensor] = {} 164 | eval_results_scene_save[sensor] = {} 165 | 166 | eval_results_filt = {} 167 | eval_results_scene_save_filt = {} 168 | if workspace is not None: 169 | workspace.log( 170 | "-------------------------------------------------------", mode 171 | ) 172 | for scene_id in self.scenes_gt.keys(): 173 | if workspace is None: 174 | print("Evaluating ", scene_id, "...") 175 | else: 176 | workspace.log("Evaluating {} ...".format(scene_id), mode) 177 | est = {} 178 | mask, mask_filt = self.get_evaluation_masks(scene_id) 179 | 180 | for sensor in self.sensors: 181 | est[sensor] = self.tsdf[sensor][scene_id].volume 182 | 183 | est_filt = self.filtered[scene_id].volume 184 | gt = self.scenes_gt[scene_id].volume 185 | 186 | eval_results_scene = dict() 187 | for sensor_ in self.sensors: 188 | eval_results_scene[sensor_] = evaluation( 189 | est[sensor_], gt, mask[sensor_] 190 | ) 191 | 192 | eval_results_scene_filt = evaluation(est_filt, gt, mask_filt) 193 | 194 | del est, gt, mask, est_filt, mask_filt 195 | 196 | for sensor in self.sensors: 197 | eval_results_scene_save[sensor][scene_id] = eval_results_scene[sensor] 198 | eval_results_scene_save_filt[scene_id] = eval_results_scene_filt 199 | 200 | for key in eval_results_scene_filt.keys(): 201 | if workspace is None: 202 | for sensor in self.sensors: 203 | print(sensor, " ", key, eval_results_scene[sensor][key]) 204 | print("filtered ", key, eval_results_scene_filt[key]) 205 | else: 206 | for sensor in self.sensors: 207 | workspace.log( 208 | "{} {}".format(key, eval_results_scene[sensor][key]), mode 209 | ) 210 | workspace.log( 211 | "{} {}".format(key, eval_results_scene_filt[key]), mode 212 | ) 213 | 214 | if not eval_results_filt.get(key): # iou, mad, mse, acc as keys 215 | for sensor in self.sensors: 216 | eval_results[sensor][key] = eval_results_scene[sensor][key] 217 | eval_results_filt[key] = eval_results_scene_filt[key] 218 | else: 219 | for sensor in self.sensors: 220 | eval_results[sensor][key] += 
eval_results_scene[sensor][key] 221 | eval_results_filt[key] += eval_results_scene_filt[key] 222 | 223 | # normalizing metrics 224 | for key in eval_results_filt.keys(): 225 | for sensor in self.sensors: 226 | eval_results[sensor][key] /= len(self.scenes_gt.keys()) 227 | eval_results_filt[key] /= len(self.scenes_gt.keys()) 228 | 229 | if mode == "test": 230 | return ( 231 | eval_results, 232 | eval_results_filt, 233 | eval_results_scene_save, 234 | eval_results_scene_save_filt, 235 | ) 236 | else: 237 | return eval_results, eval_results_filt 238 | 239 | def reset(self, scene_id=None): 240 | if scene_id: 241 | for sensor in self.sensors: 242 | self.tsdf[sensor][scene_id].volume = self.initial_value * np.ones( 243 | self.scenes_gt[scene_id].shape, dtype=np.float16 244 | ) 245 | self.fusion_weights[sensor][scene_id] = np.zeros( 246 | self.scenes_gt[scene_id].shape, dtype=np.float16 247 | ) 248 | self.features[sensor][scene_id].volume = np.zeros( 249 | self.features[sensor][scene_id].shape, dtype=np.float16 250 | ) 251 | else: 252 | for scene_id in self.scenes_gt.keys(): 253 | for sensor in self.sensors: 254 | self.tsdf[sensor][scene_id].volume = self.initial_value * np.ones( 255 | self.scenes_gt[scene_id].shape, dtype=np.float16 256 | ) 257 | self.fusion_weights[sensor][scene_id] = np.zeros( 258 | self.scenes_gt[scene_id].shape, dtype=np.float16 259 | ) 260 | self.features[sensor][scene_id].volume = np.zeros( 261 | self.features[sensor][scene_id].shape, dtype=np.float16 262 | ) 263 | 264 | def get_evaluation_masks(self, scene): 265 | sensor_mask = {} 266 | mask = np.zeros_like(self[scene]["gt"]) 267 | and_mask = np.ones_like(self[scene]["gt"]) 268 | filter_mask = np.zeros_like(self[scene]["gt"]) 269 | sensor_mask_filtering = {} 270 | 271 | for sensor_ in self.sensors: 272 | weights = self.fusion_weights[sensor_][scene] 273 | mask = np.logical_or(mask, weights > 0) 274 | and_mask = np.logical_and(and_mask, weights > 0) 275 | sensor_mask[sensor_] = weights > 0 276 | 277 | # load weighting sensor grid 278 | if self.outlier_channel: 279 | sensor_weighting = self.sensor_weighting[scene][1, :, :, :] 280 | else: 281 | sensor_weighting = self.sensor_weighting[scene] 282 | 283 | only_one_sensor_mask = np.logical_xor(mask, and_mask) 284 | for sensor_ in self.sensors: 285 | 286 | only_sensor_mask = np.logical_and( 287 | only_one_sensor_mask, sensor_mask[sensor_] 288 | ) 289 | if sensor_ == self.sensors[0]: 290 | rem_indices = np.logical_and(only_sensor_mask, sensor_weighting < 0.5) 291 | else: 292 | rem_indices = np.logical_and(only_sensor_mask, sensor_weighting > 0.5) 293 | 294 | sensor_mask_filtering[sensor_] = sensor_mask[sensor_].copy() 295 | sensor_mask_filtering[sensor_][rem_indices] = 0 296 | 297 | for sensor_ in self.sensors: 298 | filter_mask = np.logical_or(filter_mask, sensor_mask_filtering[sensor_] > 0) 299 | 300 | return sensor_mask, filter_mask 301 | -------------------------------------------------------------------------------- /modules/filtering_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | 6 | class FilteringNet(nn.Module): 7 | def __init__(self, config): 8 | 9 | super(FilteringNet, self).__init__() 10 | 11 | self.config = config 12 | self.trunc_value = config.DATA.trunc_value 13 | self.sensors = config.DATA.input 14 | self.feature_to_weight_head = ( 15 | config.FILTERING_MODEL.CONV3D_MODEL.features_to_weight_head 16 | ) 17 | self.weight_to_weight_head = ( 18 | 
config.FILTERING_MODEL.CONV3D_MODEL.weights_to_weight_head 19 | ) 20 | self.sdf_to_weight_head = config.FILTERING_MODEL.CONV3D_MODEL.sdf_to_weight_head 21 | self.weighting_complexity = ( 22 | config.FILTERING_MODEL.CONV3D_MODEL.weighting_complexity 23 | ) 24 | self.activation = eval(config.FILTERING_MODEL.CONV3D_MODEL.activation) 25 | self.n_features = config.FEATURE_MODEL.n_features 26 | self.alpha_supervision = config.LOSS.alpha_supervision 27 | self.alpha_single_sensor_supervision = ( 28 | config.LOSS.alpha_single_sensor_supervision 29 | ) 30 | bias_wn = config.FILTERING_MODEL.CONV3D_MODEL.bias 31 | self.outlier_channel = config.FILTERING_MODEL.CONV3D_MODEL.outlier_channel 32 | 33 | # alpha layer 34 | if self.weighting_complexity == "1layer": 35 | self.weight_decoder = nn.Conv3d( 36 | len(self.sensors) 37 | * ( 38 | self.sdf_to_weight_head 39 | + self.n_features * self.feature_to_weight_head 40 | + self.weight_to_weight_head 41 | ), 42 | 1, 43 | 1, 44 | padding=0, 45 | bias=bias_wn, 46 | ) 47 | elif self.weighting_complexity == "2layer": 48 | self.weight_decoder = nn.Sequential( 49 | nn.Conv3d( 50 | len(self.sensors) 51 | * ( 52 | self.sdf_to_weight_head 53 | + self.n_features * self.feature_to_weight_head 54 | + self.weight_to_weight_head 55 | ), 56 | 16, 57 | 3, 58 | padding=1, 59 | padding_mode="replicate", 60 | bias=bias_wn, 61 | ), 62 | self.activation, 63 | nn.Conv3d(16, 1 + self.outlier_channel, 1, padding=0, bias=bias_wn), 64 | ) 65 | elif self.weighting_complexity == "3layer": 66 | self.weight_decoder = nn.Sequential( 67 | nn.Conv3d( 68 | len(self.sensors) 69 | * ( 70 | self.sdf_to_weight_head 71 | + self.n_features * self.feature_to_weight_head 72 | + self.weight_to_weight_head 73 | ), 74 | 32, 75 | 3, 76 | padding=1, 77 | padding_mode="replicate", 78 | bias=bias_wn, 79 | ), 80 | self.activation, 81 | nn.Conv3d(32, 16, 3, padding=1, padding_mode="replicate", bias=bias_wn), 82 | self.activation, 83 | nn.Conv3d(16, 1, 1, padding=0, bias=bias_wn), 84 | ) 85 | 86 | elif self.weighting_complexity == "4layer": 87 | self.weight_decoder = nn.Sequential( 88 | nn.Conv3d( 89 | len(self.sensors) 90 | * ( 91 | self.sdf_to_weight_head 92 | + self.n_features * self.feature_to_weight_head 93 | + self.weight_to_weight_head 94 | ), 95 | 32, 96 | 3, 97 | padding=1, 98 | padding_mode="replicate", 99 | bias=bias_wn, 100 | ), 101 | self.activation, 102 | nn.Conv3d(32, 32, 3, padding=1, padding_mode="replicate", bias=bias_wn), 103 | self.activation, 104 | nn.Conv3d(32, 16, 3, padding=1, padding_mode="replicate", bias=bias_wn), 105 | self.activation, 106 | nn.Conv3d(16, 1 + self.outlier_channel, 1, padding=0, bias=bias_wn), 107 | ) 108 | elif self.weighting_complexity == "5layer": 109 | self.weight_decoder = nn.Sequential( 110 | nn.Conv3d( 111 | len(self.sensors) 112 | * ( 113 | self.sdf_to_weight_head 114 | + self.n_features * self.feature_to_weight_head 115 | + self.weight_to_weight_head 116 | ), 117 | 32, 118 | 3, 119 | padding=1, 120 | padding_mode="replicate", 121 | bias=bias_wn, 122 | ), 123 | self.activation, 124 | nn.Conv3d(32, 32, 3, padding=1, padding_mode="replicate", bias=bias_wn), 125 | self.activation, 126 | nn.Conv3d(32, 32, 3, padding=1, padding_mode="replicate", bias=bias_wn), 127 | self.activation, 128 | nn.Conv3d(32, 16, 3, padding=1, padding_mode="replicate", bias=bias_wn), 129 | self.activation, 130 | nn.Conv3d(16, 1 + self.outlier_channel, 1, padding=0, bias=bias_wn), 131 | ) 132 | 133 | self.tanh = nn.Tanh() 134 | self.sigmoid = nn.Sigmoid() 135 | self.softmax = 
nn.Softmax(dim=1) 136 | 137 | def forward(self, neighborhood): 138 | weight = dict() 139 | sdf = dict() 140 | enc = dict() 141 | output = dict() 142 | 143 | for sensor_ in self.sensors: 144 | sdf[sensor_] = neighborhood[sensor_][:, 0, :, :, :] 145 | 146 | weight[sensor_] = neighborhood[sensor_][:, 1, :, :, :].unsqueeze(1) 147 | 148 | for sensor_ in self.sensors: 149 | output["tsdf_" + sensor_] = sdf[sensor_].squeeze() 150 | output[sensor_ + "_init"] = weight[sensor_].squeeze() > 0 151 | 152 | input_ = None 153 | alpha_val = dict() 154 | 155 | for k, sensor_ in enumerate(self.config.DATA.input): 156 | inp = None 157 | if self.sdf_to_weight_head: 158 | if inp is None: 159 | inp = neighborhood[sensor_][:, 0, :, :, :].unsqueeze(1) 160 | else: 161 | inp = torch.cat( 162 | (inp, neighborhood[sensor_][:, 0, :, :, :].unsqueeze(1)), 163 | dim=1, 164 | ) 165 | if self.feature_to_weight_head: 166 | if inp is None: 167 | inp = neighborhood[sensor_][:, 2:, :, :, :] 168 | else: 169 | inp = torch.cat((inp, neighborhood[sensor_][:, 2:, :, :, :]), dim=1) 170 | if self.weight_to_weight_head: 171 | if self.config.FILTERING_MODEL.CONV3D_MODEL.tanh_weight: 172 | if self.config.FILTERING_MODEL.CONV3D_MODEL.inverted_weight: 173 | weights = torch.ones_like( 174 | neighborhood[sensor_][:, 1, :, :, :].unsqueeze(1) 175 | ) - self.tanh(neighborhood[sensor_][:, 1, :, :, :]) 176 | else: 177 | weights = self.tanh( 178 | neighborhood[sensor_][:, 1, :, :, :] 179 | ).unsqueeze(1) 180 | else: 181 | weights = neighborhood[sensor_][:, 1, :, :, :].unsqueeze(1) 182 | 183 | if inp is None: 184 | inp = weights 185 | else: 186 | inp = torch.cat((inp, weights), dim=1) 187 | 188 | if input_ is None: 189 | input_ = inp 190 | else: 191 | input_ = torch.cat((input_, inp), dim=1) 192 | 193 | if k == 0: 194 | alpha_val[sensor_] = torch.zeros_like(sdf[sensor_]) 195 | else: 196 | alpha_val[sensor_] = torch.ones_like(sdf[sensor_]) 197 | 198 | if input_.isnan().sum() > 0: 199 | print("Input isnan: ", input_.isnan().sum()) 200 | 201 | alpha = self.sigmoid(self.weight_decoder(input_)) 202 | 203 | if alpha.isnan().sum() > 0 or alpha.isinf().sum() > 0: 204 | print("alpha nan: ", alpha.isnan().sum()) 205 | print("alpha inf: ", alpha.isinf().sum()) 206 | return None 207 | 208 | if ( 209 | neighborhood["test_mode"] 210 | or self.alpha_supervision 211 | or self.alpha_single_sensor_supervision 212 | ): 213 | output["sensor_weighting"] = alpha.squeeze() 214 | 215 | if self.outlier_channel: 216 | alpha_sdf = alpha[:, 0, :, :, :] 217 | else: 218 | alpha_sdf = alpha 219 | 220 | # this step is to not filter the voxels where we only have one sensor observation. 221 | # Note that we save the variable alpha and not alpha_sdf so we can still 222 | # use the outlier filter as usual. 
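# Illustrative sketch (not part of the forward pass) of the two-sensor blend
# performed below, assuming self.sensors == ["tof", "stereo"] with a per-voxel
# weight alpha in [0, 1] predicted for the first sensor:
#
#     alpha = torch.where(weight["tof"] == 0,    torch.zeros_like(alpha), alpha)
#     alpha = torch.where(weight["stereo"] == 0, torch.ones_like(alpha),  alpha)
#     fused = alpha * sdf["tof"] + (1.0 - alpha) * sdf["stereo"]
#
# i.e. where only one sensor has observations, alpha is overridden so that the
# fused TSDF falls back to that sensor alone; everywhere else the learned alpha
# interpolates between the two per-sensor TSDF values.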
223 | for sensor_ in self.config.DATA.input: 224 | alpha_sdf = torch.where(weight[sensor_] == 0, alpha_val[sensor_], alpha_sdf) 225 | 226 | sdf_final = None 227 | 228 | for k, sensor_ in enumerate(self.config.DATA.input): 229 | if k == 0: 230 | sdf_final = alpha_sdf * sdf[sensor_] 231 | else: 232 | sdf_final += (1 - alpha_sdf) * sdf[sensor_] 233 | 234 | output["tsdf"] = sdf_final.squeeze() 235 | 236 | return output 237 | -------------------------------------------------------------------------------- /modules/integrator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Integrator(torch.nn.Module): 5 | def __init__(self, config): 6 | 7 | super(Integrator, self).__init__() 8 | 9 | self.device = config.device 10 | self.max_weight = config.max_weight 11 | self.extraction_strategy = config.extraction_strategy 12 | self.n_empty_space_voting = config.n_empty_space_voting 13 | self.trunc_value = config.trunc_value 14 | 15 | def forward( 16 | self, 17 | integrator_input, 18 | values_volume, 19 | features_volume, 20 | weights_volume, 21 | ): 22 | xs, ys, zs = values_volume.shape 23 | 24 | # unpack data 25 | values = integrator_input["update_values"].to(self.device) 26 | features = integrator_input["update_features"].to(self.device) 27 | indices = integrator_input["update_indices"].to(self.device) 28 | weights = integrator_input["update_weights"].to( 29 | self.device 30 | ) # update weights. When using nearest neighbor interpolation these are all ones. 31 | 32 | if self.n_empty_space_voting > 0: 33 | indices_empty = integrator_input["update_indices_empty"].to(self.device) 34 | weights_empty = integrator_input["update_weights_empty"].to(self.device) 35 | 36 | ( 37 | n1, 38 | n2, 39 | n3, 40 | f4, 41 | ) = features.shape 42 | 43 | # reshape tensors 44 | features = features.contiguous().view(-1, f4).float() 45 | values = values.contiguous().view(-1, 1).float() 46 | 47 | if self.extraction_strategy == "trilinear_interpolation": 48 | features = features.repeat(8, 1) 49 | values = values.repeat(1, 8) 50 | indices = indices.contiguous().view(-1, 8, 3).long() 51 | weights = weights.contiguous().view(-1, 8) 52 | if self.n_empty_space_voting > 0: 53 | indices_empty = indices_empty.contiguous().view(-1, 8, 3).long() 54 | weights_empty = weights_empty.contiguous().view(-1, 8) 55 | elif self.extraction_strategy == "nearest_neighbor": 56 | values = values.repeat(1, 1) 57 | indices = indices.contiguous().view(-1, 1, 3).long() 58 | weights = weights.contiguous().view(-1, 1) 59 | if self.n_empty_space_voting > 0: 60 | indices_empty = indices_empty.contiguous().view(-1, 1, 3).long() 61 | weights_empty = weights_empty.contiguous().view(-1, 1) 62 | 63 | values = values.contiguous().view(-1, 1).float() 64 | indices = indices.contiguous().view(-1, 3).long() 65 | 66 | if self.n_empty_space_voting > 0: 67 | indices_empty = indices_empty.contiguous().view(-1, 3).long() 68 | weights_empty = weights_empty.contiguous().view(-1, 1).float() 69 | 70 | weights = weights.contiguous().view(-1, 1).float() 71 | 72 | # get valid indices 73 | valid = get_index_mask(indices, values_volume.shape) 74 | indices = extract_indices(indices, mask=valid) 75 | if self.n_empty_space_voting > 0: 76 | valid_empty = get_index_mask(indices_empty, values_volume.shape) 77 | indices_empty = extract_indices(indices_empty, mask=valid_empty) 78 | 79 | feature_indices = indices.clone() 80 | 81 | # remove the invalid entries from the values, features and weights 82 | valid_features = 
valid.clone().unsqueeze_(-1) 83 | features = torch.masked_select(features, valid_features.repeat(1, f4)) 84 | features = features.view(int(features.shape[0] / f4), f4) 85 | 86 | values = torch.masked_select(values[:, 0], valid) 87 | weights = torch.masked_select(weights[:, 0], valid) 88 | if self.n_empty_space_voting > 0: 89 | weights_empty = torch.masked_select(weights_empty[:, 0], valid_empty) 90 | 91 | update_feat = weights.repeat(f4, 1).permute(1, 0) * features 92 | del features 93 | 94 | update = weights * values 95 | del values 96 | 97 | # aggregate updates to the same index 98 | 99 | # tsdf 100 | index = ys * zs * indices[:, 0] + zs * indices[:, 1] + indices[:, 2] 101 | indices_insert = torch.unique_consecutive(indices[index.sort()[1]], dim=0) 102 | vcache = torch.sparse.FloatTensor( 103 | index.unsqueeze_(0), update, torch.Size([xs * ys * zs]) 104 | ).coalesce() 105 | update = vcache.values() 106 | 107 | if indices_insert.shape[0] != update.shape[0]: 108 | print("wrong dim!") 109 | del vcache 110 | 111 | # if using the same extraction procedure for fusion and feature updates 112 | update_feat_weights = weights 113 | 114 | # weights for tsdf 115 | wcache = torch.sparse.FloatTensor( 116 | index, weights, torch.Size([xs * ys * zs]) 117 | ).coalesce() # this line adds the values at the same index together 118 | indices = wcache.indices().squeeze() 119 | weights = wcache.values() 120 | 121 | del wcache 122 | 123 | if self.n_empty_space_voting > 0: 124 | # weights for empty indices 125 | index_empty = ( 126 | ys * zs * indices_empty[:, 0] 127 | + zs * indices_empty[:, 1] 128 | + indices_empty[:, 2] 129 | ) 130 | indices_empty_insert = torch.unique_consecutive( 131 | indices_empty[index_empty.sort()[1]], dim=0 132 | ) 133 | wcache_empty = torch.sparse.FloatTensor( 134 | index_empty.unsqueeze_(0), weights_empty, torch.Size([xs * ys * zs]) 135 | ).coalesce() # this line adds the values at the same index together 136 | indices_empty = wcache_empty.indices().squeeze() 137 | weights_empty = wcache_empty.values() 138 | del wcache_empty 139 | 140 | # features 141 | feature_index = ( 142 | ys * zs * feature_indices[:, 0] 143 | + zs * feature_indices[:, 1] 144 | + feature_indices[:, 2] 145 | ) 146 | feature_indices_insert = torch.unique_consecutive( 147 | feature_indices[feature_index.sort()[1]], dim=0 148 | ) 149 | fcache = torch.sparse.FloatTensor( 150 | feature_index.unsqueeze_(0), update_feat, torch.Size([xs * ys * zs, f4]) 151 | ).coalesce() 152 | 153 | feature_indices = fcache.indices().squeeze() 154 | update_feat = fcache.values() 155 | if feature_indices_insert.shape[0] != update_feat.shape[0]: 156 | print("wrong dim feat!") 157 | del fcache 158 | 159 | # feature weights 160 | wcache_feat = torch.sparse.FloatTensor( 161 | feature_index, update_feat_weights, torch.Size([xs * ys * zs]) 162 | ).coalesce() 163 | weights_feat = wcache_feat.values().unsqueeze_(-1).repeat(1, f4).float() 164 | del wcache_feat 165 | 166 | # tsdf and weights update 167 | values_old = values_volume.view(xs * ys * zs)[indices] 168 | weights_old = weights_volume.view(xs * ys * zs)[indices] 169 | value_update = (weights_old * values_old + update) / (weights_old + weights) 170 | weight_update = weights_old + weights 171 | weight_update = torch.clamp(weight_update, 0, self.max_weight) 172 | 173 | if self.n_empty_space_voting > 0: 174 | # empty space update 175 | values_old_empty = values_volume.view(xs * ys * zs)[indices_empty] 176 | weights_old_empty = weights_volume.view(xs * ys * zs)[indices_empty] 177 | 
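# The empty-space update below is the same weighted running average as the
# TSDF update above, except that the "observed" value is the truncation
# distance itself, which pulls free-space voxels towards +trunc_value.
# Worked example (hypothetical numbers): with v_old = 0.02, w_old = 3,
# trunc_value = 0.05 and an empty-space weight of 1, the voxel becomes
# (3 * 0.02 + 0.05 * 1) / (3 + 1) = 0.0275 and its weight becomes 4.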
value_update_empty = torch.add( 178 | weights_old_empty * values_old_empty, self.trunc_value * weights_empty 179 | ) / (weights_old_empty + weights_empty) 180 | weight_update_empty = weights_old_empty + weights_empty 181 | weight_update_empty = torch.clamp(weight_update_empty, 0, self.max_weight) 182 | 183 | # feature update 184 | feature_weights_old = ( 185 | weights_volume.view(xs * ys * zs)[feature_indices] 186 | .unsqueeze_(-1) 187 | .repeat(1, f4) 188 | .float() 189 | ) 190 | 191 | features_old = features_volume.view(xs * ys * zs, f4)[feature_indices] 192 | 193 | # here we should not multiply the update_feat with weights_feat in the nominator since we already have that baked in 194 | feature_update = (feature_weights_old * features_old + update_feat) / ( 195 | feature_weights_old + weights_feat 196 | ) 197 | 198 | del update_feat, feature_weights_old, weights_feat 199 | 200 | # inser tsdf and tsdf weights 201 | insert_values(value_update, indices_insert, values_volume) 202 | insert_values(weight_update, indices_insert, weights_volume) 203 | 204 | # insert features 205 | insert_values(feature_update, feature_indices_insert, features_volume) 206 | 207 | if self.n_empty_space_voting > 0: 208 | # insert empty tsdf and weights 209 | insert_values(value_update_empty, indices_empty_insert, values_volume) 210 | insert_values(weight_update_empty, indices_empty_insert, weights_volume) 211 | 212 | return ( 213 | values_volume, 214 | features_volume, 215 | weights_volume, 216 | indices_insert, 217 | ) 218 | 219 | 220 | def get_index_mask(indices, shape): 221 | """Method to check whether indices are valid. 222 | 223 | Args: 224 | indices: indices to check 225 | shape: constraints for indices 226 | 227 | Returns: 228 | mask 229 | """ 230 | 231 | xs, ys, zs = shape 232 | 233 | valid = ( 234 | (indices[:, 0] >= 0) 235 | & (indices[:, 0] < xs) 236 | & (indices[:, 1] >= 0) 237 | & (indices[:, 1] < ys) 238 | & (indices[:, 2] >= 0) 239 | & (indices[:, 2] < zs) 240 | ) 241 | 242 | return valid 243 | 244 | 245 | def extract_indices(indices, mask): 246 | """Method to extract indices according to mask.""" 247 | 248 | x = torch.masked_select(indices[:, 0], mask) 249 | y = torch.masked_select(indices[:, 1], mask) 250 | z = torch.masked_select(indices[:, 2], mask) 251 | 252 | masked_indices = torch.cat( 253 | (x.unsqueeze_(1), y.unsqueeze_(1), z.unsqueeze_(1)), dim=1 254 | ) 255 | return masked_indices 256 | 257 | 258 | def insert_values(values, indices, volume): 259 | """Method to insert values back into volume.""" 260 | 261 | if volume.dim() == 3: 262 | volume = volume.half() 263 | volume[indices[:, 0], indices[:, 1], indices[:, 2]] = values.half() 264 | else: 265 | volume = volume.half() 266 | volume[indices[:, 0], indices[:, 1], indices[:, 2], :] = values.half() 267 | -------------------------------------------------------------------------------- /modules/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | 6 | class FusionNet(nn.Module): 7 | def __init__(self, config, sensor): 8 | 9 | super(FusionNet, self).__init__() 10 | 11 | self.scale = config.output_scale 12 | self.conf = config.confidence 13 | 14 | try: 15 | self.n_channels = ( 16 | 2 * eval("config.n_points_" + sensor) + 1 + int(config.confidence) 17 | ) 18 | self.n_points = eval("config.n_points_" + sensor) 19 | except AttributeError: 20 | self.n_channels = 2 * config.n_points + 1 + int(config.confidence) 21 | self.n_points = config.n_points 22 | 23 | 
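# Channel bookkeeping (hypothetical numbers for illustration): with
# n_points = 9 and confidence = False, n_channels = 2 * 9 + 1 = 19; enabling
# the confidence input adds one more plane. The dense blocks below keep this
# channel count and concatenate their inputs, so block k consumes
# k * n_channels channels and pred1 receives 5 * n_channels.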
self.block1 = nn.Sequential( 24 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 25 | nn.BatchNorm2d(self.n_channels), 26 | nn.LeakyReLU(), 27 | nn.Dropout2d(p=0.2), 28 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 29 | nn.BatchNorm2d(self.n_channels), 30 | nn.LeakyReLU(), 31 | nn.Dropout2d(p=0.2), 32 | ) 33 | 34 | self.block2 = nn.Sequential( 35 | nn.Conv2d(2 * self.n_channels, self.n_channels, (3, 3), padding=1), 36 | nn.BatchNorm2d(self.n_channels), 37 | nn.LeakyReLU(), 38 | nn.Dropout2d(p=0.2), 39 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 40 | nn.BatchNorm2d(self.n_channels), 41 | nn.LeakyReLU(), 42 | nn.Dropout2d(p=0.2), 43 | ) 44 | 45 | self.block3 = nn.Sequential( 46 | nn.Conv2d(3 * self.n_channels, self.n_channels, (3, 3), padding=1), 47 | nn.BatchNorm2d(self.n_channels), 48 | nn.LeakyReLU(), 49 | nn.Dropout2d(p=0.2), 50 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 51 | nn.BatchNorm2d(self.n_channels), 52 | nn.LeakyReLU(), 53 | nn.Dropout2d(p=0.2), 54 | ) 55 | 56 | self.block4 = nn.Sequential( 57 | nn.Conv2d(4 * self.n_channels, self.n_channels, (3, 3), padding=1), 58 | nn.BatchNorm2d(self.n_channels), 59 | nn.LeakyReLU(), 60 | nn.Dropout2d(p=0.2), 61 | nn.Conv2d(self.n_channels, self.n_channels, (3, 3), padding=1), 62 | nn.BatchNorm2d(self.n_channels), 63 | nn.LeakyReLU(), 64 | nn.Dropout2d(p=0.2), 65 | ) 66 | 67 | self.pred1 = nn.Sequential( 68 | nn.Conv2d(5 * self.n_channels, 4 * self.n_channels, (1, 1), padding=0), 69 | nn.BatchNorm2d(4 * self.n_channels), 70 | nn.LeakyReLU(), 71 | nn.Dropout2d(p=0.2), 72 | nn.Conv2d(4 * self.n_channels, 4 * self.n_channels, (1, 1), padding=0), 73 | nn.BatchNorm2d(4 * self.n_channels), 74 | nn.LeakyReLU(), 75 | nn.Dropout2d(p=0.2), 76 | ) 77 | 78 | self.pred2 = nn.Sequential( 79 | nn.Conv2d(4 * self.n_channels, 3 * self.n_channels, (1, 1), padding=0), 80 | nn.BatchNorm2d(3 * self.n_channels), 81 | nn.LeakyReLU(), 82 | nn.Dropout2d(p=0.2), 83 | nn.Conv2d(3 * self.n_channels, 3 * self.n_channels, (1, 1), padding=0), 84 | nn.BatchNorm2d(3 * self.n_channels), 85 | nn.LeakyReLU(), 86 | nn.Dropout2d(p=0.2), 87 | ) 88 | 89 | self.pred3 = nn.Sequential( 90 | nn.Conv2d(3 * self.n_channels, 2 * self.n_channels, (1, 1), padding=0), 91 | nn.BatchNorm2d(2 * self.n_channels), 92 | nn.LeakyReLU(), 93 | nn.Dropout2d(p=0.2), 94 | nn.Conv2d(2 * self.n_channels, 2 * self.n_channels, (1, 1), padding=0), 95 | nn.BatchNorm2d(2 * self.n_channels), 96 | nn.LeakyReLU(), 97 | nn.Dropout2d(p=0.2), 98 | ) 99 | 100 | self.pred4 = nn.Sequential( 101 | nn.Conv2d(2 * self.n_channels, 1 * self.n_channels, (1, 1), padding=0), 102 | nn.BatchNorm2d(self.n_channels), 103 | nn.LeakyReLU(), 104 | nn.Dropout2d(p=0.2), 105 | nn.Conv2d(1 * self.n_channels, 1 * self.n_channels, (1, 1), padding=0), 106 | nn.LeakyReLU(), 107 | nn.Conv2d(1 * self.n_channels, self.n_points, (1, 1), padding=0), 108 | ) 109 | 110 | self.tanh = nn.Tanh() 111 | self.relu = nn.ReLU() 112 | 113 | def forward(self, x): 114 | x1 = self.block1.forward(x) 115 | 116 | x1 = torch.cat([x, x1], dim=1) 117 | x2 = self.block2.forward(x1) 118 | x2 = torch.cat([x1, x2], dim=1) 119 | x3 = self.block3.forward(x2) 120 | x3 = torch.cat([x2, x3], dim=1) 121 | x4 = self.block4.forward(x3) 122 | x4 = torch.cat([x3, x4], dim=1) 123 | 124 | y = self.pred1.forward(x4) 125 | y = self.pred2.forward(y) 126 | y = self.pred3.forward(y) 127 | y = self.pred4.forward(y) 128 | 129 | tsdf = self.scale * self.tanh.forward(y) 130 | 131 | return tsdf 132 | 
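# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original file): the config values
# below are assumptions chosen only to illustrate the expected tensor shapes;
# the real settings live in the yaml files under configs/fusion.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        output_scale=0.05,  # assumed TSDF truncation scale
        confidence=False,   # no extra confidence input plane
        n_points=9,         # no per-sensor override -> falls back to n_points
    )

    net = FusionNet(cfg, sensor="tof").eval()

    # input: 2 * n_points + 1 channels (19 here) on a small dummy frame
    x = torch.rand(1, 2 * cfg.n_points + 1, 240, 320)
    with torch.no_grad():
        tsdf = net(x)

    print(tsdf.shape)  # torch.Size([1, 9, 240, 320]), values in (-0.05, 0.05)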
-------------------------------------------------------------------------------- /modules/model_features.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from torch.nn.functional import normalize 5 | 6 | 7 | class EncoderBlock(nn.Module): 8 | """Encoder block for the fusion network in NeuralFusion""" 9 | 10 | def __init__(self, c_in, c_out, activation, resolution, layernorm): 11 | 12 | super(EncoderBlock, self).__init__() 13 | 14 | if layernorm: 15 | self.block = nn.Sequential( 16 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 17 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 18 | activation, 19 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 20 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 21 | activation, 22 | ) 23 | else: 24 | self.block = nn.Sequential( 25 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 26 | activation, 27 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 28 | activation, 29 | ) 30 | 31 | def forward(self, x): 32 | return self.block(x) 33 | 34 | 35 | class DecoderBlock(nn.Module): 36 | """Decoder block for the fusion network in NeuralFusion""" 37 | 38 | def __init__(self, c_in, c_out, activation, resolution, layernorm): 39 | 40 | super(DecoderBlock, self).__init__() 41 | 42 | if layernorm: 43 | self.block = nn.Sequential( 44 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 45 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 46 | activation, 47 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 48 | nn.LayerNorm([resolution[0], resolution[1]], elementwise_affine=True), 49 | activation, 50 | ) 51 | else: 52 | self.block = nn.Sequential( 53 | nn.Conv2d(c_in, c_out, (3, 3), padding=1), 54 | activation, 55 | nn.Conv2d(c_out, c_out, (3, 3), padding=1), 56 | activation, 57 | ) 58 | 59 | def forward(self, x): 60 | return self.block(x) 61 | 62 | 63 | class FeatureNet(nn.Module): 64 | """Network used in NeuralFusion""" 65 | 66 | def __init__(self, config, sensor): 67 | 68 | super(FeatureNet, self).__init__() 69 | 70 | try: 71 | self.n_points = eval("config.n_points_" + sensor) 72 | except AttributeError: 73 | self.n_points = config.n_points 74 | 75 | self.n_features = config.n_features - config.append_depth 76 | 77 | self.normalize = config.normalize 78 | self.w_rgb = config.w_rgb 79 | self.w_stereo_warp_right = config.stereo_warp_right 80 | self.w_intensity_gradient = config.w_intensity_gradient 81 | self.confidence = config.confidence 82 | 83 | # layer settings 84 | n_channels_input = self.n_features 85 | n_channels_output = self.n_features 86 | self.n_layers = config.n_layers 87 | self.height = config.resy 88 | self.width = config.resx 89 | resolution = (self.height, self.width) 90 | enc_activation = eval(config.enc_activation) 91 | dec_activation = eval(config.dec_activation) 92 | self.tsdf_out = self.n_points 93 | layernorm = config.layernorm 94 | self.append_depth = config.append_depth 95 | 96 | # define network submodules (encoder/decoder) 97 | self.encoder = nn.ModuleList() 98 | self.decoder = nn.ModuleList() 99 | 100 | if sensor == "tof": 101 | n_channels_first = ( 102 | config.depth 103 | + 3 * int(self.w_rgb) * config.w_rgb_tof 104 | + 2 * int(self.w_intensity_gradient) 105 | + int(self.confidence) 106 | ) 107 | elif sensor == "stereo": 108 | n_channels_first = ( 109 | config.depth 110 | + 3 * int(self.w_rgb) 111 | + 2 * int(self.w_intensity_gradient) 112 | + 3 * int(self.w_stereo_warp_right) 113 | + int(self.confidence) 114 | 
) 115 | else: 116 | n_channels_first = ( 117 | config.depth 118 | + 3 * int(self.w_rgb) 119 | + 2 * int(self.w_intensity_gradient) 120 | + int(self.confidence) 121 | ) 122 | 123 | # add first encoder block 124 | self.encoder.append( 125 | EncoderBlock( 126 | n_channels_first, 127 | n_channels_input, 128 | enc_activation, 129 | resolution, 130 | layernorm, 131 | ) 132 | ) 133 | # add first decoder block 134 | if sensor == "stereo": 135 | self.decoder.append( 136 | DecoderBlock( 137 | (self.n_layers) * n_channels_input 138 | + config.depth 139 | + 3 * int(self.w_rgb) 140 | + 2 * int(self.w_intensity_gradient) 141 | + 3 * int(self.w_stereo_warp_right) 142 | + int(self.confidence), 143 | self.n_layers * n_channels_output, 144 | dec_activation, 145 | resolution, 146 | layernorm, 147 | ) 148 | ) 149 | elif sensor == "tof": 150 | self.decoder.append( 151 | DecoderBlock( 152 | (self.n_layers) * n_channels_input 153 | + config.depth 154 | + 3 * int(self.w_rgb) * config.w_rgb_tof 155 | + 2 * int(self.w_intensity_gradient) 156 | + int(self.confidence), 157 | self.n_layers * n_channels_output, 158 | dec_activation, 159 | resolution, 160 | layernorm, 161 | ) 162 | ) 163 | else: 164 | self.decoder.append( 165 | DecoderBlock( 166 | (self.n_layers) * n_channels_input 167 | + config.depth 168 | + 3 * int(self.w_rgb) 169 | + 2 * int(self.w_intensity_gradient) 170 | + int(self.confidence), 171 | self.n_layers * n_channels_output, 172 | dec_activation, 173 | resolution, 174 | layernorm, 175 | ) 176 | ) 177 | 178 | # adding model layers 179 | for l in range(1, self.n_layers): 180 | self.encoder.append( 181 | EncoderBlock( 182 | n_channels_first + l * n_channels_input, 183 | n_channels_input, 184 | enc_activation, 185 | resolution, 186 | layernorm, 187 | ) 188 | ) 189 | 190 | self.decoder.append( 191 | DecoderBlock( 192 | ((self.n_layers + 1) - l) * n_channels_output, 193 | ((self.n_layers + 1) - (l + 1)) * n_channels_output, 194 | dec_activation, 195 | resolution, 196 | layernorm, 197 | ) 198 | ) 199 | 200 | self.tanh = nn.Tanh() 201 | 202 | def forward(self, x): 203 | if self.append_depth: 204 | if self.w_rgb: 205 | d = x[:, 0, :, :].unsqueeze(1) 206 | else: 207 | d = x 208 | 209 | # encoding 210 | 211 | for enc in self.encoder: 212 | xmid = enc(x) 213 | if xmid.isnan().sum() > 0 or xmid.isinf().sum() > 0: 214 | print("xmid nan: ", xmid.isnan().sum()) 215 | print("xmid inf: ", xmid.isinf().sum()) 216 | x = torch.cat([x, xmid], dim=1) 217 | 218 | # decoding 219 | for dec in self.decoder: 220 | x = dec(x) 221 | 222 | if self.normalize: 223 | x = normalize(x, p=2, dim=1) 224 | 225 | if self.append_depth: 226 | x = torch.cat([x, d], dim=1) 227 | 228 | output = dict() 229 | 230 | output["feature"] = x 231 | 232 | return output 233 | 234 | 235 | class FeatureResNet(nn.Module): 236 | """Residual Network""" 237 | 238 | def __init__(self, config, sensor): 239 | 240 | super(FeatureResNet, self).__init__() 241 | 242 | try: 243 | self.n_points = eval("config.n_points_" + sensor) 244 | except AttributeError: 245 | self.n_points = config.n_points 246 | 247 | self.n_features = config.n_features - config.append_depth 248 | 249 | self.normalize = config.normalize 250 | self.w_rgb = config.w_rgb 251 | self.w_stereo_warp_right = config.stereo_warp_right 252 | self.w_intensity_gradient = config.w_intensity_gradient 253 | self.confidence = config.confidence 254 | 255 | # layer settings 256 | n_channels_input = self.n_features 257 | self.n_layers = config.n_layers 258 | self.height = config.resy 259 | self.width = config.resx 260 
| resolution = (self.height, self.width) 261 | enc_activation = eval(config.enc_activation) 262 | self.tsdf_out = self.n_points 263 | layernorm = config.layernorm 264 | self.append_depth = config.append_depth 265 | 266 | # define network submodules (encoder/decoder) 267 | self.encoder = nn.ModuleList() 268 | 269 | if sensor == "tof": 270 | n_channels_first = ( 271 | config.depth 272 | + 3 * int(self.w_rgb) * config.w_rgb_tof 273 | + 2 * int(self.w_intensity_gradient) 274 | + int(self.confidence) 275 | ) 276 | elif ( 277 | sensor == "stereo" 278 | ): # I did not feed rgb to sgm_stereo. This line should have been sensor.endswith("stereo"): 279 | n_channels_first = ( 280 | config.depth 281 | + 3 * int(self.w_rgb) 282 | + 2 * int(self.w_intensity_gradient) 283 | + 3 * int(self.w_stereo_warp_right) 284 | + int(self.confidence) 285 | ) 286 | else: 287 | n_channels_first = ( 288 | config.depth 289 | + 3 * int(self.w_rgb) 290 | + 2 * int(self.w_intensity_gradient) 291 | + int(self.confidence) 292 | ) 293 | 294 | # add first encoder block 295 | self.encoder.append( 296 | EncoderBlock( 297 | n_channels_first, 298 | n_channels_input, 299 | enc_activation, 300 | resolution, 301 | layernorm, 302 | ) 303 | ) 304 | 305 | # adding model layers 306 | for l in range(1, self.n_layers): 307 | self.encoder.append( 308 | EncoderBlock( 309 | n_channels_input, 310 | n_channels_input, 311 | enc_activation, 312 | resolution, 313 | layernorm, 314 | ) 315 | ) 316 | 317 | self.tanh = nn.Tanh() 318 | 319 | def forward(self, x): 320 | if self.append_depth: 321 | if self.w_rgb: 322 | d = x[:, 0, :, :].unsqueeze(1) 323 | else: 324 | d = x 325 | 326 | # encoding 327 | 328 | for k, enc in enumerate(self.encoder): 329 | xmid = enc(x) 330 | if xmid.isnan().sum() > 0 or xmid.isinf().sum() > 0: 331 | print("xmid nan: ", xmid.isnan().sum()) 332 | print("xmid inf: ", xmid.isinf().sum()) 333 | 334 | if k > 0: 335 | x = x + xmid 336 | else: 337 | x = xmid 338 | 339 | if self.normalize: 340 | x = normalize(x, p=2, dim=1) 341 | 342 | if self.append_depth: 343 | x = torch.cat([x, d], dim=1) 344 | 345 | output = dict() 346 | 347 | output["feature"] = x 348 | 349 | return output 350 | -------------------------------------------------------------------------------- /modules/pipeline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import tqdm 3 | import math 4 | 5 | from modules.fuse_pipeline import Fuse_Pipeline 6 | from modules.filter_pipeline import Filter_Pipeline 7 | 8 | import numpy as np 9 | 10 | 11 | class Pipeline(torch.nn.Module): 12 | def __init__(self, config): 13 | 14 | super(Pipeline, self).__init__() 15 | 16 | self.config = config 17 | 18 | # setup pipeline 19 | self.fuse_pipeline = Fuse_Pipeline(config) 20 | if config.FILTERING_MODEL.do: 21 | if config.FILTERING_MODEL.model == "3dconv": 22 | self.filter_pipeline = Filter_Pipeline(config) 23 | else: 24 | self.filter_pipeline = ( 25 | None # used when we run the tsdf fusion or routedfusion 26 | ) 27 | else: 28 | self.filter_pipeline = None 29 | 30 | def forward(self, batch, database, epoch, device): # train step 31 | scene_id = batch["frame_id"][0].split("/")[0] 32 | 33 | frame = batch["frame_id"][0].split("/")[-1] 34 | 35 | fused_output = self.fuse_pipeline.fuse_training(batch, database, device) 36 | 37 | if self.config.FILTERING_MODEL.do: 38 | if self.filter_pipeline is not None: 39 | filtered_output = self.filter_pipeline.filter_training( 40 | fused_output, 41 | database, 42 | epoch, 43 | frame, 44 | 
scene_id, 45 | batch["sensor"], 46 | device, 47 | ) 48 | else: 49 | filtered_output = None 50 | 51 | if filtered_output == "save_and_exit": 52 | return "save_and_exit" 53 | 54 | if filtered_output is not None: 55 | fused_output["filtered_output"] = filtered_output 56 | else: 57 | if not self.config.FILTERING_MODEL.model == "routedfusion": 58 | return None 59 | 60 | return fused_output 61 | 62 | def test(self, loader, dataset, database, sensors, device): 63 | for k, batch in tqdm(enumerate(loader), total=len(dataset)): 64 | if self.config.DATA.collaborative_reconstruction: 65 | if ( 66 | math.ceil( 67 | int(batch["frame_id"][0].split("/")[-1]) 68 | / self.config.DATA.frames_per_chunk 69 | ) 70 | % 2 71 | == 0 72 | ): 73 | sensor_ = sensors[0] 74 | else: 75 | sensor_ = sensors[1] 76 | 77 | batch["depth"] = batch[sensor_ + "_depth"] 78 | batch["routing_net"] = "self._routing_network_" + sensor_ 79 | batch["mask"] = batch[sensor_ + "_mask"] 80 | if self.config.FILTERING_MODEL.model == "routedfusion": 81 | batch["sensor"] = self.config.DATA.input[0] 82 | else: 83 | batch["sensor"] = sensor_ 84 | 85 | batch["routingNet"] = sensor_ # used to be able to train routedfusion 86 | batch["fusionNet"] = sensor_ # used to be able to train routedfusion 87 | self.fuse_pipeline.fuse(batch, database, device) 88 | else: 89 | for sensor_ in sensors: 90 | if ( 91 | sensor_ + "_depth" 92 | ) in batch: # None on the Replica dataset when simulating sensors of different frame rates 93 | batch["depth"] = batch[sensor_ + "_depth"] 94 | batch["routing_net"] = "self._routing_network_" + sensor_ 95 | batch["mask"] = batch[sensor_ + "_mask"] 96 | if self.config.FILTERING_MODEL.model == "routedfusion": 97 | batch["sensor"] = self.config.DATA.input[0] 98 | else: 99 | batch["sensor"] = sensor_ 100 | 101 | batch[ 102 | "routingNet" 103 | ] = sensor_ # used to be able to train routedfusion 104 | batch[ 105 | "fusionNet" 106 | ] = sensor_ # used to be able to train routedfusion 107 | self.fuse_pipeline.fuse(batch, database, device) 108 | 109 | if self.filter_pipeline is not None: 110 | # run filtering network on all voxels which have a non-zero weight 111 | for scene in database.filtered.keys(): 112 | self.filter_pipeline.filter(scene, database, device) 113 | 114 | def test_tsdf(self, val_loader, val_dataset, val_database, sensors, device): 115 | 116 | for k, batch in tqdm(enumerate(val_loader), total=len(val_dataset)): 117 | 118 | if ( 119 | self.config.ROUTING.do 120 | and self.config.FILTERING_MODEL.model == "tsdf_early_fusion" 121 | ): 122 | batch["routing_net"] = "self._routing_network" 123 | batch["sensor"] = self.config.DATA.input[0] 124 | batch[ 125 | "fusionNet" 126 | ] = None # We don't use a fusion net during early fusion 127 | self.fuse_pipeline.fuse(batch, val_database, device) 128 | else: 129 | for sensor_ in sensors: 130 | batch["depth"] = batch[sensor_ + "_depth"] 131 | batch["routing_net"] = "self._routing_network_" + sensor_ 132 | batch["mask"] = batch[sensor_ + "_mask"] 133 | batch["sensor"] = sensor_ 134 | batch[ 135 | "routingNet" 136 | ] = sensor_ # used to be able to train routedfusion 137 | batch[ 138 | "fusionNet" 139 | ] = sensor_ # used to be able to train routedfusion 140 | self.fuse_pipeline.fuse(batch, val_database, device) 141 | 142 | if self.config.FILTERING_MODEL.do: 143 | # perform the fusion of the grids 144 | if self.config.FILTERING_MODEL.model == "tsdf_early_fusion": 145 | for scene in val_database.filtered.keys(): 146 | val_database.filtered[scene].volume = val_database.tsdf[ 147 | 
self.config.DATA.input[0] 148 | ][scene].volume 149 | 150 | elif ( 151 | self.config.FILTERING_MODEL.model == "tsdf_middle_fusion" 152 | ): # this is weighted average fusion 153 | for scene in val_database.filtered.keys(): 154 | weight_sum = np.zeros_like(val_database.filtered[scene].volume) 155 | for sensor_ in sensors: 156 | weight_sum += val_database.fusion_weights[sensor_][scene] 157 | val_database.filtered[scene].volume += ( 158 | val_database.tsdf[sensor_][scene].volume 159 | * val_database.fusion_weights[sensor_][scene] 160 | ) 161 | val_database.filtered[scene].volume = np.divide( 162 | val_database.filtered[scene].volume, 163 | weight_sum, 164 | out=np.zeros_like(weight_sum), 165 | where=weight_sum != 0.0, 166 | ) 167 | 168 | val_database.sensor_weighting[scene] = np.divide( 169 | val_database.fusion_weights[sensors[0]][scene], 170 | weight_sum, 171 | out=np.zeros_like(weight_sum), 172 | where=weight_sum != 0.0, 173 | ) 174 | -------------------------------------------------------------------------------- /modules/routing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ConfidenceRouting(torch.nn.Module): 5 | """ 6 | Confidence Routing Network 7 | """ 8 | 9 | def __init__(self, Cin, F, batchnorms=True): 10 | 11 | super().__init__() 12 | self.F = F 13 | 14 | Cout = 1 15 | 16 | if batchnorms: 17 | self.pre = torch.nn.Sequential( 18 | torch.nn.ReflectionPad2d(1), 19 | torch.nn.Conv2d(Cin, F, kernel_size=3, stride=1, padding=0), 20 | torch.nn.BatchNorm2d(F), 21 | torch.nn.ReLU(), 22 | torch.nn.ReflectionPad2d(1), 23 | torch.nn.Conv2d(F, F, kernel_size=3, stride=1, padding=0), 24 | torch.nn.BatchNorm2d(F), 25 | torch.nn.ReLU(), 26 | ) 27 | 28 | self.post = torch.nn.Sequential( 29 | torch.nn.ReflectionPad2d(1), 30 | torch.nn.Conv2d(3 * F, F, kernel_size=3, stride=1, padding=0), 31 | torch.nn.BatchNorm2d(F), 32 | torch.nn.ReLU(), 33 | torch.nn.ReflectionPad2d(1), 34 | torch.nn.Conv2d(F, Cout, kernel_size=3, stride=1, padding=0), 35 | torch.nn.BatchNorm2d(Cout), 36 | torch.nn.ReLU(), 37 | ) 38 | 39 | self.process = torch.nn.Sequential( 40 | torch.nn.ReflectionPad2d(1), 41 | torch.nn.Conv2d(F, 2 * F, kernel_size=3, stride=1, padding=0), 42 | torch.nn.BatchNorm2d(2 * F), 43 | torch.nn.ReLU(), 44 | torch.nn.ReflectionPad2d(1), 45 | torch.nn.Conv2d(2 * F, 2 * F, kernel_size=3, stride=1, padding=0), 46 | torch.nn.BatchNorm2d(2 * F), 47 | torch.nn.ReLU(), 48 | ) 49 | else: 50 | self.pre = torch.nn.Sequential( 51 | torch.nn.ReflectionPad2d(1), 52 | torch.nn.Conv2d(Cin, F, kernel_size=3, stride=1, padding=0), 53 | torch.nn.ReLU(), 54 | torch.nn.ReflectionPad2d(1), 55 | torch.nn.Conv2d(F, F, kernel_size=3, stride=1, padding=0), 56 | torch.nn.ReLU(), 57 | ) 58 | 59 | self.post = torch.nn.Sequential( 60 | torch.nn.ReflectionPad2d(1), 61 | torch.nn.Conv2d(3 * F, F, kernel_size=3, stride=1, padding=0), 62 | torch.nn.ReLU(), 63 | torch.nn.ReflectionPad2d(1), 64 | torch.nn.Conv2d(F, Cout, kernel_size=3, stride=1, padding=0), 65 | torch.nn.ReLU(), 66 | ) 67 | 68 | self.process = torch.nn.Sequential( 69 | torch.nn.ReflectionPad2d(1), 70 | torch.nn.Conv2d(F, 2 * F, kernel_size=3, stride=1, padding=0), 71 | torch.nn.ReLU(), 72 | torch.nn.ReflectionPad2d(1), 73 | torch.nn.Conv2d(2 * F, 2 * F, kernel_size=3, stride=1, padding=0), 74 | torch.nn.ReLU(), 75 | ) 76 | 77 | self.uncertainty = torch.nn.Sequential( 78 | torch.nn.ReflectionPad2d(1), 79 | torch.nn.Conv2d(3 * F, F, kernel_size=3, stride=1, padding=0), 80 | torch.nn.ReLU(), 81 | 
torch.nn.ReflectionPad2d(1), 82 | torch.nn.Conv2d(F, Cout, kernel_size=3, stride=1, padding=0), 83 | torch.nn.ReLU(), 84 | ) 85 | 86 | self.maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 87 | 88 | def forward(self, data): 89 | features = self.pre(data) 90 | lower_scale = self.maxpool(features) 91 | lower_features = self.process(lower_scale) 92 | upsampled = torch.nn.functional.interpolate( 93 | lower_features, scale_factor=2, mode="bilinear", align_corners=False 94 | ) 95 | H = data.shape[2] 96 | W = data.shape[3] 97 | upsampled = upsampled[:, :, :H, :W] 98 | output = self.post(torch.cat((features, upsampled), dim=1)) 99 | 100 | uncertainty = self.uncertainty(torch.cat((features, upsampled), dim=1)) 101 | 102 | return torch.cat((output, uncertainty), dim=1) 103 | -------------------------------------------------------------------------------- /modules/voxelgrid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | class FeatureGrid(object): 6 | def __init__(self, voxel_size, n_features, bbox=None): 7 | 8 | self._resolution = voxel_size 9 | self._bbox = bbox 10 | self._n_features = n_features 11 | self._volume = None 12 | 13 | if bbox is not None: 14 | self._origin = bbox[:, 0] 15 | 16 | volume_shape = np.diff(self._bbox, axis=1).ravel() / self.resolution 17 | # float16 conversion critical - otherwise, numerical 18 | # instabilies will cause wrong voxel grid size 19 | volume_shape = volume_shape.astype(np.float16) 20 | self._shape = ( 21 | np.ceil([volume_shape[0], volume_shape[1], volume_shape[2], n_features]) 22 | .astype(np.int32) 23 | .tolist() 24 | ) # round up 25 | 26 | self._volume = np.zeros(self._shape, dtype=np.float16) 27 | 28 | @property 29 | def resolution(self): 30 | return self._resolution 31 | 32 | @property 33 | def bbox(self): 34 | assert self._bbox is not None 35 | return self._bbox 36 | 37 | @property 38 | def volume(self): 39 | assert self._volume is not None 40 | return self._volume 41 | 42 | @volume.setter 43 | def volume(self, volume): 44 | self._volume = volume 45 | 46 | @property 47 | def origin(self): 48 | assert self._origin is not None 49 | return self._origin 50 | 51 | @property 52 | def shape(self): 53 | assert self._volume is not None 54 | return self._volume.shape 55 | 56 | def __getattr__(self, x, y, z): 57 | return self._volume[x, y, z, :] 58 | 59 | 60 | class VoxelGrid(object): 61 | def __init__(self, voxel_size, volume=None, bbox=None, initial_value=0.0): 62 | 63 | self._resolution = voxel_size 64 | 65 | self._volume = volume 66 | self._bbox = bbox 67 | 68 | if bbox is not None: 69 | self._origin = bbox[:, 0] 70 | 71 | if volume is None and bbox is not None: 72 | volume_shape = np.diff(self._bbox, axis=1).ravel() / self.resolution 73 | # float16 conversion critical - otherwise, numerical 74 | # instabilies will cause wrong voxel grid size 75 | volume_shape = volume_shape.astype(np.float16) 76 | 77 | volume_shape = np.ceil(volume_shape).astype(np.int32).tolist() # round up 78 | # float 16 conversion is critical 79 | self._volume = initial_value * np.ones(volume_shape).astype("float16") 80 | 81 | def from_array(self, array, bbox): 82 | 83 | self._volume = array 84 | self._bbox = bbox 85 | self._origin = bbox[:, 0] 86 | 87 | @property 88 | def resolution(self): 89 | return self._resolution 90 | 91 | @property 92 | def bbox(self): 93 | assert self._bbox is not None 94 | return self._bbox 95 | 96 | @property 97 | def volume(self): 98 | assert self._volume is 
not None 99 | return self._volume 100 | 101 | @volume.setter 102 | def volume(self, volume): 103 | self._volume = volume 104 | 105 | @property 106 | def origin(self): 107 | assert self._origin is not None 108 | return self._origin 109 | 110 | @property 111 | def shape(self): 112 | assert self._volume is not None 113 | return self._volume.shape 114 | 115 | def __getattr__(self, x, y, z): 116 | return self._volume[x, y, z] 117 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.0.0 2 | argon2-cffi==21.3.0 3 | argon2-cffi-bindings==21.2.0 4 | attrs==21.4.0 5 | backcall==0.2.0 6 | bleach==4.1.0 7 | cachetools==4.2.4 8 | certifi==2021.10.8 9 | cffi==1.15.0 10 | charset-normalizer==2.0.9 11 | cycler==0.11.0 12 | debugpy==1.5.1 13 | decorator==5.1.0 14 | defusedxml==0.7.1 15 | easydict==1.9 16 | entrypoints==0.3 17 | evaluate-3d-reconstruction @ file:///cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/deps/evaluate_3d_reconstruction 18 | fonttools==4.28.5 19 | google-auth==1.35.0 20 | google-auth-oauthlib==0.4.6 21 | grpcio==1.43.0 22 | h5py==2.10.0 23 | idna==3.3 24 | imageio==2.13.5 25 | importlib-metadata==4.10.0 26 | importlib-resources==5.4.0 27 | ipykernel==6.6.0 28 | ipython==7.30.1 29 | ipython-genutils==0.2.0 30 | ipywidgets==7.6.5 31 | jedi==0.18.1 32 | Jinja2==3.0.3 33 | joblib==1.1.0 34 | jsonschema==4.3.2 35 | jupyter-client==7.1.0 36 | jupyter-core==4.9.1 37 | jupyterlab-pygments==0.1.2 38 | jupyterlab-widgets==1.0.2 39 | kiwisolver==1.3.2 40 | Markdown==3.3.6 41 | MarkupSafe==2.0.1 42 | matplotlib==3.5.1 43 | matplotlib-inline==0.1.3 44 | mistune==0.8.4 45 | nbclient==0.5.9 46 | nbconvert==6.3.0 47 | nbformat==5.1.3 48 | nest-asyncio==1.5.4 49 | networkx==2.6.3 50 | notebook==6.4.6 51 | numpy==1.21.5 52 | oauthlib==3.1.1 53 | open3d @ file:///cluster/work/cvl/esandstroem/programs/Open3D/build/lib/python_package/pip_package/open3d-0.9.0.0-cp38-cp38-linux_x86_64.whl 54 | openTSNE==0.6.0 55 | packaging==21.3 56 | pandocfilters==1.5.0 57 | parso==0.8.3 58 | pexpect==4.8.0 59 | pickleshare==0.7.5 60 | Pillow==8.4.0 61 | prometheus-client==0.12.0 62 | prompt-toolkit==3.0.24 63 | protobuf==3.19.1 64 | ptyprocess==0.7.0 65 | pyasn1==0.4.8 66 | pyasn1-modules==0.2.8 67 | pycparser==2.21 68 | Pygments==2.10.0 69 | pyparsing==3.0.6 70 | pyquaternion==0.9.9 71 | pyrsistent==0.18.0 72 | python-dateutil==2.8.2 73 | PyWavelets==1.2.0 74 | PyYAML==5.3 75 | pyzmq==22.3.0 76 | requests==2.26.0 77 | requests-oauthlib==1.3.0 78 | rsa==4.8 79 | scikit-image==0.17.2 80 | scikit-learn==1.0.2 81 | scipy==1.7.3 82 | Send2Trash==1.8.0 83 | six==1.16.0 84 | tensorboard==2.2.1 85 | tensorboard-plugin-wit==1.8.0 86 | terminado==0.12.1 87 | testpath==0.5.0 88 | threadpoolctl==3.0.0 89 | tifffile==2021.11.2 90 | torch==1.7.1 91 | tornado==6.1 92 | tqdm==4.43.0 93 | traitlets==5.1.1 94 | trimesh==3.7.6 95 | typing-extensions==4.0.1 96 | urllib3==1.26.7 97 | wandb==0.12.9 98 | wcwidth==0.2.5 99 | webencodings==0.5.1 100 | Werkzeug==2.0.2 101 | widgetsnbextension==3.5.2 102 | zipp==3.6.0 103 | -------------------------------------------------------------------------------- /test_routing.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | from skimage import io 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from utils.loading import load_config 9 | from utils.setup import * 10 
| from modules.routing import ConfidenceRouting 11 | 12 | 13 | def arg_parser(): 14 | 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument("--config", required=False) 18 | 19 | args = parser.parse_args() 20 | 21 | return vars(args) 22 | 23 | 24 | def prepare_input_data(batch, config, device): 25 | 26 | for k, sensor_ in enumerate(config.DATA.input): 27 | if k == 0: 28 | inputs = batch[sensor_ + "_depth"].unsqueeze_(1) 29 | else: 30 | inputs = torch.cat((batch[sensor_ + "_depth"].unsqueeze_(1), inputs), 1) 31 | inputs = inputs.to(device) 32 | 33 | if config.ROUTING.intensity_grad: 34 | intensity = batch["intensity"].unsqueeze_(1) 35 | grad = batch["gradient"].unsqueeze_(1) 36 | inputs = torch.cat((intensity, grad, inputs), 1) 37 | inputs = inputs.to(device) 38 | 39 | target = batch[config.DATA.target] # (batch size, height, width) 40 | target = target.to(device) 41 | target = target.unsqueeze_(1) # (batch size, channels, height, width) 42 | return inputs, target 43 | 44 | 45 | def test(config): 46 | 47 | if config.SETTINGS.gpu: 48 | device = torch.device("cuda:0") 49 | else: 50 | device = torch.device("cpu") 51 | 52 | # get test dataset 53 | test_data_config = get_data_config(config, mode="test") 54 | test_dataset = get_data(config.DATA.dataset, test_data_config) 55 | test_loader = torch.utils.data.DataLoader( 56 | test_dataset, config.TESTING.test_batch_size, config.TESTING.test_shuffle 57 | ) 58 | 59 | # define model 60 | Cin = len(config.DATA.input) 61 | 62 | if config.ROUTING.intensity_grad: 63 | Cin += 2 64 | 65 | model = ConfidenceRouting( 66 | Cin=Cin, F=config.MODEL.contraction, batchnorms=config.MODEL.normalization 67 | ) 68 | # load model 69 | checkpoint = torch.load(config.TESTING.model_path) 70 | 71 | model.load_state_dict(checkpoint["pipeline_state_dict"]) 72 | 73 | model = model.to(device) 74 | 75 | n_test_batches = int(len(test_dataset) / config.TESTING.test_batch_size) 76 | 77 | for i, batch in enumerate(tqdm(test_loader, total=n_test_batches)): 78 | inputs, target = prepare_input_data(batch, config, device) 79 | 80 | output = model.forward(inputs) 81 | 82 | est = output[:, 0, :, :].unsqueeze_(1) 83 | unc = output[:, 1, :, :].unsqueeze_(1) 84 | 85 | est = est.detach().cpu().numpy() 86 | est = est.squeeze() 87 | estplot = est 88 | est = est * 1000 89 | est = est.astype("uint16") 90 | 91 | unc = unc.detach().cpu().numpy() 92 | unc = ( 93 | unc.squeeze() 94 | ) # there is a relu activation function as the last step of the confidence decoder s.t. 
we always get non-negative numbers 95 | confidence = np.exp(-1.0 * unc) 96 | confidence *= 10000 97 | confidence = confidence.astype("uint16") 98 | 99 | output_dir_refined = ( 100 | config.DATA.root_dir 101 | + "/" 102 | + batch["frame_id"][0].split("/")[0] 103 | + "/" 104 | + batch["frame_id"][0].split("/")[1] 105 | + "/left_routing_refined_" 106 | + config.TESTING.model_path.split("/")[-3] 107 | ) 108 | output_dir_confidence = ( 109 | config.DATA.root_dir 110 | + "/" 111 | + batch["frame_id"][0].split("/")[0] 112 | + "/" 113 | + batch["frame_id"][0].split("/")[1] 114 | + "/left_routing_confidence_" 115 | + config.TESTING.model_path.split("/")[-3] 116 | ) 117 | 118 | if not os.path.exists(output_dir_refined): 119 | os.makedirs(output_dir_refined) 120 | 121 | if not os.path.exists(output_dir_confidence): 122 | os.makedirs(output_dir_confidence) 123 | 124 | io.imsave( 125 | output_dir_refined + "/" + batch["frame_id"][0].split("/")[-1] + ".png", est 126 | ) 127 | io.imsave( 128 | output_dir_confidence + "/" + batch["frame_id"][0].split("/")[-1] + ".png", 129 | confidence, 130 | ) 131 | 132 | 133 | if __name__ == "__main__": 134 | 135 | # get arguments 136 | args = arg_parser() 137 | 138 | # get configs 139 | # load config 140 | if args["config"]: 141 | config = load_config(args["config"]) 142 | else: 143 | raise ValueError("Missing configuration: Please specify config.") 144 | 145 | # train 146 | test(config) 147 | -------------------------------------------------------------------------------- /train_routing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import datetime 4 | import random 5 | 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | from utils.loading import load_config_from_yaml 11 | from utils.setup import * 12 | 13 | from utils.loss import RoutingLoss 14 | from modules.routing import ConfidenceRouting 15 | import wandb 16 | 17 | 18 | def arg_parser(): 19 | 20 | parser = argparse.ArgumentParser() 21 | 22 | parser.add_argument("--config", required=True) 23 | 24 | args = parser.parse_args() 25 | 26 | return vars(args) 27 | 28 | 29 | def prepare_input_data(batch, config, device): 30 | 31 | for k, sensor_ in enumerate(config.DATA.input): 32 | if k == 0: 33 | inputs = batch[sensor_ + "_depth"].unsqueeze_(1) 34 | else: 35 | inputs = torch.cat((batch[sensor_ + "_depth"].unsqueeze_(1), inputs), 1) 36 | inputs = inputs.to(device) 37 | 38 | if config.ROUTING.intensity_grad: 39 | intensity = batch["intensity"].unsqueeze_(1) 40 | grad = batch["gradient"].unsqueeze_(1) 41 | inputs = torch.cat((intensity, grad, inputs), 1) 42 | inputs = inputs.to(device) 43 | 44 | target = batch[config.DATA.target] # (batch size, height, width) 45 | target = target.to(device) 46 | target = target.unsqueeze_(1) # (batch size, channels, height, width) 47 | return inputs, target 48 | 49 | 50 | def train(args, config): 51 | # set seed for reproducibility 52 | if config.SETTINGS.seed: 53 | random.seed(config.SETTINGS.seed) 54 | np.random.seed(config.SETTINGS.seed) 55 | torch.manual_seed(config.SETTINGS.seed) 56 | torch.cuda.manual_seed_all(config.SETTINGS.seed) 57 | torch.backends.cudnn.deterministic = True 58 | torch.cuda.manual_seed_all(config.SETTINGS.seed) 59 | torch.backends.cudnn.benchmark = False 60 | 61 | if config.SETTINGS.gpu: 62 | device = torch.device("cuda:0") 63 | else: 64 | device = torch.device("cpu") 65 | 66 | config.TIMESTAMP = datetime.datetime.now().strftime("%y%m%d-%H%M%S") 67 | print("model time stamp: 
", config.TIMESTAMP) 68 | 69 | # initialize weights and biases logging 70 | wandb.init( 71 | config=config, 72 | entity="esandstroem", 73 | project="senfunet-routing", 74 | name=config.TIMESTAMP, 75 | notes="put comment here", 76 | ) 77 | # change run name of wandb 78 | wandb.run.name = config.TIMESTAMP 79 | wandb.run.save() 80 | 81 | workspace = get_workspace(config) 82 | workspace.save_config(config) 83 | 84 | # get train dataset 85 | train_data_config = get_data_config(config, mode="train") 86 | train_dataset = get_data(config.DATA.dataset, train_data_config) 87 | train_loader = torch.utils.data.DataLoader( 88 | train_dataset, config.TRAINING.train_batch_size, config.TRAINING.train_shuffle 89 | ) 90 | 91 | # get val dataset 92 | val_data_config = get_data_config(config, mode="val") 93 | val_dataset = get_data(config.DATA.dataset, val_data_config) 94 | 95 | val_loader = torch.utils.data.DataLoader( 96 | val_dataset, config.TRAINING.val_batch_size, config.TRAINING.val_shuffle 97 | ) 98 | 99 | # define model 100 | Cin = len(config.DATA.input) 101 | 102 | if config.ROUTING.intensity_grad: 103 | Cin += 2 104 | 105 | model = ConfidenceRouting( 106 | Cin=Cin, F=config.MODEL.contraction, batchnorms=config.MODEL.normalization 107 | ) 108 | model = model.to(device) 109 | 110 | # define loss function 111 | criterion = RoutingLoss(config) 112 | criterion = criterion.to(device) 113 | 114 | # add weight and gradient tracking in wandb 115 | wandb.watch(model, criterion, log="all", log_freq=1000) 116 | 117 | # define optimizer 118 | optimizer = torch.optim.RMSprop( 119 | model.parameters(), 120 | config.OPTIMIZATION.lr, 121 | config.OPTIMIZATION.rho, 122 | config.OPTIMIZATION.eps, 123 | momentum=config.OPTIMIZATION.momentum, 124 | weight_decay=config.OPTIMIZATION.weight_decay, 125 | ) 126 | 127 | n_train_batches = int(len(train_dataset) / config.TRAINING.train_batch_size) 128 | n_val_batches = int(len(val_dataset) / config.TRAINING.val_batch_size) 129 | 130 | val_loss_best = np.infty 131 | 132 | # sample validation visualization frames 133 | val_vis_ids = np.random.choice(np.arange(0, n_val_batches), 5, replace=False) 134 | 135 | # # define metrics 136 | l1_criterion = torch.nn.L1Loss() 137 | l2_criterion = torch.nn.MSELoss() 138 | 139 | for epoch in range(0, config.TRAINING.n_epochs): 140 | print("epoch: ", epoch) 141 | 142 | val_loss_t = 0.0 143 | val_loss_l1 = 0.0 144 | val_loss_l2 = 0.0 145 | 146 | train_loss_t = 0.0 147 | train_loss_l1 = 0.0 148 | train_loss_l2 = 0.0 149 | 150 | train_epoch_loss_t = 0.0 151 | train_epoch_loss_l1 = 0.0 152 | train_epoch_loss_l2 = 0.0 153 | 154 | # make ready for training and clear optimizer 155 | model.train() 156 | optimizer.zero_grad() 157 | 158 | for i, batch in enumerate(tqdm(train_loader, total=n_train_batches)): 159 | inputs, target = prepare_input_data(batch, config, device) 160 | 161 | output = model(inputs) 162 | 163 | est = output[:, 0, :, :].unsqueeze_(1) 164 | unc = output[:, 1, :, :].unsqueeze_(1) 165 | 166 | if not config.LOSS.completion: 167 | if len(config.DATA.input) == 1: 168 | mask = ( 169 | batch[config.DATA.input[0] + "_mask"].to(device).unsqueeze_(1) 170 | ) 171 | else: 172 | mask = batch["mask"].to(device).unsqueeze_(1) 173 | target = torch.where(mask == 0.0, torch.zeros_like(target), target) 174 | 175 | # compute training loss 176 | loss = criterion.forward(est, unc, target) 177 | loss.backward() 178 | 179 | # compute metrics for analysis 180 | loss_l1 = l1_criterion.forward(est, target) 181 | loss_l2 = l2_criterion.forward(est, target) 182 
| 183 | train_loss_t += loss.item() 184 | train_loss_l1 += loss_l1.item() 185 | train_loss_l2 += loss_l2.item() 186 | 187 | train_epoch_loss_t += loss.item() 188 | train_epoch_loss_l1 += loss_l1.item() 189 | train_epoch_loss_l2 += loss_l2.item() 190 | 191 | if i % config.OPTIMIZATION.accumulation_steps == 0: 192 | optimizer.step() 193 | optimizer.zero_grad() 194 | 195 | if i % config.SETTINGS.log_freq == 0 and i > 0: 196 | # compute avg. loss per frame 197 | train_loss_t /= ( 198 | config.SETTINGS.log_freq * config.TRAINING.train_batch_size 199 | ) 200 | train_loss_l1 /= ( 201 | config.SETTINGS.log_freq * config.TRAINING.train_batch_size 202 | ) 203 | train_loss_l2 /= ( 204 | config.SETTINGS.log_freq * config.TRAINING.train_batch_size 205 | ) 206 | 207 | wandb.log( 208 | { 209 | "Train/total loss": train_loss_t, 210 | "Train/l1 loss": train_loss_l1, 211 | "Train/l2 loss": train_loss_l2, 212 | "Train/nbr_frames": (epoch * n_train_batches + i) 213 | * config.TRAINING.train_batch_size, 214 | } 215 | ) 216 | train_loss_t = 0 217 | train_loss_l1 = 0 218 | train_loss_l2 = 0 219 | 220 | train_epoch_loss_t /= n_train_batches * config.TRAINING.train_batch_size 221 | train_epoch_loss_l1 /= n_train_batches * config.TRAINING.train_batch_size 222 | train_epoch_loss_l2 /= n_train_batches * config.TRAINING.train_batch_size 223 | 224 | # log training metrics 225 | workspace.log("Epoch {} Loss {}".format(epoch, train_epoch_loss_t)) 226 | workspace.log("Epoch {} L1 Loss {}".format(epoch, train_epoch_loss_l1)) 227 | workspace.log("Epoch {} L2 Loss {}".format(epoch, train_epoch_loss_l2)) 228 | 229 | model.eval() 230 | 231 | for i, batch in enumerate(tqdm(val_loader, total=n_val_batches)): 232 | inputs, target = prepare_input_data(batch, config, device) 233 | 234 | output = model(inputs) 235 | 236 | est = output[:, 0, :, :].unsqueeze_(1) 237 | unc = output[:, 1, :, :].unsqueeze_(1) 238 | # visualize frames 239 | if i in val_vis_ids: 240 | # parse frames and normalize to range 0-1 241 | frame_est = est[0, :, :, :].cpu().detach().numpy().reshape(512, 512, 1) 242 | frame_est /= np.amax(frame_est) 243 | frame_gt = ( 244 | target[0, :, :, :].cpu().detach().numpy().reshape(512, 512, 1) 245 | ) 246 | frame_gt /= np.amax(frame_gt) 247 | frame_unc = unc[0, :, :, :].cpu().detach().numpy().reshape(512, 512, 1) 248 | frame_conf = np.exp(-1.0 * frame_unc) 249 | frame_unc /= np.amax(frame_unc) 250 | frame_l1 = np.abs(frame_est - frame_gt).reshape(512, 512, 1) 251 | frame_l1 /= np.amax(frame_l1) 252 | 253 | wandb.log( 254 | { 255 | "Val/images": [ 256 | wandb.Image( 257 | frame_est, 258 | caption="depth estimate {}".format(i), 259 | ), 260 | wandb.Image(frame_gt, caption="gt depth {}".format(i)), 261 | wandb.Image( 262 | frame_unc, 263 | caption="uncertainty estimate {}".format(i), 264 | ), 265 | wandb.Image( 266 | frame_conf, 267 | caption="confidence estimate {}".format(i), 268 | ), 269 | wandb.Image( 270 | frame_l1, 271 | caption="l1 depth error {}".format(i), 272 | ), 273 | ] 274 | } 275 | ) 276 | 277 | if not config.LOSS.completion: 278 | if len(config.DATA.input) == 1: 279 | mask = ( 280 | batch[config.DATA.input[0] + "_mask"].to(device).unsqueeze_(1) 281 | ) 282 | else: 283 | mask = batch["mask"].to(device).unsqueeze_(1) 284 | target = torch.where(mask == 0.0, torch.zeros_like(target), target) 285 | 286 | loss_t = criterion.forward(est, unc, target) 287 | loss_l1 = l1_criterion.forward(est, target) 288 | loss_l2 = l2_criterion.forward(est, target) 289 | 290 | val_loss_t += loss_t.item() 291 | val_loss_l1 += 
loss_l1.item() 292 | val_loss_l2 += loss_l2.item() 293 | 294 | val_loss_t /= n_val_batches * config.TRAINING.train_batch_size 295 | val_loss_l1 /= n_val_batches * config.TRAINING.train_batch_size 296 | val_loss_l2 /= n_val_batches * config.TRAINING.train_batch_size 297 | 298 | # log validation metrics 299 | workspace.log( 300 | "Epoch {} Validation Loss {}".format(epoch, val_loss_t), mode="val" 301 | ) 302 | workspace.log( 303 | "Epoch {} Validation L1 Loss {}".format(epoch, val_loss_l1), mode="val" 304 | ) 305 | workspace.log( 306 | "Epoch {} Validation L2 Loss {}".format(epoch, val_loss_l2), mode="val" 307 | ) 308 | 309 | wandb.log( 310 | { 311 | "Val/total loss": val_loss_t, 312 | "Val/l1 loss": val_loss_l1, 313 | "Val/l2 loss": val_loss_l2, 314 | "Val/epoch": epoch, 315 | } 316 | ) 317 | 318 | # define model state for storing 319 | model_state = { 320 | "epoch": epoch, 321 | "pipeline_state_dict": model.state_dict(), 322 | "optimizer_state_dict": optimizer.state_dict(), 323 | } 324 | 325 | if val_loss_t <= val_loss_best: 326 | val_loss_best = val_loss_t 327 | workspace.log( 328 | "Found new best model with loss {} at epoch {}".format( 329 | val_loss_best, epoch 330 | ), 331 | mode="val", 332 | ) 333 | workspace.save_model_state(model_state, is_best=True) 334 | else: 335 | workspace.save_model_state(model_state, is_best=False) 336 | 337 | 338 | if __name__ == "__main__": 339 | 340 | # get arguments 341 | args = arg_parser() 342 | 343 | # get configs 344 | config = load_config_from_yaml(args["config"]) 345 | 346 | # train 347 | train(args, config) 348 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eriksandstroem/SenFuNet/43c1682e29c700df4577d9dcf0ac3b8ebdd8f496/utils/__init__.py -------------------------------------------------------------------------------- /utils/loading.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import json 3 | import os 4 | import torch 5 | 6 | from easydict import EasyDict 7 | 8 | 9 | def load_config_from_yaml(path): 10 | """ 11 | Method to load the config file for 12 | neural network training 13 | :param path: yaml-filepath with configs stored 14 | :return: easydict containing config 15 | """ 16 | c = yaml.safe_load(open(path)) 17 | config = EasyDict(c) 18 | 19 | return config 20 | 21 | 22 | def load_config_from_json(path): 23 | """ 24 | Method to load the config file 25 | from json files. 26 | :param path: path to json file 27 | :return: easydict containing config 28 | """ 29 | with open(path, "r") as file: 30 | data = json.load(file) 31 | config = EasyDict(data) 32 | return config 33 | 34 | 35 | def load_config(path): 36 | """ 37 | Wrapper method around different methods 38 | loading config file based on file ending. 39 | """ 40 | 41 | if path[-4:] == "yaml": 42 | return load_config_from_yaml(path) 43 | elif path[-4:] == "json": 44 | return load_config_from_json(path) 45 | else: 46 | raise ValueError("Unsupported file format for config") 47 | 48 | 49 | def load_pipeline( 50 | file, model 51 | ): # loads all paramters that can be loaded in the checkpoint! 
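# Behaviour sketch: the function first tries to restore the full
# "pipeline_state_dict"; if the architectures do not match exactly, it falls
# back to loading only the intersecting keys with strict=False and prints the
# keys that could not be matched. Hypothetical usage (path and F value are
# placeholders, not taken from the configs):
#
#     model = ConfidenceRouting(Cin=1, F=64)
#     load_pipeline("path/to/best.pth.tar", model)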
52 | 53 | checkpoint = file 54 | 55 | if not os.path.exists(checkpoint): 56 | raise FileNotFoundError("File doesn't exist {}".format(checkpoint)) 57 | try: 58 | if torch.cuda.is_available(): 59 | checkpoint = torch.load(checkpoint) 60 | else: 61 | checkpoint = torch.load(checkpoint, map_location=torch.device("cpu")) 62 | 63 | model.load_state_dict(checkpoint["pipeline_state_dict"]) 64 | print("loading full model") 65 | except Exception: 66 | print("loading model partially") 67 | 68 | print( 69 | "nbr of entries in checkpoint model: ", 70 | len(checkpoint["pipeline_state_dict"].keys()), 71 | ) 72 | pretrained_dict = { 73 | k: v 74 | for k, v in checkpoint["pipeline_state_dict"].items() 75 | if k in model.state_dict() 76 | } 77 | print("nbr of entries found in created model: ", len(model.state_dict().keys())) 78 | print( 79 | "nbr of entries found in created model and checkpoint model: ", 80 | len(pretrained_dict.keys()), 81 | ) 82 | print("Keys in created model but not in checkpoint:") 83 | for key in model.state_dict().keys(): 84 | if key not in checkpoint["pipeline_state_dict"].keys(): 85 | print(key) 86 | print("...") 87 | print("Keys in checkpoint but not in created model:") 88 | for key in checkpoint["pipeline_state_dict"].keys(): 89 | if key not in model.state_dict().keys(): 90 | print(key) 91 | 92 | model.load_state_dict(pretrained_dict, False) 93 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def evaluation(est, target, mask=None): 6 | 7 | mse = mse_fn(est, target, mask) 8 | mad = mad_fn(est, target, mask) 9 | iou = iou_fn(est, target, mask) 10 | acc = acc_fn(est, target, mask) 11 | 12 | return {"mse": mse, "mad": mad, "iou": iou, "acc": acc} 13 | 14 | 15 | def mse_fn(est, target, mask=None): 16 | 17 | if mask is not None: 18 | grid = mask * np.power(est - target, 2) 19 | grid = grid.astype( 20 | np.float32 21 | ) # cast required to avoid inf values since the input grids are float16 22 | metric = np.sum(grid) / np.sum(mask) 23 | else: 24 | metric = np.mean(np.power(est - target, 2)) 25 | 26 | return metric 27 | 28 | 29 | def mad_fn(est, target, mask=None): 30 | 31 | if mask is not None: 32 | grid = mask * np.abs(est - target) 33 | grid = grid.astype( 34 | np.float32 35 | ) # cast required to avoid inf values since the input grids are float16 36 | metric = np.sum(grid) / np.sum(mask) 37 | else: 38 | metric = np.mean(np.abs(est - target)) 39 | 40 | return metric 41 | 42 | 43 | def iou_fn(est, target, mask=None): 44 | 45 | est = est.astype( 46 | np.float32 47 | ) # cast required to avoid inf values since the input grids are float16 48 | target = target.astype(np.float32) 49 | if mask is not None: 50 | tp = (est < 0) & (target < 0) & (mask > 0) 51 | fp = (est < 0) & (target >= 0) & (mask > 0) 52 | fn = (est >= 0) & (target < 0) & (mask > 0) 53 | else: 54 | tp = (est < 0) & (target < 0) 55 | fp = (est < 0) & (target >= 0) 56 | fn = (est >= 0) & (target < 0) 57 | 58 | intersection = tp.sum() 59 | union = tp.sum() + fp.sum() + fn.sum() 60 | 61 | del tp, fp, fn 62 | metric = intersection / union 63 | return metric 64 | 65 | 66 | def acc_fn(est, target, mask=None): 67 | 68 | est = est.astype( 69 | np.float32 70 | ) # cast required to avoid inf values since the input grids are float16 71 | target = target.astype(np.float32) 72 | if mask is not None: 73 | tp = (est < 0) & (target < 0) & (mask > 0) 74 | tn =
(est >= 0) & (target >= 0) & (mask > 0) 75 | else: 76 | tp = (est < 0) & (target < 0) 77 | tn = (est >= 0) & (target >= 0) 78 | 79 | acc = (tp.sum() + tn.sum()) / (mask.sum() if mask is not None else est.size) 80 | 81 | del tp, tn 82 | metric = acc 83 | return metric 84 | -------------------------------------------------------------------------------- /utils/saving.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import shutil 4 | import torch 5 | 6 | 7 | def save_config_to_json(path, config): 8 | """Saves config to json file""" 9 | with open(os.path.join(path, "config.json"), "w") as file: 10 | json.dump(config, file) 11 | 12 | 13 | def save_checkpoint(state, is_best, checkpoint, is_best_filt=None): 14 | """Saves model and training parameters 15 | at checkpoint + 'last.pth.tar'. 16 | If is_best is True (or True for a sensor), also saves 17 | checkpoint + 'best.pth.tar' (or 'best_<sensor>.pth.tar') 18 | Args: 19 | state: (dict) contains model's state_dict, may contain other keys such as epoch, optimizer state_dict 20 | is_best: (dict or bool) Dict of bools, one per sensor. True for a sensor if it is the best model seen until now 21 | checkpoint: (string) folder where parameters are to be saved 22 | is_best_filt: (bool) True if it is the best filtered model seen until now 23 | """ 24 | if not os.path.exists(checkpoint): 25 | print( 26 | "Checkpoint Directory does not exist! Making directory {}".format( 27 | checkpoint 28 | ) 29 | ) 30 | os.mkdir(checkpoint) 31 | 32 | filepath = os.path.join(checkpoint, "last.pth.tar") 33 | torch.save(state, filepath) 34 | if is_best_filt: 35 | shutil.copyfile(filepath, os.path.join(checkpoint, "best.pth.tar")) 36 | 37 | if isinstance(is_best, dict): 38 | for sensor in is_best.keys(): 39 | if is_best[sensor]: 40 | shutil.copyfile( 41 | filepath, os.path.join(checkpoint, "best_" + sensor + ".pth.tar") 42 | ) 43 | else: 44 | if is_best: 45 | shutil.copyfile( 46 | filepath, os.path.join(checkpoint, "best.pth.tar") 47 | ) # train routing network with multiple sensor inputs 48 | -------------------------------------------------------------------------------- /utils/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from dataset import Replica 5 | from dataset import CoRBS 6 | from dataset import Scene3D 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import matplotlib 11 | 12 | matplotlib.use("Agg") 13 | 14 | import trimesh 15 | import skimage.measure 16 | 17 | from modules.database import Database 18 | 19 | from utils import transform 20 | 21 | from copy import copy 22 | 23 | from utils.saving import * 24 | 25 | 26 | def get_data_config(config, mode): 27 | data_config = copy(config.DATA) 28 | try: 29 | data_config.filtering_model = config.FILTERING_MODEL.model 30 | except AttributeError: 31 | data_config.filtering_model = len( 32 | config.DATA.input 33 | ) # used when training routing network 34 | 35 | if mode == "train": 36 | data_config.mode = "train" 37 | data_config.scene_list = data_config.train_scene_list 38 | elif mode == "val": 39 | data_config.mode = "val" 40 | data_config.scene_list = data_config.val_scene_list 41 | elif mode == "test": 42 | data_config.mode = "test" 43 | data_config.scene_list = data_config.test_scene_list 44 | 45 | data_config.transform = transform.ToTensor() 46 | 47 | return data_config 48 | 49 | 50 | def get_data(dataset, config): 51 | try: 52 | return eval(dataset)(config.DATA) 53 | except AttributeError: 54 | return eval(dataset)(config) 55 | 56 | 57 | def
get_database(dataset, config, mode="train"): 58 | 59 | # TODO: make this better 60 | database_config = copy(config.DATA) 61 | database_config.transform = transform.ToTensor() 62 | database_config.n_features = config.FEATURE_MODEL.n_features 63 | 64 | database_config.test_mode = mode == "val" or mode == "test" 65 | database_config.alpha_supervision = config.LOSS.alpha_supervision 66 | database_config.outlier_channel = ( 67 | config.FILTERING_MODEL.CONV3D_MODEL.outlier_channel 68 | ) 69 | database_config.scene_list = eval("config.DATA.{}_scene_list".format(mode)) 70 | 71 | return Database(dataset, database_config) 72 | 73 | 74 | def get_workspace(config): 75 | workspace_path = os.path.join(config.SETTINGS.experiment_path, config.TIMESTAMP) 76 | workspace = Workspace(workspace_path) 77 | workspace.save_config(config) 78 | return workspace 79 | 80 | 81 | def get_logger(path, name="training"): 82 | 83 | filehandler = logging.FileHandler(os.path.join(path, "{}.logs".format(name)), "a") 84 | consolehandler = logging.StreamHandler() 85 | 86 | formatter = logging.Formatter( 87 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 88 | ) 89 | 90 | filehandler.setFormatter(formatter) 91 | consolehandler.setFormatter(formatter) 92 | 93 | logger = logging.getLogger(name) 94 | 95 | for hdlr in logger.handlers[:]: # remove all old handlers 96 | logger.removeHandler(hdlr) 97 | 98 | logger.addHandler(filehandler) # set the new handler 99 | logger.addHandler(consolehandler) 100 | 101 | logger.setLevel(logging.DEBUG) 102 | 103 | return logger 104 | 105 | 106 | class Workspace(object): 107 | def __init__(self, path): 108 | 109 | self.workspace_path = path 110 | self.model_path = os.path.join(path, "model") 111 | self.log_path = os.path.join(path, "logs") 112 | self.output_path = os.path.join(path, "output") 113 | 114 | os.makedirs(self.workspace_path) 115 | os.makedirs(self.model_path) 116 | os.makedirs(self.log_path) 117 | os.makedirs(self.output_path) 118 | 119 | self._init_logger() 120 | 121 | def _init_logger(self): 122 | self.train_logger = get_logger(self.log_path, "training") 123 | self.val_logger = get_logger(self.log_path, "validation") 124 | 125 | def save_config(self, config): 126 | print("Saving config to ", self.workspace_path) 127 | save_config_to_json(self.workspace_path, config) 128 | 129 | def save_model_state(self, state, is_best, is_best_filt=None): 130 | save_checkpoint(state, is_best, self.model_path, is_best_filt) 131 | 132 | def save_alpha_histogram(self, database, sensors, epoch): 133 | 134 | for scene in database.scenes_gt.keys(): 135 | mask = np.zeros_like(database.sensor_weighting[scene], dtype=bool) 136 | for sensor_ in sensors: 137 | mask = np.logical_or( 138 | mask, (database.fusion_weights[sensor_][scene] > 0) 139 | ) 140 | 141 | hist = database.sensor_weighting[scene][mask].flatten().astype(np.float32) 142 | plt.hist(hist, bins=100) 143 | plt.savefig( 144 | self.output_path 145 | + "/sensor_weighting_grid_histogram_" 146 | + scene 147 | + "_epoch_" 148 | + str(epoch) 149 | + ".png" 150 | ) 151 | plt.clf() 152 | 153 | def log(self, message, mode="train"): 154 | if mode == "train": 155 | self.train_logger.info(message) 156 | elif mode == "val": 157 | self.val_logger.info(message) 158 | -------------------------------------------------------------------------------- /utils/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class ToTensor(object): 6 | 7 | """Convert ndarrays in sample 
to Tensors.""" 8 | 9 | def __call__(self, sample): 10 | 11 | result = {} 12 | 13 | for key in sample.keys(): 14 | if type(sample[key]) is np.ndarray: 15 | 16 | if key == "image": 17 | # swap color axis because 18 | # numpy image: H x W x C 19 | # torch image: C X H X W 20 | image = sample[key].transpose((2, 0, 1)) 21 | image = torch.from_numpy(image) 22 | result[key] = image 23 | continue 24 | 25 | result[key] = torch.from_numpy(sample[key]) 26 | 27 | else: 28 | result[key] = sample[key] 29 | 30 | return result 31 | -------------------------------------------------------------------------------- /utils/visualize_sensor_weighting.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | import trimesh 6 | import skimage.measure 7 | 8 | matplotlib.use("Agg") 9 | 10 | 11 | def visualize_sensor_weighting( 12 | tsdf, 13 | sensor_weighting, 14 | test_dir, 15 | mask, 16 | truncation, 17 | length, 18 | max_resolution, 19 | resolution, 20 | voxel_size, 21 | outlier_channel, 22 | mc, 23 | ): 24 | cmap = plt.get_cmap("inferno") 25 | 26 | if outlier_channel: 27 | sensor_weighting = sensor_weighting[0, :, :, :] 28 | 29 | hist = sensor_weighting[mask].flatten() 30 | plt.clf() # clear plot (important) 31 | cm = plt.get_cmap("inferno") 32 | n, bins, patches = plt.hist(hist, bins=100) 33 | for c, p in zip(bins, patches): 34 | plt.setp(p, "facecolor", cm(c)) 35 | plt.savefig(test_dir + "/sensor_weighting_grid_histogram_no_outlier_filter.png") 36 | plt.clf() 37 | 38 | if mc == "Open3D": 39 | # Create the mesh using the given mask 40 | tsdf_cube = np.zeros((max_resolution, max_resolution, max_resolution)) 41 | tsdf_cube[: resolution[0], : resolution[1], : resolution[2]] = tsdf 42 | 43 | indices_x = mask.nonzero()[0] 44 | indices_y = mask.nonzero()[1] 45 | indices_z = mask.nonzero()[2] 46 | 47 | volume = o3d.integration.UniformTSDFVolume( 48 | length=length, 49 | resolution=max_resolution, 50 | sdf_trunc=truncation, 51 | color_type=o3d.integration.TSDFVolumeColorType.RGB8, 52 | ) 53 | 54 | for i in range(indices_x.shape[0]): 55 | volume.set_tsdf_at( 56 | tsdf_cube[indices_x[i], indices_y[i], indices_z[i]], 57 | indices_x[i], 58 | indices_y[i], 59 | indices_z[i], 60 | ) 61 | volume.set_weight_at(1, indices_x[i], indices_y[i], indices_z[i]) 62 | 63 | print("Extract a triangle mesh from the volume and visualize it.") 64 | mesh = volume.extract_triangle_mesh() 65 | 66 | del volume 67 | mesh.compute_vertex_normals() 68 | 69 | # read vertices from mesh 70 | vertices = mesh.vertices 71 | 72 | # we need to subtract half a voxel size from the vertices to get to the voxel points 73 | # since the marching cubes algorithm of open3d thinks that the tsdf voxel vertices are 74 | # always located at the mid point between the metric space resolution i.e. if we have a tsdf 75 | # grid of shape 2,2,2, a voxel size of 1 and -0.5 at the first voxel and 0.5 at the next, the marching cubes algorithm will generate a surface at 1.5 and not at 1.0. 
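        # Worked example of the offset (illustrative, values assumed): with voxel_size = 1.0,
        # a mesh vertex returned by Open3D at metric coordinate 1.5 maps to voxel index
        # round((1.5 - 0.5) / 1.0) = 1, i.e. it is attributed to the voxel with grid index 1,
        # matching the mid-point convention described in the comment above.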
76 | voxel_points = np.round( 77 | np.asarray(vertices - voxel_size / 2) * 1 / voxel_size 78 | ).astype(int) 79 | elif mc == "skimage": 80 | # Skimage marching cubes 81 | # --------------------------------------------- 82 | (verts, faces, normals, values,) = skimage.measure.marching_cubes_lewiner( 83 | tsdf, 84 | level=0, 85 | spacing=(voxel_size, voxel_size, voxel_size), 86 | mask=preprocess_weight_grid(mask), 87 | ) 88 | 89 | voxel_points = np.round(np.asarray(verts) * 1 / voxel_size).astype(int) 90 | 91 | # add 0.5 * voxel_size to vertices to match Open3D marching cubes output 92 | mesh = o3d.geometry.TriangleMesh( 93 | vertices=o3d.utility.Vector3dVector(verts + voxel_size / 2), 94 | triangles=o3d.utility.Vector3iVector(faces), 95 | ) 96 | mesh.compute_vertex_normals() 97 | 98 | # remove voxels if they are outside of the voxelgrid - these are treated as uninitialized. 99 | valid_points = ( 100 | (voxel_points[:, 0] >= 0) 101 | * (voxel_points[:, 0] < sensor_weighting.shape[0]) 102 | * (voxel_points[:, 1] >= 0) 103 | * (voxel_points[:, 1] < sensor_weighting.shape[1]) 104 | * (voxel_points[:, 2] >= 0) 105 | * (voxel_points[:, 2] < sensor_weighting.shape[2]) 106 | ) 107 | filtered_voxel_points = voxel_points[valid_points, :] 108 | 109 | vals = -np.ones(voxel_points.shape[0]) 110 | vals[valid_points] = sensor_weighting[ 111 | filtered_voxel_points[:, 0], 112 | filtered_voxel_points[:, 1], 113 | filtered_voxel_points[:, 2], 114 | ] 115 | colors = cmap((vals * 255).astype(int))[:, :-1] 116 | 117 | if (vals == -1).sum() > 0: 118 | print("Invalid index or indices found among voxel points!") 119 | 120 | colors[vals == -1] = [0, 1, 0] # make all uninitialized voxels green 121 | mesh.vertex_colors = o3d.utility.Vector3dVector(colors) 122 | o3d.io.write_triangle_mesh( 123 | test_dir + "/sensor_weighting_no_outlier_filter.ply", mesh 124 | ) 125 | 126 | # compute surface histogram 127 | n, bins, patches = plt.hist(vals.flatten(), bins=100) 128 | for c, p in zip(bins, patches): 129 | plt.setp(p, "facecolor", cm(c)) 130 | plt.savefig(test_dir + "/sensor_weighting_surface_histogram_no_outlier_filter.png") 131 | plt.clf() 132 | 133 | 134 | def preprocess_weight_grid(weights): 135 | """Function to compute the weight mask for skimage marching cubes corresponding to how Open3D marching cubes deals with masking. Open3D requires that all 8 corners of the voxel are initialized in order to draw a surface while skimage only requires 1 of the voxels to be initialized e.g. the index (1,1,1) determines if the voxel at (0,0,0) is initialized etc. 
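    Illustrative example (assumed input): if every entry of a 2x2x2 weight grid is non-zero,
    the returned mask is True only at index (1, 1, 1), since that is the only index whose
    eight corner weights (itself and its seven neighbors toward the origin) are all
    initialized.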
136 | 137 | Args: 138 | weights: weight grid 139 | 140 | Returns: 141 | mask: boolean grid to be used as input to skimage marching cubes algorithm 142 | """ 143 | mask = np.zeros_like(weights) 144 | indices = np.array(weights.nonzero()) 145 | indices = indices[:, ~np.any(indices == 0, axis=0)] 146 | for index in range(indices.shape[1]): 147 | i = indices[:, index][0] 148 | j = indices[:, index][1] 149 | k = indices[:, index][2] 150 | mask[i, j, k] = weights[i, j, k] 151 | mask[i, j, k] = mask[i, j, k] and weights[i, j, k - 1] 152 | mask[i, j, k] = mask[i, j, k] and weights[i, j - 1, k] 153 | mask[i, j, k] = mask[i, j, k] and weights[i, j - 1, k - 1] 154 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j, k] 155 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j, k - 1] 156 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j - 1, k] 157 | mask[i, j, k] = mask[i, j, k] and weights[i - 1, j - 1, k - 1] 158 | 159 | return mask > 0 160 | -------------------------------------------------------------------------------- /videos/create_depth_video.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import cv2 9 | 10 | 11 | def arg_parse(): 12 | parser = argparse.ArgumentParser( 13 | description="Script for creating a video of the depth." 14 | ) 15 | 16 | parser.add_argument("--scene", required=True) 17 | parser.add_argument("--sensor", required=True) 18 | parser.add_argument("--trajectory", required=True) 19 | parser.add_argument("--dataset", required=True) 20 | 21 | args = parser.parse_args() 22 | 23 | return vars(args) 24 | 25 | 26 | # From Johannes Schoenberger code. 27 | def read_array(path): 28 | with open(path, "rb") as fid: 29 | width, height, channels = np.genfromtxt( 30 | fid, delimiter="&", max_rows=1, usecols=(0, 1, 2), dtype=int 31 | ) 32 | fid.seek(0) 33 | num_delimiter = 0 34 | byte = fid.read(1) 35 | while True: 36 | if byte == b"&": 37 | num_delimiter += 1 38 | if num_delimiter >= 3: 39 | break 40 | byte = fid.read(1) 41 | array = np.fromfile(fid, np.float32) 42 | 43 | array = array.reshape((width, height, channels), order="F") 44 | return np.transpose(array, (1, 0, 2)).squeeze() 45 | 46 | 47 | def get_depth(sensor, scene, trajectory, dataset): 48 | if dataset == "replica": 49 | input_dir = ( 50 | "/cluster/work/cvl/esandstroem/data/replica/manual/" 51 | + scene 52 | + "/" 53 | + trajectory 54 | + "/" 55 | + sensor 56 | ) 57 | else: 58 | if sensor == "tof": 59 | # corbs 60 | # input_dir = '/cluster/work/cvl/esandstroem/data/corbs/human/data/H1_pre_registereddata/depth' 61 | # scene3d 62 | input_dir = ( 63 | "/cluster/work/cvl/esandstroem/data/scene3d/copyroom/copyroom_png/depth" 64 | ) 65 | else: 66 | # corbs 67 | # input_dir = '/cluster/work/cvl/esandstroem/data/corbs/human/colmap/dense/stereo/depth_maps' 68 | # scene3d 69 | input_dir = "/cluster/work/cvl/esandstroem/data/scene3d/copyroom/dense/stereo/depth_maps" 70 | 71 | # define output dir 72 | output_folder = "/cluster/project/cvl/esandstroem/src/late_fusion_3dconvnet/videos/" 73 | output_folder += "depth/" + scene + "/" + sensor 74 | 75 | if not os.path.exists(output_folder): 76 | os.makedirs(output_folder) 77 | 78 | images = os.listdir(input_dir) 79 | 80 | if dataset == "replica": 81 | images = sorted(images, key=lambda x: float(x[:-4])) 82 | else: 83 | if sensor == "tof": 84 | images = sorted(images, key=lambda x: float(x[:-4])) 85 | else: 86 | images = [x for x in images if 
x.endswith("geometric.bin")] 87 | images = sorted(images, key=lambda x: float(x[:-18])) 88 | 89 | for k, im in enumerate(images): 90 | # print(im) 91 | if dataset == "replica": 92 | im = cv2.imread(input_dir + "/" + im, -1) 93 | elif sensor == "tof": 94 | im = cv2.imread(input_dir + "/" + im, -1) 95 | else: 96 | im = read_array(input_dir + "/" + im) 97 | 98 | print(k) 99 | # cv2.imwrite(im, input_dir + '/' + im) 100 | print(output_folder) 101 | plt.imsave( 102 | output_folder + "/" + "%04d" % k + ".png", 103 | np.asarray(im), 104 | vmin=0, 105 | vmax=5, 106 | dpi=1, 107 | ) 108 | 109 | # vmin=0, vmax=25000 110 | # if k > 100: 111 | # break 112 | 113 | # create video of the rendered images 114 | os.chdir(output_folder) 115 | os.system( 116 | "ffmpeg -framerate 15 -i %04d.png -vcodec libx264 -preset veryslow -c:a libmp3lame -r 15 -crf 25 -pix_fmt yuv420p " 117 | + "/".join(output_folder.split("/")[:-1]) 118 | + ".mp4" 119 | ) 120 | 121 | # remove the images folder 122 | os.system("rm -r " + output_folder) 123 | 124 | 125 | if __name__ == "__main__": 126 | 127 | # parse commandline arguments 128 | args = arg_parse() 129 | 130 | get_depth(args["sensor"], args["scene"], args["trajectory"], args["dataset"]) 131 | -------------------------------------------------------------------------------- /videos/render_option.json: -------------------------------------------------------------------------------- 1 | { 2 | "background_color" : [ 1, 1, 1 ], 3 | "class_name" : "RenderOption", 4 | "default_mesh_color" : [ 0.69999999999999996, 0.69999999999999996, 0.69999999999999996 ], 5 | "image_max_depth" : 3000, 6 | "image_stretch_option" : 0, 7 | "interpolation_option" : 0, 8 | "light0_color" : [ 1, 1, 1 ], 9 | "light0_diffuse_power" : 0.66000000000000003, 10 | "light0_position" : [ 0, 0, 2 ], 11 | "light0_specular_power" : 0.20000000000000001, 12 | "light0_specular_shininess" : 100, 13 | "light1_color" : [ 1, 1, 1 ], 14 | "light1_diffuse_power" : 0.66000000000000003, 15 | "light1_position" : [ 0, 0, 2 ], 16 | "light1_specular_power" : 0.20000000000000001, 17 | "light1_specular_shininess" : 100, 18 | "light2_color" : [ 1, 1, 1 ], 19 | "light2_diffuse_power" : 0.66000000000000003, 20 | "light2_position" : [ 0, 0, -2 ], 21 | "light2_specular_power" : 0.20000000000000001, 22 | "light2_specular_shininess" : 100, 23 | "light3_color" : [ 1, 1, 1 ], 24 | "light3_diffuse_power" : 0.66000000000000003, 25 | "light3_position" : [ 0, 0, -2 ], 26 | "light3_specular_power" : 0.20000000000000001, 27 | "light3_specular_shininess" : 100, 28 | "light_ambient_color" : [ 0, 0, 0 ], 29 | "light_on" : true, 30 | "mesh_color_option" : 1, 31 | "mesh_shade_option" : 0, 32 | "mesh_show_back_face" : false, 33 | "mesh_show_wireframe" : false, 34 | "point_color_option" : 9, 35 | "point_show_normal" : false, 36 | "point_size" : 5, 37 | "show_coordinate_frame" : false, 38 | "version_major" : 1, 39 | "version_minor" : 0 40 | } 41 | --------------------------------------------------------------------------------