├── .gitignore ├── LICENSE ├── __init__.py ├── augmentations.py ├── checkpoints ├── full_model_eigen │ └── checkpoint_eigen_split.ckpt ├── full_model_kitti │ ├── checkpoint_kitti_split.ckpt │ └── checkpoint_latest.ckpt └── full_model_kitti_ft │ └── checkpoint_kitti_ft.ckpt ├── core ├── commandline.py ├── configuration.py ├── logger.py ├── optim.py ├── runtime.py └── tools.py ├── datasets ├── __init__.py ├── cam_intrinsics │ ├── calib_cam_to_cam_2011_09_26.txt │ ├── calib_cam_to_cam_2011_09_28.txt │ ├── calib_cam_to_cam_2011_09_29.txt │ ├── calib_cam_to_cam_2011_09_30.txt │ └── calib_cam_to_cam_2011_10_03.txt ├── common.py ├── custom_batchsampler.py ├── index_generator │ ├── kitti_lidar_to_depth │ │ ├── godard_evaluation_kitti.py │ │ ├── godard_evaluation_utils.py │ │ └── test_files_eigen.txt │ ├── prepare_train_data.py │ └── provided │ │ ├── eigen_test_files.txt │ │ ├── excluded_frames.txt │ │ └── train_mapping.txt ├── index_txt │ ├── eigen_full.txt │ ├── eigen_text.txt │ ├── eigen_train.txt │ ├── eigen_valid.txt │ ├── kitti_full.txt │ ├── kitti_raw_all_imgs.txt │ ├── kitti_train.txt │ └── kitti_valid.txt ├── kitti_2015_test.py ├── kitti_2015_train.py ├── kitti_comb_mnsf.py ├── kitti_eigen_test.py ├── kitti_raw_monodepth.py └── kitti_raw_monosf.py ├── demo ├── demo.gif ├── demo_generator │ ├── cam_pose.json │ ├── kitti_img │ │ └── image_2 │ │ │ ├── 000139_10.png │ │ │ └── 000139_11.png │ ├── results │ │ ├── disp_0 │ │ │ └── 000139_10.png │ │ ├── disp_1 │ │ │ └── 000139_10.png │ │ └── flow │ │ │ └── 000139_10.png │ ├── run.py │ ├── utils_misc.py │ └── vis │ │ └── __init__ └── teaser.png ├── install_modules.sh ├── losses.py ├── main.py ├── models ├── __init__.py ├── correlation_package │ ├── __init__.py │ ├── correlation.py │ ├── correlation_cuda.cc │ ├── correlation_cuda_kernel.cu │ ├── correlation_cuda_kernel.cuh │ ├── readme.txt │ └── setup.py ├── forwardwarp_package │ ├── __init__.py │ ├── forward_warp.py │ ├── forward_warp_cuda.cpp │ ├── forward_warp_cuda_kernel.cu │ └── setup.py ├── model_monodepth_ablation.py ├── model_monosceneflow.py ├── model_monosceneflow_ablation.py ├── model_monosceneflow_ablation_decoder_split.py ├── modules_camconv.py ├── modules_monodepth.py └── modules_sceneflow.py ├── readme.md ├── requirements.txt ├── scripts ├── ablation1_augmentation │ ├── ablation1_eval_monodepth_aug.sh │ ├── ablation1_eval_monodepth_basic.sh │ ├── ablation1_eval_monodepth_cc.sh │ ├── ablation1_eval_monodepth_cc_aug.sh │ ├── ablation1_eval_monosf_base.sh │ ├── ablation1_eval_monosf_cc.sh │ ├── ablation1_eval_monosf_cc_aug.sh │ ├── ablation1_train_monodepth_kitti.sh │ ├── ablation1_train_monodepth_kitti_aug.sh │ ├── ablation1_train_monodepth_kitti_aug_cc.sh │ ├── ablation1_train_monodepth_kitti_cc.sh │ ├── ablation1_train_monosf_base.sh │ ├── ablation1_train_monosf_camconv.sh │ └── ablation1_train_monosf_camconv_aug.sh ├── ablation2_loss │ ├── ablation2_eval_monosf_loss_basic.sh │ ├── ablation2_eval_monosf_loss_noOcc.sh │ ├── ablation2_eval_monosf_loss_noPts.sh │ ├── ablation2_train_monosf_loss_basic.sh │ ├── ablation2_train_monosf_loss_noOcc.sh │ └── ablation2_train_monosf_loss_noPts.sh ├── ablation3_decoder_split │ ├── ablation3_eval_monosf_disp_only.sh │ ├── ablation3_eval_monosf_flow_only.sh │ ├── ablation3_eval_monosf_splitting_cont.sh │ ├── ablation3_eval_monosf_splitting_last1.sh │ ├── ablation3_eval_monosf_splitting_last2.sh │ ├── ablation3_eval_monosf_splitting_last3.sh │ ├── ablation3_eval_monosf_splitting_last4.sh │ ├── ablation3_eval_monosf_splitting_last5.sh │ ├── 
ablation3_train_monosf_disp_only.sh │ ├── ablation3_train_monosf_flow_only.sh │ ├── ablation3_train_monosf_splitting_cont.sh │ ├── ablation3_train_monosf_splitting_last1.sh │ ├── ablation3_train_monosf_splitting_last2.sh │ ├── ablation3_train_monosf_splitting_last3.sh │ ├── ablation3_train_monosf_splitting_last4.sh │ └── ablation3_train_monosf_splitting_last5.sh ├── eval_monodepth_selfsup_eigen_test.sh ├── eval_monodepth_selfsup_kitti_train.sh ├── eval_monosf_finetune_kitti_test.sh ├── eval_monosf_selfsup_kitti_test.sh ├── eval_monosf_selfsup_kitti_train.sh ├── train_monosf_kitti_finetune_1st_stage.sh ├── train_monosf_kitti_finetune_2nd_stage.sh ├── train_monosf_selfsup_eigen_train.sh └── train_monosf_selfsup_kitti_raw.sh └── utils ├── __init__.py ├── flow.py ├── interpolation.py ├── monodepth_eval.py └── sceneflow_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.o 4 | *.egg 5 | *.egg-info/ 6 | *.DS_Store -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/__init__.py -------------------------------------------------------------------------------- /checkpoints/full_model_eigen/checkpoint_eigen_split.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_eigen/checkpoint_eigen_split.ckpt -------------------------------------------------------------------------------- /checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt -------------------------------------------------------------------------------- /checkpoints/full_model_kitti/checkpoint_latest.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_kitti/checkpoint_latest.ckpt -------------------------------------------------------------------------------- /checkpoints/full_model_kitti_ft/checkpoint_kitti_ft.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_kitti_ft/checkpoint_kitti_ft.ckpt -------------------------------------------------------------------------------- /core/logger.py: -------------------------------------------------------------------------------- 1 | ## Portions of Code from, copyright 2018 Jochen Gast 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import colorama 6 | import logging 7 | import os 8 | import re 9 | import sys 10 | from core import tools 11 | 12 | 13 | def get_default_logging_format(colorize=False, brackets=False): 14 | style = colorama.Style.DIM if colorize else '' 15 | # color = colorama.Fore.CYAN if colorize else '' 16 | color = colorama.Fore.WHITE if colorize else '' 17 | reset = colorama.Style.RESET_ALL if colorize else '' 18 | if brackets: 19 | result = "{}{}[%(asctime)s]{} 
%(message)s".format(style, color, reset) 20 | else: 21 | result = "{}{}%(asctime)s{} %(message)s".format(style, color, reset) 22 | return result 23 | 24 | 25 | def get_default_logging_datefmt(): 26 | return "%Y-%m-%d %H:%M:%S" 27 | 28 | 29 | def log_module_info(module): 30 | lines = module.__str__().split("\n") 31 | for line in lines: 32 | logging.info(line) 33 | 34 | 35 | class LogbookFormatter(logging.Formatter): 36 | def __init__(self, fmt=None, datefmt=None): 37 | super(LogbookFormatter, self).__init__(fmt=fmt, datefmt=datefmt) 38 | self._re = re.compile(r"\033\[[0-9]+m") 39 | 40 | def remove_colors_from_msg(self, msg): 41 | msg = re.sub(self._re, "", msg) 42 | return msg 43 | 44 | def format(self, record=None): 45 | record.msg = self.remove_colors_from_msg(record.msg) 46 | return super(LogbookFormatter, self).format(record) 47 | 48 | 49 | class ConsoleFormatter(logging.Formatter): 50 | def __init__(self, fmt=None, datefmt=None): 51 | super(ConsoleFormatter, self).__init__(fmt=fmt, datefmt=datefmt) 52 | 53 | def format(self, record=None): 54 | indent = sys.modules[__name__].global_indent 55 | record.msg = " " * indent + record.msg 56 | return super(ConsoleFormatter, self).format(record) 57 | 58 | 59 | class SkipLogbookFilter(logging.Filter): 60 | def filter(self, record): 61 | return record.levelno != logging.LOGBOOK 62 | 63 | 64 | def configure_logging(filename=None): 65 | # set global indent level 66 | sys.modules[__name__].global_indent = 0 67 | 68 | # add custom tqdm logger 69 | tools.addLoggingLevel("LOGBOOK", 1000) 70 | 71 | # create logger 72 | root_logger = logging.getLogger("") 73 | root_logger.setLevel(logging.INFO) 74 | 75 | # create console handler and set level to debug 76 | console = logging.StreamHandler() 77 | console.setLevel(logging.INFO) 78 | fmt = get_default_logging_format(colorize=True, brackets=False) 79 | datefmt = get_default_logging_datefmt() 80 | formatter = ConsoleFormatter(fmt=fmt, datefmt=datefmt) 81 | console.setFormatter(formatter) 82 | 83 | # Skip logging.tqdm requests for console outputs 84 | skip_logbook_filter = SkipLogbookFilter() 85 | console.addFilter(skip_logbook_filter) 86 | 87 | # add console to root_logger 88 | root_logger.addHandler(console) 89 | 90 | # add logbook 91 | if filename is not None: 92 | # ensure dir 93 | d = os.path.dirname(filename) 94 | if not os.path.exists(d): 95 | os.makedirs(d) 96 | 97 | # -------------------------------------------------------------------------------------- 98 | # Configure handler that removes color codes from logbook 99 | # -------------------------------------------------------------------------------------- 100 | logbook = logging.FileHandler(filename=filename, mode="a", encoding="utf-8") 101 | logbook.setLevel(logging.INFO) 102 | fmt = get_default_logging_format(colorize=False, brackets=True) 103 | logbook_formatter = LogbookFormatter(fmt=fmt, datefmt=datefmt) 104 | logbook.setFormatter(logbook_formatter) 105 | root_logger.addHandler(logbook) 106 | 107 | 108 | class LoggingBlock: 109 | def __init__(self, title, emph=False): 110 | self._emph = emph 111 | bright = colorama.Style.BRIGHT 112 | cyan = colorama.Fore.CYAN 113 | reset = colorama.Style.RESET_ALL 114 | if emph: 115 | logging.info("%s==>%s %s%s%s" % (cyan, reset, bright, title, reset)) 116 | else: 117 | logging.info(title) 118 | 119 | def __enter__(self): 120 | sys.modules[__name__].global_indent += 2 121 | return self 122 | 123 | def __exit__(self, exc_type, exc_value, traceback): 124 | sys.modules[__name__].global_indent -= 2 125 | 
-------------------------------------------------------------------------------- /core/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | from core.tools import module_classes_to_dict 4 | 5 | # ------------------------------------------------------------------------------------ 6 | # Export PyTorch optimizer 7 | # ------------------------------------------------------------------------------------ 8 | _this = sys.modules[__name__] 9 | _optimizer_classes = module_classes_to_dict(torch.optim, exclude_classes="Optimizer") 10 | for name, constructor in _optimizer_classes.items(): 11 | setattr(_this, name, constructor) 12 | __all__ = _optimizer_classes.keys() 13 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from . import kitti_2015_train 2 | from . import kitti_2015_test 3 | 4 | from . import kitti_raw_monosf 5 | from . import kitti_raw_monodepth 6 | 7 | from . import kitti_comb_mnsf 8 | from . import kitti_eigen_test 9 | 10 | KITTI_2015_Train_Full_mnsf = kitti_2015_train.KITTI_2015_MonoSceneFlow_Full 11 | KITTI_2015_Train_Full_monodepth = kitti_2015_train.KITTI_2015_MonoDepth_Full 12 | 13 | KITTI_2015_Test = kitti_2015_test.KITTI_2015_Test 14 | 15 | KITTI_Raw_KittiSplit_Train_mnsf = kitti_raw_monosf.KITTI_Raw_KittiSplit_Train 16 | KITTI_Raw_KittiSplit_Valid_mnsf = kitti_raw_monosf.KITTI_Raw_KittiSplit_Valid 17 | KITTI_Raw_KittiSplit_Full_mnsf = kitti_raw_monosf.KITTI_Raw_KittiSplit_Full 18 | KITTI_Raw_EigenSplit_Train_mnsf = kitti_raw_monosf.KITTI_Raw_EigenSplit_Train 19 | KITTI_Raw_EigenSplit_Valid_mnsf = kitti_raw_monosf.KITTI_Raw_EigenSplit_Valid 20 | KITTI_Raw_EigenSplit_Full_mnsf = kitti_raw_monosf.KITTI_Raw_EigenSplit_Full 21 | 22 | KITTI_Raw_KittiSplit_Train_monodepth = kitti_raw_monodepth.KITTI_Raw_KittiSplit_Train 23 | KITTI_Raw_KittiSplit_Valid_monodepth = kitti_raw_monodepth.KITTI_Raw_KittiSplit_Valid 24 | 25 | KITTI_Comb_Train = kitti_comb_mnsf.KITTI_Comb_Train 26 | KITTI_Comb_Val = kitti_comb_mnsf.KITTI_Comb_Val 27 | KITTI_Comb_Full = kitti_comb_mnsf.KITTI_Comb_Full 28 | 29 | KITTI_Eigen_Test = kitti_eigen_test.KITTI_Eigen_Test 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_26.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:57:47 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.842439e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.808141e+02 2.331966e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.728755e-01 2.037299e-01 2.219027e-03 1.383707e-03 -7.233722e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: 2.573699e-16 -1.059758e-16 1.614870e-16 8 | S_rect_00: 1.242000e+03 3.750000e+02 9 | R_rect_00: 9.999239e-01 9.837760e-03 -7.445048e-03 -9.869795e-03 9.999421e-01 -4.278459e-03 7.402527e-03 4.351614e-03 9.999631e-01 10 | P_rect_00: 7.215377e+02 0.000000e+00 6.095593e+02 0.000000e+00 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.895267e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.878386e+02 2.455590e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: 
-3.644661e-01 1.790019e-01 1.148107e-03 -6.298563e-04 -5.314062e-02 14 | R_01: 9.993513e-01 1.860866e-02 -3.083487e-02 -1.887662e-02 9.997863e-01 -8.421873e-03 3.067156e-02 8.998467e-03 9.994890e-01 15 | T_01: -5.370000e-01 4.822061e-03 -1.252488e-02 16 | S_rect_01: 1.242000e+03 3.750000e+02 17 | R_rect_01: 9.996878e-01 -8.976826e-03 2.331651e-02 8.876121e-03 9.999508e-01 4.418952e-03 -2.335503e-02 -4.210612e-03 9.997184e-01 18 | P_rect_01: 7.215377e+02 0.000000e+00 6.095593e+02 -3.875744e+02 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.597910e+02 0.000000e+00 6.960217e+02 0.000000e+00 9.569251e+02 2.241806e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.691481e-01 1.968681e-01 1.353473e-03 5.677587e-04 -6.770705e-02 22 | R_02: 9.999758e-01 -5.267463e-03 -4.552439e-03 5.251945e-03 9.999804e-01 -3.413835e-03 4.570332e-03 3.389843e-03 9.999838e-01 23 | T_02: 5.956621e-02 2.900141e-04 2.577209e-03 24 | S_rect_02: 1.242000e+03 3.750000e+02 25 | R_rect_02: 9.998817e-01 1.511453e-02 -2.841595e-03 -1.511724e-02 9.998853e-01 -9.338510e-04 2.827154e-03 9.766976e-04 9.999955e-01 26 | P_rect_02: 7.215377e+02 0.000000e+00 6.095593e+02 4.485728e+01 0.000000e+00 7.215377e+02 1.728540e+02 2.163791e-01 0.000000e+00 0.000000e+00 1.000000e+00 2.745884e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.037596e+02 0.000000e+00 6.957519e+02 0.000000e+00 9.019653e+02 2.242509e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.639558e-01 1.788651e-01 6.029694e-04 -3.922424e-04 -5.382460e-02 30 | R_03: 9.995599e-01 1.699522e-02 -2.431313e-02 -1.704422e-02 9.998531e-01 -1.809756e-03 2.427880e-02 2.223358e-03 9.997028e-01 31 | T_03: -4.731050e-01 5.551470e-03 -5.250882e-03 32 | S_rect_03: 1.242000e+03 3.750000e+02 33 | R_rect_03: 9.998321e-01 -7.193136e-03 1.685599e-02 7.232804e-03 9.999712e-01 -2.293585e-03 -1.683901e-02 2.415116e-03 9.998553e-01 34 | P_rect_03: 7.215377e+02 0.000000e+00 6.095593e+02 -3.395242e+02 0.000000e+00 7.215377e+02 1.728540e+02 2.199936e+00 0.000000e+00 0.000000e+00 1.000000e+00 2.729905e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_28.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:58:50 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.812178e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.758994e+02 2.471364e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.791375e-01 2.148119e-01 1.227094e-03 2.343833e-03 -7.910379e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -7.686159e-17 -2.989062e-17 -1.366428e-16 8 | S_rect_00: 1.224000e+03 3.700000e+02 9 | R_rect_00: 9.999128e-01 1.009263e-02 -8.511932e-03 -1.012729e-02 9.999406e-01 -4.037671e-03 8.470675e-03 4.123522e-03 9.999556e-01 10 | P_rect_00: 7.070493e+02 0.000000e+00 6.040814e+02 0.000000e+00 0.000000e+00 7.070493e+02 1.805066e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.863925e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.821423e+02 2.588854e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.673556e-01 1.862563e-01 8.496128e-05 1.699076e-04 -5.822524e-02 14 | R_01: 9.993552e-01 1.830187e-02 -3.089048e-02 -1.855578e-02 9.997962e-01 -7.952999e-03 
3.073863e-02 8.521068e-03 9.994911e-01 15 | T_01: -5.370000e-01 4.509875e-03 -1.198621e-02 16 | S_rect_01: 1.224000e+03 3.700000e+02 17 | R_rect_01: 9.997157e-01 -8.395891e-03 2.231435e-02 8.304757e-03 9.999568e-01 4.173646e-03 -2.234842e-02 -3.987145e-03 9.997423e-01 18 | P_rect_01: 7.070493e+02 0.000000e+00 6.040814e+02 -3.797842e+02 0.000000e+00 7.070493e+02 1.805066e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.569475e+02 0.000000e+00 6.939767e+02 0.000000e+00 9.522352e+02 2.386081e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.750956e-01 2.076838e-01 4.348525e-04 1.603162e-03 -7.469243e-02 22 | R_02: 9.999838e-01 -5.012736e-03 -2.710741e-03 5.002007e-03 9.999797e-01 -3.950381e-03 2.730489e-03 3.936758e-03 9.999885e-01 23 | T_02: 5.989688e-02 -1.367835e-03 4.637624e-03 24 | S_rect_02: 1.224000e+03 3.700000e+02 25 | R_rect_02: 9.998691e-01 1.512763e-02 -5.741851e-03 -1.512861e-02 9.998855e-01 -1.287536e-04 5.739247e-03 2.156030e-04 9.999835e-01 26 | P_rect_02: 7.070493e+02 0.000000e+00 6.040814e+02 4.575831e+01 0.000000e+00 7.070493e+02 1.805066e+02 -3.454157e-01 0.000000e+00 0.000000e+00 1.000000e+00 4.981016e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.011007e+02 0.000000e+00 6.982947e+02 0.000000e+00 8.970639e+02 2.377447e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.686011e-01 1.908666e-01 -5.689518e-04 3.332341e-04 -6.302873e-02 30 | R_03: 9.995054e-01 1.665288e-02 -2.667675e-02 -1.671777e-02 9.998578e-01 -2.211228e-03 2.663614e-02 2.656110e-03 9.996417e-01 31 | T_03: -4.756270e-01 5.296617e-03 -5.437198e-03 32 | S_rect_03: 1.224000e+03 3.700000e+02 33 | R_rect_03: 9.998134e-01 -6.606294e-03 1.815174e-02 6.637329e-03 9.999766e-01 -1.650024e-03 -1.814042e-02 1.770195e-03 9.998339e-01 34 | P_rect_03: 7.070493e+02 0.000000e+00 6.040814e+02 -3.341081e+02 0.000000e+00 7.070493e+02 1.805066e+02 2.330660e+00 0.000000e+00 0.000000e+00 1.000000e+00 3.201153e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_29.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:59:12 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.803769e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.757217e+02 2.441228e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.715862e-01 2.009708e-01 1.363807e-03 1.588184e-03 -6.967696e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -5.551115e-17 -2.312965e-17 3.700743e-16 8 | S_rect_00: 1.238000e+03 3.740000e+02 9 | R_rect_00: 9.999478e-01 9.791707e-03 -2.925305e-03 -9.806939e-03 9.999382e-01 -5.238719e-03 2.873828e-03 5.267134e-03 9.999820e-01 10 | P_rect_00: 7.183351e+02 0.000000e+00 6.003891e+02 0.000000e+00 0.000000e+00 7.183351e+02 1.815122e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.904660e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.875575e+02 2.581179e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.645289e-01 1.817607e-01 7.785894e-05 -1.378884e-04 -5.526709e-02 14 | R_01: 9.993414e-01 1.820343e-02 -3.139101e-02 -1.853620e-02 9.997747e-01 -1.034255e-02 3.119567e-02 1.091761e-02 9.994537e-01 15 | T_01: -5.370000e-01 4.682272e-03 -1.524529e-02 16 | S_rect_01: 1.238000e+03 3.740000e+02 17 | R_rect_01: 9.995593e-01 
-8.715472e-03 2.837724e-02 8.566270e-03 9.999489e-01 5.375144e-03 -2.842263e-02 -5.129688e-03 9.995828e-01 18 | P_rect_01: 7.183351e+02 0.000000e+00 6.003891e+02 -3.858846e+02 0.000000e+00 7.183351e+02 1.815122e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.607501e+02 0.000000e+00 6.944288e+02 0.000000e+00 9.570051e+02 2.363374e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.687738e-01 1.977559e-01 5.991384e-04 8.972739e-04 -6.822522e-02 22 | R_02: 9.999807e-01 -5.053665e-03 -3.619905e-03 5.036396e-03 9.999760e-01 -4.764072e-03 3.643894e-03 4.745749e-03 9.999821e-01 23 | T_02: 5.948968e-02 -8.603063e-04 2.662728e-03 24 | S_rect_02: 1.238000e+03 3.740000e+02 25 | R_rect_02: 9.998896e-01 1.484154e-02 7.649204e-04 -1.484114e-02 9.998897e-01 -5.289052e-04 -7.726858e-04 5.174945e-04 9.999996e-01 26 | P_rect_02: 7.183351e+02 0.000000e+00 6.003891e+02 4.450382e+01 0.000000e+00 7.183351e+02 1.815122e+02 -5.951107e-01 0.000000e+00 0.000000e+00 1.000000e+00 2.616315e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.047872e+02 0.000000e+00 6.946163e+02 0.000000e+00 9.017079e+02 2.353088e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.643123e-01 1.845455e-01 -3.868479e-04 1.281135e-04 -5.959776e-02 30 | R_03: 9.995851e-01 1.666283e-02 -2.349366e-02 -1.674297e-02 9.998546e-01 -3.218496e-03 2.343662e-02 3.610514e-03 9.997188e-01 31 | T_03: -4.732167e-01 5.830806e-03 -4.405247e-03 32 | S_rect_03: 1.238000e+03 3.740000e+02 33 | R_rect_03: 9.997648e-01 -6.942395e-03 2.054627e-02 6.982006e-03 9.999739e-01 -1.856797e-03 -2.053284e-02 1.999814e-03 9.997872e-01 34 | P_rect_03: 7.183351e+02 0.000000e+00 6.003891e+02 -3.363147e+02 0.000000e+00 7.183351e+02 1.815122e+02 3.159867e+00 0.000000e+00 0.000000e+00 1.000000e+00 5.323834e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_30.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:59:33 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.786977e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.717435e+02 2.497222e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.792567e-01 2.121203e-01 9.182571e-04 1.911304e-03 -7.605535e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -1.850372e-17 6.938894e-17 -7.401487e-17 8 | S_rect_00: 1.226000e+03 3.700000e+02 9 | R_rect_00: 9.999280e-01 8.085985e-03 -8.866797e-03 -8.123205e-03 9.999583e-01 -4.169750e-03 8.832711e-03 4.241477e-03 9.999520e-01 10 | P_rect_00: 7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.892043e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.832048e+02 2.616538e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.720803e-01 1.944116e-01 -1.077099e-04 -9.031379e-05 -6.314998e-02 14 | R_01: 9.993424e-01 1.830363e-02 -3.129928e-02 -1.856768e-02 9.997943e-01 -8.166432e-03 3.114337e-02 8.742218e-03 9.994767e-01 15 | T_01: -5.370000e-01 5.591661e-03 -1.200541e-02 16 | S_rect_01: 1.226000e+03 3.700000e+02 17 | R_rect_01: 9.996960e-01 -1.040961e-02 2.234966e-02 1.031552e-02 9.999375e-01 4.321301e-03 -2.239324e-02 -4.089439e-03 9.997409e-01 18 | P_rect_01: 7.070912e+02 0.000000e+00 6.018873e+02 
-3.798145e+02 0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.591977e+02 0.000000e+00 6.944383e+02 0.000000e+00 9.529324e+02 2.416793e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.725637e-01 1.979803e-01 1.799970e-04 1.250593e-03 -6.608481e-02 22 | R_02: 9.999805e-01 -4.971067e-03 -3.793081e-03 4.954076e-03 9.999777e-01 -4.475856e-03 3.815246e-03 4.456977e-03 9.999828e-01 23 | T_02: 6.030222e-02 -1.293125e-03 5.900421e-03 24 | S_rect_02: 1.226000e+03 3.700000e+02 25 | R_rect_02: 9.999019e-01 1.307921e-02 -5.015634e-03 -1.307809e-02 9.999144e-01 2.561203e-04 5.018555e-03 -1.905003e-04 9.999874e-01 26 | P_rect_02: 7.070912e+02 0.000000e+00 6.018873e+02 4.688783e+01 0.000000e+00 7.070912e+02 1.831104e+02 1.178601e-01 0.000000e+00 0.000000e+00 1.000000e+00 6.203223e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.035972e+02 0.000000e+00 6.979803e+02 0.000000e+00 8.979356e+02 2.392935e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.726025e-01 1.973869e-01 -5.746215e-04 7.444947e-05 -6.699658e-02 30 | R_03: 9.994995e-01 1.667420e-02 -2.688514e-02 -1.673122e-02 9.998582e-01 -1.897204e-03 2.684969e-02 2.346075e-03 9.996367e-01 31 | T_03: -4.747879e-01 5.631988e-03 -5.233709e-03 32 | S_rect_03: 1.226000e+03 3.700000e+02 33 | R_rect_03: 9.998007e-01 -8.628355e-03 1.800315e-02 8.666473e-03 9.999604e-01 -2.040364e-03 -1.798483e-02 2.195981e-03 9.998358e-01 34 | P_rect_03: 7.070912e+02 0.000000e+00 6.018873e+02 -3.334597e+02 0.000000e+00 7.070912e+02 1.831104e+02 1.930130e+00 0.000000e+00 0.000000e+00 1.000000e+00 3.318498e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_10_03.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 14:00:15 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.799200e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.741183e+02 2.486443e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.745594e-01 2.049385e-01 1.110145e-03 1.379375e-03 -7.084798e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -9.251859e-17 8.326673e-17 -7.401487e-17 8 | S_rect_00: 1.241000e+03 3.760000e+02 9 | R_rect_00: 9.999454e-01 7.259129e-03 -7.519551e-03 -7.292213e-03 9.999638e-01 -4.381729e-03 7.487471e-03 4.436324e-03 9.999621e-01 10 | P_rect_00: 7.188560e+02 0.000000e+00 6.071928e+02 0.000000e+00 0.000000e+00 7.188560e+02 1.852157e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.903522e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.855674e+02 2.607319e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.712084e-01 1.978723e-01 -3.709831e-05 -3.440494e-04 -6.724045e-02 14 | R_01: 9.993440e-01 1.814887e-02 -3.134011e-02 -1.842595e-02 9.997935e-01 -8.575221e-03 3.117801e-02 9.147067e-03 9.994720e-01 15 | T_01: -5.370000e-01 5.964270e-03 -1.274584e-02 16 | S_rect_01: 1.241000e+03 3.760000e+02 17 | R_rect_01: 9.996568e-01 -1.110284e-02 2.372712e-02 1.099810e-02 9.999292e-01 4.539964e-03 -2.377585e-02 -4.277453e-03 9.997082e-01 18 | P_rect_01: 7.188560e+02 0.000000e+00 6.071928e+02 -3.861448e+02 0.000000e+00 7.188560e+02 1.852157e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | 
K_02: 9.601149e+02 0.000000e+00 6.947923e+02 0.000000e+00 9.548911e+02 2.403547e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.685917e-01 1.928022e-01 4.069233e-04 7.247536e-04 -6.276909e-02 22 | R_02: 9.999788e-01 -5.008404e-03 -4.151018e-03 4.990516e-03 9.999783e-01 -4.308488e-03 4.172506e-03 4.287682e-03 9.999821e-01 23 | T_02: 5.954406e-02 -7.675338e-04 3.582565e-03 24 | S_rect_02: 1.241000e+03 3.760000e+02 25 | R_rect_02: 9.999191e-01 1.228161e-02 -3.316013e-03 -1.228209e-02 9.999246e-01 -1.245511e-04 3.314233e-03 1.652686e-04 9.999945e-01 26 | P_rect_02: 7.188560e+02 0.000000e+00 6.071928e+02 4.538225e+01 0.000000e+00 7.188560e+02 1.852157e+02 -1.130887e-01 0.000000e+00 0.000000e+00 1.000000e+00 3.779761e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.049931e+02 0.000000e+00 6.957698e+02 0.000000e+00 9.004945e+02 2.389820e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.735725e-01 2.066816e-01 -6.133284e-04 -1.193269e-04 -7.600861e-02 30 | R_03: 9.995578e-01 1.656369e-02 -2.469315e-02 -1.663353e-02 9.998582e-01 -2.625576e-03 2.464616e-02 3.035149e-03 9.996916e-01 31 | T_03: -4.738786e-01 5.991982e-03 -3.215069e-03 32 | S_rect_03: 1.241000e+03 3.760000e+02 33 | R_rect_03: 9.998092e-01 -9.354781e-03 1.714961e-02 9.382303e-03 9.999548e-01 -1.525064e-03 -1.713457e-02 1.685675e-03 9.998518e-01 34 | P_rect_03: 7.188560e+02 0.000000e+00 6.071928e+02 -3.372877e+02 0.000000e+00 7.188560e+02 1.852157e+02 2.369057e+00 0.000000e+00 0.000000e+00 1.000000e+00 4.915215e-03 35 | -------------------------------------------------------------------------------- /datasets/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import numpy as np 6 | import skimage.io as io 7 | import png 8 | 9 | width_to_date = dict() 10 | width_to_date[1242] = '2011_09_26' 11 | width_to_date[1224] = '2011_09_28' 12 | width_to_date[1238] = '2011_09_29' 13 | width_to_date[1226] = '2011_09_30' 14 | width_to_date[1241] = '2011_10_03' 15 | 16 | 17 | def get_date_from_width(width): 18 | return width_to_date[width] 19 | 20 | 21 | def list_flatten(input_list): 22 | return [img for sub_list in input_list for img in sub_list] 23 | 24 | 25 | def intrinsic_scale(mat, sy, sx): 26 | out = mat.clone() 27 | out[0, 0] *= sx 28 | out[0, 2] *= sx 29 | out[1, 1] *= sy 30 | out[1, 2] *= sy 31 | return out 32 | 33 | 34 | def kitti_adjust_intrinsic(k_l1, k_r1, crop_info): 35 | str_x = crop_info[0] 36 | str_y = crop_info[1] 37 | k_l1[0, 2] -= str_x 38 | k_l1[1, 2] -= str_y 39 | k_r1[0, 2] -= str_x 40 | k_r1[1, 2] -= str_y 41 | return k_l1, k_r1 42 | 43 | def kitti_crop_image_list(img_list, crop_info): 44 | str_x = crop_info[0] 45 | str_y = crop_info[1] 46 | end_x = crop_info[2] 47 | end_y = crop_info[3] 48 | 49 | transformed = [img[str_y:end_y, str_x:end_x, :] for img in img_list] 50 | 51 | return transformed 52 | 53 | 54 | def numpy2torch(array): 55 | assert(isinstance(array, np.ndarray)) 56 | if array.ndim == 3: 57 | array = np.transpose(array, (2, 0, 1)) 58 | else: 59 | array = np.expand_dims(array, axis=0) 60 | return torch.from_numpy(array.copy()).float() 61 | 62 | 63 | def read_image_as_byte(filename): 64 | return io.imread(filename) 65 | 66 | 67 | def read_png_flow(flow_file): 68 | flow_object = png.Reader(filename=flow_file) 69 | flow_direct = flow_object.asDirect() 70 | flow_data = list(flow_direct[2]) 71 | (w, h) = flow_direct[3]['size'] 72 | flow = np.zeros((h, 
w, 3), dtype=np.float64)
    for i in range(len(flow_data)):
        flow[i, :, 0] = flow_data[i][0::3]
        flow[i, :, 1] = flow_data[i][1::3]
        flow[i, :, 2] = flow_data[i][2::3]

    # KITTI flow PNGs store (u * 64 + 2^15) per channel; the third channel marks validity
    invalid_idx = (flow[:, :, 2] == 0)
    flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
    flow[invalid_idx, 0] = 0
    flow[invalid_idx, 1] = 0
    return flow[:, :, 0:2], (1 - invalid_idx * 1)[:, :, None]


def read_png_disp(disp_file):
    disp_np = io.imread(disp_file).astype(np.uint16) / 256.0
    disp_np = np.expand_dims(disp_np, axis=2)
    mask_disp = (disp_np > 0).astype(np.float64)
    return disp_np, mask_disp


def read_raw_calib_file(filepath):
    # From https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
    """Read in a calibration file and parse into a dictionary."""
    data = {}

    with open(filepath, 'r') as f:
        for line in f.readlines():
            key, value = line.split(':', 1)
            # The only non-float values in these files are dates, which
            # we don't care about anyway
            try:
                data[key] = np.array([float(x) for x in value.split()])
            except ValueError:
                pass
    return data


def read_calib_into_dict(path_dir):

    calibration_file_list = ['2011_09_26', '2011_09_28', '2011_09_29', '2011_09_30', '2011_10_03']
    intrinsic_dict_l = {}
    intrinsic_dict_r = {}

    for ii, date in enumerate(calibration_file_list):
        file_name = "cam_intrinsics/calib_cam_to_cam_" + date + '.txt'
        file_name_full = os.path.join(path_dir, file_name)
        file_data = read_raw_calib_file(file_name_full)
        P_rect_02 = np.reshape(file_data['P_rect_02'], (3, 4))
        P_rect_03 = np.reshape(file_data['P_rect_03'], (3, 4))
        intrinsic_dict_l[date] = P_rect_02[:3, :3]
        intrinsic_dict_r[date] = P_rect_03[:3, :3]

    return intrinsic_dict_l, intrinsic_dict_r
--------------------------------------------------------------------------------
/datasets/custom_batchsampler.py:
--------------------------------------------------------------------------------
from torch.utils.data.sampler import Sampler

class CustomBatchSampler(Sampler):

    def __init__(self, sampler):
        for samp in sampler:
            if not isinstance(samp, Sampler):
                raise ValueError("sampler should be an instance of "
                                 "torch.utils.data.Sampler, but got sampler={}"
                                 .format(samp))
        self.samplers = sampler
        self.n_samples = [len(samp) for samp in self.samplers]
        self.sample_cnt = [0 for samp in self.samplers]
        self.iters = [iter(samp) for samp in self.samplers]

        self.batch_size = [1, 3]

    def __iter__(self):

        for ii in range(len(self)):

            for ss, samp in enumerate(self.samplers):
                self.sample_cnt[ss] += self.batch_size[ss]
                if self.sample_cnt[ss] > self.n_samples[ss]:
                    self.iters[ss] = iter(samp)
                    self.sample_cnt[ss] = self.batch_size[ss]

            batch = []

            ## for each sampler, offset indices so they address the concatenated dataset
            for ss in range(len(self.samplers)):
                if ss == 0:  # was "ss is 0": identity comparison with an int literal; use ==
                    prev_idx = 0
                else:
                    prev_idx = self.n_samples[ss-1]

                for bb in range(self.batch_size[ss]):
                    batch.append(next(self.iters[ss]) + prev_idx)

            yield batch

    def __len__(self):
        return len(self.samplers[0])
--------------------------------------------------------------------------------
/datasets/index_generator/kitti_lidar_to_depth/godard_evaluation_kitti.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from glob import glob 4 | from godard_evaluation_utils import * 5 | 6 | 7 | data_path = '/fastdata/jhur/KITTI_raw/' 8 | all_images = glob(data_path + '*/*/image_02/data/*.jpg') 9 | 10 | for ii in range(len(all_images)): 11 | all_images[ii] = all_images[ii].replace(data_path, "") 12 | 13 | # num_samples = 697 14 | # test_files = read_text_lines('test_files_eigen.txt') 15 | gt_files, gt_calib, im_sizes, im_files, cams = read_file_data(all_images, data_path) 16 | 17 | # num_test = len(im_files) 18 | num_samples = len(gt_files) 19 | print(num_samples) 20 | # for t_id in range(0, 2): 21 | for t_id in range(num_samples): 22 | print(t_id) 23 | camera_id = cams[t_id] # 2 is left, 3 is right 24 | depth = generate_depth_map(gt_calib[t_id], gt_files[t_id], im_sizes[t_id], camera_id, False, True) 25 | # need to convert from disparity to depth 26 | focal_length, baseline = get_focal_length_baseline(gt_calib[t_id], camera_id) 27 | 28 | npy_file_name = gt_files[t_id].replace("KITTI_raw", "KITTI_raw_depth").replace(".bin", ".npy").replace("velodyne_points", "projected_depth") 29 | npy_file_dir = os.path.dirname(npy_file_name) 30 | if not os.path.exists(npy_file_dir): 31 | os.makedirs(npy_file_dir) 32 | 33 | np.save(npy_file_name, depth) 34 | -------------------------------------------------------------------------------- /datasets/index_generator/provided/train_mapping.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 2011_09_26 2011_09_26_drive_0005_sync 0000000010 4 | 2011_09_26 2011_09_26_drive_0005_sync 0000000059 5 | 6 | 7 | 8 | 2011_09_26 2011_09_26_drive_0009_sync 0000000354 9 | 2011_09_26 2011_09_26_drive_0009_sync 0000000364 10 | 2011_09_26 2011_09_26_drive_0009_sync 0000000374 11 | 2011_09_26 2011_09_26_drive_0009_sync 0000000384 12 | 2011_09_26 2011_09_26_drive_0009_sync 0000000394 13 | 2011_09_26 2011_09_26_drive_0009_sync 0000000414 14 | 2011_09_26 2011_09_26_drive_0011_sync 0000000111 15 | 2011_09_26 2011_09_26_drive_0011_sync 0000000127 16 | 2011_09_26 2011_09_26_drive_0011_sync 0000000147 17 | 2011_09_26 2011_09_26_drive_0011_sync 0000000157 18 | 2011_09_26 2011_09_26_drive_0011_sync 0000000167 19 | 2011_09_26 2011_09_26_drive_0013_sync 0000000010 20 | 2011_09_26 2011_09_26_drive_0013_sync 0000000020 21 | 2011_09_26 2011_09_26_drive_0013_sync 0000000040 22 | 2011_09_26 2011_09_26_drive_0013_sync 0000000070 23 | 2011_09_26 2011_09_26_drive_0014_sync 0000000010 24 | 2011_09_26 2011_09_26_drive_0014_sync 0000000020 25 | 2011_09_26 2011_09_26_drive_0014_sync 0000000030 26 | 2011_09_26 2011_09_26_drive_0014_sync 0000000050 27 | 2011_09_26 2011_09_26_drive_0014_sync 0000000060 28 | 2011_09_26 2011_09_26_drive_0014_sync 0000000129 29 | 2011_09_26 2011_09_26_drive_0014_sync 0000000141 30 | 2011_09_26 2011_09_26_drive_0014_sync 0000000152 31 | 2011_09_26 2011_09_26_drive_0014_sync 0000000172 32 | 2011_09_26 2011_09_26_drive_0014_sync 0000000192 33 | 2011_09_26 2011_09_26_drive_0014_sync 0000000213 34 | 2011_09_26 2011_09_26_drive_0014_sync 0000000240 35 | 2011_09_26 2011_09_26_drive_0015_sync 0000000187 36 | 2011_09_26 2011_09_26_drive_0015_sync 0000000197 37 | 2011_09_26 2011_09_26_drive_0015_sync 0000000209 38 | 2011_09_26 2011_09_26_drive_0015_sync 0000000219 39 | 2011_09_26 2011_09_26_drive_0015_sync 0000000229 40 | 2011_09_26 2011_09_26_drive_0015_sync 0000000239 41 | 2011_09_26 2011_09_26_drive_0015_sync 0000000264 42 | 
2011_09_26 2011_09_26_drive_0015_sync 0000000273 43 | 2011_09_26 2011_09_26_drive_0015_sync 0000000286 44 | 2011_09_26 2011_09_26_drive_0017_sync 0000000010 45 | 2011_09_26 2011_09_26_drive_0017_sync 0000000030 46 | 2011_09_26 2011_09_26_drive_0017_sync 0000000040 47 | 2011_09_26 2011_09_26_drive_0017_sync 0000000050 48 | 2011_09_26 2011_09_26_drive_0018_sync 0000000046 49 | 2011_09_26 2011_09_26_drive_0018_sync 0000000066 50 | 2011_09_26 2011_09_26_drive_0018_sync 0000000076 51 | 2011_09_26 2011_09_26_drive_0018_sync 0000000086 52 | 2011_09_26 2011_09_26_drive_0018_sync 0000000096 53 | 2011_09_26 2011_09_26_drive_0018_sync 0000000106 54 | 2011_09_26 2011_09_26_drive_0018_sync 0000000133 55 | 2011_09_26 2011_09_26_drive_0019_sync 0000000030 56 | 2011_09_26 2011_09_26_drive_0019_sync 0000000087 57 | 2011_09_26 2011_09_26_drive_0019_sync 0000000097 58 | 2011_09_26 2011_09_26_drive_0022_sync 0000000634 59 | 2011_09_26 2011_09_26_drive_0022_sync 0000000644 60 | 2011_09_26 2011_09_26_drive_0022_sync 0000000654 61 | 2011_09_26 2011_09_26_drive_0027_sync 0000000053 62 | 2011_09_26 2011_09_26_drive_0027_sync 0000000103 63 | 2011_09_26 2011_09_26_drive_0028_sync 0000000071 64 | 2011_09_26 2011_09_26_drive_0028_sync 0000000118 65 | 2011_09_26 2011_09_26_drive_0028_sync 0000000228 66 | 2011_09_26 2011_09_26_drive_0028_sync 0000000269 67 | 2011_09_26 2011_09_26_drive_0028_sync 0000000284 68 | 2011_09_26 2011_09_26_drive_0028_sync 0000000303 69 | 2011_09_26 2011_09_26_drive_0028_sync 0000000313 70 | 2011_09_26 2011_09_26_drive_0028_sync 0000000378 71 | 2011_09_26 2011_09_26_drive_0029_sync 0000000016 72 | 2011_09_26 2011_09_26_drive_0029_sync 0000000123 73 | 2011_09_26 2011_09_26_drive_0032_sync 0000000095 74 | 2011_09_26 2011_09_26_drive_0032_sync 0000000114 75 | 2011_09_26 2011_09_26_drive_0032_sync 0000000125 76 | 2011_09_26 2011_09_26_drive_0032_sync 0000000207 77 | 2011_09_26 2011_09_26_drive_0032_sync 0000000218 78 | 2011_09_26 2011_09_26_drive_0032_sync 0000000330 79 | 2011_09_26 2011_09_26_drive_0032_sync 0000000340 80 | 2011_09_26 2011_09_26_drive_0032_sync 0000000350 81 | 2011_09_26 2011_09_26_drive_0032_sync 0000000360 82 | 2011_09_26 2011_09_26_drive_0032_sync 0000000378 83 | 84 | 2011_09_26 2011_09_26_drive_0036_sync 0000000054 85 | 2011_09_26 2011_09_26_drive_0036_sync 0000000402 86 | 2011_09_26 2011_09_26_drive_0046_sync 0000000052 87 | 2011_09_26 2011_09_26_drive_0046_sync 0000000062 88 | 89 | 2011_09_26 2011_09_26_drive_0051_sync 0000000023 90 | 2011_09_26 2011_09_26_drive_0051_sync 0000000218 91 | 2011_09_26 2011_09_26_drive_0051_sync 0000000230 92 | 2011_09_26 2011_09_26_drive_0051_sync 0000000282 93 | 2011_09_26 2011_09_26_drive_0051_sync 0000000292 94 | 2011_09_26 2011_09_26_drive_0051_sync 0000000302 95 | 2011_09_26 2011_09_26_drive_0051_sync 0000000312 96 | 2011_09_26 2011_09_26_drive_0051_sync 0000000322 97 | 2011_09_26 2011_09_26_drive_0051_sync 0000000342 98 | 2011_09_26 2011_09_26_drive_0051_sync 0000000356 99 | 2011_09_26 2011_09_26_drive_0051_sync 0000000379 100 | 101 | 102 | 103 | 104 | 105 | 106 | 2011_09_26 2011_09_26_drive_0056_sync 0000000010 107 | 2011_09_26 2011_09_26_drive_0056_sync 0000000082 108 | 2011_09_26 2011_09_26_drive_0056_sync 0000000122 109 | 2011_09_26 2011_09_26_drive_0056_sync 0000000132 110 | 2011_09_26 2011_09_26_drive_0056_sync 0000000191 111 | 2011_09_26 2011_09_26_drive_0056_sync 0000000201 112 | 2011_09_26 2011_09_26_drive_0056_sync 0000000282 113 | 2011_09_26 2011_09_26_drive_0057_sync 0000000125 114 | 2011_09_26 2011_09_26_drive_0057_sync 
0000000140 115 | 2011_09_26 2011_09_26_drive_0057_sync 0000000176 116 | 2011_09_26 2011_09_26_drive_0057_sync 0000000299 117 | 2011_09_26 2011_09_26_drive_0057_sync 0000000319 118 | 2011_09_26 2011_09_26_drive_0057_sync 0000000339 119 | 2011_09_26 2011_09_26_drive_0059_sync 0000000026 120 | 2011_09_26 2011_09_26_drive_0059_sync 0000000046 121 | 2011_09_26 2011_09_26_drive_0059_sync 0000000137 122 | 2011_09_26 2011_09_26_drive_0059_sync 0000000150 123 | 2011_09_26 2011_09_26_drive_0059_sync 0000000260 124 | 2011_09_26 2011_09_26_drive_0059_sync 0000000280 125 | 2011_09_26 2011_09_26_drive_0059_sync 0000000290 126 | 2011_09_26 2011_09_26_drive_0059_sync 0000000300 127 | 2011_09_26 2011_09_26_drive_0059_sync 0000000310 128 | 2011_09_26 2011_09_26_drive_0059_sync 0000000320 129 | 2011_09_26 2011_09_26_drive_0070_sync 0000000069 130 | 2011_09_26 2011_09_26_drive_0070_sync 0000000224 131 | 2011_09_26 2011_09_26_drive_0084_sync 0000000084 132 | 2011_09_26 2011_09_26_drive_0084_sync 0000000179 133 | 2011_09_26 2011_09_26_drive_0084_sync 0000000238 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 2011_09_26 2011_09_26_drive_0096_sync 0000000020 143 | 2011_09_26 2011_09_26_drive_0096_sync 0000000278 144 | 2011_09_26 2011_09_26_drive_0096_sync 0000000381 145 | 2011_09_26 2011_09_26_drive_0101_sync 0000000109 146 | 2011_09_26 2011_09_26_drive_0101_sync 0000000175 147 | 2011_09_26 2011_09_26_drive_0101_sync 0000000447 148 | 2011_09_26 2011_09_26_drive_0101_sync 0000000457 149 | 2011_09_26 2011_09_26_drive_0101_sync 0000000809 150 | 2011_09_26 2011_09_26_drive_0104_sync 0000000015 151 | 2011_09_26 2011_09_26_drive_0104_sync 0000000035 152 | 153 | 154 | 155 | 156 | 2011_09_28 2011_09_28_drive_0002_sync 0000000343 157 | 158 | 2011_09_29 2011_09_29_drive_0004_sync 0000000036 159 | 2011_09_29 2011_09_29_drive_0004_sync 0000000079 160 | 2011_09_29 2011_09_29_drive_0004_sync 0000000094 161 | 2011_09_29 2011_09_29_drive_0004_sync 0000000105 162 | 2011_09_29 2011_09_29_drive_0004_sync 0000000162 163 | 2011_09_29 2011_09_29_drive_0004_sync 0000000258 164 | 2011_09_29 2011_09_29_drive_0004_sync 0000000285 165 | 2011_09_29 2011_09_29_drive_0004_sync 0000000308 166 | 167 | 168 | 169 | 2011_09_29 2011_09_29_drive_0071_sync 0000000059 170 | 2011_09_29 2011_09_29_drive_0071_sync 0000000943 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 2011_10_03 2011_10_03_drive_0047_sync 0000000556 201 | -------------------------------------------------------------------------------- /datasets/kitti_2015_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte, read_calib_into_dict, get_date_from_width 10 | 11 | 12 | 13 | class KITTI_2015_Test(data.Dataset): 14 | def __init__(self, 15 | args, 16 | root): 17 | 18 | self._args = args 19 | 20 | images_l_root = os.path.join(root, "data_scene_flow", "testing", "image_2_jpg") 21 | images_r_root = os.path.join(root, "data_scene_flow", "testing", "image_3_jpg") 22 | 23 | ## loading image ----------------------------------- 24 | if not os.path.isdir(images_l_root): 25 | raise ValueError("Image directory %s not found!", images_l_root) 26 | if not 
os.path.isdir(images_r_root):
            raise ValueError("Image directory %s not found!", images_r_root)

        # ----------------------------------------------------------
        # Construct list of indices for training/validation
        # ----------------------------------------------------------
        num_images = 200
        list_of_indices = range(num_images)

        # ----------------------------------------------------------
        # Save list of actual filenames for inputs and disp/flow
        # ----------------------------------------------------------
        path_dir = os.path.dirname(os.path.realpath(__file__))
        self._image_list = []
        self._flow_list = []
        self._disp_list = []
        img_ext = '.jpg'

        for ii in list_of_indices:

            file_idx = '%.6d' % ii

            im_l1 = os.path.join(images_l_root, file_idx + "_10" + img_ext)
            im_l2 = os.path.join(images_l_root, file_idx + "_11" + img_ext)
            im_r1 = os.path.join(images_r_root, file_idx + "_10" + img_ext)
            im_r2 = os.path.join(images_r_root, file_idx + "_11" + img_ext)


            file_list = [im_l1, im_l2, im_r1, im_r2]
            for _, item in enumerate(file_list):
                if not os.path.isfile(item):
                    raise ValueError("File does not exist: %s", item)

            self._image_list.append([im_l1, im_l2, im_r1, im_r2])

        self._size = len(self._image_list)
        assert len(self._image_list) != 0

        ## loading calibration matrix
        self.intrinsic_dict_l = {}
        self.intrinsic_dict_r = {}
        self.intrinsic_dict_l, self.intrinsic_dict_r = read_calib_into_dict(path_dir)

        self._to_tensor = vision_transforms.Compose([
            vision_transforms.ToPILImage(),
            vision_transforms.transforms.ToTensor()
        ])


    def __getitem__(self, index):

        index = index % self._size
        im_l1_filename = self._image_list[index][0]
        im_l2_filename = self._image_list[index][1]
        im_r1_filename = self._image_list[index][2]
        im_r2_filename = self._image_list[index][3]

        # read float32 images and flow
        im_l1_np = read_image_as_byte(im_l1_filename)
        im_l2_np = read_image_as_byte(im_l2_filename)
        im_r1_np = read_image_as_byte(im_r1_filename)
        im_r2_np = read_image_as_byte(im_r2_filename)

        # example filename
        basename = os.path.basename(im_l1_filename)[:6]

        # find intrinsic
        k_l1 = torch.from_numpy(self.intrinsic_dict_l[get_date_from_width(im_l1_np.shape[1])]).float()
        k_r1 = torch.from_numpy(self.intrinsic_dict_r[get_date_from_width(im_r1_np.shape[1])]).float()

        im_l1 = self._to_tensor(im_l1_np)
        im_l2 = self._to_tensor(im_l2_np)
        im_r1 = self._to_tensor(im_r1_np)
        im_r2 = self._to_tensor(im_r2_np)

        # input size
        h_orig, w_orig, _ = im_l1_np.shape
        input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float()

        example_dict = {
            "input_l1": im_l1,
            "input_l2": im_l2,
            "input_r1": im_r1,
            "input_r2": im_r2,
            "index": index,
            "basename": basename,
            "input_k_l1": k_l1,
            "input_k_l2": k_l1,
            "input_k_r1": k_r1,
            "input_k_r2": k_r1,
            "input_size": input_im_size
        }

        return example_dict

    def __len__(self):
        return self._size

--------------------------------------------------------------------------------
/datasets/kitti_comb_mnsf.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function

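## Illustration (assumed wiring, not from this file): KITTI_Comb_Train below
## concatenates an annotated KITTI-2015 split (dataset1) with unlabeled KITTI
## raw clips (dataset2). It is presumably meant to be batched with
## CustomBatchSampler (datasets/custom_batchsampler.py), whose batch_size of
## [1, 3] yields one annotated and three unlabeled examples per batch and
## offsets the second sampler's indices to address the ConcatDataset. A
## minimal sketch:
##
##   from torch.utils.data import DataLoader, RandomSampler
##   from datasets.custom_batchsampler import CustomBatchSampler
##
##   dataset = KITTI_Comb_Train(args, root="/path/to/data")  # hypothetical root
##   batch_sampler = CustomBatchSampler([RandomSampler(dataset.dataset1),
##                                       RandomSampler(dataset.dataset2)])
##   loader = DataLoader(dataset, batch_sampler=batch_sampler)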
3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte 10 | from .common import kitti_crop_image_list, kitti_adjust_intrinsic 11 | 12 | ## Combining datasets 13 | from .kitti_2015_train import KITTI_2015_MonoSceneFlow 14 | from .kitti_raw_monosf import KITTI_Raw 15 | from torch.utils.data.dataset import ConcatDataset 16 | 17 | 18 | 19 | class KITTI_Raw_for_Finetune(KITTI_Raw): 20 | def __init__(self, 21 | args, 22 | root, 23 | flip_augmentations=True, 24 | preprocessing_crop=True, 25 | crop_size=[370, 1224], 26 | num_examples=-1, 27 | index_file=""): 28 | super(KITTI_Raw_for_Finetune, self).__init__( 29 | args, 30 | images_root=root, 31 | flip_augmentations=flip_augmentations, 32 | preprocessing_crop=preprocessing_crop, 33 | crop_size=crop_size, 34 | num_examples=num_examples, 35 | index_file=index_file) 36 | 37 | def __getitem__(self, index): 38 | index = index % self._size 39 | 40 | # read images and flow 41 | img_list_np = [read_image_as_byte(img) for img in self._image_list[index]] 42 | 43 | # example filename 44 | im_l1_filename = self._image_list[index][0] 45 | basename = os.path.basename(im_l1_filename)[:6] 46 | dirname = os.path.dirname(im_l1_filename)[-51:] 47 | datename = dirname[:10] 48 | k_l1 = torch.from_numpy(self.intrinsic_dict_l[datename]).float() 49 | k_r1 = torch.from_numpy(self.intrinsic_dict_r[datename]).float() 50 | 51 | # input size 52 | h_orig, w_orig, _ = img_list_np[0].shape 53 | input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float() 54 | 55 | # cropping 56 | if self._preprocessing_crop: 57 | # get starting positions 58 | crop_height = self._crop_size[0] 59 | crop_width = self._crop_size[1] 60 | x = np.random.uniform(0, w_orig - crop_width + 1) 61 | y = np.random.uniform(0, h_orig - crop_height + 1) 62 | crop_info = [int(x), int(y), int(x + crop_width), int(y + crop_height)] 63 | 64 | # cropping images and adjust intrinsic accordingly 65 | img_list_np = kitti_crop_image_list(img_list_np, crop_info) 66 | k_l1, k_r1 = kitti_adjust_intrinsic(k_l1, k_r1, crop_info) 67 | 68 | # to tensors 69 | img_list_tensor = [self._to_tensor(img) for img in img_list_np] 70 | im_l1 = img_list_tensor[0] 71 | im_l2 = img_list_tensor[1] 72 | im_r1 = img_list_tensor[2] 73 | im_r2 = img_list_tensor[3] 74 | 75 | void_tensor1 = im_l1[0:1, :, :] * 0 76 | void_tensor2 = im_l1[0:2, :, :] * 0 77 | 78 | common_dict = { 79 | "index": index, 80 | "basename": basename, 81 | "datename": datename, 82 | "input_size": input_im_size, 83 | "target_flow": void_tensor2, 84 | "target_flow_mask": void_tensor1, 85 | "target_flow_noc": void_tensor2, 86 | "target_flow_mask_noc": void_tensor1, 87 | "target_disp": void_tensor1, 88 | "target_disp_mask": void_tensor1, 89 | "target_disp2_occ": void_tensor1, 90 | "target_disp2_mask_occ": void_tensor1, 91 | "target_disp_noc": void_tensor1, 92 | "target_disp_mask_noc": void_tensor1, 93 | "target_disp2_noc": void_tensor1, 94 | "target_disp2_mask_noc": void_tensor1 95 | } 96 | 97 | # random flip 98 | if self._flip_augmentations is True and torch.rand(1) > 0.5: 99 | _, _, ww = im_l1.size() 100 | im_l1_flip = torch.flip(im_l1, dims=[2]) 101 | im_l2_flip = torch.flip(im_l2, dims=[2]) 102 | im_r1_flip = torch.flip(im_r1, dims=[2]) 103 | im_r2_flip = torch.flip(im_r2, dims=[2]) 104 | 105 | k_l1[0, 2] = ww - k_l1[0, 2] 106 | k_r1[0, 2] = ww - k_r1[0, 2] 107 | 108 | example_dict = { 109 | "input_l1": im_r1_flip, 110 | 
"input_r1": im_l1_flip, 111 | "input_l2": im_r2_flip, 112 | "input_r2": im_l2_flip, 113 | "input_k_l1": k_r1, 114 | "input_k_r1": k_l1, 115 | "input_k_l2": k_r1, 116 | "input_k_r2": k_l1, 117 | } 118 | example_dict.update(common_dict) 119 | 120 | else: 121 | example_dict = { 122 | "input_l1": im_l1, 123 | "input_r1": im_r1, 124 | "input_l2": im_l2, 125 | "input_r2": im_r2, 126 | "input_k_l1": k_l1, 127 | "input_k_r1": k_r1, 128 | "input_k_l2": k_l1, 129 | "input_k_r2": k_r1, 130 | } 131 | example_dict.update(common_dict) 132 | 133 | return example_dict 134 | 135 | 136 | class KITTI_Comb_Train(ConcatDataset): 137 | def __init__(self, args, root): 138 | 139 | self.dataset1 = KITTI_2015_MonoSceneFlow( 140 | args, 141 | root + '/KITTI_flow/', 142 | preprocessing_crop=True, 143 | crop_size=[370, 1224], 144 | dstype="train") 145 | 146 | self.dataset2 = KITTI_Raw_for_Finetune( 147 | args, 148 | root + '/KITTI_raw_noPCL/', 149 | flip_augmentations=True, 150 | preprocessing_crop=True, 151 | crop_size=[370, 1224], 152 | num_examples=-1, 153 | index_file='index_txt/kitti_full.txt') 154 | 155 | super(KITTI_Comb_Train, self).__init__( 156 | datasets=[self.dataset1, self.dataset2]) 157 | 158 | 159 | class KITTI_Comb_Val(KITTI_2015_MonoSceneFlow): 160 | def __init__(self, 161 | args, 162 | root, 163 | preprocessing_crop=False, 164 | crop_size=[370, 1224]): 165 | super(KITTI_Comb_Val, self).__init__( 166 | args, 167 | data_root=root + '/KITTI_flow/', 168 | preprocessing_crop=preprocessing_crop, 169 | crop_size=crop_size, 170 | dstype="valid") 171 | 172 | 173 | 174 | class KITTI_Comb_Full(ConcatDataset): 175 | def __init__(self, args, root): 176 | 177 | self.dataset1 = KITTI_2015_MonoSceneFlow( 178 | args, 179 | root + '/KITTI_flow/', 180 | preprocessing_crop=True, 181 | crop_size=[370, 1224], 182 | dstype="full") 183 | 184 | self.dataset2 = KITTI_Raw_for_Finetune( 185 | args, 186 | root + '/KITTI_raw_noPCL/', 187 | flip_augmentations=True, 188 | preprocessing_crop=True, 189 | crop_size=[370, 1224], 190 | num_examples=-1, 191 | index_file='index_txt/kitti_raw_all_imgs.txt') 192 | 193 | super(KITTI_Comb_Full, self).__init__( 194 | datasets=[self.dataset1, self.dataset2]) 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /datasets/kitti_eigen_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte, read_calib_into_dict 10 | 11 | 12 | 13 | class KITTI_Eigen_Test(data.Dataset): 14 | def __init__(self, 15 | args, 16 | root, 17 | num_examples=-1): 18 | 19 | self._args = args 20 | 21 | index_file = "index_txt/eigen_text.txt" 22 | 23 | 24 | path_dir = os.path.dirname(os.path.realpath(__file__)) 25 | path_index_file = os.path.join(path_dir, index_file) 26 | 27 | if not os.path.exists(path_index_file): 28 | raise ValueError("Index File '%s' not found!", path_index_file) 29 | index_file = open(path_index_file, 'r') 30 | 31 | ## loading image ----------------------------------- 32 | if not os.path.isdir(root): 33 | raise ValueError("Image directory '%s' not found!", root) 34 | 35 | filename_list = [line.rstrip().split(' ') for line in index_file.readlines()] 36 | self._image_list = [] 37 | 38 | view1 = 'image_02/data' 39 | view2 = 'image_03/data' 40 | ext = 
'.jpg' 41 | for item in filename_list: 42 | 43 | name_l1 = root + '/' + item[0] 44 | name_depth = (root + '/' + item[0]).replace("jpg", "npy").replace("image_02", "projected_depth") 45 | idx_src = item[0].split('/')[4].split('.')[0] 46 | idx_tgt = '%.10d' % (int(idx_src) + 1) 47 | name_l2 = name_l1.replace(idx_src, idx_tgt) 48 | if not os.path.isfile(name_l2): 49 | idx_prev = '%.10d' % (int(idx_src) - 1) 50 | name_l2 = name_l1.replace(idx_src, idx_prev) 51 | 52 | if os.path.isfile(name_l1) and os.path.isfile(name_l2) and os.path.isfile(name_depth): 53 | self._image_list.append([name_l1, name_l2, name_depth]) 54 | 55 | if num_examples > 0: 56 | self._image_list = self._image_list[:num_examples] 57 | 58 | self._size = len(self._image_list) 59 | 60 | ## loading calibration matrix 61 | self.intrinsic_dict_l = {} 62 | self.intrinsic_dict_r = {} 63 | self.intrinsic_dict_l, self.intrinsic_dict_r = read_calib_into_dict(path_dir) 64 | 65 | self._to_tensor = vision_transforms.Compose([ 66 | vision_transforms.ToPILImage(), 67 | vision_transforms.transforms.ToTensor() 68 | ]) 69 | 70 | def __getitem__(self, index): 71 | index = index % self._size 72 | 73 | im_l1_filename = self._image_list[index][0] 74 | im_l2_filename = self._image_list[index][1] 75 | depth_filename = self._image_list[index][2] 76 | 77 | # read images and flow 78 | im_l1_np = read_image_as_byte(im_l1_filename) 79 | im_l2_np = read_image_as_byte(im_l2_filename) 80 | im_l1_depth_np = np.load(depth_filename) 81 | 82 | # example filename 83 | basename = os.path.dirname(im_l1_filename).split('/')[-3] + '_' + os.path.basename(im_l1_filename).split('.')[0] 84 | dirname = os.path.dirname(im_l1_filename)[-51:] 85 | datename = dirname[:10] 86 | 87 | k_l1 = torch.from_numpy(self.intrinsic_dict_l[datename]).float() 88 | k_r1 = torch.from_numpy(self.intrinsic_dict_r[datename]).float() 89 | 90 | im_l1 = self._to_tensor(im_l1_np) 91 | im_l2 = self._to_tensor(im_l2_np) 92 | im_l1_depth = torch.from_numpy(im_l1_depth_np).unsqueeze(0).float() 93 | 94 | # input size 95 | h_orig, w_orig, _ = im_l1_np.shape 96 | input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float() 97 | 98 | 99 | example_dict = { 100 | "input_l1": im_l1, 101 | "input_l2": im_l2, 102 | "index": index, 103 | "basename": basename, 104 | "datename": datename, 105 | "input_k_l1": k_l1, 106 | "input_k_l2": k_l1, 107 | "input_size": input_im_size, 108 | "target_depth": im_l1_depth 109 | } 110 | 111 | return example_dict 112 | 113 | def __len__(self): 114 | return self._size 115 | -------------------------------------------------------------------------------- /datasets/kitti_raw_monodepth.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte, read_calib_into_dict 10 | from .common import kitti_crop_image_list, kitti_adjust_intrinsic 11 | from .common import intrinsic_scale 12 | 13 | 14 | class KITTI_Raw(data.Dataset): 15 | def __init__(self, 16 | args, 17 | images_root=None, 18 | preprocessing_crop=False, 19 | crop_size=[370, 1224], 20 | num_examples=-1, 21 | index_file=None): 22 | 23 | self._args = args 24 | self._seq_len = 1 25 | self._preprocessing_crop = preprocessing_crop 26 | self._crop_size = crop_size 27 | 28 | path_dir = os.path.dirname(os.path.realpath(__file__)) 29 | 
path_index_file = os.path.join(path_dir, index_file) 30 | 31 | if not os.path.exists(path_index_file): 32 | raise ValueError("Index File '%s' not found!" % path_index_file) 33 | index_file = open(path_index_file, 'r') 34 | 35 | ## loading image ----------------------------------- 36 | if not os.path.isdir(images_root): 37 | raise ValueError("Image directory '%s' not found!" % images_root) 38 | 39 | filename_list = [line.rstrip().split(' ') for line in index_file.readlines()] 40 | self._image_list = [] 41 | view1 = 'image_02/data' 42 | view2 = 'image_03/data' 43 | ext = '.jpg' 44 | for item in filename_list: 45 | date = item[0][:10] 46 | scene = item[0] 47 | idx_src = item[1] 48 | for ii in range(self._seq_len): 49 | idx_tgt = '%.10d' % (int(idx_src) + ii + 1) 50 | name_l1 = os.path.join(images_root, date, scene, view1, idx_src) + ext 51 | name_r1 = os.path.join(images_root, date, scene, view2, idx_src) + ext 52 | if os.path.isfile(name_l1) and os.path.isfile(name_r1): 53 | self._image_list.append([name_l1, name_r1]) 54 | 55 | if num_examples > 0: 56 | self._image_list = self._image_list[:num_examples] 57 | 58 | self._size = len(self._image_list) 59 | 60 | ## loading calibration matrix 61 | self.intrinsic_dict_l = {} 62 | self.intrinsic_dict_r = {} 63 | self.intrinsic_dict_l, self.intrinsic_dict_r = read_calib_into_dict(path_dir) 64 | 65 | # ---------------------------------------------------------- 66 | # Image resize only 67 | # ---------------------------------------------------------- 68 | self._resize_to_tensor = vision_transforms.Compose([ 69 | vision_transforms.ToPILImage(), 70 | vision_transforms.Resize((256, 512)), 71 | vision_transforms.transforms.ToTensor() 72 | ]) 73 | self._to_tensor = vision_transforms.Compose([ 74 | vision_transforms.transforms.ToTensor() 75 | ]) 76 | 77 | def __getitem__(self, index): 78 | index = index % self._size 79 | 80 | im_l1_filename = self._image_list[index][0] 81 | im_r1_filename = self._image_list[index][1] 82 | 83 | # read input images 84 | im_l1_np = read_image_as_byte(im_l1_filename) 85 | im_r1_np = read_image_as_byte(im_r1_filename) 86 | 87 | # example filename 88 | basename = os.path.basename(im_l1_filename)[:6] 89 | dirname = os.path.dirname(im_l1_filename)[-51:] 90 | datename = dirname[:10] 91 | k_l1 = torch.from_numpy(self.intrinsic_dict_l[datename]).float() 92 | k_r1 = torch.from_numpy(self.intrinsic_dict_r[datename]).float() 93 | k_l1_orig = k_l1.clone() 94 | 95 | h_orig, w_orig, _ = im_l1_np.shape 96 | input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float() 97 | 98 | # resizing image 99 | if self._preprocessing_crop == False: 100 | # No Geometric Augmentation, Resizing to 256 x 512 here 101 | # resizing input images 102 | im_l1 = self._resize_to_tensor(im_l1_np) 103 | im_r1 = self._resize_to_tensor(im_r1_np) 104 | # resizing intrinsic matrix 105 | k_l1 = intrinsic_scale(k_l1, im_l1.size(1) / h_orig, im_l1.size(2) / w_orig) 106 | k_r1 = intrinsic_scale(k_r1, im_r1.size(1) / h_orig, im_r1.size(2) / w_orig) 107 | else: 108 | # For Geometric Augmentation, first cropping the images to 370 x 1224 here, 109 | # then do the augmentation in augmentations.py 110 | # get starting positions 111 | crop_height = self._crop_size[0] 112 | crop_width = self._crop_size[1] 113 | x = np.random.uniform(0, w_orig - crop_width + 1) 114 | y = np.random.uniform(0, h_orig - crop_height + 1) 115 | crop_info = [int(x), int(y), int(x + crop_width), int(y + crop_height)] 116 | 117 | # cropping images and adjusting the intrinsics accordingly 118 | im_l1_np, im_r1_np = 
kitti_crop_image_list([im_l1_np, im_r1_np], crop_info) 119 | im_l1 = self._to_tensor(im_l1_np) 120 | im_r1 = self._to_tensor(im_r1_np) 121 | k_l1, k_r1 = kitti_adjust_intrinsic(k_l1, k_r1, crop_info) 122 | 123 | # For CamCOnv 124 | k_r1_flip = k_r1.clone() 125 | k_r1_flip[0, 2] = im_r1.size(2) - k_r1_flip[0, 2] 126 | 127 | example_dict = { 128 | "input_l1": im_l1, 129 | "input_r1": im_r1, 130 | "index": index, 131 | "basename": basename, 132 | "datename": datename, 133 | "input_k_l1_orig": k_l1_orig, 134 | "input_k_l1": k_l1, 135 | "input_k_r1": k_r1, 136 | "input_k_r1_flip": k_r1_flip, 137 | "input_size": input_im_size 138 | } 139 | 140 | return example_dict 141 | 142 | def __len__(self): 143 | return self._size 144 | 145 | 146 | class KITTI_Raw_KittiSplit_Train(KITTI_Raw): 147 | def __init__(self, 148 | args, 149 | root, 150 | preprocessing_crop=False, 151 | crop_size=[370, 1224], 152 | num_examples=-1): 153 | super(KITTI_Raw_KittiSplit_Train, self).__init__( 154 | args, 155 | images_root=root, 156 | preprocessing_crop=preprocessing_crop, 157 | crop_size=crop_size, 158 | num_examples=num_examples, 159 | index_file="index_txt/kitti_train.txt") 160 | 161 | 162 | class KITTI_Raw_KittiSplit_Valid(KITTI_Raw): 163 | def __init__(self, 164 | args, 165 | root, 166 | preprocessing_crop=False, 167 | crop_size=[370, 1224], 168 | num_examples=-1): 169 | super(KITTI_Raw_KittiSplit_Valid, self).__init__( 170 | args, 171 | images_root=root, 172 | preprocessing_crop=preprocessing_crop, 173 | crop_size=crop_size, 174 | num_examples=num_examples, 175 | index_file="index_txt/kitti_valid.txt") -------------------------------------------------------------------------------- /demo/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo.gif -------------------------------------------------------------------------------- /demo/demo_generator/cam_pose.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_name" : "PinholeCameraTrajectory", 3 | "parameters" : 4 | [ 5 | { 6 | "class_name" : "PinholeCameraParameters", 7 | "extrinsic" : 8 | [ 9 | 0.99875666779623684, 10 | -0.015804533047987467, 11 | 0.047279332352442131, 12 | 0, 13 | 0.0042556066034366898, 14 | 0.97198214070610867, 15 | 0.2350161865931947, 16 | 0, 17 | -0.049668987758906415, 18 | -0.23452278116103262, 19 | 0.97084090188428929, 20 | 0, 21 | -2.1888509581876607, 22 | -1.181867321777915, 23 | 0.94394657256919134, 24 | 1 25 | ], 26 | "intrinsic" : 27 | { 28 | "height" : 376, 29 | "intrinsic_matrix" : 30 | [ 31 | 718.856, 32 | 0, 33 | 0, 34 | 0, 35 | 718.856, 36 | 0, 37 | 607.1928, 38 | 185.2157, 39 | 1 40 | ], 41 | "width" : 1241 42 | }, 43 | "version_major" : 1, 44 | "version_minor" : 0 45 | } 46 | ], 47 | "version_major" : 1, 48 | "version_minor" : 0 49 | } -------------------------------------------------------------------------------- /demo/demo_generator/kitti_img/image_2/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/kitti_img/image_2/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/kitti_img/image_2/000139_11.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/kitti_img/image_2/000139_11.png -------------------------------------------------------------------------------- /demo/demo_generator/results/disp_0/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/results/disp_0/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/results/disp_1/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/results/disp_1/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/results/flow/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/results/flow/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import skimage.io as io 4 | from skimage.color import rgb2gray 5 | # from skimage.color import lab2rgb 6 | 7 | import open3d as o3d 8 | import numpy as np 9 | import torch 10 | import math 11 | 12 | from utils_misc import flow_to_png_middlebury, read_png_flow, read_png_disp 13 | from utils_misc import numpy2torch, pixel2pts_ms 14 | 15 | width_to_focal = dict() 16 | width_to_focal[1242] = 721.5377 17 | width_to_focal[1241] = 718.856 18 | width_to_focal[1224] = 707.0493 19 | width_to_focal[1238] = 718.3351 20 | width_to_focal[1226] = 707.0912 21 | 22 | cam_center_dict = dict() 23 | cam_center_dict[1242] = [6.095593e+02, 1.728540e+02] 24 | cam_center_dict[1241] = [6.071928e+02, 1.852157e+02] 25 | cam_center_dict[1224] = [6.040814e+02, 1.805066e+02] 26 | cam_center_dict[1238] = [6.003891e+02, 1.815122e+02] 27 | cam_center_dict[1226] = [6.018873e+02, 1.831104e+02] 28 | 29 | 30 | ######## 31 | sampling = [4,20,25,35,40] 32 | imgflag = 1 # 0 is image, 1 is flow 33 | ######## 34 | 35 | 36 | 37 | def get_pcd(img_idx, image_dir, result_dir, tt): 38 | 39 | idx_curr = '%06d' % (img_idx) 40 | 41 | im1_np0 = (io.imread(os.path.join(image_dir, "image_2/" + idx_curr + "_10.png")) / np.float32(255.0))[110:, :, :] 42 | 43 | flo_f_np0 = read_png_flow(os.path.join(result_dir, "flow/" + idx_curr + "_10.png"))[110:, :, :] 44 | disp1_np0 = read_png_disp(os.path.join(result_dir, "disp_0/" + idx_curr + "_10.png"))[110:, :, :] 45 | disp2_np0 = read_png_disp(os.path.join(result_dir, "disp_1/" + idx_curr + "_10.png"))[110:, :, :] 46 | 47 | im1 = numpy2torch(im1_np0).unsqueeze(0) 48 | disp1 = numpy2torch(disp1_np0).unsqueeze(0) 49 | disp_diff = numpy2torch(disp2_np0).unsqueeze(0) 50 | flo_f = numpy2torch(flo_f_np0).unsqueeze(0) 51 | 52 | _, _, hh, ww = im1.size() 53 | 54 | ## Intrinsic 55 | focal_length = width_to_focal[ww] 56 | cx = cam_center_dict[ww][0] 57 | cy = cam_center_dict[ww][1] 58 | 59 | k1_np = np.array([[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]]) 60 | k1 = numpy2torch(k1_np) 61 | 62 | # Forward warping Pts1 using disp_change and flow 63 | pts1 = pixel2pts_ms(disp1, k1) 64 | pts1_warp = 
pixel2pts_ms(disp_diff, k1, flo_f) 65 | sf = pts1_warp - pts1 66 | 67 | ## Composing Image 68 | im1_np0_g = np.repeat(np.expand_dims(rgb2gray(im1_np0), axis=2), 3, axis=2) 69 | flow = torch.cat((sf[:, 0:1, :, :], sf[:, 2:3, :, :]), dim=1).data.cpu().numpy()[0, :, :, :] 70 | flow_img = flow_to_png_middlebury(flow) / np.float32(255.0) 71 | 72 | if imgflag == 0: 73 | flow_img = im1_np0 74 | else: 75 | flow_img = (flow_img * 0.75 + im1_np0_g * 0.25) 76 | 77 | ## Crop 78 | max_crop = (60, 0.7, 82) 79 | min_crop = (-60, -20, 0) 80 | 81 | x1 = -60 82 | x2 = 60 83 | y1 = 0.7 84 | y2 = -20 85 | z1 = 80 86 | z2 = 0 87 | pp1 = np.array([[x1, y1, z1]]) 88 | pp2 = np.array([[x1, y1, z2]]) 89 | pp3 = np.array([[x1, y2, z1]]) 90 | pp4 = np.array([[x1, y2, z2]]) 91 | pp5 = np.array([[x2, y1, z1]]) 92 | pp6 = np.array([[x2, y1, z2]]) 93 | pp7 = np.array([[x2, y2, z1]]) 94 | pp8 = np.array([[x2, y2, z2]]) 95 | bb_pts = np.concatenate((pp1, pp2, pp3, pp4, pp5, pp6, pp7, pp8), axis=0) 96 | wp = np.array([[1.0, 1.0, 1.0]]) 97 | bb_colors = np.concatenate((wp, wp, wp, wp, wp, wp, wp, wp), axis=0) 98 | 99 | ## Open3D Vis 100 | pts1_tform = pts1 + sf*tt 101 | pts1_np = np.transpose(pts1_tform[0].view(3, -1).data.numpy(), (1, 0)) 102 | pts1_np = np.concatenate((pts1_np, bb_pts), axis=0) 103 | pts1_color = np.reshape(flow_img, (hh * ww, 3)) 104 | pts1_color = np.concatenate((pts1_color, bb_colors), axis=0) 105 | 106 | pcd1 = o3d.geometry.PointCloud() 107 | pcd1.points = o3d.utility.Vector3dVector(pts1_np) 108 | pcd1.colors = o3d.utility.Vector3dVector(pts1_color) 109 | 110 | bbox = o3d.geometry.AxisAlignedBoundingBox(min_crop, max_crop) 111 | pcd1 = pcd1.crop(bbox) 112 | 113 | return pcd1 114 | 115 | 116 | def custom_vis(imglist, kitti_data_dir, result_dir, vis_dir): 117 | 118 | custom_vis.index = 0 119 | custom_vis.trajectory = o3d.io.read_pinhole_camera_trajectory("cam_pose.json") 120 | custom_vis.vis = o3d.visualization.Visualizer() 121 | 122 | img_id = imglist[custom_vis.index] 123 | init_pcd = get_pcd(img_id, kitti_data_dir, result_dir, 0) 124 | custom_vis.prev_pcd = init_pcd 125 | 126 | def move_forward(vis): 127 | 128 | glb = custom_vis 129 | 130 | ## Capture 131 | depth = vis.capture_depth_float_buffer(False) 132 | image = vis.capture_screen_float_buffer(False) 133 | save_id = imglist[glb.index-1] 134 | file_name = "" 135 | 136 | if imgflag == 0: 137 | file_name = os.path.join(vis_dir, "{:06d}_{:02d}.png".format(save_id, glb.index)) 138 | else: 139 | file_name = os.path.join(vis_dir, "{:06d}_{:02d}.png".format(save_id, glb.index)) 140 | 141 | print(' ' + str(glb.index) + ' '+ str(save_id) + ' '+ file_name) 142 | io.imsave(file_name, np.asarray(image), check_contrast=False) 143 | 144 | ## Rendering 145 | max_d_x = 13 146 | max_d_y = 4 147 | 148 | if glb.index < sampling[0]: 149 | tt = 0 150 | rx = 0 151 | ry = 0 152 | elif glb.index < sampling[1]: # only rotation 153 | tt = 0 154 | rad = 2 * 3.14159265359 / (sampling[1] - sampling[0]) * (glb.index - sampling[0]) 155 | rx = max_d_x * math.sin(rad) 156 | ry = (max_d_y * math.cos(rad) - max_d_y) 157 | elif glb.index < sampling[2]: 158 | tt = 0 159 | rx = 0 160 | ry = 0 161 | elif glb.index < sampling[3]: 162 | tt = (glb.index - sampling[2]) / (sampling[3] - sampling[2]) 163 | rx = 0 164 | ry = 0 165 | else: 166 | tt = 1 167 | rx = 0 168 | ry = 0 169 | 170 | img_id = imglist[glb.index] 171 | pcd = get_pcd(img_id, kitti_data_dir, result_dir, tt) 172 | glb.index = glb.index + 1 173 | 174 | vis.clear_geometries() 175 | vis.add_geometry(pcd) 176 | glb.prev_pcd = pcd 
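# Re-adding the geometry resets the viewer's camera (an assumption about
# Open3D's clear_geometries()/add_geometry() behavior), so the view-control
# calls below re-apply the base transform issued after create_window()
# further down, plus the per-frame orbit offsets rx/ry computed above.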
177 | 178 | ctr = vis.get_view_control() 179 | ctr.scale(-24) 180 | 181 | ctr.rotate(rx, 980.0 + ry, 0, 0) 182 | ctr.translate(-5, 0) 183 | 184 | if not glb.index < len(imglist): 185 | custom_vis.vis.register_animation_callback(None) 186 | 187 | return False 188 | 189 | vis = custom_vis.vis 190 | vis.create_window() 191 | vis.add_geometry(init_pcd) 192 | 193 | ctr = vis.get_view_control() 194 | ctr.scale(-24) 195 | ctr.rotate(0, 980.0, 0, 0) 196 | ctr.translate(-5, 0) 197 | vis.register_animation_callback(move_forward) 198 | vis.run() 199 | vis.destroy_window() 200 | 201 | ######################################################################## 202 | 203 | kitti_data_dir = "demo/demo_generator/kitti_img" ## raw KITTI image 204 | result_dir = "demo/demo_generator/results" ## disp_0, disp_1, flow 205 | vis_dir = "demo/demo_generator/vis" ## visualization output folder 206 | 207 | imglist = [] 208 | 209 | for ii in range(0, sampling[-1]): 210 | imglist.append(139) 211 | 212 | custom_vis(imglist, kitti_data_dir, result_dir, vis_dir) -------------------------------------------------------------------------------- /demo/demo_generator/utils_misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import skimage.io as io 4 | import cv2 5 | 6 | TAG_CHAR = np.array([202021.25], np.float32) 7 | UNKNOWN_FLOW_THRESH = 1e7 8 | 9 | 10 | def compute_color(u, v): 11 | """ 12 | compute optical flow color map 13 | :param u: optical flow horizontal map 14 | :param v: optical flow vertical map 15 | :return: optical flow in color code 16 | """ 17 | [h, w] = u.shape 18 | img = np.zeros([h, w, 3]) 19 | nanIdx = np.isnan(u) | np.isnan(v) 20 | u[nanIdx] = 0 21 | v[nanIdx] = 0 22 | 23 | colorwheel = make_color_wheel() 24 | ncols = np.size(colorwheel, 0) 25 | 26 | rad = np.sqrt(u ** 2 + v ** 2) 27 | 28 | a = np.arctan2(-v, -u) / np.pi 29 | 30 | fk = (a + 1) / 2 * (ncols - 1) + 1 31 | 32 | k0 = np.floor(fk).astype(int) 33 | 34 | k1 = k0 + 1 35 | k1[k1 == ncols + 1] = 1 36 | f = fk - k0 37 | 38 | for i in range(0, np.size(colorwheel, 1)): 39 | tmp = colorwheel[:, i] 40 | col0 = tmp[k0 - 1] / 255 41 | col1 = tmp[k1 - 1] / 255 42 | col = (1 - f) * col0 + f * col1 43 | 44 | idx = rad <= 1 45 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 46 | notidx = np.logical_not(idx) 47 | 48 | col[notidx] *= 0.75 49 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) 50 | 51 | return img 52 | 53 | 54 | def make_color_wheel(): 55 | """ 56 | Generate color wheel according Middlebury color code 57 | :return: Color wheel 58 | """ 59 | RY = 15 60 | YG = 6 61 | GC = 4 62 | CB = 11 63 | BM = 13 64 | MR = 6 65 | 66 | ncols = RY + YG + GC + CB + BM + MR 67 | 68 | colorwheel = np.zeros([ncols, 3]) 69 | 70 | col = 0 71 | 72 | # RY 73 | colorwheel[0:RY, 0] = 255 74 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 75 | col += RY 76 | 77 | # YG 78 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 79 | colorwheel[col:col + YG, 1] = 255 80 | col += YG 81 | 82 | # GC 83 | colorwheel[col:col + GC, 1] = 255 84 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 85 | col += GC 86 | 87 | # CB 88 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 89 | colorwheel[col:col + CB, 2] = 255 90 | col += CB 91 | 92 | # BM 93 | colorwheel[col:col + BM, 2] = 255 94 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 
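# (each RY/YG/GC/CB/BM/MR segment in this function linearly ramps one RGB
# channel between adjacent anchor colors; 'col' tracks the running row
# offset into the wheel)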
95 | col += + BM 96 | 97 | # MR 98 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 99 | colorwheel[col:col + MR, 0] = 255 100 | 101 | return colorwheel 102 | 103 | 104 | def flow_to_png_middlebury(flow): 105 | """ 106 | Convert flow into middlebury color code image 107 | :param flow: optical flow map 108 | :return: optical flow image in middlebury color 109 | """ 110 | 111 | flow = flow.transpose([1, 2, 0]) 112 | u = flow[:, :, 0] 113 | v = flow[:, :, 1] 114 | 115 | maxu = -999. 116 | maxv = -999. 117 | minu = 999. 118 | minv = 999. 119 | 120 | idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) 121 | u[idxUnknow] = 0 122 | v[idxUnknow] = 0 123 | 124 | maxu = max(maxu, np.max(u)) 125 | minu = min(minu, np.min(u)) 126 | 127 | maxv = max(maxv, np.max(v)) 128 | minv = min(minv, np.min(v)) 129 | 130 | rad = np.sqrt(u ** 2 + v ** 2) 131 | maxrad = max(-1, np.max(rad)) 132 | # maxrad = 4 133 | 134 | u = u / (maxrad + np.finfo(float).eps) 135 | v = v / (maxrad + np.finfo(float).eps) 136 | 137 | img = compute_color(u, v) 138 | 139 | idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) 140 | img[idx] = 0 141 | 142 | return np.uint8(img) 143 | 144 | 145 | def numpy2torch(array): 146 | assert(isinstance(array, np.ndarray)) 147 | if array.ndim == 3: 148 | array = np.transpose(array, (2, 0, 1)) 149 | else: 150 | array = np.expand_dims(array, axis=0) 151 | return torch.from_numpy(array.copy()).float() 152 | 153 | 154 | def get_pixelgrid(b, h, w, flow=None): 155 | 156 | grid_h = torch.linspace(0.0, w - 1, w).view(1, 1, 1, w).expand(b, 1, h, w) 157 | grid_v = torch.linspace(0.0, h - 1, h).view(1, 1, h, 1).expand(b, 1, h, w) 158 | ones = torch.ones_like(grid_h) 159 | 160 | if flow is None: 161 | pixelgrid = torch.cat((grid_h, grid_v, ones), dim=1).float().requires_grad_(False) 162 | else: 163 | pixelgrid = torch.cat((grid_h + flow[:, 0:1, :, :], grid_v + flow[:, 1:2, :, :], ones), dim=1).float().requires_grad_(False) 164 | 165 | return pixelgrid 166 | 167 | 168 | def pixel2pts(depth, intrinsic, flow=None): 169 | 170 | b, _, h, w = depth.size() 171 | pixelgrid = get_pixelgrid(b, h, w, flow) 172 | 173 | depth_mat = depth.view(b, 1, -1) 174 | pixel_mat = pixelgrid.view(b, 3, -1) 175 | 176 | pts_mat = torch.matmul(torch.inverse(intrinsic), pixel_mat) * depth_mat 177 | 178 | pts = pts_mat.view(b, -1, h, w) 179 | 180 | return pts, pixelgrid 181 | 182 | def disp2depth_kitti(pred_disp, focal_length): 183 | pred_depth = focal_length * 0.54 / pred_disp 184 | pred_depth = torch.clamp(pred_depth, 1e-3, 80) 185 | return pred_depth 186 | 187 | def pixel2pts_ms(output_disp, intrinsic, flow=None): 188 | focal_length = intrinsic[:, 0, 0] 189 | output_depth = disp2depth_kitti(output_disp, focal_length) 190 | pts, _ = pixel2pts(output_depth, intrinsic, flow) 191 | return pts 192 | 193 | 194 | def get_grid(x): 195 | grid_H = torch.linspace(-1.0, 1.0, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) 196 | grid_V = torch.linspace(-1.0, 1.0, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) 197 | grid = torch.cat([grid_H, grid_V], 1) 198 | grids_cuda = grid.float().requires_grad_(False) 199 | return grids_cuda 200 | 201 | 202 | def read_png_disp(disp_file): 203 | disp_np = io.imread(disp_file).astype(np.uint16) / 256.0 204 | disp_np = np.expand_dims(disp_np, axis=2) 205 | mask_disp = (disp_np > 0).astype(np.float64) 206 | return disp_np 207 | 208 | def read_png_flow(flow_file): 209 | flow = 
cv2.imread(flow_file, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)[:,:,::-1].astype(np.float64) 210 | flow, valid = flow[:, :, :2], flow[:, :, 2:] 211 | flow = (flow - 2**15) / 64.0 212 | return flow 213 | 214 | 215 | -------------------------------------------------------------------------------- /demo/demo_generator/vis/__init__: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/vis/__init__ -------------------------------------------------------------------------------- /demo/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/teaser.png -------------------------------------------------------------------------------- /install_modules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd ./models/correlation_package 3 | python setup.py install 4 | cd ../forwardwarp_package 5 | python setup.py install 6 | cd ../.. 7 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import logging 5 | import subprocess 6 | import torch 7 | from core import commandline, runtime, logger, tools, configuration as config 8 | 9 | def main(): 10 | 11 | # Change working directory 12 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 13 | 14 | # Parse commandline arguments 15 | args = commandline.setup_logging_and_parse_arguments(blocktitle="Commandline Arguments") 16 | 17 | # Set random seed, possibly on Cuda 18 | config.configure_random_seed(args) 19 | 20 | # DataLoader 21 | train_loader, validation_loader, inference_loader = config.configure_data_loaders(args) 22 | success = any(loader is not None for loader in [train_loader, validation_loader, inference_loader]) 23 | if not success: 24 | logging.info("No dataset could be loaded successfully. Please check dataset paths!") 25 | quit() 26 | 27 | # Configure data augmentation 28 | training_augmentation, validation_augmentation = config.configure_runtime_augmentations(args) 29 | 30 | # Configure model and loss 31 | model_and_loss = config.configure_model_and_loss(args) 32 | 33 | # Resume from checkpoint if available 34 | checkpoint_saver, checkpoint_stats = config.configure_checkpoint_saver(args, model_and_loss) 35 | 36 | if checkpoint_stats is not None: 37 | # Set checkpoint stats 38 | if args.checkpoint_mode in ["resume_from_best", "resume_from_latest"]: 39 | args.start_epoch = checkpoint_stats["epoch"] + 1 40 | 41 | # # Multi-GPU automation 42 | # with logger.LoggingBlock("Multi GPU", emph=True): 43 | # if torch.cuda.device_count() > 1: 44 | # logging.info("Let's use %d GPUs!" % torch.cuda.device_count()) 45 | # model_and_loss._model = torch.nn.DataParallel(model_and_loss._model) 46 | # else: 47 | # logging.info("Let's use %d GPU!" 
% torch.cuda.device_count()) 48 | 49 | 50 | # Checkpoint and save directory 51 | with logger.LoggingBlock("Save Directory", emph=True): 52 | logging.info("Save directory: %s" % args.save) 53 | if not os.path.exists(args.save): 54 | os.makedirs(args.save) 55 | 56 | # Configure optimizer 57 | optimizer = config.configure_optimizer(args, model_and_loss) 58 | 59 | # Configure learning rate 60 | lr_scheduler = config.configure_lr_scheduler(args, optimizer) 61 | 62 | # If this is just an evaluation: overwrite savers and epochs 63 | if args.evaluation: 64 | args.start_epoch = 1 65 | args.total_epochs = 1 66 | train_loader = None 67 | checkpoint_saver = None 68 | optimizer = None 69 | lr_scheduler = None 70 | 71 | # Cuda optimization 72 | if args.cuda: 73 | torch.backends.cudnn.deterministic = True 74 | torch.backends.cudnn.benchmark = False 75 | 76 | # Kickoff training, validation and/or testing 77 | return runtime.exec_runtime( 78 | args, 79 | checkpoint_saver=checkpoint_saver, 80 | model_and_loss=model_and_loss, 81 | optimizer=optimizer, 82 | lr_scheduler=lr_scheduler, 83 | train_loader=train_loader, 84 | validation_loader=validation_loader, 85 | inference_loader=inference_loader, 86 | training_augmentation=training_augmentation, 87 | validation_augmentation=validation_augmentation) 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import model_monosceneflow 2 | from . import model_monosceneflow_ablation 3 | from . import model_monosceneflow_ablation_decoder_split 4 | from . import model_monodepth_ablation 5 | 6 | ########################################################################################## 7 | ## Monocular Scene Flow - The full model 8 | ########################################################################################## 9 | 10 | MonoSceneFlow_fullmodel = model_monosceneflow.MonoSceneFlow 11 | 12 | ########################################################################################## 13 | ## Monocular Scene Flow - The models for the ablation studies 14 | ########################################################################################## 15 | 16 | MonoSceneFlow_CamConv = model_monosceneflow_ablation.MonoSceneFlow_CamConv 17 | 18 | MonoSceneFlow_FlowOnly = model_monosceneflow_ablation.MonoSceneFlow_OpticalFlowOnly 19 | MonoSceneFlow_DispOnly = model_monosceneflow_ablation.MonoSceneFlow_DisparityOnly 20 | 21 | MonoSceneFlow_Split_Cont = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split_base 22 | MonoSceneFlow_Split_Last1 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split1 23 | MonoSceneFlow_Split_Last2 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split2 24 | MonoSceneFlow_Split_Last3 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split3 25 | MonoSceneFlow_Split_Last4 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split4 26 | MonoSceneFlow_Split_Last5 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split5 27 | 28 | ########################################################################################## 29 | ## Monocular Depth - The models for the ablation study in Table 1. 
30 | ########################################################################################## 31 | 32 | MonoDepth_Baseline = model_monodepth_ablation.MonoDepth_Baseline 33 | MonoDepth_CamConv = model_monodepth_ablation.MonoDepth_CamConv -------------------------------------------------------------------------------- /models/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/models/correlation_package/__init__.py -------------------------------------------------------------------------------- /models/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class Correlation(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input1, input2, param_dict): 10 | ctx.save_for_backward(input1, input2) 11 | 12 | ctx.pad_size = param_dict["pad_size"] 13 | ctx.kernel_size = param_dict["kernel_size"] 14 | ctx.max_disp = param_dict["max_disp"] 15 | ctx.stride1 = param_dict["stride1"] 16 | ctx.stride2 = param_dict["stride2"] 17 | ctx.corr_multiply = param_dict["corr_multiply"] 18 | 19 | with torch.cuda.device_of(input1): 20 | rbot1 = input1.new() 21 | rbot2 = input2.new() 22 | output = input1.new() 23 | 24 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 25 | ctx.pad_size, ctx.kernel_size, ctx.max_disp, ctx.stride1, ctx.stride2, ctx.corr_multiply) 26 | 27 | return output 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | input1, input2 = ctx.saved_tensors 32 | 33 | with torch.cuda.device_of(input1): 34 | rbot1 = input1.new() 35 | rbot2 = input2.new() 36 | 37 | grad_input1 = input1.new() 38 | grad_input2 = input2.new() 39 | 40 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 41 | ctx.pad_size, ctx.kernel_size, ctx.max_disp, ctx.stride1, ctx.stride2, ctx.corr_multiply) 42 | 43 | return grad_input1, grad_input2, None 44 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "correlation_cuda_kernel.cuh" 9 | 10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 11 | int pad_size, 12 | int kernel_size, 13 | int max_displacement, 14 | int stride1, 15 | int stride2, 16 | int corr_type_multiply) 17 | { 18 | 19 | int batchSize = input1.size(0); 20 | 21 | int nInputChannels = input1.size(1); 22 | int inputHeight = input1.size(2); 23 | int inputWidth = input1.size(3); 24 | 25 | int kernel_radius = (kernel_size - 1) / 2; 26 | int border_radius = kernel_radius + max_displacement; 27 | 28 | int paddedInputHeight = inputHeight + 2 * pad_size; 29 | int paddedInputWidth = inputWidth + 2 * pad_size; 30 | 31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 32 | 33 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 34 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 35 | 36 | rInput1.resize_({batchSize, 
paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 39 | 40 | rInput1.fill_(0); 41 | rInput2.fill_(0); 42 | output.fill_(0); 43 | 44 | int success = correlation_forward_cuda_kernel( 45 | output, 46 | output.size(0), 47 | output.size(1), 48 | output.size(2), 49 | output.size(3), 50 | output.stride(0), 51 | output.stride(1), 52 | output.stride(2), 53 | output.stride(3), 54 | input1, 55 | input1.size(1), 56 | input1.size(2), 57 | input1.size(3), 58 | input1.stride(0), 59 | input1.stride(1), 60 | input1.stride(2), 61 | input1.stride(3), 62 | input2, 63 | input2.size(1), 64 | input2.stride(0), 65 | input2.stride(1), 66 | input2.stride(2), 67 | input2.stride(3), 68 | rInput1, 69 | rInput2, 70 | pad_size, 71 | kernel_size, 72 | max_displacement, 73 | stride1, 74 | stride2, 75 | corr_type_multiply, 76 | at::cuda::getCurrentCUDAStream() 77 | //at::globalContext().getCurrentCUDAStream() 78 | ); 79 | 80 | //check for errors 81 | if (!success) { 82 | AT_ERROR("CUDA call failed"); 83 | } 84 | 85 | return 1; 86 | 87 | } 88 | 89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 90 | at::Tensor& gradInput1, at::Tensor& gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply) 97 | { 98 | 99 | int batchSize = input1.size(0); 100 | int nInputChannels = input1.size(1); 101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 103 | 104 | int height = input1.size(2); 105 | int width = input1.size(3); 106 | 107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 109 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 110 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 111 | 112 | rInput1.fill_(0); 113 | rInput2.fill_(0); 114 | gradInput1.fill_(0); 115 | gradInput2.fill_(0); 116 | 117 | int success = correlation_backward_cuda_kernel(gradOutput, 118 | gradOutput.size(0), 119 | gradOutput.size(1), 120 | gradOutput.size(2), 121 | gradOutput.size(3), 122 | gradOutput.stride(0), 123 | gradOutput.stride(1), 124 | gradOutput.stride(2), 125 | gradOutput.stride(3), 126 | input1, 127 | input1.size(1), 128 | input1.size(2), 129 | input1.size(3), 130 | input1.stride(0), 131 | input1.stride(1), 132 | input1.stride(2), 133 | input1.stride(3), 134 | input2, 135 | input2.stride(0), 136 | input2.stride(1), 137 | input2.stride(2), 138 | input2.stride(3), 139 | gradInput1, 140 | gradInput1.stride(0), 141 | gradInput1.stride(1), 142 | gradInput1.stride(2), 143 | gradInput1.stride(3), 144 | gradInput2, 145 | gradInput2.size(1), 146 | gradInput2.stride(0), 147 | gradInput2.stride(1), 148 | gradInput2.stride(2), 149 | gradInput2.stride(3), 150 | rInput1, 151 | rInput2, 152 | pad_size, 153 | kernel_size, 154 | max_displacement, 155 | stride1, 156 | stride2, 157 | corr_type_multiply, 158 | at::cuda::getCurrentCUDAStream() 159 | //at::globalContext().getCurrentCUDAStream() 160 | ); 161 | 162 | if (!success) { 163 | AT_ERROR("CUDA call failed"); 164 | } 165 | 166 | return 1; 167 | } 168 | 169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 170 | m.def("forward", &correlation_forward_cuda, 
"Correlation forward (CUDA)"); 171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 172 | } 173 | 174 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- /models/correlation_package/readme.txt: -------------------------------------------------------------------------------- 1 | https://github.com/NVIDIA/flownet2-pytorch/tree/master/networks/correlation_package 2 | 3 | Latest commit ff19163 on Aug 25, 2018 -------------------------------------------------------------------------------- /models/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import setup, find_packages 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | cxx_args = ['-std=c++11'] 6 | 7 | nvcc_args = [ 8 | '-gencode', 'arch=compute_50,code=sm_50', 9 | '-gencode', 'arch=compute_52,code=sm_52', 10 | '-gencode', 'arch=compute_60,code=sm_60', 11 | '-gencode', 'arch=compute_61,code=sm_61', 12 | '-gencode', 'arch=compute_61,code=compute_61' 13 | ] 14 | 15 | setup( 16 | name='correlation_cuda', 17 | ext_modules=[ 18 | CUDAExtension('correlation_cuda', [ 19 | 'correlation_cuda.cc', 20 | 'correlation_cuda_kernel.cu' 21 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 22 | ], 23 | cmdclass={ 24 | 'build_ext': BuildExtension 25 | }) 26 | -------------------------------------------------------------------------------- /models/forwardwarp_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/models/forwardwarp_package/__init__.py 
-------------------------------------------------------------------------------- /models/forwardwarp_package/forward_warp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, Parameter 3 | from torch.autograd import Function 4 | 5 | import forward_warp_cuda 6 | 7 | class forward_warp_function(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, im0, flow): 11 | ''' 12 | im0: the first image with shape [B, C, H, W] 13 | flow: the optical flow with shape [B, H, W, 2] (different to grid_sample, it's range is from [-W, -H] to [W, H]) 14 | ''' 15 | assert(len(im0.shape) == len(flow.shape) == 4) 16 | assert(im0.shape[0] == flow.shape[0]) 17 | assert(im0.shape[-2:] == flow.shape[1:3]) 18 | assert(flow.shape[3] == 2) 19 | 20 | im0 = im0.contiguous() 21 | flow = flow.contiguous() 22 | ctx.save_for_backward(im0, flow) 23 | 24 | im1 = torch.zeros(im0.size(), dtype=im0.dtype, layout=im0.layout, device=im0.device) 25 | 26 | # with torch.cuda.device_of(im0): 27 | forward_warp_cuda.forward(im0, flow, im1) 28 | 29 | return im1 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | 34 | grad_output = grad_output.contiguous() 35 | im0, flow = ctx.saved_variables 36 | im0_grad = torch.zeros(im0.size(), dtype=im0.dtype, layout=im0.layout, device=im0.device) 37 | flow_grad = torch.zeros(flow.size(), dtype=flow.dtype, layout=flow.layout, device=flow.device) 38 | 39 | #with torch.cuda.device_of(im0): 40 | forward_warp_cuda.backward(grad_output, im0, flow, im0_grad, flow_grad) 41 | 42 | return im0_grad, flow_grad 43 | 44 | 45 | class forward_warp(Module): 46 | 47 | def __init__(self): 48 | super(forward_warp, self).__init__() 49 | 50 | def forward(self, im0, flow): 51 | 52 | _, _, h, w = im0.size() 53 | flow = torch.clamp(flow, -2*w, 2*w) 54 | 55 | return forward_warp_function.apply(im0, flow) 56 | -------------------------------------------------------------------------------- /models/forwardwarp_package/forward_warp_cuda.cpp: -------------------------------------------------------------------------------- 1 | // #include 2 | #include 3 | 4 | int forward_warp_cuda_forward(const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im1); 5 | int forward_warp_cuda_backward(const at::Tensor& grad_output, const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im0_grad, at::Tensor& flow_grad); 6 | 7 | // Because of the incompatible of Pytorch 1.0 && Pytorch 0.4, we have to annotation this. 
8 | #define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDA tensor") 9 | #define CHECK_CONTIGUOUS(x) AT_ASSERT(x.is_contiguous(), #x " must be contiguous") 10 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 11 | 12 | int forward_warp_forward(const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im1){ 13 | CHECK_INPUT(im0); 14 | CHECK_INPUT(flow); 15 | 16 | // im1.resize_({im0.size(0), im0.size(1), im0.size(2), im0.size(3)}); 17 | // im1.fill_(0); 18 | int success = forward_warp_cuda_forward(im0, flow, im1); 19 | 20 | if (!success) { 21 | AT_ERROR("CUDA call failed"); 22 | } 23 | return 1; 24 | } 25 | 26 | int forward_warp_backward(const at::Tensor& grad_output, const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im0_grad, at::Tensor& flow_grad){ 27 | CHECK_INPUT(grad_output); 28 | CHECK_INPUT(im0); 29 | CHECK_INPUT(flow); 30 | 31 | // im0_grad.resize_({im0.size(0), im0.size(1), im0.size(2), im0.size(3)}); 32 | // flow_grad.resize_({flow.size(0), flow.size(1), flow.size(2), flow.size(3)}); 33 | // im0_grad.fill_(0); 34 | // flow_grad.fill_(0); 35 | 36 | int success = forward_warp_cuda_backward(grad_output, im0, flow, im0_grad, flow_grad); 37 | 38 | if (!success) { 39 | AT_ERROR("CUDA call failed"); 40 | } 41 | return 1; 42 | } 43 | 44 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 45 | m.def("forward", &forward_warp_forward, "forward warp forward (CUDA)"); 46 | m.def("backward", &forward_warp_backward, "forward warp backward (CUDA)"); 47 | } 48 | -------------------------------------------------------------------------------- /models/forwardwarp_package/forward_warp_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | // Define CUDA_NUM_THREAS and GET_BLOCKS 8 | const int CUDA_NUM_THREADS = 1024; 9 | inline int GET_BLOCKS(const int N){ return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;} 10 | 11 | // Define CUDA_KERNEL_LOOP 12 | #define CUDA_KERNEL_LOOP(i, n) \ 13 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) 14 | 15 | 16 | 17 | static __forceinline__ __device__ 18 | int get_im_index( 19 | const int bb, 20 | const int cc, 21 | const int hh, 22 | const int ww, 23 | const size_t C, 24 | const size_t H, 25 | const size_t W) { 26 | return bb*C*H*W + cc*H*W + hh*W + ww; 27 | } 28 | 29 | template 30 | __global__ void forward_warp_cuda_forward_kernel( 31 | const int total_step, 32 | const scalar_t* im0, 33 | const scalar_t* flow, 34 | scalar_t* im1, 35 | const int B, 36 | const int C, 37 | const int H, 38 | const int W) { 39 | 40 | CUDA_KERNEL_LOOP(index, total_step-1) { 41 | const int bb = index / (H * W); 42 | const int hh = (index - bb*H*W) / W; 43 | const int ww = index % W; 44 | const scalar_t x = (scalar_t)ww + flow[index * 2 + 0]; 45 | const scalar_t y = (scalar_t)hh + flow[index * 2 + 1]; 46 | const int x_f = static_cast(::floor(x)); 47 | const int y_f = static_cast(::floor(y)); 48 | const int x_c = x_f + 1; 49 | const int y_c = y_f + 1; 50 | 51 | if(x_f>=0 && x_c=0 && y_c 69 | __global__ void forward_warp_cuda_backward_kernel( 70 | const int total_step, 71 | const scalar_t* grad_output, 72 | const scalar_t* im0, 73 | const scalar_t* flow, 74 | scalar_t* im0_grad, 75 | scalar_t* flow_grad, 76 | const int B, 77 | const int C, 78 | const int H, 79 | const int W) { 80 | 81 | CUDA_KERNEL_LOOP(index, total_step) { 82 | const int bb = index / (H * W); 83 | const int hh = (index-bb*H*W) / W; 84 | const int 
ww = index % W; 85 | const scalar_t x = (scalar_t)ww + flow[index * 2 + 0]; 86 | const scalar_t y = (scalar_t)hh + flow[index * 2 + 1]; 87 | 88 | const int x_f = static_cast(::floor(x)); 89 | const int y_f = static_cast(::floor(y)); 90 | const int x_c = x_f + 1; 91 | const int y_c = y_f + 1; 92 | 93 | if(x_f>=0 && x_c=0 && y_c 140 | <<>>( 141 | total_step, 142 | im0.data(), 143 | flow.data(), 144 | im1.data(), 145 | B, C, H, W); 146 | })); 147 | 148 | cudaError_t err = cudaGetLastError(); 149 | 150 | // check for errors 151 | if (err != cudaSuccess) { 152 | printf("error in Forwardwarp : forward_cuda_kernel: %s\n", cudaGetErrorString(err)); 153 | return 0; 154 | } 155 | 156 | return 1; 157 | } 158 | 159 | int forward_warp_cuda_backward( 160 | const at::Tensor& grad_output, 161 | const at::Tensor& im0, 162 | const at::Tensor& flow, 163 | at::Tensor& im0_grad, 164 | at::Tensor& flow_grad) { 165 | 166 | const int B = im0.size(0); 167 | const int C = im0.size(1); 168 | const int H = im0.size(2); 169 | const int W = im0.size(3); 170 | const int total_step = B * H * W; 171 | 172 | AT_DISPATCH_FLOATING_TYPES(grad_output.scalar_type(), "forward_warp_backward_cuda", ([&] { 173 | forward_warp_cuda_backward_kernel 174 | <<>>( 175 | total_step, 176 | grad_output.data(), 177 | im0.data(), 178 | flow.data(), 179 | im0_grad.data(), 180 | flow_grad.data(), 181 | B, C, H, W); 182 | })); 183 | 184 | cudaError_t err = cudaGetLastError(); 185 | 186 | // check for errors 187 | if (err != cudaSuccess) { 188 | printf("error in Forwardwarp : forward_cuda_kernel: %s\n", cudaGetErrorString(err)); 189 | return 0; 190 | } 191 | 192 | return 1; 193 | } 194 | -------------------------------------------------------------------------------- /models/forwardwarp_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import setup, find_packages 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | cxx_args = ['-std=c++11'] 6 | 7 | nvcc_args = [ 8 | '-gencode', 'arch=compute_50,code=sm_50', 9 | '-gencode', 'arch=compute_52,code=sm_52', 10 | '-gencode', 'arch=compute_60,code=sm_60', 11 | '-gencode', 'arch=compute_61,code=sm_61', 12 | '-gencode', 'arch=compute_61,code=compute_61' 13 | ] 14 | 15 | setup( 16 | name='forward_warp_cuda', 17 | ext_modules=[ 18 | CUDAExtension('forward_warp_cuda', [ 19 | 'forward_warp_cuda.cpp', 20 | 'forward_warp_cuda_kernel.cu', 21 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 22 | ], 23 | cmdclass={ 24 | 'build_ext': BuildExtension 25 | }) 26 | -------------------------------------------------------------------------------- /models/model_monodepth_ablation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .modules_monodepth import Resnet18_MonoDepth_Single, Resnet18_MonoDepth_Single_CamConv 6 | from utils.monodepth_eval import disp_post_processing 7 | 8 | 9 | class MonoDepth_Baseline(nn.Module): 10 | def __init__(self, args): 11 | super(MonoDepth_Baseline, self).__init__() 12 | 13 | self._args = args 14 | self._mono_resnet18 = Resnet18_MonoDepth_Single() 15 | 16 | def forward(self, input_dict): 17 | 18 | output_dict = {} 19 | 20 | if not self._args.evaluation: 21 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(input_dict['input_l1']) 22 | disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4 = 
self._mono_resnet18(torch.flip(input_dict['input_r1'], [3])) 23 | 24 | disp_r1_1 = torch.flip(disp_r1_1, [3]) 25 | disp_r1_2 = torch.flip(disp_r1_2, [3]) 26 | disp_r1_3 = torch.flip(disp_r1_3, [3]) 27 | disp_r1_4 = torch.flip(disp_r1_4, [3]) 28 | 29 | output_dict['disp_l1'] = [disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4] 30 | output_dict['disp_r1'] = [disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4] 31 | 32 | else: 33 | inputs = torch.cat((input_dict['input_l1'], torch.flip(input_dict['input_l1'], [3])), dim=0) 34 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(inputs) 35 | out_disp_1_pp = disp_post_processing(disp_l1_1) 36 | output_dict['disp_l1_pp'] = [out_disp_1_pp] 37 | 38 | return output_dict 39 | 40 | 41 | class MonoDepth_CamConv(nn.Module): 42 | def __init__(self, args): 43 | super(MonoDepth_CamConv, self).__init__() 44 | 45 | self._args = args 46 | self._mono_resnet18 = Resnet18_MonoDepth_Single_CamConv() 47 | 48 | def forward(self, input_dict): 49 | 50 | output_dict = {} 51 | 52 | if not self._args.evaluation: 53 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(input_dict['input_l1'], input_dict['input_k_l1']) 54 | disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4 = self._mono_resnet18(torch.flip(input_dict['input_r1'], [3]), input_dict['input_k_r1_flip']) 55 | 56 | disp_r1_1 = torch.flip(disp_r1_1, [3]) 57 | disp_r1_2 = torch.flip(disp_r1_2, [3]) 58 | disp_r1_3 = torch.flip(disp_r1_3, [3]) 59 | disp_r1_4 = torch.flip(disp_r1_4, [3]) 60 | 61 | output_dict['disp_l1'] = [disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4] 62 | output_dict['disp_r1'] = [disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4] 63 | 64 | else: 65 | input_img = torch.cat((input_dict['input_l1'], torch.flip(input_dict['input_l1'], [3])), dim=0) 66 | intrinsic = torch.cat((input_dict['input_k_l1'], input_dict['input_k_l1_flip']), dim=0) 67 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(input_img, intrinsic) 68 | out_disp_1_pp = disp_post_processing(disp_l1_1) 69 | output_dict['disp_l1_pp'] = [out_disp_1_pp] 70 | 71 | return output_dict 72 | -------------------------------------------------------------------------------- /models/model_monosceneflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as tf 6 | import logging 7 | 8 | from .correlation_package.correlation import Correlation 9 | 10 | from .modules_sceneflow import get_grid, WarpingLayer_SF 11 | from .modules_sceneflow import initialize_msra, upsample_outputs_as 12 | from .modules_sceneflow import upconv 13 | from .modules_sceneflow import FeatureExtractor, MonoSceneFlowDecoder, ContextNetwork 14 | 15 | from utils.interpolation import interpolate2d_as 16 | from utils.sceneflow_util import flow_horizontal_flip, intrinsic_scale, get_pixelgrid, post_processing 17 | 18 | 19 | class MonoSceneFlow(nn.Module): 20 | def __init__(self, args): 21 | super(MonoSceneFlow, self).__init__() 22 | 23 | self._args = args 24 | self.num_chs = [3, 32, 64, 96, 128, 192, 256] 25 | self.search_range = 4 26 | self.output_level = 4 27 | self.num_levels = 7 28 | 29 | self.leakyRELU = nn.LeakyReLU(0.1, inplace=True) 30 | 31 | self.feature_pyramid_extractor = FeatureExtractor(self.num_chs) 32 | self.warping_layer_sf = WarpingLayer_SF() 33 | 34 | self.flow_estimators = nn.ModuleList() 35 | self.upconv_layers = nn.ModuleList() 36 | 37 | self.dim_corr = (self.search_range * 2 
+ 1) ** 2 38 | 39 | for l, ch in enumerate(self.num_chs[::-1]): 40 | if l > self.output_level: 41 | break 42 | 43 | if l == 0: 44 | num_ch_in = self.dim_corr + ch 45 | else: 46 | num_ch_in = self.dim_corr + ch + 32 + 3 + 1 47 | self.upconv_layers.append(upconv(32, 32, 3, 2)) 48 | 49 | layer_sf = MonoSceneFlowDecoder(num_ch_in) 50 | self.flow_estimators.append(layer_sf) 51 | 52 | self.corr_params = {"pad_size": self.search_range, "kernel_size": 1, "max_disp": self.search_range, "stride1": 1, "stride2": 1, "corr_multiply": 1} 53 | self.context_networks = ContextNetwork(32 + 3 + 1) 54 | self.sigmoid = torch.nn.Sigmoid() 55 | 56 | initialize_msra(self.modules()) 57 | 58 | def run_pwc(self, input_dict, x1_raw, x2_raw, k1, k2): 59 | 60 | output_dict = {} 61 | 62 | # the bottom pyramid level contains the original images 63 | x1_pyramid = self.feature_pyramid_extractor(x1_raw) + [x1_raw] 64 | x2_pyramid = self.feature_pyramid_extractor(x2_raw) + [x2_raw] 65 | 66 | # outputs 67 | sceneflows_f = [] 68 | sceneflows_b = [] 69 | disps_1 = [] 70 | disps_2 = [] 71 | 72 | for l, (x1, x2) in enumerate(zip(x1_pyramid, x2_pyramid)): 73 | 74 | # warping 75 | if l == 0: 76 | x2_warp = x2 77 | x1_warp = x1 78 | else: 79 | flow_f = interpolate2d_as(flow_f, x1, mode="bilinear") 80 | flow_b = interpolate2d_as(flow_b, x1, mode="bilinear") 81 | disp_l1 = interpolate2d_as(disp_l1, x1, mode="bilinear") 82 | disp_l2 = interpolate2d_as(disp_l2, x1, mode="bilinear") 83 | x1_out = self.upconv_layers[l-1](x1_out) 84 | x2_out = self.upconv_layers[l-1](x2_out) 85 | x2_warp = self.warping_layer_sf(x2, flow_f, disp_l1, k1, input_dict['aug_size']) # because K can change when doing augmentation 86 | x1_warp = self.warping_layer_sf(x1, flow_b, disp_l2, k2, input_dict['aug_size']) 87 | 88 | # correlation 89 | out_corr_f = Correlation.apply(x1, x2_warp, self.corr_params) 90 | out_corr_b = Correlation.apply(x2, x1_warp, self.corr_params) 91 | out_corr_relu_f = self.leakyRELU(out_corr_f) 92 | out_corr_relu_b = self.leakyRELU(out_corr_b) 93 | 94 | # monosf estimator 95 | if l == 0: 96 | x1_out, flow_f, disp_l1 = self.flow_estimators[l](torch.cat([out_corr_relu_f, x1], dim=1)) 97 | x2_out, flow_b, disp_l2 = self.flow_estimators[l](torch.cat([out_corr_relu_b, x2], dim=1)) 98 | else: 99 | x1_out, flow_f_res, disp_l1 = self.flow_estimators[l](torch.cat([out_corr_relu_f, x1, x1_out, flow_f, disp_l1], dim=1)) 100 | x2_out, flow_b_res, disp_l2 = self.flow_estimators[l](torch.cat([out_corr_relu_b, x2, x2_out, flow_b, disp_l2], dim=1)) 101 | flow_f = flow_f + flow_f_res 102 | flow_b = flow_b + flow_b_res 103 | 104 | # upsampling or post-processing 105 | if l != self.output_level: 106 | disp_l1 = self.sigmoid(disp_l1) * 0.3 107 | disp_l2 = self.sigmoid(disp_l2) * 0.3 108 | sceneflows_f.append(flow_f) 109 | sceneflows_b.append(flow_b) 110 | disps_1.append(disp_l1) 111 | disps_2.append(disp_l2) 112 | else: 113 | flow_res_f, disp_l1 = self.context_networks(torch.cat([x1_out, flow_f, disp_l1], dim=1)) 114 | flow_res_b, disp_l2 = self.context_networks(torch.cat([x2_out, flow_b, disp_l2], dim=1)) 115 | flow_f = flow_f + flow_res_f 116 | flow_b = flow_b + flow_res_b 117 | sceneflows_f.append(flow_f) 118 | sceneflows_b.append(flow_b) 119 | disps_1.append(disp_l1) 120 | disps_2.append(disp_l2) 121 | break 122 | 123 | x1_rev = x1_pyramid[::-1] 124 | 125 | output_dict['flow_f'] = upsample_outputs_as(sceneflows_f[::-1], x1_rev) 126 | output_dict['flow_b'] = upsample_outputs_as(sceneflows_b[::-1], x1_rev) 127 | output_dict['disp_l1'] = 
upsample_outputs_as(disps_1[::-1], x1_rev) 128 | output_dict['disp_l2'] = upsample_outputs_as(disps_2[::-1], x1_rev) 129 | 130 | return output_dict 131 | 132 | 133 | def forward(self, input_dict): 134 | 135 | output_dict = {} 136 | 137 | ## Left 138 | output_dict = self.run_pwc(input_dict, input_dict['input_l1_aug'], input_dict['input_l2_aug'], input_dict['input_k_l1_aug'], input_dict['input_k_l2_aug']) 139 | 140 | ## Right 141 | ## ss: train val 142 | ## ft: train 143 | if self.training or (not self._args.finetuning and not self._args.evaluation): 144 | input_r1_flip = torch.flip(input_dict['input_r1_aug'], [3]) 145 | input_r2_flip = torch.flip(input_dict['input_r2_aug'], [3]) 146 | k_r1_flip = input_dict["input_k_r1_flip_aug"] 147 | k_r2_flip = input_dict["input_k_r2_flip_aug"] 148 | 149 | output_dict_r = self.run_pwc(input_dict, input_r1_flip, input_r2_flip, k_r1_flip, k_r2_flip) 150 | 151 | for ii in range(0, len(output_dict_r['flow_f'])): 152 | output_dict_r['flow_f'][ii] = flow_horizontal_flip(output_dict_r['flow_f'][ii]) 153 | output_dict_r['flow_b'][ii] = flow_horizontal_flip(output_dict_r['flow_b'][ii]) 154 | output_dict_r['disp_l1'][ii] = torch.flip(output_dict_r['disp_l1'][ii], [3]) 155 | output_dict_r['disp_l2'][ii] = torch.flip(output_dict_r['disp_l2'][ii], [3]) 156 | 157 | output_dict['output_dict_r'] = output_dict_r 158 | 159 | ## Post Processing 160 | ## ss: eval 161 | ## ft: train val eval 162 | if self._args.evaluation or self._args.finetuning: 163 | 164 | input_l1_flip = torch.flip(input_dict['input_l1_aug'], [3]) 165 | input_l2_flip = torch.flip(input_dict['input_l2_aug'], [3]) 166 | k_l1_flip = input_dict["input_k_l1_flip_aug"] 167 | k_l2_flip = input_dict["input_k_l2_flip_aug"] 168 | 169 | output_dict_flip = self.run_pwc(input_dict, input_l1_flip, input_l2_flip, k_l1_flip, k_l2_flip) 170 | 171 | flow_f_pp = [] 172 | flow_b_pp = [] 173 | disp_l1_pp = [] 174 | disp_l2_pp = [] 175 | 176 | for ii in range(0, len(output_dict_flip['flow_f'])): 177 | 178 | flow_f_pp.append(post_processing(output_dict['flow_f'][ii], flow_horizontal_flip(output_dict_flip['flow_f'][ii]))) 179 | flow_b_pp.append(post_processing(output_dict['flow_b'][ii], flow_horizontal_flip(output_dict_flip['flow_b'][ii]))) 180 | disp_l1_pp.append(post_processing(output_dict['disp_l1'][ii], torch.flip(output_dict_flip['disp_l1'][ii], [3]))) 181 | disp_l2_pp.append(post_processing(output_dict['disp_l2'][ii], torch.flip(output_dict_flip['disp_l2'][ii], [3]))) 182 | 183 | output_dict['flow_f_pp'] = flow_f_pp 184 | output_dict['flow_b_pp'] = flow_b_pp 185 | output_dict['disp_l1_pp'] = disp_l1_pp 186 | output_dict['disp_l2_pp'] = disp_l2_pp 187 | 188 | return output_dict 189 | -------------------------------------------------------------------------------- /models/modules_camconv.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as tf 6 | 7 | 8 | class CamConvModule(nn.Module): 9 | def __init__(self, norm_const=256.0): 10 | super(CamConvModule, self).__init__() 11 | 12 | self._inputimg_size = None 13 | self._norm_const = norm_const 14 | 15 | self._fx = None 16 | self._fy = None 17 | self._cx = None 18 | self._cy = None 19 | 20 | self._grid_w = None 21 | self._grid_h = None 22 | self._norm_coord = None 23 | self._centered_coord = None 24 | self._fov_maps = None 25 | 26 | # Unsqueeze and Expand as 27 | def ue_as(self, input_tensor, 
target_as): 28 | return input_tensor.unsqueeze(1).unsqueeze(1).unsqueeze(1).expand(target_as.size()).clone() 29 | 30 | def interpolate2d(self, inputs, h, w, mode="bilinear"): 31 | return tf.interpolate(inputs, [h, w], mode=mode, align_corners=True) 32 | 33 | def calculate_CoordConv(self, x): 34 | 35 | grid_w = torch.linspace(0, x.size(3) - 1, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) 36 | grid_h = torch.linspace(0, x.size(2) - 1, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) 37 | self._grid_w = grid_w.float().requires_grad_(False).cuda() 38 | self._grid_h = grid_h.float().requires_grad_(False).cuda() 39 | norm_grid_w = self._grid_w / (x.size(3) - 1) * 2 - 1 40 | norm_grid_h = self._grid_h / (x.size(2) - 1) * 2 - 1 41 | self._norm_coord = torch.cat((norm_grid_w, norm_grid_h), dim=1) 42 | 43 | return None 44 | 45 | def calculate_CamConv(self): 46 | 47 | ## Centered coordinates 48 | centered_coord_w = self._grid_w - self.ue_as(self._cx, self._grid_w) + 0.5 49 | centered_coord_h = self._grid_h - self.ue_as(self._cy, self._grid_h) + 0.5 50 | self._centered_coord = torch.cat((centered_coord_w / self._norm_const, centered_coord_h / self._norm_const), dim=1) 51 | 52 | ## 3) FOV maps 53 | fov_xx_channel = torch.atan(centered_coord_w / self.ue_as(self._fx, self._grid_w)) 54 | fov_yy_channel = torch.atan(centered_coord_h / self.ue_as(self._fy, self._grid_h)) 55 | self._fov_maps = torch.cat((fov_xx_channel, fov_yy_channel), dim=1) 56 | 57 | return None 58 | 59 | def initialize(self, intrinsic, input_img): 60 | 61 | self._fx = intrinsic[:, 0, 0] 62 | self._fy = intrinsic[:, 1, 1] 63 | self._cx = intrinsic[:, 0, 2] 64 | self._cy = intrinsic[:, 1, 2] 65 | self.calculate_CoordConv(input_img) 66 | self.calculate_CamConv() 67 | 68 | return None 69 | 70 | def forward(self, input_tensor, input_img=None, intrinsic=None): 71 | 72 | if input_img is not None: 73 | self.initialize(intrinsic, input_img) 74 | 75 | _, _, hh_t, ww_t = input_tensor.size() 76 | cam_conv_tensor = torch.cat((self._norm_coord, self._centered_coord, self._fov_maps), dim=1) 77 | cam_conv_tensor = self.interpolate2d(cam_conv_tensor, hh_t, ww_t, mode="bilinear") 78 | 79 | 80 | return torch.cat((cam_conv_tensor.detach_(), input_tensor), dim=1) 81 | -------------------------------------------------------------------------------- /models/modules_sceneflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as tf 6 | import logging 7 | 8 | from utils.interpolation import interpolate2d_as 9 | from utils.sceneflow_util import pixel2pts_ms, pts2pixel_ms 10 | 11 | def get_grid(x): 12 | grid_H = torch.linspace(-1.0, 1.0, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) 13 | grid_V = torch.linspace(-1.0, 1.0, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) 14 | grid = torch.cat([grid_H, grid_V], 1) 15 | grids_cuda = grid.float().requires_grad_(False).cuda() 16 | return grids_cuda 17 | 18 | 19 | class WarpingLayer_Flow(nn.Module): 20 | def __init__(self): 21 | super(WarpingLayer_Flow, self).__init__() 22 | 23 | def forward(self, x, flow): 24 | flo_list = [] 25 | flo_w = flow[:, 0] * 2 / max(x.size(3) - 1, 1) 26 | flo_h = flow[:, 1] * 2 / max(x.size(2) - 1, 1) 27 | flo_list.append(flo_w) 28 | flo_list.append(flo_h) 29 | flow_for_grid = 
torch.stack(flo_list).transpose(0, 1) 30 | grid = torch.add(get_grid(x), flow_for_grid).transpose(1, 2).transpose(2, 3) 31 | x_warp = tf.grid_sample(x, grid) 32 | 33 | mask = torch.ones(x.size(), requires_grad=False).cuda() 34 | mask = tf.grid_sample(mask, grid) 35 | mask = (mask >= 1.0).float() 36 | 37 | return x_warp * mask 38 | 39 | 40 | class WarpingLayer_SF(nn.Module): 41 | def __init__(self): 42 | super(WarpingLayer_SF, self).__init__() 43 | 44 | def forward(self, x, sceneflow, disp, k1, input_size): 45 | 46 | _, _, h_x, w_x = x.size() 47 | disp = interpolate2d_as(disp, x) * w_x 48 | 49 | local_scale = torch.zeros_like(input_size) 50 | local_scale[:, 0] = h_x 51 | local_scale[:, 1] = w_x 52 | 53 | pts1, k1_scale = pixel2pts_ms(k1, disp, local_scale / input_size) 54 | _, _, coord1 = pts2pixel_ms(k1_scale, pts1, sceneflow, [h_x, w_x]) 55 | 56 | grid = coord1.transpose(1, 2).transpose(2, 3) 57 | x_warp = tf.grid_sample(x, grid) 58 | 59 | mask = torch.ones_like(x, requires_grad=False) 60 | mask = tf.grid_sample(mask, grid) 61 | mask = (mask >= 1.0).float() 62 | 63 | return x_warp * mask 64 | 65 | 66 | def initialize_msra(modules): 67 | logging.info("Initializing MSRA") 68 | for layer in modules: 69 | if isinstance(layer, nn.Conv2d): 70 | nn.init.kaiming_normal_(layer.weight) 71 | if layer.bias is not None: 72 | nn.init.constant_(layer.bias, 0) 73 | 74 | elif isinstance(layer, nn.ConvTranspose2d): 75 | nn.init.kaiming_normal_(layer.weight) 76 | if layer.bias is not None: 77 | nn.init.constant_(layer.bias, 0) 78 | 79 | elif isinstance(layer, nn.LeakyReLU): 80 | pass 81 | 82 | elif isinstance(layer, nn.Sequential): 83 | pass 84 | 85 | 86 | def upsample_outputs_as(input_list, ref_list): 87 | output_list = [] 88 | for ii in range(0, len(input_list)): 89 | output_list.append(interpolate2d_as(input_list[ii], ref_list[ii])) 90 | 91 | return output_list 92 | 93 | 94 | def conv(in_planes, out_planes, kernel_size=3, stride=1, dilation=1, isReLU=True): 95 | if isReLU: 96 | return nn.Sequential( 97 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, dilation=dilation, 98 | padding=((kernel_size - 1) * dilation) // 2, bias=True), 99 | nn.LeakyReLU(0.1, inplace=True) 100 | ) 101 | else: 102 | return nn.Sequential( 103 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, dilation=dilation, 104 | padding=((kernel_size - 1) * dilation) // 2, bias=True) 105 | ) 106 | 107 | 108 | class upconv(nn.Module): 109 | def __init__(self, num_in_layers, num_out_layers, kernel_size, scale): 110 | super(upconv, self).__init__() 111 | self.scale = scale 112 | self.conv1 = conv(num_in_layers, num_out_layers, kernel_size, 1) 113 | 114 | def forward(self, x): 115 | x = nn.functional.interpolate(x, scale_factor=self.scale, mode='nearest') 116 | return self.conv1(x) 117 | 118 | 119 | class FeatureExtractor(nn.Module): 120 | def __init__(self, num_chs): 121 | super(FeatureExtractor, self).__init__() 122 | self.num_chs = num_chs 123 | self.convs = nn.ModuleList() 124 | 125 | for l, (ch_in, ch_out) in enumerate(zip(num_chs[:-1], num_chs[1:])): 126 | layer = nn.Sequential( 127 | conv(ch_in, ch_out, stride=2), 128 | conv(ch_out, ch_out) 129 | ) 130 | self.convs.append(layer) 131 | 132 | def forward(self, x): 133 | feature_pyramid = [] 134 | for conv in self.convs: 135 | x = conv(x) 136 | feature_pyramid.append(x) 137 | 138 | return feature_pyramid[::-1] 139 | 140 | 141 | class MonoSceneFlowDecoder(nn.Module): 142 | def __init__(self, ch_in): 143 | super(MonoSceneFlowDecoder, 
self).__init__() 144 | 145 | self.convs = nn.Sequential( 146 | conv(ch_in, 128), 147 | conv(128, 128), 148 | conv(128, 96), 149 | conv(96, 64), 150 | conv(64, 32) 151 | ) 152 | self.conv_sf = conv(32, 3, isReLU=False) 153 | self.conv_d1 = conv(32, 1, isReLU=False) 154 | 155 | def forward(self, x): 156 | x_out = self.convs(x) 157 | sf = self.conv_sf(x_out) 158 | disp1 = self.conv_d1(x_out) 159 | 160 | return x_out, sf, disp1 161 | 162 | 163 | class ContextNetwork(nn.Module): 164 | def __init__(self, ch_in): 165 | super(ContextNetwork, self).__init__() 166 | 167 | self.convs = nn.Sequential( 168 | conv(ch_in, 128, 3, 1, 1), 169 | conv(128, 128, 3, 1, 2), 170 | conv(128, 128, 3, 1, 4), 171 | conv(128, 96, 3, 1, 8), 172 | conv(96, 64, 3, 1, 16), 173 | conv(64, 32, 3, 1, 1) 174 | ) 175 | self.conv_sf = conv(32, 3, isReLU=False) 176 | self.conv_d1 = nn.Sequential( 177 | conv(32, 1, isReLU=False), 178 | torch.nn.Sigmoid() 179 | ) 180 | 181 | def forward(self, x): 182 | 183 | x_out = self.convs(x) 184 | sf = self.conv_sf(x_out) 185 | disp1 = self.conv_d1(x_out) * 0.3 186 | 187 | return sf, disp1 188 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Self-Supervised Monocular Scene Flow Estimation 2 | 3 | 4 | 5 | > 3D visualization of estimated depth and scene flow from two temporally consecutive images. 6 | > Intermediate frames are interpolated using the estimated scene flow. (fine-tuned model, tested on KITTI Benchmark) 7 | 8 | This repository is the official PyTorch implementation of the paper: 9 | 10 |    [**Self-Supervised Monocular Scene Flow Estimation**](http://openaccess.thecvf.com/content_CVPR_2020/papers/Hur_Self-Supervised_Monocular_Scene_Flow_Estimation_CVPR_2020_paper.pdf) 11 |    [Junhwa Hur](https://hurjunhwa.github.io) and [Stefan Roth](https://www.visinf.tu-darmstadt.de/visinf/team_members/sroth/sroth.en.jsp) 12 |    *CVPR*, 2020 (**Oral Presentation**) 13 |    [Paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Hur_Self-Supervised_Monocular_Scene_Flow_Estimation_CVPR_2020_paper.pdf) / [Supplemental](http://openaccess.thecvf.com/content_CVPR_2020/supplemental/Hur_Self-Supervised_Monocular_Scene_CVPR_2020_supplemental.pdf) / [Arxiv](https://arxiv.org/abs/2004.04143) 14 | 15 | - Contact: junhwa.hur[at]gmail.com 16 | 17 | ## Getting started 18 | This code has been developed with Anaconda (Python 3.7), **PyTorch 1.2.0** and CUDA 10.0 on Ubuntu 16.04. 
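To verify that your environment matches, a quick sanity check is to print the versions PyTorch reports (`torch.version.cuda` is the CUDA toolkit PyTorch was built against):

```Shell
python -c "import torch; print(torch.__version__, torch.version.cuda)"
```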
19 | Based on a fresh [Anaconda](https://www.anaconda.com/download/) distribution and [PyTorch](https://pytorch.org/) installation, the following packages need to be installed: 20 | 21 | ```Shell 22 | conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch 23 | pip install tensorboard 24 | pip install pypng==0.0.18 25 | pip install colorama 26 | pip install scikit-image 27 | pip install pytz 28 | pip install tqdm==4.30.0 29 | pip install future 30 | ``` 31 | 32 | Then, please execute the following to install the Correlation and Forward Warping layers: 33 | ```Shell 34 | ./install_modules.sh 35 | ``` 36 | 37 | **For PyTorch version > 1.3** 38 | Please add the **`align_corners=True`** flag to the `grid_sample` calls in the following files: 39 | ``` 40 | augmentations.py 41 | losses.py 42 | models/modules_sceneflow.py 43 | utils/sceneflow_util.py 44 | ```
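As a minimal sketch of that change (shown with the `tf = torch.nn.functional` alias used in `models/modules_sceneflow.py`; the variable names are illustrative):

```Python
# PyTorch <= 1.2 behaved as if align_corners were True:
x_warp = tf.grid_sample(x, grid)

# PyTorch > 1.3 flipped the default to align_corners=False,
# so pass the flag explicitly to keep the original sampling behavior:
x_warp = tf.grid_sample(x, grid, align_corners=True)
```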
45 | 46 | 47 | ## Dataset 48 | 49 | Please download the following two datasets for the experiments: 50 | - [KITTI Raw Data](http://www.cvlibs.net/datasets/kitti/raw_data.php) (synced+rectified data; please refer to [MonoDepth2](https://github.com/nianticlabs/monodepth2#-kitti-training-data) for an easier way to download all the data) 51 | - [KITTI Scene Flow 2015](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow) 52 | 53 | To save space, we convert the *KITTI Raw* **png** images to **jpeg**, following the convention from [MonoDepth](https://github.com/mrharicot/monodepth): 54 | ``` 55 | find (data_folder)/ -name '*.png' | parallel 'convert {.}.png {.}.jpg && rm {}' 56 | ``` 57 | We converted the images in *KITTI Scene Flow 2015* as well. Please convert the png images in `image_2` and `image_3` into jpg and save them into the separate folders **`image_2_jpg`** and **`image_3_jpg`**. 58 | 59 | To save further space, you can delete the velodyne point data in the KITTI raw data and optionally download the [*Eigen Split Projected Depth*](https://drive.google.com/file/d/1a97lgOgrChkLxi_nvRpmbsKspveQ6EyD/view?usp=sharing) for the monocular depth evaluation on the Eigen Split. We converted the velodyne point data of the Eigen Test images into numpy array format using code from [MonoDepth](https://github.com/mrharicot/monodepth). After downloading and unzipping it, you can merge it with the KITTI raw data folder. 60 | - [Eigen Split Projected Depth](https://drive.google.com/file/d/1a97lgOgrChkLxi_nvRpmbsKspveQ6EyD/view?usp=sharing) 61 | 62 | ## Training and Inference 63 | The **[scripts](scripts/)** folder contains the training\/inference scripts for all experiments demonstrated in the paper (including the ablation study). 64 | 65 | **For training**, you can simply run the following script files: 66 | 67 | | Script | Training | Dataset | 68 | |----------------------------------------------|----------------------------|------------------------| 69 | | `./train_monosf_selfsup_kitti_raw.sh` | Self-supervised | KITTI Split | 70 | | `./train_monosf_selfsup_eigen_train.sh` | Self-supervised | Eigen Split | 71 | 72 |
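For example, to start self-supervised training on the KITTI split (a sketch; the two paths are placeholders that must point to your local directories, and the script should be launched from the folder containing it, since the scripts call `main.py` via a relative path):

```Shell
# 1) open scripts/train_monosf_selfsup_kitti_raw.sh and set, e.g.:
#      KITTI_RAW_HOME="/data/kitti_raw"       # placeholder path
#      EXPERIMENTS_HOME="/data/experiments"   # placeholder path
# 2) then launch it:
cd scripts
./train_monosf_selfsup_kitti_raw.sh
```

The path variables are documented after the fine-tuning table below.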
73 | **Fine-tuning** is done in two stages: *(i)* first finding the stopping point using a train\/valid split, and then *(ii)* fine-tuning using all data for the found number of iterations. 74 | | Script | Training | Dataset | 75 | |----------------------------------------------|----------------------------|------------------------| 76 | | `./train_monosf_kitti_finetune_1st_stage.sh` | Semi-supervised finetuning | KITTI raw + KITTI 2015 | 77 | | `./train_monosf_kitti_finetune_2nd_stage.sh` | Semi-supervised finetuning | KITTI raw + KITTI 2015 | 78 | 79 | In the script files, please configure the following paths for the experiments: 80 | - `EXPERIMENTS_HOME` : your own experiment directory where checkpoints and log files will be saved. 81 | - `KITTI_RAW_HOME` : the directory where *KITTI raw data* is located in your local system. 82 | - `KITTI_HOME` : the directory where *KITTI Scene Flow 2015* is located in your local system. 83 | - `KITTI_COMB_HOME` : the directory where both *KITTI Scene Flow 2015* and *KITTI raw data* are located. 84 | 85 | 86 | **For testing the pretrained models**, you can simply run the following script files: 87 | 88 | | Script | Task | Training | Dataset | 89 | |-------------------------------------------|---------------|-----------------|------------------| 90 | | `./eval_monosf_selfsup_kitti_train.sh` | MonoSceneFlow | Self-supervised | KITTI 2015 Train | 91 | | `./eval_monosf_selfsup_kitti_test.sh` | MonoSceneFlow | Self-supervised | KITTI 2015 Test | 92 | | `./eval_monosf_finetune_kitti_test.sh` | MonoSceneFlow | Fine-tuned | KITTI 2015 Test | 93 | | `./eval_monodepth_selfsup_kitti_train.sh` | MonoDepth | Self-supervised | KITTI test split | 94 | | `./eval_monodepth_selfsup_eigen_test.sh` | MonoDepth | Self-supervised | Eigen test split | 95 | 96 | - Testing on *KITTI 2015 Test* produces output images for uploading to the [KITTI Scene Flow 2015 Benchmark](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php). 97 | - To save the output images, please turn on `--save_disp=True`, `--save_disp2=True`, and `--save_flow=True` in the script. 98 | 99 | ## Pretrained Models 100 | 101 | The **[checkpoints](checkpoints/)** folder contains the checkpoints of the pretrained models. 102 | Pretrained models from the ablation study can be downloaded here: [download link](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-hur-self-mono-sf/models/checkpoints_ablation_study.zip) 103 | 104 | 105 | ## Outputs and Visualization 106 | 107 | Output images and visualizations of the main experiments can be downloaded here: 108 | - [Self-supervised, tested on KITTI 2015 Train](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-hur-self-mono-sf/results/self_supervised_KITTI_train.zip) 109 | - [Self-supervised, tested on Eigen Test](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-hur-self-mono-sf/results/self_supervised_Eigen_test.zip) 110 | - [Fine-tuned, tested on KITTI 2015 Train](https://drive.google.com/file/d/1JLCWT5-Ase8VkOkA9PWpkee7K0qpgm64/view?usp=sharing) 111 | 112 | 113 | ## Acknowledgement 114 | 115 | Please cite our paper if you use our source code. 116 | 117 | ```bibtex 118 | @inproceedings{Hur:2020:SSM, 119 | Author = {Junhwa Hur and Stefan Roth}, 120 | Booktitle = {CVPR}, 121 | Title = {Self-Supervised Monocular Scene Flow Estimation}, 122 | Year = {2020} 123 | } 124 | ``` 125 | 126 | - Portions of the source code (e.g., training pipeline, runtime, argument parser, and logger) are from [Jochen Gast](https://scholar.google.com/citations?user=tmRcFacAAAAJ&hl=en) 127 | - MonoDepth evaluation utils from [MonoDepth](https://github.com/mrharicot/monodepth) 128 | - MonoDepth PyTorch Implementation from [OniroAI / MonoDepth-PyTorch](https://github.com/OniroAI/MonoDepth-PyTorch) 129 | 130 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name <env> --file <this file> 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=main 5 | absl-py=0.9.0=pypi_0 6 | blas=1.0=mkl 7 | ca-certificates=2020.6.20=hecda079_0 8 | cachetools=4.1.1=pypi_0 9 | certifi=2020.6.20=py37hc8dfbb8_0 10 | cffi=1.14.1=py37he30daa8_0 11 | chardet=3.0.4=pypi_0 12 | cloudpickle=1.5.0=py_0 13 | colorama=0.4.3=py_0 14 | correlation-cuda=0.0.0=pypi_0 15 | cudatoolkit=10.0.130=0 16 | cycler=0.10.0=py_2 17 | cytoolz=0.10.1=py37h516909a_0 18 | dask-core=2.22.0=py_0 19 | decorator=4.4.2=py_0 20 | forward-warp-cuda=0.0.0=pypi_0 21 | freetype=2.10.2=h5ab3b9f_0 22 | future=0.18.2=py37hc8dfbb8_1 23 | google-auth=1.20.1=pypi_0 24 | google-auth-oauthlib=0.4.1=pypi_0 25 | grpcio=1.31.0=pypi_0 26 | idna=2.10=pypi_0 27 | imagecodecs-lite=2019.12.3=py37h03ebfcd_1 28 | imageio=2.9.0=py_0 29 | importlib-metadata=1.7.0=pypi_0 30 | intel-openmp=2020.1=217 31 | jpeg=9b=h024ee3a_2 32 | kiwisolver=1.2.0=py37h99015e2_0 33 | lcms2=2.11=h396b838_0 34 | ld_impl_linux-64=2.33.1=h53a641e_7 35 | libedit=3.1.20191231=h14c3975_1 36 | libffi=3.3=he6710b0_2 37 | libgcc-ng=9.1.0=hdf63c60_0 38 | libgfortran-ng=7.5.0=hdf63c60_14 39 | libpng=1.6.37=hbc83047_0 40 | libstdcxx-ng=9.1.0=hdf63c60_0 41 | libtiff=4.1.0=h2733197_1 42 | lz4-c=1.9.2=he6710b0_1 43 | markdown=3.2.2=pypi_0 44 | matplotlib-base=3.3.0=py37hd478181_1 45 | mkl=2020.1=217 46 | mkl-service=2.3.0=py37he904b0f_0 47 | mkl_fft=1.1.0=py37h23d657b_0 48 | mkl_random=1.1.1=py37h0573a6f_0 49 | ncurses=6.2=he6710b0_1 50 | networkx=2.4=py_1 51 | ninja=1.10.0=py37hfd86e86_0 52 | numpy=1.19.1=py37hbc911f0_0 53 | numpy-base=1.19.1=py37hfa32c7d_0 54 | oauthlib=3.1.0=pypi_0 55 | olefile=0.46=py37_0 56 | openssl=1.1.1g=h516909a_1 57 | pillow=7.2.0=py37hb39fc2d_0 58 | pip=20.2.2=py37_0 59 | protobuf=3.12.4=pypi_0 60 | pyasn1=0.4.8=pypi_0 61 | pyasn1-modules=0.2.8=pypi_0 62 | pycparser=2.20=py_2 63 | pyparsing=2.4.7=pyh9f0ad1d_0 64 | pypng=0.0.18=pypi_0 65 | python=3.7.7=hcff3b4d_5 66 | python-dateutil=2.8.1=py_0 67 | python_abi=3.7=1_cp37m 68 | pytorch=1.2.0=py3.7_cuda10.0.130_cudnn7.6.2_0 69 | pytz=2020.1=pyh9f0ad1d_0 70 | pywavelets=1.1.1=py37h03ebfcd_1 71 | pyyaml=5.3.1=py37h8f50634_0 72 | readline=8.0=h7b6447c_0 73 | requests=2.24.0=pypi_0 74 | requests-oauthlib=1.3.0=pypi_0 75 | rsa=4.6=pypi_0 76 | scikit-image=0.17.2=py37h0da4684_1 77 | scipy=1.5.0=py37h0b6359f_0 78 | setuptools=49.4.0=py37_0 79 | six=1.15.0=py_0 80 | sqlite=3.32.3=h62c20be_0 81 | tensorboard=2.3.0=pypi_0 82 | tensorboard-plugin-wit=1.7.0=pypi_0 83 | tifffile=2020.6.3=py_0 84 | tk=8.6.10=hbc83047_0 85 | toolz=0.10.0=py_0 86 | torchvision=0.4.0=py37_cu100 87 | tornado=6.0.4=py37h8f50634_1 88 | 
tqdm=4.40.0=py_0 89 | urllib3=1.25.10=pypi_0 90 | werkzeug=1.0.1=pypi_0 91 | wheel=0.34.2=py37_0 92 | xz=5.2.5=h7b6447c_0 93 | yaml=0.2.5=h516909a_0 94 | zipp=3.1.0=pypi_0 95 | zlib=1.2.11=h7b6447c_3 96 | zstd=1.4.5=h9ceee32_0 97 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_aug.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/aug" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_basic.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/basic" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_cc.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/cc" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_cc_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | 
CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_cc_aug.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/cc_aug" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monosf_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monosf_augmentation/checkpoint_base.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl1_sf/base" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monosf_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monosf_augmentation/checkpoint_cc.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl1_sf/cc" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monosf_cc_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monosf_augmentation/checkpoint_cc_aug.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | 
Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl1_sf/cc_aug" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepthBaseline 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=False \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | # save path 11 | ALIAS="-Aug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepth 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | 
--lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=True \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti_aug_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | # save path 11 | ALIAS="-Aug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepth 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=True \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepthBaseline 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | 
--batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=False \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monosf_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_Resize_Only 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monosf_camconv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_Resize_Only 19 | 
Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monosf_camconv_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_eval_monosf_loss_basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | 
CHECKPOINT="checkpoints/abl2_monosf_loss/checkpoint_basic.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl2/basic" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_eval_monosf_loss_noOcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl2_monosf_loss/checkpoint_wo_occ.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl2/noOcc" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_eval_monosf_loss_noPts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl2_monosf_loss/checkpoint_wo_3dpts.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl2/noPts" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_train_monosf_loss_basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 
experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-loss_ablation_noPts_noOcc-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup_NoPtsNoOcc 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_train_monosf_loss_noOcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-loss_ablation_noOcc-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup_NoOcc 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | 
--validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_train_monosf_loss_noPts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-loss_ablation_noPts-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup_NoPts 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_disp_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_only_disp.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_DispOnly 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_Disp_Only 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/disp_only" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=d1 \ 31 | #--save_disp=True \ 32 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_flow_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 
3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_only_flow.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_FlowOnly 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_Flow_Only 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/flow_only" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=f1 \ 31 | #--save_flow=True 32 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_cont.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_cont.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Cont 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/cont" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last1.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last1 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last1" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last2.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last2.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last2 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last2" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last3.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last3 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last3" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last4.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last4 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last4" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | 
-------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last5.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last5 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last5" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_disp_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_DispOnly 9 | 10 | # save path 11 | ALIAS="-disp-only-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_Disp_Only 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_Disp_Only 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_flow_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_FlowOnly 
9 | 10 | # save path 11 | ALIAS="-flow-only-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_Flow_Only 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_Flow_Only 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_cont.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Cont 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | 
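Every training script in this ablation uses the same optimization recipe: Adam at an initial learning rate of 2e-4 for 62 epochs, with MultiStepLR halving the rate at epochs 23, 39, 47, and 54. A minimal sketch of what those flags configure in PyTorch (illustration only; the actual loop lives behind main.py, and the single placeholder parameter stands in for the network):

import torch

# Reproduce the schedule set by --optimizer_lr, --lr_scheduler_gamma, and
# --lr_scheduler_milestones: lr = 2e-4, halved at epochs 23, 39, 47, 54.
params = [torch.nn.Parameter(torch.zeros(1))]  # placeholder for model params
optimizer = torch.optim.Adam(params, lr=2e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[23, 39, 47, 54], gamma=0.5)

for epoch in range(62):
    # ... one training epoch would run here ...
    scheduler.step()

print(optimizer.param_groups[0]["lr"])  # 2e-4 * 0.5**4 = 1.25e-05

After the last milestone the rate has decayed by a factor of 16, which matches the long flat tail of the 62-epoch schedule.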
-------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last1 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last2 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | 
--training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last3 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last4 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | 
--optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last5 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/eval_monodepth_selfsup_eigen_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_RAW_HOME="" 5 | CHECKPOINT="checkpoints/full_model_eigen/checkpoint_eigen_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_Eigen_Test 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_MonoDepth_Eigen 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monod_selfsup_eigen_test" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | 
--save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_root=$KITTI_RAW_HOME \ 28 | --validation_loss=$Valid_Loss_Function \ 29 | --validation_key=ab_r \ 30 | #--save_disp=True \ -------------------------------------------------------------------------------- /scripts/eval_monodepth_selfsup_kitti_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_Disp_Only 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monod_selfsup_kitti_train" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=ab \ 31 | #--save_disp=True \ -------------------------------------------------------------------------------- /scripts/eval_monosf_finetune_kitti_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti_ft/checkpoint_kitti_ft.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Test 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Test 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monosf_ft_kitti_test" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_root=$KITTI_HOME \ 28 | --validation_loss=$Valid_Loss_Function \ 29 | --validation_key=sf \ 30 | # --save_disp=True \ 31 | # --save_disp2=True \ 32 | # --save_flow=True 33 | -------------------------------------------------------------------------------- /scripts/eval_monosf_selfsup_kitti_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Test 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Test 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monosf_selfsup_kitti_test" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_root=$KITTI_HOME \ 28 | 
--validation_loss=$Valid_Loss_Function \ 29 | --validation_key=sf \ 30 | # --save_disp=True \ 31 | # --save_disp2=True \ 32 | # --save_flow=True 33 | -------------------------------------------------------------------------------- /scripts/eval_monosf_selfsup_kitti_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monosf_selfsup_kitti_train" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/train_monosf_kitti_finetune_1st_stage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # datasets 4 | KITTI_COMB_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-kitti_ft-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_latest.ckpt" 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Comb_Train 18 | Train_Augmentation=Augmentation_SceneFlow_Finetuning 19 | Train_Loss_Function=Loss_SceneFlow_SemiSupFinetune 20 | 21 | Valid_Dataset=KITTI_Comb_Val 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --finetuning=True \ 30 | --checkpoint=$CHECKPOINT \ 31 | --lr_scheduler=MultiStepLR \ 32 | --lr_scheduler_gamma=0.5 \ 33 | --lr_scheduler_milestones="[125, 187, 250, 281, 312]" \ 34 | --model=$MODEL \ 35 | --num_workers=16 \ 36 | --optimizer=Adam \ 37 | --optimizer_lr=4e-5 \ 38 | --save=$SAVE_PATH \ 39 | --total_epochs=343 \ 40 | --training_augmentation=$Train_Augmentation \ 41 | --training_augmentation_photometric=True \ 42 | --training_dataset=$Train_Dataset \ 43 | --training_dataset_root=$KITTI_COMB_HOME \ 44 | --training_loss=$Train_Loss_Function \ 45 | --training_key=total_loss \ 46 | --validation_augmentation=$Valid_Augmentation \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_COMB_HOME \ 49 | --validation_key=sf \ 50 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/train_monosf_kitti_finetune_2nd_stage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # datasets 4 | KITTI_COMB_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-kitti_ft-" 12 | TIME=$(date 
+"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_latest.ckpt" 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Comb_Full 18 | Train_Augmentation=Augmentation_SceneFlow_Finetuning 19 | Train_Loss_Function=Loss_SceneFlow_SemiSupFinetune 20 | 21 | Valid_Dataset=KITTI_Comb_Val 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --finetuning=True \ 30 | --checkpoint=$CHECKPOINT \ 31 | --lr_scheduler=MultiStepLR \ 32 | --lr_scheduler_gamma=0.5 \ 33 | --lr_scheduler_milestones="[112, 162, 212, 237, 262]" \ 34 | --model=$MODEL \ 35 | --num_workers=16 \ 36 | --optimizer=Adam \ 37 | --optimizer_lr=4e-5 \ 38 | --save=$SAVE_PATH \ 39 | --total_epochs=157 \ 40 | --training_augmentation=$Train_Augmentation \ 41 | --training_augmentation_photometric=True \ 42 | --training_dataset=$Train_Dataset \ 43 | --training_dataset_root=$KITTI_COMB_HOME \ 44 | --training_loss=$Train_Loss_Function \ 45 | --training_key=total_loss \ 46 | --validation_augmentation=$Valid_Augmentation \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_COMB_HOME \ 49 | --validation_key=sf \ 50 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/train_monosf_selfsup_eigen_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-eigen-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_EigenSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_EigenSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/train_monosf_selfsup_kitti_raw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 
experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-kitti-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/utils/__init__.py -------------------------------------------------------------------------------- /utils/flow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | import png 5 | import matplotlib.colors as cl 6 | from skimage import io 7 | 8 | TAG_CHAR = np.array([202021.25], np.float32) 9 | UNKNOWN_FLOW_THRESH = 1e7 10 | 11 | 12 | def write_depth_png(filename, disp_map): 13 | 14 | io.imsave(filename, (disp_map * 256.0).astype(np.uint16)) 15 | 16 | 17 | def write_flow_png(filename, uv, v=None, mask=None): 18 | 19 | if v is None: 20 | assert (uv.ndim == 3) 21 | assert (uv.shape[2] == 2) 22 | u = uv[:, :, 0] 23 | v = uv[:, :, 1] 24 | else: 25 | u = uv 26 | 27 | assert (u.shape == v.shape) 28 | 29 | height_img, width_img = u.shape 30 | if mask is None: 31 | valid_mask = np.ones([height_img, width_img]) 32 | else: 33 | valid_mask = mask 34 | 35 | flow_u = np.clip((u * 64 + 2 ** 15), 0.0, 65535.0).astype(np.uint16) 36 | flow_v = np.clip((v * 64 + 2 ** 15), 0.0, 65535.0).astype(np.uint16) 37 | 38 | output = np.stack((flow_u, flow_v, valid_mask), axis=-1) 39 | 40 | with open(filename, 'wb') as f: 41 | writer = png.Writer(width=width_img, height=height_img, bitdepth=16) 42 | writer.write(f, np.reshape(output, (-1, width_img*3))) 43 | 44 | 45 | def compute_color(u, v): 46 | """ 47 | compute optical flow color map 48 | :param u: optical flow horizontal map 49 | :param v: optical flow vertical map 50 | :return: optical flow in 
color code 51 | """ 52 | [h, w] = u.shape 53 | img = np.zeros([h, w, 3]) 54 | nanIdx = np.isnan(u) | np.isnan(v) 55 | u[nanIdx] = 0 56 | v[nanIdx] = 0 57 | 58 | colorwheel = make_color_wheel() 59 | ncols = np.size(colorwheel, 0) 60 | 61 | rad = np.sqrt(u ** 2 + v ** 2) 62 | 63 | a = np.arctan2(-v, -u) / np.pi 64 | 65 | fk = (a + 1) / 2 * (ncols - 1) + 1 66 | 67 | k0 = np.floor(fk).astype(int) 68 | 69 | k1 = k0 + 1 70 | k1[k1 == ncols + 1] = 1 71 | f = fk - k0 72 | 73 | for i in range(0, np.size(colorwheel, 1)): 74 | tmp = colorwheel[:, i] 75 | col0 = tmp[k0 - 1] / 255 76 | col1 = tmp[k1 - 1] / 255 77 | col = (1 - f) * col0 + f * col1 78 | 79 | idx = rad <= 1 80 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 81 | notidx = np.logical_not(idx) 82 | 83 | col[notidx] *= 0.75 84 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) 85 | 86 | return img 87 | 88 | 89 | def make_color_wheel(): 90 | """ 91 | Generate a color wheel according to the Middlebury color code 92 | :return: Color wheel 93 | """ 94 | RY = 15 95 | YG = 6 96 | GC = 4 97 | CB = 11 98 | BM = 13 99 | MR = 6 100 | 101 | ncols = RY + YG + GC + CB + BM + MR 102 | 103 | colorwheel = np.zeros([ncols, 3]) 104 | 105 | col = 0 106 | 107 | # RY 108 | colorwheel[0:RY, 0] = 255 109 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 110 | col += RY 111 | 112 | # YG 113 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 114 | colorwheel[col:col + YG, 1] = 255 115 | col += YG 116 | 117 | # GC 118 | colorwheel[col:col + GC, 1] = 255 119 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 120 | col += GC 121 | 122 | # CB 123 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 124 | colorwheel[col:col + CB, 2] = 255 125 | col += CB 126 | 127 | # BM 128 | colorwheel[col:col + BM, 2] = 255 129 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 130 | col += BM 131 | 132 | # MR 133 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 134 | colorwheel[col:col + MR, 0] = 255 135 | 136 | return colorwheel 137 | 138 | 139 | def flow_to_png_middlebury(flow): 140 | """ 141 | Convert flow into a Middlebury color code image 142 | :param flow: optical flow map 143 | :return: optical flow image in Middlebury color 144 | """ 145 | 146 | flow = flow.transpose([1, 2, 0]) 147 | u = flow[:, :, 0] 148 | v = flow[:, :, 1] 149 | 150 | maxu = -999. 151 | maxv = -999. 152 | minu = 999. 153 | minv = 999.
154 | 155 | idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) 156 | u[idxUnknow] = 0 157 | v[idxUnknow] = 0 158 | 159 | maxu = max(maxu, np.max(u)) 160 | minu = min(minu, np.min(u)) 161 | 162 | maxv = max(maxv, np.max(v)) 163 | minv = min(minv, np.min(v)) 164 | 165 | rad = np.sqrt(u ** 2 + v ** 2) 166 | maxrad = max(-1, np.max(rad)) 167 | 168 | u = u / (maxrad + np.finfo(float).eps) 169 | v = v / (maxrad + np.finfo(float).eps) 170 | 171 | img = compute_color(u, v) 172 | 173 | idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) 174 | img[idx] = 0 175 | 176 | return np.uint8(img) 177 | 178 | 179 | def np_flow2rgb(flow_map, max_value=None): 180 | _, h, w = flow_map.shape 181 | # flow_map[:,(flow_map[0] == 0) & (flow_map[1] == 0)] = float('nan') 182 | # print np.any(np.isnan(flow_map)) 183 | rgb_map = np.ones((h, w, 3)).astype(np.float32) 184 | # normalize by max_value if given, else by the flow's own maximum magnitude 185 | if max_value is not None: 186 | normalized_flow_map = flow_map / max_value 187 | else: 188 | normalized_flow_map = flow_map / (np.abs(flow_map).max()) 189 | 190 | rgb_map[:, :, 0] += normalized_flow_map[0] 191 | rgb_map[:, :, 1] -= 0.5 * (normalized_flow_map[0] + normalized_flow_map[1]) 192 | rgb_map[:, :, 2] += normalized_flow_map[1] 193 | return rgb_map.clip(0, 1) 194 | -------------------------------------------------------------------------------- /utils/interpolation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | import torch.nn.functional as tf 8 | 9 | 10 | def interpolate2d(inputs, size, mode="bilinear"): 11 | return tf.interpolate(inputs, size, mode=mode, align_corners=True) 12 | 13 | 14 | def interpolate2d_as(inputs, target_as, mode="bilinear"): 15 | _, _, h, w = target_as.size() 16 | return tf.interpolate(inputs, [h, w], mode=mode, align_corners=True) 17 | 18 | 19 | def _bchw2bhwc(tensor): 20 | return tensor.transpose(1,2).transpose(2,3) 21 | 22 | 23 | def _bhwc2bchw(tensor): 24 | return tensor.transpose(2,3).transpose(1,2) 25 | 26 | class Meshgrid(nn.Module): 27 | def __init__(self): 28 | super(Meshgrid, self).__init__() 29 | self.width = 0 30 | self.height = 0 31 | self.register_buffer("xx", torch.zeros(1,1)) 32 | self.register_buffer("yy", torch.zeros(1,1)) 33 | self.register_buffer("rangex", torch.zeros(1,1)) 34 | self.register_buffer("rangey", torch.zeros(1,1)) 35 | 36 | def _compute_meshgrid(self, width, height): 37 | torch.arange(0, width, out=self.rangex) 38 | torch.arange(0, height, out=self.rangey) 39 | self.xx = self.rangex.repeat(height, 1).contiguous() 40 | self.yy = self.rangey.repeat(width, 1).t().contiguous() 41 | 42 | def forward(self, width, height): 43 | if self.width != width or self.height != height: 44 | self._compute_meshgrid(width=width, height=height) 45 | self.width = width 46 | self.height = height 47 | return self.xx, self.yy 48 | 49 | 50 | class BatchSub2Ind(nn.Module): 51 | def __init__(self): 52 | super(BatchSub2Ind, self).__init__() 53 | self.register_buffer("_offsets", torch.LongTensor()) 54 | 55 | def forward(self, shape, row_sub, col_sub, out=None): 56 | batch_size = row_sub.size(0) 57 | height, width = shape 58 | ind = row_sub*width + col_sub 59 | torch.arange(batch_size, out=self._offsets) 60 | self._offsets *= (height*width) 61 | 62 | if out is None: 63 | return torch.add(ind, self._offsets.view(-1,1,1)) 64 | else: 65 | torch.add(ind,
self._offsets.view(-1,1,1), out=out) 66 | 67 | 68 | class Interp2(nn.Module): 69 | def __init__(self, clamp=False): 70 | super(Interp2, self).__init__() 71 | self._clamp = clamp 72 | self._batch_sub2ind = BatchSub2Ind() 73 | self.register_buffer("_x0", torch.LongTensor()) 74 | self.register_buffer("_x1", torch.LongTensor()) 75 | self.register_buffer("_y0", torch.LongTensor()) 76 | self.register_buffer("_y1", torch.LongTensor()) 77 | self.register_buffer("_i00", torch.LongTensor()) 78 | self.register_buffer("_i01", torch.LongTensor()) 79 | self.register_buffer("_i10", torch.LongTensor()) 80 | self.register_buffer("_i11", torch.LongTensor()) 81 | self.register_buffer("_v00", torch.FloatTensor()) 82 | self.register_buffer("_v01", torch.FloatTensor()) 83 | self.register_buffer("_v10", torch.FloatTensor()) 84 | self.register_buffer("_v11", torch.FloatTensor()) 85 | self.register_buffer("_x", torch.FloatTensor()) 86 | self.register_buffer("_y", torch.FloatTensor()) 87 | 88 | def forward(self, v, xq, yq): 89 | batch_size, channels, height, width = v.size() 90 | 91 | # clamp if wanted 92 | if self._clamp: 93 | xq.clamp_(0, width - 1) 94 | yq.clamp_(0, height - 1) 95 | 96 | # ------------------------------------------------------------------ 97 | # Find neighbors 98 | # 99 | # x0 = torch.floor(xq).long(), x0.clamp_(0, width - 1) 100 | # x1 = x0 + 1, x1.clamp_(0, width - 1) 101 | # y0 = torch.floor(yq).long(), y0.clamp_(0, height - 1) 102 | # y1 = y0 + 1, y1.clamp_(0, height - 1) 103 | # 104 | # ------------------------------------------------------------------ 105 | self._x0 = torch.floor(xq).long().clamp(0, width - 1) 106 | self._y0 = torch.floor(yq).long().clamp(0, height - 1) 107 | 108 | self._x1 = torch.add(self._x0, 1).clamp(0, width - 1) 109 | self._y1 = torch.add(self._y0, 1).clamp(0, height - 1) 110 | 111 | # batch_sub2ind 112 | self._batch_sub2ind([height, width], self._y0, self._x0, out=self._i00) 113 | self._batch_sub2ind([height, width], self._y0, self._x1, out=self._i01) 114 | self._batch_sub2ind([height, width], self._y1, self._x0, out=self._i10) 115 | self._batch_sub2ind([height, width], self._y1, self._x1, out=self._i11) 116 | 117 | # reshape 118 | v_flat = _bchw2bhwc(v).contiguous().view(-1, channels) 119 | torch.index_select(v_flat, dim=0, index=self._i00.view(-1), out=self._v00) 120 | torch.index_select(v_flat, dim=0, index=self._i01.view(-1), out=self._v01) 121 | torch.index_select(v_flat, dim=0, index=self._i10.view(-1), out=self._v10) 122 | torch.index_select(v_flat, dim=0, index=self._i11.view(-1), out=self._v11) 123 | 124 | # local_coords 125 | torch.add(xq, - self._x0.float(), out=self._x) 126 | torch.add(yq, - self._y0.float(), out=self._y) 127 | 128 | # weights 129 | w00 = torch.unsqueeze((1.0 - self._y) * (1.0 - self._x), dim=1) 130 | w01 = torch.unsqueeze((1.0 - self._y) * self._x, dim=1) 131 | w10 = torch.unsqueeze(self._y * (1.0 - self._x), dim=1) 132 | w11 = torch.unsqueeze(self._y * self._x, dim=1) 133 | 134 | def _reshape(u): 135 | return _bhwc2bchw(u.view(batch_size, height, width, channels)) 136 | 137 | # values 138 | values = _reshape(self._v00)*w00 + _reshape(self._v01)*w01 \ 139 | + _reshape(self._v10)*w10 + _reshape(self._v11)*w11 140 | 141 | if self._clamp: 142 | return values 143 | else: 144 | # find_invalid 145 | invalid = ((xq < 0) | (xq >= width) | (yq < 0) | (yq >= height)).unsqueeze(dim=1).float() 146 | # maskout invalid 147 | transformed = invalid * torch.zeros_like(values) + (1.0 - invalid)*values 148 | 149 | return transformed 150 | 
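Interp2 above implements batched bilinear interpolation by hand: for each query coordinate it gathers the four integer neighbors through BatchSub2Ind, blends them with the usual (1 - x)(1 - y) weights, and, when clamp=False, zeroes out queries that fall outside the image. A small CPU-only sanity check, assuming the repository root is on the import path and its requirements are installed (the expected value follows from the math, not from any repo test):

import torch
from utils.interpolation import Interp2

# Sample a 2x2 image at its center: all four pixels 0, 1, 2, 3 contribute
# with weight 0.25 each, so every output value is 1.5.
v = torch.arange(4.0).view(1, 1, 2, 2)   # values [[0, 1], [2, 3]]
xq = torch.full((1, 2, 2), 0.5)          # query grid, same spatial size as v
yq = torch.full((1, 2, 2), 0.5)
interp = Interp2(clamp=True)
print(interp(v, xq, yq))                 # tensor of 1.5, shape (1, 1, 2, 2)

Note that the query grid must contain as many points as the input has pixels, because forward reshapes the gathered values back to (b, c, h, w) at the end.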
-------------------------------------------------------------------------------- /utils/monodepth_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as tf 6 | 7 | 8 | def disp_post_processing(disp): 9 | b, _, h, w = disp.shape 10 | b_h = int(b/2) 11 | 12 | l_disp = disp[0:b_h, :, :, :] 13 | r_disp = torch.flip(disp[b_h:, :, :, :], [3]) 14 | m_disp = 0.5 * (l_disp + r_disp) 15 | grid_l = torch.linspace(0.0, 1.0, w).view(1, 1, 1, w).expand(1, 1, h, w).float().requires_grad_(False).cuda() 16 | l_mask = 1.0 - torch.clamp(20 * (grid_l - 0.05), 0, 1) 17 | r_mask = torch.flip(l_mask, [3]) 18 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 19 | 20 | 21 | def compute_errors(gt, pred): 22 | thresh = torch.max((gt / pred), (pred / gt)) 23 | a1 = (thresh < 1.25).float().mean() 24 | a2 = (thresh < 1.25 ** 2).float().mean() 25 | a3 = (thresh < 1.25 ** 3).float().mean() 26 | 27 | rmse = (gt - pred) ** 2 28 | rmse = torch.sqrt(rmse.mean()) 29 | 30 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 31 | rmse_log = torch.sqrt(rmse_log.mean()) 32 | 33 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 34 | 35 | sq_rel = torch.mean(((gt - pred) ** 2) / gt) 36 | 37 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 38 | 39 | 40 | def compute_d1_all(gt_disps, disp_t, gt_mask): 41 | disp_diff = torch.abs(gt_disps[gt_mask] - disp_t[gt_mask]) 42 | bad_pixels = (disp_diff >= 3) & ((disp_diff / gt_disps[gt_mask]) >= 0.05) 43 | d1_all = 100.0 * bad_pixels.sum().float() / gt_mask.sum().float() 44 | 45 | return d1_all 46 | -------------------------------------------------------------------------------- /utils/sceneflow_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as tf 6 | 7 | 8 | def post_processing(l_disp, r_disp): 9 | 10 | b, _, h, w = l_disp.shape 11 | m_disp = 0.5 * (l_disp + r_disp) 12 | grid_l = torch.linspace(0.0, 1.0, w).view(1, 1, 1, w).expand(1, 1, h, w).float().requires_grad_(False).cuda() 13 | l_mask = 1.0 - torch.clamp(20 * (grid_l - 0.05), 0, 1) 14 | r_mask = torch.flip(l_mask, [3]) 15 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 16 | 17 | 18 | def flow_horizontal_flip(flow_input): 19 | 20 | flow_flip = torch.flip(flow_input, [3]) 21 | flow_flip[:, 0:1, :, :] *= -1 22 | 23 | return flow_flip.contiguous() 24 | 25 | 26 | def disp2depth_kitti(pred_disp, k_value): 27 | 28 | pred_depth = k_value.unsqueeze(1).unsqueeze(1).unsqueeze(1) * 0.54 / (pred_disp + 1e-8) 29 | pred_depth = torch.clamp(pred_depth, 1e-3, 80) 30 | 31 | return pred_depth 32 | 33 | 34 | def get_pixelgrid(b, h, w): 35 | grid_h = torch.linspace(0.0, w - 1, w).view(1, 1, 1, w).expand(b, 1, h, w) 36 | grid_v = torch.linspace(0.0, h - 1, h).view(1, 1, h, 1).expand(b, 1, h, w) 37 | 38 | ones = torch.ones_like(grid_h) 39 | pixelgrid = torch.cat((grid_h, grid_v, ones), dim=1).float().requires_grad_(False).cuda() 40 | 41 | return pixelgrid 42 | 43 | 44 | def pixel2pts(intrinsics, depth): 45 | b, _, h, w = depth.size() 46 | 47 | pixelgrid = get_pixelgrid(b, h, w) 48 | 49 | depth_mat = depth.view(b, 1, -1) 50 | pixel_mat = pixelgrid.view(b, 3, -1) 51 | pts_mat = torch.matmul(torch.inverse(intrinsics.cpu()).cuda(), 
pixel_mat) * depth_mat 52 | pts = pts_mat.view(b, -1, h, w) 53 | 54 | return pts, pixelgrid 55 | 56 | 57 | def pts2pixel(pts, intrinsics): 58 | b, _, h, w = pts.size() 59 | proj_pts = torch.matmul(intrinsics, pts.view(b, 3, -1)) 60 | pixels_mat = proj_pts.div(proj_pts[:, 2:3, :] + 1e-8)[:, 0:2, :] 61 | 62 | return pixels_mat.view(b, 2, h, w) 63 | 64 | 65 | def intrinsic_scale(intrinsic, scale_y, scale_x): 66 | b, h, w = intrinsic.size() 67 | fx = intrinsic[:, 0, 0] * scale_x 68 | fy = intrinsic[:, 1, 1] * scale_y 69 | cx = intrinsic[:, 0, 2] * scale_x 70 | cy = intrinsic[:, 1, 2] * scale_y 71 | 72 | zeros = torch.zeros_like(fx) 73 | r1 = torch.stack([fx, zeros, cx], dim=1) 74 | r2 = torch.stack([zeros, fy, cy], dim=1) 75 | r3 = torch.tensor([0., 0., 1.], requires_grad=False).cuda().unsqueeze(0).expand(b, -1) 76 | intrinsic_s = torch.stack([r1, r2, r3], dim=1) 77 | 78 | return intrinsic_s 79 | 80 | 81 | def pixel2pts_ms(intrinsic, output_disp, rel_scale): 82 | # pixel2pts 83 | intrinsic_dp_s = intrinsic_scale(intrinsic, rel_scale[:,0], rel_scale[:,1]) 84 | output_depth = disp2depth_kitti(output_disp, intrinsic_dp_s[:, 0, 0]) 85 | pts, _ = pixel2pts(intrinsic_dp_s, output_depth) 86 | 87 | return pts, intrinsic_dp_s 88 | 89 | 90 | def pts2pixel_ms(intrinsic, pts, output_sf, disp_size): 91 | 92 | # +sceneflow and reprojection 93 | sf_s = tf.interpolate(output_sf, disp_size, mode="bilinear", align_corners=True) 94 | pts_tform = pts + sf_s 95 | coord = pts2pixel(pts_tform, intrinsic) 96 | 97 | norm_coord_w = coord[:, 0:1, :, :] / (disp_size[1] - 1) * 2 - 1 98 | norm_coord_h = coord[:, 1:2, :, :] / (disp_size[0] - 1) * 2 - 1 99 | norm_coord = torch.cat((norm_coord_w, norm_coord_h), dim=1) 100 | 101 | return sf_s, pts_tform, norm_coord 102 | 103 | 104 | def reconstructImg(coord, img): 105 | grid = coord.transpose(1, 2).transpose(2, 3) 106 | img_warp = tf.grid_sample(img, grid) 107 | 108 | mask = torch.ones_like(img, requires_grad=False) 109 | mask = tf.grid_sample(mask, grid) 110 | mask = (mask >= 1.0).float() 111 | return img_warp * mask 112 | 113 | 114 | def reconstructPts(coord, pts): 115 | grid = coord.transpose(1, 2).transpose(2, 3) 116 | pts_warp = tf.grid_sample(pts, grid) 117 | 118 | mask = torch.ones_like(pts, requires_grad=False) 119 | mask = tf.grid_sample(mask, grid) 120 | mask = (mask >= 1.0).float() 121 | return pts_warp * mask 122 | 123 | 124 | def projectSceneFlow2Flow(intrinsic, sceneflow, disp): 125 | 126 | _, _, h, w = disp.size() 127 | 128 | output_depth = disp2depth_kitti(disp, intrinsic[:, 0, 0]) 129 | pts, pixelgrid = pixel2pts(intrinsic, output_depth) 130 | 131 | sf_s = tf.interpolate(sceneflow, [h, w], mode="bilinear", align_corners=True) 132 | pts_tform = pts + sf_s 133 | coord = pts2pixel(pts_tform, intrinsic) 134 | flow = coord - pixelgrid[:, 0:2, :, :] 135 | 136 | return flow 137 | --------------------------------------------------------------------------------
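As a closing note on the geometry helpers: disp2depth_kitti encodes the stereo relation depth = f_x * B / disparity with the KITTI baseline B = 0.54 m baked in, clamping the result to [1e-3, 80] m. A quick CPU sanity check, assuming the repository root is on the import path (the focal length 721.5377 px is a typical KITTI calibration value, used here purely for illustration):

import torch
from utils.sceneflow_util import disp2depth_kitti

disp = torch.full((1, 1, 4, 4), 50.0)  # disparity map, 50 px everywhere
fx = torch.tensor([721.5377])          # per-sample horizontal focal length
depth = disp2depth_kitti(disp, fx)
print(depth[0, 0, 0, 0])               # 721.5377 * 0.54 / 50 is about 7.79 m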