├── .gitignore ├── LICENSE ├── __init__.py ├── augmentations.py ├── checkpoints ├── full_model_eigen │ └── checkpoint_eigen_split.ckpt ├── full_model_kitti │ ├── checkpoint_kitti_split.ckpt │ └── checkpoint_latest.ckpt └── full_model_kitti_ft │ └── checkpoint_kitti_ft.ckpt ├── core ├── commandline.py ├── configuration.py ├── logger.py ├── optim.py ├── runtime.py └── tools.py ├── datasets ├── __init__.py ├── cam_intrinsics │ ├── calib_cam_to_cam_2011_09_26.txt │ ├── calib_cam_to_cam_2011_09_28.txt │ ├── calib_cam_to_cam_2011_09_29.txt │ ├── calib_cam_to_cam_2011_09_30.txt │ └── calib_cam_to_cam_2011_10_03.txt ├── common.py ├── custom_batchsampler.py ├── index_generator │ ├── kitti_lidar_to_depth │ │ ├── godard_evaluation_kitti.py │ │ ├── godard_evaluation_utils.py │ │ └── test_files_eigen.txt │ ├── prepare_train_data.py │ └── provided │ │ ├── eigen_test_files.txt │ │ ├── excluded_frames.txt │ │ └── train_mapping.txt ├── index_txt │ ├── eigen_full.txt │ ├── eigen_text.txt │ ├── eigen_train.txt │ ├── eigen_valid.txt │ ├── kitti_full.txt │ ├── kitti_raw_all_imgs.txt │ ├── kitti_train.txt │ └── kitti_valid.txt ├── kitti_2015_test.py ├── kitti_2015_train.py ├── kitti_comb_mnsf.py ├── kitti_eigen_test.py ├── kitti_raw_monodepth.py └── kitti_raw_monosf.py ├── demo ├── demo.gif ├── demo_generator │ ├── cam_pose.json │ ├── kitti_img │ │ └── image_2 │ │ │ ├── 000139_10.png │ │ │ └── 000139_11.png │ ├── results │ │ ├── disp_0 │ │ │ └── 000139_10.png │ │ ├── disp_1 │ │ │ └── 000139_10.png │ │ └── flow │ │ │ └── 000139_10.png │ ├── run.py │ ├── utils_misc.py │ └── vis │ │ └── __init__ └── teaser.png ├── install_modules.sh ├── losses.py ├── main.py ├── models ├── __init__.py ├── correlation_package │ ├── __init__.py │ ├── correlation.py │ ├── correlation_cuda.cc │ ├── correlation_cuda_kernel.cu │ ├── correlation_cuda_kernel.cuh │ ├── readme.txt │ └── setup.py ├── forwardwarp_package │ ├── __init__.py │ ├── forward_warp.py │ ├── forward_warp_cuda.cpp │ ├── forward_warp_cuda_kernel.cu │ └── setup.py ├── model_monodepth_ablation.py ├── model_monosceneflow.py ├── model_monosceneflow_ablation.py ├── model_monosceneflow_ablation_decoder_split.py ├── modules_camconv.py ├── modules_monodepth.py └── modules_sceneflow.py ├── readme.md ├── requirements.txt ├── scripts ├── ablation1_augmentation │ ├── ablation1_eval_monodepth_aug.sh │ ├── ablation1_eval_monodepth_basic.sh │ ├── ablation1_eval_monodepth_cc.sh │ ├── ablation1_eval_monodepth_cc_aug.sh │ ├── ablation1_eval_monosf_base.sh │ ├── ablation1_eval_monosf_cc.sh │ ├── ablation1_eval_monosf_cc_aug.sh │ ├── ablation1_train_monodepth_kitti.sh │ ├── ablation1_train_monodepth_kitti_aug.sh │ ├── ablation1_train_monodepth_kitti_aug_cc.sh │ ├── ablation1_train_monodepth_kitti_cc.sh │ ├── ablation1_train_monosf_base.sh │ ├── ablation1_train_monosf_camconv.sh │ └── ablation1_train_monosf_camconv_aug.sh ├── ablation2_loss │ ├── ablation2_eval_monosf_loss_basic.sh │ ├── ablation2_eval_monosf_loss_noOcc.sh │ ├── ablation2_eval_monosf_loss_noPts.sh │ ├── ablation2_train_monosf_loss_basic.sh │ ├── ablation2_train_monosf_loss_noOcc.sh │ └── ablation2_train_monosf_loss_noPts.sh ├── ablation3_decoder_split │ ├── ablation3_eval_monosf_disp_only.sh │ ├── ablation3_eval_monosf_flow_only.sh │ ├── ablation3_eval_monosf_splitting_cont.sh │ ├── ablation3_eval_monosf_splitting_last1.sh │ ├── ablation3_eval_monosf_splitting_last2.sh │ ├── ablation3_eval_monosf_splitting_last3.sh │ ├── ablation3_eval_monosf_splitting_last4.sh │ ├── ablation3_eval_monosf_splitting_last5.sh │ ├── 
ablation3_train_monosf_disp_only.sh │ ├── ablation3_train_monosf_flow_only.sh │ ├── ablation3_train_monosf_splitting_cont.sh │ ├── ablation3_train_monosf_splitting_last1.sh │ ├── ablation3_train_monosf_splitting_last2.sh │ ├── ablation3_train_monosf_splitting_last3.sh │ ├── ablation3_train_monosf_splitting_last4.sh │ └── ablation3_train_monosf_splitting_last5.sh ├── eval_monodepth_selfsup_eigen_test.sh ├── eval_monodepth_selfsup_kitti_train.sh ├── eval_monosf_finetune_kitti_test.sh ├── eval_monosf_selfsup_kitti_test.sh ├── eval_monosf_selfsup_kitti_train.sh ├── train_monosf_kitti_finetune_1st_stage.sh ├── train_monosf_kitti_finetune_2nd_stage.sh ├── train_monosf_selfsup_eigen_train.sh └── train_monosf_selfsup_kitti_raw.sh └── utils ├── __init__.py ├── flow.py ├── interpolation.py ├── monodepth_eval.py └── sceneflow_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.o 4 | *.egg 5 | *.egg-info/ 6 | *.DS_Store -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/__init__.py -------------------------------------------------------------------------------- /checkpoints/full_model_eigen/checkpoint_eigen_split.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_eigen/checkpoint_eigen_split.ckpt -------------------------------------------------------------------------------- /checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt -------------------------------------------------------------------------------- /checkpoints/full_model_kitti/checkpoint_latest.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_kitti/checkpoint_latest.ckpt -------------------------------------------------------------------------------- /checkpoints/full_model_kitti_ft/checkpoint_kitti_ft.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/checkpoints/full_model_kitti_ft/checkpoint_kitti_ft.ckpt -------------------------------------------------------------------------------- /core/logger.py: -------------------------------------------------------------------------------- 1 | ## Portions of Code from, copyright 2018 Jochen Gast 2 | 3 | from __future__ import absolute_import, division, print_function 4 | 5 | import colorama 6 | import logging 7 | import os 8 | import re 9 | import sys 10 | from core import tools 11 | 12 | 13 | def get_default_logging_format(colorize=False, brackets=False): 14 | style = colorama.Style.DIM if colorize else '' 15 | # color = colorama.Fore.CYAN if colorize else '' 16 | color = colorama.Fore.WHITE if colorize else '' 17 | reset = colorama.Style.RESET_ALL if colorize else '' 18 | if brackets: 19 | result = "{}{}[%(asctime)s]{} 
%(message)s".format(style, color, reset) 20 | else: 21 | result = "{}{}%(asctime)s{} %(message)s".format(style, color, reset) 22 | return result 23 | 24 | 25 | def get_default_logging_datefmt(): 26 | return "%Y-%m-%d %H:%M:%S" 27 | 28 | 29 | def log_module_info(module): 30 | lines = module.__str__().split("\n") 31 | for line in lines: 32 | logging.info(line) 33 | 34 | 35 | class LogbookFormatter(logging.Formatter): 36 | def __init__(self, fmt=None, datefmt=None): 37 | super(LogbookFormatter, self).__init__(fmt=fmt, datefmt=datefmt) 38 | self._re = re.compile(r"\033\[[0-9]+m") 39 | 40 | def remove_colors_from_msg(self, msg): 41 | msg = re.sub(self._re, "", msg) 42 | return msg 43 | 44 | def format(self, record=None): 45 | record.msg = self.remove_colors_from_msg(record.msg) 46 | return super(LogbookFormatter, self).format(record) 47 | 48 | 49 | class ConsoleFormatter(logging.Formatter): 50 | def __init__(self, fmt=None, datefmt=None): 51 | super(ConsoleFormatter, self).__init__(fmt=fmt, datefmt=datefmt) 52 | 53 | def format(self, record=None): 54 | indent = sys.modules[__name__].global_indent 55 | record.msg = " " * indent + record.msg 56 | return super(ConsoleFormatter, self).format(record) 57 | 58 | 59 | class SkipLogbookFilter(logging.Filter): 60 | def filter(self, record): 61 | return record.levelno != logging.LOGBOOK 62 | 63 | 64 | def configure_logging(filename=None): 65 | # set global indent level 66 | sys.modules[__name__].global_indent = 0 67 | 68 | # add custom tqdm logger 69 | tools.addLoggingLevel("LOGBOOK", 1000) 70 | 71 | # create logger 72 | root_logger = logging.getLogger("") 73 | root_logger.setLevel(logging.INFO) 74 | 75 | # create console handler and set level to debug 76 | console = logging.StreamHandler() 77 | console.setLevel(logging.INFO) 78 | fmt = get_default_logging_format(colorize=True, brackets=False) 79 | datefmt = get_default_logging_datefmt() 80 | formatter = ConsoleFormatter(fmt=fmt, datefmt=datefmt) 81 | console.setFormatter(formatter) 82 | 83 | # Skip logging.tqdm requests for console outputs 84 | skip_logbook_filter = SkipLogbookFilter() 85 | console.addFilter(skip_logbook_filter) 86 | 87 | # add console to root_logger 88 | root_logger.addHandler(console) 89 | 90 | # add logbook 91 | if filename is not None: 92 | # ensure dir 93 | d = os.path.dirname(filename) 94 | if not os.path.exists(d): 95 | os.makedirs(d) 96 | 97 | # -------------------------------------------------------------------------------------- 98 | # Configure handler that removes color codes from logbook 99 | # -------------------------------------------------------------------------------------- 100 | logbook = logging.FileHandler(filename=filename, mode="a", encoding="utf-8") 101 | logbook.setLevel(logging.INFO) 102 | fmt = get_default_logging_format(colorize=False, brackets=True) 103 | logbook_formatter = LogbookFormatter(fmt=fmt, datefmt=datefmt) 104 | logbook.setFormatter(logbook_formatter) 105 | root_logger.addHandler(logbook) 106 | 107 | 108 | class LoggingBlock: 109 | def __init__(self, title, emph=False): 110 | self._emph = emph 111 | bright = colorama.Style.BRIGHT 112 | cyan = colorama.Fore.CYAN 113 | reset = colorama.Style.RESET_ALL 114 | if emph: 115 | logging.info("%s==>%s %s%s%s" % (cyan, reset, bright, title, reset)) 116 | else: 117 | logging.info(title) 118 | 119 | def __enter__(self): 120 | sys.modules[__name__].global_indent += 2 121 | return self 122 | 123 | def __exit__(self, exc_type, exc_value, traceback): 124 | sys.modules[__name__].global_indent -= 2 125 | 
-------------------------------------------------------------------------------- /core/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | from core.tools import module_classes_to_dict 4 | 5 | # ------------------------------------------------------------------------------------ 6 | # Export PyTorch optimizer 7 | # ------------------------------------------------------------------------------------ 8 | _this = sys.modules[__name__] 9 | _optimizer_classes = module_classes_to_dict(torch.optim, exclude_classes="Optimizer") 10 | for name, constructor in _optimizer_classes.items(): 11 | setattr(_this, name, constructor) 12 | __all__ = _optimizer_classes.keys() 13 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from . import kitti_2015_train 2 | from . import kitti_2015_test 3 | 4 | from . import kitti_raw_monosf 5 | from . import kitti_raw_monodepth 6 | 7 | from . import kitti_comb_mnsf 8 | from . import kitti_eigen_test 9 | 10 | KITTI_2015_Train_Full_mnsf = kitti_2015_train.KITTI_2015_MonoSceneFlow_Full 11 | KITTI_2015_Train_Full_monodepth = kitti_2015_train.KITTI_2015_MonoDepth_Full 12 | 13 | KITTI_2015_Test = kitti_2015_test.KITTI_2015_Test 14 | 15 | KITTI_Raw_KittiSplit_Train_mnsf = kitti_raw_monosf.KITTI_Raw_KittiSplit_Train 16 | KITTI_Raw_KittiSplit_Valid_mnsf = kitti_raw_monosf.KITTI_Raw_KittiSplit_Valid 17 | KITTI_Raw_KittiSplit_Full_mnsf = kitti_raw_monosf.KITTI_Raw_KittiSplit_Full 18 | KITTI_Raw_EigenSplit_Train_mnsf = kitti_raw_monosf.KITTI_Raw_EigenSplit_Train 19 | KITTI_Raw_EigenSplit_Valid_mnsf = kitti_raw_monosf.KITTI_Raw_EigenSplit_Valid 20 | KITTI_Raw_EigenSplit_Full_mnsf = kitti_raw_monosf.KITTI_Raw_EigenSplit_Full 21 | 22 | KITTI_Raw_KittiSplit_Train_monodepth = kitti_raw_monodepth.KITTI_Raw_KittiSplit_Train 23 | KITTI_Raw_KittiSplit_Valid_monodepth = kitti_raw_monodepth.KITTI_Raw_KittiSplit_Valid 24 | 25 | KITTI_Comb_Train = kitti_comb_mnsf.KITTI_Comb_Train 26 | KITTI_Comb_Val = kitti_comb_mnsf.KITTI_Comb_Val 27 | KITTI_Comb_Full = kitti_comb_mnsf.KITTI_Comb_Full 28 | 29 | KITTI_Eigen_Test = kitti_eigen_test.KITTI_Eigen_Test 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_26.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:57:47 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.842439e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.808141e+02 2.331966e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.728755e-01 2.037299e-01 2.219027e-03 1.383707e-03 -7.233722e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: 2.573699e-16 -1.059758e-16 1.614870e-16 8 | S_rect_00: 1.242000e+03 3.750000e+02 9 | R_rect_00: 9.999239e-01 9.837760e-03 -7.445048e-03 -9.869795e-03 9.999421e-01 -4.278459e-03 7.402527e-03 4.351614e-03 9.999631e-01 10 | P_rect_00: 7.215377e+02 0.000000e+00 6.095593e+02 0.000000e+00 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.895267e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.878386e+02 2.455590e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: 
-3.644661e-01 1.790019e-01 1.148107e-03 -6.298563e-04 -5.314062e-02 14 | R_01: 9.993513e-01 1.860866e-02 -3.083487e-02 -1.887662e-02 9.997863e-01 -8.421873e-03 3.067156e-02 8.998467e-03 9.994890e-01 15 | T_01: -5.370000e-01 4.822061e-03 -1.252488e-02 16 | S_rect_01: 1.242000e+03 3.750000e+02 17 | R_rect_01: 9.996878e-01 -8.976826e-03 2.331651e-02 8.876121e-03 9.999508e-01 4.418952e-03 -2.335503e-02 -4.210612e-03 9.997184e-01 18 | P_rect_01: 7.215377e+02 0.000000e+00 6.095593e+02 -3.875744e+02 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.597910e+02 0.000000e+00 6.960217e+02 0.000000e+00 9.569251e+02 2.241806e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.691481e-01 1.968681e-01 1.353473e-03 5.677587e-04 -6.770705e-02 22 | R_02: 9.999758e-01 -5.267463e-03 -4.552439e-03 5.251945e-03 9.999804e-01 -3.413835e-03 4.570332e-03 3.389843e-03 9.999838e-01 23 | T_02: 5.956621e-02 2.900141e-04 2.577209e-03 24 | S_rect_02: 1.242000e+03 3.750000e+02 25 | R_rect_02: 9.998817e-01 1.511453e-02 -2.841595e-03 -1.511724e-02 9.998853e-01 -9.338510e-04 2.827154e-03 9.766976e-04 9.999955e-01 26 | P_rect_02: 7.215377e+02 0.000000e+00 6.095593e+02 4.485728e+01 0.000000e+00 7.215377e+02 1.728540e+02 2.163791e-01 0.000000e+00 0.000000e+00 1.000000e+00 2.745884e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.037596e+02 0.000000e+00 6.957519e+02 0.000000e+00 9.019653e+02 2.242509e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.639558e-01 1.788651e-01 6.029694e-04 -3.922424e-04 -5.382460e-02 30 | R_03: 9.995599e-01 1.699522e-02 -2.431313e-02 -1.704422e-02 9.998531e-01 -1.809756e-03 2.427880e-02 2.223358e-03 9.997028e-01 31 | T_03: -4.731050e-01 5.551470e-03 -5.250882e-03 32 | S_rect_03: 1.242000e+03 3.750000e+02 33 | R_rect_03: 9.998321e-01 -7.193136e-03 1.685599e-02 7.232804e-03 9.999712e-01 -2.293585e-03 -1.683901e-02 2.415116e-03 9.998553e-01 34 | P_rect_03: 7.215377e+02 0.000000e+00 6.095593e+02 -3.395242e+02 0.000000e+00 7.215377e+02 1.728540e+02 2.199936e+00 0.000000e+00 0.000000e+00 1.000000e+00 2.729905e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_28.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:58:50 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.812178e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.758994e+02 2.471364e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.791375e-01 2.148119e-01 1.227094e-03 2.343833e-03 -7.910379e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -7.686159e-17 -2.989062e-17 -1.366428e-16 8 | S_rect_00: 1.224000e+03 3.700000e+02 9 | R_rect_00: 9.999128e-01 1.009263e-02 -8.511932e-03 -1.012729e-02 9.999406e-01 -4.037671e-03 8.470675e-03 4.123522e-03 9.999556e-01 10 | P_rect_00: 7.070493e+02 0.000000e+00 6.040814e+02 0.000000e+00 0.000000e+00 7.070493e+02 1.805066e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.863925e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.821423e+02 2.588854e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.673556e-01 1.862563e-01 8.496128e-05 1.699076e-04 -5.822524e-02 14 | R_01: 9.993552e-01 1.830187e-02 -3.089048e-02 -1.855578e-02 9.997962e-01 -7.952999e-03 
3.073863e-02 8.521068e-03 9.994911e-01 15 | T_01: -5.370000e-01 4.509875e-03 -1.198621e-02 16 | S_rect_01: 1.224000e+03 3.700000e+02 17 | R_rect_01: 9.997157e-01 -8.395891e-03 2.231435e-02 8.304757e-03 9.999568e-01 4.173646e-03 -2.234842e-02 -3.987145e-03 9.997423e-01 18 | P_rect_01: 7.070493e+02 0.000000e+00 6.040814e+02 -3.797842e+02 0.000000e+00 7.070493e+02 1.805066e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.569475e+02 0.000000e+00 6.939767e+02 0.000000e+00 9.522352e+02 2.386081e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.750956e-01 2.076838e-01 4.348525e-04 1.603162e-03 -7.469243e-02 22 | R_02: 9.999838e-01 -5.012736e-03 -2.710741e-03 5.002007e-03 9.999797e-01 -3.950381e-03 2.730489e-03 3.936758e-03 9.999885e-01 23 | T_02: 5.989688e-02 -1.367835e-03 4.637624e-03 24 | S_rect_02: 1.224000e+03 3.700000e+02 25 | R_rect_02: 9.998691e-01 1.512763e-02 -5.741851e-03 -1.512861e-02 9.998855e-01 -1.287536e-04 5.739247e-03 2.156030e-04 9.999835e-01 26 | P_rect_02: 7.070493e+02 0.000000e+00 6.040814e+02 4.575831e+01 0.000000e+00 7.070493e+02 1.805066e+02 -3.454157e-01 0.000000e+00 0.000000e+00 1.000000e+00 4.981016e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.011007e+02 0.000000e+00 6.982947e+02 0.000000e+00 8.970639e+02 2.377447e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.686011e-01 1.908666e-01 -5.689518e-04 3.332341e-04 -6.302873e-02 30 | R_03: 9.995054e-01 1.665288e-02 -2.667675e-02 -1.671777e-02 9.998578e-01 -2.211228e-03 2.663614e-02 2.656110e-03 9.996417e-01 31 | T_03: -4.756270e-01 5.296617e-03 -5.437198e-03 32 | S_rect_03: 1.224000e+03 3.700000e+02 33 | R_rect_03: 9.998134e-01 -6.606294e-03 1.815174e-02 6.637329e-03 9.999766e-01 -1.650024e-03 -1.814042e-02 1.770195e-03 9.998339e-01 34 | P_rect_03: 7.070493e+02 0.000000e+00 6.040814e+02 -3.341081e+02 0.000000e+00 7.070493e+02 1.805066e+02 2.330660e+00 0.000000e+00 0.000000e+00 1.000000e+00 3.201153e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_29.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:59:12 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.803769e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.757217e+02 2.441228e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.715862e-01 2.009708e-01 1.363807e-03 1.588184e-03 -6.967696e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -5.551115e-17 -2.312965e-17 3.700743e-16 8 | S_rect_00: 1.238000e+03 3.740000e+02 9 | R_rect_00: 9.999478e-01 9.791707e-03 -2.925305e-03 -9.806939e-03 9.999382e-01 -5.238719e-03 2.873828e-03 5.267134e-03 9.999820e-01 10 | P_rect_00: 7.183351e+02 0.000000e+00 6.003891e+02 0.000000e+00 0.000000e+00 7.183351e+02 1.815122e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.904660e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.875575e+02 2.581179e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.645289e-01 1.817607e-01 7.785894e-05 -1.378884e-04 -5.526709e-02 14 | R_01: 9.993414e-01 1.820343e-02 -3.139101e-02 -1.853620e-02 9.997747e-01 -1.034255e-02 3.119567e-02 1.091761e-02 9.994537e-01 15 | T_01: -5.370000e-01 4.682272e-03 -1.524529e-02 16 | S_rect_01: 1.238000e+03 3.740000e+02 17 | R_rect_01: 9.995593e-01 
-8.715472e-03 2.837724e-02 8.566270e-03 9.999489e-01 5.375144e-03 -2.842263e-02 -5.129688e-03 9.995828e-01 18 | P_rect_01: 7.183351e+02 0.000000e+00 6.003891e+02 -3.858846e+02 0.000000e+00 7.183351e+02 1.815122e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.607501e+02 0.000000e+00 6.944288e+02 0.000000e+00 9.570051e+02 2.363374e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.687738e-01 1.977559e-01 5.991384e-04 8.972739e-04 -6.822522e-02 22 | R_02: 9.999807e-01 -5.053665e-03 -3.619905e-03 5.036396e-03 9.999760e-01 -4.764072e-03 3.643894e-03 4.745749e-03 9.999821e-01 23 | T_02: 5.948968e-02 -8.603063e-04 2.662728e-03 24 | S_rect_02: 1.238000e+03 3.740000e+02 25 | R_rect_02: 9.998896e-01 1.484154e-02 7.649204e-04 -1.484114e-02 9.998897e-01 -5.289052e-04 -7.726858e-04 5.174945e-04 9.999996e-01 26 | P_rect_02: 7.183351e+02 0.000000e+00 6.003891e+02 4.450382e+01 0.000000e+00 7.183351e+02 1.815122e+02 -5.951107e-01 0.000000e+00 0.000000e+00 1.000000e+00 2.616315e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.047872e+02 0.000000e+00 6.946163e+02 0.000000e+00 9.017079e+02 2.353088e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.643123e-01 1.845455e-01 -3.868479e-04 1.281135e-04 -5.959776e-02 30 | R_03: 9.995851e-01 1.666283e-02 -2.349366e-02 -1.674297e-02 9.998546e-01 -3.218496e-03 2.343662e-02 3.610514e-03 9.997188e-01 31 | T_03: -4.732167e-01 5.830806e-03 -4.405247e-03 32 | S_rect_03: 1.238000e+03 3.740000e+02 33 | R_rect_03: 9.997648e-01 -6.942395e-03 2.054627e-02 6.982006e-03 9.999739e-01 -1.856797e-03 -2.053284e-02 1.999814e-03 9.997872e-01 34 | P_rect_03: 7.183351e+02 0.000000e+00 6.003891e+02 -3.363147e+02 0.000000e+00 7.183351e+02 1.815122e+02 3.159867e+00 0.000000e+00 0.000000e+00 1.000000e+00 5.323834e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_09_30.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 13:59:33 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.786977e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.717435e+02 2.497222e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.792567e-01 2.121203e-01 9.182571e-04 1.911304e-03 -7.605535e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -1.850372e-17 6.938894e-17 -7.401487e-17 8 | S_rect_00: 1.226000e+03 3.700000e+02 9 | R_rect_00: 9.999280e-01 8.085985e-03 -8.866797e-03 -8.123205e-03 9.999583e-01 -4.169750e-03 8.832711e-03 4.241477e-03 9.999520e-01 10 | P_rect_00: 7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.892043e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.832048e+02 2.616538e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.720803e-01 1.944116e-01 -1.077099e-04 -9.031379e-05 -6.314998e-02 14 | R_01: 9.993424e-01 1.830363e-02 -3.129928e-02 -1.856768e-02 9.997943e-01 -8.166432e-03 3.114337e-02 8.742218e-03 9.994767e-01 15 | T_01: -5.370000e-01 5.591661e-03 -1.200541e-02 16 | S_rect_01: 1.226000e+03 3.700000e+02 17 | R_rect_01: 9.996960e-01 -1.040961e-02 2.234966e-02 1.031552e-02 9.999375e-01 4.321301e-03 -2.239324e-02 -4.089439e-03 9.997409e-01 18 | P_rect_01: 7.070912e+02 0.000000e+00 6.018873e+02 
-3.798145e+02 0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | K_02: 9.591977e+02 0.000000e+00 6.944383e+02 0.000000e+00 9.529324e+02 2.416793e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.725637e-01 1.979803e-01 1.799970e-04 1.250593e-03 -6.608481e-02 22 | R_02: 9.999805e-01 -4.971067e-03 -3.793081e-03 4.954076e-03 9.999777e-01 -4.475856e-03 3.815246e-03 4.456977e-03 9.999828e-01 23 | T_02: 6.030222e-02 -1.293125e-03 5.900421e-03 24 | S_rect_02: 1.226000e+03 3.700000e+02 25 | R_rect_02: 9.999019e-01 1.307921e-02 -5.015634e-03 -1.307809e-02 9.999144e-01 2.561203e-04 5.018555e-03 -1.905003e-04 9.999874e-01 26 | P_rect_02: 7.070912e+02 0.000000e+00 6.018873e+02 4.688783e+01 0.000000e+00 7.070912e+02 1.831104e+02 1.178601e-01 0.000000e+00 0.000000e+00 1.000000e+00 6.203223e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.035972e+02 0.000000e+00 6.979803e+02 0.000000e+00 8.979356e+02 2.392935e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.726025e-01 1.973869e-01 -5.746215e-04 7.444947e-05 -6.699658e-02 30 | R_03: 9.994995e-01 1.667420e-02 -2.688514e-02 -1.673122e-02 9.998582e-01 -1.897204e-03 2.684969e-02 2.346075e-03 9.996367e-01 31 | T_03: -4.747879e-01 5.631988e-03 -5.233709e-03 32 | S_rect_03: 1.226000e+03 3.700000e+02 33 | R_rect_03: 9.998007e-01 -8.628355e-03 1.800315e-02 8.666473e-03 9.999604e-01 -2.040364e-03 -1.798483e-02 2.195981e-03 9.998358e-01 34 | P_rect_03: 7.070912e+02 0.000000e+00 6.018873e+02 -3.334597e+02 0.000000e+00 7.070912e+02 1.831104e+02 1.930130e+00 0.000000e+00 0.000000e+00 1.000000e+00 3.318498e-03 35 | -------------------------------------------------------------------------------- /datasets/cam_intrinsics/calib_cam_to_cam_2011_10_03.txt: -------------------------------------------------------------------------------- 1 | calib_time: 09-Jan-2012 14:00:15 2 | corner_dist: 9.950000e-02 3 | S_00: 1.392000e+03 5.120000e+02 4 | K_00: 9.799200e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.741183e+02 2.486443e+02 0.000000e+00 0.000000e+00 1.000000e+00 5 | D_00: -3.745594e-01 2.049385e-01 1.110145e-03 1.379375e-03 -7.084798e-02 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 7 | T_00: -9.251859e-17 8.326673e-17 -7.401487e-17 8 | S_rect_00: 1.241000e+03 3.760000e+02 9 | R_rect_00: 9.999454e-01 7.259129e-03 -7.519551e-03 -7.292213e-03 9.999638e-01 -4.381729e-03 7.487471e-03 4.436324e-03 9.999621e-01 10 | P_rect_00: 7.188560e+02 0.000000e+00 6.071928e+02 0.000000e+00 0.000000e+00 7.188560e+02 1.852157e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 11 | S_01: 1.392000e+03 5.120000e+02 12 | K_01: 9.903522e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.855674e+02 2.607319e+02 0.000000e+00 0.000000e+00 1.000000e+00 13 | D_01: -3.712084e-01 1.978723e-01 -3.709831e-05 -3.440494e-04 -6.724045e-02 14 | R_01: 9.993440e-01 1.814887e-02 -3.134011e-02 -1.842595e-02 9.997935e-01 -8.575221e-03 3.117801e-02 9.147067e-03 9.994720e-01 15 | T_01: -5.370000e-01 5.964270e-03 -1.274584e-02 16 | S_rect_01: 1.241000e+03 3.760000e+02 17 | R_rect_01: 9.996568e-01 -1.110284e-02 2.372712e-02 1.099810e-02 9.999292e-01 4.539964e-03 -2.377585e-02 -4.277453e-03 9.997082e-01 18 | P_rect_01: 7.188560e+02 0.000000e+00 6.071928e+02 -3.861448e+02 0.000000e+00 7.188560e+02 1.852157e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 19 | S_02: 1.392000e+03 5.120000e+02 20 | 
K_02: 9.601149e+02 0.000000e+00 6.947923e+02 0.000000e+00 9.548911e+02 2.403547e+02 0.000000e+00 0.000000e+00 1.000000e+00 21 | D_02: -3.685917e-01 1.928022e-01 4.069233e-04 7.247536e-04 -6.276909e-02 22 | R_02: 9.999788e-01 -5.008404e-03 -4.151018e-03 4.990516e-03 9.999783e-01 -4.308488e-03 4.172506e-03 4.287682e-03 9.999821e-01 23 | T_02: 5.954406e-02 -7.675338e-04 3.582565e-03 24 | S_rect_02: 1.241000e+03 3.760000e+02 25 | R_rect_02: 9.999191e-01 1.228161e-02 -3.316013e-03 -1.228209e-02 9.999246e-01 -1.245511e-04 3.314233e-03 1.652686e-04 9.999945e-01 26 | P_rect_02: 7.188560e+02 0.000000e+00 6.071928e+02 4.538225e+01 0.000000e+00 7.188560e+02 1.852157e+02 -1.130887e-01 0.000000e+00 0.000000e+00 1.000000e+00 3.779761e-03 27 | S_03: 1.392000e+03 5.120000e+02 28 | K_03: 9.049931e+02 0.000000e+00 6.957698e+02 0.000000e+00 9.004945e+02 2.389820e+02 0.000000e+00 0.000000e+00 1.000000e+00 29 | D_03: -3.735725e-01 2.066816e-01 -6.133284e-04 -1.193269e-04 -7.600861e-02 30 | R_03: 9.995578e-01 1.656369e-02 -2.469315e-02 -1.663353e-02 9.998582e-01 -2.625576e-03 2.464616e-02 3.035149e-03 9.996916e-01 31 | T_03: -4.738786e-01 5.991982e-03 -3.215069e-03 32 | S_rect_03: 1.241000e+03 3.760000e+02 33 | R_rect_03: 9.998092e-01 -9.354781e-03 1.714961e-02 9.382303e-03 9.999548e-01 -1.525064e-03 -1.713457e-02 1.685675e-03 9.998518e-01 34 | P_rect_03: 7.188560e+02 0.000000e+00 6.071928e+02 -3.372877e+02 0.000000e+00 7.188560e+02 1.852157e+02 2.369057e+00 0.000000e+00 0.000000e+00 1.000000e+00 4.915215e-03 35 | -------------------------------------------------------------------------------- /datasets/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import numpy as np 6 | import skimage.io as io 7 | import png 8 | 9 | width_to_date = dict() 10 | width_to_date[1242] = '2011_09_26' 11 | width_to_date[1224] = '2011_09_28' 12 | width_to_date[1238] = '2011_09_29' 13 | width_to_date[1226] = '2011_09_30' 14 | width_to_date[1241] = '2011_10_03' 15 | 16 | 17 | def get_date_from_width(width): 18 | return width_to_date[width] 19 | 20 | 21 | def list_flatten(input_list): 22 | return [img for sub_list in input_list for img in sub_list] 23 | 24 | 25 | def intrinsic_scale(mat, sy, sx): 26 | out = mat.clone() 27 | out[0, 0] *= sx 28 | out[0, 2] *= sx 29 | out[1, 1] *= sy 30 | out[1, 2] *= sy 31 | return out 32 | 33 | 34 | def kitti_adjust_intrinsic(k_l1, k_r1, crop_info): 35 | str_x = crop_info[0] 36 | str_y = crop_info[1] 37 | k_l1[0, 2] -= str_x 38 | k_l1[1, 2] -= str_y 39 | k_r1[0, 2] -= str_x 40 | k_r1[1, 2] -= str_y 41 | return k_l1, k_r1 42 | 43 | def kitti_crop_image_list(img_list, crop_info): 44 | str_x = crop_info[0] 45 | str_y = crop_info[1] 46 | end_x = crop_info[2] 47 | end_y = crop_info[3] 48 | 49 | transformed = [img[str_y:end_y, str_x:end_x, :] for img in img_list] 50 | 51 | return transformed 52 | 53 | 54 | def numpy2torch(array): 55 | assert(isinstance(array, np.ndarray)) 56 | if array.ndim == 3: 57 | array = np.transpose(array, (2, 0, 1)) 58 | else: 59 | array = np.expand_dims(array, axis=0) 60 | return torch.from_numpy(array.copy()).float() 61 | 62 | 63 | def read_image_as_byte(filename): 64 | return io.imread(filename) 65 | 66 | 67 | def read_png_flow(flow_file): 68 | flow_object = png.Reader(filename=flow_file) 69 | flow_direct = flow_object.asDirect() 70 | flow_data = list(flow_direct[2]) 71 | (w, h) = flow_direct[3]['size'] 72 | flow = np.zeros((h, 
w, 3), dtype=np.float64)
    for i in range(len(flow_data)):
        flow[i, :, 0] = flow_data[i][0::3]
        flow[i, :, 1] = flow_data[i][1::3]
        flow[i, :, 2] = flow_data[i][2::3]

    # KITTI flow PNGs store (u * 64 + 2^15) per channel; the third channel marks validity
    invalid_idx = (flow[:, :, 2] == 0)
    flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
    flow[invalid_idx, 0] = 0
    flow[invalid_idx, 1] = 0
    return flow[:, :, 0:2], (1 - invalid_idx * 1)[:, :, None]


def read_png_disp(disp_file):
    disp_np = io.imread(disp_file).astype(np.uint16) / 256.0
    disp_np = np.expand_dims(disp_np, axis=2)
    mask_disp = (disp_np > 0).astype(np.float64)
    return disp_np, mask_disp


def read_raw_calib_file(filepath):
    # From https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
    """Read in a calibration file and parse into a dictionary."""
    data = {}

    with open(filepath, 'r') as f:
        for line in f.readlines():
            key, value = line.split(':', 1)
            # The only non-float values in these files are dates, which
            # we don't care about anyway
            try:
                data[key] = np.array([float(x) for x in value.split()])
            except ValueError:
                pass
    return data


def read_calib_into_dict(path_dir):

    calibration_file_list = ['2011_09_26', '2011_09_28', '2011_09_29', '2011_09_30', '2011_10_03']
    intrinsic_dict_l = {}
    intrinsic_dict_r = {}

    for ii, date in enumerate(calibration_file_list):
        file_name = "cam_intrinsics/calib_cam_to_cam_" + date + '.txt'
        file_name_full = os.path.join(path_dir, file_name)
        file_data = read_raw_calib_file(file_name_full)
        P_rect_02 = np.reshape(file_data['P_rect_02'], (3, 4))
        P_rect_03 = np.reshape(file_data['P_rect_03'], (3, 4))
        intrinsic_dict_l[date] = P_rect_02[:3, :3]
        intrinsic_dict_r[date] = P_rect_03[:3, :3]

    return intrinsic_dict_l, intrinsic_dict_r
--------------------------------------------------------------------------------
/datasets/custom_batchsampler.py:
--------------------------------------------------------------------------------
from torch.utils.data.sampler import Sampler

class CustomBatchSampler(Sampler):

    def __init__(self, sampler):
        for samp in sampler:
            if not isinstance(samp, Sampler):
                raise ValueError("sampler should be an instance of "
                                 "torch.utils.data.Sampler, but got sampler={}"
                                 .format(samp))
        self.samplers = sampler
        self.n_samples = [len(samp) for samp in self.samplers]
        self.sample_cnt = [0 for samp in self.samplers]
        self.iters = [iter(samp) for samp in self.samplers]

        self.batch_size = [1, 3]

    def __iter__(self):

        for ii in range(len(self)):

            for ss, samp in enumerate(self.samplers):
                self.sample_cnt[ss] += self.batch_size[ss]
                if self.sample_cnt[ss] > self.n_samples[ss]:
                    self.iters[ss] = iter(samp)
                    self.sample_cnt[ss] = self.batch_size[ss]

            batch = []

            ## for each sampler, offset indices so they address the concatenated dataset
            for ss in range(len(self.samplers)):
                if ss == 0:  # was "ss is 0": identity comparison with an int literal; use ==
                    prev_idx = 0
                else:
                    prev_idx = self.n_samples[ss-1]

                for bb in range(self.batch_size[ss]):
                    batch.append(next(self.iters[ss]) + prev_idx)

            yield batch

    def __len__(self):
        return len(self.samplers[0])
--------------------------------------------------------------------------------
/datasets/index_generator/kitti_lidar_to_depth/godard_evaluation_kitti.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from glob import glob 4 | from godard_evaluation_utils import * 5 | 6 | 7 | data_path = '/fastdata/jhur/KITTI_raw/' 8 | all_images = glob(data_path + '*/*/image_02/data/*.jpg') 9 | 10 | for ii in range(len(all_images)): 11 | all_images[ii] = all_images[ii].replace(data_path, "") 12 | 13 | # num_samples = 697 14 | # test_files = read_text_lines('test_files_eigen.txt') 15 | gt_files, gt_calib, im_sizes, im_files, cams = read_file_data(all_images, data_path) 16 | 17 | # num_test = len(im_files) 18 | num_samples = len(gt_files) 19 | print(num_samples) 20 | # for t_id in range(0, 2): 21 | for t_id in range(num_samples): 22 | print(t_id) 23 | camera_id = cams[t_id] # 2 is left, 3 is right 24 | depth = generate_depth_map(gt_calib[t_id], gt_files[t_id], im_sizes[t_id], camera_id, False, True) 25 | # need to convert from disparity to depth 26 | focal_length, baseline = get_focal_length_baseline(gt_calib[t_id], camera_id) 27 | 28 | npy_file_name = gt_files[t_id].replace("KITTI_raw", "KITTI_raw_depth").replace(".bin", ".npy").replace("velodyne_points", "projected_depth") 29 | npy_file_dir = os.path.dirname(npy_file_name) 30 | if not os.path.exists(npy_file_dir): 31 | os.makedirs(npy_file_dir) 32 | 33 | np.save(npy_file_name, depth) 34 | -------------------------------------------------------------------------------- /datasets/index_generator/provided/train_mapping.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 2011_09_26 2011_09_26_drive_0005_sync 0000000010 4 | 2011_09_26 2011_09_26_drive_0005_sync 0000000059 5 | 6 | 7 | 8 | 2011_09_26 2011_09_26_drive_0009_sync 0000000354 9 | 2011_09_26 2011_09_26_drive_0009_sync 0000000364 10 | 2011_09_26 2011_09_26_drive_0009_sync 0000000374 11 | 2011_09_26 2011_09_26_drive_0009_sync 0000000384 12 | 2011_09_26 2011_09_26_drive_0009_sync 0000000394 13 | 2011_09_26 2011_09_26_drive_0009_sync 0000000414 14 | 2011_09_26 2011_09_26_drive_0011_sync 0000000111 15 | 2011_09_26 2011_09_26_drive_0011_sync 0000000127 16 | 2011_09_26 2011_09_26_drive_0011_sync 0000000147 17 | 2011_09_26 2011_09_26_drive_0011_sync 0000000157 18 | 2011_09_26 2011_09_26_drive_0011_sync 0000000167 19 | 2011_09_26 2011_09_26_drive_0013_sync 0000000010 20 | 2011_09_26 2011_09_26_drive_0013_sync 0000000020 21 | 2011_09_26 2011_09_26_drive_0013_sync 0000000040 22 | 2011_09_26 2011_09_26_drive_0013_sync 0000000070 23 | 2011_09_26 2011_09_26_drive_0014_sync 0000000010 24 | 2011_09_26 2011_09_26_drive_0014_sync 0000000020 25 | 2011_09_26 2011_09_26_drive_0014_sync 0000000030 26 | 2011_09_26 2011_09_26_drive_0014_sync 0000000050 27 | 2011_09_26 2011_09_26_drive_0014_sync 0000000060 28 | 2011_09_26 2011_09_26_drive_0014_sync 0000000129 29 | 2011_09_26 2011_09_26_drive_0014_sync 0000000141 30 | 2011_09_26 2011_09_26_drive_0014_sync 0000000152 31 | 2011_09_26 2011_09_26_drive_0014_sync 0000000172 32 | 2011_09_26 2011_09_26_drive_0014_sync 0000000192 33 | 2011_09_26 2011_09_26_drive_0014_sync 0000000213 34 | 2011_09_26 2011_09_26_drive_0014_sync 0000000240 35 | 2011_09_26 2011_09_26_drive_0015_sync 0000000187 36 | 2011_09_26 2011_09_26_drive_0015_sync 0000000197 37 | 2011_09_26 2011_09_26_drive_0015_sync 0000000209 38 | 2011_09_26 2011_09_26_drive_0015_sync 0000000219 39 | 2011_09_26 2011_09_26_drive_0015_sync 0000000229 40 | 2011_09_26 2011_09_26_drive_0015_sync 0000000239 41 | 2011_09_26 2011_09_26_drive_0015_sync 0000000264 42 | 
2011_09_26 2011_09_26_drive_0015_sync 0000000273 43 | 2011_09_26 2011_09_26_drive_0015_sync 0000000286 44 | 2011_09_26 2011_09_26_drive_0017_sync 0000000010 45 | 2011_09_26 2011_09_26_drive_0017_sync 0000000030 46 | 2011_09_26 2011_09_26_drive_0017_sync 0000000040 47 | 2011_09_26 2011_09_26_drive_0017_sync 0000000050 48 | 2011_09_26 2011_09_26_drive_0018_sync 0000000046 49 | 2011_09_26 2011_09_26_drive_0018_sync 0000000066 50 | 2011_09_26 2011_09_26_drive_0018_sync 0000000076 51 | 2011_09_26 2011_09_26_drive_0018_sync 0000000086 52 | 2011_09_26 2011_09_26_drive_0018_sync 0000000096 53 | 2011_09_26 2011_09_26_drive_0018_sync 0000000106 54 | 2011_09_26 2011_09_26_drive_0018_sync 0000000133 55 | 2011_09_26 2011_09_26_drive_0019_sync 0000000030 56 | 2011_09_26 2011_09_26_drive_0019_sync 0000000087 57 | 2011_09_26 2011_09_26_drive_0019_sync 0000000097 58 | 2011_09_26 2011_09_26_drive_0022_sync 0000000634 59 | 2011_09_26 2011_09_26_drive_0022_sync 0000000644 60 | 2011_09_26 2011_09_26_drive_0022_sync 0000000654 61 | 2011_09_26 2011_09_26_drive_0027_sync 0000000053 62 | 2011_09_26 2011_09_26_drive_0027_sync 0000000103 63 | 2011_09_26 2011_09_26_drive_0028_sync 0000000071 64 | 2011_09_26 2011_09_26_drive_0028_sync 0000000118 65 | 2011_09_26 2011_09_26_drive_0028_sync 0000000228 66 | 2011_09_26 2011_09_26_drive_0028_sync 0000000269 67 | 2011_09_26 2011_09_26_drive_0028_sync 0000000284 68 | 2011_09_26 2011_09_26_drive_0028_sync 0000000303 69 | 2011_09_26 2011_09_26_drive_0028_sync 0000000313 70 | 2011_09_26 2011_09_26_drive_0028_sync 0000000378 71 | 2011_09_26 2011_09_26_drive_0029_sync 0000000016 72 | 2011_09_26 2011_09_26_drive_0029_sync 0000000123 73 | 2011_09_26 2011_09_26_drive_0032_sync 0000000095 74 | 2011_09_26 2011_09_26_drive_0032_sync 0000000114 75 | 2011_09_26 2011_09_26_drive_0032_sync 0000000125 76 | 2011_09_26 2011_09_26_drive_0032_sync 0000000207 77 | 2011_09_26 2011_09_26_drive_0032_sync 0000000218 78 | 2011_09_26 2011_09_26_drive_0032_sync 0000000330 79 | 2011_09_26 2011_09_26_drive_0032_sync 0000000340 80 | 2011_09_26 2011_09_26_drive_0032_sync 0000000350 81 | 2011_09_26 2011_09_26_drive_0032_sync 0000000360 82 | 2011_09_26 2011_09_26_drive_0032_sync 0000000378 83 | 84 | 2011_09_26 2011_09_26_drive_0036_sync 0000000054 85 | 2011_09_26 2011_09_26_drive_0036_sync 0000000402 86 | 2011_09_26 2011_09_26_drive_0046_sync 0000000052 87 | 2011_09_26 2011_09_26_drive_0046_sync 0000000062 88 | 89 | 2011_09_26 2011_09_26_drive_0051_sync 0000000023 90 | 2011_09_26 2011_09_26_drive_0051_sync 0000000218 91 | 2011_09_26 2011_09_26_drive_0051_sync 0000000230 92 | 2011_09_26 2011_09_26_drive_0051_sync 0000000282 93 | 2011_09_26 2011_09_26_drive_0051_sync 0000000292 94 | 2011_09_26 2011_09_26_drive_0051_sync 0000000302 95 | 2011_09_26 2011_09_26_drive_0051_sync 0000000312 96 | 2011_09_26 2011_09_26_drive_0051_sync 0000000322 97 | 2011_09_26 2011_09_26_drive_0051_sync 0000000342 98 | 2011_09_26 2011_09_26_drive_0051_sync 0000000356 99 | 2011_09_26 2011_09_26_drive_0051_sync 0000000379 100 | 101 | 102 | 103 | 104 | 105 | 106 | 2011_09_26 2011_09_26_drive_0056_sync 0000000010 107 | 2011_09_26 2011_09_26_drive_0056_sync 0000000082 108 | 2011_09_26 2011_09_26_drive_0056_sync 0000000122 109 | 2011_09_26 2011_09_26_drive_0056_sync 0000000132 110 | 2011_09_26 2011_09_26_drive_0056_sync 0000000191 111 | 2011_09_26 2011_09_26_drive_0056_sync 0000000201 112 | 2011_09_26 2011_09_26_drive_0056_sync 0000000282 113 | 2011_09_26 2011_09_26_drive_0057_sync 0000000125 114 | 2011_09_26 2011_09_26_drive_0057_sync 
0000000140 115 | 2011_09_26 2011_09_26_drive_0057_sync 0000000176 116 | 2011_09_26 2011_09_26_drive_0057_sync 0000000299 117 | 2011_09_26 2011_09_26_drive_0057_sync 0000000319 118 | 2011_09_26 2011_09_26_drive_0057_sync 0000000339 119 | 2011_09_26 2011_09_26_drive_0059_sync 0000000026 120 | 2011_09_26 2011_09_26_drive_0059_sync 0000000046 121 | 2011_09_26 2011_09_26_drive_0059_sync 0000000137 122 | 2011_09_26 2011_09_26_drive_0059_sync 0000000150 123 | 2011_09_26 2011_09_26_drive_0059_sync 0000000260 124 | 2011_09_26 2011_09_26_drive_0059_sync 0000000280 125 | 2011_09_26 2011_09_26_drive_0059_sync 0000000290 126 | 2011_09_26 2011_09_26_drive_0059_sync 0000000300 127 | 2011_09_26 2011_09_26_drive_0059_sync 0000000310 128 | 2011_09_26 2011_09_26_drive_0059_sync 0000000320 129 | 2011_09_26 2011_09_26_drive_0070_sync 0000000069 130 | 2011_09_26 2011_09_26_drive_0070_sync 0000000224 131 | 2011_09_26 2011_09_26_drive_0084_sync 0000000084 132 | 2011_09_26 2011_09_26_drive_0084_sync 0000000179 133 | 2011_09_26 2011_09_26_drive_0084_sync 0000000238 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 2011_09_26 2011_09_26_drive_0096_sync 0000000020 143 | 2011_09_26 2011_09_26_drive_0096_sync 0000000278 144 | 2011_09_26 2011_09_26_drive_0096_sync 0000000381 145 | 2011_09_26 2011_09_26_drive_0101_sync 0000000109 146 | 2011_09_26 2011_09_26_drive_0101_sync 0000000175 147 | 2011_09_26 2011_09_26_drive_0101_sync 0000000447 148 | 2011_09_26 2011_09_26_drive_0101_sync 0000000457 149 | 2011_09_26 2011_09_26_drive_0101_sync 0000000809 150 | 2011_09_26 2011_09_26_drive_0104_sync 0000000015 151 | 2011_09_26 2011_09_26_drive_0104_sync 0000000035 152 | 153 | 154 | 155 | 156 | 2011_09_28 2011_09_28_drive_0002_sync 0000000343 157 | 158 | 2011_09_29 2011_09_29_drive_0004_sync 0000000036 159 | 2011_09_29 2011_09_29_drive_0004_sync 0000000079 160 | 2011_09_29 2011_09_29_drive_0004_sync 0000000094 161 | 2011_09_29 2011_09_29_drive_0004_sync 0000000105 162 | 2011_09_29 2011_09_29_drive_0004_sync 0000000162 163 | 2011_09_29 2011_09_29_drive_0004_sync 0000000258 164 | 2011_09_29 2011_09_29_drive_0004_sync 0000000285 165 | 2011_09_29 2011_09_29_drive_0004_sync 0000000308 166 | 167 | 168 | 169 | 2011_09_29 2011_09_29_drive_0071_sync 0000000059 170 | 2011_09_29 2011_09_29_drive_0071_sync 0000000943 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 2011_10_03 2011_10_03_drive_0047_sync 0000000556 201 | -------------------------------------------------------------------------------- /datasets/kitti_2015_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte, read_calib_into_dict, get_date_from_width 10 | 11 | 12 | 13 | class KITTI_2015_Test(data.Dataset): 14 | def __init__(self, 15 | args, 16 | root): 17 | 18 | self._args = args 19 | 20 | images_l_root = os.path.join(root, "data_scene_flow", "testing", "image_2_jpg") 21 | images_r_root = os.path.join(root, "data_scene_flow", "testing", "image_3_jpg") 22 | 23 | ## loading image ----------------------------------- 24 | if not os.path.isdir(images_l_root): 25 | raise ValueError("Image directory %s not found!", images_l_root) 26 | if not 
os.path.isdir(images_r_root):
            raise ValueError("Image directory %s not found!", images_r_root)

        # ----------------------------------------------------------
        # Construct list of indices for training/validation
        # ----------------------------------------------------------
        num_images = 200
        list_of_indices = range(num_images)

        # ----------------------------------------------------------
        # Save list of actual filenames for inputs and disp/flow
        # ----------------------------------------------------------
        path_dir = os.path.dirname(os.path.realpath(__file__))
        self._image_list = []
        self._flow_list = []
        self._disp_list = []
        img_ext = '.jpg'

        for ii in list_of_indices:

            file_idx = '%.6d' % ii

            im_l1 = os.path.join(images_l_root, file_idx + "_10" + img_ext)
            im_l2 = os.path.join(images_l_root, file_idx + "_11" + img_ext)
            im_r1 = os.path.join(images_r_root, file_idx + "_10" + img_ext)
            im_r2 = os.path.join(images_r_root, file_idx + "_11" + img_ext)


            file_list = [im_l1, im_l2, im_r1, im_r2]
            for _, item in enumerate(file_list):
                if not os.path.isfile(item):
                    raise ValueError("File does not exist: %s", item)

            self._image_list.append([im_l1, im_l2, im_r1, im_r2])

        self._size = len(self._image_list)
        assert len(self._image_list) != 0

        ## loading calibration matrix
        self.intrinsic_dict_l = {}
        self.intrinsic_dict_r = {}
        self.intrinsic_dict_l, self.intrinsic_dict_r = read_calib_into_dict(path_dir)

        self._to_tensor = vision_transforms.Compose([
            vision_transforms.ToPILImage(),
            vision_transforms.transforms.ToTensor()
        ])


    def __getitem__(self, index):

        index = index % self._size
        im_l1_filename = self._image_list[index][0]
        im_l2_filename = self._image_list[index][1]
        im_r1_filename = self._image_list[index][2]
        im_r2_filename = self._image_list[index][3]

        # read float32 images and flow
        im_l1_np = read_image_as_byte(im_l1_filename)
        im_l2_np = read_image_as_byte(im_l2_filename)
        im_r1_np = read_image_as_byte(im_r1_filename)
        im_r2_np = read_image_as_byte(im_r2_filename)

        # example filename
        basename = os.path.basename(im_l1_filename)[:6]

        # find intrinsic
        k_l1 = torch.from_numpy(self.intrinsic_dict_l[get_date_from_width(im_l1_np.shape[1])]).float()
        k_r1 = torch.from_numpy(self.intrinsic_dict_r[get_date_from_width(im_r1_np.shape[1])]).float()

        im_l1 = self._to_tensor(im_l1_np)
        im_l2 = self._to_tensor(im_l2_np)
        im_r1 = self._to_tensor(im_r1_np)
        im_r2 = self._to_tensor(im_r2_np)

        # input size
        h_orig, w_orig, _ = im_l1_np.shape
        input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float()

        example_dict = {
            "input_l1": im_l1,
            "input_l2": im_l2,
            "input_r1": im_r1,
            "input_r2": im_r2,
            "index": index,
            "basename": basename,
            "input_k_l1": k_l1,
            "input_k_l2": k_l1,
            "input_k_r1": k_r1,
            "input_k_r2": k_r1,
            "input_size": input_im_size
        }

        return example_dict

    def __len__(self):
        return self._size

--------------------------------------------------------------------------------
/datasets/kitti_comb_mnsf.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function

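## Illustration (assumed wiring, not from this file): KITTI_Comb_Train below
## concatenates an annotated KITTI-2015 split (dataset1) with unlabeled KITTI
## raw clips (dataset2). It is presumably meant to be batched with
## CustomBatchSampler (datasets/custom_batchsampler.py), whose batch_size of
## [1, 3] yields one annotated and three unlabeled examples per batch and
## offsets the second sampler's indices to address the ConcatDataset. A
## minimal sketch:
##
##   from torch.utils.data import DataLoader, RandomSampler
##   from datasets.custom_batchsampler import CustomBatchSampler
##
##   dataset = KITTI_Comb_Train(args, root="/path/to/data")  # hypothetical root
##   batch_sampler = CustomBatchSampler([RandomSampler(dataset.dataset1),
##                                       RandomSampler(dataset.dataset2)])
##   loader = DataLoader(dataset, batch_sampler=batch_sampler)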
3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte 10 | from .common import kitti_crop_image_list, kitti_adjust_intrinsic 11 | 12 | ## Combining datasets 13 | from .kitti_2015_train import KITTI_2015_MonoSceneFlow 14 | from .kitti_raw_monosf import KITTI_Raw 15 | from torch.utils.data.dataset import ConcatDataset 16 | 17 | 18 | 19 | class KITTI_Raw_for_Finetune(KITTI_Raw): 20 | def __init__(self, 21 | args, 22 | root, 23 | flip_augmentations=True, 24 | preprocessing_crop=True, 25 | crop_size=[370, 1224], 26 | num_examples=-1, 27 | index_file=""): 28 | super(KITTI_Raw_for_Finetune, self).__init__( 29 | args, 30 | images_root=root, 31 | flip_augmentations=flip_augmentations, 32 | preprocessing_crop=preprocessing_crop, 33 | crop_size=crop_size, 34 | num_examples=num_examples, 35 | index_file=index_file) 36 | 37 | def __getitem__(self, index): 38 | index = index % self._size 39 | 40 | # read images and flow 41 | img_list_np = [read_image_as_byte(img) for img in self._image_list[index]] 42 | 43 | # example filename 44 | im_l1_filename = self._image_list[index][0] 45 | basename = os.path.basename(im_l1_filename)[:6] 46 | dirname = os.path.dirname(im_l1_filename)[-51:] 47 | datename = dirname[:10] 48 | k_l1 = torch.from_numpy(self.intrinsic_dict_l[datename]).float() 49 | k_r1 = torch.from_numpy(self.intrinsic_dict_r[datename]).float() 50 | 51 | # input size 52 | h_orig, w_orig, _ = img_list_np[0].shape 53 | input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float() 54 | 55 | # cropping 56 | if self._preprocessing_crop: 57 | # get starting positions 58 | crop_height = self._crop_size[0] 59 | crop_width = self._crop_size[1] 60 | x = np.random.uniform(0, w_orig - crop_width + 1) 61 | y = np.random.uniform(0, h_orig - crop_height + 1) 62 | crop_info = [int(x), int(y), int(x + crop_width), int(y + crop_height)] 63 | 64 | # cropping images and adjust intrinsic accordingly 65 | img_list_np = kitti_crop_image_list(img_list_np, crop_info) 66 | k_l1, k_r1 = kitti_adjust_intrinsic(k_l1, k_r1, crop_info) 67 | 68 | # to tensors 69 | img_list_tensor = [self._to_tensor(img) for img in img_list_np] 70 | im_l1 = img_list_tensor[0] 71 | im_l2 = img_list_tensor[1] 72 | im_r1 = img_list_tensor[2] 73 | im_r2 = img_list_tensor[3] 74 | 75 | void_tensor1 = im_l1[0:1, :, :] * 0 76 | void_tensor2 = im_l1[0:2, :, :] * 0 77 | 78 | common_dict = { 79 | "index": index, 80 | "basename": basename, 81 | "datename": datename, 82 | "input_size": input_im_size, 83 | "target_flow": void_tensor2, 84 | "target_flow_mask": void_tensor1, 85 | "target_flow_noc": void_tensor2, 86 | "target_flow_mask_noc": void_tensor1, 87 | "target_disp": void_tensor1, 88 | "target_disp_mask": void_tensor1, 89 | "target_disp2_occ": void_tensor1, 90 | "target_disp2_mask_occ": void_tensor1, 91 | "target_disp_noc": void_tensor1, 92 | "target_disp_mask_noc": void_tensor1, 93 | "target_disp2_noc": void_tensor1, 94 | "target_disp2_mask_noc": void_tensor1 95 | } 96 | 97 | # random flip 98 | if self._flip_augmentations is True and torch.rand(1) > 0.5: 99 | _, _, ww = im_l1.size() 100 | im_l1_flip = torch.flip(im_l1, dims=[2]) 101 | im_l2_flip = torch.flip(im_l2, dims=[2]) 102 | im_r1_flip = torch.flip(im_r1, dims=[2]) 103 | im_r2_flip = torch.flip(im_r2, dims=[2]) 104 | 105 | k_l1[0, 2] = ww - k_l1[0, 2] 106 | k_r1[0, 2] = ww - k_r1[0, 2] 107 | 108 | example_dict = { 109 | "input_l1": im_r1_flip, 110 | 
"input_r1": im_l1_flip, 111 | "input_l2": im_r2_flip, 112 | "input_r2": im_l2_flip, 113 | "input_k_l1": k_r1, 114 | "input_k_r1": k_l1, 115 | "input_k_l2": k_r1, 116 | "input_k_r2": k_l1, 117 | } 118 | example_dict.update(common_dict) 119 | 120 | else: 121 | example_dict = { 122 | "input_l1": im_l1, 123 | "input_r1": im_r1, 124 | "input_l2": im_l2, 125 | "input_r2": im_r2, 126 | "input_k_l1": k_l1, 127 | "input_k_r1": k_r1, 128 | "input_k_l2": k_l1, 129 | "input_k_r2": k_r1, 130 | } 131 | example_dict.update(common_dict) 132 | 133 | return example_dict 134 | 135 | 136 | class KITTI_Comb_Train(ConcatDataset): 137 | def __init__(self, args, root): 138 | 139 | self.dataset1 = KITTI_2015_MonoSceneFlow( 140 | args, 141 | root + '/KITTI_flow/', 142 | preprocessing_crop=True, 143 | crop_size=[370, 1224], 144 | dstype="train") 145 | 146 | self.dataset2 = KITTI_Raw_for_Finetune( 147 | args, 148 | root + '/KITTI_raw_noPCL/', 149 | flip_augmentations=True, 150 | preprocessing_crop=True, 151 | crop_size=[370, 1224], 152 | num_examples=-1, 153 | index_file='index_txt/kitti_full.txt') 154 | 155 | super(KITTI_Comb_Train, self).__init__( 156 | datasets=[self.dataset1, self.dataset2]) 157 | 158 | 159 | class KITTI_Comb_Val(KITTI_2015_MonoSceneFlow): 160 | def __init__(self, 161 | args, 162 | root, 163 | preprocessing_crop=False, 164 | crop_size=[370, 1224]): 165 | super(KITTI_Comb_Val, self).__init__( 166 | args, 167 | data_root=root + '/KITTI_flow/', 168 | preprocessing_crop=preprocessing_crop, 169 | crop_size=crop_size, 170 | dstype="valid") 171 | 172 | 173 | 174 | class KITTI_Comb_Full(ConcatDataset): 175 | def __init__(self, args, root): 176 | 177 | self.dataset1 = KITTI_2015_MonoSceneFlow( 178 | args, 179 | root + '/KITTI_flow/', 180 | preprocessing_crop=True, 181 | crop_size=[370, 1224], 182 | dstype="full") 183 | 184 | self.dataset2 = KITTI_Raw_for_Finetune( 185 | args, 186 | root + '/KITTI_raw_noPCL/', 187 | flip_augmentations=True, 188 | preprocessing_crop=True, 189 | crop_size=[370, 1224], 190 | num_examples=-1, 191 | index_file='index_txt/kitti_raw_all_imgs.txt') 192 | 193 | super(KITTI_Comb_Full, self).__init__( 194 | datasets=[self.dataset1, self.dataset2]) 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /datasets/kitti_eigen_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte, read_calib_into_dict 10 | 11 | 12 | 13 | class KITTI_Eigen_Test(data.Dataset): 14 | def __init__(self, 15 | args, 16 | root, 17 | num_examples=-1): 18 | 19 | self._args = args 20 | 21 | index_file = "index_txt/eigen_text.txt" 22 | 23 | 24 | path_dir = os.path.dirname(os.path.realpath(__file__)) 25 | path_index_file = os.path.join(path_dir, index_file) 26 | 27 | if not os.path.exists(path_index_file): 28 | raise ValueError("Index File '%s' not found!", path_index_file) 29 | index_file = open(path_index_file, 'r') 30 | 31 | ## loading image ----------------------------------- 32 | if not os.path.isdir(root): 33 | raise ValueError("Image directory '%s' not found!", root) 34 | 35 | filename_list = [line.rstrip().split(' ') for line in index_file.readlines()] 36 | self._image_list = [] 37 | 38 | view1 = 'image_02/data' 39 | view2 = 'image_03/data' 40 | ext = 
'.jpg' 41 | for item in filename_list: 42 | 43 | name_l1 = root + '/' + item[0] 44 | name_depth = (root + '/' + item[0]).replace("jpg", "npy").replace("image_02", "projected_depth") 45 | idx_src = item[0].split('/')[4].split('.')[0] 46 | idx_tgt = '%.10d' % (int(idx_src) + 1) 47 | name_l2 = name_l1.replace(idx_src, idx_tgt) 48 | if not os.path.isfile(name_l2): 49 | idx_prev = '%.10d' % (int(idx_src) - 1) 50 | name_l2 = name_l1.replace(idx_src, idx_prev) 51 | 52 | if os.path.isfile(name_l1) and os.path.isfile(name_l2) and os.path.isfile(name_depth): 53 | self._image_list.append([name_l1, name_l2, name_depth]) 54 | 55 | if num_examples > 0: 56 | self._image_list = self._image_list[:num_examples] 57 | 58 | self._size = len(self._image_list) 59 | 60 | ## loading calibration matrix 61 | self.intrinsic_dict_l = {} 62 | self.intrinsic_dict_r = {} 63 | self.intrinsic_dict_l, self.intrinsic_dict_r = read_calib_into_dict(path_dir) 64 | 65 | self._to_tensor = vision_transforms.Compose([ 66 | vision_transforms.ToPILImage(), 67 | vision_transforms.transforms.ToTensor() 68 | ]) 69 | 70 | def __getitem__(self, index): 71 | index = index % self._size 72 | 73 | im_l1_filename = self._image_list[index][0] 74 | im_l2_filename = self._image_list[index][1] 75 | depth_filename = self._image_list[index][2] 76 | 77 | # read images and flow 78 | im_l1_np = read_image_as_byte(im_l1_filename) 79 | im_l2_np = read_image_as_byte(im_l2_filename) 80 | im_l1_depth_np = np.load(depth_filename) 81 | 82 | # example filename 83 | basename = os.path.dirname(im_l1_filename).split('/')[-3] + '_' + os.path.basename(im_l1_filename).split('.')[0] 84 | dirname = os.path.dirname(im_l1_filename)[-51:] 85 | datename = dirname[:10] 86 | 87 | k_l1 = torch.from_numpy(self.intrinsic_dict_l[datename]).float() 88 | k_r1 = torch.from_numpy(self.intrinsic_dict_r[datename]).float() 89 | 90 | im_l1 = self._to_tensor(im_l1_np) 91 | im_l2 = self._to_tensor(im_l2_np) 92 | im_l1_depth = torch.from_numpy(im_l1_depth_np).unsqueeze(0).float() 93 | 94 | # input size 95 | h_orig, w_orig, _ = im_l1_np.shape 96 | input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float() 97 | 98 | 99 | example_dict = { 100 | "input_l1": im_l1, 101 | "input_l2": im_l2, 102 | "index": index, 103 | "basename": basename, 104 | "datename": datename, 105 | "input_k_l1": k_l1, 106 | "input_k_l2": k_l1, 107 | "input_size": input_im_size, 108 | "target_depth": im_l1_depth 109 | } 110 | 111 | return example_dict 112 | 113 | def __len__(self): 114 | return self._size 115 | -------------------------------------------------------------------------------- /datasets/kitti_raw_monodepth.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os.path 4 | import torch 5 | import torch.utils.data as data 6 | import numpy as np 7 | 8 | from torchvision import transforms as vision_transforms 9 | from .common import read_image_as_byte, read_calib_into_dict 10 | from .common import kitti_crop_image_list, kitti_adjust_intrinsic 11 | from .common import intrinsic_scale 12 | 13 | 14 | class KITTI_Raw(data.Dataset): 15 | def __init__(self, 16 | args, 17 | images_root=None, 18 | preprocessing_crop=False, 19 | crop_size=[370, 1224], 20 | num_examples=-1, 21 | index_file=None): 22 | 23 | self._args = args 24 | self._seq_len = 1 25 | self._preprocessing_crop = preprocessing_crop 26 | self._crop_size = crop_size 27 | 28 | path_dir = os.path.dirname(os.path.realpath(__file__)) 29 | 
path_index_file = os.path.join(path_dir, index_file) 30 | 31 | if not os.path.exists(path_index_file): 32 | raise ValueError("Index File '%s' not found!" % path_index_file) 33 | index_file = open(path_index_file, 'r') 34 | 35 | ## loading image ----------------------------------- 36 | if not os.path.isdir(images_root): 37 | raise ValueError("Image directory '%s' not found!" % images_root) 38 | 39 | filename_list = [line.rstrip().split(' ') for line in index_file.readlines()] 40 | self._image_list = [] 41 | view1 = 'image_02/data' 42 | view2 = 'image_03/data' 43 | ext = '.jpg' 44 | for item in filename_list: 45 | date = item[0][:10] 46 | scene = item[0] 47 | idx_src = item[1] 48 | for ii in range(self._seq_len): 49 | idx_tgt = '%.10d' % (int(idx_src) + ii + 1) 50 | name_l1 = os.path.join(images_root, date, scene, view1, idx_src) + ext 51 | name_r1 = os.path.join(images_root, date, scene, view2, idx_src) + ext 52 | if os.path.isfile(name_l1) and os.path.isfile(name_r1): 53 | self._image_list.append([name_l1, name_r1]) 54 | 55 | if num_examples > 0: 56 | self._image_list = self._image_list[:num_examples] 57 | 58 | self._size = len(self._image_list) 59 | 60 | ## loading calibration matrix 61 | self.intrinsic_dict_l = {} 62 | self.intrinsic_dict_r = {} 63 | self.intrinsic_dict_l, self.intrinsic_dict_r = read_calib_into_dict(path_dir) 64 | 65 | # ---------------------------------------------------------- 66 | # Image resize only 67 | # ---------------------------------------------------------- 68 | self._resize_to_tensor = vision_transforms.Compose([ 69 | vision_transforms.ToPILImage(), 70 | vision_transforms.Resize((256, 512)), 71 | vision_transforms.transforms.ToTensor() 72 | ]) 73 | self._to_tensor = vision_transforms.Compose([ 74 | vision_transforms.transforms.ToTensor() 75 | ]) 76 | 77 | def __getitem__(self, index): 78 | index = index % self._size 79 | 80 | im_l1_filename = self._image_list[index][0] 81 | im_r1_filename = self._image_list[index][1] 82 | 83 | # read input images 84 | im_l1_np = read_image_as_byte(im_l1_filename) 85 | im_r1_np = read_image_as_byte(im_r1_filename) 86 | 87 | # example filename 88 | basename = os.path.basename(im_l1_filename)[:6] 89 | dirname = os.path.dirname(im_l1_filename)[-51:] 90 | datename = dirname[:10] 91 | k_l1 = torch.from_numpy(self.intrinsic_dict_l[datename]).float() 92 | k_r1 = torch.from_numpy(self.intrinsic_dict_r[datename]).float() 93 | k_l1_orig = k_l1.clone() 94 | 95 | h_orig, w_orig, _ = im_l1_np.shape 96 | input_im_size = torch.from_numpy(np.array([h_orig, w_orig])).float() 97 | 98 | # resizing image 99 | if self._preprocessing_crop == False: 100 | # No Geometric Augmentation, Resizing to 256 x 512 here 101 | # resizing input images 102 | im_l1 = self._resize_to_tensor(im_l1_np) 103 | im_r1 = self._resize_to_tensor(im_r1_np) 104 | # resizing intrinsic matrix 105 | k_l1 = intrinsic_scale(k_l1, im_l1.size(1) / h_orig, im_l1.size(2) / w_orig) 106 | k_r1 = intrinsic_scale(k_r1, im_r1.size(1) / h_orig, im_r1.size(2) / w_orig) 107 | else: 108 | # For Geometric Augmentation, first cropping the images to 370 x 1224 here, 109 | # then do the augmentation in augmentations.py 110 | # get starting positions 111 | crop_height = self._crop_size[0] 112 | crop_width = self._crop_size[1] 113 | x = np.random.uniform(0, w_orig - crop_width + 1) 114 | y = np.random.uniform(0, h_orig - crop_height + 1) 115 | crop_info = [int(x), int(y), int(x + crop_width), int(y + crop_height)] 116 | 117 | # cropping images and adjusting the intrinsics accordingly 118 | im_l1_np, im_r1_np = 
kitti_crop_image_list([im_l1_np, im_r1_np], crop_info) 119 | im_l1 = self._to_tensor(im_l1_np) 120 | im_r1 = self._to_tensor(im_r1_np) 121 | k_l1, k_r1 = kitti_adjust_intrinsic(k_l1, k_r1, crop_info) 122 | 123 | # For CamCOnv 124 | k_r1_flip = k_r1.clone() 125 | k_r1_flip[0, 2] = im_r1.size(2) - k_r1_flip[0, 2] 126 | 127 | example_dict = { 128 | "input_l1": im_l1, 129 | "input_r1": im_r1, 130 | "index": index, 131 | "basename": basename, 132 | "datename": datename, 133 | "input_k_l1_orig": k_l1_orig, 134 | "input_k_l1": k_l1, 135 | "input_k_r1": k_r1, 136 | "input_k_r1_flip": k_r1_flip, 137 | "input_size": input_im_size 138 | } 139 | 140 | return example_dict 141 | 142 | def __len__(self): 143 | return self._size 144 | 145 | 146 | class KITTI_Raw_KittiSplit_Train(KITTI_Raw): 147 | def __init__(self, 148 | args, 149 | root, 150 | preprocessing_crop=False, 151 | crop_size=[370, 1224], 152 | num_examples=-1): 153 | super(KITTI_Raw_KittiSplit_Train, self).__init__( 154 | args, 155 | images_root=root, 156 | preprocessing_crop=preprocessing_crop, 157 | crop_size=crop_size, 158 | num_examples=num_examples, 159 | index_file="index_txt/kitti_train.txt") 160 | 161 | 162 | class KITTI_Raw_KittiSplit_Valid(KITTI_Raw): 163 | def __init__(self, 164 | args, 165 | root, 166 | preprocessing_crop=False, 167 | crop_size=[370, 1224], 168 | num_examples=-1): 169 | super(KITTI_Raw_KittiSplit_Valid, self).__init__( 170 | args, 171 | images_root=root, 172 | preprocessing_crop=preprocessing_crop, 173 | crop_size=crop_size, 174 | num_examples=num_examples, 175 | index_file="index_txt/kitti_valid.txt") -------------------------------------------------------------------------------- /demo/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo.gif -------------------------------------------------------------------------------- /demo/demo_generator/cam_pose.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_name" : "PinholeCameraTrajectory", 3 | "parameters" : 4 | [ 5 | { 6 | "class_name" : "PinholeCameraParameters", 7 | "extrinsic" : 8 | [ 9 | 0.99875666779623684, 10 | -0.015804533047987467, 11 | 0.047279332352442131, 12 | 0, 13 | 0.0042556066034366898, 14 | 0.97198214070610867, 15 | 0.2350161865931947, 16 | 0, 17 | -0.049668987758906415, 18 | -0.23452278116103262, 19 | 0.97084090188428929, 20 | 0, 21 | -2.1888509581876607, 22 | -1.181867321777915, 23 | 0.94394657256919134, 24 | 1 25 | ], 26 | "intrinsic" : 27 | { 28 | "height" : 376, 29 | "intrinsic_matrix" : 30 | [ 31 | 718.856, 32 | 0, 33 | 0, 34 | 0, 35 | 718.856, 36 | 0, 37 | 607.1928, 38 | 185.2157, 39 | 1 40 | ], 41 | "width" : 1241 42 | }, 43 | "version_major" : 1, 44 | "version_minor" : 0 45 | } 46 | ], 47 | "version_major" : 1, 48 | "version_minor" : 0 49 | } -------------------------------------------------------------------------------- /demo/demo_generator/kitti_img/image_2/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/kitti_img/image_2/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/kitti_img/image_2/000139_11.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/kitti_img/image_2/000139_11.png -------------------------------------------------------------------------------- /demo/demo_generator/results/disp_0/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/results/disp_0/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/results/disp_1/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/results/disp_1/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/results/flow/000139_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/results/flow/000139_10.png -------------------------------------------------------------------------------- /demo/demo_generator/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import skimage.io as io 4 | from skimage.color import rgb2gray 5 | # from skimage.color import lab2rgb 6 | 7 | import open3d as o3d 8 | import numpy as np 9 | import torch 10 | import math 11 | 12 | from utils_misc import flow_to_png_middlebury, read_png_flow, read_png_disp 13 | from utils_misc import numpy2torch, pixel2pts_ms 14 | 15 | width_to_focal = dict() 16 | width_to_focal[1242] = 721.5377 17 | width_to_focal[1241] = 718.856 18 | width_to_focal[1224] = 707.0493 19 | width_to_focal[1238] = 718.3351 20 | width_to_focal[1226] = 707.0912 21 | 22 | cam_center_dict = dict() 23 | cam_center_dict[1242] = [6.095593e+02, 1.728540e+02] 24 | cam_center_dict[1241] = [6.071928e+02, 1.852157e+02] 25 | cam_center_dict[1224] = [6.040814e+02, 1.805066e+02] 26 | cam_center_dict[1238] = [6.003891e+02, 1.815122e+02] 27 | cam_center_dict[1226] = [6.018873e+02, 1.831104e+02] 28 | 29 | 30 | ######## 31 | sampling = [4,20,25,35,40] 32 | imgflag = 1 # 0 is image, 1 is flow 33 | ######## 34 | 35 | 36 | 37 | def get_pcd(img_idx, image_dir, result_dir, tt): 38 | 39 | idx_curr = '%06d' % (img_idx) 40 | 41 | im1_np0 = (io.imread(os.path.join(image_dir, "image_2/" + idx_curr + "_10.png")) / np.float32(255.0))[110:, :, :] 42 | 43 | flo_f_np0 = read_png_flow(os.path.join(result_dir, "flow/" + idx_curr + "_10.png"))[110:, :, :] 44 | disp1_np0 = read_png_disp(os.path.join(result_dir, "disp_0/" + idx_curr + "_10.png"))[110:, :, :] 45 | disp2_np0 = read_png_disp(os.path.join(result_dir, "disp_1/" + idx_curr + "_10.png"))[110:, :, :] 46 | 47 | im1 = numpy2torch(im1_np0).unsqueeze(0) 48 | disp1 = numpy2torch(disp1_np0).unsqueeze(0) 49 | disp_diff = numpy2torch(disp2_np0).unsqueeze(0) 50 | flo_f = numpy2torch(flo_f_np0).unsqueeze(0) 51 | 52 | _, _, hh, ww = im1.size() 53 | 54 | ## Intrinsic 55 | focal_length = width_to_focal[ww] 56 | cx = cam_center_dict[ww][0] 57 | cy = cam_center_dict[ww][1] 58 | 59 | k1_np = np.array([[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]]) 60 | k1 = numpy2torch(k1_np) 61 | 62 | # Forward warping Pts1 using disp_change and flow 63 | pts1 = pixel2pts_ms(disp1, k1) 64 | pts1_warp = 
pixel2pts_ms(disp_diff, k1, flo_f) 65 | sf = pts1_warp - pts1 66 | 67 | ## Composing Image 68 | im1_np0_g = np.repeat(np.expand_dims(rgb2gray(im1_np0), axis=2), 3, axis=2) 69 | flow = torch.cat((sf[:, 0:1, :, :], sf[:, 2:3, :, :]), dim=1).data.cpu().numpy()[0, :, :, :] 70 | flow_img = flow_to_png_middlebury(flow) / np.float32(255.0) 71 | 72 | if imgflag == 0: 73 | flow_img = im1_np0 74 | else: 75 | flow_img = (flow_img * 0.75 + im1_np0_g * 0.25) 76 | 77 | ## Crop 78 | max_crop = (60, 0.7, 82) 79 | min_crop = (-60, -20, 0) 80 | 81 | x1 = -60 82 | x2 = 60 83 | y1 = 0.7 84 | y2 = -20 85 | z1 = 80 86 | z2 = 0 87 | pp1 = np.array([[x1, y1, z1]]) 88 | pp2 = np.array([[x1, y1, z2]]) 89 | pp3 = np.array([[x1, y2, z1]]) 90 | pp4 = np.array([[x1, y2, z2]]) 91 | pp5 = np.array([[x2, y1, z1]]) 92 | pp6 = np.array([[x2, y1, z2]]) 93 | pp7 = np.array([[x2, y2, z1]]) 94 | pp8 = np.array([[x2, y2, z2]]) 95 | bb_pts = np.concatenate((pp1, pp2, pp3, pp4, pp5, pp6, pp7, pp8), axis=0) 96 | wp = np.array([[1.0, 1.0, 1.0]]) 97 | bb_colors = np.concatenate((wp, wp, wp, wp, wp, wp, wp, wp), axis=0) 98 | 99 | ## Open3D Vis 100 | pts1_tform = pts1 + sf*tt 101 | pts1_np = np.transpose(pts1_tform[0].view(3, -1).data.numpy(), (1, 0)) 102 | pts1_np = np.concatenate((pts1_np, bb_pts), axis=0) 103 | pts1_color = np.reshape(flow_img, (hh * ww, 3)) 104 | pts1_color = np.concatenate((pts1_color, bb_colors), axis=0) 105 | 106 | pcd1 = o3d.geometry.PointCloud() 107 | pcd1.points = o3d.utility.Vector3dVector(pts1_np) 108 | pcd1.colors = o3d.utility.Vector3dVector(pts1_color) 109 | 110 | bbox = o3d.geometry.AxisAlignedBoundingBox(min_crop, max_crop) 111 | pcd1 = pcd1.crop(bbox) 112 | 113 | return pcd1 114 | 115 | 116 | def custom_vis(imglist, kitti_data_dir, result_dir, vis_dir): 117 | 118 | custom_vis.index = 0 119 | custom_vis.trajectory = o3d.io.read_pinhole_camera_trajectory("cam_pose.json") 120 | custom_vis.vis = o3d.visualization.Visualizer() 121 | 122 | img_id = imglist[custom_vis.index] 123 | init_pcd = get_pcd(img_id, kitti_data_dir, result_dir, 0) 124 | custom_vis.prev_pcd = init_pcd 125 | 126 | def move_forward(vis): 127 | 128 | glb = custom_vis 129 | 130 | ## Capture 131 | depth = vis.capture_depth_float_buffer(False) 132 | image = vis.capture_screen_float_buffer(False) 133 | save_id = imglist[glb.index-1] 134 | file_name = "" 135 | 136 | if imgflag == 0: 137 | file_name = os.path.join(vis_dir, "{:06d}_{:02d}.png".format(save_id, glb.index)) 138 | else: 139 | file_name = os.path.join(vis_dir, "{:06d}_{:02d}.png".format(save_id, glb.index)) 140 | 141 | print(' ' + str(glb.index) + ' '+ str(save_id) + ' '+ file_name) 142 | io.imsave(file_name, np.asarray(image), check_contrast=False) 143 | 144 | ## Rendering 145 | max_d_x = 13 146 | max_d_y = 4 147 | 148 | if glb.index < sampling[0]: 149 | tt = 0 150 | rx = 0 151 | ry = 0 152 | elif glb.index < sampling[1]: # only rotation 153 | tt = 0 154 | rad = 2 * 3.14159265359 / (sampling[1] - sampling[0]) * (glb.index - sampling[0]) 155 | rx = max_d_x * math.sin(rad) 156 | ry = (max_d_y * math.cos(rad) - max_d_y) 157 | elif glb.index < sampling[2]: 158 | tt = 0 159 | rx = 0 160 | ry = 0 161 | elif glb.index < sampling[3]: 162 | tt = (glb.index - sampling[2]) / (sampling[3] - sampling[2]) 163 | rx = 0 164 | ry = 0 165 | else: 166 | tt = 1 167 | rx = 0 168 | ry = 0 169 | 170 | img_id = imglist[glb.index] 171 | pcd = get_pcd(img_id, kitti_data_dir, result_dir, tt) 172 | glb.index = glb.index + 1 173 | 174 | vis.clear_geometries() 175 | vis.add_geometry(pcd) 176 | glb.prev_pcd = pcd 
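# Re-adding the geometry resets the viewer's camera (an assumption about
# Open3D's clear_geometries()/add_geometry() behavior), so the view-control
# calls below re-apply the base transform issued after create_window()
# further down, plus the per-frame orbit offsets rx/ry computed above.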
177 | 178 | ctr = vis.get_view_control() 179 | ctr.scale(-24) 180 | 181 | ctr.rotate(rx, 980.0 + ry, 0, 0) 182 | ctr.translate(-5, 0) 183 | 184 | if not glb.index < len(imglist): 185 | custom_vis.vis.register_animation_callback(None) 186 | 187 | return False 188 | 189 | vis = custom_vis.vis 190 | vis.create_window() 191 | vis.add_geometry(init_pcd) 192 | 193 | ctr = vis.get_view_control() 194 | ctr.scale(-24) 195 | ctr.rotate(0, 980.0, 0, 0) 196 | ctr.translate(-5, 0) 197 | vis.register_animation_callback(move_forward) 198 | vis.run() 199 | vis.destroy_window() 200 | 201 | ######################################################################## 202 | 203 | kitti_data_dir = "demo/demo_generator/kitti_img" ## raw KITTI image 204 | result_dir = "demo/demo_generator/results" ## disp_0, disp_1, flow 205 | vis_dir = "demo/demo_generator/vis" ## visualization output folder 206 | 207 | imglist = [] 208 | 209 | for ii in range(0, sampling[-1]): 210 | imglist.append(139) 211 | 212 | custom_vis(imglist, kitti_data_dir, result_dir, vis_dir) -------------------------------------------------------------------------------- /demo/demo_generator/utils_misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import skimage.io as io 4 | import cv2 5 | 6 | TAG_CHAR = np.array([202021.25], np.float32) 7 | UNKNOWN_FLOW_THRESH = 1e7 8 | 9 | 10 | def compute_color(u, v): 11 | """ 12 | compute optical flow color map 13 | :param u: optical flow horizontal map 14 | :param v: optical flow vertical map 15 | :return: optical flow in color code 16 | """ 17 | [h, w] = u.shape 18 | img = np.zeros([h, w, 3]) 19 | nanIdx = np.isnan(u) | np.isnan(v) 20 | u[nanIdx] = 0 21 | v[nanIdx] = 0 22 | 23 | colorwheel = make_color_wheel() 24 | ncols = np.size(colorwheel, 0) 25 | 26 | rad = np.sqrt(u ** 2 + v ** 2) 27 | 28 | a = np.arctan2(-v, -u) / np.pi 29 | 30 | fk = (a + 1) / 2 * (ncols - 1) + 1 31 | 32 | k0 = np.floor(fk).astype(int) 33 | 34 | k1 = k0 + 1 35 | k1[k1 == ncols + 1] = 1 36 | f = fk - k0 37 | 38 | for i in range(0, np.size(colorwheel, 1)): 39 | tmp = colorwheel[:, i] 40 | col0 = tmp[k0 - 1] / 255 41 | col1 = tmp[k1 - 1] / 255 42 | col = (1 - f) * col0 + f * col1 43 | 44 | idx = rad <= 1 45 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 46 | notidx = np.logical_not(idx) 47 | 48 | col[notidx] *= 0.75 49 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) 50 | 51 | return img 52 | 53 | 54 | def make_color_wheel(): 55 | """ 56 | Generate color wheel according Middlebury color code 57 | :return: Color wheel 58 | """ 59 | RY = 15 60 | YG = 6 61 | GC = 4 62 | CB = 11 63 | BM = 13 64 | MR = 6 65 | 66 | ncols = RY + YG + GC + CB + BM + MR 67 | 68 | colorwheel = np.zeros([ncols, 3]) 69 | 70 | col = 0 71 | 72 | # RY 73 | colorwheel[0:RY, 0] = 255 74 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 75 | col += RY 76 | 77 | # YG 78 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 79 | colorwheel[col:col + YG, 1] = 255 80 | col += YG 81 | 82 | # GC 83 | colorwheel[col:col + GC, 1] = 255 84 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 85 | col += GC 86 | 87 | # CB 88 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 89 | colorwheel[col:col + CB, 2] = 255 90 | col += CB 91 | 92 | # BM 93 | colorwheel[col:col + BM, 2] = 255 94 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 
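# (each RY/YG/GC/CB/BM/MR segment in this function linearly ramps one RGB
# channel between adjacent anchor colors; 'col' tracks the running row
# offset into the wheel)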
95 | col += + BM 96 | 97 | # MR 98 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 99 | colorwheel[col:col + MR, 0] = 255 100 | 101 | return colorwheel 102 | 103 | 104 | def flow_to_png_middlebury(flow): 105 | """ 106 | Convert flow into middlebury color code image 107 | :param flow: optical flow map 108 | :return: optical flow image in middlebury color 109 | """ 110 | 111 | flow = flow.transpose([1, 2, 0]) 112 | u = flow[:, :, 0] 113 | v = flow[:, :, 1] 114 | 115 | maxu = -999. 116 | maxv = -999. 117 | minu = 999. 118 | minv = 999. 119 | 120 | idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) 121 | u[idxUnknow] = 0 122 | v[idxUnknow] = 0 123 | 124 | maxu = max(maxu, np.max(u)) 125 | minu = min(minu, np.min(u)) 126 | 127 | maxv = max(maxv, np.max(v)) 128 | minv = min(minv, np.min(v)) 129 | 130 | rad = np.sqrt(u ** 2 + v ** 2) 131 | maxrad = max(-1, np.max(rad)) 132 | # maxrad = 4 133 | 134 | u = u / (maxrad + np.finfo(float).eps) 135 | v = v / (maxrad + np.finfo(float).eps) 136 | 137 | img = compute_color(u, v) 138 | 139 | idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) 140 | img[idx] = 0 141 | 142 | return np.uint8(img) 143 | 144 | 145 | def numpy2torch(array): 146 | assert(isinstance(array, np.ndarray)) 147 | if array.ndim == 3: 148 | array = np.transpose(array, (2, 0, 1)) 149 | else: 150 | array = np.expand_dims(array, axis=0) 151 | return torch.from_numpy(array.copy()).float() 152 | 153 | 154 | def get_pixelgrid(b, h, w, flow=None): 155 | 156 | grid_h = torch.linspace(0.0, w - 1, w).view(1, 1, 1, w).expand(b, 1, h, w) 157 | grid_v = torch.linspace(0.0, h - 1, h).view(1, 1, h, 1).expand(b, 1, h, w) 158 | ones = torch.ones_like(grid_h) 159 | 160 | if flow is None: 161 | pixelgrid = torch.cat((grid_h, grid_v, ones), dim=1).float().requires_grad_(False) 162 | else: 163 | pixelgrid = torch.cat((grid_h + flow[:, 0:1, :, :], grid_v + flow[:, 1:2, :, :], ones), dim=1).float().requires_grad_(False) 164 | 165 | return pixelgrid 166 | 167 | 168 | def pixel2pts(depth, intrinsic, flow=None): 169 | 170 | b, _, h, w = depth.size() 171 | pixelgrid = get_pixelgrid(b, h, w, flow) 172 | 173 | depth_mat = depth.view(b, 1, -1) 174 | pixel_mat = pixelgrid.view(b, 3, -1) 175 | 176 | pts_mat = torch.matmul(torch.inverse(intrinsic), pixel_mat) * depth_mat 177 | 178 | pts = pts_mat.view(b, -1, h, w) 179 | 180 | return pts, pixelgrid 181 | 182 | def disp2depth_kitti(pred_disp, focal_length): 183 | pred_depth = focal_length * 0.54 / pred_disp 184 | pred_depth = torch.clamp(pred_depth, 1e-3, 80) 185 | return pred_depth 186 | 187 | def pixel2pts_ms(output_disp, intrinsic, flow=None): 188 | focal_length = intrinsic[:, 0, 0] 189 | output_depth = disp2depth_kitti(output_disp, focal_length) 190 | pts, _ = pixel2pts(output_depth, intrinsic, flow) 191 | return pts 192 | 193 | 194 | def get_grid(x): 195 | grid_H = torch.linspace(-1.0, 1.0, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) 196 | grid_V = torch.linspace(-1.0, 1.0, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) 197 | grid = torch.cat([grid_H, grid_V], 1) 198 | grids_cuda = grid.float().requires_grad_(False) 199 | return grids_cuda 200 | 201 | 202 | def read_png_disp(disp_file): 203 | disp_np = io.imread(disp_file).astype(np.uint16) / 256.0 204 | disp_np = np.expand_dims(disp_np, axis=2) 205 | mask_disp = (disp_np > 0).astype(np.float64) 206 | return disp_np 207 | 208 | def read_png_flow(flow_file): 209 | flow = 
cv2.imread(flow_file, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)[:,:,::-1].astype(np.float64) 210 | flow, valid = flow[:, :, :2], flow[:, :, 2:] 211 | flow = (flow - 2**15) / 64.0 212 | return flow 213 | 214 | 215 | -------------------------------------------------------------------------------- /demo/demo_generator/vis/__init__: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/demo_generator/vis/__init__ -------------------------------------------------------------------------------- /demo/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/demo/teaser.png -------------------------------------------------------------------------------- /install_modules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd ./models/correlation_package 3 | python setup.py install 4 | cd ../forwardwarp_package 5 | python setup.py install 6 | cd ../.. 7 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import logging 5 | import subprocess 6 | import torch 7 | from core import commandline, runtime, logger, tools, configuration as config 8 | 9 | def main(): 10 | 11 | # Change working directory 12 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 13 | 14 | # Parse commandline arguments 15 | args = commandline.setup_logging_and_parse_arguments(blocktitle="Commandline Arguments") 16 | 17 | # Set random seed, possibly on Cuda 18 | config.configure_random_seed(args) 19 | 20 | # DataLoader 21 | train_loader, validation_loader, inference_loader = config.configure_data_loaders(args) 22 | success = any(loader is not None for loader in [train_loader, validation_loader, inference_loader]) 23 | if not success: 24 | logging.info("No dataset could be loaded successfully. Please check dataset paths!") 25 | quit() 26 | 27 | # Configure data augmentation 28 | training_augmentation, validation_augmentation = config.configure_runtime_augmentations(args) 29 | 30 | # Configure model and loss 31 | model_and_loss = config.configure_model_and_loss(args) 32 | 33 | # Resume from checkpoint if available 34 | checkpoint_saver, checkpoint_stats = config.configure_checkpoint_saver(args, model_and_loss) 35 | 36 | if checkpoint_stats is not None: 37 | # Set checkpoint stats 38 | if args.checkpoint_mode in ["resume_from_best", "resume_from_latest"]: 39 | args.start_epoch = checkpoint_stats["epoch"] + 1 40 | 41 | # # Multi-GPU automation 42 | # with logger.LoggingBlock("Multi GPU", emph=True): 43 | # if torch.cuda.device_count() > 1: 44 | # logging.info("Let's use %d GPUs!" % torch.cuda.device_count()) 45 | # model_and_loss._model = torch.nn.DataParallel(model_and_loss._model) 46 | # else: 47 | # logging.info("Let's use %d GPU!" 
% torch.cuda.device_count()) 48 | 49 | 50 | # Checkpoint and save directory 51 | with logger.LoggingBlock("Save Directory", emph=True): 52 | logging.info("Save directory: %s" % args.save) 53 | if not os.path.exists(args.save): 54 | os.makedirs(args.save) 55 | 56 | # Configure optimizer 57 | optimizer = config.configure_optimizer(args, model_and_loss) 58 | 59 | # Configure learning rate 60 | lr_scheduler = config.configure_lr_scheduler(args, optimizer) 61 | 62 | # If this is just an evaluation: overwrite savers and epochs 63 | if args.evaluation: 64 | args.start_epoch = 1 65 | args.total_epochs = 1 66 | train_loader = None 67 | checkpoint_saver = None 68 | optimizer = None 69 | lr_scheduler = None 70 | 71 | # Cuda optimization 72 | if args.cuda: 73 | torch.backends.cudnn.deterministic = True 74 | torch.backends.cudnn.benchmark = False 75 | 76 | # Kickoff training, validation and/or testing 77 | return runtime.exec_runtime( 78 | args, 79 | checkpoint_saver=checkpoint_saver, 80 | model_and_loss=model_and_loss, 81 | optimizer=optimizer, 82 | lr_scheduler=lr_scheduler, 83 | train_loader=train_loader, 84 | validation_loader=validation_loader, 85 | inference_loader=inference_loader, 86 | training_augmentation=training_augmentation, 87 | validation_augmentation=validation_augmentation) 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import model_monosceneflow 2 | from . import model_monosceneflow_ablation 3 | from . import model_monosceneflow_ablation_decoder_split 4 | from . import model_monodepth_ablation 5 | 6 | ########################################################################################## 7 | ## Monocular Scene Flow - The full model 8 | ########################################################################################## 9 | 10 | MonoSceneFlow_fullmodel = model_monosceneflow.MonoSceneFlow 11 | 12 | ########################################################################################## 13 | ## Monocular Scene Flow - The models for the ablation studies 14 | ########################################################################################## 15 | 16 | MonoSceneFlow_CamConv = model_monosceneflow_ablation.MonoSceneFlow_CamConv 17 | 18 | MonoSceneFlow_FlowOnly = model_monosceneflow_ablation.MonoSceneFlow_OpticalFlowOnly 19 | MonoSceneFlow_DispOnly = model_monosceneflow_ablation.MonoSceneFlow_DisparityOnly 20 | 21 | MonoSceneFlow_Split_Cont = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split_base 22 | MonoSceneFlow_Split_Last1 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split1 23 | MonoSceneFlow_Split_Last2 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split2 24 | MonoSceneFlow_Split_Last3 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split3 25 | MonoSceneFlow_Split_Last4 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split4 26 | MonoSceneFlow_Split_Last5 = model_monosceneflow_ablation_decoder_split.SceneFlow_pwcnet_split5 27 | 28 | ########################################################################################## 29 | ## Monocular Depth - The models for the ablation study in Table 1. 
30 | ########################################################################################## 31 | 32 | MonoDepth_Baseline = model_monodepth_ablation.MonoDepth_Baseline 33 | MonoDepth_CamConv = model_monodepth_ablation.MonoDepth_CamConv -------------------------------------------------------------------------------- /models/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/models/correlation_package/__init__.py -------------------------------------------------------------------------------- /models/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class Correlation(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input1, input2, param_dict): 10 | ctx.save_for_backward(input1, input2) 11 | 12 | ctx.pad_size = param_dict["pad_size"] 13 | ctx.kernel_size = param_dict["kernel_size"] 14 | ctx.max_disp = param_dict["max_disp"] 15 | ctx.stride1 = param_dict["stride1"] 16 | ctx.stride2 = param_dict["stride2"] 17 | ctx.corr_multiply = param_dict["corr_multiply"] 18 | 19 | with torch.cuda.device_of(input1): 20 | rbot1 = input1.new() 21 | rbot2 = input2.new() 22 | output = input1.new() 23 | 24 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 25 | ctx.pad_size, ctx.kernel_size, ctx.max_disp, ctx.stride1, ctx.stride2, ctx.corr_multiply) 26 | 27 | return output 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | input1, input2 = ctx.saved_tensors 32 | 33 | with torch.cuda.device_of(input1): 34 | rbot1 = input1.new() 35 | rbot2 = input2.new() 36 | 37 | grad_input1 = input1.new() 38 | grad_input2 = input2.new() 39 | 40 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 41 | ctx.pad_size, ctx.kernel_size, ctx.max_disp, ctx.stride1, ctx.stride2, ctx.corr_multiply) 42 | 43 | return grad_input1, grad_input2, None 44 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "correlation_cuda_kernel.cuh" 9 | 10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 11 | int pad_size, 12 | int kernel_size, 13 | int max_displacement, 14 | int stride1, 15 | int stride2, 16 | int corr_type_multiply) 17 | { 18 | 19 | int batchSize = input1.size(0); 20 | 21 | int nInputChannels = input1.size(1); 22 | int inputHeight = input1.size(2); 23 | int inputWidth = input1.size(3); 24 | 25 | int kernel_radius = (kernel_size - 1) / 2; 26 | int border_radius = kernel_radius + max_displacement; 27 | 28 | int paddedInputHeight = inputHeight + 2 * pad_size; 29 | int paddedInputWidth = inputWidth + 2 * pad_size; 30 | 31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 32 | 33 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 34 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 35 | 36 | rInput1.resize_({batchSize, 
paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 39 | 40 | rInput1.fill_(0); 41 | rInput2.fill_(0); 42 | output.fill_(0); 43 | 44 | int success = correlation_forward_cuda_kernel( 45 | output, 46 | output.size(0), 47 | output.size(1), 48 | output.size(2), 49 | output.size(3), 50 | output.stride(0), 51 | output.stride(1), 52 | output.stride(2), 53 | output.stride(3), 54 | input1, 55 | input1.size(1), 56 | input1.size(2), 57 | input1.size(3), 58 | input1.stride(0), 59 | input1.stride(1), 60 | input1.stride(2), 61 | input1.stride(3), 62 | input2, 63 | input2.size(1), 64 | input2.stride(0), 65 | input2.stride(1), 66 | input2.stride(2), 67 | input2.stride(3), 68 | rInput1, 69 | rInput2, 70 | pad_size, 71 | kernel_size, 72 | max_displacement, 73 | stride1, 74 | stride2, 75 | corr_type_multiply, 76 | at::cuda::getCurrentCUDAStream() 77 | //at::globalContext().getCurrentCUDAStream() 78 | ); 79 | 80 | //check for errors 81 | if (!success) { 82 | AT_ERROR("CUDA call failed"); 83 | } 84 | 85 | return 1; 86 | 87 | } 88 | 89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 90 | at::Tensor& gradInput1, at::Tensor& gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply) 97 | { 98 | 99 | int batchSize = input1.size(0); 100 | int nInputChannels = input1.size(1); 101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 103 | 104 | int height = input1.size(2); 105 | int width = input1.size(3); 106 | 107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 109 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 110 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 111 | 112 | rInput1.fill_(0); 113 | rInput2.fill_(0); 114 | gradInput1.fill_(0); 115 | gradInput2.fill_(0); 116 | 117 | int success = correlation_backward_cuda_kernel(gradOutput, 118 | gradOutput.size(0), 119 | gradOutput.size(1), 120 | gradOutput.size(2), 121 | gradOutput.size(3), 122 | gradOutput.stride(0), 123 | gradOutput.stride(1), 124 | gradOutput.stride(2), 125 | gradOutput.stride(3), 126 | input1, 127 | input1.size(1), 128 | input1.size(2), 129 | input1.size(3), 130 | input1.stride(0), 131 | input1.stride(1), 132 | input1.stride(2), 133 | input1.stride(3), 134 | input2, 135 | input2.stride(0), 136 | input2.stride(1), 137 | input2.stride(2), 138 | input2.stride(3), 139 | gradInput1, 140 | gradInput1.stride(0), 141 | gradInput1.stride(1), 142 | gradInput1.stride(2), 143 | gradInput1.stride(3), 144 | gradInput2, 145 | gradInput2.size(1), 146 | gradInput2.stride(0), 147 | gradInput2.stride(1), 148 | gradInput2.stride(2), 149 | gradInput2.stride(3), 150 | rInput1, 151 | rInput2, 152 | pad_size, 153 | kernel_size, 154 | max_displacement, 155 | stride1, 156 | stride2, 157 | corr_type_multiply, 158 | at::cuda::getCurrentCUDAStream() 159 | //at::globalContext().getCurrentCUDAStream() 160 | ); 161 | 162 | if (!success) { 163 | AT_ERROR("CUDA call failed"); 164 | } 165 | 166 | return 1; 167 | } 168 | 169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 170 | m.def("forward", &correlation_forward_cuda, 
"Correlation forward (CUDA)"); 171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 172 | } 173 | 174 | -------------------------------------------------------------------------------- /models/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- /models/correlation_package/readme.txt: -------------------------------------------------------------------------------- 1 | https://github.com/NVIDIA/flownet2-pytorch/tree/master/networks/correlation_package 2 | 3 | Latest commit ff19163 on Aug 25, 2018 -------------------------------------------------------------------------------- /models/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import setup, find_packages 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | cxx_args = ['-std=c++11'] 6 | 7 | nvcc_args = [ 8 | '-gencode', 'arch=compute_50,code=sm_50', 9 | '-gencode', 'arch=compute_52,code=sm_52', 10 | '-gencode', 'arch=compute_60,code=sm_60', 11 | '-gencode', 'arch=compute_61,code=sm_61', 12 | '-gencode', 'arch=compute_61,code=compute_61' 13 | ] 14 | 15 | setup( 16 | name='correlation_cuda', 17 | ext_modules=[ 18 | CUDAExtension('correlation_cuda', [ 19 | 'correlation_cuda.cc', 20 | 'correlation_cuda_kernel.cu' 21 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 22 | ], 23 | cmdclass={ 24 | 'build_ext': BuildExtension 25 | }) 26 | -------------------------------------------------------------------------------- /models/forwardwarp_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/models/forwardwarp_package/__init__.py 
-------------------------------------------------------------------------------- /models/forwardwarp_package/forward_warp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, Parameter 3 | from torch.autograd import Function 4 | 5 | import forward_warp_cuda 6 | 7 | class forward_warp_function(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, im0, flow): 11 | ''' 12 | im0: the first image with shape [B, C, H, W] 13 | flow: the optical flow with shape [B, H, W, 2] (different to grid_sample, it's range is from [-W, -H] to [W, H]) 14 | ''' 15 | assert(len(im0.shape) == len(flow.shape) == 4) 16 | assert(im0.shape[0] == flow.shape[0]) 17 | assert(im0.shape[-2:] == flow.shape[1:3]) 18 | assert(flow.shape[3] == 2) 19 | 20 | im0 = im0.contiguous() 21 | flow = flow.contiguous() 22 | ctx.save_for_backward(im0, flow) 23 | 24 | im1 = torch.zeros(im0.size(), dtype=im0.dtype, layout=im0.layout, device=im0.device) 25 | 26 | # with torch.cuda.device_of(im0): 27 | forward_warp_cuda.forward(im0, flow, im1) 28 | 29 | return im1 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | 34 | grad_output = grad_output.contiguous() 35 | im0, flow = ctx.saved_variables 36 | im0_grad = torch.zeros(im0.size(), dtype=im0.dtype, layout=im0.layout, device=im0.device) 37 | flow_grad = torch.zeros(flow.size(), dtype=flow.dtype, layout=flow.layout, device=flow.device) 38 | 39 | #with torch.cuda.device_of(im0): 40 | forward_warp_cuda.backward(grad_output, im0, flow, im0_grad, flow_grad) 41 | 42 | return im0_grad, flow_grad 43 | 44 | 45 | class forward_warp(Module): 46 | 47 | def __init__(self): 48 | super(forward_warp, self).__init__() 49 | 50 | def forward(self, im0, flow): 51 | 52 | _, _, h, w = im0.size() 53 | flow = torch.clamp(flow, -2*w, 2*w) 54 | 55 | return forward_warp_function.apply(im0, flow) 56 | -------------------------------------------------------------------------------- /models/forwardwarp_package/forward_warp_cuda.cpp: -------------------------------------------------------------------------------- 1 | // #include 2 | #include 3 | 4 | int forward_warp_cuda_forward(const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im1); 5 | int forward_warp_cuda_backward(const at::Tensor& grad_output, const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im0_grad, at::Tensor& flow_grad); 6 | 7 | // Because of the incompatible of Pytorch 1.0 && Pytorch 0.4, we have to annotation this. 
8 | #define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDA tensor") 9 | #define CHECK_CONTIGUOUS(x) AT_ASSERT(x.is_contiguous(), #x " must be contiguous") 10 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 11 | 12 | int forward_warp_forward(const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im1){ 13 | CHECK_INPUT(im0); 14 | CHECK_INPUT(flow); 15 | 16 | // im1.resize_({im0.size(0), im0.size(1), im0.size(2), im0.size(3)}); 17 | // im1.fill_(0); 18 | int success = forward_warp_cuda_forward(im0, flow, im1); 19 | 20 | if (!success) { 21 | AT_ERROR("CUDA call failed"); 22 | } 23 | return 1; 24 | } 25 | 26 | int forward_warp_backward(const at::Tensor& grad_output, const at::Tensor& im0, const at::Tensor& flow, at::Tensor& im0_grad, at::Tensor& flow_grad){ 27 | CHECK_INPUT(grad_output); 28 | CHECK_INPUT(im0); 29 | CHECK_INPUT(flow); 30 | 31 | // im0_grad.resize_({im0.size(0), im0.size(1), im0.size(2), im0.size(3)}); 32 | // flow_grad.resize_({flow.size(0), flow.size(1), flow.size(2), flow.size(3)}); 33 | // im0_grad.fill_(0); 34 | // flow_grad.fill_(0); 35 | 36 | int success = forward_warp_cuda_backward(grad_output, im0, flow, im0_grad, flow_grad); 37 | 38 | if (!success) { 39 | AT_ERROR("CUDA call failed"); 40 | } 41 | return 1; 42 | } 43 | 44 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 45 | m.def("forward", &forward_warp_forward, "forward warp forward (CUDA)"); 46 | m.def("backward", &forward_warp_backward, "forward warp backward (CUDA)"); 47 | } 48 | -------------------------------------------------------------------------------- /models/forwardwarp_package/forward_warp_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | // Define CUDA_NUM_THREAS and GET_BLOCKS 8 | const int CUDA_NUM_THREADS = 1024; 9 | inline int GET_BLOCKS(const int N){ return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;} 10 | 11 | // Define CUDA_KERNEL_LOOP 12 | #define CUDA_KERNEL_LOOP(i, n) \ 13 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x) 14 | 15 | 16 | 17 | static __forceinline__ __device__ 18 | int get_im_index( 19 | const int bb, 20 | const int cc, 21 | const int hh, 22 | const int ww, 23 | const size_t C, 24 | const size_t H, 25 | const size_t W) { 26 | return bb*C*H*W + cc*H*W + hh*W + ww; 27 | } 28 | 29 | template 30 | __global__ void forward_warp_cuda_forward_kernel( 31 | const int total_step, 32 | const scalar_t* im0, 33 | const scalar_t* flow, 34 | scalar_t* im1, 35 | const int B, 36 | const int C, 37 | const int H, 38 | const int W) { 39 | 40 | CUDA_KERNEL_LOOP(index, total_step-1) { 41 | const int bb = index / (H * W); 42 | const int hh = (index - bb*H*W) / W; 43 | const int ww = index % W; 44 | const scalar_t x = (scalar_t)ww + flow[index * 2 + 0]; 45 | const scalar_t y = (scalar_t)hh + flow[index * 2 + 1]; 46 | const int x_f = static_cast(::floor(x)); 47 | const int y_f = static_cast(::floor(y)); 48 | const int x_c = x_f + 1; 49 | const int y_c = y_f + 1; 50 | 51 | if(x_f>=0 && x_c=0 && y_c 69 | __global__ void forward_warp_cuda_backward_kernel( 70 | const int total_step, 71 | const scalar_t* grad_output, 72 | const scalar_t* im0, 73 | const scalar_t* flow, 74 | scalar_t* im0_grad, 75 | scalar_t* flow_grad, 76 | const int B, 77 | const int C, 78 | const int H, 79 | const int W) { 80 | 81 | CUDA_KERNEL_LOOP(index, total_step) { 82 | const int bb = index / (H * W); 83 | const int hh = (index-bb*H*W) / W; 84 | const int 
ww = index % W; 85 | const scalar_t x = (scalar_t)ww + flow[index * 2 + 0]; 86 | const scalar_t y = (scalar_t)hh + flow[index * 2 + 1]; 87 | 88 | const int x_f = static_cast(::floor(x)); 89 | const int y_f = static_cast(::floor(y)); 90 | const int x_c = x_f + 1; 91 | const int y_c = y_f + 1; 92 | 93 | if(x_f>=0 && x_c=0 && y_c 140 | <<>>( 141 | total_step, 142 | im0.data(), 143 | flow.data(), 144 | im1.data(), 145 | B, C, H, W); 146 | })); 147 | 148 | cudaError_t err = cudaGetLastError(); 149 | 150 | // check for errors 151 | if (err != cudaSuccess) { 152 | printf("error in Forwardwarp : forward_cuda_kernel: %s\n", cudaGetErrorString(err)); 153 | return 0; 154 | } 155 | 156 | return 1; 157 | } 158 | 159 | int forward_warp_cuda_backward( 160 | const at::Tensor& grad_output, 161 | const at::Tensor& im0, 162 | const at::Tensor& flow, 163 | at::Tensor& im0_grad, 164 | at::Tensor& flow_grad) { 165 | 166 | const int B = im0.size(0); 167 | const int C = im0.size(1); 168 | const int H = im0.size(2); 169 | const int W = im0.size(3); 170 | const int total_step = B * H * W; 171 | 172 | AT_DISPATCH_FLOATING_TYPES(grad_output.scalar_type(), "forward_warp_backward_cuda", ([&] { 173 | forward_warp_cuda_backward_kernel 174 | <<>>( 175 | total_step, 176 | grad_output.data(), 177 | im0.data(), 178 | flow.data(), 179 | im0_grad.data(), 180 | flow_grad.data(), 181 | B, C, H, W); 182 | })); 183 | 184 | cudaError_t err = cudaGetLastError(); 185 | 186 | // check for errors 187 | if (err != cudaSuccess) { 188 | printf("error in Forwardwarp : forward_cuda_kernel: %s\n", cudaGetErrorString(err)); 189 | return 0; 190 | } 191 | 192 | return 1; 193 | } 194 | -------------------------------------------------------------------------------- /models/forwardwarp_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import setup, find_packages 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | cxx_args = ['-std=c++11'] 6 | 7 | nvcc_args = [ 8 | '-gencode', 'arch=compute_50,code=sm_50', 9 | '-gencode', 'arch=compute_52,code=sm_52', 10 | '-gencode', 'arch=compute_60,code=sm_60', 11 | '-gencode', 'arch=compute_61,code=sm_61', 12 | '-gencode', 'arch=compute_61,code=compute_61' 13 | ] 14 | 15 | setup( 16 | name='forward_warp_cuda', 17 | ext_modules=[ 18 | CUDAExtension('forward_warp_cuda', [ 19 | 'forward_warp_cuda.cpp', 20 | 'forward_warp_cuda_kernel.cu', 21 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 22 | ], 23 | cmdclass={ 24 | 'build_ext': BuildExtension 25 | }) 26 | -------------------------------------------------------------------------------- /models/model_monodepth_ablation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .modules_monodepth import Resnet18_MonoDepth_Single, Resnet18_MonoDepth_Single_CamConv 6 | from utils.monodepth_eval import disp_post_processing 7 | 8 | 9 | class MonoDepth_Baseline(nn.Module): 10 | def __init__(self, args): 11 | super(MonoDepth_Baseline, self).__init__() 12 | 13 | self._args = args 14 | self._mono_resnet18 = Resnet18_MonoDepth_Single() 15 | 16 | def forward(self, input_dict): 17 | 18 | output_dict = {} 19 | 20 | if not self._args.evaluation: 21 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(input_dict['input_l1']) 22 | disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4 = 
self._mono_resnet18(torch.flip(input_dict['input_r1'], [3])) 23 | 24 | disp_r1_1 = torch.flip(disp_r1_1, [3]) 25 | disp_r1_2 = torch.flip(disp_r1_2, [3]) 26 | disp_r1_3 = torch.flip(disp_r1_3, [3]) 27 | disp_r1_4 = torch.flip(disp_r1_4, [3]) 28 | 29 | output_dict['disp_l1'] = [disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4] 30 | output_dict['disp_r1'] = [disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4] 31 | 32 | else: 33 | inputs = torch.cat((input_dict['input_l1'], torch.flip(input_dict['input_l1'], [3])), dim=0) 34 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(inputs) 35 | out_disp_1_pp = disp_post_processing(disp_l1_1) 36 | output_dict['disp_l1_pp'] = [out_disp_1_pp] 37 | 38 | return output_dict 39 | 40 | 41 | class MonoDepth_CamConv(nn.Module): 42 | def __init__(self, args): 43 | super(MonoDepth_CamConv, self).__init__() 44 | 45 | self._args = args 46 | self._mono_resnet18 = Resnet18_MonoDepth_Single_CamConv() 47 | 48 | def forward(self, input_dict): 49 | 50 | output_dict = {} 51 | 52 | if not self._args.evaluation: 53 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(input_dict['input_l1'], input_dict['input_k_l1']) 54 | disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4 = self._mono_resnet18(torch.flip(input_dict['input_r1'], [3]), input_dict['input_k_r1_flip']) 55 | 56 | disp_r1_1 = torch.flip(disp_r1_1, [3]) 57 | disp_r1_2 = torch.flip(disp_r1_2, [3]) 58 | disp_r1_3 = torch.flip(disp_r1_3, [3]) 59 | disp_r1_4 = torch.flip(disp_r1_4, [3]) 60 | 61 | output_dict['disp_l1'] = [disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4] 62 | output_dict['disp_r1'] = [disp_r1_1, disp_r1_2, disp_r1_3, disp_r1_4] 63 | 64 | else: 65 | input_img = torch.cat((input_dict['input_l1'], torch.flip(input_dict['input_l1'], [3])), dim=0) 66 | intrinsic = torch.cat((input_dict['input_k_l1'], input_dict['input_k_l1_flip']), dim=0) 67 | disp_l1_1, disp_l1_2, disp_l1_3, disp_l1_4 = self._mono_resnet18(input_img, intrinsic) 68 | out_disp_1_pp = disp_post_processing(disp_l1_1) 69 | output_dict['disp_l1_pp'] = [out_disp_1_pp] 70 | 71 | return output_dict 72 | -------------------------------------------------------------------------------- /models/model_monosceneflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as tf 6 | import logging 7 | 8 | from .correlation_package.correlation import Correlation 9 | 10 | from .modules_sceneflow import get_grid, WarpingLayer_SF 11 | from .modules_sceneflow import initialize_msra, upsample_outputs_as 12 | from .modules_sceneflow import upconv 13 | from .modules_sceneflow import FeatureExtractor, MonoSceneFlowDecoder, ContextNetwork 14 | 15 | from utils.interpolation import interpolate2d_as 16 | from utils.sceneflow_util import flow_horizontal_flip, intrinsic_scale, get_pixelgrid, post_processing 17 | 18 | 19 | class MonoSceneFlow(nn.Module): 20 | def __init__(self, args): 21 | super(MonoSceneFlow, self).__init__() 22 | 23 | self._args = args 24 | self.num_chs = [3, 32, 64, 96, 128, 192, 256] 25 | self.search_range = 4 26 | self.output_level = 4 27 | self.num_levels = 7 28 | 29 | self.leakyRELU = nn.LeakyReLU(0.1, inplace=True) 30 | 31 | self.feature_pyramid_extractor = FeatureExtractor(self.num_chs) 32 | self.warping_layer_sf = WarpingLayer_SF() 33 | 34 | self.flow_estimators = nn.ModuleList() 35 | self.upconv_layers = nn.ModuleList() 36 | 37 | self.dim_corr = (self.search_range * 2 
+ 1) ** 2 38 | 39 | for l, ch in enumerate(self.num_chs[::-1]): 40 | if l > self.output_level: 41 | break 42 | 43 | if l == 0: 44 | num_ch_in = self.dim_corr + ch 45 | else: 46 | num_ch_in = self.dim_corr + ch + 32 + 3 + 1 47 | self.upconv_layers.append(upconv(32, 32, 3, 2)) 48 | 49 | layer_sf = MonoSceneFlowDecoder(num_ch_in) 50 | self.flow_estimators.append(layer_sf) 51 | 52 | self.corr_params = {"pad_size": self.search_range, "kernel_size": 1, "max_disp": self.search_range, "stride1": 1, "stride2": 1, "corr_multiply": 1} 53 | self.context_networks = ContextNetwork(32 + 3 + 1) 54 | self.sigmoid = torch.nn.Sigmoid() 55 | 56 | initialize_msra(self.modules()) 57 | 58 | def run_pwc(self, input_dict, x1_raw, x2_raw, k1, k2): 59 | 60 | output_dict = {} 61 | 62 | # the bottom pyramid level contains the original images 63 | x1_pyramid = self.feature_pyramid_extractor(x1_raw) + [x1_raw] 64 | x2_pyramid = self.feature_pyramid_extractor(x2_raw) + [x2_raw] 65 | 66 | # outputs 67 | sceneflows_f = [] 68 | sceneflows_b = [] 69 | disps_1 = [] 70 | disps_2 = [] 71 | 72 | for l, (x1, x2) in enumerate(zip(x1_pyramid, x2_pyramid)): 73 | 74 | # warping 75 | if l == 0: 76 | x2_warp = x2 77 | x1_warp = x1 78 | else: 79 | flow_f = interpolate2d_as(flow_f, x1, mode="bilinear") 80 | flow_b = interpolate2d_as(flow_b, x1, mode="bilinear") 81 | disp_l1 = interpolate2d_as(disp_l1, x1, mode="bilinear") 82 | disp_l2 = interpolate2d_as(disp_l2, x1, mode="bilinear") 83 | x1_out = self.upconv_layers[l-1](x1_out) 84 | x2_out = self.upconv_layers[l-1](x2_out) 85 | x2_warp = self.warping_layer_sf(x2, flow_f, disp_l1, k1, input_dict['aug_size']) # because K can change when doing augmentation 86 | x1_warp = self.warping_layer_sf(x1, flow_b, disp_l2, k2, input_dict['aug_size']) 87 | 88 | # correlation 89 | out_corr_f = Correlation.apply(x1, x2_warp, self.corr_params) 90 | out_corr_b = Correlation.apply(x2, x1_warp, self.corr_params) 91 | out_corr_relu_f = self.leakyRELU(out_corr_f) 92 | out_corr_relu_b = self.leakyRELU(out_corr_b) 93 | 94 | # monosf estimator 95 | if l == 0: 96 | x1_out, flow_f, disp_l1 = self.flow_estimators[l](torch.cat([out_corr_relu_f, x1], dim=1)) 97 | x2_out, flow_b, disp_l2 = self.flow_estimators[l](torch.cat([out_corr_relu_b, x2], dim=1)) 98 | else: 99 | x1_out, flow_f_res, disp_l1 = self.flow_estimators[l](torch.cat([out_corr_relu_f, x1, x1_out, flow_f, disp_l1], dim=1)) 100 | x2_out, flow_b_res, disp_l2 = self.flow_estimators[l](torch.cat([out_corr_relu_b, x2, x2_out, flow_b, disp_l2], dim=1)) 101 | flow_f = flow_f + flow_f_res 102 | flow_b = flow_b + flow_b_res 103 | 104 | # upsampling or post-processing 105 | if l != self.output_level: 106 | disp_l1 = self.sigmoid(disp_l1) * 0.3 107 | disp_l2 = self.sigmoid(disp_l2) * 0.3 108 | sceneflows_f.append(flow_f) 109 | sceneflows_b.append(flow_b) 110 | disps_1.append(disp_l1) 111 | disps_2.append(disp_l2) 112 | else: 113 | flow_res_f, disp_l1 = self.context_networks(torch.cat([x1_out, flow_f, disp_l1], dim=1)) 114 | flow_res_b, disp_l2 = self.context_networks(torch.cat([x2_out, flow_b, disp_l2], dim=1)) 115 | flow_f = flow_f + flow_res_f 116 | flow_b = flow_b + flow_res_b 117 | sceneflows_f.append(flow_f) 118 | sceneflows_b.append(flow_b) 119 | disps_1.append(disp_l1) 120 | disps_2.append(disp_l2) 121 | break 122 | 123 | x1_rev = x1_pyramid[::-1] 124 | 125 | output_dict['flow_f'] = upsample_outputs_as(sceneflows_f[::-1], x1_rev) 126 | output_dict['flow_b'] = upsample_outputs_as(sceneflows_b[::-1], x1_rev) 127 | output_dict['disp_l1'] = 
upsample_outputs_as(disps_1[::-1], x1_rev) 128 | output_dict['disp_l2'] = upsample_outputs_as(disps_2[::-1], x1_rev) 129 | 130 | return output_dict 131 | 132 | 133 | def forward(self, input_dict): 134 | 135 | output_dict = {} 136 | 137 | ## Left 138 | output_dict = self.run_pwc(input_dict, input_dict['input_l1_aug'], input_dict['input_l2_aug'], input_dict['input_k_l1_aug'], input_dict['input_k_l2_aug']) 139 | 140 | ## Right 141 | ## ss: train val 142 | ## ft: train 143 | if self.training or (not self._args.finetuning and not self._args.evaluation): 144 | input_r1_flip = torch.flip(input_dict['input_r1_aug'], [3]) 145 | input_r2_flip = torch.flip(input_dict['input_r2_aug'], [3]) 146 | k_r1_flip = input_dict["input_k_r1_flip_aug"] 147 | k_r2_flip = input_dict["input_k_r2_flip_aug"] 148 | 149 | output_dict_r = self.run_pwc(input_dict, input_r1_flip, input_r2_flip, k_r1_flip, k_r2_flip) 150 | 151 | for ii in range(0, len(output_dict_r['flow_f'])): 152 | output_dict_r['flow_f'][ii] = flow_horizontal_flip(output_dict_r['flow_f'][ii]) 153 | output_dict_r['flow_b'][ii] = flow_horizontal_flip(output_dict_r['flow_b'][ii]) 154 | output_dict_r['disp_l1'][ii] = torch.flip(output_dict_r['disp_l1'][ii], [3]) 155 | output_dict_r['disp_l2'][ii] = torch.flip(output_dict_r['disp_l2'][ii], [3]) 156 | 157 | output_dict['output_dict_r'] = output_dict_r 158 | 159 | ## Post Processing 160 | ## ss: eval 161 | ## ft: train val eval 162 | if self._args.evaluation or self._args.finetuning: 163 | 164 | input_l1_flip = torch.flip(input_dict['input_l1_aug'], [3]) 165 | input_l2_flip = torch.flip(input_dict['input_l2_aug'], [3]) 166 | k_l1_flip = input_dict["input_k_l1_flip_aug"] 167 | k_l2_flip = input_dict["input_k_l2_flip_aug"] 168 | 169 | output_dict_flip = self.run_pwc(input_dict, input_l1_flip, input_l2_flip, k_l1_flip, k_l2_flip) 170 | 171 | flow_f_pp = [] 172 | flow_b_pp = [] 173 | disp_l1_pp = [] 174 | disp_l2_pp = [] 175 | 176 | for ii in range(0, len(output_dict_flip['flow_f'])): 177 | 178 | flow_f_pp.append(post_processing(output_dict['flow_f'][ii], flow_horizontal_flip(output_dict_flip['flow_f'][ii]))) 179 | flow_b_pp.append(post_processing(output_dict['flow_b'][ii], flow_horizontal_flip(output_dict_flip['flow_b'][ii]))) 180 | disp_l1_pp.append(post_processing(output_dict['disp_l1'][ii], torch.flip(output_dict_flip['disp_l1'][ii], [3]))) 181 | disp_l2_pp.append(post_processing(output_dict['disp_l2'][ii], torch.flip(output_dict_flip['disp_l2'][ii], [3]))) 182 | 183 | output_dict['flow_f_pp'] = flow_f_pp 184 | output_dict['flow_b_pp'] = flow_b_pp 185 | output_dict['disp_l1_pp'] = disp_l1_pp 186 | output_dict['disp_l2_pp'] = disp_l2_pp 187 | 188 | return output_dict 189 | -------------------------------------------------------------------------------- /models/modules_camconv.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as tf 6 | 7 | 8 | class CamConvModule(nn.Module): 9 | def __init__(self, norm_const=256.0): 10 | super(CamConvModule, self).__init__() 11 | 12 | self._inputimg_size = None 13 | self._norm_const = norm_const 14 | 15 | self._fx = None 16 | self._fy = None 17 | self._cx = None 18 | self._cy = None 19 | 20 | self._grid_w = None 21 | self._grid_h = None 22 | self._norm_coord = None 23 | self._centered_coord = None 24 | self._fov_maps = None 25 | 26 | # Unsqueeze and Expand as 27 | def ue_as(self, input_tensor, 
target_as): 28 | return input_tensor.unsqueeze(1).unsqueeze(1).unsqueeze(1).expand(target_as.size()).clone() 29 | 30 | def interpolate2d(self, inputs, h, w, mode="bilinear"): 31 | return tf.interpolate(inputs, [h, w], mode=mode, align_corners=True) 32 | 33 | def calculate_CoordConv(self, x): 34 | 35 | grid_w = torch.linspace(0, x.size(3) - 1, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) 36 | grid_h = torch.linspace(0, x.size(2) - 1, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) 37 | self._grid_w = grid_w.float().requires_grad_(False).cuda() 38 | self._grid_h = grid_h.float().requires_grad_(False).cuda() 39 | norm_grid_w = self._grid_w / (x.size(3) - 1) * 2 - 1 40 | norm_grid_h = self._grid_h / (x.size(2) - 1) * 2 - 1 41 | self._norm_coord = torch.cat((norm_grid_w, norm_grid_h), dim=1) 42 | 43 | return None 44 | 45 | def calculate_CamConv(self): 46 | 47 | ## Centered coordinates 48 | centered_coord_w = self._grid_w - self.ue_as(self._cx, self._grid_w) + 0.5 49 | centered_coord_h = self._grid_h - self.ue_as(self._cy, self._grid_h) + 0.5 50 | self._centered_coord = torch.cat((centered_coord_w / self._norm_const, centered_coord_h / self._norm_const), dim=1) 51 | 52 | ## 3) FOV maps 53 | fov_xx_channel = torch.atan(centered_coord_w / self.ue_as(self._fx, self._grid_w)) 54 | fov_yy_channel = torch.atan(centered_coord_h / self.ue_as(self._fy, self._grid_h)) 55 | self._fov_maps = torch.cat((fov_xx_channel, fov_yy_channel), dim=1) 56 | 57 | return None 58 | 59 | def initialize(self, intrinsic, input_img): 60 | 61 | self._fx = intrinsic[:, 0, 0] 62 | self._fy = intrinsic[:, 1, 1] 63 | self._cx = intrinsic[:, 0, 2] 64 | self._cy = intrinsic[:, 1, 2] 65 | self.calculate_CoordConv(input_img) 66 | self.calculate_CamConv() 67 | 68 | return None 69 | 70 | def forward(self, input_tensor, input_img=None, intrinsic=None): 71 | 72 | if input_img is not None: 73 | self.initialize(intrinsic, input_img) 74 | 75 | _, _, hh_t, ww_t = input_tensor.size() 76 | cam_conv_tensor = torch.cat((self._norm_coord, self._centered_coord, self._fov_maps), dim=1) 77 | cam_conv_tensor = self.interpolate2d(cam_conv_tensor, hh_t, ww_t, mode="bilinear") 78 | 79 | 80 | return torch.cat((cam_conv_tensor.detach_(), input_tensor), dim=1) 81 | -------------------------------------------------------------------------------- /models/modules_sceneflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as tf 6 | import logging 7 | 8 | from utils.interpolation import interpolate2d_as 9 | from utils.sceneflow_util import pixel2pts_ms, pts2pixel_ms 10 | 11 | def get_grid(x): 12 | grid_H = torch.linspace(-1.0, 1.0, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) 13 | grid_V = torch.linspace(-1.0, 1.0, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) 14 | grid = torch.cat([grid_H, grid_V], 1) 15 | grids_cuda = grid.float().requires_grad_(False).cuda() 16 | return grids_cuda 17 | 18 | 19 | class WarpingLayer_Flow(nn.Module): 20 | def __init__(self): 21 | super(WarpingLayer_Flow, self).__init__() 22 | 23 | def forward(self, x, flow): 24 | flo_list = [] 25 | flo_w = flow[:, 0] * 2 / max(x.size(3) - 1, 1) 26 | flo_h = flow[:, 1] * 2 / max(x.size(2) - 1, 1) 27 | flo_list.append(flo_w) 28 | flo_list.append(flo_h) 29 | flow_for_grid = 
torch.stack(flo_list).transpose(0, 1) 30 | grid = torch.add(get_grid(x), flow_for_grid).transpose(1, 2).transpose(2, 3) 31 | x_warp = tf.grid_sample(x, grid) 32 | 33 | mask = torch.ones(x.size(), requires_grad=False).cuda() 34 | mask = tf.grid_sample(mask, grid) 35 | mask = (mask >= 1.0).float() 36 | 37 | return x_warp * mask 38 | 39 | 40 | class WarpingLayer_SF(nn.Module): 41 | def __init__(self): 42 | super(WarpingLayer_SF, self).__init__() 43 | 44 | def forward(self, x, sceneflow, disp, k1, input_size): 45 | 46 | _, _, h_x, w_x = x.size() 47 | disp = interpolate2d_as(disp, x) * w_x 48 | 49 | local_scale = torch.zeros_like(input_size) 50 | local_scale[:, 0] = h_x 51 | local_scale[:, 1] = w_x 52 | 53 | pts1, k1_scale = pixel2pts_ms(k1, disp, local_scale / input_size) 54 | _, _, coord1 = pts2pixel_ms(k1_scale, pts1, sceneflow, [h_x, w_x]) 55 | 56 | grid = coord1.transpose(1, 2).transpose(2, 3) 57 | x_warp = tf.grid_sample(x, grid) 58 | 59 | mask = torch.ones_like(x, requires_grad=False) 60 | mask = tf.grid_sample(mask, grid) 61 | mask = (mask >= 1.0).float() 62 | 63 | return x_warp * mask 64 | 65 | 66 | def initialize_msra(modules): 67 | logging.info("Initializing MSRA") 68 | for layer in modules: 69 | if isinstance(layer, nn.Conv2d): 70 | nn.init.kaiming_normal_(layer.weight) 71 | if layer.bias is not None: 72 | nn.init.constant_(layer.bias, 0) 73 | 74 | elif isinstance(layer, nn.ConvTranspose2d): 75 | nn.init.kaiming_normal_(layer.weight) 76 | if layer.bias is not None: 77 | nn.init.constant_(layer.bias, 0) 78 | 79 | elif isinstance(layer, nn.LeakyReLU): 80 | pass 81 | 82 | elif isinstance(layer, nn.Sequential): 83 | pass 84 | 85 | 86 | def upsample_outputs_as(input_list, ref_list): 87 | output_list = [] 88 | for ii in range(0, len(input_list)): 89 | output_list.append(interpolate2d_as(input_list[ii], ref_list[ii])) 90 | 91 | return output_list 92 | 93 | 94 | def conv(in_planes, out_planes, kernel_size=3, stride=1, dilation=1, isReLU=True): 95 | if isReLU: 96 | return nn.Sequential( 97 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, dilation=dilation, 98 | padding=((kernel_size - 1) * dilation) // 2, bias=True), 99 | nn.LeakyReLU(0.1, inplace=True) 100 | ) 101 | else: 102 | return nn.Sequential( 103 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, dilation=dilation, 104 | padding=((kernel_size - 1) * dilation) // 2, bias=True) 105 | ) 106 | 107 | 108 | class upconv(nn.Module): 109 | def __init__(self, num_in_layers, num_out_layers, kernel_size, scale): 110 | super(upconv, self).__init__() 111 | self.scale = scale 112 | self.conv1 = conv(num_in_layers, num_out_layers, kernel_size, 1) 113 | 114 | def forward(self, x): 115 | x = nn.functional.interpolate(x, scale_factor=self.scale, mode='nearest') 116 | return self.conv1(x) 117 | 118 | 119 | class FeatureExtractor(nn.Module): 120 | def __init__(self, num_chs): 121 | super(FeatureExtractor, self).__init__() 122 | self.num_chs = num_chs 123 | self.convs = nn.ModuleList() 124 | 125 | for l, (ch_in, ch_out) in enumerate(zip(num_chs[:-1], num_chs[1:])): 126 | layer = nn.Sequential( 127 | conv(ch_in, ch_out, stride=2), 128 | conv(ch_out, ch_out) 129 | ) 130 | self.convs.append(layer) 131 | 132 | def forward(self, x): 133 | feature_pyramid = [] 134 | for conv in self.convs: 135 | x = conv(x) 136 | feature_pyramid.append(x) 137 | 138 | return feature_pyramid[::-1] 139 | 140 | 141 | class MonoSceneFlowDecoder(nn.Module): 142 | def __init__(self, ch_in): 143 | super(MonoSceneFlowDecoder, 
self).__init__() 144 | 145 | self.convs = nn.Sequential( 146 | conv(ch_in, 128), 147 | conv(128, 128), 148 | conv(128, 96), 149 | conv(96, 64), 150 | conv(64, 32) 151 | ) 152 | self.conv_sf = conv(32, 3, isReLU=False) 153 | self.conv_d1 = conv(32, 1, isReLU=False) 154 | 155 | def forward(self, x): 156 | x_out = self.convs(x) 157 | sf = self.conv_sf(x_out) 158 | disp1 = self.conv_d1(x_out) 159 | 160 | return x_out, sf, disp1 161 | 162 | 163 | class ContextNetwork(nn.Module): 164 | def __init__(self, ch_in): 165 | super(ContextNetwork, self).__init__() 166 | 167 | self.convs = nn.Sequential( 168 | conv(ch_in, 128, 3, 1, 1), 169 | conv(128, 128, 3, 1, 2), 170 | conv(128, 128, 3, 1, 4), 171 | conv(128, 96, 3, 1, 8), 172 | conv(96, 64, 3, 1, 16), 173 | conv(64, 32, 3, 1, 1) 174 | ) 175 | self.conv_sf = conv(32, 3, isReLU=False) 176 | self.conv_d1 = nn.Sequential( 177 | conv(32, 1, isReLU=False), 178 | torch.nn.Sigmoid() 179 | ) 180 | 181 | def forward(self, x): 182 | 183 | x_out = self.convs(x) 184 | sf = self.conv_sf(x_out) 185 | disp1 = self.conv_d1(x_out) * 0.3 186 | 187 | return sf, disp1 188 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Self-Supervised Monocular Scene Flow Estimation 2 | 3 | 4 | 5 | > 3D visualization of estimated depth and scene flow from two temporally consecutive images. 6 | > Intermediate frames are interpolated using the estimated scene flow. (fine-tuned model, tested on KITTI Benchmark) 7 | 8 | This repository is the official PyTorch implementation of the paper: 9 | 10 |    [**Self-Supervised Monocular Scene Flow Estimation**](http://openaccess.thecvf.com/content_CVPR_2020/papers/Hur_Self-Supervised_Monocular_Scene_Flow_Estimation_CVPR_2020_paper.pdf) 11 |    [Junhwa Hur](https://hurjunhwa.github.io) and [Stefan Roth](https://www.visinf.tu-darmstadt.de/visinf/team_members/sroth/sroth.en.jsp) 12 |    *CVPR*, 2020 (**Oral Presentation**) 13 |    [Paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Hur_Self-Supervised_Monocular_Scene_Flow_Estimation_CVPR_2020_paper.pdf) / [Supplemental](http://openaccess.thecvf.com/content_CVPR_2020/supplemental/Hur_Self-Supervised_Monocular_Scene_CVPR_2020_supplemental.pdf) / [Arxiv](https://arxiv.org/abs/2004.04143) 14 | 15 | - Contact: junhwa.hur[at]gmail.com 16 | 17 | ## Getting started 18 | This code has been developed with Anaconda (Python 3.7), **PyTorch 1.2.0** and CUDA 10.0 on Ubuntu 16.04. 
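To verify that your environment matches, a quick sanity check is to print the versions PyTorch reports (`torch.version.cuda` is the CUDA toolkit PyTorch was built against):

```Shell
python -c "import torch; print(torch.__version__, torch.version.cuda)"
```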
19 | Based on a fresh [Anaconda](https://www.anaconda.com/download/) distribution and [PyTorch](https://pytorch.org/) installation, the following packages need to be installed: 20 | 21 | ```Shell 22 | conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch 23 | pip install tensorboard 24 | pip install pypng==0.0.18 25 | pip install colorama 26 | pip install scikit-image 27 | pip install pytz 28 | pip install tqdm==4.30.0 29 | pip install future 30 | ``` 31 | 32 | Then, please execute the following to install the Correlation and Forward Warping layers: 33 | ```Shell 34 | ./install_modules.sh 35 | ``` 36 | 37 | **For PyTorch version > 1.3** 38 | Please add the **`align_corners=True`** flag to the `grid_sample` calls in the following files: 39 | ``` 40 | augmentations.py 41 | losses.py 42 | models/modules_sceneflow.py 43 | utils/sceneflow_util.py 44 | ```
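As a minimal sketch of that change (shown with the `tf = torch.nn.functional` alias used in `models/modules_sceneflow.py`; the variable names are illustrative):

```Python
# PyTorch <= 1.2 behaved as if align_corners were True:
x_warp = tf.grid_sample(x, grid)

# PyTorch > 1.3 flipped the default to align_corners=False,
# so pass the flag explicitly to keep the original sampling behavior:
x_warp = tf.grid_sample(x, grid, align_corners=True)
```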
45 | 46 | 47 | ## Dataset 48 | 49 | Please download the following two datasets for the experiments: 50 | - [KITTI Raw Data](http://www.cvlibs.net/datasets/kitti/raw_data.php) (synced+rectified data; please refer to [MonoDepth2](https://github.com/nianticlabs/monodepth2#-kitti-training-data) for an easier way to download all the data) 51 | - [KITTI Scene Flow 2015](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow) 52 | 53 | To save space, we convert the *KITTI Raw* **png** images to **jpeg**, following the convention from [MonoDepth](https://github.com/mrharicot/monodepth): 54 | ``` 55 | find (data_folder)/ -name '*.png' | parallel 'convert {.}.png {.}.jpg && rm {}' 56 | ``` 57 | We converted the images in *KITTI Scene Flow 2015* as well. Please convert the png images in `image_2` and `image_3` into jpg and save them into the separate folders **`image_2_jpg`** and **`image_3_jpg`**. 58 | 59 | To save further space, you can delete the velodyne point data in the KITTI raw data and optionally download the [*Eigen Split Projected Depth*](https://drive.google.com/file/d/1a97lgOgrChkLxi_nvRpmbsKspveQ6EyD/view?usp=sharing) for the monocular depth evaluation on the Eigen Split. We converted the velodyne point data of the Eigen Test images into numpy array format using code from [MonoDepth](https://github.com/mrharicot/monodepth). After downloading and unzipping it, you can merge it with the KITTI raw data folder. 60 | - [Eigen Split Projected Depth](https://drive.google.com/file/d/1a97lgOgrChkLxi_nvRpmbsKspveQ6EyD/view?usp=sharing) 61 | 62 | ## Training and Inference 63 | The **[scripts](scripts/)** folder contains the training\/inference scripts for all experiments demonstrated in the paper (including the ablation study). 64 | 65 | **For training**, you can simply run the following script files: 66 | 67 | | Script | Training | Dataset | 68 | |----------------------------------------------|----------------------------|------------------------| 69 | | `./train_monosf_selfsup_kitti_raw.sh` | Self-supervised | KITTI Split | 70 | | `./train_monosf_selfsup_eigen_train.sh` | Self-supervised | Eigen Split | 71 | 72 |
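For example, to start self-supervised training on the KITTI split (a sketch; the two paths are placeholders that must point to your local directories, and the script should be launched from the folder containing it, since the scripts call `main.py` via a relative path):

```Shell
# 1) open scripts/train_monosf_selfsup_kitti_raw.sh and set, e.g.:
#      KITTI_RAW_HOME="/data/kitti_raw"       # placeholder path
#      EXPERIMENTS_HOME="/data/experiments"   # placeholder path
# 2) then launch it:
cd scripts
./train_monosf_selfsup_kitti_raw.sh
```

The path variables are documented after the fine-tuning table below.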
73 | **Fine-tuning** is done in two stages: *(i)* first finding the stopping point using a train\/valid split, and then *(ii)* fine-tuning using all data for the found number of iterations. 74 | | Script | Training | Dataset | 75 | |----------------------------------------------|----------------------------|------------------------| 76 | | `./train_monosf_kitti_finetune_1st_stage.sh` | Semi-supervised finetuning | KITTI raw + KITTI 2015 | 77 | | `./train_monosf_kitti_finetune_2nd_stage.sh` | Semi-supervised finetuning | KITTI raw + KITTI 2015 | 78 | 79 | In the script files, please configure the following paths for the experiments: 80 | - `EXPERIMENTS_HOME` : your own experiment directory where checkpoints and log files will be saved. 81 | - `KITTI_RAW_HOME` : the directory where *KITTI raw data* is located in your local system. 82 | - `KITTI_HOME` : the directory where *KITTI Scene Flow 2015* is located in your local system. 83 | - `KITTI_COMB_HOME` : the directory where both *KITTI Scene Flow 2015* and *KITTI raw data* are located. 84 | 85 | 86 | **For testing the pretrained models**, you can simply run the following script files: 87 | 88 | | Script | Task | Training | Dataset | 89 | |-------------------------------------------|---------------|-----------------|------------------| 90 | | `./eval_monosf_selfsup_kitti_train.sh` | MonoSceneFlow | Self-supervised | KITTI 2015 Train | 91 | | `./eval_monosf_selfsup_kitti_test.sh` | MonoSceneFlow | Self-supervised | KITTI 2015 Test | 92 | | `./eval_monosf_finetune_kitti_test.sh` | MonoSceneFlow | Fine-tuned | KITTI 2015 Test | 93 | | `./eval_monodepth_selfsup_kitti_train.sh` | MonoDepth | Self-supervised | KITTI test split | 94 | | `./eval_monodepth_selfsup_eigen_test.sh` | MonoDepth | Self-supervised | Eigen test split | 95 | 96 | - Testing on *KITTI 2015 Test* produces output images for uploading to the [KITTI Scene Flow 2015 Benchmark](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php). 97 | - To save the output images, please turn on `--save_disp=True`, `--save_disp2=True`, and `--save_flow=True` in the script. 98 | 99 | ## Pretrained Models 100 | 101 | The **[checkpoints](checkpoints/)** folder contains the checkpoints of the pretrained models. 102 | Pretrained models from the ablation study can be downloaded here: [download link](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-hur-self-mono-sf/models/checkpoints_ablation_study.zip) 103 | 104 | 105 | ## Outputs and Visualization 106 | 107 | Output images and visualizations of the main experiments can be downloaded here: 108 | - [Self-supervised, tested on KITTI 2015 Train](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-hur-self-mono-sf/results/self_supervised_KITTI_train.zip) 109 | - [Self-supervised, tested on Eigen Test](https://download.visinf.tu-darmstadt.de/data/2020-cvpr-hur-self-mono-sf/results/self_supervised_Eigen_test.zip) 110 | - [Fine-tuned, tested on KITTI 2015 Train](https://drive.google.com/file/d/1JLCWT5-Ase8VkOkA9PWpkee7K0qpgm64/view?usp=sharing) 111 | 112 | 113 | ## Acknowledgement 114 | 115 | Please cite our paper if you use our source code. 116 | 117 | ```bibtex 118 | @inproceedings{Hur:2020:SSM, 119 | Author = {Junhwa Hur and Stefan Roth}, 120 | Booktitle = {CVPR}, 121 | Title = {Self-Supervised Monocular Scene Flow Estimation}, 122 | Year = {2020} 123 | } 124 | ``` 125 | 126 | - Portions of the source code (e.g., training pipeline, runtime, argument parser, and logger) are from [Jochen Gast](https://scholar.google.com/citations?user=tmRcFacAAAAJ&hl=en) 127 | - MonoDepth evaluation utils from [MonoDepth](https://github.com/mrharicot/monodepth) 128 | - MonoDepth PyTorch Implementation from [OniroAI / MonoDepth-PyTorch](https://github.com/OniroAI/MonoDepth-PyTorch) 129 | 130 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name <env> --file <this file> 3 | # platform: linux-64 4 | _libgcc_mutex=0.1=main 5 | absl-py=0.9.0=pypi_0 6 | blas=1.0=mkl 7 | ca-certificates=2020.6.20=hecda079_0 8 | cachetools=4.1.1=pypi_0 9 | certifi=2020.6.20=py37hc8dfbb8_0 10 | cffi=1.14.1=py37he30daa8_0 11 | chardet=3.0.4=pypi_0 12 | cloudpickle=1.5.0=py_0 13 | colorama=0.4.3=py_0 14 | correlation-cuda=0.0.0=pypi_0 15 | cudatoolkit=10.0.130=0 16 | cycler=0.10.0=py_2 17 | cytoolz=0.10.1=py37h516909a_0 18 | dask-core=2.22.0=py_0 19 | decorator=4.4.2=py_0 20 | forward-warp-cuda=0.0.0=pypi_0 21 | freetype=2.10.2=h5ab3b9f_0 22 | future=0.18.2=py37hc8dfbb8_1 23 | google-auth=1.20.1=pypi_0 24 | google-auth-oauthlib=0.4.1=pypi_0 25 | grpcio=1.31.0=pypi_0 26 | idna=2.10=pypi_0 27 | imagecodecs-lite=2019.12.3=py37h03ebfcd_1 28 | imageio=2.9.0=py_0 29 | importlib-metadata=1.7.0=pypi_0 30 | intel-openmp=2020.1=217 31 | jpeg=9b=h024ee3a_2 32 | kiwisolver=1.2.0=py37h99015e2_0 33 | lcms2=2.11=h396b838_0 34 | ld_impl_linux-64=2.33.1=h53a641e_7 35 | libedit=3.1.20191231=h14c3975_1 36 | libffi=3.3=he6710b0_2 37 | libgcc-ng=9.1.0=hdf63c60_0 38 | libgfortran-ng=7.5.0=hdf63c60_14 39 | libpng=1.6.37=hbc83047_0 40 | libstdcxx-ng=9.1.0=hdf63c60_0 41 | libtiff=4.1.0=h2733197_1 42 | lz4-c=1.9.2=he6710b0_1 43 | markdown=3.2.2=pypi_0 44 | matplotlib-base=3.3.0=py37hd478181_1 45 | mkl=2020.1=217 46 | mkl-service=2.3.0=py37he904b0f_0 47 | mkl_fft=1.1.0=py37h23d657b_0 48 | mkl_random=1.1.1=py37h0573a6f_0 49 | ncurses=6.2=he6710b0_1 50 | networkx=2.4=py_1 51 | ninja=1.10.0=py37hfd86e86_0 52 | numpy=1.19.1=py37hbc911f0_0 53 | numpy-base=1.19.1=py37hfa32c7d_0 54 | oauthlib=3.1.0=pypi_0 55 | olefile=0.46=py37_0 56 | openssl=1.1.1g=h516909a_1 57 | pillow=7.2.0=py37hb39fc2d_0 58 | pip=20.2.2=py37_0 59 | protobuf=3.12.4=pypi_0 60 | pyasn1=0.4.8=pypi_0 61 | pyasn1-modules=0.2.8=pypi_0 62 | pycparser=2.20=py_2 63 | pyparsing=2.4.7=pyh9f0ad1d_0 64 | pypng=0.0.18=pypi_0 65 | python=3.7.7=hcff3b4d_5 66 | python-dateutil=2.8.1=py_0 67 | python_abi=3.7=1_cp37m 68 | pytorch=1.2.0=py3.7_cuda10.0.130_cudnn7.6.2_0 69 | pytz=2020.1=pyh9f0ad1d_0 70 | pywavelets=1.1.1=py37h03ebfcd_1 71 | pyyaml=5.3.1=py37h8f50634_0 72 | readline=8.0=h7b6447c_0 73 | requests=2.24.0=pypi_0 74 | requests-oauthlib=1.3.0=pypi_0 75 | rsa=4.6=pypi_0 76 | scikit-image=0.17.2=py37h0da4684_1 77 | scipy=1.5.0=py37h0b6359f_0 78 | setuptools=49.4.0=py37_0 79 | six=1.15.0=py_0 80 | sqlite=3.32.3=h62c20be_0 81 | tensorboard=2.3.0=pypi_0 82 | tensorboard-plugin-wit=1.7.0=pypi_0 83 | tifffile=2020.6.3=py_0 84 | tk=8.6.10=hbc83047_0 85 | toolz=0.10.0=py_0 86 | torchvision=0.4.0=py37_cu100 87 | tornado=6.0.4=py37h8f50634_1 88 | 
tqdm=4.40.0=py_0 89 | urllib3=1.25.10=pypi_0 90 | werkzeug=1.0.1=pypi_0 91 | wheel=0.34.2=py37_0 92 | xz=5.2.5=h7b6447c_0 93 | yaml=0.2.5=h516909a_0 94 | zipp=3.1.0=pypi_0 95 | zlib=1.2.11=h7b6447c_3 96 | zstd=1.4.5=h9ceee32_0 97 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_aug.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/aug" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_basic.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/basic" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_cc.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/cc" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monodepth_cc_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | 
CHECKPOINT="checkpoints/abl1_monodepth_augmentation/checkpoint_cc_aug.ckpt" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_monodepth 11 | Valid_Loss_Function=Eval_MonoDepth 12 | 13 | # training configuration 14 | SAVE_PATH="eval/abl1_depth/cc_aug" 15 | python ../../main.py \ 16 | --batch_size=1 \ 17 | --batch_size_val=1 \ 18 | --checkpoint=$CHECKPOINT \ 19 | --model=$MODEL \ 20 | --evaluation=True \ 21 | --num_workers=4 \ 22 | --save=$SAVE_PATH \ 23 | --start_epoch=1 \ 24 | --validation_dataset=$Valid_Dataset \ 25 | --validation_dataset_root=$KITTI_HOME \ 26 | --validation_loss=$Valid_Loss_Function \ 27 | --validation_key=ab_r \ 28 | # --save_disp=True 29 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monosf_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monosf_augmentation/checkpoint_base.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl1_sf/base" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monosf_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monosf_augmentation/checkpoint_cc.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl1_sf/cc" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_eval_monosf_cc_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl1_monosf_augmentation/checkpoint_cc_aug.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | 
Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl1_sf/cc_aug" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepthBaseline 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=False \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_Baseline 9 | 10 | # save path 11 | ALIAS="-Aug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepth 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | 
--lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=True \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti_aug_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | # save path 11 | ALIAS="-Aug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepth 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=True \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monodepth_kitti_cc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoDepth_CamConv 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_monodepth 18 | Train_Augmentation=Augmentation_MonoDepthBaseline 19 | Train_Loss_Function=Loss_MonoDepth 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_monodepth 22 | Valid_Loss_Function=Loss_MonoDepth 23 | 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | 
--batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[30, 40]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=1e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=50 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_preprocessing_crop=False \ 44 | --training_dataset_num_examples=-1 \ 45 | --training_key=total_loss \ 46 | --training_loss=$Train_Loss_Function \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_RAW_HOME \ 49 | --validation_dataset_preprocessing_crop=False \ 50 | --validation_key=total_loss \ 51 | --validation_loss=$Valid_Loss_Function 52 | -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monosf_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_Resize_Only 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monosf_camconv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | # save path 11 | ALIAS="-noAug-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_Resize_Only 19 | 
Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation1_augmentation/ablation1_train_monosf_camconv_aug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_CamConv 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_eval_monosf_loss_basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | 
CHECKPOINT="checkpoints/abl2_monosf_loss/checkpoint_basic.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl2/basic" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_eval_monosf_loss_noOcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl2_monosf_loss/checkpoint_wo_occ.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl2/noOcc" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_eval_monosf_loss_noPts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl2_monosf_loss/checkpoint_wo_3dpts.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl2/noPts" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_train_monosf_loss_basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 
experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-loss_ablation_noPts_noOcc-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup_NoPtsNoOcc 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_train_monosf_loss_noOcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-loss_ablation_noOcc-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup_NoOcc 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | 
--validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation2_loss/ablation2_train_monosf_loss_noPts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-loss_ablation_noPts-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup_NoPts 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_disp_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_only_disp.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_DispOnly 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_Disp_Only 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/disp_only" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=d1 \ 31 | #--save_disp=True \ 32 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_flow_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 
3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_only_flow.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_FlowOnly 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_Flow_Only 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/flow_only" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=f1 \ 31 | #--save_flow=True 32 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_cont.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_cont.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Cont 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/cont" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last1.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last1 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last1" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last2.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last2.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last2 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last2" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last3.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last3 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last3" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last4.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last4 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last4" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | 
-------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_eval_monosf_splitting_last5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/abl3_monosf_decoder_split/checkpoint_split_last5.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last5 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/abl3/last5" 16 | python ../../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_disp_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_DispOnly 9 | 10 | # save path 11 | ALIAS="-disp-only-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_Disp_Only 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_Disp_Only 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_flow_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_FlowOnly 
9 | 10 | # save path 11 | ALIAS="-flow-only-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_Flow_Only 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_Flow_Only 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_cont.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Cont 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | 
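Every training script in this ablation uses the same optimization recipe: Adam at an initial learning rate of 2e-4 for 62 epochs, with MultiStepLR halving the rate at epochs 23, 39, 47, and 54. A minimal sketch of what those flags configure in PyTorch (illustration only; the actual loop lives behind main.py, and the single placeholder parameter stands in for the network):

import torch

# Reproduce the schedule set by --optimizer_lr, --lr_scheduler_gamma, and
# --lr_scheduler_milestones: lr = 2e-4, halved at epochs 23, 39, 47, 54.
params = [torch.nn.Parameter(torch.zeros(1))]  # placeholder for model params
optimizer = torch.optim.Adam(params, lr=2e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[23, 39, 47, 54], gamma=0.5)

for epoch in range(62):
    # ... one training epoch would run here ...
    scheduler.step()

print(optimizer.param_groups[0]["lr"])  # 2e-4 * 0.5**4 = 1.25e-05

After the last milestone the rate has decayed by a factor of 16, which matches the long flat tail of the 62-epoch schedule.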
-------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last1 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last2 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | 
--training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last3 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last4 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | 
--optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/ablation3_decoder_split/ablation3_train_monosf_splitting_last5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_Split_Last5 9 | 10 | # save path 11 | ALIAS="-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ 54 | -------------------------------------------------------------------------------- /scripts/eval_monodepth_selfsup_eigen_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_RAW_HOME="" 5 | CHECKPOINT="checkpoints/full_model_eigen/checkpoint_eigen_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_Eigen_Test 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_MonoDepth_Eigen 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monod_selfsup_eigen_test" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | 
--save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_root=$KITTI_RAW_HOME \ 28 | --validation_loss=$Valid_Loss_Function \ 29 | --validation_key=ab_r \ 30 | #--save_disp=True \ -------------------------------------------------------------------------------- /scripts/eval_monodepth_selfsup_kitti_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_Disp_Only 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monod_selfsup_kitti_train" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=ab \ 31 | #--save_disp=True \ -------------------------------------------------------------------------------- /scripts/eval_monosf_finetune_kitti_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti_ft/checkpoint_kitti_ft.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Test 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Test 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monosf_ft_kitti_test" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_root=$KITTI_HOME \ 28 | --validation_loss=$Valid_Loss_Function \ 29 | --validation_key=sf \ 30 | # --save_disp=True \ 31 | # --save_disp2=True \ 32 | # --save_flow=True 33 | -------------------------------------------------------------------------------- /scripts/eval_monosf_selfsup_kitti_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Test 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Test 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monosf_selfsup_kitti_test" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_root=$KITTI_HOME \ 28 | 
--validation_loss=$Valid_Loss_Function \ 29 | --validation_key=sf \ 30 | # --save_disp=True \ 31 | # --save_disp2=True \ 32 | # --save_flow=True 33 | -------------------------------------------------------------------------------- /scripts/eval_monosf_selfsup_kitti_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATASETS_HOME 4 | KITTI_HOME="" 5 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_kitti_split.ckpt" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | Valid_Dataset=KITTI_2015_Train_Full_mnsf 11 | Valid_Augmentation=Augmentation_Resize_Only 12 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 13 | 14 | # training configuration 15 | SAVE_PATH="eval/monosf_selfsup_kitti_train" 16 | python ../main.py \ 17 | --batch_size=1 \ 18 | --batch_size_val=1 \ 19 | --checkpoint=$CHECKPOINT \ 20 | --model=$MODEL \ 21 | --evaluation=True \ 22 | --num_workers=4 \ 23 | --save=$SAVE_PATH \ 24 | --start_epoch=1 \ 25 | --validation_augmentation=$Valid_Augmentation \ 26 | --validation_dataset=$Valid_Dataset \ 27 | --validation_dataset_preprocessing_crop=False \ 28 | --validation_dataset_root=$KITTI_HOME \ 29 | --validation_loss=$Valid_Loss_Function \ 30 | --validation_key=sf \ 31 | # --save_disp=True \ 32 | # --save_disp2=True \ 33 | # --save_flow=True 34 | -------------------------------------------------------------------------------- /scripts/train_monosf_kitti_finetune_1st_stage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # datasets 4 | KITTI_COMB_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-kitti_ft-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_latest.ckpt" 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Comb_Train 18 | Train_Augmentation=Augmentation_SceneFlow_Finetuning 19 | Train_Loss_Function=Loss_SceneFlow_SemiSupFinetune 20 | 21 | Valid_Dataset=KITTI_Comb_Val 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --finetuning=True \ 30 | --checkpoint=$CHECKPOINT \ 31 | --lr_scheduler=MultiStepLR \ 32 | --lr_scheduler_gamma=0.5 \ 33 | --lr_scheduler_milestones="[125, 187, 250, 281, 312]" \ 34 | --model=$MODEL \ 35 | --num_workers=16 \ 36 | --optimizer=Adam \ 37 | --optimizer_lr=4e-5 \ 38 | --save=$SAVE_PATH \ 39 | --total_epochs=343 \ 40 | --training_augmentation=$Train_Augmentation \ 41 | --training_augmentation_photometric=True \ 42 | --training_dataset=$Train_Dataset \ 43 | --training_dataset_root=$KITTI_COMB_HOME \ 44 | --training_loss=$Train_Loss_Function \ 45 | --training_key=total_loss \ 46 | --validation_augmentation=$Valid_Augmentation \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_COMB_HOME \ 49 | --validation_key=sf \ 50 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/train_monosf_kitti_finetune_2nd_stage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # datasets 4 | KITTI_COMB_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-kitti_ft-" 12 | TIME=$(date 
+"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT="checkpoints/full_model_kitti/checkpoint_latest.ckpt" 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Comb_Full 18 | Train_Augmentation=Augmentation_SceneFlow_Finetuning 19 | Train_Loss_Function=Loss_SceneFlow_SemiSupFinetune 20 | 21 | Valid_Dataset=KITTI_Comb_Val 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Eval_SceneFlow_KITTI_Train 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --finetuning=True \ 30 | --checkpoint=$CHECKPOINT \ 31 | --lr_scheduler=MultiStepLR \ 32 | --lr_scheduler_gamma=0.5 \ 33 | --lr_scheduler_milestones="[112, 162, 212, 237, 262]" \ 34 | --model=$MODEL \ 35 | --num_workers=16 \ 36 | --optimizer=Adam \ 37 | --optimizer_lr=4e-5 \ 38 | --save=$SAVE_PATH \ 39 | --total_epochs=157 \ 40 | --training_augmentation=$Train_Augmentation \ 41 | --training_augmentation_photometric=True \ 42 | --training_dataset=$Train_Dataset \ 43 | --training_dataset_root=$KITTI_COMB_HOME \ 44 | --training_loss=$Train_Loss_Function \ 45 | --training_key=total_loss \ 46 | --validation_augmentation=$Valid_Augmentation \ 47 | --validation_dataset=$Valid_Dataset \ 48 | --validation_dataset_root=$KITTI_COMB_HOME \ 49 | --validation_key=sf \ 50 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/train_monosf_selfsup_eigen_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-eigen-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_EigenSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_EigenSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /scripts/train_monosf_selfsup_kitti_raw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 
experiments and datasets meta 4 | KITTI_RAW_HOME="" 5 | EXPERIMENTS_HOME="" 6 | 7 | # model 8 | MODEL=MonoSceneFlow_fullmodel 9 | 10 | # save path 11 | ALIAS="-kitti-" 12 | TIME=$(date +"%Y%m%d-%H%M%S") 13 | SAVE_PATH="$EXPERIMENTS_HOME/$MODEL$ALIAS$TIME" 14 | CHECKPOINT=None 15 | 16 | # Loss and Augmentation 17 | Train_Dataset=KITTI_Raw_KittiSplit_Train_mnsf 18 | Train_Augmentation=Augmentation_SceneFlow 19 | Train_Loss_Function=Loss_SceneFlow_SelfSup 20 | 21 | Valid_Dataset=KITTI_Raw_KittiSplit_Valid_mnsf 22 | Valid_Augmentation=Augmentation_Resize_Only 23 | Valid_Loss_Function=Loss_SceneFlow_SelfSup 24 | 25 | # training configuration 26 | python ../main.py \ 27 | --batch_size=4 \ 28 | --batch_size_val=1 \ 29 | --checkpoint=$CHECKPOINT \ 30 | --lr_scheduler=MultiStepLR \ 31 | --lr_scheduler_gamma=0.5 \ 32 | --lr_scheduler_milestones="[23, 39, 47, 54]" \ 33 | --model=$MODEL \ 34 | --num_workers=16 \ 35 | --optimizer=Adam \ 36 | --optimizer_lr=2e-4 \ 37 | --save=$SAVE_PATH \ 38 | --total_epochs=62 \ 39 | --training_augmentation=$Train_Augmentation \ 40 | --training_augmentation_photometric=True \ 41 | --training_dataset=$Train_Dataset \ 42 | --training_dataset_root=$KITTI_RAW_HOME \ 43 | --training_dataset_flip_augmentations=True \ 44 | --training_dataset_preprocessing_crop=True \ 45 | --training_dataset_num_examples=-1 \ 46 | --training_key=total_loss \ 47 | --training_loss=$Train_Loss_Function \ 48 | --validation_augmentation=$Valid_Augmentation \ 49 | --validation_dataset=$Valid_Dataset \ 50 | --validation_dataset_root=$KITTI_RAW_HOME \ 51 | --validation_dataset_preprocessing_crop=False \ 52 | --validation_key=total_loss \ 53 | --validation_loss=$Valid_Loss_Function \ -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visinf/self-mono-sf/eec356d95038da49e6705194e6dc0780b750f2b0/utils/__init__.py -------------------------------------------------------------------------------- /utils/flow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | import png 5 | import matplotlib.colors as cl 6 | from skimage import io 7 | 8 | TAG_CHAR = np.array([202021.25], np.float32) 9 | UNKNOWN_FLOW_THRESH = 1e7 10 | 11 | 12 | def write_depth_png(filename, disp_map): 13 | 14 | io.imsave(filename, (disp_map * 256.0).astype(np.uint16)) 15 | 16 | 17 | def write_flow_png(filename, uv, v=None, mask=None): 18 | 19 | if v is None: 20 | assert (uv.ndim == 3) 21 | assert (uv.shape[2] == 2) 22 | u = uv[:, :, 0] 23 | v = uv[:, :, 1] 24 | else: 25 | u = uv 26 | 27 | assert (u.shape == v.shape) 28 | 29 | height_img, width_img = u.shape 30 | if mask is None: 31 | valid_mask = np.ones([height_img, width_img]) 32 | else: 33 | valid_mask = mask 34 | 35 | flow_u = np.clip((u * 64 + 2 ** 15), 0.0, 65535.0).astype(np.uint16) 36 | flow_v = np.clip((v * 64 + 2 ** 15), 0.0, 65535.0).astype(np.uint16) 37 | 38 | output = np.stack((flow_u, flow_v, valid_mask), axis=-1) 39 | 40 | with open(filename, 'wb') as f: 41 | writer = png.Writer(width=width_img, height=height_img, bitdepth=16) 42 | writer.write(f, np.reshape(output, (-1, width_img*3))) 43 | 44 | 45 | def compute_color(u, v): 46 | """ 47 | compute optical flow color map 48 | :param u: optical flow horizontal map 49 | :param v: optical flow vertical map 50 | :return: optical flow in 
color code 51 | """ 52 | [h, w] = u.shape 53 | img = np.zeros([h, w, 3]) 54 | nanIdx = np.isnan(u) | np.isnan(v) 55 | u[nanIdx] = 0 56 | v[nanIdx] = 0 57 | 58 | colorwheel = make_color_wheel() 59 | ncols = np.size(colorwheel, 0) 60 | 61 | rad = np.sqrt(u ** 2 + v ** 2) 62 | 63 | a = np.arctan2(-v, -u) / np.pi 64 | 65 | fk = (a + 1) / 2 * (ncols - 1) + 1 66 | 67 | k0 = np.floor(fk).astype(int) 68 | 69 | k1 = k0 + 1 70 | k1[k1 == ncols + 1] = 1 71 | f = fk - k0 72 | 73 | for i in range(0, np.size(colorwheel, 1)): 74 | tmp = colorwheel[:, i] 75 | col0 = tmp[k0 - 1] / 255 76 | col1 = tmp[k1 - 1] / 255 77 | col = (1 - f) * col0 + f * col1 78 | 79 | idx = rad <= 1 80 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 81 | notidx = np.logical_not(idx) 82 | 83 | col[notidx] *= 0.75 84 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) 85 | 86 | return img 87 | 88 | 89 | def make_color_wheel(): 90 | """ 91 | Generate a color wheel according to the Middlebury color code 92 | :return: Color wheel 93 | """ 94 | RY = 15 95 | YG = 6 96 | GC = 4 97 | CB = 11 98 | BM = 13 99 | MR = 6 100 | 101 | ncols = RY + YG + GC + CB + BM + MR 102 | 103 | colorwheel = np.zeros([ncols, 3]) 104 | 105 | col = 0 106 | 107 | # RY 108 | colorwheel[0:RY, 0] = 255 109 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 110 | col += RY 111 | 112 | # YG 113 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 114 | colorwheel[col:col + YG, 1] = 255 115 | col += YG 116 | 117 | # GC 118 | colorwheel[col:col + GC, 1] = 255 119 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 120 | col += GC 121 | 122 | # CB 123 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 124 | colorwheel[col:col + CB, 2] = 255 125 | col += CB 126 | 127 | # BM 128 | colorwheel[col:col + BM, 2] = 255 129 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 130 | col += BM 131 | 132 | # MR 133 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 134 | colorwheel[col:col + MR, 0] = 255 135 | 136 | return colorwheel 137 | 138 | 139 | def flow_to_png_middlebury(flow): 140 | """ 141 | Convert flow into a Middlebury color code image 142 | :param flow: optical flow map 143 | :return: optical flow image in Middlebury color 144 | """ 145 | 146 | flow = flow.transpose([1, 2, 0]) 147 | u = flow[:, :, 0] 148 | v = flow[:, :, 1] 149 | 150 | maxu = -999. 151 | maxv = -999. 152 | minu = 999. 153 | minv = 999.
154 | 155 | idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) 156 | u[idxUnknow] = 0 157 | v[idxUnknow] = 0 158 | 159 | maxu = max(maxu, np.max(u)) 160 | minu = min(minu, np.min(u)) 161 | 162 | maxv = max(maxv, np.max(v)) 163 | minv = min(minv, np.min(v)) 164 | 165 | rad = np.sqrt(u ** 2 + v ** 2) 166 | maxrad = max(-1, np.max(rad)) 167 | 168 | u = u / (maxrad + np.finfo(float).eps) 169 | v = v / (maxrad + np.finfo(float).eps) 170 | 171 | img = compute_color(u, v) 172 | 173 | idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) 174 | img[idx] = 0 175 | 176 | return np.uint8(img) 177 | 178 | 179 | def np_flow2rgb(flow_map, max_value=None): 180 | _, h, w = flow_map.shape 181 | # flow_map[:,(flow_map[0] == 0) & (flow_map[1] == 0)] = float('nan') 182 | # print np.any(np.isnan(flow_map)) 183 | rgb_map = np.ones((h, w, 3)).astype(np.float32) 184 | # normalize by max_value if given, else by the flow's own maximum magnitude 185 | if max_value is not None: 186 | normalized_flow_map = flow_map / max_value 187 | else: 188 | normalized_flow_map = flow_map / (np.abs(flow_map).max()) 189 | 190 | rgb_map[:, :, 0] += normalized_flow_map[0] 191 | rgb_map[:, :, 1] -= 0.5 * (normalized_flow_map[0] + normalized_flow_map[1]) 192 | rgb_map[:, :, 2] += normalized_flow_map[1] 193 | return rgb_map.clip(0, 1) 194 | -------------------------------------------------------------------------------- /utils/interpolation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | import torch.nn.functional as tf 8 | 9 | 10 | def interpolate2d(inputs, size, mode="bilinear"): 11 | return tf.interpolate(inputs, size, mode=mode, align_corners=True) 12 | 13 | 14 | def interpolate2d_as(inputs, target_as, mode="bilinear"): 15 | _, _, h, w = target_as.size() 16 | return tf.interpolate(inputs, [h, w], mode=mode, align_corners=True) 17 | 18 | 19 | def _bchw2bhwc(tensor): 20 | return tensor.transpose(1,2).transpose(2,3) 21 | 22 | 23 | def _bhwc2bchw(tensor): 24 | return tensor.transpose(2,3).transpose(1,2) 25 | 26 | class Meshgrid(nn.Module): 27 | def __init__(self): 28 | super(Meshgrid, self).__init__() 29 | self.width = 0 30 | self.height = 0 31 | self.register_buffer("xx", torch.zeros(1,1)) 32 | self.register_buffer("yy", torch.zeros(1,1)) 33 | self.register_buffer("rangex", torch.zeros(1,1)) 34 | self.register_buffer("rangey", torch.zeros(1,1)) 35 | 36 | def _compute_meshgrid(self, width, height): 37 | torch.arange(0, width, out=self.rangex) 38 | torch.arange(0, height, out=self.rangey) 39 | self.xx = self.rangex.repeat(height, 1).contiguous() 40 | self.yy = self.rangey.repeat(width, 1).t().contiguous() 41 | 42 | def forward(self, width, height): 43 | if self.width != width or self.height != height: 44 | self._compute_meshgrid(width=width, height=height) 45 | self.width = width 46 | self.height = height 47 | return self.xx, self.yy 48 | 49 | 50 | class BatchSub2Ind(nn.Module): 51 | def __init__(self): 52 | super(BatchSub2Ind, self).__init__() 53 | self.register_buffer("_offsets", torch.LongTensor()) 54 | 55 | def forward(self, shape, row_sub, col_sub, out=None): 56 | batch_size = row_sub.size(0) 57 | height, width = shape 58 | ind = row_sub*width + col_sub 59 | torch.arange(batch_size, out=self._offsets) 60 | self._offsets *= (height*width) 61 | 62 | if out is None: 63 | return torch.add(ind, self._offsets.view(-1,1,1)) 64 | else: 65 | torch.add(ind,
self._offsets.view(-1,1,1), out=out) 66 | 67 | 68 | class Interp2(nn.Module): 69 | def __init__(self, clamp=False): 70 | super(Interp2, self).__init__() 71 | self._clamp = clamp 72 | self._batch_sub2ind = BatchSub2Ind() 73 | self.register_buffer("_x0", torch.LongTensor()) 74 | self.register_buffer("_x1", torch.LongTensor()) 75 | self.register_buffer("_y0", torch.LongTensor()) 76 | self.register_buffer("_y1", torch.LongTensor()) 77 | self.register_buffer("_i00", torch.LongTensor()) 78 | self.register_buffer("_i01", torch.LongTensor()) 79 | self.register_buffer("_i10", torch.LongTensor()) 80 | self.register_buffer("_i11", torch.LongTensor()) 81 | self.register_buffer("_v00", torch.FloatTensor()) 82 | self.register_buffer("_v01", torch.FloatTensor()) 83 | self.register_buffer("_v10", torch.FloatTensor()) 84 | self.register_buffer("_v11", torch.FloatTensor()) 85 | self.register_buffer("_x", torch.FloatTensor()) 86 | self.register_buffer("_y", torch.FloatTensor()) 87 | 88 | def forward(self, v, xq, yq): 89 | batch_size, channels, height, width = v.size() 90 | 91 | # clamp if wanted 92 | if self._clamp: 93 | xq.clamp_(0, width - 1) 94 | yq.clamp_(0, height - 1) 95 | 96 | # ------------------------------------------------------------------ 97 | # Find neighbors 98 | # 99 | # x0 = torch.floor(xq).long(), x0.clamp_(0, width - 1) 100 | # x1 = x0 + 1, x1.clamp_(0, width - 1) 101 | # y0 = torch.floor(yq).long(), y0.clamp_(0, height - 1) 102 | # y1 = y0 + 1, y1.clamp_(0, height - 1) 103 | # 104 | # ------------------------------------------------------------------ 105 | self._x0 = torch.floor(xq).long().clamp(0, width - 1) 106 | self._y0 = torch.floor(yq).long().clamp(0, height - 1) 107 | 108 | self._x1 = torch.add(self._x0, 1).clamp(0, width - 1) 109 | self._y1 = torch.add(self._y0, 1).clamp(0, height - 1) 110 | 111 | # batch_sub2ind 112 | self._batch_sub2ind([height, width], self._y0, self._x0, out=self._i00) 113 | self._batch_sub2ind([height, width], self._y0, self._x1, out=self._i01) 114 | self._batch_sub2ind([height, width], self._y1, self._x0, out=self._i10) 115 | self._batch_sub2ind([height, width], self._y1, self._x1, out=self._i11) 116 | 117 | # reshape 118 | v_flat = _bchw2bhwc(v).contiguous().view(-1, channels) 119 | torch.index_select(v_flat, dim=0, index=self._i00.view(-1), out=self._v00) 120 | torch.index_select(v_flat, dim=0, index=self._i01.view(-1), out=self._v01) 121 | torch.index_select(v_flat, dim=0, index=self._i10.view(-1), out=self._v10) 122 | torch.index_select(v_flat, dim=0, index=self._i11.view(-1), out=self._v11) 123 | 124 | # local_coords 125 | torch.add(xq, - self._x0.float(), out=self._x) 126 | torch.add(yq, - self._y0.float(), out=self._y) 127 | 128 | # weights 129 | w00 = torch.unsqueeze((1.0 - self._y) * (1.0 - self._x), dim=1) 130 | w01 = torch.unsqueeze((1.0 - self._y) * self._x, dim=1) 131 | w10 = torch.unsqueeze(self._y * (1.0 - self._x), dim=1) 132 | w11 = torch.unsqueeze(self._y * self._x, dim=1) 133 | 134 | def _reshape(u): 135 | return _bhwc2bchw(u.view(batch_size, height, width, channels)) 136 | 137 | # values 138 | values = _reshape(self._v00)*w00 + _reshape(self._v01)*w01 \ 139 | + _reshape(self._v10)*w10 + _reshape(self._v11)*w11 140 | 141 | if self._clamp: 142 | return values 143 | else: 144 | # find_invalid 145 | invalid = ((xq < 0) | (xq >= width) | (yq < 0) | (yq >= height)).unsqueeze(dim=1).float() 146 | # maskout invalid 147 | transformed = invalid * torch.zeros_like(values) + (1.0 - invalid)*values 148 | 149 | return transformed 150 | 
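Interp2 above implements batched bilinear interpolation by hand: for each query coordinate it gathers the four integer neighbors through BatchSub2Ind, blends them with the usual (1 - x)(1 - y) weights, and, when clamp=False, zeroes out queries that fall outside the image. A small CPU-only sanity check, assuming the repository root is on the import path and its requirements are installed (the expected value follows from the math, not from any repo test):

import torch
from utils.interpolation import Interp2

# Sample a 2x2 image at its center: all four pixels 0, 1, 2, 3 contribute
# with weight 0.25 each, so every output value is 1.5.
v = torch.arange(4.0).view(1, 1, 2, 2)   # values [[0, 1], [2, 3]]
xq = torch.full((1, 2, 2), 0.5)          # query grid, same spatial size as v
yq = torch.full((1, 2, 2), 0.5)
interp = Interp2(clamp=True)
print(interp(v, xq, yq))                 # tensor of 1.5, shape (1, 1, 2, 2)

Note that the query grid must contain as many points as the input has pixels, because forward reshapes the gathered values back to (b, c, h, w) at the end.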
-------------------------------------------------------------------------------- /utils/monodepth_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as tf 6 | 7 | 8 | def disp_post_processing(disp): 9 | b, _, h, w = disp.shape 10 | b_h = int(b/2) 11 | 12 | l_disp = disp[0:b_h, :, :, :] 13 | r_disp = torch.flip(disp[b_h:, :, :, :], [3]) 14 | m_disp = 0.5 * (l_disp + r_disp) 15 | grid_l = torch.linspace(0.0, 1.0, w).view(1, 1, 1, w).expand(1, 1, h, w).float().requires_grad_(False).cuda() 16 | l_mask = 1.0 - torch.clamp(20 * (grid_l - 0.05), 0, 1) 17 | r_mask = torch.flip(l_mask, [3]) 18 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 19 | 20 | 21 | def compute_errors(gt, pred): 22 | thresh = torch.max((gt / pred), (pred / gt)) 23 | a1 = (thresh < 1.25).float().mean() 24 | a2 = (thresh < 1.25 ** 2).float().mean() 25 | a3 = (thresh < 1.25 ** 3).float().mean() 26 | 27 | rmse = (gt - pred) ** 2 28 | rmse = torch.sqrt(rmse.mean()) 29 | 30 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 31 | rmse_log = torch.sqrt(rmse_log.mean()) 32 | 33 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 34 | 35 | sq_rel = torch.mean(((gt - pred) ** 2) / gt) 36 | 37 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 38 | 39 | 40 | def compute_d1_all(gt_disps, disp_t, gt_mask): 41 | disp_diff = torch.abs(gt_disps[gt_mask] - disp_t[gt_mask]) 42 | bad_pixels = (disp_diff >= 3) & ((disp_diff / gt_disps[gt_mask]) >= 0.05) 43 | d1_all = 100.0 * bad_pixels.sum().float() / gt_mask.sum().float() 44 | 45 | return d1_all 46 | -------------------------------------------------------------------------------- /utils/sceneflow_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as tf 6 | 7 | 8 | def post_processing(l_disp, r_disp): 9 | 10 | b, _, h, w = l_disp.shape 11 | m_disp = 0.5 * (l_disp + r_disp) 12 | grid_l = torch.linspace(0.0, 1.0, w).view(1, 1, 1, w).expand(1, 1, h, w).float().requires_grad_(False).cuda() 13 | l_mask = 1.0 - torch.clamp(20 * (grid_l - 0.05), 0, 1) 14 | r_mask = torch.flip(l_mask, [3]) 15 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 16 | 17 | 18 | def flow_horizontal_flip(flow_input): 19 | 20 | flow_flip = torch.flip(flow_input, [3]) 21 | flow_flip[:, 0:1, :, :] *= -1 22 | 23 | return flow_flip.contiguous() 24 | 25 | 26 | def disp2depth_kitti(pred_disp, k_value): 27 | 28 | pred_depth = k_value.unsqueeze(1).unsqueeze(1).unsqueeze(1) * 0.54 / (pred_disp + 1e-8) 29 | pred_depth = torch.clamp(pred_depth, 1e-3, 80) 30 | 31 | return pred_depth 32 | 33 | 34 | def get_pixelgrid(b, h, w): 35 | grid_h = torch.linspace(0.0, w - 1, w).view(1, 1, 1, w).expand(b, 1, h, w) 36 | grid_v = torch.linspace(0.0, h - 1, h).view(1, 1, h, 1).expand(b, 1, h, w) 37 | 38 | ones = torch.ones_like(grid_h) 39 | pixelgrid = torch.cat((grid_h, grid_v, ones), dim=1).float().requires_grad_(False).cuda() 40 | 41 | return pixelgrid 42 | 43 | 44 | def pixel2pts(intrinsics, depth): 45 | b, _, h, w = depth.size() 46 | 47 | pixelgrid = get_pixelgrid(b, h, w) 48 | 49 | depth_mat = depth.view(b, 1, -1) 50 | pixel_mat = pixelgrid.view(b, 3, -1) 51 | pts_mat = torch.matmul(torch.inverse(intrinsics.cpu()).cuda(), 
pixel_mat) * depth_mat 52 | pts = pts_mat.view(b, -1, h, w) 53 | 54 | return pts, pixelgrid 55 | 56 | 57 | def pts2pixel(pts, intrinsics): 58 | b, _, h, w = pts.size() 59 | proj_pts = torch.matmul(intrinsics, pts.view(b, 3, -1)) 60 | pixels_mat = proj_pts.div(proj_pts[:, 2:3, :] + 1e-8)[:, 0:2, :] 61 | 62 | return pixels_mat.view(b, 2, h, w) 63 | 64 | 65 | def intrinsic_scale(intrinsic, scale_y, scale_x): 66 | b, h, w = intrinsic.size() 67 | fx = intrinsic[:, 0, 0] * scale_x 68 | fy = intrinsic[:, 1, 1] * scale_y 69 | cx = intrinsic[:, 0, 2] * scale_x 70 | cy = intrinsic[:, 1, 2] * scale_y 71 | 72 | zeros = torch.zeros_like(fx) 73 | r1 = torch.stack([fx, zeros, cx], dim=1) 74 | r2 = torch.stack([zeros, fy, cy], dim=1) 75 | r3 = torch.tensor([0., 0., 1.], requires_grad=False).cuda().unsqueeze(0).expand(b, -1) 76 | intrinsic_s = torch.stack([r1, r2, r3], dim=1) 77 | 78 | return intrinsic_s 79 | 80 | 81 | def pixel2pts_ms(intrinsic, output_disp, rel_scale): 82 | # pixel2pts 83 | intrinsic_dp_s = intrinsic_scale(intrinsic, rel_scale[:,0], rel_scale[:,1]) 84 | output_depth = disp2depth_kitti(output_disp, intrinsic_dp_s[:, 0, 0]) 85 | pts, _ = pixel2pts(intrinsic_dp_s, output_depth) 86 | 87 | return pts, intrinsic_dp_s 88 | 89 | 90 | def pts2pixel_ms(intrinsic, pts, output_sf, disp_size): 91 | 92 | # +sceneflow and reprojection 93 | sf_s = tf.interpolate(output_sf, disp_size, mode="bilinear", align_corners=True) 94 | pts_tform = pts + sf_s 95 | coord = pts2pixel(pts_tform, intrinsic) 96 | 97 | norm_coord_w = coord[:, 0:1, :, :] / (disp_size[1] - 1) * 2 - 1 98 | norm_coord_h = coord[:, 1:2, :, :] / (disp_size[0] - 1) * 2 - 1 99 | norm_coord = torch.cat((norm_coord_w, norm_coord_h), dim=1) 100 | 101 | return sf_s, pts_tform, norm_coord 102 | 103 | 104 | def reconstructImg(coord, img): 105 | grid = coord.transpose(1, 2).transpose(2, 3) 106 | img_warp = tf.grid_sample(img, grid) 107 | 108 | mask = torch.ones_like(img, requires_grad=False) 109 | mask = tf.grid_sample(mask, grid) 110 | mask = (mask >= 1.0).float() 111 | return img_warp * mask 112 | 113 | 114 | def reconstructPts(coord, pts): 115 | grid = coord.transpose(1, 2).transpose(2, 3) 116 | pts_warp = tf.grid_sample(pts, grid) 117 | 118 | mask = torch.ones_like(pts, requires_grad=False) 119 | mask = tf.grid_sample(mask, grid) 120 | mask = (mask >= 1.0).float() 121 | return pts_warp * mask 122 | 123 | 124 | def projectSceneFlow2Flow(intrinsic, sceneflow, disp): 125 | 126 | _, _, h, w = disp.size() 127 | 128 | output_depth = disp2depth_kitti(disp, intrinsic[:, 0, 0]) 129 | pts, pixelgrid = pixel2pts(intrinsic, output_depth) 130 | 131 | sf_s = tf.interpolate(sceneflow, [h, w], mode="bilinear", align_corners=True) 132 | pts_tform = pts + sf_s 133 | coord = pts2pixel(pts_tform, intrinsic) 134 | flow = coord - pixelgrid[:, 0:2, :, :] 135 | 136 | return flow 137 | --------------------------------------------------------------------------------
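As a closing note on the geometry helpers: disp2depth_kitti encodes the stereo relation depth = f_x * B / disparity with the KITTI baseline B = 0.54 m baked in, clamping the result to [1e-3, 80] m. A quick CPU sanity check, assuming the repository root is on the import path (the focal length 721.5377 px is a typical KITTI calibration value, used here purely for illustration):

import torch
from utils.sceneflow_util import disp2depth_kitti

disp = torch.full((1, 1, 4, 4), 50.0)  # disparity map, 50 px everywhere
fx = torch.tensor([721.5377])          # per-sample horizontal focal length
depth = disp2depth_kitti(disp, fx)
print(depth[0, 0, 0, 0])               # 721.5377 * 0.54 / 50 is about 7.79 m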