├── .gitignore ├── LICENSE.md ├── Makefile ├── README.md ├── configs ├── default_config.py ├── eval_ddad.yaml ├── eval_image.yaml ├── eval_kitti.yaml ├── overfit_ddad.yaml ├── overfit_kitti.yaml ├── train_ddad.yaml ├── train_kitti.yaml ├── train_omnicam.yaml ├── train_packnet_san_ddad.yaml └── train_packnet_san_kitti.yaml ├── docker └── Dockerfile ├── docs ├── .nojekyll ├── README.html ├── _modules │ ├── index.html │ ├── packnet_sfm │ │ ├── datasets │ │ │ ├── augmentations.html │ │ │ ├── dgp_dataset.html │ │ │ ├── image_dataset.html │ │ │ ├── kitti_dataset.html │ │ │ ├── kitti_dataset_utils.html │ │ │ └── transforms.html │ │ ├── geometry │ │ │ ├── camera.html │ │ │ ├── camera_utils.html │ │ │ ├── pose.html │ │ │ └── pose_utils.html │ │ ├── losses │ │ │ ├── loss_base.html │ │ │ ├── multiview_photometric_loss.html │ │ │ └── supervised_loss.html │ │ ├── models │ │ │ ├── SelfSupModel.html │ │ │ ├── SemiSupModel.html │ │ │ ├── SfmModel.html │ │ │ ├── model_checkpoint.html │ │ │ ├── model_utils.html │ │ │ └── model_wrapper.html │ │ ├── networks │ │ │ ├── depth │ │ │ │ ├── DepthResNet.html │ │ │ │ └── PackNet01.html │ │ │ ├── layers │ │ │ │ ├── packnet │ │ │ │ │ └── layers01.html │ │ │ │ └── resnet │ │ │ │ │ ├── depth_decoder.html │ │ │ │ │ ├── layers.html │ │ │ │ │ ├── pose_decoder.html │ │ │ │ │ └── resnet_encoder.html │ │ │ └── pose │ │ │ │ ├── PoseNet.html │ │ │ │ └── PoseResNet.html │ │ ├── trainers │ │ │ └── base_trainer.html │ │ └── utils │ │ │ ├── config.html │ │ │ ├── depth.html │ │ │ ├── horovod.html │ │ │ ├── image.html │ │ │ ├── load.html │ │ │ ├── logging.html │ │ │ ├── misc.html │ │ │ ├── reduce.html │ │ │ ├── save.html │ │ │ └── types.html │ └── scripts │ │ ├── eval.html │ │ ├── infer.html │ │ └── train.html ├── _sources │ ├── README.rst.txt │ ├── configs │ │ ├── configs.default_config.rst.txt │ │ ├── configs.eval_ddad.rst.txt │ │ ├── configs.eval_kitti.rst.txt │ │ ├── configs.overfit_ddad.rst.txt │ │ ├── configs.overfit_kitti.rst.txt │ │ ├── configs.rst.txt │ │ ├── configs.train_ddad.rst.txt │ │ └── configs.train_kitti.rst.txt │ ├── datasets │ │ ├── KITTIDataset.rst.txt │ │ ├── datasets.DGPDataset.rst.txt │ │ ├── datasets.ImageDataset.rst.txt │ │ ├── datasets.KITTIDataset.rst.txt │ │ ├── datasets.KITTIDataset_utils.rst.txt │ │ ├── datasets.augmentations.rst.txt │ │ ├── datasets.rst.txt │ │ └── datasets.transforms.rst.txt │ ├── geometry │ │ ├── camera.rst.txt │ │ ├── geometry.camera.camera.rst.txt │ │ ├── geometry.camera.camera_utils.rst.txt │ │ ├── geometry.pose.pose.rst.txt │ │ ├── geometry.pose.pose_utils.rst.txt │ │ ├── geometry.rst.txt │ │ └── pose.rst.txt │ ├── index.rst.txt │ ├── loggers │ │ ├── loggers.WandbLogger.rst.txt │ │ └── loggers.rst.txt │ ├── losses │ │ ├── losses.loss_base.rst.txt │ │ ├── losses.multiview_photometric_loss.rst.txt │ │ ├── losses.rst.txt │ │ └── losses.supervised_loss.rst.txt │ ├── models │ │ ├── models.Checkpoint.rst.txt │ │ ├── models.SelfSupModel.rst.txt │ │ ├── models.SemiSupModel.rst.txt │ │ ├── models.SfmModel.rst.txt │ │ ├── models.Utilities.rst.txt │ │ ├── models.Wrapper.rst.txt │ │ └── models.rst.txt │ ├── networks │ │ ├── depth │ │ │ ├── depth.rst.txt │ │ │ ├── networks.depth.DepthResNet.rst.txt │ │ │ └── networks.depth.PackNet01.rst.txt │ │ ├── layers │ │ │ ├── layers.rst.txt │ │ │ ├── packnet │ │ │ │ ├── layers01.rst.txt │ │ │ │ └── packnet.rst.txt │ │ │ └── resnet │ │ │ │ ├── depth_decoder.rst.txt │ │ │ │ ├── layers.rst.txt │ │ │ │ ├── pose_decoder.rst.txt │ │ │ │ ├── resnet.rst.txt │ │ │ │ └── resnet_encoder.rst.txt │ │ ├── networks.rst.txt │ │ └── pose │ │ │ ├── networks.pose.PoseNet.rst.txt │ │ │ ├── networks.pose.PoseResNet.rst.txt │ │ │ └── pose.rst.txt │ ├── scripts │ │ ├── scripts.eval.rst.txt │ │ ├── scripts.infer.rst.txt │ │ ├── scripts.rst.txt │ │ └── scripts.train.rst.txt │ ├── trainers │ │ ├── trainers.BaseTrainer.rst.txt │ │ ├── trainers.HorovodTrainer.rst.txt │ │ └── trainers.rst.txt │ └── utils │ │ ├── utils.config.rst.txt │ │ ├── utils.depth.rst.txt │ │ ├── utils.horovod.rst.txt │ │ ├── utils.image.rst.txt │ │ ├── utils.load.rst.txt │ │ ├── utils.logging.rst.txt │ │ ├── utils.misc.rst.txt │ │ ├── utils.reduce.rst.txt │ │ ├── utils.rst.txt │ │ ├── utils.save.rst.txt │ │ └── utils.types.rst.txt ├── _static │ ├── basic.css │ ├── css │ │ ├── badge_only.css │ │ └── theme.css │ ├── custom.css │ ├── doctools.js │ ├── documentation_options.js │ ├── file.png │ ├── fonts │ │ ├── Inconsolata-Bold.ttf │ │ ├── Inconsolata-Regular.ttf │ │ ├── Inconsolata.ttf │ │ ├── Lato-Bold.ttf │ │ ├── Lato-Regular.ttf │ │ ├── Lato │ │ │ ├── lato-bold.eot │ │ │ ├── lato-bold.ttf │ │ │ ├── lato-bold.woff │ │ │ ├── lato-bold.woff2 │ │ │ ├── lato-bolditalic.eot │ │ │ ├── lato-bolditalic.ttf │ │ │ ├── lato-bolditalic.woff │ │ │ ├── lato-bolditalic.woff2 │ │ │ ├── lato-italic.eot │ │ │ ├── lato-italic.ttf │ │ │ ├── lato-italic.woff │ │ │ ├── lato-italic.woff2 │ │ │ ├── lato-regular.eot │ │ │ ├── lato-regular.ttf │ │ │ ├── lato-regular.woff │ │ │ └── lato-regular.woff2 │ │ ├── RobotoSlab-Bold.ttf │ │ ├── RobotoSlab-Regular.ttf │ │ ├── RobotoSlab │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ └── roboto-slab-v7-regular.woff2 │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.svg │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ └── fontawesome-webfont.woff2 │ ├── jquery-3.5.1.js │ ├── jquery.js │ ├── js │ │ ├── modernizr.min.js │ │ └── theme.js │ ├── language_data.js │ ├── logo.png │ ├── minus.png │ ├── plus.png │ ├── pygments.css │ ├── searchtools.js │ ├── underscore-1.3.1.js │ └── underscore.js ├── configs │ ├── configs.default_config.html │ ├── configs.eval_ddad.html │ ├── configs.eval_kitti.html │ ├── configs.html │ ├── configs.overfit_ddad.html │ ├── configs.overfit_kitti.html │ ├── configs.train_ddad.html │ └── configs.train_kitti.html ├── datasets │ ├── KITTIDataset.html │ ├── datasets.DGPDataset.html │ ├── datasets.ImageDataset.html │ ├── datasets.KITTIDataset.html │ ├── datasets.KITTIDataset_utils.html │ ├── datasets.augmentations.html │ ├── datasets.html │ └── datasets.transforms.html ├── genindex.html ├── geometry │ ├── camera.html │ ├── geometry.camera.camera.html │ ├── geometry.camera.camera_utils.html │ ├── geometry.html │ ├── geometry.pose.pose.html │ ├── geometry.pose.pose_utils.html │ └── pose.html ├── index.html ├── loggers │ ├── loggers.WandbLogger.html │ └── loggers.html ├── losses │ ├── losses.html │ ├── losses.loss_base.html │ ├── losses.multiview_photometric_loss.html │ └── losses.supervised_loss.html ├── models │ ├── models.Checkpoint.html │ ├── models.SelfSupModel.html │ ├── models.SemiSupModel.html │ ├── models.SfmModel.html │ ├── models.Utilities.html │ ├── models.Wrapper.html │ └── models.html ├── networks │ ├── depth │ │ ├── depth.html │ │ ├── networks.depth.DepthResNet.html │ │ └── networks.depth.PackNet01.html │ ├── layers │ │ ├── layers.html │ │ ├── packnet │ │ │ ├── layers01.html │ │ │ └── packnet.html │ │ └── resnet │ │ │ ├── depth_decoder.html │ │ │ ├── layers.html │ │ │ ├── pose_decoder.html │ │ │ ├── resnet.html │ │ │ └── resnet_encoder.html │ ├── networks.html │ └── pose │ │ ├── networks.pose.PoseNet.html │ │ ├── networks.pose.PoseResNet.html │ │ └── pose.html ├── objects.inv ├── py-modindex.html ├── scripts │ ├── scripts.eval.html │ ├── scripts.html │ ├── scripts.infer.html │ └── scripts.train.html ├── search.html ├── searchindex.js ├── trainers │ ├── trainers.BaseTrainer.html │ ├── trainers.HorovodTrainer.html │ └── trainers.html └── utils │ ├── utils.config.html │ ├── utils.depth.html │ ├── utils.horovod.html │ ├── utils.html │ ├── utils.image.html │ ├── utils.load.html │ ├── utils.logging.html │ ├── utils.misc.html │ ├── utils.reduce.html │ ├── utils.save.html │ └── utils.types.html ├── media ├── figs │ ├── packnet-ddad.gif │ └── tri-logo.png └── tests │ ├── ddad.png │ └── kitti.png ├── packnet_sfm ├── __init__.py ├── datasets │ ├── __init__.py │ ├── augmentations.py │ ├── dgp_dataset.py │ ├── image_dataset.py │ ├── kitti_dataset.py │ ├── kitti_dataset_utils.py │ └── transforms.py ├── geometry │ ├── __init__.py │ ├── camera.py │ ├── camera_generic.py │ ├── camera_utils.py │ ├── pose.py │ └── pose_utils.py ├── loggers │ ├── __init__.py │ └── wandb_logger.py ├── losses │ ├── __init__.py │ ├── generic_multiview_photometric_loss.py │ ├── loss_base.py │ ├── multiview_photometric_loss.py │ ├── supervised_loss.py │ └── velocity_loss.py ├── models │ ├── GenericSelfSupModel.py │ ├── GenericSfmModel.py │ ├── SelfSupModel.py │ ├── SemiSupCompletionModel.py │ ├── SemiSupModel.py │ ├── SfmModel.py │ ├── VelSupModel.py │ ├── __init__.py │ ├── base_model.py │ ├── model_checkpoint.py │ ├── model_utils.py │ └── model_wrapper.py ├── networks │ ├── __init__.py │ ├── depth │ │ ├── DepthResNet.py │ │ ├── PackNet01.py │ │ ├── PackNetSAN01.py │ │ ├── PackNetSlim01.py │ │ └── RaySurfaceResNet.py │ ├── layers │ │ ├── minkowski.py │ │ ├── minkowski_encoder.py │ │ ├── packnet │ │ │ └── layers01.py │ │ └── resnet │ │ │ ├── depth_decoder.py │ │ │ ├── layers.py │ │ │ ├── pose_decoder.py │ │ │ ├── raysurface_decoder.py │ │ │ └── resnet_encoder.py │ └── pose │ │ ├── PoseNet.py │ │ └── PoseResNet.py ├── trainers │ ├── __init__.py │ ├── base_trainer.py │ └── horovod_trainer.py └── utils │ ├── __init__.py │ ├── config.py │ ├── depth.py │ ├── horovod.py │ ├── image.py │ ├── load.py │ ├── logging.py │ ├── misc.py │ ├── reduce.py │ ├── save.py │ └── types.py └── scripts ├── eval.py ├── evaluate_depth_maps.py ├── infer.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Toyota Research Institute (TRI) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Handy commands: 2 | # - `make docker-build`: builds DOCKERIMAGE (default: `packnet-sfm:latest`) 3 | PROJECT ?= packnet-sfm 4 | WORKSPACE ?= /workspace/$(PROJECT) 5 | DOCKER_IMAGE ?= ${PROJECT}:latest 6 | 7 | SHMSIZE ?= 444G 8 | WANDB_MODE ?= run 9 | DOCKER_OPTS := \ 10 | --name ${PROJECT} \ 11 | --rm -it \ 12 | --shm-size=${SHMSIZE} \ 13 | -e AWS_DEFAULT_REGION \ 14 | -e AWS_ACCESS_KEY_ID \ 15 | -e AWS_SECRET_ACCESS_KEY \ 16 | -e WANDB_API_KEY \ 17 | -e WANDB_ENTITY \ 18 | -e WANDB_MODE \ 19 | -e HOST_HOSTNAME= \ 20 | -e OMP_NUM_THREADS=1 -e KMP_AFFINITY="granularity=fine,compact,1,0" \ 21 | -e OMPI_ALLOW_RUN_AS_ROOT=1 \ 22 | -e OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \ 23 | -e NCCL_DEBUG=VERSION \ 24 | -e DISPLAY=${DISPLAY} \ 25 | -e XAUTHORITY \ 26 | -e NVIDIA_DRIVER_CAPABILITIES=all \ 27 | -v ~/.aws:/root/.aws \ 28 | -v /root/.ssh:/root/.ssh \ 29 | -v ~/.cache:/root/.cache \ 30 | -v /data:/data \ 31 | -v /mnt/fsx/:/mnt/fsx \ 32 | -v /dev/null:/dev/raw1394 \ 33 | -v /tmp:/tmp \ 34 | -v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \ 35 | -v /var/run/docker.sock:/var/run/docker.sock \ 36 | -v ${PWD}:${WORKSPACE} \ 37 | -w ${WORKSPACE} \ 38 | --privileged \ 39 | --ipc=host \ 40 | --network=host 41 | 42 | NGPUS=$(shell nvidia-smi -L | wc -l) 43 | MPI_CMD=mpirun \ 44 | -allow-run-as-root \ 45 | -np ${NGPUS} \ 46 | -H localhost:${NGPUS} \ 47 | -x MASTER_ADDR=127.0.0.1 \ 48 | -x MASTER_PORT=23457 \ 49 | -x HOROVOD_TIMELINE \ 50 | -x OMP_NUM_THREADS=1 \ 51 | -x KMP_AFFINITY='granularity=fine,compact,1,0' \ 52 | -bind-to none -map-by slot -x NCCL_DEBUG=INFO -x NCCL_MIN_NRINGS=4 \ 53 | --report-bindings 54 | 55 | 56 | .PHONY: all clean docker-build docker-overfit-pose 57 | 58 | all: clean 59 | 60 | clean: 61 | find . -name "*.pyc" | xargs rm -f && \ 62 | find . -name "__pycache__" | xargs rm -rf 63 | 64 | docker-build: 65 | docker build \ 66 | -f docker/Dockerfile \ 67 | -t ${DOCKER_IMAGE} . 68 | 69 | docker-start-interactive: docker-build 70 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} bash 71 | 72 | docker-start-jupyter: docker-build 73 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ 74 | bash -c "jupyter notebook --port=8888 -ip=0.0.0.0 --allow-root --no-browser" 75 | 76 | docker-run: docker-build 77 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ 78 | bash -c "${COMMAND}" 79 | 80 | docker-run-mpi: docker-build 81 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ 82 | bash -c "${MPI_CMD} ${COMMAND}" -------------------------------------------------------------------------------- /configs/eval_ddad.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: 'SelfSupModel' 3 | depth_net: 4 | name: 'PackNet01' 5 | version: '1A' 6 | pose_net: 7 | name: 'PoseNet' 8 | version: '' 9 | params: 10 | crop: '' 11 | min_depth: 0.0 12 | max_depth: 200.0 13 | datasets: 14 | augmentation: 15 | image_shape: (384, 640) 16 | test: 17 | dataset: ['DGP'] 18 | path: ['/data/datasets/DDAD/ddad.json'] 19 | split: ['val'] 20 | depth_type: ['lidar'] 21 | cameras: [['camera_01']] 22 | save: 23 | folder: '/data/save' 24 | depth: 25 | rgb: True 26 | viz: True 27 | npz: True 28 | png: True 29 | -------------------------------------------------------------------------------- /configs/eval_image.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: 'SelfSupModel' 3 | depth_net: 4 | name: 'PackNet01' 5 | version: '1A' 6 | pose_net: 7 | name: 'PoseNet' 8 | version: '' 9 | datasets: 10 | augmentation: 11 | image_shape: (384, 640) 12 | test: 13 | dataset: ['Image'] 14 | path: ['images'] 15 | split: ['{:010d}'] 16 | save: 17 | folder: '/data/save' 18 | depth: 19 | rgb: True 20 | viz: True 21 | npz: True 22 | png: True 23 | -------------------------------------------------------------------------------- /configs/eval_kitti.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: 'SelfSupModel' 3 | depth_net: 4 | name: 'PackNet01' 5 | version: '1A' 6 | pose_net: 7 | name: 'PoseNet' 8 | version: '' 9 | params: 10 | crop: 'garg' 11 | min_depth: 0.0 12 | max_depth: 80.0 13 | datasets: 14 | augmentation: 15 | image_shape: (192, 640) 16 | test: 17 | dataset: ['KITTI'] 18 | path: ['/data/datasets/KITTI_raw'] 19 | split: ['data_splits/eigen_test_files.txt'] 20 | depth_type: ['velodyne'] 21 | save: 22 | folder: '/data/save' 23 | depth: 24 | rgb: True 25 | viz: True 26 | npz: True 27 | png: True 28 | -------------------------------------------------------------------------------- /configs/overfit_ddad.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | max_epochs: 1 3 | model: 4 | name: 'SelfSupModel' 5 | optimizer: 6 | name: 'Adam' 7 | depth: 8 | lr: 0.0001 9 | pose: 10 | lr: 0.0001 11 | scheduler: 12 | name: 'StepLR' 13 | step_size: 30 14 | gamma: 0.5 15 | depth_net: 16 | name: 'DepthResNet' 17 | version: '18pt' 18 | pose_net: 19 | name: 'PoseResNet' 20 | version: '18pt' 21 | params: 22 | crop: '' 23 | min_depth: 0.0 24 | max_depth: 200.0 25 | datasets: 26 | augmentation: 27 | image_shape: (384, 640) 28 | train: 29 | batch_size: 4 30 | dataset: ['DGP'] 31 | path: ['/data/datasets/DDAD_tiny/ddad_tiny.json'] 32 | split: ['train'] 33 | depth_type: ['lidar'] 34 | cameras: [['camera_01']] 35 | repeat: [500] 36 | validation: 37 | dataset: ['DGP'] 38 | path: ['/data/datasets/DDAD_tiny/ddad_tiny.json'] 39 | split: ['train'] 40 | depth_type: ['lidar'] 41 | cameras: [['camera_01']] 42 | test: 43 | dataset: ['DGP'] 44 | path: ['/data/datasets/DDAD_tiny/ddad_tiny.json'] 45 | split: ['train'] 46 | depth_type: ['lidar'] 47 | cameras: [['camera_01']] 48 | -------------------------------------------------------------------------------- /configs/overfit_kitti.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | max_epochs: 1 3 | model: 4 | name: 'SelfSupModel' 5 | optimizer: 6 | name: 'Adam' 7 | depth: 8 | lr: 0.0002 9 | pose: 10 | lr: 0.0002 11 | scheduler: 12 | name: 'StepLR' 13 | step_size: 30 14 | gamma: 0.5 15 | depth_net: 16 | name: 'DepthResNet' 17 | version: '18pt' 18 | pose_net: 19 | name: 'PoseResNet' 20 | version: '18pt' 21 | params: 22 | crop: 'garg' 23 | min_depth: 0.0 24 | max_depth: 80.0 25 | datasets: 26 | augmentation: 27 | image_shape: (192, 640) 28 | train: 29 | batch_size: 6 30 | dataset: ['KITTI'] 31 | path: ['/data/datasets/KITTI_tiny'] 32 | split: ['kitti_tiny.txt'] 33 | depth_type: ['velodyne'] 34 | repeat: [1000] 35 | validation: 36 | dataset: ['KITTI'] 37 | path: ['/data/datasets/KITTI_tiny'] 38 | split: ['kitti_tiny.txt'] 39 | depth_type: ['velodyne'] 40 | test: 41 | dataset: ['KITTI'] 42 | path: ['/data/datasets/KITTI_tiny'] 43 | split: ['kitti_tiny.txt'] 44 | depth_type: ['velodyne'] 45 | -------------------------------------------------------------------------------- /configs/train_ddad.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: 'SelfSupModel' 3 | optimizer: 4 | name: 'Adam' 5 | depth: 6 | lr: 0.00009 7 | pose: 8 | lr: 0.00009 9 | scheduler: 10 | name: 'StepLR' 11 | step_size: 30 12 | gamma: 0.5 13 | depth_net: 14 | name: 'PackNet01' 15 | version: '1A' 16 | pose_net: 17 | name: 'PoseNet' 18 | version: '' 19 | params: 20 | crop: '' 21 | min_depth: 0.0 22 | max_depth: 200.0 23 | datasets: 24 | augmentation: 25 | image_shape: (384, 640) 26 | train: 27 | batch_size: 2 28 | num_workers: 8 29 | dataset: ['DGP'] 30 | path: ['/data/datasets/DDAD/ddad.json'] 31 | split: ['train'] 32 | depth_type: ['lidar'] 33 | cameras: [['camera_01']] 34 | repeat: [5] 35 | validation: 36 | num_workers: 8 37 | dataset: ['DGP'] 38 | path: ['/data/datasets/DDAD/ddad.json'] 39 | split: ['val'] 40 | depth_type: ['lidar'] 41 | cameras: [['camera_01']] 42 | test: 43 | num_workers: 8 44 | dataset: ['DGP'] 45 | path: ['/data/datasets/DDAD/ddad.json'] 46 | split: ['val'] 47 | depth_type: ['lidar'] 48 | cameras: [['camera_01']] 49 | -------------------------------------------------------------------------------- /configs/train_kitti.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | name: 'SelfSupModel' 3 | optimizer: 4 | name: 'Adam' 5 | depth: 6 | lr: 0.0002 7 | pose: 8 | lr: 0.0002 9 | scheduler: 10 | name: 'StepLR' 11 | step_size: 30 12 | gamma: 0.5 13 | depth_net: 14 | name: 'PackNet01' 15 | version: '1A' 16 | pose_net: 17 | name: 'PoseNet' 18 | version: '' 19 | params: 20 | crop: 'garg' 21 | min_depth: 0.0 22 | max_depth: 80.0 23 | datasets: 24 | augmentation: 25 | image_shape: (192, 640) 26 | train: 27 | batch_size: 4 28 | dataset: ['KITTI'] 29 | path: ['/data/datasets/KITTI_raw'] 30 | split: ['data_splits/eigen_zhou_files.txt'] 31 | depth_type: ['velodyne'] 32 | repeat: [2] 33 | validation: 34 | dataset: ['KITTI'] 35 | path: ['/data/datasets/KITTI_raw'] 36 | split: ['data_splits/eigen_val_files.txt', 37 | 'data_splits/eigen_test_files.txt'] 38 | depth_type: ['velodyne'] 39 | test: 40 | dataset: ['KITTI'] 41 | path: ['/data/datasets/KITTI_raw'] 42 | split: ['data_splits/eigen_test_files.txt'] 43 | depth_type: ['velodyne'] 44 | -------------------------------------------------------------------------------- /configs/train_omnicam.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | max_epochs: 50 3 | model: 4 | name: 'GenericSelfSupModel' 5 | optimizer: 6 | name: 'Adam' 7 | depth: 8 | lr: 0.0002 9 | pose: 10 | lr: 0.0002 11 | scheduler: 12 | name: 'StepLR' 13 | step_size: 30 14 | gamma: 0.5 15 | depth_net: 16 | name: 'RaySurfaceResNet' 17 | version: '18pt' 18 | pose_net: 19 | name: 'PoseResNet' 20 | version: '18pt' 21 | params: 22 | crop: 'garg' 23 | min_depth: 0.0 24 | max_depth: 80.0 25 | datasets: 26 | augmentation: 27 | image_shape: (384, 384) 28 | train: 29 | batch_size: 1 30 | dataset: ['Image'] 31 | path: ['/data/datasets/omnicam'] 32 | split: ['{:09}'] 33 | depth_type: [''] 34 | repeat: [1] 35 | validation: 36 | dataset: ['KITTI'] 37 | path: ['/data/datasets/KITTI_tiny'] 38 | split: ['kitti_tiny.txt'] 39 | depth_type: ['velodyne'] 40 | test: 41 | dataset: ['KITTI'] 42 | path: ['/data/datasets/KITTI_tiny'] 43 | split: ['kitti_tiny.txt'] 44 | depth_type: ['velodyne'] 45 | -------------------------------------------------------------------------------- /configs/train_packnet_san_ddad.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | validate_first: True 3 | model: 4 | name: 'SemiSupCompletionModel' 5 | loss: 6 | supervised_method: 'sparse-silog' 7 | supervised_num_scales: 1 8 | supervised_loss_weight: 1.0 9 | optimizer: 10 | name: 'Adam' 11 | depth: 12 | lr: 0.0001 13 | pose: 14 | lr: 0.0001 15 | scheduler: 16 | name: 'StepLR' 17 | step_size: 30 18 | gamma: 0.5 19 | depth_net: 20 | name: 'PackNetSAN01' 21 | version: '1A' 22 | dropout: 0.5 23 | params: 24 | crop: '' 25 | min_depth: 0.0 26 | max_depth: 200.0 27 | scale_output: 'resize' 28 | datasets: 29 | augmentation: 30 | image_shape: (384, 640) 31 | train: 32 | batch_size: 1 33 | num_workers: 8 34 | dataset: ['DGP'] 35 | path: ['/mnt/fsx/ddad_train_val/ddad.json'] 36 | split: ['train'] 37 | depth_type: ['lidar'] 38 | cameras: [['camera_01'],['camera_05'],['camera_06'],['camera_09']] 39 | repeat: [5] 40 | validation: 41 | num_workers: 8 42 | dataset: ['DGP'] 43 | path: ['/mnt/fsx/ddad_train_val/ddad.json'] 44 | split: ['val'] 45 | depth_type: ['lidar'] 46 | input_depth_type: ['', '', '', '', 'lidar', 'lidar', 'lidar', 'lidar'] 47 | cameras: [['camera_01'],['camera_05'],['camera_06'],['camera_09'], 48 | ['camera_01'],['camera_05'],['camera_06'],['camera_09']] 49 | test: 50 | num_workers: 8 51 | dataset: ['DGP'] 52 | path: ['/mnt/fsx/ddad_train_val/ddad.json'] 53 | split: ['val'] 54 | depth_type: ['lidar'] 55 | cameras: [['camera_01']] 56 | -------------------------------------------------------------------------------- /configs/train_packnet_san_kitti.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | validate_first: True 3 | model: 4 | name: 'SemiSupCompletionModel' 5 | loss: 6 | supervised_method: 'sparse-silog' 7 | supervised_num_scales: 1 8 | supervised_loss_weight: 1.0 9 | optimizer: 10 | name: 'Adam' 11 | depth: 12 | lr: 0.0001 13 | pose: 14 | lr: 0.0001 15 | scheduler: 16 | name: 'StepLR' 17 | step_size: 30 18 | gamma: 0.5 19 | depth_net: 20 | name: 'PackNetSAN01' 21 | version: '1A' 22 | dropout: 0.5 23 | params: 24 | crop: 'garg' 25 | min_depth: 0.0 26 | max_depth: 80.0 27 | scale_output: 'top-center' 28 | datasets: 29 | augmentation: 30 | crop_train_borders: (-352, 0, 0.5, 1216) 31 | crop_eval_borders: (-352, 0, 0.5, 1216) 32 | train: 33 | batch_size: 1 34 | dataset: ['KITTI'] 35 | path: ['/mnt/fsx/KITTI_raw'] 36 | split: ['data_splits/eigen_zhou_files.txt'] 37 | input_depth_type: ['velodyne'] 38 | depth_type: ['groundtruth'] 39 | repeat: [2] 40 | validation: 41 | dataset: ['KITTI'] 42 | path: ['/mnt/fsx/KITTI_raw'] 43 | split: ['data_splits/eigen_test_files.txt', 44 | 'data_splits/eigen_test_files.txt'] 45 | input_depth_type: ['velodyne',''] 46 | depth_type: ['groundtruth','groundtruth'] 47 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | FROM nvidia/cuda:10.2-devel-ubuntu18.04 4 | 5 | ENV PROJECT=packnet-sfm 6 | ENV PYTORCH_VERSION=1.8.1 7 | ENV TORCHVISION_VERSION=0.9.1 8 | ENV CUDNN_VERSION=7.6.5.32-1+cuda10.2 9 | ENV NCCL_VERSION=2.7.8-1+cuda10.2 10 | ENV HOROVOD_VERSION=65de4c961d1e5ad2828f2f6c4329072834f27661 11 | ENV TRT_VERSION=6.0.1.5 12 | ENV LC_ALL=C.UTF-8 13 | ENV LANG=C.UTF-8 14 | 15 | ARG python=3.6 16 | ENV PYTHON_VERSION=${python} 17 | ENV DEBIAN_FRONTEND=noninteractive 18 | 19 | # Set default shell to /bin/bash 20 | SHELL ["/bin/bash", "-cu"] 21 | 22 | RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ 23 | build-essential \ 24 | cmake \ 25 | g++-4.8 \ 26 | git \ 27 | curl \ 28 | docker.io \ 29 | vim \ 30 | wget \ 31 | ca-certificates \ 32 | libcudnn7=${CUDNN_VERSION} \ 33 | libnccl2=${NCCL_VERSION} \ 34 | libnccl-dev=${NCCL_VERSION} \ 35 | libjpeg-dev \ 36 | libpng-dev \ 37 | python${PYTHON_VERSION} \ 38 | python${PYTHON_VERSION}-dev \ 39 | python3-tk \ 40 | librdmacm1 \ 41 | libibverbs1 \ 42 | ibverbs-providers \ 43 | libgtk2.0-dev \ 44 | unzip \ 45 | bzip2 \ 46 | htop \ 47 | gnuplot \ 48 | ffmpeg 49 | 50 | # Install Open MPI 51 | RUN mkdir /tmp/openmpi && \ 52 | cd /tmp/openmpi && \ 53 | wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz && \ 54 | tar zxf openmpi-4.0.0.tar.gz && \ 55 | cd openmpi-4.0.0 && \ 56 | ./configure --enable-orterun-prefix-by-default && \ 57 | make -j $(nproc) all && \ 58 | make install && \ 59 | ldconfig && \ 60 | rm -rf /tmp/openmpi 61 | 62 | # Install OpenSSH for MPI to communicate between containers 63 | RUN apt-get install -y --no-install-recommends openssh-client openssh-server && \ 64 | mkdir -p /var/run/sshd 65 | 66 | # Allow OpenSSH to talk to containers without asking for confirmation 67 | RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ 68 | echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ 69 | mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config 70 | 71 | # Instal Python and pip 72 | RUN if [[ "${PYTHON_VERSION}" == "3.6" ]]; then \ 73 | apt-get install -y python${PYTHON_VERSION}-distutils; \ 74 | fi 75 | 76 | RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python 77 | 78 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ 79 | python get-pip.py && \ 80 | rm get-pip.py 81 | 82 | # Install Pydata and other deps 83 | RUN pip install future typing numpy pandas matplotlib jupyter h5py \ 84 | awscli boto3 tqdm termcolor path.py pillow-simd opencv-python-headless \ 85 | mpi4py onnx onnxruntime pycuda yacs cython==0.29.10 86 | 87 | # Install PyTorch 88 | RUN pip install torch==${PYTORCH_VERSION} \ 89 | torchvision==${TORCHVISION_VERSION} && ldconfig 90 | 91 | # Install apex 92 | RUN mkdir /workspace 93 | WORKDIR /workspace 94 | RUN git clone https://github.com/NVIDIA/apex.git 95 | WORKDIR /workspace/apex 96 | RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . 97 | ENV PYTHONPATH="/workspace/apex:$PYTHONPATH" 98 | WORKDIR /workspace 99 | 100 | # install horovod (for distributed training) 101 | RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \ 102 | HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL HOROVOD_WITH_PYTORCH=1 \ 103 | pip install --no-cache-dir git+https://github.com/horovod/horovod.git@${HOROVOD_VERSION} && \ 104 | ldconfig 105 | 106 | # Settings for S3 107 | RUN aws configure set default.s3.max_concurrent_requests 100 && \ 108 | aws configure set default.s3.max_queue_size 10000 109 | 110 | # Install Minkowski Engine 111 | ENV TORCH_CUDA_ARCH_LIST=Volta;Turing;Kepler+Tesla 112 | RUN pip install ninja 113 | RUN apt-get update && apt-get install -y libopenblas-dev 114 | WORKDIR /workspace 115 | RUN git clone https://github.com/NVIDIA/MinkowskiEngine.git 116 | RUN cd /workspace/MinkowskiEngine && \ 117 | python setup.py install --force_cuda 118 | 119 | # Add Tini (cf. https://github.com/jupyter/docker-stacks) 120 | ENV TINI_VERSION v0.19.0 121 | ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini 122 | RUN chmod +x /tini 123 | ENTRYPOINT ["/tini", "-g", "--"] 124 | 125 | # Install DGP (dataset utils) 126 | WORKDIR /workspace 127 | RUN git clone https://github.com/TRI-ML/dgp.git 128 | ENV PYTHONPATH="/workspace/dgp:$PYTHONPATH" 129 | 130 | # Override DGP wandb with required version 131 | RUN pip install wandb==0.8.21 pyquaternion xarray diskcache tenacity pycocotools 132 | 133 | # Expose Port for jupyter (8888) 134 | EXPOSE 8888 135 | 136 | # create project workspace dir 137 | RUN mkdir -p /workspace/experiments 138 | RUN mkdir -p /workspace/${PROJECT} 139 | WORKDIR /workspace/${PROJECT} 140 | 141 | # Copy project source last (to avoid cache busting) 142 | WORKDIR /workspace/${PROJECT} 143 | COPY . /workspace/${PROJECT} 144 | ENV PYTHONPATH="/workspace/${PROJECT}:$PYTHONPATH" 145 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/.nojekyll -------------------------------------------------------------------------------- /docs/_sources/configs/configs.eval_ddad.rst.txt: -------------------------------------------------------------------------------- 1 | eval_ddad 2 | ========= 3 | 4 | .. code:: yaml 5 | 6 | model: 7 | name: 'SelfSupModel' 8 | depth_net: 9 | name: 'PackNet01' 10 | version: '1A' 11 | pose_net: 12 | name: 'PoseNet' 13 | version: '' 14 | params: 15 | crop: '' 16 | min_depth: 0.0 17 | max_depth: 200.0 18 | datasets: 19 | augmentation: 20 | image_shape: (384, 640) 21 | test: 22 | dataset: ['DGP'] 23 | path: ['/data/datasets/DDAD/ddad.json'] 24 | split: ['val'] 25 | depth_type: ['lidar'] 26 | cameras: ['camera_01'] 27 | save: 28 | folder: '/data/save' 29 | viz: True 30 | npz: True 31 | -------------------------------------------------------------------------------- /docs/_sources/configs/configs.eval_kitti.rst.txt: -------------------------------------------------------------------------------- 1 | eval_kitti 2 | ========== 3 | 4 | .. code:: yaml 5 | 6 | model: 7 | name: 'SelfSupModel' 8 | depth_net: 9 | name: 'PackNet01' 10 | version: '1A' 11 | pose_net: 12 | name: 'PoseNet' 13 | version: '' 14 | params: 15 | crop: 'garg' 16 | min_depth: 0.0 17 | max_depth: 80.0 18 | datasets: 19 | augmentation: 20 | image_shape: (192, 640) 21 | test: 22 | dataset: ['KITTI'] 23 | path: ['/data/datasets/KITTI_raw'] 24 | split: ['data_splits/eigen_test_files.txt'] 25 | depth_type: ['velodyne'] 26 | save: 27 | folder: '/data/save' 28 | viz: True 29 | npz: True 30 | -------------------------------------------------------------------------------- /docs/_sources/configs/configs.overfit_ddad.rst.txt: -------------------------------------------------------------------------------- 1 | overfit_ddad 2 | ============ 3 | 4 | .. code:: yaml 5 | 6 | arch: 7 | max_epochs: 1 8 | model: 9 | name: 'SelfSupModel' 10 | optimizer: 11 | name: 'Adam' 12 | depth: 13 | lr: 0.0001 14 | pose: 15 | lr: 0.0001 16 | scheduler: 17 | name: 'StepLR' 18 | step_size: 30 19 | gamma: 0.5 20 | depth_net: 21 | name: 'DepthResNet' 22 | version: '18pt' 23 | pose_net: 24 | name: 'PoseResNet' 25 | version: '18pt' 26 | params: 27 | crop: '' 28 | min_depth: 0.0 29 | max_depth: 200.0 30 | datasets: 31 | augmentation: 32 | image_shape: (384, 640) 33 | train: 34 | batch_size: 4 35 | dataset: ['DGP'] 36 | path: ['/data/datasets/DDAD_tiny/ddad_tiny.json'] 37 | split: ['train'] 38 | depth_type: ['lidar'] 39 | cameras: ['camera_01'] 40 | repeat: [300] 41 | validation: 42 | dataset: ['DGP'] 43 | path: ['/data/datasets/DDAD_tiny/ddad_tiny.json'] 44 | split: ['train'] 45 | depth_type: ['lidar'] 46 | cameras: ['camera_01'] 47 | test: 48 | dataset: ['DGP'] 49 | path: ['/data/datasets/DDAD_tiny/ddad_tiny.json'] 50 | split: ['train'] 51 | depth_type: ['lidar'] 52 | cameras: ['camera_01'] 53 | 54 | -------------------------------------------------------------------------------- /docs/_sources/configs/configs.overfit_kitti.rst.txt: -------------------------------------------------------------------------------- 1 | overfit_kitti 2 | ============= 3 | 4 | .. code:: yaml 5 | 6 | arch: 7 | max_epochs: 1 8 | model: 9 | name: 'SelfSupModel' 10 | optimizer: 11 | name: 'Adam' 12 | depth: 13 | lr: 0.0002 14 | pose: 15 | lr: 0.0002 16 | scheduler: 17 | name: 'StepLR' 18 | step_size: 30 19 | gamma: 0.5 20 | depth_net: 21 | name: 'DepthResNet' 22 | version: '18pt' 23 | pose_net: 24 | name: 'PoseResNet' 25 | version: '18pt' 26 | params: 27 | crop: 'garg' 28 | min_depth: 0.0 29 | max_depth: 80.0 30 | datasets: 31 | augmentation: 32 | image_shape: (192, 640) 33 | train: 34 | batch_size: 6 35 | dataset: ['KITTI'] 36 | path: ['/data/datasets/KITTI_tiny'] 37 | split: ['kitti_tiny.txt'] 38 | depth_type: ['velodyne'] 39 | repeat: [1000] 40 | validation: 41 | dataset: ['KITTI'] 42 | path: ['/data/datasets/KITTI_tiny'] 43 | split: ['kitti_tiny.txt'] 44 | depth_type: ['velodyne'] 45 | test: 46 | dataset: ['KITTI'] 47 | path: ['/data/datasets/KITTI_tiny'] 48 | split: ['kitti_tiny.txt'] 49 | depth_type: ['velodyne'] 50 | 51 | -------------------------------------------------------------------------------- /docs/_sources/configs/configs.rst.txt: -------------------------------------------------------------------------------- 1 | Configs 2 | ======= 3 | 4 | .. toctree:: 5 | configs.default_config 6 | configs.overfit_kitti 7 | configs.overfit_ddad 8 | configs.train_kitti 9 | configs.train_ddad 10 | configs.eval_kitti 11 | configs.eval_ddad 12 | -------------------------------------------------------------------------------- /docs/_sources/configs/configs.train_ddad.rst.txt: -------------------------------------------------------------------------------- 1 | train_ddad 2 | ========== 3 | 4 | .. code:: yaml 5 | 6 | model: 7 | name: 'SelfSupModel' 8 | optimizer: 9 | name: 'Adam' 10 | depth: 11 | lr: 0.00009 12 | pose: 13 | lr: 0.00009 14 | scheduler: 15 | name: 'StepLR' 16 | step_size: 30 17 | gamma: 0.5 18 | depth_net: 19 | name: 'PackNet01' 20 | version: '1A' 21 | pose_net: 22 | name: 'PoseNet' 23 | version: '' 24 | params: 25 | crop: '' 26 | min_depth: 0.0 27 | max_depth: 200.0 28 | datasets: 29 | augmentation: 30 | image_shape: (384, 640) 31 | train: 32 | batch_size: 2 33 | num_workers: 8 34 | dataset: ['DGP'] 35 | path: ['/data/datasets/DDAD/ddad.json'] 36 | split: ['train'] 37 | depth_type: ['lidar'] 38 | cameras: ['camera_01'] 39 | repeat: [5] 40 | validation: 41 | num_workers: 8 42 | dataset: ['DGP'] 43 | path: ['/data/datasets/DDAD/ddad.json'] 44 | split: ['val'] 45 | depth_type: ['lidar'] 46 | cameras: ['camera_01'] 47 | test: 48 | num_workers: 8 49 | dataset: ['DGP'] 50 | path: ['/data/datasets/DDAD/ddad.json'] 51 | split: ['val'] 52 | depth_type: ['lidar'] 53 | cameras: ['camera_01'] 54 | 55 | -------------------------------------------------------------------------------- /docs/_sources/configs/configs.train_kitti.rst.txt: -------------------------------------------------------------------------------- 1 | train_kitti 2 | =========== 3 | 4 | .. code:: yaml 5 | 6 | model: 7 | name: 'SelfSupModel' 8 | optimizer: 9 | name: 'Adam' 10 | depth: 11 | lr: 0.0002 12 | pose: 13 | lr: 0.0002 14 | scheduler: 15 | name: 'StepLR' 16 | step_size: 30 17 | gamma: 0.5 18 | depth_net: 19 | name: 'PackNet01' 20 | version: '1A' 21 | pose_net: 22 | name: 'PoseNet' 23 | version: '' 24 | params: 25 | crop: 'garg' 26 | min_depth: 0.0 27 | max_depth: 80.0 28 | datasets: 29 | augmentation: 30 | image_shape: (192, 640) 31 | train: 32 | batch_size: 4 33 | dataset: ['KITTI'] 34 | path: ['/data/datasets/KITTI_raw'] 35 | split: ['data_splits/eigen_zhou_files.txt'] 36 | depth_type: ['velodyne'] 37 | repeat: [2] 38 | validation: 39 | dataset: ['KITTI'] 40 | path: ['/data/datasets/KITTI_raw'] 41 | split: ['data_splits/eigen_val_files.txt', 42 | 'data_splits/eigen_test_files.txt'] 43 | depth_type: ['velodyne'] 44 | test: 45 | dataset: ['KITTI'] 46 | path: ['/data/datasets/KITTI_raw'] 47 | split: ['data_splits/eigen_test_files.txt'] 48 | depth_type: ['velodyne'] 49 | 50 | -------------------------------------------------------------------------------- /docs/_sources/datasets/KITTIDataset.rst.txt: -------------------------------------------------------------------------------- 1 | KITTIDataset 2 | ============ 3 | 4 | .. toctree:: 5 | datasets.KITTIDataset 6 | datasets.KITTIDataset_utils 7 | 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.DGPDataset.rst.txt: -------------------------------------------------------------------------------- 1 | DGPDataset 2 | ========== 3 | 4 | .. automodule:: packnet_sfm.datasets.dgp_dataset 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.ImageDataset.rst.txt: -------------------------------------------------------------------------------- 1 | ImageDataset 2 | ============ 3 | 4 | .. automodule:: packnet_sfm.datasets.image_dataset 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.KITTIDataset.rst.txt: -------------------------------------------------------------------------------- 1 | KITTIDataset 2 | ============ 3 | 4 | .. automodule:: packnet_sfm.datasets.kitti_dataset 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.KITTIDataset_utils.rst.txt: -------------------------------------------------------------------------------- 1 | kitti_dataset_utils 2 | =================== 3 | 4 | .. automodule:: packnet_sfm.datasets.kitti_dataset_utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.augmentations.rst.txt: -------------------------------------------------------------------------------- 1 | Augmentations 2 | ============= 3 | 4 | .. automodule:: packnet_sfm.datasets.augmentations 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.rst.txt: -------------------------------------------------------------------------------- 1 | Datasets 2 | ======== 3 | 4 | .. toctree:: 5 | datasets.augmentations 6 | datasets.transforms 7 | KITTIDataset.rst 8 | datasets.DGPDataset 9 | datasets.ImageDataset 10 | 11 | 12 | -------------------------------------------------------------------------------- /docs/_sources/datasets/datasets.transforms.rst.txt: -------------------------------------------------------------------------------- 1 | Transforms 2 | ========== 3 | 4 | .. automodule:: packnet_sfm.datasets.transforms 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/geometry/camera.rst.txt: -------------------------------------------------------------------------------- 1 | Camera 2 | ====== 3 | 4 | .. toctree:: 5 | geometry.camera.camera 6 | geometry.camera.camera_utils 7 | 8 | -------------------------------------------------------------------------------- /docs/_sources/geometry/geometry.camera.camera.rst.txt: -------------------------------------------------------------------------------- 1 | Camera 2 | ====== 3 | 4 | .. automodule:: packnet_sfm.geometry.camera 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/geometry/geometry.camera.camera_utils.rst.txt: -------------------------------------------------------------------------------- 1 | Camera_utils 2 | ============ 3 | 4 | .. automodule:: packnet_sfm.geometry.camera_utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/geometry/geometry.pose.pose.rst.txt: -------------------------------------------------------------------------------- 1 | Pose 2 | ==== 3 | 4 | .. automodule:: packnet_sfm.geometry.pose 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/geometry/geometry.pose.pose_utils.rst.txt: -------------------------------------------------------------------------------- 1 | Pose_utils 2 | ========== 3 | 4 | .. automodule:: packnet_sfm.geometry.pose_utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/geometry/geometry.rst.txt: -------------------------------------------------------------------------------- 1 | Geometry 2 | ======== 3 | 4 | .. toctree:: 5 | camera.rst 6 | pose.rst 7 | 8 | -------------------------------------------------------------------------------- /docs/_sources/geometry/pose.rst.txt: -------------------------------------------------------------------------------- 1 | Pose 2 | ==== 3 | 4 | .. toctree:: 5 | geometry.pose.pose 6 | geometry.pose.pose_utils 7 | 8 | -------------------------------------------------------------------------------- /docs/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. PackNet-SfM documentation master file, created by 2 | sphinx-quickstart on Thu Apr 23 07:39:57 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: README.rst 7 | 8 | .. toctree:: 9 | :maxdepth: 5 10 | :caption: Contents 11 | 12 | configs/configs.rst 13 | scripts/scripts.rst 14 | trainers/trainers.rst 15 | datasets/datasets.rst 16 | models/models.rst 17 | networks/networks.rst 18 | losses/losses.rst 19 | loggers/loggers.rst 20 | geometry/geometry.rst 21 | utils/utils.rst 22 | 23 | .. toctree:: 24 | :glob: 25 | :maxdepth: 1 26 | :caption: Contact 27 | 28 | Toyota Research Institute 29 | PackNet-SfM GitHub 30 | DDAD GitHub 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | -------------------------------------------------------------------------------- /docs/_sources/loggers/loggers.WandbLogger.rst.txt: -------------------------------------------------------------------------------- 1 | WandbLogger 2 | =========== 3 | 4 | .. automodule:: packnet_sfm.loggers.WandbLogger 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/loggers/loggers.rst.txt: -------------------------------------------------------------------------------- 1 | Loggers 2 | ======= 3 | 4 | .. toctree:: 5 | loggers.WandbLogger 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/_sources/losses/losses.loss_base.rst.txt: -------------------------------------------------------------------------------- 1 | LossBase 2 | ======== 3 | 4 | .. automodule:: packnet_sfm.losses.loss_base 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/losses/losses.multiview_photometric_loss.rst.txt: -------------------------------------------------------------------------------- 1 | MultiViewPhotometricLoss 2 | ======================== 3 | 4 | .. automodule:: packnet_sfm.losses.multiview_photometric_loss 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/losses/losses.rst.txt: -------------------------------------------------------------------------------- 1 | Losses 2 | ====== 3 | 4 | .. toctree:: 5 | losses.loss_base 6 | losses.multiview_photometric_loss 7 | losses.supervised_loss 8 | -------------------------------------------------------------------------------- /docs/_sources/losses/losses.supervised_loss.rst.txt: -------------------------------------------------------------------------------- 1 | SupervisedLoss 2 | ============== 3 | 4 | .. automodule:: packnet_sfm.losses.supervised_loss 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/models/models.Checkpoint.rst.txt: -------------------------------------------------------------------------------- 1 | Checkpoint 2 | ========== 3 | 4 | .. automodule:: packnet_sfm.models.model_checkpoint 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/models/models.SelfSupModel.rst.txt: -------------------------------------------------------------------------------- 1 | SelfSupModel 2 | ============ 3 | 4 | .. automodule:: packnet_sfm.models.SelfSupModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/models/models.SemiSupModel.rst.txt: -------------------------------------------------------------------------------- 1 | SemiSupModel 2 | ============ 3 | 4 | .. automodule:: packnet_sfm.models.SemiSupModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/models/models.SfmModel.rst.txt: -------------------------------------------------------------------------------- 1 | SfmModel 2 | ======== 3 | 4 | .. automodule:: packnet_sfm.models.SfmModel 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/models/models.Utilities.rst.txt: -------------------------------------------------------------------------------- 1 | Utilities 2 | ========= 3 | 4 | .. automodule:: packnet_sfm.models.model_utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/models/models.Wrapper.rst.txt: -------------------------------------------------------------------------------- 1 | Wrapper 2 | ======= 3 | 4 | .. automodule:: packnet_sfm.models.model_wrapper 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/models/models.rst.txt: -------------------------------------------------------------------------------- 1 | Models 2 | ====== 3 | 4 | .. toctree:: 5 | models.Utilities 6 | models.Checkpoint 7 | models.Wrapper 8 | models.SfmModel 9 | models.SelfSupModel 10 | models.SemiSupModel 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/_sources/networks/depth/depth.rst.txt: -------------------------------------------------------------------------------- 1 | Depth Networks 2 | ============== 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | networks.depth.PackNet01 8 | networks.depth.DepthResNet 9 | 10 | -------------------------------------------------------------------------------- /docs/_sources/networks/depth/networks.depth.DepthResNet.rst.txt: -------------------------------------------------------------------------------- 1 | DepthResNet 2 | =========== 3 | 4 | .. automodule:: packnet_sfm.networks.depth.DepthResNet 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/networks/depth/networks.depth.PackNet01.rst.txt: -------------------------------------------------------------------------------- 1 | PackNet01 2 | ========= 3 | 4 | .. automodule:: packnet_sfm.networks.depth.PackNet01 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/layers.rst.txt: -------------------------------------------------------------------------------- 1 | Network Layers 2 | ============== 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | resnet/resnet.rst 8 | packnet/packnet.rst 9 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/packnet/layers01.rst.txt: -------------------------------------------------------------------------------- 1 | layers01 2 | ======== 3 | 4 | .. automodule:: packnet_sfm.networks.layers.packnet.layers01 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/_sources/networks/layers/packnet/packnet.rst.txt: -------------------------------------------------------------------------------- 1 | PackNet 2 | ======= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | layers01.rst 8 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/resnet/depth_decoder.rst.txt: -------------------------------------------------------------------------------- 1 | depth_decoder 2 | ============= 3 | 4 | .. automodule:: packnet_sfm.networks.layers.resnet.depth_decoder 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/resnet/layers.rst.txt: -------------------------------------------------------------------------------- 1 | layers 2 | ====== 3 | 4 | .. automodule:: packnet_sfm.networks.layers.resnet.layers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/resnet/pose_decoder.rst.txt: -------------------------------------------------------------------------------- 1 | pose_decoder 2 | ============ 3 | 4 | .. automodule:: packnet_sfm.networks.layers.resnet.pose_decoder 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/resnet/resnet.rst.txt: -------------------------------------------------------------------------------- 1 | ResNet 2 | ====== 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | layers.rst 8 | resnet_encoder.rst 9 | depth_decoder.rst 10 | pose_decoder.rst 11 | -------------------------------------------------------------------------------- /docs/_sources/networks/layers/resnet/resnet_encoder.rst.txt: -------------------------------------------------------------------------------- 1 | resnet_encoder 2 | ============== 3 | 4 | .. automodule:: packnet_sfm.networks.layers.resnet.resnet_encoder 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/networks/networks.rst.txt: -------------------------------------------------------------------------------- 1 | Networks 2 | ======== 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | layers/layers.rst 8 | depth/depth.rst 9 | pose/pose.rst 10 | 11 | -------------------------------------------------------------------------------- /docs/_sources/networks/pose/networks.pose.PoseNet.rst.txt: -------------------------------------------------------------------------------- 1 | PoseNet 2 | ======= 3 | 4 | .. automodule:: packnet_sfm.networks.pose.PoseNet 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/networks/pose/networks.pose.PoseResNet.rst.txt: -------------------------------------------------------------------------------- 1 | PoseResNet 2 | ========== 3 | 4 | .. automodule:: packnet_sfm.networks.pose.PoseResNet 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/networks/pose/pose.rst.txt: -------------------------------------------------------------------------------- 1 | Pose Networks 2 | ============= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | networks.pose.PoseNet 8 | networks.pose.PoseResNet 9 | 10 | -------------------------------------------------------------------------------- /docs/_sources/scripts/scripts.eval.rst.txt: -------------------------------------------------------------------------------- 1 | Evaluation 2 | ========== 3 | 4 | .. automodule:: scripts.eval 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/scripts/scripts.infer.rst.txt: -------------------------------------------------------------------------------- 1 | Inference 2 | ========= 3 | 4 | .. automodule:: scripts.infer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/scripts/scripts.rst.txt: -------------------------------------------------------------------------------- 1 | Scripts 2 | ======= 3 | 4 | .. toctree:: 5 | scripts.train 6 | scripts.eval 7 | scripts.infer 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/scripts/scripts.train.rst.txt: -------------------------------------------------------------------------------- 1 | Training 2 | ======== 3 | 4 | .. automodule:: scripts.train 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/_sources/trainers/trainers.BaseTrainer.rst.txt: -------------------------------------------------------------------------------- 1 | BaseTrainer 2 | =========== 3 | 4 | .. automodule:: packnet_sfm.trainers.base_trainer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/trainers/trainers.HorovodTrainer.rst.txt: -------------------------------------------------------------------------------- 1 | HorovodTrainer 2 | ============== 3 | 4 | .. automodule:: packnet_sfm.trainers.HorovodTrainer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/trainers/trainers.rst.txt: -------------------------------------------------------------------------------- 1 | Trainers 2 | ======== 3 | 4 | .. toctree:: 5 | trainers.BaseTrainer.rst 6 | trainers.HorovodTrainer.rst 7 | 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.config.rst.txt: -------------------------------------------------------------------------------- 1 | Config 2 | ====== 3 | 4 | .. automodule:: packnet_sfm.utils.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.depth.rst.txt: -------------------------------------------------------------------------------- 1 | Depth 2 | ===== 3 | 4 | .. automodule:: packnet_sfm.utils.depth 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.horovod.rst.txt: -------------------------------------------------------------------------------- 1 | Horovod 2 | ======= 3 | 4 | .. automodule:: packnet_sfm.utils.horovod 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.image.rst.txt: -------------------------------------------------------------------------------- 1 | Image 2 | ===== 3 | 4 | .. automodule:: packnet_sfm.utils.image 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.load.rst.txt: -------------------------------------------------------------------------------- 1 | Loading 2 | ======= 3 | 4 | .. automodule:: packnet_sfm.utils.load 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.logging.rst.txt: -------------------------------------------------------------------------------- 1 | Logging 2 | ======= 3 | 4 | .. automodule:: packnet_sfm.utils.logging 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.misc.rst.txt: -------------------------------------------------------------------------------- 1 | Misc. 2 | ===== 3 | 4 | .. automodule:: packnet_sfm.utils.misc 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.reduce.rst.txt: -------------------------------------------------------------------------------- 1 | Reduce 2 | ====== 3 | 4 | .. automodule:: packnet_sfm.utils.reduce 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.rst.txt: -------------------------------------------------------------------------------- 1 | Utils 2 | ===== 3 | 4 | .. toctree:: 5 | utils.config 6 | utils.horovod 7 | utils.reduce 8 | utils.logging 9 | utils.save 10 | utils.load 11 | utils.image 12 | utils.depth 13 | utils.types 14 | utils.misc 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.save.rst.txt: -------------------------------------------------------------------------------- 1 | Saving 2 | ====== 3 | 4 | .. automodule:: packnet_sfm.utils.save 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_sources/utils/utils.types.rst.txt: -------------------------------------------------------------------------------- 1 | Types 2 | ===== 3 | 4 | .. automodule:: packnet_sfm.utils.types 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | .wy-side-nav-search, .wy-nav-top { 2 | background: #ffffff; 3 | } 4 | .wy-nav-side { 5 | background: #222222; 6 | } 7 | .wy-menu > .caption > span.caption-text { 8 | color: #bb0000; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /docs/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '1.0', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false 12 | }; -------------------------------------------------------------------------------- /docs/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/file.png -------------------------------------------------------------------------------- /docs/_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Inconsolata.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Inconsolata.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato-Bold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato-Regular.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | /* sphinx_rtd_theme version 0.4.3 | MIT license */ 2 | /* Built 20190212 16:02 */ 3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap("
"),i("table.docutils.citation").wrap("
"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;e 0 58 | self.strides = 1 59 | 60 | self.files = [] 61 | file_tree = read_files(root_dir) 62 | for k, v in file_tree.items(): 63 | file_set = set(file_tree[k]) 64 | files = [fname for fname in sorted(v) if self._has_context(fname, file_set)] 65 | self.files.extend([[k, fname] for fname in files]) 66 | 67 | self.data_transform = data_transform 68 | 69 | def __len__(self): 70 | return len(self.files) 71 | 72 | def _change_idx(self, idx, filename): 73 | _, ext = os.path.splitext(os.path.basename(filename)) 74 | return self.split.format(idx) + ext 75 | 76 | def _has_context(self, filename, file_set): 77 | context_paths = self._get_context_file_paths(filename) 78 | return all([f in file_set for f in context_paths]) 79 | 80 | def _get_context_file_paths(self, filename): 81 | fidx = get_idx(filename) 82 | idxs = list(np.arange(-self.backward_context * self.strides, 0, self.strides)) + \ 83 | list(np.arange(0, self.forward_context * self.strides, self.strides) + self.strides) 84 | return [self._change_idx(fidx + i, filename) for i in idxs] 85 | 86 | def _read_rgb_context_files(self, session, filename): 87 | context_paths = self._get_context_file_paths(filename) 88 | return [load_image(os.path.join(self.root_dir, session, filename)) 89 | for filename in context_paths] 90 | 91 | def _read_rgb_file(self, session, filename): 92 | return load_image(os.path.join(self.root_dir, session, filename)) 93 | 94 | def __getitem__(self, idx): 95 | session, filename = self.files[idx] 96 | image = self._read_rgb_file(session, filename) 97 | 98 | sample = { 99 | 'idx': idx, 100 | 'filename': '%s_%s' % (session, os.path.splitext(filename)[0]), 101 | # 102 | 'rgb': image, 103 | 'intrinsics': dummy_calibration(image) 104 | } 105 | 106 | if self.has_context: 107 | sample['rgb_context'] = \ 108 | self._read_rgb_context_files(session, filename) 109 | 110 | if self.data_transform: 111 | sample = self.data_transform(sample) 112 | 113 | return sample 114 | 115 | ######################################################################################################################## 116 | -------------------------------------------------------------------------------- /packnet_sfm/datasets/kitti_dataset_utils.py: -------------------------------------------------------------------------------- 1 | """Provides helper methods for loading and parsing KITTI data.""" 2 | 3 | from collections import namedtuple 4 | 5 | import numpy as np 6 | 7 | __author__ = "Lee Clement" 8 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 9 | 10 | # Per dataformat.txt 11 | OxtsPacket = namedtuple('OxtsPacket', 12 | 'lat, lon, alt, ' + 13 | 'roll, pitch, yaw, ' + 14 | 'vn, ve, vf, vl, vu, ' + 15 | 'ax, ay, az, af, al, au, ' + 16 | 'wx, wy, wz, wf, wl, wu, ' + 17 | 'pos_accuracy, vel_accuracy, ' + 18 | 'navstat, numsats, ' + 19 | 'posmode, velmode, orimode') 20 | 21 | # Bundle into an easy-to-access structure 22 | OxtsData = namedtuple('OxtsData', 'packet, T_w_imu') 23 | 24 | 25 | def rotx(t): 26 | """ 27 | Rotation about the x-axis 28 | 29 | Parameters 30 | ---------- 31 | t : float 32 | Theta angle 33 | 34 | Returns 35 | ------- 36 | matrix : np.array [3,3] 37 | Rotation matrix 38 | """ 39 | c = np.cos(t) 40 | s = np.sin(t) 41 | return np.array([[1, 0, 0], 42 | [0, c, -s], 43 | [0, s, c]]) 44 | 45 | 46 | def roty(t): 47 | """ 48 | Rotation about the y-axis 49 | 50 | Parameters 51 | ---------- 52 | t : float 53 | Theta angle 54 | 55 | Returns 56 | ------- 57 | matrix : np.array [3,3] 58 | Rotation matrix 59 | """ 60 | c = np.cos(t) 61 | s = np.sin(t) 62 | return np.array([[c, 0, s], 63 | [0, 1, 0], 64 | [-s, 0, c]]) 65 | 66 | 67 | def rotz(t): 68 | """ 69 | Rotation about the z-axis 70 | 71 | Parameters 72 | ---------- 73 | t : float 74 | Theta angle 75 | 76 | Returns 77 | ------- 78 | matrix : np.array [3,3] 79 | Rotation matrix 80 | """ 81 | c = np.cos(t) 82 | s = np.sin(t) 83 | return np.array([[c, -s, 0], 84 | [s, c, 0], 85 | [0, 0, 1]]) 86 | 87 | 88 | def transform_from_rot_trans(R, t): 89 | """ 90 | Transformation matrix from rotation matrix and translation vector. 91 | 92 | Parameters 93 | ---------- 94 | R : np.array [3,3] 95 | Rotation matrix 96 | t : np.array [3] 97 | translation vector 98 | 99 | Returns 100 | ------- 101 | matrix : np.array [4,4] 102 | Transformation matrix 103 | """ 104 | R = R.reshape(3, 3) 105 | t = t.reshape(3, 1) 106 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 107 | 108 | 109 | def read_calib_file(filepath): 110 | """ 111 | Read in a calibration file and parse into a dictionary 112 | 113 | Parameters 114 | ---------- 115 | filepath : str 116 | File path to read from 117 | 118 | Returns 119 | ------- 120 | calib : dict 121 | Dictionary with calibration values 122 | """ 123 | data = {} 124 | 125 | with open(filepath, 'r') as f: 126 | for line in f.readlines(): 127 | key, value = line.split(':', 1) 128 | # The only non-float values in these files are dates, which 129 | # we don't care about anyway 130 | try: 131 | data[key] = np.array([float(x) for x in value.split()]) 132 | except ValueError: 133 | pass 134 | 135 | return data 136 | 137 | 138 | def pose_from_oxts_packet(raw_data, scale): 139 | """ 140 | Helper method to compute a SE(3) pose matrix from an OXTS packet 141 | 142 | Parameters 143 | ---------- 144 | raw_data : dict 145 | Oxts data to read from 146 | scale : float 147 | Oxts scale 148 | 149 | Returns 150 | ------- 151 | R : np.array [3,3] 152 | Rotation matrix 153 | t : np.array [3] 154 | Translation vector 155 | """ 156 | packet = OxtsPacket(*raw_data) 157 | er = 6378137. # earth radius (approx.) in meters 158 | 159 | # Use a Mercator projection to get the translation vector 160 | tx = scale * packet.lon * np.pi * er / 180. 161 | ty = scale * er * \ 162 | np.log(np.tan((90. + packet.lat) * np.pi / 360.)) 163 | tz = packet.alt 164 | t = np.array([tx, ty, tz]) 165 | 166 | # Use the Euler angles to get the rotation matrix 167 | Rx = rotx(packet.roll) 168 | Ry = roty(packet.pitch) 169 | Rz = rotz(packet.yaw) 170 | R = Rz.dot(Ry.dot(Rx)) 171 | 172 | # Combine the translation and rotation into a homogeneous transform 173 | return R, t 174 | 175 | 176 | def load_oxts_packets_and_poses(oxts_files): 177 | """ 178 | Generator to read OXTS ground truth data. 179 | Poses are given in an East-North-Up coordinate system 180 | whose origin is the first GPS position. 181 | 182 | Parameters 183 | ---------- 184 | oxts_files : list of str 185 | List of oxts files to read from 186 | 187 | Returns 188 | ------- 189 | oxts : list of dict 190 | List of oxts ground-truth data 191 | """ 192 | # Scale for Mercator projection (from first lat value) 193 | scale = None 194 | # Origin of the global coordinate system (first GPS position) 195 | origin = None 196 | 197 | oxts = [] 198 | 199 | for filename in oxts_files: 200 | with open(filename, 'r') as f: 201 | for line in f.readlines(): 202 | line = line.split() 203 | # Last five entries are flags and counts 204 | line[:-5] = [float(x) for x in line[:-5]] 205 | line[-5:] = [int(float(x)) for x in line[-5:]] 206 | 207 | packet = OxtsPacket(*line) 208 | 209 | if scale is None: 210 | scale = np.cos(packet.lat * np.pi / 180.) 211 | 212 | R, t = pose_from_oxts_packet(packet, scale) 213 | 214 | if origin is None: 215 | origin = t 216 | 217 | T_w_imu = transform_from_rot_trans(R, t - origin) 218 | 219 | oxts.append(OxtsData(packet, T_w_imu)) 220 | 221 | return oxts 222 | 223 | 224 | -------------------------------------------------------------------------------- /packnet_sfm/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | from functools import partial 4 | from packnet_sfm.datasets.augmentations import resize_image, resize_sample, resize_depth, \ 5 | duplicate_sample, colorjitter_sample, to_tensor_sample, crop_sample, crop_sample_input, resize_depth_preserve 6 | from packnet_sfm.utils.misc import parse_crop_borders 7 | 8 | ######################################################################################################################## 9 | 10 | def train_transforms(sample, image_shape, jittering, crop_train_borders): 11 | """ 12 | Training data augmentation transformations 13 | 14 | Parameters 15 | ---------- 16 | sample : dict 17 | Sample to be augmented 18 | image_shape : tuple (height, width) 19 | Image dimension to reshape 20 | jittering : tuple (brightness, contrast, saturation, hue) 21 | Color jittering parameters 22 | crop_train_borders : tuple (left, top, right, down) 23 | Border for cropping 24 | 25 | Returns 26 | ------- 27 | sample : dict 28 | Augmented sample 29 | """ 30 | if len(crop_train_borders) > 0: 31 | borders = parse_crop_borders(crop_train_borders, sample['rgb'].size[::-1]) 32 | sample = crop_sample(sample, borders) 33 | if len(image_shape) > 0: 34 | sample = resize_sample(sample, image_shape) 35 | sample = duplicate_sample(sample) 36 | if len(jittering) > 0: 37 | sample = colorjitter_sample(sample, jittering) 38 | sample = to_tensor_sample(sample) 39 | return sample 40 | 41 | def validation_transforms(sample, image_shape, crop_eval_borders): 42 | """ 43 | Validation data augmentation transformations 44 | 45 | Parameters 46 | ---------- 47 | sample : dict 48 | Sample to be augmented 49 | image_shape : tuple (height, width) 50 | Image dimension to reshape 51 | crop_eval_borders : tuple (left, top, right, down) 52 | Border for cropping 53 | 54 | Returns 55 | ------- 56 | sample : dict 57 | Augmented sample 58 | """ 59 | if len(crop_eval_borders) > 0: 60 | borders = parse_crop_borders(crop_eval_borders, sample['rgb'].size[::-1]) 61 | sample = crop_sample_input(sample, borders) 62 | if len(image_shape) > 0: 63 | sample['rgb'] = resize_image(sample['rgb'], image_shape) 64 | if 'input_depth' in sample: 65 | sample['input_depth'] = resize_depth_preserve(sample['input_depth'], image_shape) 66 | sample = to_tensor_sample(sample) 67 | return sample 68 | 69 | def test_transforms(sample, image_shape, crop_eval_borders): 70 | """ 71 | Test data augmentation transformations 72 | 73 | Parameters 74 | ---------- 75 | sample : dict 76 | Sample to be augmented 77 | image_shape : tuple (height, width) 78 | Image dimension to reshape 79 | 80 | Returns 81 | ------- 82 | sample : dict 83 | Augmented sample 84 | """ 85 | if len(crop_eval_borders) > 0: 86 | borders = parse_crop_borders(crop_eval_borders, sample['rgb'].size[::-1]) 87 | sample = crop_sample_input(sample, borders) 88 | if len(image_shape) > 0: 89 | sample['rgb'] = resize_image(sample['rgb'], image_shape) 90 | if 'input_depth' in sample: 91 | sample['input_depth'] = resize_depth(sample['input_depth'], image_shape) 92 | sample = to_tensor_sample(sample) 93 | return sample 94 | 95 | def get_transforms(mode, image_shape, jittering, crop_train_borders, 96 | crop_eval_borders, **kwargs): 97 | """ 98 | Get data augmentation transformations for each split 99 | 100 | Parameters 101 | ---------- 102 | mode : str {'train', 'validation', 'test'} 103 | Mode from which we want the data augmentation transformations 104 | image_shape : tuple (height, width) 105 | Image dimension to reshape 106 | jittering : tuple (brightness, contrast, saturation, hue) 107 | Color jittering parameters 108 | crop_train_borders : tuple (left, top, right, down) 109 | Border for cropping 110 | crop_eval_borders : tuple (left, top, right, down) 111 | Border for cropping 112 | 113 | Returns 114 | ------- 115 | XXX_transform: Partial function 116 | Data augmentation transformation for that mode 117 | """ 118 | if mode == 'train': 119 | return partial(train_transforms, 120 | image_shape=image_shape, 121 | jittering=jittering, 122 | crop_train_borders=crop_train_borders) 123 | elif mode == 'validation': 124 | return partial(validation_transforms, 125 | crop_eval_borders=crop_eval_borders, 126 | image_shape=image_shape) 127 | elif mode == 'test': 128 | return partial(test_transforms, 129 | crop_eval_borders=crop_eval_borders, 130 | image_shape=image_shape) 131 | else: 132 | raise ValueError('Unknown mode {}'.format(mode)) 133 | 134 | ######################################################################################################################## 135 | 136 | -------------------------------------------------------------------------------- /packnet_sfm/geometry/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/geometry/__init__.py -------------------------------------------------------------------------------- /packnet_sfm/geometry/camera_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn.functional as funct 5 | 6 | ######################################################################################################################## 7 | 8 | 9 | def construct_K(fx, fy, cx, cy, dtype=torch.float, device=None): 10 | """Construct a [3,3] camera intrinsics from pinhole parameters""" 11 | return torch.tensor([[fx, 0, cx], 12 | [0, fy, cy], 13 | [0, 0, 1]], dtype=dtype, device=device) 14 | 15 | 16 | def scale_intrinsics(K, x_scale, y_scale): 17 | """Scale intrinsics given x_scale and y_scale factors""" 18 | K[..., 0, 0] *= x_scale 19 | K[..., 1, 1] *= y_scale 20 | K[..., 0, 2] = (K[..., 0, 2] + 0.5) * x_scale - 0.5 21 | K[..., 1, 2] = (K[..., 1, 2] + 0.5) * y_scale - 0.5 22 | return K 23 | 24 | ######################################################################################################################## 25 | 26 | 27 | def view_synthesis(ref_image, depth, ref_cam, cam, 28 | mode='bilinear', padding_mode='zeros'): 29 | """ 30 | Synthesize an image from another plus a depth map. 31 | 32 | Parameters 33 | ---------- 34 | ref_image : torch.Tensor [B,3,H,W] 35 | Reference image to be warped 36 | depth : torch.Tensor [B,1,H,W] 37 | Depth map from the original image 38 | ref_cam : Camera 39 | Camera class for the reference image 40 | cam : Camera 41 | Camera class for the original image 42 | mode : str 43 | Interpolation mode 44 | padding_mode : str 45 | Padding mode for interpolation 46 | 47 | Returns 48 | ------- 49 | ref_warped : torch.Tensor [B,3,H,W] 50 | Warped reference image in the original frame of reference 51 | """ 52 | assert depth.size(1) == 1 53 | # Reconstruct world points from target_camera 54 | world_points = cam.reconstruct(depth, frame='w') 55 | # Project world points onto reference camera 56 | ref_coords = ref_cam.project(world_points, frame='w') 57 | # View-synthesis given the projected reference points 58 | return funct.grid_sample(ref_image, ref_coords, mode=mode, 59 | padding_mode=padding_mode, align_corners=True) 60 | 61 | ######################################################################################################################## 62 | 63 | 64 | def view_synthesis_generic(ref_image, depth, ref_cam, cam, 65 | mode='bilinear', padding_mode='zeros', progress=0.0): 66 | """ 67 | Synthesize an image from another plus a depth map. 68 | 69 | Parameters 70 | ---------- 71 | ref_image : torch.Tensor [B,3,H,W] 72 | Reference image to be warped 73 | depth : torch.Tensor [B,1,H,W] 74 | Depth map from the original image 75 | ref_cam : Camera 76 | Camera class for the reference image 77 | cam : Camera 78 | Camera class for the original image 79 | mode : str 80 | Interpolation mode 81 | padding_mode : str 82 | Padding mode for interpolation 83 | 84 | Returns 85 | ------- 86 | ref_warped : torch.Tensor [B,3,H,W] 87 | Warped reference image in the original frame of reference 88 | """ 89 | assert depth.size(1) == 1 90 | # Reconstruct world points from target_camera 91 | world_points = cam.reconstruct(depth, frame='w') 92 | # Project world points onto reference camera 93 | ref_coords = ref_cam.project(world_points, progress=progress, frame='w') 94 | # View-synthesis given the projected reference points 95 | return funct.grid_sample(ref_image, ref_coords, mode=mode, 96 | padding_mode=padding_mode, align_corners=True) 97 | 98 | ######################################################################################################################## 99 | -------------------------------------------------------------------------------- /packnet_sfm/geometry/pose.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | from packnet_sfm.geometry.pose_utils import invert_pose, pose_vec2mat 5 | 6 | ######################################################################################################################## 7 | 8 | class Pose: 9 | """ 10 | Pose class, that encapsulates a [4,4] transformation matrix 11 | for a specific reference frame 12 | """ 13 | def __init__(self, mat): 14 | """ 15 | Initializes a Pose object. 16 | 17 | Parameters 18 | ---------- 19 | mat : torch.Tensor [B,4,4] 20 | Transformation matrix 21 | """ 22 | assert tuple(mat.shape[-2:]) == (4, 4) 23 | if mat.dim() == 2: 24 | mat = mat.unsqueeze(0) 25 | assert mat.dim() == 3 26 | self.mat = mat 27 | 28 | def __len__(self): 29 | """Batch size of the transformation matrix""" 30 | return len(self.mat) 31 | 32 | ######################################################################################################################## 33 | 34 | @classmethod 35 | def identity(cls, N=1, device=None, dtype=torch.float): 36 | """Initializes as a [4,4] identity matrix""" 37 | return cls(torch.eye(4, device=device, dtype=dtype).repeat([N,1,1])) 38 | 39 | @classmethod 40 | def from_vec(cls, vec, mode): 41 | """Initializes from a [B,6] batch vector""" 42 | mat = pose_vec2mat(vec, mode) # [B,3,4] 43 | pose = torch.eye(4, device=vec.device, dtype=vec.dtype).repeat([len(vec), 1, 1]) 44 | pose[:, :3, :3] = mat[:, :3, :3] 45 | pose[:, :3, -1] = mat[:, :3, -1] 46 | return cls(pose) 47 | 48 | ######################################################################################################################## 49 | 50 | @property 51 | def shape(self): 52 | """Returns the transformation matrix shape""" 53 | return self.mat.shape 54 | 55 | def item(self): 56 | """Returns the transformation matrix""" 57 | return self.mat 58 | 59 | def repeat(self, *args, **kwargs): 60 | """Repeats the transformation matrix multiple times""" 61 | self.mat = self.mat.repeat(*args, **kwargs) 62 | return self 63 | 64 | def inverse(self): 65 | """Returns a new Pose that is the inverse of this one""" 66 | return Pose(invert_pose(self.mat)) 67 | 68 | def to(self, *args, **kwargs): 69 | """Moves object to a specific device""" 70 | self.mat = self.mat.to(*args, **kwargs) 71 | return self 72 | 73 | ######################################################################################################################## 74 | 75 | def transform_pose(self, pose): 76 | """Creates a new pose object that compounds this and another one (self * pose)""" 77 | assert tuple(pose.shape[-2:]) == (4, 4) 78 | return Pose(self.mat.bmm(pose.item())) 79 | 80 | def transform_points(self, points): 81 | """Transforms 3D points using this object""" 82 | assert points.shape[1] == 3 83 | B, _, H, W = points.shape 84 | out = self.mat[:,:3,:3].bmm(points.view(B, 3, -1)) + \ 85 | self.mat[:,:3,-1].unsqueeze(-1) 86 | return out.view(B, 3, H, W) 87 | 88 | def __matmul__(self, other): 89 | """Transforms the input (Pose or 3D points) using this object""" 90 | if isinstance(other, Pose): 91 | return self.transform_pose(other) 92 | elif isinstance(other, torch.Tensor): 93 | if other.shape[1] == 3 and other.dim() > 2: 94 | assert other.dim() == 3 or other.dim() == 4 95 | return self.transform_points(other) 96 | else: 97 | raise ValueError('Unknown tensor dimensions {}'.format(other.shape)) 98 | else: 99 | raise NotImplementedError() 100 | 101 | ######################################################################################################################## 102 | -------------------------------------------------------------------------------- /packnet_sfm/geometry/pose_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import numpy as np 5 | 6 | ######################################################################################################################## 7 | 8 | def euler2mat(angle): 9 | """Convert euler angles to rotation matrix""" 10 | B = angle.size(0) 11 | x, y, z = angle[:, 0], angle[:, 1], angle[:, 2] 12 | 13 | cosz = torch.cos(z) 14 | sinz = torch.sin(z) 15 | 16 | zeros = z.detach() * 0 17 | ones = zeros.detach() + 1 18 | zmat = torch.stack([cosz, -sinz, zeros, 19 | sinz, cosz, zeros, 20 | zeros, zeros, ones], dim=1).view(B, 3, 3) 21 | 22 | cosy = torch.cos(y) 23 | siny = torch.sin(y) 24 | 25 | ymat = torch.stack([cosy, zeros, siny, 26 | zeros, ones, zeros, 27 | -siny, zeros, cosy], dim=1).view(B, 3, 3) 28 | 29 | cosx = torch.cos(x) 30 | sinx = torch.sin(x) 31 | 32 | xmat = torch.stack([ones, zeros, zeros, 33 | zeros, cosx, -sinx, 34 | zeros, sinx, cosx], dim=1).view(B, 3, 3) 35 | 36 | rot_mat = xmat.bmm(ymat).bmm(zmat) 37 | return rot_mat 38 | 39 | ######################################################################################################################## 40 | 41 | def pose_vec2mat(vec, mode='euler'): 42 | """Convert Euler parameters to transformation matrix.""" 43 | if mode is None: 44 | return vec 45 | trans, rot = vec[:, :3].unsqueeze(-1), vec[:, 3:] 46 | if mode == 'euler': 47 | rot_mat = euler2mat(rot) 48 | else: 49 | raise ValueError('Rotation mode not supported {}'.format(mode)) 50 | mat = torch.cat([rot_mat, trans], dim=2) # [B,3,4] 51 | return mat 52 | 53 | ######################################################################################################################## 54 | 55 | def invert_pose(T): 56 | """Inverts a [B,4,4] torch.tensor pose""" 57 | Tinv = torch.eye(4, device=T.device, dtype=T.dtype).repeat([len(T), 1, 1]) 58 | Tinv[:, :3, :3] = torch.transpose(T[:, :3, :3], -2, -1) 59 | Tinv[:, :3, -1] = torch.bmm(-1. * Tinv[:, :3, :3], T[:, :3, -1].unsqueeze(-1)).squeeze(-1) 60 | return Tinv 61 | 62 | ######################################################################################################################## 63 | 64 | def invert_pose_numpy(T): 65 | """Inverts a [4,4] np.array pose""" 66 | Tinv = np.copy(T) 67 | R, t = Tinv[:3, :3], Tinv[:3, 3] 68 | Tinv[:3, :3], Tinv[:3, 3] = R.T, - np.matmul(R.T, t) 69 | return Tinv 70 | 71 | ######################################################################################################################## 72 | -------------------------------------------------------------------------------- /packnet_sfm/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from packnet_sfm.loggers.wandb_logger import WandbLogger 2 | 3 | __all__ = ["WandbLogger"] -------------------------------------------------------------------------------- /packnet_sfm/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/losses/__init__.py -------------------------------------------------------------------------------- /packnet_sfm/losses/loss_base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import numpy as np 4 | import torch.nn as nn 5 | from packnet_sfm.utils.types import is_list 6 | 7 | ######################################################################################################################## 8 | 9 | class ProgressiveScaling: 10 | """ 11 | Helper class to manage progressive scaling. 12 | After a certain training progress percentage, decrease the number of scales by 1. 13 | 14 | Parameters 15 | ---------- 16 | progressive_scaling : float 17 | Training progress percentage where the number of scales is decreased 18 | num_scales : int 19 | Initial number of scales 20 | """ 21 | def __init__(self, progressive_scaling, num_scales=4): 22 | self.num_scales = num_scales 23 | # Use it only if bigger than zero (make a list) 24 | if progressive_scaling > 0.0: 25 | self.progressive_scaling = np.float32( 26 | [progressive_scaling * (i + 1) for i in range(num_scales - 1)] + [1.0]) 27 | # Otherwise, disable it 28 | else: 29 | self.progressive_scaling = progressive_scaling 30 | def __call__(self, progress): 31 | """ 32 | Call for an update in the number of scales 33 | 34 | Parameters 35 | ---------- 36 | progress : float 37 | Training progress percentage 38 | 39 | Returns 40 | ------- 41 | num_scales : int 42 | New number of scales 43 | """ 44 | if is_list(self.progressive_scaling): 45 | return int(self.num_scales - 46 | np.searchsorted(self.progressive_scaling, progress)) 47 | else: 48 | return self.num_scales 49 | 50 | ######################################################################################################################## 51 | 52 | class LossBase(nn.Module): 53 | """Base class for losses.""" 54 | def __init__(self): 55 | """Initializes logs and metrics dictionaries""" 56 | super().__init__() 57 | self._logs = {} 58 | self._metrics = {} 59 | 60 | ######################################################################################################################## 61 | 62 | @property 63 | def logs(self): 64 | """Return logs.""" 65 | return self._logs 66 | 67 | @property 68 | def metrics(self): 69 | """Return metrics.""" 70 | return self._metrics 71 | 72 | def add_metric(self, key, val): 73 | """Add a new metric to the dictionary and detach it.""" 74 | self._metrics[key] = val.detach() 75 | 76 | ######################################################################################################################## 77 | -------------------------------------------------------------------------------- /packnet_sfm/losses/velocity_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from packnet_sfm.utils.image import match_scales 7 | from packnet_sfm.losses.loss_base import LossBase 8 | 9 | 10 | class VelocityLoss(LossBase): 11 | """ 12 | Velocity loss for pose translation. 13 | """ 14 | def __init__(self, **kwargs): 15 | super().__init__() 16 | 17 | def forward(self, pred_pose, gt_pose_context, **kwargs): 18 | """ 19 | Calculates velocity loss. 20 | 21 | Parameters 22 | ---------- 23 | pred_pose : list of Pose 24 | Predicted pose transformation between origin and reference 25 | gt_pose_context : list of Pose 26 | Ground-truth pose transformation between origin and reference 27 | 28 | Returns 29 | ------- 30 | losses_and_metrics : dict 31 | Output dictionary 32 | """ 33 | pred_trans = [pose.mat[:, :3, -1].norm(dim=-1) for pose in pred_pose] 34 | gt_trans = [pose[:, :3, -1].norm(dim=-1) for pose in gt_pose_context] 35 | # Calculate velocity supervision loss 36 | loss = sum([(pred - gt).abs().mean() 37 | for pred, gt in zip(pred_trans, gt_trans)]) / len(gt_trans) 38 | self.add_metric('velocity_loss', loss) 39 | return { 40 | 'loss': loss.unsqueeze(0), 41 | 'metrics': self.metrics, 42 | } 43 | -------------------------------------------------------------------------------- /packnet_sfm/models/GenericSelfSupModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | from packnet_sfm.models.GenericSfmModel import GenericSfmModel 4 | from packnet_sfm.losses.generic_multiview_photometric_loss import GenericMultiViewPhotometricLoss 5 | from packnet_sfm.models.model_utils import merge_outputs 6 | import numpy as np 7 | 8 | 9 | class GenericSelfSupModel(GenericSfmModel): 10 | """ 11 | Model that inherits a depth and pose network from GenericSfmModel and 12 | includes the photometric loss for self-supervised training. 13 | 14 | Parameters 15 | ---------- 16 | depth_net : nn.Module 17 | Depth network to be used 18 | pose_net : nn.Module 19 | Pose network to be used 20 | kwargs : dict 21 | Extra parameters 22 | """ 23 | 24 | def __init__(self, depth_net=None, pose_net=None, **kwargs): 25 | # Initializes GenericSfmModel 26 | super().__init__(depth_net, pose_net, **kwargs) 27 | # Initializes the photometric loss 28 | self._photometric_loss = GenericMultiViewPhotometricLoss(**kwargs) 29 | 30 | @property 31 | def logs(self): 32 | """Return logs.""" 33 | return { 34 | **super().logs, 35 | **self._photometric_loss.logs 36 | } 37 | 38 | @property 39 | def requires_depth_net(self): 40 | return True 41 | 42 | @property 43 | def requires_pose_net(self): 44 | return True 45 | 46 | @property 47 | def requires_gt_depth(self): 48 | return False 49 | 50 | @property 51 | def requires_gt_pose(self): 52 | return False 53 | 54 | def self_supervised_loss(self, image, ref_images, inv_depths, ray_surface, poses, 55 | intrinsics, return_logs=False, progress=0.0): 56 | """ 57 | Calculates the self-supervised photometric loss. 58 | 59 | Parameters 60 | ---------- 61 | image : torch.Tensor [B,3,H,W] 62 | Original image 63 | ref_images : list of torch.Tensor [B,3,H,W] 64 | Reference images from context 65 | inv_depths : torch.Tensor [B,1,H,W] 66 | Predicted inverse depth maps from the original image 67 | poses : list of Pose 68 | List containing predicted poses between original and context images 69 | intrinsics : torch.Tensor [B,3,3] 70 | Camera intrinsics 71 | return_logs : bool 72 | True if logs are stored 73 | progress : 74 | Training progress percentage 75 | 76 | Returns 77 | ------- 78 | output : dict 79 | Dictionary containing a "loss" scalar a "metrics" dictionary 80 | """ 81 | return self._photometric_loss( 82 | image, ref_images, inv_depths, ray_surface, intrinsics, intrinsics, poses, 83 | return_logs=return_logs, progress=progress) 84 | 85 | def forward(self, batch, return_logs=True, force_flip=False, progress=0.0): 86 | """ 87 | Processes a batch. 88 | 89 | Parameters 90 | ---------- 91 | batch : dict 92 | Input batch 93 | return_logs : boolf 94 | True if logs are stored 95 | progress : 96 | Training progress percentage 97 | 98 | Returns 99 | ------- 100 | output : dict 101 | Dictionary containing a "loss" scalar and different metrics and predictions 102 | for logging and downstream usage. 103 | """ 104 | # Calculate predicted depth and pose output 105 | output = super().forward(batch, return_logs=return_logs) 106 | 107 | if not self.training: 108 | # If not training, no need for self-supervised loss 109 | return output 110 | else: 111 | # Otherwise, calculate self-supervised loss 112 | self_sup_output = self.self_supervised_loss( 113 | batch['rgb_original'], batch['rgb_context_original'], 114 | output['inv_depths'], output['ray_surface'], output['poses'], batch['intrinsics'], 115 | return_logs=return_logs, progress=progress) 116 | # Return loss and metrics 117 | return { 118 | 'loss': self_sup_output['loss'], 119 | **merge_outputs(output, self_sup_output), 120 | } 121 | -------------------------------------------------------------------------------- /packnet_sfm/models/GenericSfmModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import random 4 | import torch.nn as nn 5 | from packnet_sfm.geometry.pose import Pose 6 | from packnet_sfm.utils.misc import make_list 7 | from packnet_sfm.models.SfmModel import SfmModel 8 | import torch.nn.functional as F 9 | 10 | class GenericSfmModel(SfmModel): 11 | """ 12 | Model class encapsulating a pose and depth networks. 13 | 14 | Parameters 15 | ---------- 16 | depth_net : nn.Module 17 | Depth network to be used 18 | pose_net : nn.Module 19 | Pose network to be used 20 | rotation_mode : str 21 | Rotation mode for the pose network 22 | flip_lr_prob : float 23 | Probability of flipping when using the depth network 24 | upsample_depth_maps : bool 25 | True if depth map scales are upsampled to highest resolution 26 | kwargs : dict 27 | Extra parameters 28 | """ 29 | 30 | def flip_model(self, model, image, flip): 31 | """ 32 | Flip input image and flip output inverse depth map 33 | 34 | Parameters 35 | ---------- 36 | model : nn.Module 37 | Module to be used 38 | image : torch.Tensor [B,3,H,W] 39 | Input image 40 | flip : bool 41 | True if the flip is happening 42 | 43 | Returns 44 | ------- 45 | inv_depths : list of torch.Tensor [B,1,H,W] 46 | List of predicted inverse depth maps 47 | """ 48 | if flip: 49 | return [flip_lr(inv_depth) for inv_depth in model(flip_lr(image))] 50 | else: 51 | return model(image) 52 | 53 | def interpolate_scales(self, images, shape=None, mode='bilinear', align_corners=False): 54 | """ 55 | Interpolate list of images to the same shape 56 | 57 | Parameters 58 | ---------- 59 | images : list of torch.Tensor [B,?,?,?] 60 | Images to be interpolated, with different resolutions 61 | shape : tuple (H, W) 62 | Output shape 63 | mode : str 64 | Interpolation mode 65 | align_corners : bool 66 | True if corners will be aligned after interpolation 67 | 68 | Returns 69 | ------- 70 | images : list of torch.Tensor [B,?,H,W] 71 | Interpolated images, with the same resolution 72 | """ 73 | # If no shape is provided, interpolate to highest resolution 74 | if shape is None: 75 | shape = images[0].shape 76 | # Take last two dimensions as shape 77 | if len(shape) > 2: 78 | shape = shape[-2:] 79 | # Interpolate all images 80 | return [F.interpolate(image, shape, mode=mode, 81 | align_corners=align_corners) for image in images] 82 | 83 | def compute_depth_net(self, image): 84 | """Computes inverse depth maps from single images""" 85 | # Randomly flip and estimate inverse depth maps 86 | inv_depths, raysurf = self.flip_model(self.depth_net, image, False) 87 | inv_depths = make_list(inv_depths) 88 | # If upsampling depth maps 89 | if self.upsample_depth_maps: 90 | inv_depths = self.interpolate_scales( 91 | inv_depths, mode='nearest', align_corners=None) 92 | # Return inverse depth maps 93 | return inv_depths, raysurf 94 | 95 | def forward(self, batch, return_logs=False): 96 | """ 97 | Processes a batch. 98 | 99 | Parameters 100 | ---------- 101 | batch : dict 102 | Input batch 103 | return_logs : bool 104 | True if logs are stored 105 | 106 | Returns 107 | ------- 108 | output : dict 109 | Dictionary containing predicted inverse depth maps and poses 110 | """ 111 | #print(logs) 112 | # Generate inverse depth predictions 113 | inv_depths, raysurf = self.compute_depth_net(batch['rgb']) 114 | # Generate pose predictions if available 115 | pose = None 116 | if 'rgb_context' in batch and self.pose_net is not None: 117 | pose = self.compute_poses(batch['rgb'], 118 | #pose = self.compute_pose_net(batch['rgb'], 119 | batch['rgb_context']) 120 | # Return output dictionary 121 | return { 122 | 'inv_depths': inv_depths, 123 | 'poses': pose, 124 | 'ray_surface': raysurf 125 | } 126 | -------------------------------------------------------------------------------- /packnet_sfm/models/SelfSupModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | from packnet_sfm.models.SfmModel import SfmModel 4 | from packnet_sfm.losses.multiview_photometric_loss import MultiViewPhotometricLoss 5 | from packnet_sfm.models.model_utils import merge_outputs 6 | 7 | 8 | class SelfSupModel(SfmModel): 9 | """ 10 | Model that inherits a depth and pose network from SfmModel and 11 | includes the photometric loss for self-supervised training. 12 | 13 | Parameters 14 | ---------- 15 | kwargs : dict 16 | Extra parameters 17 | """ 18 | def __init__(self, **kwargs): 19 | # Initializes SfmModel 20 | super().__init__(**kwargs) 21 | # Initializes the photometric loss 22 | self._photometric_loss = MultiViewPhotometricLoss(**kwargs) 23 | 24 | @property 25 | def logs(self): 26 | """Return logs.""" 27 | return { 28 | **super().logs, 29 | **self._photometric_loss.logs 30 | } 31 | 32 | def self_supervised_loss(self, image, ref_images, inv_depths, poses, 33 | intrinsics, return_logs=False, progress=0.0): 34 | """ 35 | Calculates the self-supervised photometric loss. 36 | 37 | Parameters 38 | ---------- 39 | image : torch.Tensor [B,3,H,W] 40 | Original image 41 | ref_images : list of torch.Tensor [B,3,H,W] 42 | Reference images from context 43 | inv_depths : torch.Tensor [B,1,H,W] 44 | Predicted inverse depth maps from the original image 45 | poses : list of Pose 46 | List containing predicted poses between original and context images 47 | intrinsics : torch.Tensor [B,3,3] 48 | Camera intrinsics 49 | return_logs : bool 50 | True if logs are stored 51 | progress : 52 | Training progress percentage 53 | 54 | Returns 55 | ------- 56 | output : dict 57 | Dictionary containing a "loss" scalar a "metrics" dictionary 58 | """ 59 | return self._photometric_loss( 60 | image, ref_images, inv_depths, intrinsics, intrinsics, poses, 61 | return_logs=return_logs, progress=progress) 62 | 63 | def forward(self, batch, return_logs=False, progress=0.0): 64 | """ 65 | Processes a batch. 66 | 67 | Parameters 68 | ---------- 69 | batch : dict 70 | Input batch 71 | return_logs : bool 72 | True if logs are stored 73 | progress : 74 | Training progress percentage 75 | 76 | Returns 77 | ------- 78 | output : dict 79 | Dictionary containing a "loss" scalar and different metrics and predictions 80 | for logging and downstream usage. 81 | """ 82 | # Calculate predicted depth and pose output 83 | output = super().forward(batch, return_logs=return_logs) 84 | if not self.training: 85 | # If not training, no need for self-supervised loss 86 | return output 87 | else: 88 | # Otherwise, calculate self-supervised loss 89 | self_sup_output = self.self_supervised_loss( 90 | batch['rgb_original'], batch['rgb_context_original'], 91 | output['inv_depths'], output['poses'], batch['intrinsics'], 92 | return_logs=return_logs, progress=progress) 93 | # Return loss and metrics 94 | return { 95 | 'loss': self_sup_output['loss'], 96 | **merge_outputs(output, self_sup_output), 97 | } 98 | -------------------------------------------------------------------------------- /packnet_sfm/models/SemiSupCompletionModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | 5 | from packnet_sfm.models.SelfSupModel import SfmModel, SelfSupModel 6 | from packnet_sfm.losses.supervised_loss import SupervisedLoss 7 | from packnet_sfm.models.model_utils import merge_outputs 8 | from packnet_sfm.utils.depth import depth2inv 9 | 10 | 11 | class SemiSupCompletionModel(SelfSupModel): 12 | """ 13 | Semi-Supervised model for depth prediction and completion. 14 | 15 | Parameters 16 | ---------- 17 | supervised_loss_weight : float 18 | Weight for the supervised loss 19 | kwargs : dict 20 | Extra parameters 21 | """ 22 | def __init__(self, supervised_loss_weight=0.9, weight_rgbd=1.0, **kwargs): 23 | # Initializes SelfSupModel 24 | super().__init__(**kwargs) 25 | # If supervision weight is 0.0, use SelfSupModel directly 26 | assert 0. < supervised_loss_weight <= 1., "Model requires (0, 1] supervision" 27 | # Store weight and initializes supervised loss 28 | self.supervised_loss_weight = supervised_loss_weight 29 | self._supervised_loss = SupervisedLoss(**kwargs) 30 | 31 | # Pose network is only required if there is self-supervision 32 | if self.supervised_loss_weight == 1: 33 | self._network_requirements.remove('pose_net') 34 | # GT depth is only required if there is supervision 35 | if self.supervised_loss_weight > 0: 36 | self._train_requirements.append('gt_depth') 37 | 38 | self._input_keys = ['rgb', 'input_depth', 'intrinsics'] 39 | 40 | self.weight_rgbd = weight_rgbd 41 | 42 | @property 43 | def logs(self): 44 | """Return logs.""" 45 | return { 46 | **super().logs, 47 | **self._supervised_loss.logs 48 | } 49 | 50 | def supervised_loss(self, inv_depths, gt_inv_depths, 51 | return_logs=False, progress=0.0): 52 | """ 53 | Calculates the supervised loss. 54 | 55 | Parameters 56 | ---------- 57 | inv_depths : torch.Tensor [B,1,H,W] 58 | Predicted inverse depth maps from the original image 59 | gt_inv_depths : torch.Tensor [B,1,H,W] 60 | Ground-truth inverse depth maps from the original image 61 | return_logs : bool 62 | True if logs are stored 63 | progress : 64 | Training progress percentage 65 | 66 | Returns 67 | ------- 68 | output : dict 69 | Dictionary containing a "loss" scalar a "metrics" dictionary 70 | """ 71 | return self._supervised_loss( 72 | inv_depths, gt_inv_depths, 73 | return_logs=return_logs, progress=progress) 74 | 75 | def forward(self, batch, return_logs=False, progress=0.0, **kwargs): 76 | """ 77 | Processes a batch. 78 | 79 | Parameters 80 | ---------- 81 | batch : dict 82 | Input batch 83 | return_logs : bool 84 | True if logs are stored 85 | progress : 86 | Training progress percentage 87 | 88 | Returns 89 | ------- 90 | output : dict 91 | Dictionary containing a "loss" scalar and different metrics and predictions 92 | for logging and downstream usage. 93 | """ 94 | if not self.training: 95 | # If not training, no need for self-supervised loss 96 | return SfmModel.forward(self, batch, return_logs=return_logs, **kwargs) 97 | else: 98 | if self.supervised_loss_weight == 1.: 99 | # If no self-supervision, no need to calculate loss 100 | self_sup_output = SfmModel.forward(self, batch, return_logs=return_logs, **kwargs) 101 | loss = torch.tensor([0.]).type_as(batch['rgb']) 102 | else: 103 | # Otherwise, calculate and weight self-supervised loss 104 | self_sup_output = SelfSupModel.forward( 105 | self, batch, return_logs=return_logs, progress=progress, **kwargs) 106 | loss = (1.0 - self.supervised_loss_weight) * self_sup_output['loss'] 107 | # Calculate and weight supervised loss 108 | sup_output = self.supervised_loss( 109 | self_sup_output['inv_depths'], depth2inv(batch['depth']), 110 | return_logs=return_logs, progress=progress) 111 | loss += self.supervised_loss_weight * sup_output['loss'] 112 | if 'inv_depths_rgbd' in self_sup_output: 113 | sup_output2 = self.supervised_loss( 114 | self_sup_output['inv_depths_rgbd'], depth2inv(batch['depth']), 115 | return_logs=return_logs, progress=progress) 116 | loss += self.weight_rgbd * self.supervised_loss_weight * sup_output2['loss'] 117 | if 'depth_loss' in self_sup_output: 118 | loss += self_sup_output['depth_loss'] 119 | # Merge and return outputs 120 | return { 121 | 'loss': loss, 122 | **merge_outputs(self_sup_output, sup_output), 123 | } 124 | -------------------------------------------------------------------------------- /packnet_sfm/models/SemiSupModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | 5 | from packnet_sfm.models.SelfSupModel import SfmModel, SelfSupModel 6 | from packnet_sfm.losses.supervised_loss import SupervisedLoss 7 | from packnet_sfm.models.model_utils import merge_outputs 8 | from packnet_sfm.utils.depth import depth2inv 9 | 10 | 11 | class SemiSupModel(SelfSupModel): 12 | """ 13 | Model that inherits a depth and pose networks, plus the self-supervised loss from 14 | SelfSupModel and includes a supervised loss for semi-supervision. 15 | 16 | Parameters 17 | ---------- 18 | supervised_loss_weight : float 19 | Weight for the supervised loss 20 | kwargs : dict 21 | Extra parameters 22 | """ 23 | def __init__(self, supervised_loss_weight=0.9, **kwargs): 24 | # Initializes SelfSupModel 25 | super().__init__(**kwargs) 26 | # If supervision weight is 0.0, use SelfSupModel directly 27 | assert 0. < supervised_loss_weight <= 1., "Model requires (0, 1] supervision" 28 | # Store weight and initializes supervised loss 29 | self.supervised_loss_weight = supervised_loss_weight 30 | self._supervised_loss = SupervisedLoss(**kwargs) 31 | 32 | # Pose network is only required if there is self-supervision 33 | if self.supervised_loss_weight == 1: 34 | self._network_requirements.remove('pose_net') 35 | # GT depth is only required if there is supervision 36 | if self.supervised_loss_weight > 0: 37 | self._train_requirements.append('gt_depth') 38 | 39 | @property 40 | def logs(self): 41 | """Return logs.""" 42 | return { 43 | **super().logs, 44 | **self._supervised_loss.logs 45 | } 46 | 47 | def supervised_loss(self, inv_depths, gt_inv_depths, 48 | return_logs=False, progress=0.0): 49 | """ 50 | Calculates the supervised loss. 51 | 52 | Parameters 53 | ---------- 54 | inv_depths : torch.Tensor [B,1,H,W] 55 | Predicted inverse depth maps from the original image 56 | gt_inv_depths : torch.Tensor [B,1,H,W] 57 | Ground-truth inverse depth maps from the original image 58 | return_logs : bool 59 | True if logs are stored 60 | progress : 61 | Training progress percentage 62 | 63 | Returns 64 | ------- 65 | output : dict 66 | Dictionary containing a "loss" scalar a "metrics" dictionary 67 | """ 68 | return self._supervised_loss( 69 | inv_depths, gt_inv_depths, 70 | return_logs=return_logs, progress=progress) 71 | 72 | def forward(self, batch, return_logs=False, progress=0.0): 73 | """ 74 | Processes a batch. 75 | 76 | Parameters 77 | ---------- 78 | batch : dict 79 | Input batch 80 | return_logs : bool 81 | True if logs are stored 82 | progress : 83 | Training progress percentage 84 | 85 | Returns 86 | ------- 87 | output : dict 88 | Dictionary containing a "loss" scalar and different metrics and predictions 89 | for logging and downstream usage. 90 | """ 91 | if not self.training: 92 | # If not training, no need for self-supervised loss 93 | return SfmModel.forward(self, batch) 94 | else: 95 | if self.supervised_loss_weight == 1.: 96 | # If no self-supervision, no need to calculate loss 97 | self_sup_output = SfmModel.forward(self, batch) 98 | loss = torch.tensor([0.]).type_as(batch['rgb']) 99 | else: 100 | # Otherwise, calculate and weight self-supervised loss 101 | self_sup_output = SelfSupModel.forward(self, batch) 102 | loss = (1.0 - self.supervised_loss_weight) * self_sup_output['loss'] 103 | # Calculate and weight supervised loss 104 | sup_output = self.supervised_loss( 105 | self_sup_output['inv_depths'], depth2inv(batch['depth']), 106 | return_logs=return_logs, progress=progress) 107 | loss += self.supervised_loss_weight * sup_output['loss'] 108 | # Merge and return outputs 109 | return { 110 | 'loss': loss, 111 | **merge_outputs(self_sup_output, sup_output), 112 | } 113 | -------------------------------------------------------------------------------- /packnet_sfm/models/SfmModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import random 4 | 5 | from packnet_sfm.geometry.pose import Pose 6 | from packnet_sfm.models.base_model import BaseModel 7 | from packnet_sfm.models.model_utils import flip_batch_input, flip_output, upsample_output 8 | from packnet_sfm.utils.misc import filter_dict 9 | 10 | 11 | class SfmModel(BaseModel): 12 | """ 13 | Model class encapsulating a pose and depth networks. 14 | 15 | Parameters 16 | ---------- 17 | depth_net : nn.Module 18 | Depth network to be used 19 | pose_net : nn.Module 20 | Pose network to be used 21 | rotation_mode : str 22 | Rotation mode for the pose network 23 | flip_lr_prob : float 24 | Probability of flipping when using the depth network 25 | upsample_depth_maps : bool 26 | True if depth map scales are upsampled to highest resolution 27 | kwargs : dict 28 | Extra parameters 29 | """ 30 | def __init__(self, depth_net=None, pose_net=None, 31 | rotation_mode='euler', flip_lr_prob=0.0, 32 | upsample_depth_maps=False, **kwargs): 33 | super().__init__() 34 | self.depth_net = depth_net 35 | self.pose_net = pose_net 36 | self.rotation_mode = rotation_mode 37 | self.flip_lr_prob = flip_lr_prob 38 | self.upsample_depth_maps = upsample_depth_maps 39 | 40 | self._network_requirements = [ 41 | 'depth_net', 42 | 'pose_net', 43 | ] 44 | 45 | def add_depth_net(self, depth_net): 46 | """Add a depth network to the model""" 47 | self.depth_net = depth_net 48 | 49 | def add_pose_net(self, pose_net): 50 | """Add a pose network to the model""" 51 | self.pose_net = pose_net 52 | 53 | def depth_net_flipping(self, batch, flip): 54 | """ 55 | Runs depth net with the option of flipping 56 | 57 | Parameters 58 | ---------- 59 | batch : dict 60 | Input batch 61 | flip : bool 62 | True if the flip is happening 63 | 64 | Returns 65 | ------- 66 | output : dict 67 | Dictionary with depth network output (e.g. 'inv_depths' and 'uncertainty') 68 | """ 69 | # Which keys are being passed to the depth network 70 | batch_input = {key: batch[key] for key in filter_dict(batch, self._input_keys)} 71 | if flip: 72 | # Run depth network with flipped inputs 73 | output = self.depth_net(**flip_batch_input(batch_input)) 74 | # Flip output back if training 75 | output = flip_output(output) 76 | else: 77 | # Run depth network 78 | output = self.depth_net(**batch_input) 79 | return output 80 | 81 | def compute_depth_net(self, batch, force_flip=False): 82 | """Computes inverse depth maps from single images""" 83 | # Randomly flip and estimate inverse depth maps 84 | flag_flip_lr = random.random() < self.flip_lr_prob if self.training else force_flip 85 | output = self.depth_net_flipping(batch, flag_flip_lr) 86 | # If upsampling depth maps at training time 87 | if self.training and self.upsample_depth_maps: 88 | output = upsample_output(output, mode='nearest', align_corners=None) 89 | # Return inverse depth maps 90 | return output 91 | 92 | def compute_pose_net(self, image, contexts): 93 | """Compute poses from image and a sequence of context images""" 94 | pose_vec = self.pose_net(image, contexts) 95 | return [Pose.from_vec(pose_vec[:, i], self.rotation_mode) 96 | for i in range(pose_vec.shape[1])] 97 | 98 | def forward(self, batch, return_logs=False, force_flip=False): 99 | """ 100 | Processes a batch. 101 | 102 | Parameters 103 | ---------- 104 | batch : dict 105 | Input batch 106 | return_logs : bool 107 | True if logs are stored 108 | force_flip : bool 109 | If true, force batch flipping for inverse depth calculation 110 | 111 | Returns 112 | ------- 113 | output : dict 114 | Dictionary containing the output of depth and pose networks 115 | """ 116 | # Generate inverse depth predictions 117 | depth_output = self.compute_depth_net(batch, force_flip=force_flip) 118 | # Generate pose predictions if available 119 | pose_output = None 120 | if 'rgb_context' in batch and self.pose_net is not None: 121 | pose_output = self.compute_pose_net( 122 | batch['rgb'], batch['rgb_context']) 123 | # Return output dictionary 124 | return { 125 | **depth_output, 126 | 'poses': pose_output, 127 | } 128 | -------------------------------------------------------------------------------- /packnet_sfm/models/VelSupModel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | from packnet_sfm.models.SelfSupModel import SelfSupModel 4 | from packnet_sfm.losses.velocity_loss import VelocityLoss 5 | 6 | 7 | class VelSupModel(SelfSupModel): 8 | """ 9 | Self-supervised model with additional velocity supervision loss. 10 | 11 | Parameters 12 | ---------- 13 | velocity_loss_weight : float 14 | Weight for velocity supervision 15 | kwargs : dict 16 | Extra parameters 17 | """ 18 | def __init__(self, velocity_loss_weight=0.1, **kwargs): 19 | # Initializes SelfSupModel 20 | super().__init__(**kwargs) 21 | # Stores velocity supervision loss weight 22 | self._velocity_loss = VelocityLoss(**kwargs) 23 | self.velocity_loss_weight = velocity_loss_weight 24 | 25 | # GT pose is required 26 | self._train_requirements['gt_pose'] = True 27 | 28 | def forward(self, batch, return_logs=False, progress=0.0): 29 | """ 30 | Processes a batch. 31 | 32 | Parameters 33 | ---------- 34 | batch : dict 35 | Input batch 36 | return_logs : bool 37 | True if logs are stored 38 | progress : 39 | Training progress percentage 40 | 41 | Returns 42 | ------- 43 | output : dict 44 | Dictionary containing a "loss" scalar and different metrics and predictions 45 | for logging and downstream usage. 46 | """ 47 | output = super().forward(batch, return_logs, progress) 48 | if self.training: 49 | # Update self-supervised loss with velocity supervision 50 | velocity_loss = self._velocity_loss(output['poses'], batch['pose_context']) 51 | output['loss'] += self.velocity_loss_weight * velocity_loss['loss'] 52 | return output 53 | -------------------------------------------------------------------------------- /packnet_sfm/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Structure-from-Motion (SfM) Models and wrappers 3 | =============================================== 4 | 5 | - SfmModel is a torch.nn.Module wrapping both a Depth and a Pose network to enable training in a Structure-from-Motion setup (i.e. from videos) 6 | - SelfSupModel is an SfmModel specialized for self-supervised learning (using videos only) 7 | - SemiSupModel is an SfmModel specialized for semi-supervised learning (using videos and depth supervision) 8 | - ModelWrapper is a torch.nn.Module that wraps an SfmModel to enable easy training and eval with a trainer 9 | - ModelCheckpoint enables saving/restoring state of torch.nn.Module objects 10 | 11 | """ 12 | -------------------------------------------------------------------------------- /packnet_sfm/models/base_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class BaseModel(nn.Module): 7 | """ 8 | Base Model class defines APIs for packnet_sfm model wrapper. 9 | 10 | Parameters 11 | ---------- 12 | kwargs : dict 13 | Extra parameters 14 | """ 15 | def __init__(self, **kwargs): 16 | super().__init__() 17 | 18 | self._logs = {} 19 | self._losses = {} 20 | 21 | self._network_requirements = [] # Which networks the model requires 22 | self._train_requirements = [] # Which GT information the model requires at training time 23 | self._input_keys = ['rgb'] # Which input keys are provided to the model 24 | 25 | def _forward_unimplemented(self, *args): 26 | pass 27 | 28 | @property 29 | def logs(self): 30 | """Return logs.""" 31 | return self._logs 32 | 33 | @property 34 | def losses(self): 35 | """Return metrics.""" 36 | return self._losses 37 | 38 | def add_loss(self, key, val): 39 | """Add a new loss to the dictionary and detaches it.""" 40 | self._losses[key] = val.detach() 41 | 42 | @property 43 | def network_requirements(self): 44 | """ 45 | Networks required to run the model 46 | 47 | Returns 48 | ------- 49 | requirements : dict 50 | key : str 51 | Attribute name in model object pointing to corresponding network. 52 | value : str 53 | Task Name. 54 | """ 55 | return self._network_requirements 56 | 57 | @property 58 | def train_requirements(self): 59 | """ 60 | Information required by the model at training stage 61 | 62 | Returns 63 | ------- 64 | requirements : dict 65 | gt_depth : bool 66 | Whether ground truth depth is required by the model at training time 67 | gt_pose : bool 68 | Whether ground truth pose is required by the model at training time 69 | """ 70 | return self._train_requirements 71 | 72 | def add_net(self, network_module, network_name): 73 | """Add a network module as an attribute in the model 74 | 75 | Parameters 76 | ---------- 77 | network_module: torch.nn.Module 78 | 79 | network_name: str 80 | name of the network as well as the attribute in the network. 81 | """ 82 | assert network_name in self._network_requirements, "Network module not required!" 83 | setattr(self, network_name, network_module) 84 | 85 | def forward(self, batch, return_logs=False, **kwargs): 86 | """ 87 | Processes a batch. 88 | 89 | Parameters 90 | ---------- 91 | batch : dict 92 | Input batch 93 | return_logs : bool 94 | True if logs are stored 95 | """ 96 | raise NotImplementedError("Please implement forward function in your own subclass model.") 97 | -------------------------------------------------------------------------------- /packnet_sfm/models/model_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | from packnet_sfm.utils.image import flip_lr, interpolate_scales 4 | from packnet_sfm.utils.misc import filter_dict 5 | from packnet_sfm.utils.types import is_tensor, is_list, is_numpy 6 | 7 | 8 | def flip(tensor, flip_fn): 9 | """ 10 | Flip tensors or list of tensors based on a function 11 | 12 | Parameters 13 | ---------- 14 | tensor : torch.Tensor or list[torch.Tensor] or list[list[torch.Tensor]] 15 | Tensor to be flipped 16 | flip_fn : Function 17 | Flip function 18 | 19 | Returns 20 | ------- 21 | tensor : torch.Tensor or list[torch.Tensor] or list[list[torch.Tensor]] 22 | Flipped tensor or list of tensors 23 | """ 24 | if not is_list(tensor): 25 | return flip_fn(tensor) 26 | else: 27 | if not is_list(tensor[0]): 28 | return [flip_fn(val) for val in tensor] 29 | else: 30 | return [[flip_fn(v) for v in val] for val in tensor] 31 | 32 | 33 | def merge_outputs(*outputs): 34 | """ 35 | Merges model outputs for logging 36 | 37 | Parameters 38 | ---------- 39 | outputs : tuple of dict 40 | Outputs to be merged 41 | 42 | Returns 43 | ------- 44 | output : dict 45 | Dictionary with a "metrics" key containing a dictionary with various metrics and 46 | all other keys that are not "loss" (it is handled differently). 47 | """ 48 | ignore = ['loss'] # Keys to ignore 49 | combine = ['metrics'] # Keys to combine 50 | merge = {key: {} for key in combine} 51 | for output in outputs: 52 | # Iterate over all keys 53 | for key, val in output.items(): 54 | # Combine these keys 55 | if key in combine: 56 | for sub_key, sub_val in output[key].items(): 57 | assert sub_key not in merge[key].keys(), \ 58 | 'Combining duplicated key {} to {}'.format(sub_key, key) 59 | merge[key][sub_key] = sub_val 60 | # Ignore these keys 61 | elif key not in ignore: 62 | assert key not in merge.keys(), \ 63 | 'Adding duplicated key {}'.format(key) 64 | merge[key] = val 65 | return merge 66 | 67 | 68 | def stack_batch(batch): 69 | """ 70 | Stack multi-camera batches (B,N,C,H,W becomes BN,C,H,W) 71 | 72 | Parameters 73 | ---------- 74 | batch : dict 75 | Batch 76 | 77 | Returns 78 | ------- 79 | batch : dict 80 | Stacked batch 81 | """ 82 | # If there is multi-camera information 83 | if len(batch['rgb'].shape) == 5: 84 | assert batch['rgb'].shape[0] == 1, 'Only batch size 1 is supported for multi-cameras' 85 | # Loop over all keys 86 | for key in batch.keys(): 87 | # If list, stack every item 88 | if is_list(batch[key]): 89 | if is_tensor(batch[key][0]) or is_numpy(batch[key][0]): 90 | batch[key] = [sample[0] for sample in batch[key]] 91 | # Else, stack single item 92 | else: 93 | batch[key] = batch[key][0] 94 | return batch 95 | 96 | 97 | def flip_batch_input(batch): 98 | """ 99 | Flip batch input information (copies data first) 100 | 101 | Parameters 102 | ---------- 103 | batch : dict 104 | Batch information 105 | 106 | Returns 107 | ------- 108 | batch : dict 109 | Flipped batch 110 | """ 111 | # Flip tensors 112 | for key in filter_dict(batch, [ 113 | 'rgb', 'rgb_context', 114 | 'input_depth', 'input_depth_context', 115 | ]): 116 | batch[key] = flip(batch[key], flip_lr) 117 | # Flip intrinsics 118 | for key in filter_dict(batch, [ 119 | 'intrinsics' 120 | ]): 121 | batch[key] = batch[key].clone() 122 | batch[key][:, 0, 2] = batch['rgb'].shape[3] - batch[key][:, 0, 2] 123 | # Return flipped batch 124 | return batch 125 | 126 | 127 | def flip_output(output): 128 | """ 129 | Flip output information 130 | 131 | Parameters 132 | ---------- 133 | output : dict 134 | Dictionary of model outputs (e.g. with keys like 'inv_depths' and 'uncertainty') 135 | 136 | Returns 137 | ------- 138 | output : dict 139 | Flipped output 140 | """ 141 | # Flip tensors 142 | for key in filter_dict(output, [ 143 | 'uncertainty', 'logits_semantic', 'ord_probability', 144 | 'inv_depths', 'inv_depths_context', 'inv_depths1', 'inv_depths2', 145 | 'pred_depth', 'pred_depth_context', 'pred_depth1', 'pred_depth2', 146 | 'pred_inv_depth', 'pred_inv_depth_context', 'pred_inv_depth1', 'pred_inv_depth2', 147 | ]): 148 | output[key] = flip(output[key], flip_lr) 149 | return output 150 | 151 | 152 | def upsample_output(output, mode='nearest', align_corners=None): 153 | """ 154 | Upsample multi-scale outputs to full resolution. 155 | 156 | Parameters 157 | ---------- 158 | output : dict 159 | Dictionary of model outputs (e.g. with keys like 'inv_depths' and 'uncertainty') 160 | mode : str 161 | Which interpolation mode is used 162 | align_corners: bool or None 163 | Whether corners will be aligned during interpolation 164 | 165 | Returns 166 | ------- 167 | output : dict 168 | Upsampled output 169 | """ 170 | for key in filter_dict(output, [ 171 | 'inv_depths', 'uncertainty' 172 | ]): 173 | output[key] = interpolate_scales( 174 | output[key], mode=mode, align_corners=align_corners) 175 | for key in filter_dict(output, [ 176 | 'inv_depths_context' 177 | ]): 178 | output[key] = [interpolate_scales( 179 | val, mode=mode, align_corners=align_corners) for val in output[key]] 180 | return output -------------------------------------------------------------------------------- /packnet_sfm/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/networks/__init__.py -------------------------------------------------------------------------------- /packnet_sfm/networks/depth/DepthResNet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch.nn as nn 4 | from functools import partial 5 | 6 | from packnet_sfm.networks.layers.resnet.resnet_encoder import ResnetEncoder 7 | from packnet_sfm.networks.layers.resnet.depth_decoder import DepthDecoder 8 | from packnet_sfm.networks.layers.resnet.layers import disp_to_depth 9 | 10 | ######################################################################################################################## 11 | 12 | class DepthResNet(nn.Module): 13 | """ 14 | Inverse depth network based on the ResNet architecture. 15 | 16 | Parameters 17 | ---------- 18 | version : str 19 | Has a XY format, where: 20 | X is the number of residual layers [18, 34, 50] and 21 | Y is an optional ImageNet pretrained flag added by the "pt" suffix 22 | Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch 23 | kwargs : dict 24 | Extra parameters 25 | """ 26 | def __init__(self, version=None, **kwargs): 27 | super().__init__() 28 | assert version is not None, "DispResNet needs a version" 29 | 30 | num_layers = int(version[:2]) # First two characters are the number of layers 31 | pretrained = version[2:] == 'pt' # If the last characters are "pt", use ImageNet pretraining 32 | assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers) 33 | 34 | self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained) 35 | self.decoder = DepthDecoder(num_ch_enc=self.encoder.num_ch_enc) 36 | self.scale_inv_depth = partial(disp_to_depth, min_depth=0.1, max_depth=100.0) 37 | 38 | def forward(self, rgb): 39 | """ 40 | Runs the network and returns inverse depth maps 41 | (4 scales if training and 1 if not). 42 | """ 43 | x = self.encoder(rgb) 44 | x = self.decoder(x) 45 | disps = [x[('disp', i)] for i in range(4)] 46 | 47 | if self.training: 48 | return { 49 | 'inv_depths': [self.scale_inv_depth(d)[0] for d in disps], 50 | } 51 | else: 52 | return { 53 | 'inv_depths': self.scale_inv_depth(disps[0])[0], 54 | } 55 | 56 | ######################################################################################################################## 57 | -------------------------------------------------------------------------------- /packnet_sfm/networks/depth/RaySurfaceResNet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch.nn as nn 4 | from functools import partial 5 | 6 | from packnet_sfm.networks.layers.resnet.resnet_encoder import ResnetEncoder 7 | from packnet_sfm.networks.layers.resnet.depth_decoder import DepthDecoder 8 | from packnet_sfm.networks.layers.resnet.raysurface_decoder import RaySurfaceDecoder 9 | from packnet_sfm.networks.layers.resnet.layers import disp_to_depth 10 | 11 | ######################################################################################################################## 12 | 13 | class RaySurfaceResNet(nn.Module): 14 | """ 15 | Inverse depth network based on the ResNet architecture. 16 | 17 | Parameters 18 | ---------- 19 | version : str 20 | Has a XY format, where: 21 | X is the number of residual layers [18, 34, 50] and 22 | Y is an optional ImageNet pretrained flag added by the "pt" suffix 23 | Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch 24 | kwargs : dict 25 | Extra parameters 26 | """ 27 | 28 | """ 29 | Ray surface network; decodes to a ray surface and a depth map. 30 | X (int): Number of residual layers [18, 34, 50] 31 | Y (str): If Y == pt, use a pretrained model 32 | """ 33 | 34 | def __init__(self, version=None, **kwargs): 35 | super().__init__() 36 | assert version is not None, "RaySurfaceResNet needs a version" 37 | 38 | num_layers = int(version[:2]) # First two characters are the number of layers 39 | pretrained = version[2:] == 'pt' # If the last characters are "pt", use ImageNet pretraining 40 | assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers) 41 | 42 | self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained) 43 | self.decoder = DepthDecoder(num_ch_enc=self.encoder.num_ch_enc) 44 | self.ray_surf = RaySurfaceDecoder(num_ch_enc=self.encoder.num_ch_enc) 45 | 46 | self.scale_inv_depth = partial(disp_to_depth, min_depth=0.1, max_depth=100.0) 47 | 48 | def forward(self, rgb): 49 | """ 50 | Runs the network and returns inverse depth maps and ray surface 51 | (4 scales if training and 1 if not). 52 | """ 53 | x = self.encoder(rgb) 54 | r = self.ray_surf(x) 55 | x = self.decoder(x) 56 | disps = [x[('disp', i)] for i in range(4)] 57 | 58 | if self.training: 59 | return [self.scale_inv_depth(d)[0] for d in disps], r 60 | else: 61 | return self.scale_inv_depth(disps[0])[0], r 62 | 63 | ######################################################################################################################## 64 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/minkowski.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | 3 | 4 | import MinkowskiEngine as ME 5 | import torch 6 | 7 | 8 | def sparsify_features(x): 9 | """ 10 | Sparsify features 11 | 12 | Parameters 13 | ---------- 14 | x : Dense feature map [B,C,H,W] 15 | 16 | Returns 17 | ------- 18 | Sparse feature map (features only in valid coordinates) 19 | """ 20 | b, c, h, w = x.shape 21 | 22 | u = torch.arange(w).reshape(1, w).repeat([h, 1]) 23 | v = torch.arange(h).reshape(h, 1).repeat([1, w]) 24 | uv = torch.stack([v, u], 2).reshape(-1, 2) 25 | 26 | coords = [uv] * b 27 | feats = [feats.permute(1, 2, 0).reshape(-1, c) for feats in x] 28 | coords, feats = ME.utils.sparse_collate(coords=coords, feats=feats) 29 | return ME.SparseTensor(coordinates=coords, features=feats, device=x.device) 30 | 31 | 32 | def sparsify_depth(x): 33 | """ 34 | Sparsify depth map 35 | 36 | Parameters 37 | ---------- 38 | x : Dense depth map [B,1,H,W] 39 | 40 | Returns 41 | ------- 42 | Sparse depth map (range values only in valid pixels) 43 | """ 44 | b, c, h, w = x.shape 45 | 46 | u = torch.arange(w, device=x.device).reshape(1, w).repeat([h, 1]) 47 | v = torch.arange(h, device=x.device).reshape(h, 1).repeat([1, w]) 48 | uv = torch.stack([v, u], 2) 49 | 50 | idxs = [(d > 0)[0] for d in x] 51 | 52 | coords = [uv[idx] for idx in idxs] 53 | feats = [feats.permute(1, 2, 0)[idx] for idx, feats in zip(idxs, x)] 54 | coords, feats = ME.utils.sparse_collate(coords=coords, feats=feats) 55 | return ME.SparseTensor(coordinates=coords, features=feats, device=x.device) 56 | 57 | 58 | def densify_features(x, shape): 59 | """ 60 | Densify features from a sparse tensor 61 | 62 | Parameters 63 | ---------- 64 | x : Sparse tensor 65 | shape : Dense shape [B,C,H,W] 66 | 67 | Returns 68 | ------- 69 | Dense tensor containing sparse information 70 | """ 71 | stride = x.tensor_stride 72 | coords, feats = x.C.long(), x.F 73 | shape = (shape[0], shape[2] // stride[0], shape[3] // stride[1], feats.shape[1]) 74 | dense = torch.zeros(shape, device=x.device) 75 | dense[coords[:, 0], 76 | coords[:, 1] // stride[0], 77 | coords[:, 2] // stride[1]] = feats 78 | return dense.permute(0, 3, 1, 2).contiguous() 79 | 80 | 81 | def densify_add_features_unc(x, s, u, shape): 82 | """ 83 | Densify and add features considering uncertainty 84 | 85 | Parameters 86 | ---------- 87 | x : Dense tensor [B,C,H,W] 88 | s : Sparse tensor 89 | u : Sparse tensor with uncertainty 90 | shape : Dense tensor shape 91 | 92 | Returns 93 | ------- 94 | Densified sparse tensor with added uncertainty 95 | """ 96 | stride = s.tensor_stride 97 | coords, feats = s.C.long(), s.F 98 | shape = (shape[0], shape[2] // stride[0], shape[3] // stride[1], feats.shape[1]) 99 | 100 | dense = torch.zeros(shape, device=s.device) 101 | dense[coords[:, -1], 102 | coords[:, 0] // stride[0], 103 | coords[:, 1] // stride[1]] = feats 104 | dense = dense.permute(0, 3, 1, 2).contiguous() 105 | 106 | mult = torch.ones(shape, device=s.device) 107 | mult[coords[:, -1], 108 | coords[:, 0] // stride[0], 109 | coords[:, 1] // stride[1]] = 1.0 - u.F 110 | mult = mult.permute(0, 3, 1, 2).contiguous() 111 | 112 | return x * mult + dense 113 | 114 | 115 | def map_add_features(x, s): 116 | """ 117 | Map dense features to sparse tensor and add them. 118 | 119 | Parameters 120 | ---------- 121 | x : Dense tensor [B,C,H,W] 122 | s : Sparse tensor 123 | 124 | Returns 125 | ------- 126 | Sparse tensor with added dense information in valid areas 127 | """ 128 | stride = s.tensor_stride 129 | coords = s.coords.long() 130 | feats = x.permute(0, 2, 3, 1) 131 | feats = feats[coords[:, -1], 132 | coords[:, 0] // stride[0], 133 | coords[:, 1] // stride[1]] 134 | return ME.SparseTensor(coords=coords, feats=feats + s.feats, 135 | coords_manager=s.coords_man, force_creation=True, 136 | tensor_stride=s.tensor_stride) 137 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/minkowski_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import MinkowskiEngine as ME 4 | import torch.nn as nn 5 | 6 | from packnet_sfm.networks.layers.minkowski import \ 7 | sparsify_depth, densify_features, densify_add_features_unc, map_add_features 8 | 9 | 10 | class MinkConv2D(nn.Module): 11 | """ 12 | Minkowski Convolutional Block 13 | 14 | Parameters 15 | ---------- 16 | in_planes : number of input channels 17 | out_planes : number of output channels 18 | kernel_size : convolutional kernel size 19 | stride : convolutional stride 20 | with_uncertainty : with uncertainty or now 21 | add_rgb : add RGB information as channels 22 | """ 23 | def __init__(self, in_planes, out_planes, kernel_size, stride, 24 | with_uncertainty=False, add_rgb=False): 25 | super().__init__() 26 | self.layer3 = nn.Sequential( 27 | ME.MinkowskiConvolution( 28 | in_planes, out_planes * 2, kernel_size=kernel_size, stride=1, dimension=2), 29 | ME.MinkowskiBatchNorm(out_planes * 2), 30 | ME.MinkowskiReLU(inplace=True), 31 | ME.MinkowskiConvolution( 32 | out_planes * 2, out_planes * 2, kernel_size=kernel_size, stride=1, dimension=2), 33 | ME.MinkowskiBatchNorm(out_planes * 2), 34 | ME.MinkowskiReLU(inplace=True), 35 | ME.MinkowskiConvolution( 36 | out_planes * 2, out_planes, kernel_size=kernel_size, stride=1, dimension=2), 37 | ) 38 | 39 | self.layer2 = nn.Sequential( 40 | ME.MinkowskiConvolution( 41 | in_planes, out_planes * 2, kernel_size=kernel_size, stride=1, dimension=2), 42 | ME.MinkowskiBatchNorm(out_planes * 2), 43 | ME.MinkowskiReLU(inplace=True), 44 | ME.MinkowskiConvolution( 45 | out_planes * 2, out_planes, kernel_size=kernel_size, stride=1, dimension=2), 46 | ) 47 | 48 | self.layer1 = nn.Sequential( 49 | ME.MinkowskiConvolution( 50 | in_planes, out_planes, kernel_size=kernel_size, stride=1, dimension=2), 51 | ) 52 | 53 | self.layer_final = nn.Sequential( 54 | ME.MinkowskiBatchNorm(out_planes), 55 | ME.MinkowskiReLU(inplace=True) 56 | ) 57 | self.pool = None if stride == 1 else ME.MinkowskiMaxPooling(3, stride, dimension=2) 58 | 59 | self.add_rgb = add_rgb 60 | self.with_uncertainty = with_uncertainty 61 | if with_uncertainty: 62 | self.unc_layer = nn.Sequential( 63 | ME.MinkowskiConvolution( 64 | out_planes, 1, kernel_size=3, stride=1, dimension=2), 65 | ME.MinkowskiSigmoid() 66 | ) 67 | 68 | def forward(self, x): 69 | """ 70 | Processes sparse information 71 | 72 | Parameters 73 | ---------- 74 | x : Sparse tensor 75 | 76 | Returns 77 | ------- 78 | Processed tensor 79 | """ 80 | if self.pool is not None: 81 | x = self.pool(x) 82 | x1 = self.layer1(x) 83 | x2 = self.layer2(x) 84 | x3 = self.layer3(x) 85 | return None, self.layer_final(x1 + x2 + x3) 86 | 87 | 88 | class MinkowskiEncoder(nn.Module): 89 | """ 90 | Depth completion Minkowski Encoder 91 | 92 | Parameters 93 | ---------- 94 | channels : number of channels 95 | with_uncertainty : with uncertainty or not 96 | add_rgb : add RGB information to depth features or not 97 | """ 98 | def __init__(self, channels, with_uncertainty=False, add_rgb=False): 99 | super().__init__() 100 | self.mconvs = nn.ModuleList() 101 | kernel_sizes = [5, 5] + [3] * (len(channels) - 1) 102 | self.mconvs.append( 103 | MinkConv2D(1, channels[0], kernel_sizes[0], 2, 104 | with_uncertainty=with_uncertainty)) 105 | for i in range(0, len(channels) - 1): 106 | self.mconvs.append( 107 | MinkConv2D(channels[i], channels[i+1], kernel_sizes[i+1], 2, 108 | with_uncertainty=with_uncertainty)) 109 | self.d = self.n = self.shape = 0 110 | self.with_uncertainty = with_uncertainty 111 | self.add_rgb = add_rgb 112 | 113 | def prep(self, d): 114 | self.d = sparsify_depth(d) 115 | self.shape = d.shape 116 | self.n = 0 117 | 118 | def forward(self, x=None): 119 | 120 | unc, self.d = self.mconvs[self.n](self.d) 121 | self.n += 1 122 | 123 | if self.with_uncertainty: 124 | out = densify_add_features_unc(x, unc * self.d, unc, self.shape) 125 | else: 126 | out = densify_features(self.d, self.shape) 127 | 128 | if self.add_rgb: 129 | self.d = map_add_features(x, self.d) 130 | 131 | return out 132 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/resnet/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/depth_decoder.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | 12 | from collections import OrderedDict 13 | from .layers import ConvBlock, Conv3x3, upsample 14 | 15 | 16 | class DepthDecoder(nn.Module): 17 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 18 | super(DepthDecoder, self).__init__() 19 | 20 | self.num_output_channels = num_output_channels 21 | self.use_skips = use_skips 22 | self.upsample_mode = 'nearest' 23 | self.scales = scales 24 | 25 | self.num_ch_enc = num_ch_enc 26 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 27 | 28 | # decoder 29 | self.convs = OrderedDict() 30 | for i in range(4, -1, -1): 31 | # upconv_0 32 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 33 | num_ch_out = self.num_ch_dec[i] 34 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 35 | 36 | # upconv_1 37 | num_ch_in = self.num_ch_dec[i] 38 | if self.use_skips and i > 0: 39 | num_ch_in += self.num_ch_enc[i - 1] 40 | num_ch_out = self.num_ch_dec[i] 41 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 42 | 43 | for s in self.scales: 44 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 45 | 46 | self.decoder = nn.ModuleList(list(self.convs.values())) 47 | self.sigmoid = nn.Sigmoid() 48 | 49 | def forward(self, input_features): 50 | self.outputs = {} 51 | 52 | # decoder 53 | x = input_features[-1] 54 | for i in range(4, -1, -1): 55 | x = self.convs[("upconv", i, 0)](x) 56 | x = [upsample(x)] 57 | if self.use_skips and i > 0: 58 | x += [input_features[i - 1]] 59 | x = torch.cat(x, 1) 60 | x = self.convs[("upconv", i, 1)](x) 61 | if i in self.scales: 62 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 63 | 64 | return self.outputs 65 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/resnet/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/layers.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def disp_to_depth(disp, min_depth, max_depth): 13 | """Convert network's sigmoid output into depth prediction 14 | The formula for this conversion is given in the 'additional considerations' 15 | section of the paper. 16 | """ 17 | min_disp = 1 / max_depth 18 | max_disp = 1 / min_depth 19 | scaled_disp = min_disp + (max_disp - min_disp) * disp 20 | depth = 1 / scaled_disp 21 | return scaled_disp, depth 22 | 23 | 24 | class ConvBlock(nn.Module): 25 | """Layer to perform a convolution followed by ELU 26 | """ 27 | def __init__(self, in_channels, out_channels): 28 | super(ConvBlock, self).__init__() 29 | 30 | self.conv = Conv3x3(in_channels, out_channels) 31 | self.nonlin = nn.ELU(inplace=True) 32 | 33 | def forward(self, x): 34 | out = self.conv(x) 35 | out = self.nonlin(out) 36 | return out 37 | 38 | 39 | class Conv3x3(nn.Module): 40 | """Layer to pad and convolve input 41 | """ 42 | def __init__(self, in_channels, out_channels, use_refl=True): 43 | super(Conv3x3, self).__init__() 44 | 45 | if use_refl: 46 | self.pad = nn.ReflectionPad2d(1) 47 | else: 48 | self.pad = nn.ZeroPad2d(1) 49 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 50 | 51 | def forward(self, x): 52 | out = self.pad(x) 53 | out = self.conv(out) 54 | return out 55 | 56 | 57 | def upsample(x): 58 | """Upsample input tensor by a factor of 2 59 | """ 60 | return F.interpolate(x, scale_factor=2, mode="nearest") 61 | 62 | 63 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/resnet/pose_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/pose_decoder.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import torch 9 | import torch.nn as nn 10 | from collections import OrderedDict 11 | 12 | 13 | class PoseDecoder(nn.Module): 14 | def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1): 15 | super(PoseDecoder, self).__init__() 16 | 17 | self.num_ch_enc = num_ch_enc 18 | self.num_input_features = num_input_features 19 | 20 | if num_frames_to_predict_for is None: 21 | num_frames_to_predict_for = num_input_features - 1 22 | self.num_frames_to_predict_for = num_frames_to_predict_for 23 | 24 | self.convs = OrderedDict() 25 | self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1) 26 | self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1) 27 | self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1) 28 | self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1) 29 | 30 | self.relu = nn.ReLU() 31 | 32 | self.net = nn.ModuleList(list(self.convs.values())) 33 | 34 | def forward(self, input_features): 35 | last_features = [f[-1] for f in input_features] 36 | 37 | cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features] 38 | cat_features = torch.cat(cat_features, 1) 39 | 40 | out = cat_features 41 | for i in range(3): 42 | out = self.convs[("pose", i)](out) 43 | if i != 2: 44 | out = self.relu(out) 45 | 46 | out = out.mean(3).mean(2) 47 | 48 | out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) 49 | 50 | axisangle = out[..., :3] 51 | translation = out[..., 3:] 52 | 53 | return axisangle, translation 54 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/resnet/raysurface_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/depth_decoder.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | 12 | from collections import OrderedDict 13 | from .layers import ConvBlock, Conv3x3, upsample 14 | 15 | 16 | class RaySurfaceDecoder(nn.Module): 17 | def __init__(self, num_ch_enc, scales=[0], num_output_channels=3, use_skips=True): 18 | super(RaySurfaceDecoder, self).__init__() 19 | 20 | self.num_output_channels = num_output_channels 21 | self.use_skips = use_skips 22 | self.upsample_mode = 'nearest' 23 | self.scales = scales 24 | 25 | self.num_ch_enc = num_ch_enc 26 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 27 | 28 | # decoder 29 | self.convs = OrderedDict() 30 | for i in range(4, -1, -1): 31 | # upconv_0 32 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 33 | num_ch_out = self.num_ch_dec[i] 34 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 35 | 36 | # upconv_1 37 | num_ch_in = self.num_ch_dec[i] 38 | if self.use_skips and i > 0: 39 | num_ch_in += self.num_ch_enc[i - 1] 40 | num_ch_out = self.num_ch_dec[i] 41 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 42 | 43 | for s in self.scales: 44 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 45 | 46 | self.decoder = nn.ModuleList(list(self.convs.values())) 47 | self.tanh = nn.Tanh() 48 | 49 | def forward(self, input_features): 50 | self.outputs = {} 51 | 52 | # decoder 53 | x = input_features[-1] 54 | for i in range(4, -1, -1): 55 | x = self.convs[("upconv", i, 0)](x) 56 | x = [upsample(x)] 57 | if self.use_skips and i > 0: 58 | x += [input_features[i - 1]] 59 | x = torch.cat(x, 1) 60 | x = self.convs[("upconv", i, 1)](x) 61 | if i in self.scales: 62 | self.outputs[("raysurf", i)] = self.tanh(self.convs[("dispconv", i)](x)) 63 | 64 | return self.outputs 65 | -------------------------------------------------------------------------------- /packnet_sfm/networks/layers/resnet/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/resnet_encoder.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torchvision.models as models 13 | import torch.utils.model_zoo as model_zoo 14 | 15 | 16 | class ResNetMultiImageInput(models.ResNet): 17 | """Constructs a resnet model with varying number of input images. 18 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 19 | """ 20 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 21 | super(ResNetMultiImageInput, self).__init__(block, layers) 22 | self.inplanes = 64 23 | self.conv1 = nn.Conv2d( 24 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 25 | self.bn1 = nn.BatchNorm2d(64) 26 | self.relu = nn.ReLU(inplace=True) 27 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 28 | self.layer1 = self._make_layer(block, 64, layers[0]) 29 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 30 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 31 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 32 | 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 36 | elif isinstance(m, nn.BatchNorm2d): 37 | nn.init.constant_(m.weight, 1) 38 | nn.init.constant_(m.bias, 0) 39 | 40 | 41 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 42 | """Constructs a ResNet model. 43 | Args: 44 | num_layers (int): Number of resnet layers. Must be 18 or 50 45 | pretrained (bool): If True, returns a model pre-trained on ImageNet 46 | num_input_images (int): Number of frames stacked as input 47 | """ 48 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 49 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 50 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 51 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 52 | 53 | if pretrained: 54 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 55 | loaded['conv1.weight'] = torch.cat( 56 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 57 | model.load_state_dict(loaded) 58 | return model 59 | 60 | 61 | class ResnetEncoder(nn.Module): 62 | """Pytorch module for a resnet encoder 63 | """ 64 | def __init__(self, num_layers, pretrained, num_input_images=1): 65 | super(ResnetEncoder, self).__init__() 66 | 67 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 68 | 69 | resnets = {18: models.resnet18, 70 | 34: models.resnet34, 71 | 50: models.resnet50, 72 | 101: models.resnet101, 73 | 152: models.resnet152} 74 | 75 | if num_layers not in resnets: 76 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 77 | 78 | if num_input_images > 1: 79 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 80 | else: 81 | self.encoder = resnets[num_layers](pretrained) 82 | 83 | if num_layers > 34: 84 | self.num_ch_enc[1:] *= 4 85 | 86 | def forward(self, input_image): 87 | self.features = [] 88 | x = (input_image - 0.45) / 0.225 89 | x = self.encoder.conv1(x) 90 | x = self.encoder.bn1(x) 91 | self.features.append(self.encoder.relu(x)) 92 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 93 | self.features.append(self.encoder.layer2(self.features[-1])) 94 | self.features.append(self.encoder.layer3(self.features[-1])) 95 | self.features.append(self.encoder.layer4(self.features[-1])) 96 | 97 | return self.features 98 | -------------------------------------------------------------------------------- /packnet_sfm/networks/pose/PoseNet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from SfmLearner 4 | # https://github.com/ClementPinard/SfmLearner-Pytorch/blob/master/models/PoseExpNet.py 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | ######################################################################################################################## 10 | 11 | def conv_gn(in_planes, out_planes, kernel_size=3): 12 | """ 13 | Convolutional block with GroupNorm 14 | 15 | Parameters 16 | ---------- 17 | in_planes : int 18 | Number of input channels 19 | out_planes : int 20 | Number of output channels 21 | kernel_size : int 22 | Convolutional kernel size 23 | 24 | Returns 25 | ------- 26 | layers : nn.Sequential 27 | Sequence of Conv2D + GroupNorm + ReLU 28 | """ 29 | return nn.Sequential( 30 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, 31 | padding=(kernel_size - 1) // 2, stride=2), 32 | nn.GroupNorm(16, out_planes), 33 | nn.ReLU(inplace=True) 34 | ) 35 | 36 | ######################################################################################################################## 37 | 38 | class PoseNet(nn.Module): 39 | """Pose network """ 40 | 41 | def __init__(self, nb_ref_imgs=2, rotation_mode='euler', **kwargs): 42 | super().__init__() 43 | self.nb_ref_imgs = nb_ref_imgs 44 | self.rotation_mode = rotation_mode 45 | 46 | conv_channels = [16, 32, 64, 128, 256, 256, 256] 47 | self.conv1 = conv_gn(3 * (1 + self.nb_ref_imgs), conv_channels[0], kernel_size=7) 48 | self.conv2 = conv_gn(conv_channels[0], conv_channels[1], kernel_size=5) 49 | self.conv3 = conv_gn(conv_channels[1], conv_channels[2]) 50 | self.conv4 = conv_gn(conv_channels[2], conv_channels[3]) 51 | self.conv5 = conv_gn(conv_channels[3], conv_channels[4]) 52 | self.conv6 = conv_gn(conv_channels[4], conv_channels[5]) 53 | self.conv7 = conv_gn(conv_channels[5], conv_channels[6]) 54 | 55 | self.pose_pred = nn.Conv2d(conv_channels[6], 6 * self.nb_ref_imgs, 56 | kernel_size=1, padding=0) 57 | 58 | self.init_weights() 59 | 60 | def init_weights(self): 61 | for m in self.modules(): 62 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 63 | nn.init.xavier_uniform_(m.weight.data) 64 | if m.bias is not None: 65 | m.bias.data.zero_() 66 | 67 | def forward(self, image, context): 68 | assert (len(context) == self.nb_ref_imgs) 69 | input = [image] 70 | input.extend(context) 71 | input = torch.cat(input, 1) 72 | out_conv1 = self.conv1(input) 73 | out_conv2 = self.conv2(out_conv1) 74 | out_conv3 = self.conv3(out_conv2) 75 | out_conv4 = self.conv4(out_conv3) 76 | out_conv5 = self.conv5(out_conv4) 77 | out_conv6 = self.conv6(out_conv5) 78 | out_conv7 = self.conv7(out_conv6) 79 | 80 | pose = self.pose_pred(out_conv7) 81 | pose = pose.mean(3).mean(2) 82 | pose = 0.01 * pose.view(pose.size(0), self.nb_ref_imgs, 6) 83 | 84 | return pose 85 | 86 | ######################################################################################################################## 87 | -------------------------------------------------------------------------------- /packnet_sfm/networks/pose/PoseResNet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from packnet_sfm.networks.layers.resnet.resnet_encoder import ResnetEncoder 7 | from packnet_sfm.networks.layers.resnet.pose_decoder import PoseDecoder 8 | 9 | ######################################################################################################################## 10 | 11 | class PoseResNet(nn.Module): 12 | """ 13 | Pose network based on the ResNet architecture. 14 | 15 | Parameters 16 | ---------- 17 | version : str 18 | Has a XY format, where: 19 | X is the number of residual layers [18, 34, 50] and 20 | Y is an optional ImageNet pretrained flag added by the "pt" suffix 21 | Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch 22 | kwargs : dict 23 | Extra parameters 24 | """ 25 | def __init__(self, version=None, **kwargs): 26 | super().__init__() 27 | assert version is not None, "PoseResNet needs a version" 28 | 29 | num_layers = int(version[:2]) # First two characters are the number of layers 30 | pretrained = version[2:] == 'pt' # If the last characters are "pt", use ImageNet pretraining 31 | assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers) 32 | 33 | self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained, num_input_images=2) 34 | self.decoder = PoseDecoder(self.encoder.num_ch_enc, num_input_features=1, num_frames_to_predict_for=2) 35 | 36 | def forward(self, target_image, ref_imgs): 37 | """ 38 | Runs the network and returns predicted poses 39 | (1 for each reference image). 40 | """ 41 | outputs = [] 42 | for i, ref_img in enumerate(ref_imgs): 43 | inputs = torch.cat([target_image, ref_img], 1) 44 | axisangle, translation = self.decoder([self.encoder(inputs)]) 45 | outputs.append(torch.cat([translation[:, 0], axisangle[:, 0]], 2)) 46 | pose = torch.cat(outputs, 1) 47 | return pose 48 | 49 | ######################################################################################################################## 50 | 51 | -------------------------------------------------------------------------------- /packnet_sfm/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Trainers 3 | ======== 4 | 5 | Trainer classes providing an easy way to train and evaluate SfM models 6 | when wrapped in a ModelWrapper. 7 | 8 | Inspired by pytorch-lightning. 9 | 10 | """ 11 | 12 | from packnet_sfm.trainers.horovod_trainer import HorovodTrainer 13 | 14 | __all__ = ["HorovodTrainer"] -------------------------------------------------------------------------------- /packnet_sfm/trainers/base_trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | from tqdm import tqdm 5 | from packnet_sfm.utils.logging import prepare_dataset_prefix 6 | 7 | 8 | def sample_to_cuda(data, dtype=None): 9 | if isinstance(data, str): 10 | return data 11 | elif isinstance(data, dict): 12 | return {key: sample_to_cuda(data[key], dtype) for key in data.keys()} 13 | elif isinstance(data, list): 14 | return [sample_to_cuda(val, dtype) for val in data] 15 | else: 16 | # only convert floats (e.g., to half), otherwise preserve (e.g, ints) 17 | dtype = dtype if torch.is_floating_point(data) else None 18 | return data.to('cuda', dtype=dtype) 19 | 20 | 21 | class BaseTrainer: 22 | def __init__(self, min_epochs=0, max_epochs=50, 23 | validate_first=False, checkpoint=None, **kwargs): 24 | 25 | self.min_epochs = min_epochs 26 | self.max_epochs = max_epochs 27 | self.validate_first = validate_first 28 | 29 | self.checkpoint = checkpoint 30 | self.module = None 31 | 32 | @property 33 | def proc_rank(self): 34 | raise NotImplementedError('Not implemented for BaseTrainer') 35 | 36 | @property 37 | def world_size(self): 38 | raise NotImplementedError('Not implemented for BaseTrainer') 39 | 40 | @property 41 | def is_rank_0(self): 42 | return self.proc_rank == 0 43 | 44 | def check_and_save(self, module, output): 45 | if self.checkpoint: 46 | self.checkpoint.check_and_save(module, output) 47 | 48 | def train_progress_bar(self, dataloader, config, ncols=120): 49 | return tqdm(enumerate(dataloader, 0), 50 | unit=' images', unit_scale=self.world_size * config.batch_size, 51 | total=len(dataloader), smoothing=0, 52 | disable=not self.is_rank_0, ncols=ncols, 53 | ) 54 | 55 | def val_progress_bar(self, dataloader, config, n=0, ncols=120): 56 | return tqdm(enumerate(dataloader, 0), 57 | unit=' images', unit_scale=self.world_size * config.batch_size, 58 | total=len(dataloader), smoothing=0, 59 | disable=not self.is_rank_0, ncols=ncols, 60 | desc=prepare_dataset_prefix(config, n) 61 | ) 62 | 63 | def test_progress_bar(self, dataloader, config, n=0, ncols=120): 64 | return tqdm(enumerate(dataloader, 0), 65 | unit=' images', unit_scale=self.world_size * config.batch_size, 66 | total=len(dataloader), smoothing=0, 67 | disable=not self.is_rank_0, ncols=ncols, 68 | desc=prepare_dataset_prefix(config, n) 69 | ) 70 | -------------------------------------------------------------------------------- /packnet_sfm/trainers/horovod_trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import os 4 | import torch 5 | import horovod.torch as hvd 6 | from packnet_sfm.trainers.base_trainer import BaseTrainer, sample_to_cuda 7 | from packnet_sfm.utils.config import prep_logger_and_checkpoint 8 | from packnet_sfm.utils.logging import print_config 9 | from packnet_sfm.utils.logging import AvgMeter 10 | 11 | 12 | class HorovodTrainer(BaseTrainer): 13 | def __init__(self, **kwargs): 14 | super().__init__(**kwargs) 15 | 16 | hvd.init() 17 | torch.set_num_threads(int(os.environ.get("OMP_NUM_THREADS", 1))) 18 | torch.cuda.set_device(hvd.local_rank()) 19 | torch.backends.cudnn.benchmark = True 20 | 21 | self.avg_loss = AvgMeter(50) 22 | self.dtype = kwargs.get("dtype", None) # just for test for now 23 | 24 | @property 25 | def proc_rank(self): 26 | return hvd.rank() 27 | 28 | @property 29 | def world_size(self): 30 | return hvd.size() 31 | 32 | def fit(self, module): 33 | 34 | # Prepare module for training 35 | module.trainer = self 36 | # Update and print module configuration 37 | prep_logger_and_checkpoint(module) 38 | print_config(module.config) 39 | 40 | # Send module to GPU 41 | module = module.to('cuda') 42 | # Configure optimizer and scheduler 43 | module.configure_optimizers() 44 | 45 | # Create distributed optimizer 46 | compression = hvd.Compression.none 47 | optimizer = hvd.DistributedOptimizer(module.optimizer, 48 | named_parameters=module.named_parameters(), compression=compression) 49 | scheduler = module.scheduler 50 | 51 | # Get train and val dataloaders 52 | train_dataloader = module.train_dataloader() 53 | val_dataloaders = module.val_dataloader() 54 | 55 | # Validate before training if requested 56 | if self.validate_first: 57 | validation_output = self.validate(val_dataloaders, module) 58 | self.check_and_save(module, validation_output) 59 | 60 | # Epoch loop 61 | for epoch in range(module.current_epoch, self.max_epochs): 62 | # Train 63 | self.train(train_dataloader, module, optimizer) 64 | # Validation 65 | validation_output = self.validate(val_dataloaders, module) 66 | # Check and save model 67 | self.check_and_save(module, validation_output) 68 | # Update current epoch 69 | module.current_epoch += 1 70 | # Take a scheduler step 71 | scheduler.step() 72 | 73 | def train(self, dataloader, module, optimizer): 74 | # Set module to train 75 | module.train() 76 | # Shuffle dataloader sampler 77 | if hasattr(dataloader.sampler, "set_epoch"): 78 | dataloader.sampler.set_epoch(module.current_epoch) 79 | # Prepare progress bar 80 | progress_bar = self.train_progress_bar( 81 | dataloader, module.config.datasets.train) 82 | # Start training loop 83 | outputs = [] 84 | # For all batches 85 | for i, batch in progress_bar: 86 | # Reset optimizer 87 | optimizer.zero_grad() 88 | # Send samples to GPU and take a training step 89 | batch = sample_to_cuda(batch) 90 | output = module.training_step(batch, i) 91 | # Backprop through loss and take an optimizer step 92 | output['loss'].backward() 93 | optimizer.step() 94 | # Append output to list of outputs 95 | output['loss'] = output['loss'].detach() 96 | outputs.append(output) 97 | # Update progress bar if in rank 0 98 | if self.is_rank_0: 99 | progress_bar.set_description( 100 | 'Epoch {} | Avg.Loss {:.4f}'.format( 101 | module.current_epoch, self.avg_loss(output['loss'].item()))) 102 | # Return outputs for epoch end 103 | return module.training_epoch_end(outputs) 104 | 105 | def validate(self, dataloaders, module): 106 | # Set module to eval 107 | module.eval() 108 | # Start validation loop 109 | all_outputs = [] 110 | # For all validation datasets 111 | for n, dataloader in enumerate(dataloaders): 112 | # Prepare progress bar for that dataset 113 | progress_bar = self.val_progress_bar( 114 | dataloader, module.config.datasets.validation, n) 115 | outputs = [] 116 | # For all batches 117 | for i, batch in progress_bar: 118 | # Send batch to GPU and take a validation step 119 | batch = sample_to_cuda(batch) 120 | output = module.validation_step(batch, i, n) 121 | # Append output to list of outputs 122 | outputs.append(output) 123 | # Append dataset outputs to list of all outputs 124 | all_outputs.append(outputs) 125 | # Return all outputs for epoch end 126 | return module.validation_epoch_end(all_outputs) 127 | 128 | def test(self, module): 129 | # Send module to GPU 130 | module = module.to('cuda', dtype=self.dtype) 131 | # Get test dataloaders 132 | test_dataloaders = module.test_dataloader() 133 | # Run evaluation 134 | self.evaluate(test_dataloaders, module) 135 | 136 | @torch.no_grad() 137 | def evaluate(self, dataloaders, module): 138 | # Set module to eval 139 | module.eval() 140 | # Start evaluation loop 141 | all_outputs = [] 142 | # For all test datasets 143 | for n, dataloader in enumerate(dataloaders): 144 | # Prepare progress bar for that dataset 145 | progress_bar = self.val_progress_bar( 146 | dataloader, module.config.datasets.test, n) 147 | outputs = [] 148 | # For all batches 149 | for i, batch in progress_bar: 150 | # Send batch to GPU and take a test step 151 | batch = sample_to_cuda(batch, self.dtype) 152 | output = module.test_step(batch, i, n) 153 | # Append output to list of outputs 154 | outputs.append(output) 155 | # Append dataset outputs to list of all outputs 156 | all_outputs.append(outputs) 157 | # Return all outputs for epoch end 158 | return module.test_epoch_end(all_outputs) 159 | -------------------------------------------------------------------------------- /packnet_sfm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/utils/__init__.py -------------------------------------------------------------------------------- /packnet_sfm/utils/horovod.py: -------------------------------------------------------------------------------- 1 | 2 | try: 3 | import horovod.torch as hvd 4 | HAS_HOROVOD = True 5 | except ImportError: 6 | HAS_HOROVOD = False 7 | 8 | 9 | def hvd_init(): 10 | if HAS_HOROVOD: 11 | hvd.init() 12 | return HAS_HOROVOD 13 | 14 | def on_rank_0(func): 15 | def wrapper(*args, **kwargs): 16 | if rank() == 0: 17 | func(*args, **kwargs) 18 | return wrapper 19 | 20 | def rank(): 21 | return hvd.rank() if HAS_HOROVOD else 0 22 | 23 | def world_size(): 24 | return hvd.size() if HAS_HOROVOD else 1 25 | 26 | @on_rank_0 27 | def print0(string='\n'): 28 | print(string) 29 | 30 | def reduce_value(value, average, name): 31 | """ 32 | Reduce the mean value of a tensor from all GPUs 33 | 34 | Parameters 35 | ---------- 36 | value : torch.Tensor 37 | Value to be reduced 38 | average : bool 39 | Whether values will be averaged or not 40 | name : str 41 | Value name 42 | 43 | Returns 44 | ------- 45 | value : torch.Tensor 46 | reduced value 47 | """ 48 | return hvd.allreduce(value, average=average, name=name) 49 | -------------------------------------------------------------------------------- /packnet_sfm/utils/logging.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import os 4 | from termcolor import colored 5 | from functools import partial 6 | 7 | from packnet_sfm.utils.horovod import on_rank_0 8 | 9 | 10 | def pcolor(string, color, on_color=None, attrs=None): 11 | """ 12 | Produces a colored string for printing 13 | 14 | Parameters 15 | ---------- 16 | string : str 17 | String that will be colored 18 | color : str 19 | Color to use 20 | on_color : str 21 | Background color to use 22 | attrs : list of str 23 | Different attributes for the string 24 | 25 | Returns 26 | ------- 27 | string: str 28 | Colored string 29 | """ 30 | return colored(string, color, on_color, attrs) 31 | 32 | 33 | def prepare_dataset_prefix(config, dataset_idx): 34 | """ 35 | Concatenates dataset path and split for metrics logging 36 | 37 | Parameters 38 | ---------- 39 | config : CfgNode 40 | Dataset configuration 41 | dataset_idx : int 42 | Dataset index for multiple datasets 43 | 44 | Returns 45 | ------- 46 | prefix : str 47 | Dataset prefix for metrics logging 48 | """ 49 | # Path is always available 50 | prefix = '{}'.format(os.path.splitext(config.path[dataset_idx].split('/')[-1])[0]) 51 | # If split is available and does not contain { character 52 | if config.split[dataset_idx] != '' and '{' not in config.split[dataset_idx]: 53 | prefix += '-{}'.format(os.path.splitext(os.path.basename(config.split[dataset_idx]))[0]) 54 | # If depth type is available 55 | if config.depth_type[dataset_idx] != '': 56 | prefix += '-{}'.format(config.depth_type[dataset_idx]) 57 | # If we are using specific cameras 58 | if len(config.cameras[dataset_idx]) == 1: # only allows single cameras 59 | prefix += '-{}'.format(config.cameras[dataset_idx][0]) 60 | # Return full prefix 61 | return prefix 62 | 63 | 64 | def s3_url(config): 65 | """ 66 | Generate the s3 url where the models will be saved 67 | 68 | Parameters 69 | ---------- 70 | config : CfgNode 71 | Model configuration 72 | 73 | Returns 74 | ------- 75 | url : str 76 | String containing the URL pointing to the s3 bucket 77 | """ 78 | return 'https://s3.console.aws.amazon.com/s3/buckets/{}/{}'.format( 79 | config.checkpoint.s3_path[5:], config.name) 80 | 81 | 82 | @on_rank_0 83 | def print_config(config, color=('blue', 'red', 'cyan'), attrs=('bold', 'dark')): 84 | """ 85 | Prints header for model configuration 86 | 87 | Parameters 88 | ---------- 89 | config : CfgNode 90 | Model configuration 91 | color : list of str 92 | Color pallete for the header 93 | attrs : 94 | Colored string attributes 95 | """ 96 | # Recursive print function 97 | def print_recursive(rec_args, n=2, l=0): 98 | if l == 0: 99 | print(pcolor('config:', color[1], attrs=attrs)) 100 | for key, val in rec_args.items(): 101 | if isinstance(val, dict): 102 | print(pcolor('{} {}:'.format('-' * n, key), color[1], attrs=attrs)) 103 | print_recursive(val, n + 2, l + 1) 104 | else: 105 | print('{}: {}'.format(pcolor('{} {}'.format('-' * n, key), color[2]), val)) 106 | 107 | # Color partial functions 108 | pcolor1 = partial(pcolor, color='blue', attrs=['bold', 'dark']) 109 | pcolor2 = partial(pcolor, color='blue', attrs=['bold']) 110 | # Config and name 111 | line = pcolor1('#' * 120) 112 | path = pcolor1('### Config: ') + \ 113 | pcolor2('{}'.format(config.default.replace('/', '.'))) + \ 114 | pcolor1(' -> ') + \ 115 | pcolor2('{}'.format(config.config.replace('/', '.'))) 116 | name = pcolor1('### Name: ') + \ 117 | pcolor2('{}'.format(config.name)) 118 | # Add wandb link if available 119 | if not config.wandb.dry_run: 120 | name += pcolor1(' -> ') + \ 121 | pcolor2('{}'.format(config.wandb.url)) 122 | # Add s3 link if available 123 | if config.checkpoint.s3_path is not '': 124 | name += pcolor1('\n### s3:') + \ 125 | pcolor2(' {}'.format(config.checkpoint.s3_url)) 126 | # Create header string 127 | header = '%s\n%s\n%s\n%s' % (line, path, name, line) 128 | 129 | # Print header, config and header again 130 | print() 131 | print(header) 132 | print_recursive(config) 133 | print(header) 134 | print() 135 | 136 | 137 | class AvgMeter: 138 | """Average meter for logging""" 139 | def __init__(self, n_max=100): 140 | """ 141 | Initializes a AvgMeter object. 142 | 143 | Parameters 144 | ---------- 145 | n_max : int 146 | Number of steps to average over 147 | """ 148 | self.n_max = n_max 149 | self.values = [] 150 | 151 | def __call__(self, value): 152 | """Appends new value and returns average""" 153 | self.values.append(value) 154 | if len(self.values) > self.n_max: 155 | self.values.pop(0) 156 | return self.get() 157 | 158 | def get(self): 159 | """Get current average""" 160 | return sum(self.values) / len(self.values) 161 | 162 | def reset(self): 163 | """Reset meter""" 164 | self.values.clear() 165 | 166 | def get_and_reset(self): 167 | """Gets current average and resets""" 168 | average = self.get() 169 | self.reset() 170 | return average 171 | -------------------------------------------------------------------------------- /packnet_sfm/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | from packnet_sfm.utils.types import is_list, is_int 4 | 5 | ######################################################################################################################## 6 | 7 | def filter_dict(dictionary, keywords): 8 | """ 9 | Returns only the keywords that are part of a dictionary 10 | 11 | Parameters 12 | ---------- 13 | dictionary : dict 14 | Dictionary for filtering 15 | keywords : list of str 16 | Keywords that will be filtered 17 | 18 | Returns 19 | ------- 20 | keywords : list of str 21 | List containing the keywords that are keys in dictionary 22 | """ 23 | return [key for key in keywords if key in dictionary] 24 | 25 | ######################################################################################################################## 26 | 27 | def make_list(var, n=None): 28 | """ 29 | Wraps the input into a list, and optionally repeats it to be size n 30 | 31 | Parameters 32 | ---------- 33 | var : Any 34 | Variable to be wrapped in a list 35 | n : int 36 | How much the wrapped variable will be repeated 37 | 38 | Returns 39 | ------- 40 | var_list : list 41 | List generated from var 42 | """ 43 | var = var if is_list(var) else [var] 44 | if n is None: 45 | return var 46 | else: 47 | assert len(var) == 1 or len(var) == n, 'Wrong list length for make_list' 48 | return var * n if len(var) == 1 else var 49 | 50 | ######################################################################################################################## 51 | 52 | def same_shape(shape1, shape2): 53 | """ 54 | Checks if two shapes are the same 55 | 56 | Parameters 57 | ---------- 58 | shape1 : tuple 59 | First shape 60 | shape2 : tuple 61 | Second shape 62 | 63 | Returns 64 | ------- 65 | flag : bool 66 | True if both shapes are the same (same length and dimensions) 67 | """ 68 | if len(shape1) != len(shape2): 69 | return False 70 | for i in range(len(shape1)): 71 | if shape1[i] != shape2[i]: 72 | return False 73 | return True 74 | 75 | ######################################################################################################################## 76 | 77 | def parse_crop_borders(borders, shape): 78 | """ 79 | Calculate borders for cropping. 80 | 81 | Parameters 82 | ---------- 83 | borders : tuple 84 | Border input for parsing. Can be one of the following forms: 85 | (int, int, int, int): y, height, x, width 86 | (int, int): y, x --> y, height = image_height - y, x, width = image_width - x 87 | Negative numbers are taken from image borders, according to the shape argument 88 | Float numbers for y and x are treated as percentage, according to the shape argument, 89 | and in this case height and width are centered at that point. 90 | shape : tuple 91 | Image shape (image_height, image_width), used to determine negative crop boundaries 92 | 93 | Returns 94 | ------- 95 | borders : tuple (left, top, right, bottom) 96 | Parsed borders for cropping 97 | """ 98 | # Return full image if there are no borders to crop 99 | if len(borders) == 0: 100 | return 0, 0, shape[1], shape[0] 101 | # Copy borders for modification 102 | borders = list(borders).copy() 103 | # If borders are 4-dimensional 104 | if len(borders) == 4: 105 | borders = [borders[2], borders[0], borders[3], borders[1]] 106 | if is_int(borders[0]): 107 | # If horizontal cropping is integer (regular cropping) 108 | borders[0] += shape[1] if borders[0] < 0 else 0 109 | borders[2] += shape[1] if borders[2] <= 0 else borders[0] 110 | else: 111 | # If horizontal cropping is float (center cropping) 112 | center_w, half_w = borders[0] * shape[1], borders[2] / 2 113 | borders[0] = int(center_w - half_w) 114 | borders[2] = int(center_w + half_w) 115 | if is_int(borders[1]): 116 | # If vertical cropping is integer (regular cropping) 117 | borders[1] += shape[0] if borders[1] < 0 else 0 118 | borders[3] += shape[0] if borders[3] <= 0 else borders[1] 119 | else: 120 | # If vertical cropping is float (center cropping) 121 | center_h, half_h = borders[1] * shape[0], borders[3] / 2 122 | borders[1] = int(center_h - half_h) 123 | borders[3] = int(center_h + half_h) 124 | # If borders are 2-dimensional 125 | elif len(borders) == 2: 126 | borders = [borders[1], borders[0]] 127 | if is_int(borders[0]): 128 | # If cropping is integer (regular cropping) 129 | borders = (max(0, borders[0]), 130 | max(0, borders[1]), 131 | shape[1] + min(0, borders[0]), 132 | shape[0] + min(0, borders[1])) 133 | else: 134 | # If cropping is float (center cropping) 135 | center_w, half_w = borders[0] * shape[1], borders[1] / 2 136 | center_h, half_h = borders[0] * shape[0], borders[1] / 2 137 | borders = (int(center_w - half_w), int(center_h - half_h), 138 | int(center_w + half_w), int(center_h + half_h)) 139 | # Otherwise, invalid 140 | else: 141 | raise NotImplementedError('Crop tuple must have 2 or 4 values.') 142 | # Assert that borders are valid 143 | assert 0 <= borders[0] < borders[2] <= shape[1] and \ 144 | 0 <= borders[1] < borders[3] <= shape[0], 'Crop borders {} are invalid'.format(borders) 145 | # Return updated borders 146 | return borders -------------------------------------------------------------------------------- /packnet_sfm/utils/save.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import numpy as np 4 | import os 5 | 6 | from packnet_sfm.utils.image import write_image 7 | from packnet_sfm.utils.depth import write_depth, inv2depth, viz_inv_depth 8 | from packnet_sfm.utils.logging import prepare_dataset_prefix 9 | 10 | 11 | def save_depth(batch, output, args, dataset, save): 12 | """ 13 | Save depth predictions in various ways 14 | 15 | Parameters 16 | ---------- 17 | batch : dict 18 | Batch from dataloader 19 | output : dict 20 | Output from model 21 | args : tuple 22 | Step arguments 23 | dataset : CfgNode 24 | Dataset configuration 25 | save : CfgNode 26 | Save configuration 27 | """ 28 | # If there is no save folder, don't save 29 | if save.folder is '': 30 | return 31 | 32 | # If we want to save 33 | if save.depth.rgb or save.depth.viz or save.depth.npz or save.depth.png: 34 | # Retrieve useful tensors 35 | rgb = batch['rgb'] 36 | pred_inv_depth = output['inv_depth'] 37 | 38 | # Prepare path strings 39 | filename = batch['filename'] 40 | dataset_idx = 0 if len(args) == 1 else args[1] 41 | save_path = os.path.join(save.folder, 'depth', 42 | prepare_dataset_prefix(dataset, dataset_idx), 43 | os.path.basename(save.pretrained).split('.')[0]) 44 | # Create folder 45 | os.makedirs(save_path, exist_ok=True) 46 | 47 | # For each image in the batch 48 | length = rgb.shape[0] 49 | for i in range(length): 50 | # Save numpy depth maps 51 | if save.depth.npz: 52 | write_depth('{}/{}_depth.npz'.format(save_path, filename[i]), 53 | depth=inv2depth(pred_inv_depth[i]), 54 | intrinsics=batch['intrinsics'][i] if 'intrinsics' in batch else None) 55 | # Save png depth maps 56 | if save.depth.png: 57 | write_depth('{}/{}_depth.png'.format(save_path, filename[i]), 58 | depth=inv2depth(pred_inv_depth[i])) 59 | # Save rgb images 60 | if save.depth.rgb: 61 | rgb_i = rgb[i].permute(1, 2, 0).detach().cpu().numpy() * 255 62 | write_image('{}/{}_rgb.png'.format(save_path, filename[i]), rgb_i) 63 | # Save inverse depth visualizations 64 | if save.depth.viz: 65 | viz_i = viz_inv_depth(pred_inv_depth[i]) * 255 66 | write_image('{}/{}_viz.png'.format(save_path, filename[i]), viz_i) 67 | -------------------------------------------------------------------------------- /packnet_sfm/utils/types.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import yacs 4 | import numpy as np 5 | import torch 6 | 7 | ######################################################################################################################## 8 | 9 | def is_numpy(data): 10 | """Checks if data is a numpy array.""" 11 | return isinstance(data, np.ndarray) 12 | 13 | def is_tensor(data): 14 | """Checks if data is a torch tensor.""" 15 | return type(data) == torch.Tensor 16 | 17 | def is_tuple(data): 18 | """Checks if data is a tuple.""" 19 | return isinstance(data, tuple) 20 | 21 | def is_list(data): 22 | """Checks if data is a list.""" 23 | return isinstance(data, list) 24 | 25 | def is_dict(data): 26 | """Checks if data is a dictionary.""" 27 | return isinstance(data, dict) 28 | 29 | def is_str(data): 30 | """Checks if data is a string.""" 31 | return isinstance(data, str) 32 | 33 | def is_int(data): 34 | """Checks if data is an integer.""" 35 | return isinstance(data, int) 36 | 37 | def is_seq(data): 38 | """Checks if data is a list or tuple.""" 39 | return is_tuple(data) or is_list(data) 40 | 41 | def is_cfg(data): 42 | """Checks if data is a configuration node""" 43 | return type(data) == yacs.config.CfgNode 44 | 45 | ######################################################################################################################## -------------------------------------------------------------------------------- /scripts/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import argparse 4 | import torch 5 | 6 | from packnet_sfm.models.model_wrapper import ModelWrapper 7 | from packnet_sfm.trainers.horovod_trainer import HorovodTrainer 8 | from packnet_sfm.utils.config import parse_test_file 9 | from packnet_sfm.utils.load import set_debug 10 | from packnet_sfm.utils.horovod import hvd_init 11 | 12 | 13 | def parse_args(): 14 | """Parse arguments for training script""" 15 | parser = argparse.ArgumentParser(description='PackNet-SfM evaluation script') 16 | parser.add_argument('--checkpoint', type=str, help='Checkpoint (.ckpt)') 17 | parser.add_argument('--config', type=str, default=None, help='Configuration (.yaml)') 18 | parser.add_argument('--half', action="store_true", help='Use half precision (fp16)') 19 | args = parser.parse_args() 20 | assert args.checkpoint.endswith('.ckpt'), \ 21 | 'You need to provide a .ckpt file as checkpoint' 22 | assert args.config is None or args.config.endswith('.yaml'), \ 23 | 'You need to provide a .yaml file as configuration' 24 | return args 25 | 26 | 27 | def test(ckpt_file, cfg_file, half): 28 | """ 29 | Monocular depth estimation test script. 30 | 31 | Parameters 32 | ---------- 33 | ckpt_file : str 34 | Checkpoint path for a pretrained model 35 | cfg_file : str 36 | Configuration file 37 | half: bool 38 | use half precision (fp16) 39 | """ 40 | # Initialize horovod 41 | hvd_init() 42 | 43 | # Parse arguments 44 | config, state_dict = parse_test_file(ckpt_file, cfg_file) 45 | 46 | # Set debug if requested 47 | set_debug(config.debug) 48 | 49 | # Initialize monodepth model from checkpoint arguments 50 | model_wrapper = ModelWrapper(config) 51 | # Restore model state 52 | model_wrapper.load_state_dict(state_dict) 53 | 54 | # change to half precision for evaluation if requested 55 | config.arch["dtype"] = torch.float16 if half else None 56 | 57 | # Create trainer with args.arch parameters 58 | trainer = HorovodTrainer(**config.arch) 59 | 60 | # Test model 61 | trainer.test(model_wrapper) 62 | 63 | 64 | if __name__ == '__main__': 65 | args = parse_args() 66 | test(args.checkpoint, args.config, args.half) 67 | -------------------------------------------------------------------------------- /scripts/evaluate_depth_maps.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import numpy as np 4 | import os 5 | import torch 6 | 7 | from glob import glob 8 | from argparse import Namespace 9 | from packnet_sfm.utils.depth import load_depth 10 | from tqdm import tqdm 11 | 12 | from packnet_sfm.utils.depth import load_depth, compute_depth_metrics 13 | 14 | 15 | def parse_args(): 16 | """Parse arguments for benchmark script""" 17 | parser = argparse.ArgumentParser(description='PackNet-SfM benchmark script') 18 | parser.add_argument('--pred_folder', type=str, 19 | help='Folder containing predicted depth maps (.npz with key "depth")') 20 | parser.add_argument('--gt_folder', type=str, 21 | help='Folder containing ground-truth depth maps (.npz with key "depth")') 22 | parser.add_argument('--use_gt_scale', action='store_true', 23 | help='Use ground-truth median scaling on predicted depth maps') 24 | parser.add_argument('--min_depth', type=float, default=0., 25 | help='Minimum distance to consider during evaluation') 26 | parser.add_argument('--max_depth', type=float, default=80., 27 | help='Maximum distance to consider during evaluation') 28 | parser.add_argument('--crop', type=str, default='', choices=['', 'garg'], 29 | help='Which crop to use during evaluation') 30 | args = parser.parse_args() 31 | return args 32 | 33 | 34 | def main(args): 35 | # Get and sort ground-truth and predicted files 36 | exts = ('npz', 'png') 37 | gt_files, pred_files = [], [] 38 | for ext in exts: 39 | gt_files.extend(glob(os.path.join(args.gt_folder, '*.{}'.format(ext)))) 40 | pred_files.extend(glob(os.path.join(args.pred_folder, '*.{}'.format(ext)))) 41 | # Sort ground-truth and prediction 42 | gt_files.sort() 43 | pred_files.sort() 44 | # Loop over all files 45 | metrics = [] 46 | progress_bar = tqdm(zip(gt_files, pred_files), total=len(gt_files)) 47 | for gt, pred in progress_bar: 48 | # Get and prepare ground-truth and predictions 49 | gt = torch.tensor(load_depth(gt)).unsqueeze(0).unsqueeze(0) 50 | pred = torch.tensor(load_depth(pred)).unsqueeze(0).unsqueeze(0) 51 | # Calculate metrics 52 | metrics.append(compute_depth_metrics( 53 | args, gt, pred, use_gt_scale=args.use_gt_scale)) 54 | # Get and print average value 55 | metrics = (sum(metrics) / len(metrics)).detach().cpu().numpy() 56 | names = ['abs_rel', 'sqr_rel', 'rmse', 'rmse_log', 'a1', 'a2', 'a3'] 57 | for name, metric in zip(names, metrics): 58 | print('{} = {}'.format(name, metric)) 59 | 60 | 61 | if __name__ == '__main__': 62 | args = parse_args() 63 | main(args) 64 | -------------------------------------------------------------------------------- /scripts/infer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import argparse 4 | import numpy as np 5 | import os 6 | import torch 7 | 8 | from glob import glob 9 | from cv2 import imwrite 10 | 11 | from packnet_sfm.models.model_wrapper import ModelWrapper 12 | from packnet_sfm.datasets.augmentations import resize_image, to_tensor 13 | from packnet_sfm.utils.horovod import hvd_init, rank, world_size, print0 14 | from packnet_sfm.utils.image import load_image 15 | from packnet_sfm.utils.config import parse_test_file 16 | from packnet_sfm.utils.load import set_debug 17 | from packnet_sfm.utils.depth import write_depth, inv2depth, viz_inv_depth 18 | from packnet_sfm.utils.logging import pcolor 19 | 20 | 21 | def is_image(file, ext=('.png', '.jpg',)): 22 | """Check if a file is an image with certain extensions""" 23 | return file.endswith(ext) 24 | 25 | 26 | def parse_args(): 27 | parser = argparse.ArgumentParser(description='PackNet-SfM inference of depth maps from images') 28 | parser.add_argument('--checkpoint', type=str, help='Checkpoint (.ckpt)') 29 | parser.add_argument('--input', type=str, help='Input file or folder') 30 | parser.add_argument('--output', type=str, help='Output file or folder') 31 | parser.add_argument('--image_shape', type=int, nargs='+', default=None, 32 | help='Input and output image shape ' 33 | '(default: checkpoint\'s config.datasets.augmentation.image_shape)') 34 | parser.add_argument('--half', action="store_true", help='Use half precision (fp16)') 35 | parser.add_argument('--save', type=str, choices=['npz', 'png'], default=None, 36 | help='Save format (npz or png). Default is None (no depth map is saved).') 37 | args = parser.parse_args() 38 | assert args.checkpoint.endswith('.ckpt'), \ 39 | 'You need to provide a .ckpt file as checkpoint' 40 | assert args.image_shape is None or len(args.image_shape) == 2, \ 41 | 'You need to provide a 2-dimensional tuple as shape (H,W)' 42 | assert (is_image(args.input) and is_image(args.output)) or \ 43 | (not is_image(args.input) and not is_image(args.input)), \ 44 | 'Input and output must both be images or folders' 45 | return args 46 | 47 | 48 | @torch.no_grad() 49 | def infer_and_save_depth(input_file, output_file, model_wrapper, image_shape, half, save): 50 | """ 51 | Process a single input file to produce and save visualization 52 | 53 | Parameters 54 | ---------- 55 | input_file : str 56 | Image file 57 | output_file : str 58 | Output file, or folder where the output will be saved 59 | model_wrapper : nn.Module 60 | Model wrapper used for inference 61 | image_shape : Image shape 62 | Input image shape 63 | half: bool 64 | use half precision (fp16) 65 | save: str 66 | Save format (npz or png) 67 | """ 68 | if not is_image(output_file): 69 | # If not an image, assume it's a folder and append the input name 70 | os.makedirs(output_file, exist_ok=True) 71 | output_file = os.path.join(output_file, os.path.basename(input_file)) 72 | 73 | # change to half precision for evaluation if requested 74 | dtype = torch.float16 if half else None 75 | 76 | # Load image 77 | image = load_image(input_file) 78 | # Resize and to tensor 79 | image = resize_image(image, image_shape) 80 | image = to_tensor(image).unsqueeze(0) 81 | 82 | # Send image to GPU if available 83 | if torch.cuda.is_available(): 84 | image = image.to('cuda:{}'.format(rank()), dtype=dtype) 85 | 86 | # Depth inference (returns predicted inverse depth) 87 | pred_inv_depth = model_wrapper.depth(image)['inv_depths'][0] 88 | 89 | if save == 'npz' or save == 'png': 90 | # Get depth from predicted depth map and save to different formats 91 | filename = '{}.{}'.format(os.path.splitext(output_file)[0], save) 92 | print('Saving {} to {}'.format( 93 | pcolor(input_file, 'cyan', attrs=['bold']), 94 | pcolor(filename, 'magenta', attrs=['bold']))) 95 | write_depth(filename, depth=inv2depth(pred_inv_depth)) 96 | else: 97 | # Prepare RGB image 98 | rgb = image[0].permute(1, 2, 0).detach().cpu().numpy() * 255 99 | # Prepare inverse depth 100 | viz_pred_inv_depth = viz_inv_depth(pred_inv_depth[0]) * 255 101 | # Concatenate both vertically 102 | image = np.concatenate([rgb, viz_pred_inv_depth], 0) 103 | # Save visualization 104 | print('Saving {} to {}'.format( 105 | pcolor(input_file, 'cyan', attrs=['bold']), 106 | pcolor(output_file, 'magenta', attrs=['bold']))) 107 | imwrite(output_file, image[:, :, ::-1]) 108 | 109 | 110 | def main(args): 111 | 112 | # Initialize horovod 113 | hvd_init() 114 | 115 | # Parse arguments 116 | config, state_dict = parse_test_file(args.checkpoint) 117 | 118 | # If no image shape is provided, use the checkpoint one 119 | image_shape = args.image_shape 120 | if image_shape is None: 121 | image_shape = config.datasets.augmentation.image_shape 122 | 123 | # Set debug if requested 124 | set_debug(config.debug) 125 | 126 | # Initialize model wrapper from checkpoint arguments 127 | model_wrapper = ModelWrapper(config, load_datasets=False) 128 | # Restore monodepth_model state 129 | model_wrapper.load_state_dict(state_dict) 130 | 131 | # change to half precision for evaluation if requested 132 | dtype = torch.float16 if args.half else None 133 | 134 | # Send model to GPU if available 135 | if torch.cuda.is_available(): 136 | model_wrapper = model_wrapper.to('cuda:{}'.format(rank()), dtype=dtype) 137 | 138 | # Set to eval mode 139 | model_wrapper.eval() 140 | 141 | if os.path.isdir(args.input): 142 | # If input file is a folder, search for image files 143 | files = [] 144 | for ext in ['png', 'jpg']: 145 | files.extend(glob((os.path.join(args.input, '*.{}'.format(ext))))) 146 | files.sort() 147 | print0('Found {} files'.format(len(files))) 148 | else: 149 | # Otherwise, use it as is 150 | files = [args.input] 151 | 152 | # Process each file 153 | for fn in files[rank()::world_size()]: 154 | infer_and_save_depth( 155 | fn, args.output, model_wrapper, image_shape, args.half, args.save) 156 | 157 | 158 | if __name__ == '__main__': 159 | args = parse_args() 160 | main(args) 161 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import argparse 4 | 5 | from packnet_sfm.models.model_wrapper import ModelWrapper 6 | from packnet_sfm.models.model_checkpoint import ModelCheckpoint 7 | from packnet_sfm.trainers.horovod_trainer import HorovodTrainer 8 | from packnet_sfm.utils.config import parse_train_file 9 | from packnet_sfm.utils.load import set_debug, filter_args_create 10 | from packnet_sfm.utils.horovod import hvd_init, rank 11 | from packnet_sfm.loggers import WandbLogger 12 | 13 | 14 | def parse_args(): 15 | """Parse arguments for training script""" 16 | parser = argparse.ArgumentParser(description='PackNet-SfM training script') 17 | parser.add_argument('file', type=str, help='Input file (.ckpt or .yaml)') 18 | args = parser.parse_args() 19 | assert args.file.endswith(('.ckpt', '.yaml')), \ 20 | 'You need to provide a .ckpt of .yaml file' 21 | return args 22 | 23 | 24 | def train(file): 25 | """ 26 | Monocular depth estimation training script. 27 | 28 | Parameters 29 | ---------- 30 | file : str 31 | Filepath, can be either a 32 | **.yaml** for a yacs configuration file or a 33 | **.ckpt** for a pre-trained checkpoint file. 34 | """ 35 | # Initialize horovod 36 | hvd_init() 37 | 38 | # Produce configuration and checkpoint from filename 39 | config, ckpt = parse_train_file(file) 40 | 41 | # Set debug if requested 42 | set_debug(config.debug) 43 | 44 | # Wandb Logger 45 | logger = None if config.wandb.dry_run or rank() > 0 \ 46 | else filter_args_create(WandbLogger, config.wandb) 47 | 48 | # model checkpoint 49 | checkpoint = None if config.checkpoint.filepath is '' or rank() > 0 else \ 50 | filter_args_create(ModelCheckpoint, config.checkpoint) 51 | 52 | # Initialize model wrapper 53 | model_wrapper = ModelWrapper(config, resume=ckpt, logger=logger) 54 | 55 | # Create trainer with args.arch parameters 56 | trainer = HorovodTrainer(**config.arch, checkpoint=checkpoint) 57 | 58 | # Train model 59 | trainer.fit(model_wrapper) 60 | 61 | 62 | if __name__ == '__main__': 63 | args = parse_args() 64 | train(args.file) 65 | --------------------------------------------------------------------------------