├── .gitignore
├── LICENSE.md
├── Makefile
├── README.md
├── configs
    ├── default_config.py
    ├── eval_ddad.yaml
    ├── eval_image.yaml
    ├── eval_kitti.yaml
    ├── overfit_ddad.yaml
    ├── overfit_kitti.yaml
    ├── train_ddad.yaml
    ├── train_kitti.yaml
    ├── train_omnicam.yaml
    ├── train_packnet_san_ddad.yaml
    └── train_packnet_san_kitti.yaml
├── docker
    └── Dockerfile
├── docs
    ├── .nojekyll
    ├── README.html
    ├── _modules
    │   ├── index.html
    │   ├── packnet_sfm
    │   │   ├── datasets
    │   │   │   ├── augmentations.html
    │   │   │   ├── dgp_dataset.html
    │   │   │   ├── image_dataset.html
    │   │   │   ├── kitti_dataset.html
    │   │   │   ├── kitti_dataset_utils.html
    │   │   │   └── transforms.html
    │   │   ├── geometry
    │   │   │   ├── camera.html
    │   │   │   ├── camera_utils.html
    │   │   │   ├── pose.html
    │   │   │   └── pose_utils.html
    │   │   ├── losses
    │   │   │   ├── loss_base.html
    │   │   │   ├── multiview_photometric_loss.html
    │   │   │   └── supervised_loss.html
    │   │   ├── models
    │   │   │   ├── SelfSupModel.html
    │   │   │   ├── SemiSupModel.html
    │   │   │   ├── SfmModel.html
    │   │   │   ├── model_checkpoint.html
    │   │   │   ├── model_utils.html
    │   │   │   └── model_wrapper.html
    │   │   ├── networks
    │   │   │   ├── depth
    │   │   │   │   ├── DepthResNet.html
    │   │   │   │   └── PackNet01.html
    │   │   │   ├── layers
    │   │   │   │   ├── packnet
    │   │   │   │   │   └── layers01.html
    │   │   │   │   └── resnet
    │   │   │   │   │   ├── depth_decoder.html
    │   │   │   │   │   ├── layers.html
    │   │   │   │   │   ├── pose_decoder.html
    │   │   │   │   │   └── resnet_encoder.html
    │   │   │   └── pose
    │   │   │   │   ├── PoseNet.html
    │   │   │   │   └── PoseResNet.html
    │   │   ├── trainers
    │   │   │   └── base_trainer.html
    │   │   └── utils
    │   │   │   ├── config.html
    │   │   │   ├── depth.html
    │   │   │   ├── horovod.html
    │   │   │   ├── image.html
    │   │   │   ├── load.html
    │   │   │   ├── logging.html
    │   │   │   ├── misc.html
    │   │   │   ├── reduce.html
    │   │   │   ├── save.html
    │   │   │   └── types.html
    │   └── scripts
    │   │   ├── eval.html
    │   │   ├── infer.html
    │   │   └── train.html
    ├── _sources
    │   ├── README.rst.txt
    │   ├── configs
    │   │   ├── configs.default_config.rst.txt
    │   │   ├── configs.eval_ddad.rst.txt
    │   │   ├── configs.eval_kitti.rst.txt
    │   │   ├── configs.overfit_ddad.rst.txt
    │   │   ├── configs.overfit_kitti.rst.txt
    │   │   ├── configs.rst.txt
    │   │   ├── configs.train_ddad.rst.txt
    │   │   └── configs.train_kitti.rst.txt
    │   ├── datasets
    │   │   ├── KITTIDataset.rst.txt
    │   │   ├── datasets.DGPDataset.rst.txt
    │   │   ├── datasets.ImageDataset.rst.txt
    │   │   ├── datasets.KITTIDataset.rst.txt
    │   │   ├── datasets.KITTIDataset_utils.rst.txt
    │   │   ├── datasets.augmentations.rst.txt
    │   │   ├── datasets.rst.txt
    │   │   └── datasets.transforms.rst.txt
    │   ├── geometry
    │   │   ├── camera.rst.txt
    │   │   ├── geometry.camera.camera.rst.txt
    │   │   ├── geometry.camera.camera_utils.rst.txt
    │   │   ├── geometry.pose.pose.rst.txt
    │   │   ├── geometry.pose.pose_utils.rst.txt
    │   │   ├── geometry.rst.txt
    │   │   └── pose.rst.txt
    │   ├── index.rst.txt
    │   ├── loggers
    │   │   ├── loggers.WandbLogger.rst.txt
    │   │   └── loggers.rst.txt
    │   ├── losses
    │   │   ├── losses.loss_base.rst.txt
    │   │   ├── losses.multiview_photometric_loss.rst.txt
    │   │   ├── losses.rst.txt
    │   │   └── losses.supervised_loss.rst.txt
    │   ├── models
    │   │   ├── models.Checkpoint.rst.txt
    │   │   ├── models.SelfSupModel.rst.txt
    │   │   ├── models.SemiSupModel.rst.txt
    │   │   ├── models.SfmModel.rst.txt
    │   │   ├── models.Utilities.rst.txt
    │   │   ├── models.Wrapper.rst.txt
    │   │   └── models.rst.txt
    │   ├── networks
    │   │   ├── depth
    │   │   │   ├── depth.rst.txt
    │   │   │   ├── networks.depth.DepthResNet.rst.txt
    │   │   │   └── networks.depth.PackNet01.rst.txt
    │   │   ├── layers
    │   │   │   ├── layers.rst.txt
    │   │   │   ├── packnet
    │   │   │   │   ├── layers01.rst.txt
    │   │   │   │   └── packnet.rst.txt
    │   │   │   └── resnet
    │   │   │   │   ├── depth_decoder.rst.txt
    │   │   │   │   ├── layers.rst.txt
    │   │   │   │   ├── pose_decoder.rst.txt
    │   │   │   │   ├── resnet.rst.txt
    │   │   │   │   └── resnet_encoder.rst.txt
    │   │   ├── networks.rst.txt
    │   │   └── pose
    │   │   │   ├── networks.pose.PoseNet.rst.txt
    │   │   │   ├── networks.pose.PoseResNet.rst.txt
    │   │   │   └── pose.rst.txt
    │   ├── scripts
    │   │   ├── scripts.eval.rst.txt
    │   │   ├── scripts.infer.rst.txt
    │   │   ├── scripts.rst.txt
    │   │   └── scripts.train.rst.txt
    │   ├── trainers
    │   │   ├── trainers.BaseTrainer.rst.txt
    │   │   ├── trainers.HorovodTrainer.rst.txt
    │   │   └── trainers.rst.txt
    │   └── utils
    │   │   ├── utils.config.rst.txt
    │   │   ├── utils.depth.rst.txt
    │   │   ├── utils.horovod.rst.txt
    │   │   ├── utils.image.rst.txt
    │   │   ├── utils.load.rst.txt
    │   │   ├── utils.logging.rst.txt
    │   │   ├── utils.misc.rst.txt
    │   │   ├── utils.reduce.rst.txt
    │   │   ├── utils.rst.txt
    │   │   ├── utils.save.rst.txt
    │   │   └── utils.types.rst.txt
    ├── _static
    │   ├── basic.css
    │   ├── css
    │   │   ├── badge_only.css
    │   │   └── theme.css
    │   ├── custom.css
    │   ├── doctools.js
    │   ├── documentation_options.js
    │   ├── file.png
    │   ├── fonts
    │   │   ├── Inconsolata-Bold.ttf
    │   │   ├── Inconsolata-Regular.ttf
    │   │   ├── Inconsolata.ttf
    │   │   ├── Lato-Bold.ttf
    │   │   ├── Lato-Regular.ttf
    │   │   ├── Lato
    │   │   │   ├── lato-bold.eot
    │   │   │   ├── lato-bold.ttf
    │   │   │   ├── lato-bold.woff
    │   │   │   ├── lato-bold.woff2
    │   │   │   ├── lato-bolditalic.eot
    │   │   │   ├── lato-bolditalic.ttf
    │   │   │   ├── lato-bolditalic.woff
    │   │   │   ├── lato-bolditalic.woff2
    │   │   │   ├── lato-italic.eot
    │   │   │   ├── lato-italic.ttf
    │   │   │   ├── lato-italic.woff
    │   │   │   ├── lato-italic.woff2
    │   │   │   ├── lato-regular.eot
    │   │   │   ├── lato-regular.ttf
    │   │   │   ├── lato-regular.woff
    │   │   │   └── lato-regular.woff2
    │   │   ├── RobotoSlab-Bold.ttf
    │   │   ├── RobotoSlab-Regular.ttf
    │   │   ├── RobotoSlab
    │   │   │   ├── roboto-slab-v7-bold.eot
    │   │   │   ├── roboto-slab-v7-bold.ttf
    │   │   │   ├── roboto-slab-v7-bold.woff
    │   │   │   ├── roboto-slab-v7-bold.woff2
    │   │   │   ├── roboto-slab-v7-regular.eot
    │   │   │   ├── roboto-slab-v7-regular.ttf
    │   │   │   ├── roboto-slab-v7-regular.woff
    │   │   │   └── roboto-slab-v7-regular.woff2
    │   │   ├── fontawesome-webfont.eot
    │   │   ├── fontawesome-webfont.svg
    │   │   ├── fontawesome-webfont.ttf
    │   │   ├── fontawesome-webfont.woff
    │   │   └── fontawesome-webfont.woff2
    │   ├── jquery-3.5.1.js
    │   ├── jquery.js
    │   ├── js
    │   │   ├── modernizr.min.js
    │   │   └── theme.js
    │   ├── language_data.js
    │   ├── logo.png
    │   ├── minus.png
    │   ├── plus.png
    │   ├── pygments.css
    │   ├── searchtools.js
    │   ├── underscore-1.3.1.js
    │   └── underscore.js
    ├── configs
    │   ├── configs.default_config.html
    │   ├── configs.eval_ddad.html
    │   ├── configs.eval_kitti.html
    │   ├── configs.html
    │   ├── configs.overfit_ddad.html
    │   ├── configs.overfit_kitti.html
    │   ├── configs.train_ddad.html
    │   └── configs.train_kitti.html
    ├── datasets
    │   ├── KITTIDataset.html
    │   ├── datasets.DGPDataset.html
    │   ├── datasets.ImageDataset.html
    │   ├── datasets.KITTIDataset.html
    │   ├── datasets.KITTIDataset_utils.html
    │   ├── datasets.augmentations.html
    │   ├── datasets.html
    │   └── datasets.transforms.html
    ├── genindex.html
    ├── geometry
    │   ├── camera.html
    │   ├── geometry.camera.camera.html
    │   ├── geometry.camera.camera_utils.html
    │   ├── geometry.html
    │   ├── geometry.pose.pose.html
    │   ├── geometry.pose.pose_utils.html
    │   └── pose.html
    ├── index.html
    ├── loggers
    │   ├── loggers.WandbLogger.html
    │   └── loggers.html
    ├── losses
    │   ├── losses.html
    │   ├── losses.loss_base.html
    │   ├── losses.multiview_photometric_loss.html
    │   └── losses.supervised_loss.html
    ├── models
    │   ├── models.Checkpoint.html
    │   ├── models.SelfSupModel.html
    │   ├── models.SemiSupModel.html
    │   ├── models.SfmModel.html
    │   ├── models.Utilities.html
    │   ├── models.Wrapper.html
    │   └── models.html
    ├── networks
    │   ├── depth
    │   │   ├── depth.html
    │   │   ├── networks.depth.DepthResNet.html
    │   │   └── networks.depth.PackNet01.html
    │   ├── layers
    │   │   ├── layers.html
    │   │   ├── packnet
    │   │   │   ├── layers01.html
    │   │   │   └── packnet.html
    │   │   └── resnet
    │   │   │   ├── depth_decoder.html
    │   │   │   ├── layers.html
    │   │   │   ├── pose_decoder.html
    │   │   │   ├── resnet.html
    │   │   │   └── resnet_encoder.html
    │   ├── networks.html
    │   └── pose
    │   │   ├── networks.pose.PoseNet.html
    │   │   ├── networks.pose.PoseResNet.html
    │   │   └── pose.html
    ├── objects.inv
    ├── py-modindex.html
    ├── scripts
    │   ├── scripts.eval.html
    │   ├── scripts.html
    │   ├── scripts.infer.html
    │   └── scripts.train.html
    ├── search.html
    ├── searchindex.js
    ├── trainers
    │   ├── trainers.BaseTrainer.html
    │   ├── trainers.HorovodTrainer.html
    │   └── trainers.html
    └── utils
    │   ├── utils.config.html
    │   ├── utils.depth.html
    │   ├── utils.horovod.html
    │   ├── utils.html
    │   ├── utils.image.html
    │   ├── utils.load.html
    │   ├── utils.logging.html
    │   ├── utils.misc.html
    │   ├── utils.reduce.html
    │   ├── utils.save.html
    │   └── utils.types.html
├── media
    ├── figs
    │   ├── packnet-ddad.gif
    │   └── tri-logo.png
    └── tests
    │   ├── ddad.png
    │   └── kitti.png
├── packnet_sfm
    ├── __init__.py
    ├── datasets
    │   ├── __init__.py
    │   ├── augmentations.py
    │   ├── dgp_dataset.py
    │   ├── image_dataset.py
    │   ├── kitti_dataset.py
    │   ├── kitti_dataset_utils.py
    │   └── transforms.py
    ├── geometry
    │   ├── __init__.py
    │   ├── camera.py
    │   ├── camera_generic.py
    │   ├── camera_utils.py
    │   ├── pose.py
    │   └── pose_utils.py
    ├── loggers
    │   ├── __init__.py
    │   └── wandb_logger.py
    ├── losses
    │   ├── __init__.py
    │   ├── generic_multiview_photometric_loss.py
    │   ├── loss_base.py
    │   ├── multiview_photometric_loss.py
    │   ├── supervised_loss.py
    │   └── velocity_loss.py
    ├── models
    │   ├── GenericSelfSupModel.py
    │   ├── GenericSfmModel.py
    │   ├── SelfSupModel.py
    │   ├── SemiSupCompletionModel.py
    │   ├── SemiSupModel.py
    │   ├── SfmModel.py
    │   ├── VelSupModel.py
    │   ├── __init__.py
    │   ├── base_model.py
    │   ├── model_checkpoint.py
    │   ├── model_utils.py
    │   └── model_wrapper.py
    ├── networks
    │   ├── __init__.py
    │   ├── depth
    │   │   ├── DepthResNet.py
    │   │   ├── PackNet01.py
    │   │   ├── PackNetSAN01.py
    │   │   ├── PackNetSlim01.py
    │   │   └── RaySurfaceResNet.py
    │   ├── layers
    │   │   ├── minkowski.py
    │   │   ├── minkowski_encoder.py
    │   │   ├── packnet
    │   │   │   └── layers01.py
    │   │   └── resnet
    │   │   │   ├── depth_decoder.py
    │   │   │   ├── layers.py
    │   │   │   ├── pose_decoder.py
    │   │   │   ├── raysurface_decoder.py
    │   │   │   └── resnet_encoder.py
    │   └── pose
    │   │   ├── PoseNet.py
    │   │   └── PoseResNet.py
    ├── trainers
    │   ├── __init__.py
    │   ├── base_trainer.py
    │   └── horovod_trainer.py
    └── utils
    │   ├── __init__.py
    │   ├── config.py
    │   ├── depth.py
    │   ├── horovod.py
    │   ├── image.py
    │   ├── load.py
    │   ├── logging.py
    │   ├── misc.py
    │   ├── reduce.py
    │   ├── save.py
    │   └── types.py
└── scripts
    ├── eval.py
    ├── evaluate_depth_maps.py
    ├── infer.py
    └── train.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Toyota Research Institute (TRI)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Handy commands:
 2 | # - `make docker-build`: builds DOCKERIMAGE (default: `packnet-sfm:latest`)
 3 | PROJECT ?= packnet-sfm
 4 | WORKSPACE ?= /workspace/$(PROJECT)
 5 | DOCKER_IMAGE ?= ${PROJECT}:latest
 6 | 
 7 | SHMSIZE ?= 444G
 8 | WANDB_MODE ?= run
 9 | DOCKER_OPTS := \
10 | 			--name ${PROJECT} \
11 | 			--rm -it \
12 | 			--shm-size=${SHMSIZE} \
13 | 			-e AWS_DEFAULT_REGION \
14 | 			-e AWS_ACCESS_KEY_ID \
15 | 			-e AWS_SECRET_ACCESS_KEY \
16 | 			-e WANDB_API_KEY \
17 | 			-e WANDB_ENTITY \
18 | 			-e WANDB_MODE \
19 | 			-e HOST_HOSTNAME= \
20 | 			-e OMP_NUM_THREADS=1 -e KMP_AFFINITY="granularity=fine,compact,1,0" \
21 | 			-e OMPI_ALLOW_RUN_AS_ROOT=1 \
22 | 			-e OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
23 | 			-e NCCL_DEBUG=VERSION \
24 |             -e DISPLAY=${DISPLAY} \
25 |             -e XAUTHORITY \
26 |             -e NVIDIA_DRIVER_CAPABILITIES=all \
27 | 			-v ~/.aws:/root/.aws \
28 | 			-v /root/.ssh:/root/.ssh \
29 | 			-v ~/.cache:/root/.cache \
30 | 			-v /data:/data \
31 | 			-v /mnt/fsx/:/mnt/fsx \
32 | 			-v /dev/null:/dev/raw1394 \
33 | 			-v /tmp:/tmp \
34 | 			-v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \
35 | 			-v /var/run/docker.sock:/var/run/docker.sock \
36 | 			-v ${PWD}:${WORKSPACE} \
37 | 			-w ${WORKSPACE} \
38 | 			--privileged \
39 | 			--ipc=host \
40 | 			--network=host
41 | 
42 | NGPUS=$(shell nvidia-smi -L | wc -l)
43 | MPI_CMD=mpirun \
44 | 		-allow-run-as-root \
45 | 		-np ${NGPUS} \
46 | 		-H localhost:${NGPUS} \
47 | 		-x MASTER_ADDR=127.0.0.1 \
48 | 		-x MASTER_PORT=23457 \
49 | 		-x HOROVOD_TIMELINE \
50 | 		-x OMP_NUM_THREADS=1 \
51 | 		-x KMP_AFFINITY='granularity=fine,compact,1,0' \
52 | 		-bind-to none -map-by slot -x NCCL_DEBUG=INFO -x NCCL_MIN_NRINGS=4 \
53 | 		--report-bindings
54 | 
55 | 
56 | .PHONY: all clean docker-build docker-overfit-pose
57 | 
58 | all: clean
59 | 
60 | clean:
61 | 	find . -name "*.pyc" | xargs rm -f && \
62 | 	find . -name "__pycache__" | xargs rm -rf
63 | 
64 | docker-build:
65 | 	docker build \
66 | 		-f docker/Dockerfile \
67 | 		-t ${DOCKER_IMAGE} .
68 | 
69 | docker-start-interactive: docker-build
70 | 	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} bash
71 | 
72 | docker-start-jupyter: docker-build
73 | 	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
74 | 		bash -c "jupyter notebook --port=8888 -ip=0.0.0.0 --allow-root --no-browser"
75 | 
76 | docker-run: docker-build
77 | 	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
78 | 		bash -c "${COMMAND}"
79 | 
80 | docker-run-mpi: docker-build
81 | 	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
82 | 		bash -c "${MPI_CMD} ${COMMAND}"


--------------------------------------------------------------------------------
/configs/eval_ddad.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |     name: 'SelfSupModel'
 3 |     depth_net:
 4 |         name: 'PackNet01'
 5 |         version: '1A'
 6 |     pose_net:
 7 |         name: 'PoseNet'
 8 |         version: ''
 9 |     params:
10 |         crop: ''
11 |         min_depth: 0.0
12 |         max_depth: 200.0
13 | datasets:
14 |     augmentation:
15 |         image_shape: (384, 640)
16 |     test:
17 |         dataset: ['DGP']
18 |         path: ['/data/datasets/DDAD/ddad.json']
19 |         split: ['val']
20 |         depth_type: ['lidar']
21 |         cameras: [['camera_01']]
22 | save:
23 |     folder: '/data/save'
24 |     depth:
25 |         rgb: True
26 |         viz: True
27 |         npz: True
28 |         png: True
29 | 


--------------------------------------------------------------------------------
/configs/eval_image.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |     name: 'SelfSupModel'
 3 |     depth_net:
 4 |         name: 'PackNet01'
 5 |         version: '1A'
 6 |     pose_net:
 7 |         name: 'PoseNet'
 8 |         version: ''
 9 | datasets:
10 |     augmentation:
11 |         image_shape: (384, 640)
12 |     test:
13 |         dataset: ['Image']
14 |         path: ['images']
15 |         split: ['{:010d}']
16 | save:
17 |     folder: '/data/save'
18 |     depth:
19 |         rgb: True
20 |         viz: True
21 |         npz: True
22 |         png: True
23 | 


--------------------------------------------------------------------------------
/configs/eval_kitti.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |     name: 'SelfSupModel'
 3 |     depth_net:
 4 |         name: 'PackNet01'
 5 |         version: '1A'
 6 |     pose_net:
 7 |         name: 'PoseNet'
 8 |         version: ''
 9 |     params:
10 |         crop: 'garg'
11 |         min_depth: 0.0
12 |         max_depth: 80.0
13 | datasets:
14 |     augmentation:
15 |         image_shape: (192, 640)
16 |     test:
17 |         dataset: ['KITTI']
18 |         path: ['/data/datasets/KITTI_raw']
19 |         split: ['data_splits/eigen_test_files.txt']
20 |         depth_type: ['velodyne']
21 | save:
22 |     folder: '/data/save'
23 |     depth:
24 |         rgb: True
25 |         viz: True
26 |         npz: True
27 |         png: True
28 | 


--------------------------------------------------------------------------------
/configs/overfit_ddad.yaml:
--------------------------------------------------------------------------------
 1 | arch:
 2 |     max_epochs: 1
 3 | model:
 4 |     name: 'SelfSupModel'
 5 |     optimizer:
 6 |         name: 'Adam'
 7 |         depth:
 8 |             lr: 0.0001
 9 |         pose:
10 |             lr: 0.0001
11 |     scheduler:
12 |         name: 'StepLR'
13 |         step_size: 30
14 |         gamma: 0.5
15 |     depth_net:
16 |         name: 'DepthResNet'
17 |         version: '18pt'
18 |     pose_net:
19 |         name: 'PoseResNet'
20 |         version: '18pt'
21 |     params:
22 |         crop: ''
23 |         min_depth: 0.0
24 |         max_depth: 200.0
25 | datasets:
26 |     augmentation:
27 |         image_shape: (384, 640)
28 |     train:
29 |         batch_size: 4
30 |         dataset: ['DGP']
31 |         path: ['/data/datasets/DDAD_tiny/ddad_tiny.json']
32 |         split: ['train']
33 |         depth_type: ['lidar']
34 |         cameras: [['camera_01']]
35 |         repeat: [500]
36 |     validation:
37 |         dataset: ['DGP']
38 |         path: ['/data/datasets/DDAD_tiny/ddad_tiny.json']
39 |         split: ['train']
40 |         depth_type: ['lidar']
41 |         cameras: [['camera_01']]
42 |     test:
43 |         dataset: ['DGP']
44 |         path: ['/data/datasets/DDAD_tiny/ddad_tiny.json']
45 |         split: ['train']
46 |         depth_type: ['lidar']
47 |         cameras: [['camera_01']]
48 | 


--------------------------------------------------------------------------------
/configs/overfit_kitti.yaml:
--------------------------------------------------------------------------------
 1 | arch:
 2 |     max_epochs: 1
 3 | model:
 4 |     name: 'SelfSupModel'
 5 |     optimizer:
 6 |         name: 'Adam'
 7 |         depth:
 8 |             lr: 0.0002
 9 |         pose:
10 |             lr: 0.0002
11 |     scheduler:
12 |         name: 'StepLR'
13 |         step_size: 30
14 |         gamma: 0.5
15 |     depth_net:
16 |         name: 'DepthResNet'
17 |         version: '18pt'
18 |     pose_net:
19 |         name: 'PoseResNet'
20 |         version: '18pt'
21 |     params:
22 |         crop: 'garg'
23 |         min_depth: 0.0
24 |         max_depth: 80.0
25 | datasets:
26 |     augmentation:
27 |         image_shape: (192, 640)
28 |     train:
29 |         batch_size: 6
30 |         dataset: ['KITTI']
31 |         path: ['/data/datasets/KITTI_tiny']
32 |         split: ['kitti_tiny.txt']
33 |         depth_type: ['velodyne']
34 |         repeat: [1000]
35 |     validation:
36 |         dataset: ['KITTI']
37 |         path: ['/data/datasets/KITTI_tiny']
38 |         split: ['kitti_tiny.txt']
39 |         depth_type: ['velodyne']
40 |     test:
41 |         dataset: ['KITTI']
42 |         path: ['/data/datasets/KITTI_tiny']
43 |         split: ['kitti_tiny.txt']
44 |         depth_type: ['velodyne']
45 | 


--------------------------------------------------------------------------------
/configs/train_ddad.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |     name: 'SelfSupModel'
 3 |     optimizer:
 4 |         name: 'Adam'
 5 |         depth:
 6 |             lr: 0.00009
 7 |         pose:
 8 |             lr: 0.00009
 9 |     scheduler:
10 |         name: 'StepLR'
11 |         step_size: 30
12 |         gamma: 0.5
13 |     depth_net:
14 |         name: 'PackNet01'
15 |         version: '1A'
16 |     pose_net:
17 |         name: 'PoseNet'
18 |         version: ''
19 |     params:
20 |         crop: ''
21 |         min_depth: 0.0
22 |         max_depth: 200.0
23 | datasets:
24 |     augmentation:
25 |         image_shape: (384, 640)
26 |     train:
27 |         batch_size: 2
28 |         num_workers: 8
29 |         dataset: ['DGP']
30 |         path: ['/data/datasets/DDAD/ddad.json']
31 |         split: ['train']
32 |         depth_type: ['lidar']
33 |         cameras: [['camera_01']]
34 |         repeat: [5]
35 |     validation:
36 |         num_workers: 8
37 |         dataset: ['DGP']
38 |         path: ['/data/datasets/DDAD/ddad.json']
39 |         split: ['val']
40 |         depth_type: ['lidar']
41 |         cameras: [['camera_01']]
42 |     test:
43 |         num_workers: 8
44 |         dataset: ['DGP']
45 |         path: ['/data/datasets/DDAD/ddad.json']
46 |         split: ['val']
47 |         depth_type: ['lidar']
48 |         cameras: [['camera_01']]
49 | 


--------------------------------------------------------------------------------
/configs/train_kitti.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |     name: 'SelfSupModel'
 3 |     optimizer:
 4 |         name: 'Adam'
 5 |         depth:
 6 |             lr: 0.0002
 7 |         pose:
 8 |             lr: 0.0002
 9 |     scheduler:
10 |         name: 'StepLR'
11 |         step_size: 30
12 |         gamma: 0.5
13 |     depth_net:
14 |         name: 'PackNet01'
15 |         version: '1A'
16 |     pose_net:
17 |         name: 'PoseNet'
18 |         version: ''
19 |     params:
20 |         crop: 'garg'
21 |         min_depth: 0.0
22 |         max_depth: 80.0
23 | datasets:
24 |     augmentation:
25 |         image_shape: (192, 640)
26 |     train:
27 |         batch_size: 4
28 |         dataset: ['KITTI']
29 |         path: ['/data/datasets/KITTI_raw']
30 |         split: ['data_splits/eigen_zhou_files.txt']
31 |         depth_type: ['velodyne']
32 |         repeat: [2]
33 |     validation:
34 |         dataset: ['KITTI']
35 |         path: ['/data/datasets/KITTI_raw']
36 |         split: ['data_splits/eigen_val_files.txt',
37 |                 'data_splits/eigen_test_files.txt']
38 |         depth_type: ['velodyne']
39 |     test:
40 |         dataset: ['KITTI']
41 |         path: ['/data/datasets/KITTI_raw']
42 |         split: ['data_splits/eigen_test_files.txt']
43 |         depth_type: ['velodyne']
44 | 


--------------------------------------------------------------------------------
/configs/train_omnicam.yaml:
--------------------------------------------------------------------------------
 1 | arch:
 2 |     max_epochs: 50
 3 | model:
 4 |     name: 'GenericSelfSupModel'
 5 |     optimizer:
 6 |         name: 'Adam'
 7 |         depth:
 8 |             lr: 0.0002
 9 |         pose:
10 |             lr: 0.0002
11 |     scheduler:
12 |         name: 'StepLR'
13 |         step_size: 30
14 |         gamma: 0.5
15 |     depth_net:
16 |         name: 'RaySurfaceResNet'
17 |         version: '18pt'
18 |     pose_net:
19 |         name: 'PoseResNet'
20 |         version: '18pt'
21 |     params:
22 |         crop: 'garg'
23 |         min_depth: 0.0
24 |         max_depth: 80.0
25 | datasets:
26 |     augmentation:
27 |         image_shape: (384, 384)
28 |     train:
29 |         batch_size: 1
30 |         dataset: ['Image']
31 |         path: ['/data/datasets/omnicam']
32 |         split: ['{:09}']
33 |         depth_type: ['']
34 |         repeat: [1]
35 |     validation:
36 |         dataset: ['KITTI']
37 |         path: ['/data/datasets/KITTI_tiny']
38 |         split: ['kitti_tiny.txt']
39 |         depth_type: ['velodyne']
40 |     test:
41 |         dataset: ['KITTI']
42 |         path: ['/data/datasets/KITTI_tiny']
43 |         split: ['kitti_tiny.txt']
44 |         depth_type: ['velodyne']
45 | 


--------------------------------------------------------------------------------
/configs/train_packnet_san_ddad.yaml:
--------------------------------------------------------------------------------
 1 | arch:
 2 |     validate_first: True
 3 | model:
 4 |     name: 'SemiSupCompletionModel'
 5 |     loss:
 6 |         supervised_method: 'sparse-silog'
 7 |         supervised_num_scales: 1
 8 |         supervised_loss_weight: 1.0
 9 |     optimizer:
10 |         name: 'Adam'
11 |         depth:
12 |             lr: 0.0001
13 |         pose:
14 |             lr: 0.0001
15 |     scheduler:
16 |         name: 'StepLR'
17 |         step_size: 30
18 |         gamma: 0.5
19 |     depth_net:
20 |         name: 'PackNetSAN01'
21 |         version: '1A'
22 |         dropout: 0.5
23 |     params:
24 |         crop: ''
25 |         min_depth: 0.0
26 |         max_depth: 200.0
27 |         scale_output: 'resize'
28 | datasets:
29 |     augmentation:
30 |         image_shape: (384, 640)
31 |     train:
32 |         batch_size: 1
33 |         num_workers: 8
34 |         dataset: ['DGP']
35 |         path: ['/mnt/fsx/ddad_train_val/ddad.json']
36 |         split: ['train']
37 |         depth_type: ['lidar']
38 |         cameras: [['camera_01'],['camera_05'],['camera_06'],['camera_09']]
39 |         repeat: [5]
40 |     validation:
41 |         num_workers: 8
42 |         dataset: ['DGP']
43 |         path: ['/mnt/fsx/ddad_train_val/ddad.json']
44 |         split: ['val']
45 |         depth_type: ['lidar']
46 |         input_depth_type: ['', '', '', '', 'lidar', 'lidar', 'lidar', 'lidar']
47 |         cameras: [['camera_01'],['camera_05'],['camera_06'],['camera_09'],
48 |                   ['camera_01'],['camera_05'],['camera_06'],['camera_09']]
49 |     test:
50 |         num_workers: 8
51 |         dataset: ['DGP']
52 |         path: ['/mnt/fsx/ddad_train_val/ddad.json']
53 |         split: ['val']
54 |         depth_type: ['lidar']
55 |         cameras: [['camera_01']]
56 | 


--------------------------------------------------------------------------------
/configs/train_packnet_san_kitti.yaml:
--------------------------------------------------------------------------------
 1 | arch:
 2 |     validate_first: True
 3 | model:
 4 |     name: 'SemiSupCompletionModel'
 5 |     loss:
 6 |         supervised_method: 'sparse-silog'
 7 |         supervised_num_scales: 1
 8 |         supervised_loss_weight: 1.0
 9 |     optimizer:
10 |         name: 'Adam'
11 |         depth:
12 |             lr: 0.0001
13 |         pose:
14 |             lr: 0.0001
15 |     scheduler:
16 |         name: 'StepLR'
17 |         step_size: 30
18 |         gamma: 0.5
19 |     depth_net:
20 |         name: 'PackNetSAN01'
21 |         version: '1A'
22 |         dropout: 0.5
23 |     params:
24 |         crop: 'garg'
25 |         min_depth: 0.0
26 |         max_depth: 80.0
27 |         scale_output: 'top-center'
28 | datasets:
29 |     augmentation:
30 |         crop_train_borders: (-352, 0, 0.5, 1216)
31 |         crop_eval_borders: (-352, 0, 0.5, 1216)
32 |     train:
33 |         batch_size: 1
34 |         dataset: ['KITTI']
35 |         path: ['/mnt/fsx/KITTI_raw']
36 |         split: ['data_splits/eigen_zhou_files.txt']
37 |         input_depth_type: ['velodyne']
38 |         depth_type: ['groundtruth']
39 |         repeat: [2]
40 |     validation:
41 |         dataset: ['KITTI']
42 |         path: ['/mnt/fsx/KITTI_raw']
43 |         split: ['data_splits/eigen_test_files.txt',
44 |                 'data_splits/eigen_test_files.txt']
45 |         input_depth_type: ['velodyne','']
46 |         depth_type: ['groundtruth','groundtruth']
47 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | FROM nvidia/cuda:10.2-devel-ubuntu18.04
  4 | 
  5 | ENV PROJECT=packnet-sfm
  6 | ENV PYTORCH_VERSION=1.8.1
  7 | ENV TORCHVISION_VERSION=0.9.1
  8 | ENV CUDNN_VERSION=7.6.5.32-1+cuda10.2
  9 | ENV NCCL_VERSION=2.7.8-1+cuda10.2
 10 | ENV HOROVOD_VERSION=65de4c961d1e5ad2828f2f6c4329072834f27661
 11 | ENV TRT_VERSION=6.0.1.5
 12 | ENV LC_ALL=C.UTF-8
 13 | ENV LANG=C.UTF-8
 14 | 
 15 | ARG python=3.6
 16 | ENV PYTHON_VERSION=${python}
 17 | ENV DEBIAN_FRONTEND=noninteractive
 18 | 
 19 | # Set default shell to /bin/bash
 20 | SHELL ["/bin/bash", "-cu"]
 21 | 
 22 | RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
 23 |     build-essential \
 24 |     cmake \
 25 |     g++-4.8 \
 26 |     git \
 27 |     curl \
 28 |     docker.io \
 29 |     vim \
 30 |     wget \
 31 |     ca-certificates \
 32 |     libcudnn7=${CUDNN_VERSION} \
 33 |     libnccl2=${NCCL_VERSION} \
 34 |     libnccl-dev=${NCCL_VERSION} \
 35 |     libjpeg-dev \
 36 |     libpng-dev \
 37 |     python${PYTHON_VERSION} \
 38 |     python${PYTHON_VERSION}-dev \
 39 |     python3-tk \
 40 |     librdmacm1 \
 41 |     libibverbs1 \
 42 |     ibverbs-providers \
 43 |     libgtk2.0-dev \
 44 |     unzip \
 45 |     bzip2 \
 46 |     htop \
 47 |     gnuplot \
 48 |     ffmpeg
 49 | 
 50 | # Install Open MPI
 51 | RUN mkdir /tmp/openmpi && \
 52 |     cd /tmp/openmpi && \
 53 |     wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz && \
 54 |     tar zxf openmpi-4.0.0.tar.gz && \
 55 |     cd openmpi-4.0.0 && \
 56 |     ./configure --enable-orterun-prefix-by-default && \
 57 |     make -j $(nproc) all && \
 58 |     make install && \
 59 |     ldconfig && \
 60 |     rm -rf /tmp/openmpi
 61 | 
 62 | # Install OpenSSH for MPI to communicate between containers
 63 | RUN apt-get install -y --no-install-recommends openssh-client openssh-server && \
 64 |     mkdir -p /var/run/sshd
 65 | 
 66 | # Allow OpenSSH to talk to containers without asking for confirmation
 67 | RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
 68 |     echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
 69 |     mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
 70 | 
 71 | # Instal Python and pip
 72 | RUN if [[ "${PYTHON_VERSION}" == "3.6" ]]; then \
 73 |     apt-get install -y python${PYTHON_VERSION}-distutils; \
 74 |     fi
 75 | 
 76 | RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python
 77 | 
 78 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
 79 |     python get-pip.py && \
 80 |     rm get-pip.py
 81 | 
 82 | # Install Pydata and other deps
 83 | RUN pip install future typing numpy pandas matplotlib jupyter h5py \
 84 |     awscli boto3 tqdm termcolor path.py pillow-simd opencv-python-headless \
 85 |     mpi4py onnx onnxruntime pycuda yacs cython==0.29.10
 86 | 
 87 | # Install PyTorch
 88 | RUN pip install torch==${PYTORCH_VERSION} \
 89 |     torchvision==${TORCHVISION_VERSION} && ldconfig
 90 | 
 91 | # Install apex
 92 | RUN mkdir /workspace
 93 | WORKDIR /workspace
 94 | RUN git clone https://github.com/NVIDIA/apex.git
 95 | WORKDIR /workspace/apex
 96 | RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
 97 | ENV PYTHONPATH="/workspace/apex:$PYTHONPATH"
 98 | WORKDIR /workspace
 99 | 
100 | # install horovod (for distributed training)
101 | RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
102 |     HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL HOROVOD_WITH_PYTORCH=1 \
103 |     pip install --no-cache-dir git+https://github.com/horovod/horovod.git@${HOROVOD_VERSION} && \
104 |     ldconfig
105 | 
106 | # Settings for S3
107 | RUN aws configure set default.s3.max_concurrent_requests 100 && \
108 |     aws configure set default.s3.max_queue_size 10000
109 | 
110 | # Install Minkowski Engine
111 | ENV TORCH_CUDA_ARCH_LIST=Volta;Turing;Kepler+Tesla
112 | RUN pip install ninja
113 | RUN apt-get update && apt-get install -y libopenblas-dev
114 | WORKDIR /workspace
115 | RUN git clone https://github.com/NVIDIA/MinkowskiEngine.git
116 | RUN cd /workspace/MinkowskiEngine && \
117 |     python setup.py install --force_cuda
118 | 
119 | # Add Tini (cf. https://github.com/jupyter/docker-stacks)
120 | ENV TINI_VERSION v0.19.0
121 | ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
122 | RUN chmod +x /tini
123 | ENTRYPOINT ["/tini", "-g", "--"]
124 | 
125 | # Install DGP (dataset utils)
126 | WORKDIR /workspace
127 | RUN git clone https://github.com/TRI-ML/dgp.git
128 | ENV PYTHONPATH="/workspace/dgp:$PYTHONPATH"
129 | 
130 | # Override DGP wandb with required version
131 | RUN pip install wandb==0.8.21 pyquaternion xarray diskcache tenacity pycocotools
132 | 
133 | # Expose Port for jupyter (8888)
134 | EXPOSE 8888
135 | 
136 | # create project workspace dir
137 | RUN mkdir -p /workspace/experiments
138 | RUN mkdir -p /workspace/${PROJECT}
139 | WORKDIR /workspace/${PROJECT}
140 | 
141 | # Copy project source last (to avoid cache busting)
142 | WORKDIR /workspace/${PROJECT}
143 | COPY . /workspace/${PROJECT}
144 | ENV PYTHONPATH="/workspace/${PROJECT}:$PYTHONPATH"
145 | 


--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/.nojekyll


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.eval_ddad.rst.txt:
--------------------------------------------------------------------------------
 1 | eval_ddad
 2 | =========
 3 | 
 4 | .. code:: yaml
 5 | 
 6 | 	model:
 7 | 	    name: 'SelfSupModel'
 8 | 	    depth_net:
 9 | 		name: 'PackNet01'
10 | 		version: '1A'
11 | 	    pose_net:
12 | 		name: 'PoseNet'
13 | 		version: ''
14 | 	    params:
15 | 		crop: ''
16 | 		min_depth: 0.0
17 | 		max_depth: 200.0
18 | 	datasets:
19 | 	    augmentation:
20 | 		image_shape: (384, 640)
21 | 	    test:
22 | 		dataset: ['DGP']
23 | 		path: ['/data/datasets/DDAD/ddad.json']
24 | 		split: ['val']
25 | 		depth_type: ['lidar']
26 | 		cameras: ['camera_01']
27 | 	save:
28 | 	    folder: '/data/save'
29 | 	    viz: True
30 | 	    npz: True
31 | 


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.eval_kitti.rst.txt:
--------------------------------------------------------------------------------
 1 | eval_kitti
 2 | ==========
 3 | 
 4 | .. code:: yaml
 5 | 
 6 | 	model:
 7 | 	    name: 'SelfSupModel'
 8 | 	    depth_net:
 9 | 		name: 'PackNet01'
10 | 		version: '1A'
11 | 	    pose_net:
12 | 		name: 'PoseNet'
13 | 		version: ''
14 | 	    params:
15 | 		crop: 'garg'
16 | 		min_depth: 0.0
17 | 		max_depth: 80.0
18 | 	datasets:
19 | 	    augmentation:
20 | 		image_shape: (192, 640)
21 | 	    test:
22 | 		dataset: ['KITTI']
23 | 		path: ['/data/datasets/KITTI_raw']
24 | 		split: ['data_splits/eigen_test_files.txt']
25 | 		depth_type: ['velodyne']
26 | 	save:
27 | 	    folder: '/data/save'
28 | 	    viz: True
29 | 	    npz: True
30 | 


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.overfit_ddad.rst.txt:
--------------------------------------------------------------------------------
 1 | overfit_ddad
 2 | ============
 3 | 
 4 | .. code:: yaml
 5 | 
 6 | 	arch:
 7 | 	    max_epochs: 1
 8 | 	model:
 9 | 	    name: 'SelfSupModel'
10 | 	    optimizer:
11 | 		name: 'Adam'
12 | 		depth:
13 | 		    lr: 0.0001
14 | 		pose:
15 | 		    lr: 0.0001
16 | 	    scheduler:
17 | 		name: 'StepLR'
18 | 		step_size: 30
19 | 		gamma: 0.5
20 | 	    depth_net:
21 | 		name: 'DepthResNet'
22 | 		version: '18pt'
23 | 	    pose_net:
24 | 		name: 'PoseResNet'
25 | 		version: '18pt'
26 | 	    params:
27 | 		crop: ''
28 | 		min_depth: 0.0
29 | 		max_depth: 200.0
30 | 	datasets:
31 | 	    augmentation:
32 | 		image_shape: (384, 640)
33 | 	    train:
34 | 		batch_size: 4
35 | 		dataset: ['DGP']
36 | 		path: ['/data/datasets/DDAD_tiny/ddad_tiny.json']
37 | 		split: ['train']
38 | 		depth_type: ['lidar']
39 | 		cameras: ['camera_01']
40 | 		repeat: [300]
41 | 	    validation:
42 | 		dataset: ['DGP']
43 | 		path: ['/data/datasets/DDAD_tiny/ddad_tiny.json']
44 | 		split: ['train']
45 | 		depth_type: ['lidar']
46 | 		cameras: ['camera_01']
47 | 	    test:
48 | 		dataset: ['DGP']
49 | 		path: ['/data/datasets/DDAD_tiny/ddad_tiny.json']
50 | 		split: ['train']
51 | 		depth_type: ['lidar']
52 | 		cameras: ['camera_01']
53 | 
54 | 


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.overfit_kitti.rst.txt:
--------------------------------------------------------------------------------
 1 | overfit_kitti
 2 | =============
 3 | 
 4 | .. code:: yaml
 5 | 
 6 | 	arch:
 7 | 	    max_epochs: 1
 8 | 	model:
 9 | 	    name: 'SelfSupModel'
10 | 	    optimizer:
11 | 		name: 'Adam'
12 | 		depth:
13 | 		    lr: 0.0002
14 | 		pose:
15 | 		    lr: 0.0002
16 | 	    scheduler:
17 | 		name: 'StepLR'
18 | 		step_size: 30
19 | 		gamma: 0.5
20 | 	    depth_net:
21 | 		name: 'DepthResNet'
22 | 		version: '18pt'
23 | 	    pose_net:
24 | 		name: 'PoseResNet'
25 | 		version: '18pt'
26 | 	    params:
27 | 		crop: 'garg'
28 | 		min_depth: 0.0
29 | 		max_depth: 80.0
30 | 	datasets:
31 | 	    augmentation:
32 | 		image_shape: (192, 640)
33 | 	    train:
34 | 		batch_size: 6
35 | 		dataset: ['KITTI']
36 | 		path: ['/data/datasets/KITTI_tiny']
37 | 		split: ['kitti_tiny.txt']
38 | 		depth_type: ['velodyne']
39 | 		repeat: [1000]
40 | 	    validation:
41 | 		dataset: ['KITTI']
42 | 		path: ['/data/datasets/KITTI_tiny']
43 | 		split: ['kitti_tiny.txt']
44 | 		depth_type: ['velodyne']
45 | 	    test:
46 | 		dataset: ['KITTI']
47 | 		path: ['/data/datasets/KITTI_tiny']
48 | 		split: ['kitti_tiny.txt']
49 | 		depth_type: ['velodyne']
50 | 
51 | 


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.rst.txt:
--------------------------------------------------------------------------------
 1 | Configs
 2 | =======
 3 | 
 4 | .. toctree::
 5 |    configs.default_config
 6 |    configs.overfit_kitti
 7 |    configs.overfit_ddad
 8 |    configs.train_kitti
 9 |    configs.train_ddad
10 |    configs.eval_kitti
11 |    configs.eval_ddad
12 | 


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.train_ddad.rst.txt:
--------------------------------------------------------------------------------
 1 | train_ddad
 2 | ==========
 3 | 
 4 | .. code:: yaml
 5 | 
 6 | 	model:
 7 | 	    name: 'SelfSupModel'
 8 | 	    optimizer:
 9 | 		name: 'Adam'
10 | 		depth:
11 | 		    lr: 0.00009
12 | 		pose:
13 | 		    lr: 0.00009
14 | 	    scheduler:
15 | 		name: 'StepLR'
16 | 		step_size: 30
17 | 		gamma: 0.5
18 | 	    depth_net:
19 | 		name: 'PackNet01'
20 | 		version: '1A'
21 | 	    pose_net:
22 | 		name: 'PoseNet'
23 | 		version: ''
24 | 	    params:
25 | 		crop: ''
26 | 		min_depth: 0.0
27 | 		max_depth: 200.0
28 | 	datasets:
29 | 	    augmentation:
30 | 		image_shape: (384, 640)
31 | 	    train:
32 | 		batch_size: 2
33 | 		num_workers: 8
34 | 		dataset: ['DGP']
35 | 		path: ['/data/datasets/DDAD/ddad.json']
36 | 		split: ['train']
37 | 		depth_type: ['lidar']
38 | 		cameras: ['camera_01']
39 | 		repeat: [5]
40 | 	    validation:
41 | 		num_workers: 8
42 | 		dataset: ['DGP']
43 | 		path: ['/data/datasets/DDAD/ddad.json']
44 | 		split: ['val']
45 | 		depth_type: ['lidar']
46 | 		cameras: ['camera_01']
47 | 	    test:
48 | 		num_workers: 8
49 | 		dataset: ['DGP']
50 | 		path: ['/data/datasets/DDAD/ddad.json']
51 | 		split: ['val']
52 | 		depth_type: ['lidar']
53 | 		cameras: ['camera_01']
54 | 
55 | 


--------------------------------------------------------------------------------
/docs/_sources/configs/configs.train_kitti.rst.txt:
--------------------------------------------------------------------------------
 1 | train_kitti
 2 | ===========
 3 | 
 4 | .. code:: yaml
 5 | 
 6 | 	model:
 7 | 	    name: 'SelfSupModel'
 8 | 	    optimizer:
 9 | 		name: 'Adam'
10 | 		depth:
11 | 		    lr: 0.0002
12 | 		pose:
13 | 		    lr: 0.0002
14 | 	    scheduler:
15 | 		name: 'StepLR'
16 | 		step_size: 30
17 | 		gamma: 0.5
18 | 	    depth_net:
19 | 		name: 'PackNet01'
20 | 		version: '1A'
21 | 	    pose_net:
22 | 		name: 'PoseNet'
23 | 		version: ''
24 | 	    params:
25 | 		crop: 'garg'
26 | 		min_depth: 0.0
27 | 		max_depth: 80.0
28 | 	datasets:
29 | 	    augmentation:
30 | 		image_shape: (192, 640)
31 | 	    train:
32 | 		batch_size: 4
33 | 		dataset: ['KITTI']
34 | 		path: ['/data/datasets/KITTI_raw']
35 | 		split: ['data_splits/eigen_zhou_files.txt']
36 | 		depth_type: ['velodyne']
37 | 		repeat: [2]
38 | 	    validation:
39 | 		dataset: ['KITTI']
40 | 		path: ['/data/datasets/KITTI_raw']
41 | 		split: ['data_splits/eigen_val_files.txt',
42 | 		        'data_splits/eigen_test_files.txt']
43 | 		depth_type: ['velodyne']
44 | 	    test:
45 | 		dataset: ['KITTI']
46 | 		path: ['/data/datasets/KITTI_raw']
47 | 		split: ['data_splits/eigen_test_files.txt']
48 | 		depth_type: ['velodyne']
49 | 
50 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/KITTIDataset.rst.txt:
--------------------------------------------------------------------------------
1 | KITTIDataset
2 | ============
3 | 
4 | .. toctree::
5 |    datasets.KITTIDataset
6 |    datasets.KITTIDataset_utils
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.DGPDataset.rst.txt:
--------------------------------------------------------------------------------
1 | DGPDataset
2 | ==========
3 | 
4 | .. automodule:: packnet_sfm.datasets.dgp_dataset
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.ImageDataset.rst.txt:
--------------------------------------------------------------------------------
1 | ImageDataset
2 | ============
3 | 
4 | .. automodule:: packnet_sfm.datasets.image_dataset
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.KITTIDataset.rst.txt:
--------------------------------------------------------------------------------
1 | KITTIDataset
2 | ============
3 | 
4 | .. automodule:: packnet_sfm.datasets.kitti_dataset
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.KITTIDataset_utils.rst.txt:
--------------------------------------------------------------------------------
1 | kitti_dataset_utils
2 | ===================
3 | 
4 | .. automodule:: packnet_sfm.datasets.kitti_dataset_utils
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.augmentations.rst.txt:
--------------------------------------------------------------------------------
1 | Augmentations
2 | =============
3 | 
4 | .. automodule:: packnet_sfm.datasets.augmentations
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.rst.txt:
--------------------------------------------------------------------------------
 1 | Datasets
 2 | ========
 3 | 
 4 | .. toctree::
 5 |    datasets.augmentations
 6 |    datasets.transforms
 7 |    KITTIDataset.rst
 8 |    datasets.DGPDataset
 9 |    datasets.ImageDataset
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/docs/_sources/datasets/datasets.transforms.rst.txt:
--------------------------------------------------------------------------------
1 | Transforms
2 | ==========
3 | 
4 | .. automodule:: packnet_sfm.datasets.transforms
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/camera.rst.txt:
--------------------------------------------------------------------------------
1 | Camera
2 | ======
3 | 
4 | .. toctree::
5 |    geometry.camera.camera
6 |    geometry.camera.camera_utils
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/geometry.camera.camera.rst.txt:
--------------------------------------------------------------------------------
1 | Camera
2 | ======
3 | 
4 | .. automodule:: packnet_sfm.geometry.camera
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/geometry.camera.camera_utils.rst.txt:
--------------------------------------------------------------------------------
1 | Camera_utils
2 | ============
3 | 
4 | .. automodule:: packnet_sfm.geometry.camera_utils
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/geometry.pose.pose.rst.txt:
--------------------------------------------------------------------------------
1 | Pose
2 | ====
3 | 
4 | .. automodule:: packnet_sfm.geometry.pose
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/geometry.pose.pose_utils.rst.txt:
--------------------------------------------------------------------------------
1 | Pose_utils
2 | ==========
3 | 
4 | .. automodule:: packnet_sfm.geometry.pose_utils
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/geometry.rst.txt:
--------------------------------------------------------------------------------
1 | Geometry
2 | ========
3 | 
4 | .. toctree::
5 |    camera.rst
6 |    pose.rst
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/_sources/geometry/pose.rst.txt:
--------------------------------------------------------------------------------
1 | Pose
2 | ====
3 | 
4 | .. toctree::
5 |    geometry.pose.pose
6 |    geometry.pose.pose_utils
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/_sources/index.rst.txt:
--------------------------------------------------------------------------------
 1 | .. PackNet-SfM documentation master file, created by
 2 |    sphinx-quickstart on Thu Apr 23 07:39:57 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | .. include:: README.rst
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 5
10 |    :caption: Contents
11 | 
12 |    configs/configs.rst
13 |    scripts/scripts.rst
14 |    trainers/trainers.rst
15 |    datasets/datasets.rst
16 |    models/models.rst
17 |    networks/networks.rst
18 |    losses/losses.rst
19 |    loggers/loggers.rst
20 |    geometry/geometry.rst
21 |    utils/utils.rst
22 | 
23 | .. toctree::
24 |    :glob:
25 |    :maxdepth: 1
26 |    :caption: Contact
27 | 
28 |    Toyota Research Institute <https://tri.global>
29 |    PackNet-SfM GitHub <https://github.com/TRI-ML/packnet-sfm>
30 |    DDAD GitHub <https://github.com/TRI-ML/DDAD>
31 | 
32 | Indices and tables
33 | ==================
34 | 
35 | * :ref:`genindex`
36 | * :ref:`modindex`
37 | * :ref:`search`
38 | 


--------------------------------------------------------------------------------
/docs/_sources/loggers/loggers.WandbLogger.rst.txt:
--------------------------------------------------------------------------------
1 | WandbLogger
2 | ===========
3 | 
4 | .. automodule:: packnet_sfm.loggers.WandbLogger
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/loggers/loggers.rst.txt:
--------------------------------------------------------------------------------
1 | Loggers
2 | =======
3 | 
4 | .. toctree::
5 |    loggers.WandbLogger
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/_sources/losses/losses.loss_base.rst.txt:
--------------------------------------------------------------------------------
1 | LossBase
2 | ========
3 | 
4 | .. automodule:: packnet_sfm.losses.loss_base
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/losses/losses.multiview_photometric_loss.rst.txt:
--------------------------------------------------------------------------------
1 | MultiViewPhotometricLoss
2 | ========================
3 | 
4 | .. automodule:: packnet_sfm.losses.multiview_photometric_loss
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/losses/losses.rst.txt:
--------------------------------------------------------------------------------
1 | Losses
2 | ======
3 | 
4 | .. toctree::
5 |    losses.loss_base
6 |    losses.multiview_photometric_loss
7 |    losses.supervised_loss
8 | 


--------------------------------------------------------------------------------
/docs/_sources/losses/losses.supervised_loss.rst.txt:
--------------------------------------------------------------------------------
1 | SupervisedLoss
2 | ==============
3 | 
4 | .. automodule:: packnet_sfm.losses.supervised_loss
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.Checkpoint.rst.txt:
--------------------------------------------------------------------------------
1 | Checkpoint
2 | ==========
3 | 
4 | .. automodule:: packnet_sfm.models.model_checkpoint
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.SelfSupModel.rst.txt:
--------------------------------------------------------------------------------
1 | SelfSupModel
2 | ============
3 | 
4 | .. automodule:: packnet_sfm.models.SelfSupModel
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.SemiSupModel.rst.txt:
--------------------------------------------------------------------------------
1 | SemiSupModel
2 | ============
3 | 
4 | .. automodule:: packnet_sfm.models.SemiSupModel
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.SfmModel.rst.txt:
--------------------------------------------------------------------------------
1 | SfmModel
2 | ========
3 | 
4 | .. automodule:: packnet_sfm.models.SfmModel
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.Utilities.rst.txt:
--------------------------------------------------------------------------------
1 | Utilities
2 | =========
3 | 
4 | .. automodule:: packnet_sfm.models.model_utils
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.Wrapper.rst.txt:
--------------------------------------------------------------------------------
1 | Wrapper
2 | =======
3 | 
4 | .. automodule:: packnet_sfm.models.model_wrapper
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/models/models.rst.txt:
--------------------------------------------------------------------------------
 1 | Models
 2 | ======
 3 | 
 4 | .. toctree::
 5 |    models.Utilities  
 6 |    models.Checkpoint
 7 |    models.Wrapper
 8 |    models.SfmModel
 9 |    models.SelfSupModel
10 |    models.SemiSupModel
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/depth/depth.rst.txt:
--------------------------------------------------------------------------------
 1 | Depth Networks
 2 | ==============
 3 | 
 4 | .. toctree::
 5 |    :glob:
 6 | 
 7 |    networks.depth.PackNet01
 8 |    networks.depth.DepthResNet
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/depth/networks.depth.DepthResNet.rst.txt:
--------------------------------------------------------------------------------
1 | DepthResNet
2 | ===========
3 | 
4 | .. automodule:: packnet_sfm.networks.depth.DepthResNet
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/depth/networks.depth.PackNet01.rst.txt:
--------------------------------------------------------------------------------
1 | PackNet01
2 | =========
3 | 
4 | .. automodule:: packnet_sfm.networks.depth.PackNet01
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/layers.rst.txt:
--------------------------------------------------------------------------------
1 | Network Layers
2 | ==============
3 | 
4 | .. toctree::
5 |    :glob:
6 | 
7 |    resnet/resnet.rst
8 |    packnet/packnet.rst
9 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/packnet/layers01.rst.txt:
--------------------------------------------------------------------------------
1 | layers01
2 | ========
3 | 
4 | .. automodule:: packnet_sfm.networks.layers.packnet.layers01
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/packnet/packnet.rst.txt:
--------------------------------------------------------------------------------
1 | PackNet
2 | =======
3 | 
4 | .. toctree::
5 |    :glob:
6 | 
7 |    layers01.rst
8 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/resnet/depth_decoder.rst.txt:
--------------------------------------------------------------------------------
1 | depth_decoder
2 | =============
3 | 
4 | .. automodule:: packnet_sfm.networks.layers.resnet.depth_decoder
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/resnet/layers.rst.txt:
--------------------------------------------------------------------------------
1 | layers
2 | ======
3 | 
4 | .. automodule:: packnet_sfm.networks.layers.resnet.layers
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/resnet/pose_decoder.rst.txt:
--------------------------------------------------------------------------------
1 | pose_decoder
2 | ============
3 | 
4 | .. automodule:: packnet_sfm.networks.layers.resnet.pose_decoder
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/resnet/resnet.rst.txt:
--------------------------------------------------------------------------------
 1 | ResNet
 2 | ======
 3 | 
 4 | .. toctree::
 5 |    :glob:
 6 | 
 7 |    layers.rst
 8 |    resnet_encoder.rst
 9 |    depth_decoder.rst
10 |    pose_decoder.rst
11 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/layers/resnet/resnet_encoder.rst.txt:
--------------------------------------------------------------------------------
1 | resnet_encoder
2 | ==============
3 | 
4 | .. automodule:: packnet_sfm.networks.layers.resnet.resnet_encoder
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/networks.rst.txt:
--------------------------------------------------------------------------------
 1 | Networks
 2 | ========
 3 | 
 4 | .. toctree::
 5 |    :glob:
 6 | 
 7 |    layers/layers.rst
 8 |    depth/depth.rst
 9 |    pose/pose.rst
10 | 
11 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/pose/networks.pose.PoseNet.rst.txt:
--------------------------------------------------------------------------------
1 | PoseNet
2 | =======
3 | 
4 | .. automodule:: packnet_sfm.networks.pose.PoseNet
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/pose/networks.pose.PoseResNet.rst.txt:
--------------------------------------------------------------------------------
1 | PoseResNet
2 | ==========
3 | 
4 | .. automodule:: packnet_sfm.networks.pose.PoseResNet
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/networks/pose/pose.rst.txt:
--------------------------------------------------------------------------------
 1 | Pose Networks
 2 | =============
 3 | 
 4 | .. toctree::
 5 |    :glob:
 6 | 
 7 |    networks.pose.PoseNet
 8 |    networks.pose.PoseResNet
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/_sources/scripts/scripts.eval.rst.txt:
--------------------------------------------------------------------------------
1 | Evaluation
2 | ==========
3 | 
4 | .. automodule:: scripts.eval
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/scripts/scripts.infer.rst.txt:
--------------------------------------------------------------------------------
1 | Inference
2 | =========
3 | 
4 | .. automodule:: scripts.infer
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/scripts/scripts.rst.txt:
--------------------------------------------------------------------------------
1 | Scripts
2 | =======
3 | 
4 | .. toctree::
5 |    scripts.train
6 |    scripts.eval
7 |    scripts.infer
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/scripts/scripts.train.rst.txt:
--------------------------------------------------------------------------------
1 | Training
2 | ========
3 | 
4 | .. automodule:: scripts.train
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/_sources/trainers/trainers.BaseTrainer.rst.txt:
--------------------------------------------------------------------------------
1 | BaseTrainer
2 | ===========
3 | 
4 | .. automodule:: packnet_sfm.trainers.base_trainer
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/trainers/trainers.HorovodTrainer.rst.txt:
--------------------------------------------------------------------------------
1 | HorovodTrainer
2 | ==============
3 | 
4 | .. automodule:: packnet_sfm.trainers.HorovodTrainer
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/trainers/trainers.rst.txt:
--------------------------------------------------------------------------------
1 | Trainers
2 | ========
3 | 
4 | .. toctree::
5 |    trainers.BaseTrainer.rst
6 |    trainers.HorovodTrainer.rst
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.config.rst.txt:
--------------------------------------------------------------------------------
1 | Config
2 | ======
3 | 
4 | .. automodule:: packnet_sfm.utils.config
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.depth.rst.txt:
--------------------------------------------------------------------------------
1 | Depth
2 | =====
3 | 
4 | .. automodule:: packnet_sfm.utils.depth
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.horovod.rst.txt:
--------------------------------------------------------------------------------
1 | Horovod
2 | =======
3 | 
4 | .. automodule:: packnet_sfm.utils.horovod
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.image.rst.txt:
--------------------------------------------------------------------------------
1 | Image
2 | =====
3 | 
4 | .. automodule:: packnet_sfm.utils.image
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.load.rst.txt:
--------------------------------------------------------------------------------
1 | Loading
2 | =======
3 | 
4 | .. automodule:: packnet_sfm.utils.load
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.logging.rst.txt:
--------------------------------------------------------------------------------
1 | Logging
2 | =======
3 | 
4 | .. automodule:: packnet_sfm.utils.logging
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.misc.rst.txt:
--------------------------------------------------------------------------------
1 | Misc.
2 | =====
3 | 
4 | .. automodule:: packnet_sfm.utils.misc
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.reduce.rst.txt:
--------------------------------------------------------------------------------
1 | Reduce
2 | ======
3 | 
4 | .. automodule:: packnet_sfm.utils.reduce
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.rst.txt:
--------------------------------------------------------------------------------
 1 | Utils
 2 | =====
 3 | 
 4 | .. toctree::
 5 |    utils.config
 6 |    utils.horovod
 7 |    utils.reduce
 8 |    utils.logging
 9 |    utils.save
10 |    utils.load
11 |    utils.image
12 |    utils.depth
13 |    utils.types
14 |    utils.misc
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.save.rst.txt:
--------------------------------------------------------------------------------
1 | Saving
2 | ======
3 | 
4 | .. automodule:: packnet_sfm.utils.save
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_sources/utils/utils.types.rst.txt:
--------------------------------------------------------------------------------
1 | Types
2 | =====
3 | 
4 | .. automodule:: packnet_sfm.utils.types
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 
9 | 


--------------------------------------------------------------------------------
/docs/_static/css/badge_only.css:
--------------------------------------------------------------------------------
1 | ﻿.fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
2 | 


--------------------------------------------------------------------------------
/docs/_static/custom.css:
--------------------------------------------------------------------------------
 1 | .wy-side-nav-search, .wy-nav-top {
 2 |     background: #ffffff;
 3 | }
 4 | .wy-nav-side {
 5 |     background: #222222;
 6 | }
 7 | .wy-menu > .caption > span.caption-text {
 8 |   color: #bb0000;
 9 | }
10 | 
11 | 


--------------------------------------------------------------------------------
/docs/_static/documentation_options.js:
--------------------------------------------------------------------------------
 1 | var DOCUMENTATION_OPTIONS = {
 2 |     URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
 3 |     VERSION: '1.0',
 4 |     LANGUAGE: 'None',
 5 |     COLLAPSE_INDEX: false,
 6 |     BUILDER: 'html',
 7 |     FILE_SUFFIX: '.html',
 8 |     LINK_SUFFIX: '.html',
 9 |     HAS_SOURCE: true,
10 |     SOURCELINK_SUFFIX: '.txt',
11 |     NAVIGATION_WITH_KEYS: false
12 | };


--------------------------------------------------------------------------------
/docs/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/file.png


--------------------------------------------------------------------------------
/docs/_static/fonts/Inconsolata-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Inconsolata-Bold.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Inconsolata-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Inconsolata-Regular.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Inconsolata.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Inconsolata.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato-Bold.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato-Regular.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bold.woff2


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-bolditalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-bolditalic.woff2


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-italic.woff2


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/Lato/lato-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/Lato/lato-regular.woff2


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab-Bold.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab-Regular.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2


--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.eot


--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.ttf


--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.woff


--------------------------------------------------------------------------------
/docs/_static/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/fonts/fontawesome-webfont.woff2


--------------------------------------------------------------------------------
/docs/_static/js/theme.js:
--------------------------------------------------------------------------------
1 | /* sphinx_rtd_theme version 0.4.3 | MIT license */
2 | /* Built 20190212 16:02 */
3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n<l.length;n++)c(l[n]);return c}({"sphinx-rtd-theme":[function(n,e,i){var jQuery="undefined"!=typeof window?window.jQuery:n("jquery");e.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(e){var i=this;void 0===e&&(e=!0),i.isRunning||(i.isRunning=!0,jQuery(function(n){i.init(n),i.reset(),i.win.on("hashchange",i.reset),e&&i.win.on("scroll",function(){i.linkScroll||i.winScroll||(i.winScroll=!0,requestAnimationFrame(function(){i.onScroll()}))}),i.win.on("resize",function(){i.winResize||(i.winResize=!0,requestAnimationFrame(function(){i.onResize()}))}),i.onResize()}))},enableSticky:function(){this.enable(!0)},init:function(i){i(document);var t=this;this.navBar=i("div.wy-side-scroll:first"),this.win=i(window),i(document).on("click","[data-toggle='wy-nav-top']",function(){i("[data-toggle='wy-nav-shift']").toggleClass("shift"),i("[data-toggle='rst-versions']").toggleClass("shift")}).on("click",".wy-menu-vertical .current ul li a",function(){var n=i(this);i("[data-toggle='wy-nav-shift']").removeClass("shift"),i("[data-toggle='rst-versions']").toggleClass("shift"),t.toggleCurrent(n),t.hashChange()}).on("click","[data-toggle='rst-current-version']",function(){i("[data-toggle='rst-versions']").toggleClass("shift-up")}),i("table.docutils:not(.field-list,.footnote,.citation)").wrap("<div class='wy-table-responsive'></div>"),i("table.docutils.footnote").wrap("<div class='wy-table-responsive footnote'></div>"),i("table.docutils.citation").wrap("<div class='wy-table-responsive citation'></div>"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i('<span class="toctree-expand"></span>'),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0<i.length&&($(".wy-menu-vertical .current").removeClass("current"),i.addClass("current"),i.closest("li.toctree-l1").addClass("current"),i.closest("li.toctree-l1").parent().addClass("current"),i.closest("li.toctree-l1").addClass("current"),i.closest("li.toctree-l2").addClass("current"),i.closest("li.toctree-l3").addClass("current"),i.closest("li.toctree-l4").addClass("current"),i[0].scrollIntoView())}catch(o){console.log("Error expanding nav for anchor",o)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,i=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;e<n.length&&!window.requestAnimationFrame;++e)window.requestAnimationFrame=window[n[e]+"RequestAnimationFrame"],window.cancelAnimationFrame=window[n[e]+"CancelAnimationFrame"]||window[n[e]+"CancelRequestAnimationFrame"];window.requestAnimationFrame||(window.requestAnimationFrame=function(n,e){var i=(new Date).getTime(),t=Math.max(0,16-(i-r)),o=window.setTimeout(function(){n(i+t)},t);return r=i+t,o}),window.cancelAnimationFrame||(window.cancelAnimationFrame=function(n){clearTimeout(n)})}()},{jquery:"jquery"}]},{},["sphinx-rtd-theme"]);


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/minus.png


--------------------------------------------------------------------------------
/docs/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/_static/plus.png


--------------------------------------------------------------------------------
/docs/_static/pygments.css:
--------------------------------------------------------------------------------
 1 | .highlight .hll { background-color: #ffffcc }
 2 | .highlight  { background: #f8f8f8; }
 3 | .highlight .c { color: #408080; font-style: italic } /* Comment */
 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */
 5 | .highlight .k { color: #008000; font-weight: bold } /* Keyword */
 6 | .highlight .o { color: #666666 } /* Operator */
 7 | .highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
 8 | .highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */
 9 | .highlight .cp { color: #BC7A00 } /* Comment.Preproc */
10 | .highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
11 | .highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */
12 | .highlight .cs { color: #408080; font-style: italic } /* Comment.Special */
13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */
14 | .highlight .ge { font-style: italic } /* Generic.Emph */
15 | .highlight .gr { color: #FF0000 } /* Generic.Error */
16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */
18 | .highlight .go { color: #888888 } /* Generic.Output */
19 | .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
20 | .highlight .gs { font-weight: bold } /* Generic.Strong */
21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */
23 | .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
24 | .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
25 | .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
26 | .highlight .kp { color: #008000 } /* Keyword.Pseudo */
27 | .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
28 | .highlight .kt { color: #B00040 } /* Keyword.Type */
29 | .highlight .m { color: #666666 } /* Literal.Number */
30 | .highlight .s { color: #BA2121 } /* Literal.String */
31 | .highlight .na { color: #7D9029 } /* Name.Attribute */
32 | .highlight .nb { color: #008000 } /* Name.Builtin */
33 | .highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */
34 | .highlight .no { color: #880000 } /* Name.Constant */
35 | .highlight .nd { color: #AA22FF } /* Name.Decorator */
36 | .highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */
37 | .highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
38 | .highlight .nf { color: #0000FF } /* Name.Function */
39 | .highlight .nl { color: #A0A000 } /* Name.Label */
40 | .highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
41 | .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */
42 | .highlight .nv { color: #19177C } /* Name.Variable */
43 | .highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */
45 | .highlight .mb { color: #666666 } /* Literal.Number.Bin */
46 | .highlight .mf { color: #666666 } /* Literal.Number.Float */
47 | .highlight .mh { color: #666666 } /* Literal.Number.Hex */
48 | .highlight .mi { color: #666666 } /* Literal.Number.Integer */
49 | .highlight .mo { color: #666666 } /* Literal.Number.Oct */
50 | .highlight .sa { color: #BA2121 } /* Literal.String.Affix */
51 | .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */
52 | .highlight .sc { color: #BA2121 } /* Literal.String.Char */
53 | .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */
54 | .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
55 | .highlight .s2 { color: #BA2121 } /* Literal.String.Double */
56 | .highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
57 | .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */
58 | .highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
59 | .highlight .sx { color: #008000 } /* Literal.String.Other */
60 | .highlight .sr { color: #BB6688 } /* Literal.String.Regex */
61 | .highlight .s1 { color: #BA2121 } /* Literal.String.Single */
62 | .highlight .ss { color: #19177C } /* Literal.String.Symbol */
63 | .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */
64 | .highlight .fm { color: #0000FF } /* Name.Function.Magic */
65 | .highlight .vc { color: #19177C } /* Name.Variable.Class */
66 | .highlight .vg { color: #19177C } /* Name.Variable.Global */
67 | .highlight .vi { color: #19177C } /* Name.Variable.Instance */
68 | .highlight .vm { color: #19177C } /* Name.Variable.Magic */
69 | .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */


--------------------------------------------------------------------------------
/docs/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/docs/objects.inv


--------------------------------------------------------------------------------
/media/figs/packnet-ddad.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/media/figs/packnet-ddad.gif


--------------------------------------------------------------------------------
/media/figs/tri-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/media/figs/tri-logo.png


--------------------------------------------------------------------------------
/media/tests/ddad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/media/tests/ddad.png


--------------------------------------------------------------------------------
/media/tests/kitti.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/media/tests/kitti.png


--------------------------------------------------------------------------------
/packnet_sfm/__init__.py:
--------------------------------------------------------------------------------
 1 | """Root package info"""
 2 | 
 3 | __version__ = '0.1.0'
 4 | __author__ = 'TRI ML team'
 5 | __license__ = 'MIT'
 6 | __copyright__ = 'Copyright (c) 2019-2020, TRI'
 7 | __homepage__ = 'https://github.com/TRI-ML/packnet-sfm'
 8 | __docs__ = 'packnet-sfm is a library for monocular depth and pose estimation'
 9 | __long_docs__ = """
10 | Official [PyTorch](https://pytorch.org/) implementation of _self-supervised_
11 | monocular depth estimation methods invented by the ML Team at
12 | [Toyota Research Institute (TRI)](https://www.tri.global/),
13 | in particular for _PackNet_:
14 | [**3D Packing for Self-Supervised Monocular Depth Estimation (CVPR 2020 oral)**](https://arxiv.org/abs/1905.02693),
15 | *Vitor Guizilini, Rares Ambrus, Sudeep Pillai, Allan Raventos and Adrien Gaidon*.
16 | 
17 | Although self-supervised (i.e. trained only on monocular videos),
18 | PackNet outperforms other self, semi, and fully supervised methods.
19 | Furthermore, it gets better with input resolution and number of parameters, generalizes better, and can run in real-time (with TensorRT). See [References](#references) for more info on our models.
20 | 
21 | """
22 | 


--------------------------------------------------------------------------------
/packnet_sfm/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PackNet-SfM datasets
 3 | ====================
 4 | 
 5 | These datasets output images, camera calibration, depth maps and poses for depth and pose estimation
 6 | 
 7 | - KITTIDataset: reads from KITTI_raw
 8 | - DGPDataset: reads from a DGP .json file
 9 | - ImageDataset: reads from a folder containing image sequences (no support for depth maps)
10 | 
11 | """
12 | 


--------------------------------------------------------------------------------
/packnet_sfm/datasets/image_dataset.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import re
  3 | from collections import defaultdict
  4 | import os
  5 | 
  6 | from torch.utils.data import Dataset
  7 | import numpy as np
  8 | from packnet_sfm.utils.image import load_image
  9 | 
 10 | ########################################################################################################################
 11 | #### FUNCTIONS
 12 | ########################################################################################################################
 13 | 
 14 | def dummy_calibration(image):
 15 |     w, h = [float(d) for d in image.size]
 16 |     return np.array([[1000. , 0.    , w / 2. - 0.5],
 17 |                      [0.    , 1000. , h / 2. - 0.5],
 18 |                      [0.    , 0.    , 1.          ]])
 19 | 
 20 | def get_idx(filename):
 21 |     return int(re.search(r'\d+', filename).group())
 22 | 
 23 | def read_files(directory, ext=('.png', '.jpg', '.jpeg'), skip_empty=True):
 24 |     files = defaultdict(list)
 25 |     for entry in os.scandir(directory):
 26 |         relpath = os.path.relpath(entry.path, directory)
 27 |         if entry.is_dir():
 28 |             d_files = read_files(entry.path, ext=ext, skip_empty=skip_empty)
 29 |             if skip_empty and not len(d_files):
 30 |                 continue
 31 |             files[relpath] = d_files[entry.path]
 32 |         elif entry.is_file():
 33 |             if ext is None or entry.path.lower().endswith(tuple(ext)):
 34 |                 files[directory].append(relpath)
 35 |     return files
 36 | 
 37 | ########################################################################################################################
 38 | #### DATASET
 39 | ########################################################################################################################
 40 | 
 41 | class ImageDataset(Dataset):
 42 |     def __init__(self, root_dir, split, data_transform=None,
 43 |                  forward_context=0, back_context=0, strides=(1,),
 44 |                  depth_type=None, **kwargs):
 45 |         super().__init__()
 46 |         # Asserts
 47 |         assert depth_type is None or depth_type == '', \
 48 |             'ImageDataset currently does not support depth types'
 49 |         assert len(strides) == 1 and strides[0] == 1, \
 50 |             'ImageDataset currently only supports stride of 1.'
 51 | 
 52 |         self.root_dir = root_dir
 53 |         self.split = split
 54 | 
 55 |         self.backward_context = back_context
 56 |         self.forward_context = forward_context
 57 |         self.has_context = self.backward_context + self.forward_context > 0
 58 |         self.strides = 1
 59 | 
 60 |         self.files = []
 61 |         file_tree = read_files(root_dir)
 62 |         for k, v in file_tree.items():
 63 |             file_set = set(file_tree[k])
 64 |             files = [fname for fname in sorted(v) if self._has_context(fname, file_set)]
 65 |             self.files.extend([[k, fname] for fname in files])
 66 | 
 67 |         self.data_transform = data_transform
 68 | 
 69 |     def __len__(self):
 70 |         return len(self.files)
 71 | 
 72 |     def _change_idx(self, idx, filename):
 73 |         _, ext = os.path.splitext(os.path.basename(filename))
 74 |         return self.split.format(idx) + ext
 75 | 
 76 |     def _has_context(self, filename, file_set):
 77 |         context_paths = self._get_context_file_paths(filename)
 78 |         return all([f in file_set for f in context_paths])
 79 | 
 80 |     def _get_context_file_paths(self, filename):
 81 |         fidx = get_idx(filename)
 82 |         idxs = list(np.arange(-self.backward_context * self.strides, 0, self.strides)) + \
 83 |                list(np.arange(0, self.forward_context * self.strides, self.strides) + self.strides)
 84 |         return [self._change_idx(fidx + i, filename) for i in idxs]
 85 | 
 86 |     def _read_rgb_context_files(self, session, filename):
 87 |         context_paths = self._get_context_file_paths(filename)
 88 |         return [load_image(os.path.join(self.root_dir, session, filename))
 89 |                 for filename in context_paths]
 90 | 
 91 |     def _read_rgb_file(self, session, filename):
 92 |         return load_image(os.path.join(self.root_dir, session, filename))
 93 | 
 94 |     def __getitem__(self, idx):
 95 |         session, filename = self.files[idx]
 96 |         image = self._read_rgb_file(session, filename)
 97 | 
 98 |         sample = {
 99 |             'idx': idx,
100 |             'filename': '%s_%s' % (session, os.path.splitext(filename)[0]),
101 |             #
102 |             'rgb': image,
103 |             'intrinsics': dummy_calibration(image)
104 |         }
105 | 
106 |         if self.has_context:
107 |             sample['rgb_context'] = \
108 |                 self._read_rgb_context_files(session, filename)
109 | 
110 |         if self.data_transform:
111 |             sample = self.data_transform(sample)
112 | 
113 |         return sample
114 | 
115 | ########################################################################################################################
116 | 


--------------------------------------------------------------------------------
/packnet_sfm/datasets/kitti_dataset_utils.py:
--------------------------------------------------------------------------------
  1 | """Provides helper methods for loading and parsing KITTI data."""
  2 | 
  3 | from collections import namedtuple
  4 | 
  5 | import numpy as np
  6 | 
  7 | __author__ = "Lee Clement"
  8 | __email__ = "lee.clement@robotics.utias.utoronto.ca"
  9 | 
 10 | # Per dataformat.txt
 11 | OxtsPacket = namedtuple('OxtsPacket',
 12 |                         'lat, lon, alt, ' +
 13 |                         'roll, pitch, yaw, ' +
 14 |                         'vn, ve, vf, vl, vu, ' +
 15 |                         'ax, ay, az, af, al, au, ' +
 16 |                         'wx, wy, wz, wf, wl, wu, ' +
 17 |                         'pos_accuracy, vel_accuracy, ' +
 18 |                         'navstat, numsats, ' +
 19 |                         'posmode, velmode, orimode')
 20 | 
 21 | # Bundle into an easy-to-access structure
 22 | OxtsData = namedtuple('OxtsData', 'packet, T_w_imu')
 23 | 
 24 | 
 25 | def rotx(t):
 26 |     """
 27 |     Rotation about the x-axis
 28 | 
 29 |     Parameters
 30 |     ----------
 31 |     t : float
 32 |         Theta angle
 33 | 
 34 |     Returns
 35 |     -------
 36 |     matrix : np.array [3,3]
 37 |         Rotation matrix
 38 |     """
 39 |     c = np.cos(t)
 40 |     s = np.sin(t)
 41 |     return np.array([[1,  0,  0],
 42 |                      [0,  c, -s],
 43 |                      [0,  s,  c]])
 44 | 
 45 | 
 46 | def roty(t):
 47 |     """
 48 |     Rotation about the y-axis
 49 | 
 50 |     Parameters
 51 |     ----------
 52 |     t : float
 53 |         Theta angle
 54 | 
 55 |     Returns
 56 |     -------
 57 |     matrix : np.array [3,3]
 58 |         Rotation matrix
 59 |     """
 60 |     c = np.cos(t)
 61 |     s = np.sin(t)
 62 |     return np.array([[c,  0,  s],
 63 |                      [0,  1,  0],
 64 |                      [-s, 0,  c]])
 65 | 
 66 | 
 67 | def rotz(t):
 68 |     """
 69 |     Rotation about the z-axis
 70 | 
 71 |     Parameters
 72 |     ----------
 73 |     t : float
 74 |         Theta angle
 75 | 
 76 |     Returns
 77 |     -------
 78 |     matrix : np.array [3,3]
 79 |         Rotation matrix
 80 |     """
 81 |     c = np.cos(t)
 82 |     s = np.sin(t)
 83 |     return np.array([[c, -s,  0],
 84 |                      [s,  c,  0],
 85 |                      [0,  0,  1]])
 86 | 
 87 | 
 88 | def transform_from_rot_trans(R, t):
 89 |     """
 90 |     Transformation matrix from rotation matrix and translation vector.
 91 | 
 92 |     Parameters
 93 |     ----------
 94 |     R : np.array [3,3]
 95 |         Rotation matrix
 96 |     t : np.array [3]
 97 |         translation vector
 98 | 
 99 |     Returns
100 |     -------
101 |     matrix : np.array [4,4]
102 |         Transformation matrix
103 |     """
104 |     R = R.reshape(3, 3)
105 |     t = t.reshape(3, 1)
106 |     return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))
107 | 
108 | 
109 | def read_calib_file(filepath):
110 |     """
111 |     Read in a calibration file and parse into a dictionary
112 | 
113 |     Parameters
114 |     ----------
115 |     filepath : str
116 |         File path to read from
117 | 
118 |     Returns
119 |     -------
120 |     calib : dict
121 |         Dictionary with calibration values
122 |     """
123 |     data = {}
124 | 
125 |     with open(filepath, 'r') as f:
126 |         for line in f.readlines():
127 |             key, value = line.split(':', 1)
128 |             # The only non-float values in these files are dates, which
129 |             # we don't care about anyway
130 |             try:
131 |                 data[key] = np.array([float(x) for x in value.split()])
132 |             except ValueError:
133 |                 pass
134 | 
135 |     return data
136 | 
137 | 
138 | def pose_from_oxts_packet(raw_data, scale):
139 |     """
140 |     Helper method to compute a SE(3) pose matrix from an OXTS packet
141 | 
142 |     Parameters
143 |     ----------
144 |     raw_data : dict
145 |         Oxts data to read from
146 |     scale : float
147 |         Oxts scale
148 | 
149 |     Returns
150 |     -------
151 |     R : np.array [3,3]
152 |         Rotation matrix
153 |     t : np.array [3]
154 |         Translation vector
155 |     """
156 |     packet = OxtsPacket(*raw_data)
157 |     er = 6378137.  # earth radius (approx.) in meters
158 | 
159 |     # Use a Mercator projection to get the translation vector
160 |     tx = scale * packet.lon * np.pi * er / 180.
161 |     ty = scale * er * \
162 |         np.log(np.tan((90. + packet.lat) * np.pi / 360.))
163 |     tz = packet.alt
164 |     t = np.array([tx, ty, tz])
165 | 
166 |     # Use the Euler angles to get the rotation matrix
167 |     Rx = rotx(packet.roll)
168 |     Ry = roty(packet.pitch)
169 |     Rz = rotz(packet.yaw)
170 |     R = Rz.dot(Ry.dot(Rx))
171 | 
172 |     # Combine the translation and rotation into a homogeneous transform
173 |     return R, t
174 | 
175 | 
176 | def load_oxts_packets_and_poses(oxts_files):
177 |     """
178 |     Generator to read OXTS ground truth data.
179 |     Poses are given in an East-North-Up coordinate system
180 |     whose origin is the first GPS position.
181 | 
182 |     Parameters
183 |     ----------
184 |     oxts_files : list of str
185 |         List of oxts files to read from
186 | 
187 |     Returns
188 |     -------
189 |     oxts : list of dict
190 |         List of oxts ground-truth data
191 |     """
192 |     # Scale for Mercator projection (from first lat value)
193 |     scale = None
194 |     # Origin of the global coordinate system (first GPS position)
195 |     origin = None
196 | 
197 |     oxts = []
198 | 
199 |     for filename in oxts_files:
200 |         with open(filename, 'r') as f:
201 |             for line in f.readlines():
202 |                 line = line.split()
203 |                 # Last five entries are flags and counts
204 |                 line[:-5] = [float(x) for x in line[:-5]]
205 |                 line[-5:] = [int(float(x)) for x in line[-5:]]
206 | 
207 |                 packet = OxtsPacket(*line)
208 | 
209 |                 if scale is None:
210 |                     scale = np.cos(packet.lat * np.pi / 180.)
211 | 
212 |                 R, t = pose_from_oxts_packet(packet, scale)
213 | 
214 |                 if origin is None:
215 |                     origin = t
216 | 
217 |                 T_w_imu = transform_from_rot_trans(R, t - origin)
218 | 
219 |                 oxts.append(OxtsData(packet, T_w_imu))
220 | 
221 |     return oxts
222 | 
223 | 
224 | 


--------------------------------------------------------------------------------
/packnet_sfm/datasets/transforms.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | from functools import partial
  4 | from packnet_sfm.datasets.augmentations import resize_image, resize_sample, resize_depth, \
  5 |     duplicate_sample, colorjitter_sample, to_tensor_sample, crop_sample, crop_sample_input, resize_depth_preserve
  6 | from packnet_sfm.utils.misc import parse_crop_borders
  7 | 
  8 | ########################################################################################################################
  9 | 
 10 | def train_transforms(sample, image_shape, jittering, crop_train_borders):
 11 |     """
 12 |     Training data augmentation transformations
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     sample : dict
 17 |         Sample to be augmented
 18 |     image_shape : tuple (height, width)
 19 |         Image dimension to reshape
 20 |     jittering : tuple (brightness, contrast, saturation, hue)
 21 |         Color jittering parameters
 22 |     crop_train_borders : tuple (left, top, right, down)
 23 |         Border for cropping
 24 | 
 25 |     Returns
 26 |     -------
 27 |     sample : dict
 28 |         Augmented sample
 29 |     """
 30 |     if len(crop_train_borders) > 0:
 31 |         borders = parse_crop_borders(crop_train_borders, sample['rgb'].size[::-1])
 32 |         sample = crop_sample(sample, borders)
 33 |     if len(image_shape) > 0:
 34 |         sample = resize_sample(sample, image_shape)
 35 |     sample = duplicate_sample(sample)
 36 |     if len(jittering) > 0:
 37 |         sample = colorjitter_sample(sample, jittering)
 38 |     sample = to_tensor_sample(sample)
 39 |     return sample
 40 | 
 41 | def validation_transforms(sample, image_shape, crop_eval_borders):
 42 |     """
 43 |     Validation data augmentation transformations
 44 | 
 45 |     Parameters
 46 |     ----------
 47 |     sample : dict
 48 |         Sample to be augmented
 49 |     image_shape : tuple (height, width)
 50 |         Image dimension to reshape
 51 |     crop_eval_borders : tuple (left, top, right, down)
 52 |         Border for cropping
 53 | 
 54 |     Returns
 55 |     -------
 56 |     sample : dict
 57 |         Augmented sample
 58 |     """
 59 |     if len(crop_eval_borders) > 0:
 60 |         borders = parse_crop_borders(crop_eval_borders, sample['rgb'].size[::-1])
 61 |         sample = crop_sample_input(sample, borders)
 62 |     if len(image_shape) > 0:
 63 |         sample['rgb'] = resize_image(sample['rgb'], image_shape)
 64 |         if 'input_depth' in sample:
 65 |             sample['input_depth'] = resize_depth_preserve(sample['input_depth'], image_shape)
 66 |     sample = to_tensor_sample(sample)
 67 |     return sample
 68 | 
 69 | def test_transforms(sample, image_shape, crop_eval_borders):
 70 |     """
 71 |     Test data augmentation transformations
 72 | 
 73 |     Parameters
 74 |     ----------
 75 |     sample : dict
 76 |         Sample to be augmented
 77 |     image_shape : tuple (height, width)
 78 |         Image dimension to reshape
 79 | 
 80 |     Returns
 81 |     -------
 82 |     sample : dict
 83 |         Augmented sample
 84 |     """
 85 |     if len(crop_eval_borders) > 0:
 86 |         borders = parse_crop_borders(crop_eval_borders, sample['rgb'].size[::-1])
 87 |         sample = crop_sample_input(sample, borders)
 88 |     if len(image_shape) > 0:
 89 |         sample['rgb'] = resize_image(sample['rgb'], image_shape)
 90 |         if 'input_depth' in sample:
 91 |             sample['input_depth'] = resize_depth(sample['input_depth'], image_shape)
 92 |     sample = to_tensor_sample(sample)
 93 |     return sample
 94 | 
 95 | def get_transforms(mode, image_shape, jittering, crop_train_borders,
 96 |                    crop_eval_borders, **kwargs):
 97 |     """
 98 |     Get data augmentation transformations for each split
 99 | 
100 |     Parameters
101 |     ----------
102 |     mode : str {'train', 'validation', 'test'}
103 |         Mode from which we want the data augmentation transformations
104 |     image_shape : tuple (height, width)
105 |         Image dimension to reshape
106 |     jittering : tuple (brightness, contrast, saturation, hue)
107 |         Color jittering parameters
108 |     crop_train_borders : tuple (left, top, right, down)
109 |         Border for cropping
110 |     crop_eval_borders : tuple (left, top, right, down)
111 |         Border for cropping
112 | 
113 |     Returns
114 |     -------
115 |         XXX_transform: Partial function
116 |             Data augmentation transformation for that mode
117 |     """
118 |     if mode == 'train':
119 |         return partial(train_transforms,
120 |                        image_shape=image_shape,
121 |                        jittering=jittering,
122 |                        crop_train_borders=crop_train_borders)
123 |     elif mode == 'validation':
124 |         return partial(validation_transforms,
125 |                        crop_eval_borders=crop_eval_borders,
126 |                        image_shape=image_shape)
127 |     elif mode == 'test':
128 |         return partial(test_transforms,
129 |                        crop_eval_borders=crop_eval_borders,
130 |                        image_shape=image_shape)
131 |     else:
132 |         raise ValueError('Unknown mode {}'.format(mode))
133 | 
134 | ########################################################################################################################
135 | 
136 | 


--------------------------------------------------------------------------------
/packnet_sfm/geometry/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/geometry/__init__.py


--------------------------------------------------------------------------------
/packnet_sfm/geometry/camera_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch
 4 | import torch.nn.functional as funct
 5 | 
 6 | ########################################################################################################################
 7 | 
 8 | 
 9 | def construct_K(fx, fy, cx, cy, dtype=torch.float, device=None):
10 |     """Construct a [3,3] camera intrinsics from pinhole parameters"""
11 |     return torch.tensor([[fx,  0, cx],
12 |                          [0, fy, cy],
13 |                          [0,  0,  1]], dtype=dtype, device=device)
14 | 
15 | 
16 | def scale_intrinsics(K, x_scale, y_scale):
17 |     """Scale intrinsics given x_scale and y_scale factors"""
18 |     K[..., 0, 0] *= x_scale
19 |     K[..., 1, 1] *= y_scale
20 |     K[..., 0, 2] = (K[..., 0, 2] + 0.5) * x_scale - 0.5
21 |     K[..., 1, 2] = (K[..., 1, 2] + 0.5) * y_scale - 0.5
22 |     return K
23 | 
24 | ########################################################################################################################
25 | 
26 | 
27 | def view_synthesis(ref_image, depth, ref_cam, cam,
28 |                    mode='bilinear', padding_mode='zeros'):
29 |     """
30 |     Synthesize an image from another plus a depth map.
31 | 
32 |     Parameters
33 |     ----------
34 |     ref_image : torch.Tensor [B,3,H,W]
35 |         Reference image to be warped
36 |     depth : torch.Tensor [B,1,H,W]
37 |         Depth map from the original image
38 |     ref_cam : Camera
39 |         Camera class for the reference image
40 |     cam : Camera
41 |         Camera class for the original image
42 |     mode : str
43 |         Interpolation mode
44 |     padding_mode : str
45 |         Padding mode for interpolation
46 | 
47 |     Returns
48 |     -------
49 |     ref_warped : torch.Tensor [B,3,H,W]
50 |         Warped reference image in the original frame of reference
51 |     """
52 |     assert depth.size(1) == 1
53 |     # Reconstruct world points from target_camera
54 |     world_points = cam.reconstruct(depth, frame='w')
55 |     # Project world points onto reference camera
56 |     ref_coords = ref_cam.project(world_points, frame='w')
57 |     # View-synthesis given the projected reference points
58 |     return funct.grid_sample(ref_image, ref_coords, mode=mode,
59 |                              padding_mode=padding_mode, align_corners=True)
60 | 
61 | ########################################################################################################################
62 | 
63 | 
64 | def view_synthesis_generic(ref_image, depth, ref_cam, cam,
65 |                            mode='bilinear', padding_mode='zeros', progress=0.0):
66 |     """
67 |     Synthesize an image from another plus a depth map.
68 | 
69 |     Parameters
70 |     ----------
71 |     ref_image : torch.Tensor [B,3,H,W]
72 |         Reference image to be warped
73 |     depth : torch.Tensor [B,1,H,W]
74 |         Depth map from the original image
75 |     ref_cam : Camera
76 |         Camera class for the reference image
77 |     cam : Camera
78 |         Camera class for the original image
79 |     mode : str
80 |         Interpolation mode
81 |     padding_mode : str
82 |         Padding mode for interpolation
83 | 
84 |     Returns
85 |     -------
86 |     ref_warped : torch.Tensor [B,3,H,W]
87 |         Warped reference image in the original frame of reference
88 |     """
89 |     assert depth.size(1) == 1
90 |     # Reconstruct world points from target_camera
91 |     world_points = cam.reconstruct(depth, frame='w')
92 |     # Project world points onto reference camera
93 |     ref_coords = ref_cam.project(world_points, progress=progress, frame='w')
94 |     # View-synthesis given the projected reference points
95 |     return funct.grid_sample(ref_image, ref_coords, mode=mode,
96 |                              padding_mode=padding_mode, align_corners=True)
97 | 
98 | ########################################################################################################################
99 | 


--------------------------------------------------------------------------------
/packnet_sfm/geometry/pose.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import torch
  4 | from packnet_sfm.geometry.pose_utils import invert_pose, pose_vec2mat
  5 | 
  6 | ########################################################################################################################
  7 | 
  8 | class Pose:
  9 |     """
 10 |     Pose class, that encapsulates a [4,4] transformation matrix
 11 |     for a specific reference frame
 12 |     """
 13 |     def __init__(self, mat):
 14 |         """
 15 |         Initializes a Pose object.
 16 | 
 17 |         Parameters
 18 |         ----------
 19 |         mat : torch.Tensor [B,4,4]
 20 |             Transformation matrix
 21 |         """
 22 |         assert tuple(mat.shape[-2:]) == (4, 4)
 23 |         if mat.dim() == 2:
 24 |             mat = mat.unsqueeze(0)
 25 |         assert mat.dim() == 3
 26 |         self.mat = mat
 27 | 
 28 |     def __len__(self):
 29 |         """Batch size of the transformation matrix"""
 30 |         return len(self.mat)
 31 | 
 32 | ########################################################################################################################
 33 | 
 34 |     @classmethod
 35 |     def identity(cls, N=1, device=None, dtype=torch.float):
 36 |         """Initializes as a [4,4] identity matrix"""
 37 |         return cls(torch.eye(4, device=device, dtype=dtype).repeat([N,1,1]))
 38 | 
 39 |     @classmethod
 40 |     def from_vec(cls, vec, mode):
 41 |         """Initializes from a [B,6] batch vector"""
 42 |         mat = pose_vec2mat(vec, mode)  # [B,3,4]
 43 |         pose = torch.eye(4, device=vec.device, dtype=vec.dtype).repeat([len(vec), 1, 1])
 44 |         pose[:, :3, :3] = mat[:, :3, :3]
 45 |         pose[:, :3, -1] = mat[:, :3, -1]
 46 |         return cls(pose)
 47 | 
 48 | ########################################################################################################################
 49 | 
 50 |     @property
 51 |     def shape(self):
 52 |         """Returns the transformation matrix shape"""
 53 |         return self.mat.shape
 54 | 
 55 |     def item(self):
 56 |         """Returns the transformation matrix"""
 57 |         return self.mat
 58 | 
 59 |     def repeat(self, *args, **kwargs):
 60 |         """Repeats the transformation matrix multiple times"""
 61 |         self.mat = self.mat.repeat(*args, **kwargs)
 62 |         return self
 63 | 
 64 |     def inverse(self):
 65 |         """Returns a new Pose that is the inverse of this one"""
 66 |         return Pose(invert_pose(self.mat))
 67 | 
 68 |     def to(self, *args, **kwargs):
 69 |         """Moves object to a specific device"""
 70 |         self.mat = self.mat.to(*args, **kwargs)
 71 |         return self
 72 | 
 73 | ########################################################################################################################
 74 | 
 75 |     def transform_pose(self, pose):
 76 |         """Creates a new pose object that compounds this and another one (self * pose)"""
 77 |         assert tuple(pose.shape[-2:]) == (4, 4)
 78 |         return Pose(self.mat.bmm(pose.item()))
 79 | 
 80 |     def transform_points(self, points):
 81 |         """Transforms 3D points using this object"""
 82 |         assert points.shape[1] == 3
 83 |         B, _, H, W = points.shape
 84 |         out = self.mat[:,:3,:3].bmm(points.view(B, 3, -1)) + \
 85 |               self.mat[:,:3,-1].unsqueeze(-1)
 86 |         return out.view(B, 3, H, W)
 87 | 
 88 |     def __matmul__(self, other):
 89 |         """Transforms the input (Pose or 3D points) using this object"""
 90 |         if isinstance(other, Pose):
 91 |             return self.transform_pose(other)
 92 |         elif isinstance(other, torch.Tensor):
 93 |             if other.shape[1] == 3 and other.dim() > 2:
 94 |                 assert other.dim() == 3 or other.dim() == 4
 95 |                 return self.transform_points(other)
 96 |             else:
 97 |                 raise ValueError('Unknown tensor dimensions {}'.format(other.shape))
 98 |         else:
 99 |             raise NotImplementedError()
100 | 
101 | ########################################################################################################################
102 | 


--------------------------------------------------------------------------------
/packnet_sfm/geometry/pose_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch
 4 | import numpy as np
 5 | 
 6 | ########################################################################################################################
 7 | 
 8 | def euler2mat(angle):
 9 |     """Convert euler angles to rotation matrix"""
10 |     B = angle.size(0)
11 |     x, y, z = angle[:, 0], angle[:, 1], angle[:, 2]
12 | 
13 |     cosz = torch.cos(z)
14 |     sinz = torch.sin(z)
15 | 
16 |     zeros = z.detach() * 0
17 |     ones = zeros.detach() + 1
18 |     zmat = torch.stack([cosz, -sinz, zeros,
19 |                         sinz, cosz, zeros,
20 |                         zeros, zeros, ones], dim=1).view(B, 3, 3)
21 | 
22 |     cosy = torch.cos(y)
23 |     siny = torch.sin(y)
24 | 
25 |     ymat = torch.stack([cosy, zeros, siny,
26 |                         zeros, ones, zeros,
27 |                         -siny, zeros, cosy], dim=1).view(B, 3, 3)
28 | 
29 |     cosx = torch.cos(x)
30 |     sinx = torch.sin(x)
31 | 
32 |     xmat = torch.stack([ones, zeros, zeros,
33 |                         zeros, cosx, -sinx,
34 |                         zeros, sinx, cosx], dim=1).view(B, 3, 3)
35 | 
36 |     rot_mat = xmat.bmm(ymat).bmm(zmat)
37 |     return rot_mat
38 | 
39 | ########################################################################################################################
40 | 
41 | def pose_vec2mat(vec, mode='euler'):
42 |     """Convert Euler parameters to transformation matrix."""
43 |     if mode is None:
44 |         return vec
45 |     trans, rot = vec[:, :3].unsqueeze(-1), vec[:, 3:]
46 |     if mode == 'euler':
47 |         rot_mat = euler2mat(rot)
48 |     else:
49 |         raise ValueError('Rotation mode not supported {}'.format(mode))
50 |     mat = torch.cat([rot_mat, trans], dim=2)  # [B,3,4]
51 |     return mat
52 | 
53 | ########################################################################################################################
54 | 
55 | def invert_pose(T):
56 |     """Inverts a [B,4,4] torch.tensor pose"""
57 |     Tinv = torch.eye(4, device=T.device, dtype=T.dtype).repeat([len(T), 1, 1])
58 |     Tinv[:, :3, :3] = torch.transpose(T[:, :3, :3], -2, -1)
59 |     Tinv[:, :3, -1] = torch.bmm(-1. * Tinv[:, :3, :3], T[:, :3, -1].unsqueeze(-1)).squeeze(-1)
60 |     return Tinv
61 | 
62 | ########################################################################################################################
63 | 
64 | def invert_pose_numpy(T):
65 |     """Inverts a [4,4] np.array pose"""
66 |     Tinv = np.copy(T)
67 |     R, t = Tinv[:3, :3], Tinv[:3, 3]
68 |     Tinv[:3, :3], Tinv[:3, 3] = R.T, - np.matmul(R.T, t)
69 |     return Tinv
70 | 
71 | ########################################################################################################################
72 | 


--------------------------------------------------------------------------------
/packnet_sfm/loggers/__init__.py:
--------------------------------------------------------------------------------
1 | from packnet_sfm.loggers.wandb_logger import WandbLogger
2 | 
3 | __all__ = ["WandbLogger"]


--------------------------------------------------------------------------------
/packnet_sfm/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/losses/__init__.py


--------------------------------------------------------------------------------
/packnet_sfm/losses/loss_base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import numpy as np
 4 | import torch.nn as nn
 5 | from packnet_sfm.utils.types import is_list
 6 | 
 7 | ########################################################################################################################
 8 | 
 9 | class ProgressiveScaling:
10 |     """
11 |     Helper class to manage progressive scaling.
12 |     After a certain training progress percentage, decrease the number of scales by 1.
13 | 
14 |     Parameters
15 |     ----------
16 |     progressive_scaling : float
17 |         Training progress percentage where the number of scales is decreased
18 |     num_scales : int
19 |         Initial number of scales
20 |     """
21 |     def __init__(self, progressive_scaling, num_scales=4):
22 |         self.num_scales = num_scales
23 |         # Use it only if bigger than zero (make a list)
24 |         if progressive_scaling > 0.0:
25 |             self.progressive_scaling = np.float32(
26 |                 [progressive_scaling * (i + 1) for i in range(num_scales - 1)] + [1.0])
27 |         # Otherwise, disable it
28 |         else:
29 |             self.progressive_scaling = progressive_scaling
30 |     def __call__(self, progress):
31 |         """
32 |         Call for an update in the number of scales
33 | 
34 |         Parameters
35 |         ----------
36 |         progress : float
37 |             Training progress percentage
38 | 
39 |         Returns
40 |         -------
41 |         num_scales : int
42 |             New number of scales
43 |         """
44 |         if is_list(self.progressive_scaling):
45 |             return int(self.num_scales -
46 |                        np.searchsorted(self.progressive_scaling, progress))
47 |         else:
48 |             return self.num_scales
49 | 
50 | ########################################################################################################################
51 | 
52 | class LossBase(nn.Module):
53 |     """Base class for losses."""
54 |     def __init__(self):
55 |         """Initializes logs and metrics dictionaries"""
56 |         super().__init__()
57 |         self._logs = {}
58 |         self._metrics = {}
59 | 
60 | ########################################################################################################################
61 | 
62 |     @property
63 |     def logs(self):
64 |         """Return logs."""
65 |         return self._logs
66 | 
67 |     @property
68 |     def metrics(self):
69 |         """Return metrics."""
70 |         return self._metrics
71 | 
72 |     def add_metric(self, key, val):
73 |         """Add a new metric to the dictionary and detach it."""
74 |         self._metrics[key] = val.detach()
75 | 
76 | ########################################################################################################################
77 | 


--------------------------------------------------------------------------------
/packnet_sfm/losses/velocity_loss.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | from packnet_sfm.utils.image import match_scales
 7 | from packnet_sfm.losses.loss_base import LossBase
 8 | 
 9 | 
10 | class VelocityLoss(LossBase):
11 |     """
12 |     Velocity loss for pose translation.
13 |     """
14 |     def __init__(self, **kwargs):
15 |         super().__init__()
16 | 
17 |     def forward(self, pred_pose, gt_pose_context, **kwargs):
18 |         """
19 |         Calculates velocity loss.
20 | 
21 |         Parameters
22 |         ----------
23 |         pred_pose : list of Pose
24 |             Predicted pose transformation between origin and reference
25 |         gt_pose_context : list of Pose
26 |             Ground-truth pose transformation between origin and reference
27 | 
28 |         Returns
29 |         -------
30 |         losses_and_metrics : dict
31 |             Output dictionary
32 |         """
33 |         pred_trans = [pose.mat[:, :3, -1].norm(dim=-1) for pose in pred_pose]
34 |         gt_trans = [pose[:, :3, -1].norm(dim=-1) for pose in gt_pose_context]
35 |         # Calculate velocity supervision loss
36 |         loss = sum([(pred - gt).abs().mean()
37 |                     for pred, gt in zip(pred_trans, gt_trans)]) / len(gt_trans)
38 |         self.add_metric('velocity_loss', loss)
39 |         return {
40 |             'loss': loss.unsqueeze(0),
41 |             'metrics': self.metrics,
42 |         }
43 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/GenericSelfSupModel.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | from packnet_sfm.models.GenericSfmModel import GenericSfmModel
  4 | from packnet_sfm.losses.generic_multiview_photometric_loss import GenericMultiViewPhotometricLoss
  5 | from packnet_sfm.models.model_utils import merge_outputs
  6 | import numpy as np
  7 | 
  8 | 
  9 | class GenericSelfSupModel(GenericSfmModel):
 10 |     """
 11 |     Model that inherits a depth and pose network from GenericSfmModel and
 12 |     includes the photometric loss for self-supervised training.
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     depth_net : nn.Module
 17 |         Depth network to be used
 18 |     pose_net : nn.Module
 19 |         Pose network to be used
 20 |     kwargs : dict
 21 |         Extra parameters
 22 |     """
 23 | 
 24 |     def __init__(self, depth_net=None, pose_net=None, **kwargs):
 25 |         # Initializes GenericSfmModel
 26 |         super().__init__(depth_net, pose_net, **kwargs)
 27 |         # Initializes the photometric loss
 28 |         self._photometric_loss = GenericMultiViewPhotometricLoss(**kwargs)
 29 | 
 30 |     @property
 31 |     def logs(self):
 32 |         """Return logs."""
 33 |         return {
 34 |             **super().logs,
 35 |             **self._photometric_loss.logs
 36 |         }
 37 | 
 38 |     @property
 39 |     def requires_depth_net(self):
 40 |         return True
 41 | 
 42 |     @property
 43 |     def requires_pose_net(self):
 44 |         return True
 45 | 
 46 |     @property
 47 |     def requires_gt_depth(self):
 48 |         return False
 49 | 
 50 |     @property
 51 |     def requires_gt_pose(self):
 52 |         return False
 53 | 
 54 |     def self_supervised_loss(self, image, ref_images, inv_depths, ray_surface, poses,
 55 |                              intrinsics, return_logs=False, progress=0.0):
 56 |         """
 57 |         Calculates the self-supervised photometric loss.
 58 | 
 59 |         Parameters
 60 |         ----------
 61 |         image : torch.Tensor [B,3,H,W]
 62 |             Original image
 63 |         ref_images : list of torch.Tensor [B,3,H,W]
 64 |             Reference images from context
 65 |         inv_depths : torch.Tensor [B,1,H,W]
 66 |             Predicted inverse depth maps from the original image
 67 |         poses : list of Pose
 68 |             List containing predicted poses between original and context images
 69 |         intrinsics : torch.Tensor [B,3,3]
 70 |             Camera intrinsics
 71 |         return_logs : bool
 72 |             True if logs are stored
 73 |         progress :
 74 |             Training progress percentage
 75 | 
 76 |         Returns
 77 |         -------
 78 |         output : dict
 79 |             Dictionary containing a "loss" scalar a "metrics" dictionary
 80 |         """
 81 |         return self._photometric_loss(
 82 |             image, ref_images, inv_depths, ray_surface, intrinsics, intrinsics, poses,
 83 |             return_logs=return_logs, progress=progress)
 84 | 
 85 |     def forward(self, batch, return_logs=True, force_flip=False, progress=0.0):
 86 |         """
 87 |         Processes a batch.
 88 | 
 89 |         Parameters
 90 |         ----------
 91 |         batch : dict
 92 |             Input batch
 93 |         return_logs : boolf
 94 |             True if logs are stored
 95 |         progress :
 96 |             Training progress percentage
 97 | 
 98 |         Returns
 99 |         -------
100 |         output : dict
101 |             Dictionary containing a "loss" scalar and different metrics and predictions
102 |             for logging and downstream usage.
103 |         """
104 |         # Calculate predicted depth and pose output
105 |         output = super().forward(batch, return_logs=return_logs)
106 | 
107 |         if not self.training:
108 |             # If not training, no need for self-supervised loss
109 |             return output
110 |         else:
111 |             # Otherwise, calculate self-supervised loss
112 |             self_sup_output = self.self_supervised_loss(
113 |                 batch['rgb_original'], batch['rgb_context_original'],
114 |                 output['inv_depths'], output['ray_surface'], output['poses'], batch['intrinsics'],
115 |                 return_logs=return_logs, progress=progress)
116 |             # Return loss and metrics
117 |             return {
118 |                 'loss': self_sup_output['loss'],
119 |                 **merge_outputs(output, self_sup_output),
120 |             }
121 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/GenericSfmModel.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import random
  4 | import torch.nn as nn
  5 | from packnet_sfm.geometry.pose import Pose
  6 | from packnet_sfm.utils.misc import make_list
  7 | from packnet_sfm.models.SfmModel import SfmModel
  8 | import torch.nn.functional as F
  9 | 
 10 | class GenericSfmModel(SfmModel):
 11 |     """
 12 |     Model class encapsulating a pose and depth networks.
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     depth_net : nn.Module
 17 |         Depth network to be used
 18 |     pose_net : nn.Module
 19 |         Pose network to be used
 20 |     rotation_mode : str
 21 |         Rotation mode for the pose network
 22 |     flip_lr_prob : float
 23 |         Probability of flipping when using the depth network
 24 |     upsample_depth_maps : bool
 25 |         True if depth map scales are upsampled to highest resolution
 26 |     kwargs : dict
 27 |         Extra parameters
 28 |     """
 29 | 
 30 |     def flip_model(self, model, image, flip):
 31 |         """
 32 |         Flip input image and flip output inverse depth map
 33 | 
 34 |         Parameters
 35 |         ----------
 36 |         model : nn.Module
 37 |             Module to be used
 38 |         image : torch.Tensor [B,3,H,W]
 39 |             Input image
 40 |         flip : bool
 41 |             True if the flip is happening
 42 | 
 43 |         Returns
 44 |         -------
 45 |         inv_depths : list of torch.Tensor [B,1,H,W]
 46 |             List of predicted inverse depth maps
 47 |         """
 48 |         if flip:
 49 |             return [flip_lr(inv_depth) for inv_depth in model(flip_lr(image))]
 50 |         else:
 51 |             return model(image)
 52 | 
 53 |     def interpolate_scales(self, images, shape=None, mode='bilinear', align_corners=False):
 54 |         """
 55 |         Interpolate list of images to the same shape
 56 | 
 57 |         Parameters
 58 |         ----------
 59 |         images : list of torch.Tensor [B,?,?,?]
 60 |             Images to be interpolated, with different resolutions
 61 |         shape : tuple (H, W)
 62 |             Output shape
 63 |         mode : str
 64 |             Interpolation mode
 65 |         align_corners : bool
 66 |             True if corners will be aligned after interpolation
 67 | 
 68 |         Returns
 69 |         -------
 70 |         images : list of torch.Tensor [B,?,H,W]
 71 |             Interpolated images, with the same resolution
 72 |         """
 73 |         # If no shape is provided, interpolate to highest resolution
 74 |         if shape is None:
 75 |             shape = images[0].shape
 76 |         # Take last two dimensions as shape
 77 |         if len(shape) > 2:
 78 |             shape = shape[-2:]
 79 |         # Interpolate all images
 80 |         return [F.interpolate(image, shape, mode=mode,
 81 |                                   align_corners=align_corners) for image in images]
 82 | 
 83 |     def compute_depth_net(self, image):
 84 |         """Computes inverse depth maps from single images"""
 85 |         # Randomly flip and estimate inverse depth maps
 86 |         inv_depths, raysurf = self.flip_model(self.depth_net, image, False)
 87 |         inv_depths = make_list(inv_depths)
 88 |         # If upsampling depth maps
 89 |         if self.upsample_depth_maps:
 90 |             inv_depths = self.interpolate_scales(
 91 |                 inv_depths, mode='nearest', align_corners=None)
 92 |         # Return inverse depth maps
 93 |         return inv_depths, raysurf
 94 | 
 95 |     def forward(self, batch, return_logs=False):
 96 |         """
 97 |         Processes a batch.
 98 | 
 99 |         Parameters
100 |         ----------
101 |         batch : dict
102 |             Input batch
103 |         return_logs : bool
104 |             True if logs are stored
105 | 
106 |         Returns
107 |         -------
108 |         output : dict
109 |             Dictionary containing predicted inverse depth maps and poses
110 |         """
111 |         #print(logs)
112 |         # Generate inverse depth predictions
113 |         inv_depths, raysurf = self.compute_depth_net(batch['rgb'])
114 |         # Generate pose predictions if available
115 |         pose = None
116 |         if 'rgb_context' in batch and self.pose_net is not None:
117 |             pose = self.compute_poses(batch['rgb'],
118 |             #pose = self.compute_pose_net(batch['rgb'],
119 |                                       batch['rgb_context'])
120 |         # Return output dictionary
121 |         return {
122 |             'inv_depths': inv_depths,
123 |             'poses': pose,
124 |             'ray_surface': raysurf
125 |         }
126 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/SelfSupModel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | from packnet_sfm.models.SfmModel import SfmModel
 4 | from packnet_sfm.losses.multiview_photometric_loss import MultiViewPhotometricLoss
 5 | from packnet_sfm.models.model_utils import merge_outputs
 6 | 
 7 | 
 8 | class SelfSupModel(SfmModel):
 9 |     """
10 |     Model that inherits a depth and pose network from SfmModel and
11 |     includes the photometric loss for self-supervised training.
12 | 
13 |     Parameters
14 |     ----------
15 |     kwargs : dict
16 |         Extra parameters
17 |     """
18 |     def __init__(self, **kwargs):
19 |         # Initializes SfmModel
20 |         super().__init__(**kwargs)
21 |         # Initializes the photometric loss
22 |         self._photometric_loss = MultiViewPhotometricLoss(**kwargs)
23 | 
24 |     @property
25 |     def logs(self):
26 |         """Return logs."""
27 |         return {
28 |             **super().logs,
29 |             **self._photometric_loss.logs
30 |         }
31 | 
32 |     def self_supervised_loss(self, image, ref_images, inv_depths, poses,
33 |                              intrinsics, return_logs=False, progress=0.0):
34 |         """
35 |         Calculates the self-supervised photometric loss.
36 | 
37 |         Parameters
38 |         ----------
39 |         image : torch.Tensor [B,3,H,W]
40 |             Original image
41 |         ref_images : list of torch.Tensor [B,3,H,W]
42 |             Reference images from context
43 |         inv_depths : torch.Tensor [B,1,H,W]
44 |             Predicted inverse depth maps from the original image
45 |         poses : list of Pose
46 |             List containing predicted poses between original and context images
47 |         intrinsics : torch.Tensor [B,3,3]
48 |             Camera intrinsics
49 |         return_logs : bool
50 |             True if logs are stored
51 |         progress :
52 |             Training progress percentage
53 | 
54 |         Returns
55 |         -------
56 |         output : dict
57 |             Dictionary containing a "loss" scalar a "metrics" dictionary
58 |         """
59 |         return self._photometric_loss(
60 |             image, ref_images, inv_depths, intrinsics, intrinsics, poses,
61 |             return_logs=return_logs, progress=progress)
62 | 
63 |     def forward(self, batch, return_logs=False, progress=0.0):
64 |         """
65 |         Processes a batch.
66 | 
67 |         Parameters
68 |         ----------
69 |         batch : dict
70 |             Input batch
71 |         return_logs : bool
72 |             True if logs are stored
73 |         progress :
74 |             Training progress percentage
75 | 
76 |         Returns
77 |         -------
78 |         output : dict
79 |             Dictionary containing a "loss" scalar and different metrics and predictions
80 |             for logging and downstream usage.
81 |         """
82 |         # Calculate predicted depth and pose output
83 |         output = super().forward(batch, return_logs=return_logs)
84 |         if not self.training:
85 |             # If not training, no need for self-supervised loss
86 |             return output
87 |         else:
88 |             # Otherwise, calculate self-supervised loss
89 |             self_sup_output = self.self_supervised_loss(
90 |                 batch['rgb_original'], batch['rgb_context_original'],
91 |                 output['inv_depths'], output['poses'], batch['intrinsics'],
92 |                 return_logs=return_logs, progress=progress)
93 |             # Return loss and metrics
94 |             return {
95 |                 'loss': self_sup_output['loss'],
96 |                 **merge_outputs(output, self_sup_output),
97 |             }
98 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/SemiSupCompletionModel.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import torch
  4 | 
  5 | from packnet_sfm.models.SelfSupModel import SfmModel, SelfSupModel
  6 | from packnet_sfm.losses.supervised_loss import SupervisedLoss
  7 | from packnet_sfm.models.model_utils import merge_outputs
  8 | from packnet_sfm.utils.depth import depth2inv
  9 | 
 10 | 
 11 | class SemiSupCompletionModel(SelfSupModel):
 12 |     """
 13 |     Semi-Supervised model for depth prediction and completion.
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     supervised_loss_weight : float
 18 |         Weight for the supervised loss
 19 |     kwargs : dict
 20 |         Extra parameters
 21 |     """
 22 |     def __init__(self, supervised_loss_weight=0.9, weight_rgbd=1.0, **kwargs):
 23 |         # Initializes SelfSupModel
 24 |         super().__init__(**kwargs)
 25 |         # If supervision weight is 0.0, use SelfSupModel directly
 26 |         assert 0. < supervised_loss_weight <= 1., "Model requires (0, 1] supervision"
 27 |         # Store weight and initializes supervised loss
 28 |         self.supervised_loss_weight = supervised_loss_weight
 29 |         self._supervised_loss = SupervisedLoss(**kwargs)
 30 | 
 31 |         # Pose network is only required if there is self-supervision
 32 |         if self.supervised_loss_weight == 1:
 33 |             self._network_requirements.remove('pose_net')
 34 |         # GT depth is only required if there is supervision
 35 |         if self.supervised_loss_weight > 0:
 36 |             self._train_requirements.append('gt_depth')
 37 | 
 38 |         self._input_keys = ['rgb', 'input_depth', 'intrinsics']
 39 | 
 40 |         self.weight_rgbd = weight_rgbd
 41 | 
 42 |     @property
 43 |     def logs(self):
 44 |         """Return logs."""
 45 |         return {
 46 |             **super().logs,
 47 |             **self._supervised_loss.logs
 48 |         }
 49 | 
 50 |     def supervised_loss(self, inv_depths, gt_inv_depths,
 51 |                         return_logs=False, progress=0.0):
 52 |         """
 53 |         Calculates the supervised loss.
 54 | 
 55 |         Parameters
 56 |         ----------
 57 |         inv_depths : torch.Tensor [B,1,H,W]
 58 |             Predicted inverse depth maps from the original image
 59 |         gt_inv_depths : torch.Tensor [B,1,H,W]
 60 |             Ground-truth inverse depth maps from the original image
 61 |         return_logs : bool
 62 |             True if logs are stored
 63 |         progress :
 64 |             Training progress percentage
 65 | 
 66 |         Returns
 67 |         -------
 68 |         output : dict
 69 |             Dictionary containing a "loss" scalar a "metrics" dictionary
 70 |         """
 71 |         return self._supervised_loss(
 72 |             inv_depths, gt_inv_depths,
 73 |             return_logs=return_logs, progress=progress)
 74 | 
 75 |     def forward(self, batch, return_logs=False, progress=0.0, **kwargs):
 76 |         """
 77 |         Processes a batch.
 78 | 
 79 |         Parameters
 80 |         ----------
 81 |         batch : dict
 82 |             Input batch
 83 |         return_logs : bool
 84 |             True if logs are stored
 85 |         progress :
 86 |             Training progress percentage
 87 | 
 88 |         Returns
 89 |         -------
 90 |         output : dict
 91 |             Dictionary containing a "loss" scalar and different metrics and predictions
 92 |             for logging and downstream usage.
 93 |         """
 94 |         if not self.training:
 95 |             # If not training, no need for self-supervised loss
 96 |             return SfmModel.forward(self, batch, return_logs=return_logs, **kwargs)
 97 |         else:
 98 |             if self.supervised_loss_weight == 1.:
 99 |                 # If no self-supervision, no need to calculate loss
100 |                 self_sup_output = SfmModel.forward(self, batch, return_logs=return_logs, **kwargs)
101 |                 loss = torch.tensor([0.]).type_as(batch['rgb'])
102 |             else:
103 |                 # Otherwise, calculate and weight self-supervised loss
104 |                 self_sup_output = SelfSupModel.forward(
105 |                     self, batch, return_logs=return_logs, progress=progress, **kwargs)
106 |                 loss = (1.0 - self.supervised_loss_weight) * self_sup_output['loss']
107 |             # Calculate and weight supervised loss
108 |             sup_output = self.supervised_loss(
109 |                 self_sup_output['inv_depths'], depth2inv(batch['depth']),
110 |                 return_logs=return_logs, progress=progress)
111 |             loss += self.supervised_loss_weight * sup_output['loss']
112 |             if 'inv_depths_rgbd' in self_sup_output:
113 |                 sup_output2 = self.supervised_loss(
114 |                     self_sup_output['inv_depths_rgbd'], depth2inv(batch['depth']),
115 |                     return_logs=return_logs, progress=progress)
116 |                 loss += self.weight_rgbd * self.supervised_loss_weight * sup_output2['loss']
117 |                 if 'depth_loss' in self_sup_output:
118 |                     loss += self_sup_output['depth_loss']
119 |             # Merge and return outputs
120 |             return {
121 |                 'loss': loss,
122 |                 **merge_outputs(self_sup_output, sup_output),
123 |             }
124 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/SemiSupModel.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import torch
  4 | 
  5 | from packnet_sfm.models.SelfSupModel import SfmModel, SelfSupModel
  6 | from packnet_sfm.losses.supervised_loss import SupervisedLoss
  7 | from packnet_sfm.models.model_utils import merge_outputs
  8 | from packnet_sfm.utils.depth import depth2inv
  9 | 
 10 | 
 11 | class SemiSupModel(SelfSupModel):
 12 |     """
 13 |     Model that inherits a depth and pose networks, plus the self-supervised loss from
 14 |     SelfSupModel and includes a supervised loss for semi-supervision.
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     supervised_loss_weight : float
 19 |         Weight for the supervised loss
 20 |     kwargs : dict
 21 |         Extra parameters
 22 |     """
 23 |     def __init__(self, supervised_loss_weight=0.9, **kwargs):
 24 |         # Initializes SelfSupModel
 25 |         super().__init__(**kwargs)
 26 |         # If supervision weight is 0.0, use SelfSupModel directly
 27 |         assert 0. < supervised_loss_weight <= 1., "Model requires (0, 1] supervision"
 28 |         # Store weight and initializes supervised loss
 29 |         self.supervised_loss_weight = supervised_loss_weight
 30 |         self._supervised_loss = SupervisedLoss(**kwargs)
 31 | 
 32 |         # Pose network is only required if there is self-supervision
 33 |         if self.supervised_loss_weight == 1:
 34 |             self._network_requirements.remove('pose_net')
 35 |         # GT depth is only required if there is supervision
 36 |         if self.supervised_loss_weight > 0:
 37 |             self._train_requirements.append('gt_depth')
 38 | 
 39 |     @property
 40 |     def logs(self):
 41 |         """Return logs."""
 42 |         return {
 43 |             **super().logs,
 44 |             **self._supervised_loss.logs
 45 |         }
 46 | 
 47 |     def supervised_loss(self, inv_depths, gt_inv_depths,
 48 |                         return_logs=False, progress=0.0):
 49 |         """
 50 |         Calculates the supervised loss.
 51 | 
 52 |         Parameters
 53 |         ----------
 54 |         inv_depths : torch.Tensor [B,1,H,W]
 55 |             Predicted inverse depth maps from the original image
 56 |         gt_inv_depths : torch.Tensor [B,1,H,W]
 57 |             Ground-truth inverse depth maps from the original image
 58 |         return_logs : bool
 59 |             True if logs are stored
 60 |         progress :
 61 |             Training progress percentage
 62 | 
 63 |         Returns
 64 |         -------
 65 |         output : dict
 66 |             Dictionary containing a "loss" scalar a "metrics" dictionary
 67 |         """
 68 |         return self._supervised_loss(
 69 |             inv_depths, gt_inv_depths,
 70 |             return_logs=return_logs, progress=progress)
 71 | 
 72 |     def forward(self, batch, return_logs=False, progress=0.0):
 73 |         """
 74 |         Processes a batch.
 75 | 
 76 |         Parameters
 77 |         ----------
 78 |         batch : dict
 79 |             Input batch
 80 |         return_logs : bool
 81 |             True if logs are stored
 82 |         progress :
 83 |             Training progress percentage
 84 | 
 85 |         Returns
 86 |         -------
 87 |         output : dict
 88 |             Dictionary containing a "loss" scalar and different metrics and predictions
 89 |             for logging and downstream usage.
 90 |         """
 91 |         if not self.training:
 92 |             # If not training, no need for self-supervised loss
 93 |             return SfmModel.forward(self, batch)
 94 |         else:
 95 |             if self.supervised_loss_weight == 1.:
 96 |                 # If no self-supervision, no need to calculate loss
 97 |                 self_sup_output = SfmModel.forward(self, batch)
 98 |                 loss = torch.tensor([0.]).type_as(batch['rgb'])
 99 |             else:
100 |                 # Otherwise, calculate and weight self-supervised loss
101 |                 self_sup_output = SelfSupModel.forward(self, batch)
102 |                 loss = (1.0 - self.supervised_loss_weight) * self_sup_output['loss']
103 |             # Calculate and weight supervised loss
104 |             sup_output = self.supervised_loss(
105 |                 self_sup_output['inv_depths'], depth2inv(batch['depth']),
106 |                 return_logs=return_logs, progress=progress)
107 |             loss += self.supervised_loss_weight * sup_output['loss']
108 |             # Merge and return outputs
109 |             return {
110 |                 'loss': loss,
111 |                 **merge_outputs(self_sup_output, sup_output),
112 |             }
113 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/SfmModel.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import random
  4 | 
  5 | from packnet_sfm.geometry.pose import Pose
  6 | from packnet_sfm.models.base_model import BaseModel
  7 | from packnet_sfm.models.model_utils import flip_batch_input, flip_output, upsample_output
  8 | from packnet_sfm.utils.misc import filter_dict
  9 | 
 10 | 
 11 | class SfmModel(BaseModel):
 12 |     """
 13 |     Model class encapsulating a pose and depth networks.
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     depth_net : nn.Module
 18 |         Depth network to be used
 19 |     pose_net : nn.Module
 20 |         Pose network to be used
 21 |     rotation_mode : str
 22 |         Rotation mode for the pose network
 23 |     flip_lr_prob : float
 24 |         Probability of flipping when using the depth network
 25 |     upsample_depth_maps : bool
 26 |         True if depth map scales are upsampled to highest resolution
 27 |     kwargs : dict
 28 |         Extra parameters
 29 |     """
 30 |     def __init__(self, depth_net=None, pose_net=None,
 31 |                  rotation_mode='euler', flip_lr_prob=0.0,
 32 |                  upsample_depth_maps=False, **kwargs):
 33 |         super().__init__()
 34 |         self.depth_net = depth_net
 35 |         self.pose_net = pose_net
 36 |         self.rotation_mode = rotation_mode
 37 |         self.flip_lr_prob = flip_lr_prob
 38 |         self.upsample_depth_maps = upsample_depth_maps
 39 | 
 40 |         self._network_requirements = [
 41 |             'depth_net',
 42 |             'pose_net',
 43 |         ]
 44 | 
 45 |     def add_depth_net(self, depth_net):
 46 |         """Add a depth network to the model"""
 47 |         self.depth_net = depth_net
 48 | 
 49 |     def add_pose_net(self, pose_net):
 50 |         """Add a pose network to the model"""
 51 |         self.pose_net = pose_net
 52 | 
 53 |     def depth_net_flipping(self, batch, flip):
 54 |         """
 55 |         Runs depth net with the option of flipping
 56 | 
 57 |         Parameters
 58 |         ----------
 59 |         batch : dict
 60 |             Input batch
 61 |         flip : bool
 62 |             True if the flip is happening
 63 | 
 64 |         Returns
 65 |         -------
 66 |         output : dict
 67 |             Dictionary with depth network output (e.g. 'inv_depths' and 'uncertainty')
 68 |         """
 69 |         # Which keys are being passed to the depth network
 70 |         batch_input = {key: batch[key] for key in filter_dict(batch, self._input_keys)}
 71 |         if flip:
 72 |             # Run depth network with flipped inputs
 73 |             output = self.depth_net(**flip_batch_input(batch_input))
 74 |             # Flip output back if training
 75 |             output = flip_output(output)
 76 |         else:
 77 |             # Run depth network
 78 |             output = self.depth_net(**batch_input)
 79 |         return output
 80 | 
 81 |     def compute_depth_net(self, batch, force_flip=False):
 82 |         """Computes inverse depth maps from single images"""
 83 |         # Randomly flip and estimate inverse depth maps
 84 |         flag_flip_lr = random.random() < self.flip_lr_prob if self.training else force_flip
 85 |         output = self.depth_net_flipping(batch, flag_flip_lr)
 86 |         # If upsampling depth maps at training time
 87 |         if self.training and self.upsample_depth_maps:
 88 |             output = upsample_output(output, mode='nearest', align_corners=None)
 89 |         # Return inverse depth maps
 90 |         return output
 91 | 
 92 |     def compute_pose_net(self, image, contexts):
 93 |         """Compute poses from image and a sequence of context images"""
 94 |         pose_vec = self.pose_net(image, contexts)
 95 |         return [Pose.from_vec(pose_vec[:, i], self.rotation_mode)
 96 |                 for i in range(pose_vec.shape[1])]
 97 | 
 98 |     def forward(self, batch, return_logs=False, force_flip=False):
 99 |         """
100 |         Processes a batch.
101 | 
102 |         Parameters
103 |         ----------
104 |         batch : dict
105 |             Input batch
106 |         return_logs : bool
107 |             True if logs are stored
108 |         force_flip : bool
109 |             If true, force batch flipping for inverse depth calculation
110 | 
111 |         Returns
112 |         -------
113 |         output : dict
114 |             Dictionary containing the output of depth and pose networks
115 |         """
116 |         # Generate inverse depth predictions
117 |         depth_output = self.compute_depth_net(batch, force_flip=force_flip)
118 |         # Generate pose predictions if available
119 |         pose_output = None
120 |         if 'rgb_context' in batch and self.pose_net is not None:
121 |             pose_output = self.compute_pose_net(
122 |                 batch['rgb'], batch['rgb_context'])
123 |         # Return output dictionary
124 |         return {
125 |             **depth_output,
126 |             'poses': pose_output,
127 |         }
128 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/VelSupModel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | from packnet_sfm.models.SelfSupModel import SelfSupModel
 4 | from packnet_sfm.losses.velocity_loss import VelocityLoss
 5 | 
 6 | 
 7 | class VelSupModel(SelfSupModel):
 8 |     """
 9 |     Self-supervised model with additional velocity supervision loss.
10 | 
11 |     Parameters
12 |     ----------
13 |     velocity_loss_weight : float
14 |         Weight for velocity supervision
15 |     kwargs : dict
16 |         Extra parameters
17 |     """
18 |     def __init__(self, velocity_loss_weight=0.1, **kwargs):
19 |         # Initializes SelfSupModel
20 |         super().__init__(**kwargs)
21 |         # Stores velocity supervision loss weight
22 |         self._velocity_loss = VelocityLoss(**kwargs)
23 |         self.velocity_loss_weight = velocity_loss_weight
24 | 
25 |         # GT pose is required
26 |         self._train_requirements['gt_pose'] = True
27 | 
28 |     def forward(self, batch, return_logs=False, progress=0.0):
29 |         """
30 |         Processes a batch.
31 | 
32 |         Parameters
33 |         ----------
34 |         batch : dict
35 |             Input batch
36 |         return_logs : bool
37 |             True if logs are stored
38 |         progress :
39 |             Training progress percentage
40 | 
41 |         Returns
42 |         -------
43 |         output : dict
44 |             Dictionary containing a "loss" scalar and different metrics and predictions
45 |             for logging and downstream usage.
46 |         """
47 |         output = super().forward(batch, return_logs, progress)
48 |         if self.training:
49 |             # Update self-supervised loss with velocity supervision
50 |             velocity_loss = self._velocity_loss(output['poses'], batch['pose_context'])
51 |             output['loss'] += self.velocity_loss_weight * velocity_loss['loss']
52 |         return output
53 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Structure-from-Motion (SfM) Models and wrappers
 3 | ===============================================
 4 | 
 5 | - SfmModel is a torch.nn.Module wrapping both a Depth and a Pose network to enable training in a Structure-from-Motion setup (i.e. from videos)
 6 | - SelfSupModel is an SfmModel specialized for self-supervised learning (using videos only)
 7 | - SemiSupModel is an SfmModel specialized for semi-supervised learning (using videos and depth supervision)
 8 | - ModelWrapper is a torch.nn.Module that wraps an SfmModel to enable easy training and eval with a trainer
 9 | - ModelCheckpoint enables saving/restoring state of torch.nn.Module objects
10 | 
11 | """
12 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/base_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch.nn as nn
 4 | 
 5 | 
 6 | class BaseModel(nn.Module):
 7 |     """
 8 |     Base Model class defines APIs for packnet_sfm model wrapper.
 9 | 
10 |     Parameters
11 |     ----------
12 |     kwargs : dict
13 |         Extra parameters
14 |     """
15 |     def __init__(self, **kwargs):
16 |         super().__init__()
17 | 
18 |         self._logs = {}
19 |         self._losses = {}
20 | 
21 |         self._network_requirements = []     # Which networks the model requires
22 |         self._train_requirements = []       # Which GT information the model requires at training time
23 |         self._input_keys = ['rgb']          # Which input keys are provided to the model
24 | 
25 |     def _forward_unimplemented(self, *args):
26 |         pass
27 | 
28 |     @property
29 |     def logs(self):
30 |         """Return logs."""
31 |         return self._logs
32 | 
33 |     @property
34 |     def losses(self):
35 |         """Return metrics."""
36 |         return self._losses
37 | 
38 |     def add_loss(self, key, val):
39 |         """Add a new loss to the dictionary and detaches it."""
40 |         self._losses[key] = val.detach()
41 | 
42 |     @property
43 |     def network_requirements(self):
44 |         """
45 |         Networks required to run the model
46 | 
47 |         Returns
48 |         -------
49 |         requirements : dict
50 |             key : str
51 |                 Attribute name in model object pointing to corresponding network.
52 |             value : str
53 |                 Task Name.
54 |         """
55 |         return self._network_requirements
56 | 
57 |     @property
58 |     def train_requirements(self):
59 |         """
60 |         Information required by the model at training stage
61 | 
62 |         Returns
63 |         -------
64 |         requirements : dict
65 |             gt_depth : bool
66 |                 Whether ground truth depth is required by the model at training time
67 |             gt_pose : bool
68 |                 Whether ground truth pose is required by the model at training time
69 |         """
70 |         return self._train_requirements
71 | 
72 |     def add_net(self, network_module, network_name):
73 |         """Add a network module as an attribute in the model
74 | 
75 |         Parameters
76 |         ----------
77 |         network_module: torch.nn.Module
78 | 
79 |         network_name: str
80 |             name of the network as well as the attribute in the network.
81 |         """
82 |         assert network_name in self._network_requirements, "Network module not required!"
83 |         setattr(self, network_name, network_module)
84 | 
85 |     def forward(self, batch, return_logs=False, **kwargs):
86 |         """
87 |         Processes a batch.
88 | 
89 |         Parameters
90 |         ----------
91 |         batch : dict
92 |             Input batch
93 |         return_logs : bool
94 |             True if logs are stored
95 |         """
96 |         raise NotImplementedError("Please implement forward function in your own subclass model.")
97 | 


--------------------------------------------------------------------------------
/packnet_sfm/models/model_utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | from packnet_sfm.utils.image import flip_lr, interpolate_scales
  4 | from packnet_sfm.utils.misc import filter_dict
  5 | from packnet_sfm.utils.types import is_tensor, is_list, is_numpy
  6 | 
  7 | 
  8 | def flip(tensor, flip_fn):
  9 |     """
 10 |     Flip tensors or list of tensors based on a function
 11 | 
 12 |     Parameters
 13 |     ----------
 14 |     tensor : torch.Tensor or list[torch.Tensor] or list[list[torch.Tensor]]
 15 |         Tensor to be flipped
 16 |     flip_fn : Function
 17 |         Flip function
 18 | 
 19 |     Returns
 20 |     -------
 21 |     tensor : torch.Tensor or list[torch.Tensor] or list[list[torch.Tensor]]
 22 |         Flipped tensor or list of tensors
 23 |     """
 24 |     if not is_list(tensor):
 25 |         return flip_fn(tensor)
 26 |     else:
 27 |         if not is_list(tensor[0]):
 28 |             return [flip_fn(val) for val in tensor]
 29 |         else:
 30 |             return [[flip_fn(v) for v in val] for val in tensor]
 31 | 
 32 | 
 33 | def merge_outputs(*outputs):
 34 |     """
 35 |     Merges model outputs for logging
 36 | 
 37 |     Parameters
 38 |     ----------
 39 |     outputs : tuple of dict
 40 |         Outputs to be merged
 41 | 
 42 |     Returns
 43 |     -------
 44 |     output : dict
 45 |         Dictionary with a "metrics" key containing a dictionary with various metrics and
 46 |         all other keys that are not "loss" (it is handled differently).
 47 |     """
 48 |     ignore = ['loss'] # Keys to ignore
 49 |     combine = ['metrics'] # Keys to combine
 50 |     merge = {key: {} for key in combine}
 51 |     for output in outputs:
 52 |         # Iterate over all keys
 53 |         for key, val in output.items():
 54 |             # Combine these keys
 55 |             if key in combine:
 56 |                 for sub_key, sub_val in output[key].items():
 57 |                     assert sub_key not in merge[key].keys(), \
 58 |                         'Combining duplicated key {} to {}'.format(sub_key, key)
 59 |                     merge[key][sub_key] = sub_val
 60 |             # Ignore these keys
 61 |             elif key not in ignore:
 62 |                 assert key not in merge.keys(), \
 63 |                     'Adding duplicated key {}'.format(key)
 64 |                 merge[key] = val
 65 |     return merge
 66 | 
 67 | 
 68 | def stack_batch(batch):
 69 |     """
 70 |     Stack multi-camera batches (B,N,C,H,W becomes BN,C,H,W)
 71 | 
 72 |     Parameters
 73 |     ----------
 74 |     batch : dict
 75 |         Batch
 76 | 
 77 |     Returns
 78 |     -------
 79 |     batch : dict
 80 |         Stacked batch
 81 |     """
 82 |     # If there is multi-camera information
 83 |     if len(batch['rgb'].shape) == 5:
 84 |         assert batch['rgb'].shape[0] == 1, 'Only batch size 1 is supported for multi-cameras'
 85 |         # Loop over all keys
 86 |         for key in batch.keys():
 87 |             # If list, stack every item
 88 |             if is_list(batch[key]):
 89 |                 if is_tensor(batch[key][0]) or is_numpy(batch[key][0]):
 90 |                     batch[key] = [sample[0] for sample in batch[key]]
 91 |             # Else, stack single item
 92 |             else:
 93 |                 batch[key] = batch[key][0]
 94 |     return batch
 95 | 
 96 | 
 97 | def flip_batch_input(batch):
 98 |     """
 99 |     Flip batch input information (copies data first)
100 | 
101 |     Parameters
102 |     ----------
103 |     batch : dict
104 |         Batch information
105 | 
106 |     Returns
107 |     -------
108 |     batch : dict
109 |         Flipped batch
110 |     """
111 |     # Flip tensors
112 |     for key in filter_dict(batch, [
113 |         'rgb', 'rgb_context',
114 |         'input_depth', 'input_depth_context',
115 |     ]):
116 |         batch[key] = flip(batch[key], flip_lr)
117 |     # Flip intrinsics
118 |     for key in filter_dict(batch, [
119 |         'intrinsics'
120 |     ]):
121 |         batch[key] = batch[key].clone()
122 |         batch[key][:, 0, 2] = batch['rgb'].shape[3] - batch[key][:, 0, 2]
123 |     # Return flipped batch
124 |     return batch
125 | 
126 | 
127 | def flip_output(output):
128 |     """
129 |     Flip output information
130 | 
131 |     Parameters
132 |     ----------
133 |     output : dict
134 |         Dictionary of model outputs (e.g. with keys like 'inv_depths' and 'uncertainty')
135 | 
136 |     Returns
137 |     -------
138 |     output : dict
139 |         Flipped output
140 |     """
141 |     # Flip tensors
142 |     for key in filter_dict(output, [
143 |         'uncertainty', 'logits_semantic', 'ord_probability',
144 |         'inv_depths', 'inv_depths_context', 'inv_depths1', 'inv_depths2',
145 |         'pred_depth', 'pred_depth_context', 'pred_depth1', 'pred_depth2',
146 |         'pred_inv_depth', 'pred_inv_depth_context', 'pred_inv_depth1', 'pred_inv_depth2',
147 |     ]):
148 |         output[key] = flip(output[key], flip_lr)
149 |     return output
150 | 
151 | 
152 | def upsample_output(output, mode='nearest', align_corners=None):
153 |     """
154 |     Upsample multi-scale outputs to full resolution.
155 | 
156 |     Parameters
157 |     ----------
158 |     output : dict
159 |         Dictionary of model outputs (e.g. with keys like 'inv_depths' and 'uncertainty')
160 |     mode : str
161 |         Which interpolation mode is used
162 |     align_corners: bool or None
163 |         Whether corners will be aligned during interpolation
164 | 
165 |     Returns
166 |     -------
167 |     output : dict
168 |         Upsampled output
169 |     """
170 |     for key in filter_dict(output, [
171 |         'inv_depths', 'uncertainty'
172 |     ]):
173 |         output[key] = interpolate_scales(
174 |             output[key], mode=mode, align_corners=align_corners)
175 |     for key in filter_dict(output, [
176 |         'inv_depths_context'
177 |     ]):
178 |         output[key] = [interpolate_scales(
179 |             val, mode=mode, align_corners=align_corners) for val in output[key]]
180 |     return output


--------------------------------------------------------------------------------
/packnet_sfm/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/networks/__init__.py


--------------------------------------------------------------------------------
/packnet_sfm/networks/depth/DepthResNet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch.nn as nn
 4 | from functools import partial
 5 | 
 6 | from packnet_sfm.networks.layers.resnet.resnet_encoder import ResnetEncoder
 7 | from packnet_sfm.networks.layers.resnet.depth_decoder import DepthDecoder
 8 | from packnet_sfm.networks.layers.resnet.layers import disp_to_depth
 9 | 
10 | ########################################################################################################################
11 | 
12 | class DepthResNet(nn.Module):
13 |     """
14 |     Inverse depth network based on the ResNet architecture.
15 | 
16 |     Parameters
17 |     ----------
18 |     version : str
19 |         Has a XY format, where:
20 |         X is the number of residual layers [18, 34, 50] and
21 |         Y is an optional ImageNet pretrained flag added by the "pt" suffix
22 |         Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch
23 |     kwargs : dict
24 |         Extra parameters
25 |     """
26 |     def __init__(self, version=None, **kwargs):
27 |         super().__init__()
28 |         assert version is not None, "DispResNet needs a version"
29 | 
30 |         num_layers = int(version[:2])       # First two characters are the number of layers
31 |         pretrained = version[2:] == 'pt'    # If the last characters are "pt", use ImageNet pretraining
32 |         assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers)
33 | 
34 |         self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained)
35 |         self.decoder = DepthDecoder(num_ch_enc=self.encoder.num_ch_enc)
36 |         self.scale_inv_depth = partial(disp_to_depth, min_depth=0.1, max_depth=100.0)
37 | 
38 |     def forward(self, rgb):
39 |         """
40 |         Runs the network and returns inverse depth maps
41 |         (4 scales if training and 1 if not).
42 |         """
43 |         x = self.encoder(rgb)
44 |         x = self.decoder(x)
45 |         disps = [x[('disp', i)] for i in range(4)]
46 | 
47 |         if self.training:
48 |             return {
49 |                 'inv_depths': [self.scale_inv_depth(d)[0] for d in disps],
50 |             }
51 |         else:
52 |             return {
53 |                 'inv_depths': self.scale_inv_depth(disps[0])[0],
54 |             }
55 | 
56 | ########################################################################################################################
57 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/depth/RaySurfaceResNet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch.nn as nn
 4 | from functools import partial
 5 | 
 6 | from packnet_sfm.networks.layers.resnet.resnet_encoder import ResnetEncoder
 7 | from packnet_sfm.networks.layers.resnet.depth_decoder import DepthDecoder
 8 | from packnet_sfm.networks.layers.resnet.raysurface_decoder import RaySurfaceDecoder
 9 | from packnet_sfm.networks.layers.resnet.layers import disp_to_depth
10 | 
11 | ########################################################################################################################
12 | 
13 | class RaySurfaceResNet(nn.Module):
14 |     """
15 |     Inverse depth network based on the ResNet architecture.
16 | 
17 |     Parameters
18 |     ----------
19 |     version : str
20 |         Has a XY format, where:
21 |         X is the number of residual layers [18, 34, 50] and
22 |         Y is an optional ImageNet pretrained flag added by the "pt" suffix
23 |         Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch
24 |     kwargs : dict
25 |         Extra parameters
26 |     """
27 | 
28 |     """
29 |     Ray surface network; decodes to a ray surface and a depth map.
30 |         X (int): Number of residual layers [18, 34, 50]
31 |         Y (str): If Y == pt, use a pretrained model
32 |     """
33 | 
34 |     def __init__(self, version=None, **kwargs):
35 |         super().__init__()
36 |         assert version is not None, "RaySurfaceResNet needs a version"
37 | 
38 |         num_layers = int(version[:2])       # First two characters are the number of layers
39 |         pretrained = version[2:] == 'pt'    # If the last characters are "pt", use ImageNet pretraining
40 |         assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers)
41 | 
42 |         self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained)
43 |         self.decoder = DepthDecoder(num_ch_enc=self.encoder.num_ch_enc)
44 |         self.ray_surf = RaySurfaceDecoder(num_ch_enc=self.encoder.num_ch_enc)
45 | 
46 |         self.scale_inv_depth = partial(disp_to_depth, min_depth=0.1, max_depth=100.0)
47 | 
48 |     def forward(self, rgb):
49 |         """
50 |         Runs the network and returns inverse depth maps and ray surface
51 |         (4 scales if training and 1 if not).
52 |         """
53 |         x = self.encoder(rgb)
54 |         r = self.ray_surf(x)
55 |         x = self.decoder(x)
56 |         disps = [x[('disp', i)] for i in range(4)]
57 | 
58 |         if self.training:
59 |             return [self.scale_inv_depth(d)[0] for d in disps], r
60 |         else:
61 |             return self.scale_inv_depth(disps[0])[0], r
62 | 
63 | ########################################################################################################################
64 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/minkowski.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2021 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | 
  4 | import MinkowskiEngine as ME
  5 | import torch
  6 | 
  7 | 
  8 | def sparsify_features(x):
  9 |     """
 10 |     Sparsify features
 11 | 
 12 |     Parameters
 13 |     ----------
 14 |     x : Dense feature map [B,C,H,W]
 15 | 
 16 |     Returns
 17 |     -------
 18 |     Sparse feature map (features only in valid coordinates)
 19 |     """
 20 |     b, c, h, w = x.shape
 21 | 
 22 |     u = torch.arange(w).reshape(1, w).repeat([h, 1])
 23 |     v = torch.arange(h).reshape(h, 1).repeat([1, w])
 24 |     uv = torch.stack([v, u], 2).reshape(-1, 2)
 25 | 
 26 |     coords = [uv] * b
 27 |     feats = [feats.permute(1, 2, 0).reshape(-1, c) for feats in x]
 28 |     coords, feats = ME.utils.sparse_collate(coords=coords, feats=feats)
 29 |     return ME.SparseTensor(coordinates=coords, features=feats, device=x.device)
 30 | 
 31 | 
 32 | def sparsify_depth(x):
 33 |     """
 34 |     Sparsify depth map
 35 | 
 36 |     Parameters
 37 |     ----------
 38 |     x : Dense depth map [B,1,H,W]
 39 | 
 40 |     Returns
 41 |     -------
 42 |     Sparse depth map (range values only in valid pixels)
 43 |     """
 44 |     b, c, h, w = x.shape
 45 | 
 46 |     u = torch.arange(w, device=x.device).reshape(1, w).repeat([h, 1])
 47 |     v = torch.arange(h, device=x.device).reshape(h, 1).repeat([1, w])
 48 |     uv = torch.stack([v, u], 2)
 49 | 
 50 |     idxs = [(d > 0)[0] for d in x]
 51 | 
 52 |     coords = [uv[idx] for idx in idxs]
 53 |     feats = [feats.permute(1, 2, 0)[idx] for idx, feats in zip(idxs, x)]
 54 |     coords, feats = ME.utils.sparse_collate(coords=coords, feats=feats)
 55 |     return ME.SparseTensor(coordinates=coords, features=feats, device=x.device)
 56 | 
 57 | 
 58 | def densify_features(x, shape):
 59 |     """
 60 |     Densify features from a sparse tensor
 61 | 
 62 |     Parameters
 63 |     ----------
 64 |     x : Sparse tensor
 65 |     shape : Dense shape [B,C,H,W]
 66 | 
 67 |     Returns
 68 |     -------
 69 |     Dense tensor containing sparse information
 70 |     """
 71 |     stride = x.tensor_stride
 72 |     coords, feats = x.C.long(), x.F
 73 |     shape = (shape[0], shape[2] // stride[0], shape[3] // stride[1], feats.shape[1])
 74 |     dense = torch.zeros(shape, device=x.device)
 75 |     dense[coords[:, 0],
 76 |           coords[:, 1] // stride[0],
 77 |           coords[:, 2] // stride[1]] = feats
 78 |     return dense.permute(0, 3, 1, 2).contiguous()
 79 | 
 80 | 
 81 | def densify_add_features_unc(x, s, u, shape):
 82 |     """
 83 |     Densify and add features considering uncertainty
 84 | 
 85 |     Parameters
 86 |     ----------
 87 |     x : Dense tensor [B,C,H,W]
 88 |     s : Sparse tensor
 89 |     u : Sparse tensor with uncertainty
 90 |     shape : Dense tensor shape
 91 | 
 92 |     Returns
 93 |     -------
 94 |     Densified sparse tensor with added uncertainty
 95 |     """
 96 |     stride = s.tensor_stride
 97 |     coords, feats = s.C.long(), s.F
 98 |     shape = (shape[0], shape[2] // stride[0], shape[3] // stride[1], feats.shape[1])
 99 | 
100 |     dense = torch.zeros(shape, device=s.device)
101 |     dense[coords[:, -1],
102 |           coords[:, 0] // stride[0],
103 |           coords[:, 1] // stride[1]] = feats
104 |     dense = dense.permute(0, 3, 1, 2).contiguous()
105 | 
106 |     mult = torch.ones(shape, device=s.device)
107 |     mult[coords[:, -1],
108 |          coords[:, 0] // stride[0],
109 |          coords[:, 1] // stride[1]] = 1.0 - u.F
110 |     mult = mult.permute(0, 3, 1, 2).contiguous()
111 | 
112 |     return x * mult + dense
113 | 
114 | 
115 | def map_add_features(x, s):
116 |     """
117 |     Map dense features to sparse tensor and add them.
118 | 
119 |     Parameters
120 |     ----------
121 |     x : Dense tensor [B,C,H,W]
122 |     s : Sparse tensor
123 | 
124 |     Returns
125 |     -------
126 |     Sparse tensor with added dense information in valid areas
127 |     """
128 |     stride = s.tensor_stride
129 |     coords = s.coords.long()
130 |     feats = x.permute(0, 2, 3, 1)
131 |     feats = feats[coords[:, -1],
132 |                   coords[:, 0] // stride[0],
133 |                   coords[:, 1] // stride[1]]
134 |     return ME.SparseTensor(coords=coords, feats=feats + s.feats,
135 |                            coords_manager=s.coords_man, force_creation=True,
136 |                            tensor_stride=s.tensor_stride)
137 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/minkowski_encoder.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import MinkowskiEngine as ME
  4 | import torch.nn as nn
  5 | 
  6 | from packnet_sfm.networks.layers.minkowski import \
  7 |     sparsify_depth, densify_features, densify_add_features_unc, map_add_features
  8 | 
  9 | 
 10 | class MinkConv2D(nn.Module):
 11 |     """
 12 |     Minkowski Convolutional Block
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     in_planes : number of input channels
 17 |     out_planes : number of output channels
 18 |     kernel_size : convolutional kernel size
 19 |     stride : convolutional stride
 20 |     with_uncertainty : with uncertainty or now
 21 |     add_rgb : add RGB information as channels
 22 |     """
 23 |     def __init__(self, in_planes, out_planes, kernel_size, stride,
 24 |                  with_uncertainty=False, add_rgb=False):
 25 |         super().__init__()
 26 |         self.layer3 = nn.Sequential(
 27 |             ME.MinkowskiConvolution(
 28 |                 in_planes, out_planes * 2, kernel_size=kernel_size, stride=1, dimension=2),
 29 |             ME.MinkowskiBatchNorm(out_planes * 2),
 30 |             ME.MinkowskiReLU(inplace=True),
 31 |             ME.MinkowskiConvolution(
 32 |                 out_planes * 2, out_planes * 2, kernel_size=kernel_size, stride=1, dimension=2),
 33 |             ME.MinkowskiBatchNorm(out_planes * 2),
 34 |             ME.MinkowskiReLU(inplace=True),
 35 |             ME.MinkowskiConvolution(
 36 |                 out_planes * 2, out_planes, kernel_size=kernel_size, stride=1, dimension=2),
 37 |         )
 38 | 
 39 |         self.layer2 = nn.Sequential(
 40 |             ME.MinkowskiConvolution(
 41 |                 in_planes, out_planes * 2, kernel_size=kernel_size, stride=1, dimension=2),
 42 |             ME.MinkowskiBatchNorm(out_planes * 2),
 43 |             ME.MinkowskiReLU(inplace=True),
 44 |             ME.MinkowskiConvolution(
 45 |                 out_planes * 2, out_planes, kernel_size=kernel_size, stride=1, dimension=2),
 46 |         )
 47 | 
 48 |         self.layer1 = nn.Sequential(
 49 |             ME.MinkowskiConvolution(
 50 |                 in_planes, out_planes, kernel_size=kernel_size, stride=1, dimension=2),
 51 |         )
 52 | 
 53 |         self.layer_final = nn.Sequential(
 54 |             ME.MinkowskiBatchNorm(out_planes),
 55 |             ME.MinkowskiReLU(inplace=True)
 56 |         )
 57 |         self.pool = None if stride == 1 else ME.MinkowskiMaxPooling(3, stride, dimension=2)
 58 | 
 59 |         self.add_rgb = add_rgb
 60 |         self.with_uncertainty = with_uncertainty
 61 |         if with_uncertainty:
 62 |             self.unc_layer = nn.Sequential(
 63 |                 ME.MinkowskiConvolution(
 64 |                     out_planes, 1, kernel_size=3, stride=1, dimension=2),
 65 |                 ME.MinkowskiSigmoid()
 66 |             )
 67 | 
 68 |     def forward(self, x):
 69 |         """
 70 |         Processes sparse information
 71 | 
 72 |         Parameters
 73 |         ----------
 74 |         x : Sparse tensor
 75 | 
 76 |         Returns
 77 |         -------
 78 |         Processed tensor
 79 |         """
 80 |         if self.pool is not None:
 81 |             x = self.pool(x)
 82 |         x1 = self.layer1(x)
 83 |         x2 = self.layer2(x)
 84 |         x3 = self.layer3(x)
 85 |         return None, self.layer_final(x1 + x2 + x3)
 86 | 
 87 | 
 88 | class MinkowskiEncoder(nn.Module):
 89 |     """
 90 |     Depth completion Minkowski Encoder
 91 | 
 92 |     Parameters
 93 |     ----------
 94 |     channels : number of channels
 95 |     with_uncertainty : with uncertainty or not
 96 |     add_rgb : add RGB information to depth features or not
 97 |     """
 98 |     def __init__(self, channels, with_uncertainty=False, add_rgb=False):
 99 |         super().__init__()
100 |         self.mconvs = nn.ModuleList()
101 |         kernel_sizes = [5, 5] + [3] * (len(channels) - 1)
102 |         self.mconvs.append(
103 |             MinkConv2D(1, channels[0], kernel_sizes[0], 2,
104 |                        with_uncertainty=with_uncertainty))
105 |         for i in range(0, len(channels) - 1):
106 |             self.mconvs.append(
107 |                 MinkConv2D(channels[i], channels[i+1], kernel_sizes[i+1], 2,
108 |                            with_uncertainty=with_uncertainty))
109 |         self.d = self.n = self.shape = 0
110 |         self.with_uncertainty = with_uncertainty
111 |         self.add_rgb = add_rgb
112 | 
113 |     def prep(self, d):
114 |         self.d = sparsify_depth(d)
115 |         self.shape = d.shape
116 |         self.n = 0
117 | 
118 |     def forward(self, x=None):
119 | 
120 |         unc, self.d = self.mconvs[self.n](self.d)
121 |         self.n += 1
122 | 
123 |         if self.with_uncertainty:
124 |             out = densify_add_features_unc(x, unc * self.d, unc, self.shape)
125 |         else:
126 |             out = densify_features(self.d, self.shape)
127 | 
128 |         if self.add_rgb:
129 |             self.d = map_add_features(x, self.d)
130 | 
131 |         return out
132 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/resnet/depth_decoder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | # Adapted from monodepth2
 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/depth_decoder.py
 5 | 
 6 | from __future__ import absolute_import, division, print_function
 7 | 
 8 | import numpy as np
 9 | import torch
10 | import torch.nn as nn
11 | 
12 | from collections import OrderedDict
13 | from .layers import ConvBlock, Conv3x3, upsample
14 | 
15 | 
16 | class DepthDecoder(nn.Module):
17 |     def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True):
18 |         super(DepthDecoder, self).__init__()
19 | 
20 |         self.num_output_channels = num_output_channels
21 |         self.use_skips = use_skips
22 |         self.upsample_mode = 'nearest'
23 |         self.scales = scales
24 | 
25 |         self.num_ch_enc = num_ch_enc
26 |         self.num_ch_dec = np.array([16, 32, 64, 128, 256])
27 | 
28 |         # decoder
29 |         self.convs = OrderedDict()
30 |         for i in range(4, -1, -1):
31 |             # upconv_0
32 |             num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1]
33 |             num_ch_out = self.num_ch_dec[i]
34 |             self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out)
35 | 
36 |             # upconv_1
37 |             num_ch_in = self.num_ch_dec[i]
38 |             if self.use_skips and i > 0:
39 |                 num_ch_in += self.num_ch_enc[i - 1]
40 |             num_ch_out = self.num_ch_dec[i]
41 |             self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out)
42 | 
43 |         for s in self.scales:
44 |             self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels)
45 | 
46 |         self.decoder = nn.ModuleList(list(self.convs.values()))
47 |         self.sigmoid = nn.Sigmoid()
48 | 
49 |     def forward(self, input_features):
50 |         self.outputs = {}
51 | 
52 |         # decoder
53 |         x = input_features[-1]
54 |         for i in range(4, -1, -1):
55 |             x = self.convs[("upconv", i, 0)](x)
56 |             x = [upsample(x)]
57 |             if self.use_skips and i > 0:
58 |                 x += [input_features[i - 1]]
59 |             x = torch.cat(x, 1)
60 |             x = self.convs[("upconv", i, 1)](x)
61 |             if i in self.scales:
62 |                 self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x))
63 | 
64 |         return self.outputs
65 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/resnet/layers.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | # Adapted from monodepth2
 4 | # https://github.com/nianticlabs/monodepth2/blob/master/layers.py
 5 | 
 6 | from __future__ import absolute_import, division, print_function
 7 | 
 8 | import torch.nn as nn
 9 | import torch.nn.functional as F
10 | 
11 | 
12 | def disp_to_depth(disp, min_depth, max_depth):
13 |     """Convert network's sigmoid output into depth prediction
14 |     The formula for this conversion is given in the 'additional considerations'
15 |     section of the paper.
16 |     """
17 |     min_disp = 1 / max_depth
18 |     max_disp = 1 / min_depth
19 |     scaled_disp = min_disp + (max_disp - min_disp) * disp
20 |     depth = 1 / scaled_disp
21 |     return scaled_disp, depth
22 | 
23 | 
24 | class ConvBlock(nn.Module):
25 |     """Layer to perform a convolution followed by ELU
26 |     """
27 |     def __init__(self, in_channels, out_channels):
28 |         super(ConvBlock, self).__init__()
29 | 
30 |         self.conv = Conv3x3(in_channels, out_channels)
31 |         self.nonlin = nn.ELU(inplace=True)
32 | 
33 |     def forward(self, x):
34 |         out = self.conv(x)
35 |         out = self.nonlin(out)
36 |         return out
37 | 
38 | 
39 | class Conv3x3(nn.Module):
40 |     """Layer to pad and convolve input
41 |     """
42 |     def __init__(self, in_channels, out_channels, use_refl=True):
43 |         super(Conv3x3, self).__init__()
44 | 
45 |         if use_refl:
46 |             self.pad = nn.ReflectionPad2d(1)
47 |         else:
48 |             self.pad = nn.ZeroPad2d(1)
49 |         self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3)
50 | 
51 |     def forward(self, x):
52 |         out = self.pad(x)
53 |         out = self.conv(out)
54 |         return out
55 | 
56 | 
57 | def upsample(x):
58 |     """Upsample input tensor by a factor of 2
59 |     """
60 |     return F.interpolate(x, scale_factor=2, mode="nearest")
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/resnet/pose_decoder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | # Adapted from monodepth2
 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/pose_decoder.py
 5 | 
 6 | from __future__ import absolute_import, division, print_function
 7 | 
 8 | import torch
 9 | import torch.nn as nn
10 | from collections import OrderedDict
11 | 
12 | 
13 | class PoseDecoder(nn.Module):
14 |     def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1):
15 |         super(PoseDecoder, self).__init__()
16 | 
17 |         self.num_ch_enc = num_ch_enc
18 |         self.num_input_features = num_input_features
19 | 
20 |         if num_frames_to_predict_for is None:
21 |             num_frames_to_predict_for = num_input_features - 1
22 |         self.num_frames_to_predict_for = num_frames_to_predict_for
23 | 
24 |         self.convs = OrderedDict()
25 |         self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1)
26 |         self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1)
27 |         self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1)
28 |         self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1)
29 | 
30 |         self.relu = nn.ReLU()
31 | 
32 |         self.net = nn.ModuleList(list(self.convs.values()))
33 | 
34 |     def forward(self, input_features):
35 |         last_features = [f[-1] for f in input_features]
36 | 
37 |         cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features]
38 |         cat_features = torch.cat(cat_features, 1)
39 | 
40 |         out = cat_features
41 |         for i in range(3):
42 |             out = self.convs[("pose", i)](out)
43 |             if i != 2:
44 |                 out = self.relu(out)
45 | 
46 |         out = out.mean(3).mean(2)
47 | 
48 |         out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6)
49 | 
50 |         axisangle = out[..., :3]
51 |         translation = out[..., 3:]
52 | 
53 |         return axisangle, translation
54 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/resnet/raysurface_decoder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | # Adapted from monodepth2
 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/depth_decoder.py
 5 | 
 6 | from __future__ import absolute_import, division, print_function
 7 | 
 8 | import numpy as np
 9 | import torch
10 | import torch.nn as nn
11 | 
12 | from collections import OrderedDict
13 | from .layers import ConvBlock, Conv3x3, upsample
14 | 
15 | 
16 | class RaySurfaceDecoder(nn.Module):
17 |     def __init__(self, num_ch_enc, scales=[0], num_output_channels=3, use_skips=True):
18 |         super(RaySurfaceDecoder, self).__init__()
19 | 
20 |         self.num_output_channels = num_output_channels
21 |         self.use_skips = use_skips
22 |         self.upsample_mode = 'nearest'
23 |         self.scales = scales
24 | 
25 |         self.num_ch_enc = num_ch_enc
26 |         self.num_ch_dec = np.array([16, 32, 64, 128, 256])
27 | 
28 |         # decoder
29 |         self.convs = OrderedDict()
30 |         for i in range(4, -1, -1):
31 |             # upconv_0
32 |             num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1]
33 |             num_ch_out = self.num_ch_dec[i]
34 |             self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out)
35 | 
36 |             # upconv_1
37 |             num_ch_in = self.num_ch_dec[i]
38 |             if self.use_skips and i > 0:
39 |                 num_ch_in += self.num_ch_enc[i - 1]
40 |             num_ch_out = self.num_ch_dec[i]
41 |             self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out)
42 | 
43 |         for s in self.scales:
44 |             self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels)
45 | 
46 |         self.decoder = nn.ModuleList(list(self.convs.values()))
47 |         self.tanh = nn.Tanh()
48 | 
49 |     def forward(self, input_features):
50 |         self.outputs = {}
51 | 
52 |         # decoder
53 |         x = input_features[-1]
54 |         for i in range(4, -1, -1):
55 |             x = self.convs[("upconv", i, 0)](x)
56 |             x = [upsample(x)]
57 |             if self.use_skips and i > 0:
58 |                 x += [input_features[i - 1]]
59 |             x = torch.cat(x, 1)
60 |             x = self.convs[("upconv", i, 1)](x)
61 |             if i in self.scales:
62 |                 self.outputs[("raysurf", i)] = self.tanh(self.convs[("dispconv", i)](x))
63 | 
64 |         return self.outputs
65 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/layers/resnet/resnet_encoder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | # Adapted from monodepth2
 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/resnet_encoder.py
 5 | 
 6 | from __future__ import absolute_import, division, print_function
 7 | 
 8 | import numpy as np
 9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torchvision.models as models
13 | import torch.utils.model_zoo as model_zoo
14 | 
15 | 
16 | class ResNetMultiImageInput(models.ResNet):
17 |     """Constructs a resnet model with varying number of input images.
18 |     Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
19 |     """
20 |     def __init__(self, block, layers, num_classes=1000, num_input_images=1):
21 |         super(ResNetMultiImageInput, self).__init__(block, layers)
22 |         self.inplanes = 64
23 |         self.conv1 = nn.Conv2d(
24 |             num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
25 |         self.bn1 = nn.BatchNorm2d(64)
26 |         self.relu = nn.ReLU(inplace=True)
27 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
28 |         self.layer1 = self._make_layer(block, 64, layers[0])
29 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
30 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
31 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
32 | 
33 |         for m in self.modules():
34 |             if isinstance(m, nn.Conv2d):
35 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
36 |             elif isinstance(m, nn.BatchNorm2d):
37 |                 nn.init.constant_(m.weight, 1)
38 |                 nn.init.constant_(m.bias, 0)
39 | 
40 | 
41 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1):
42 |     """Constructs a ResNet model.
43 |     Args:
44 |         num_layers (int): Number of resnet layers. Must be 18 or 50
45 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
46 |         num_input_images (int): Number of frames stacked as input
47 |     """
48 |     assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet"
49 |     blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers]
50 |     block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers]
51 |     model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images)
52 | 
53 |     if pretrained:
54 |         loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)])
55 |         loaded['conv1.weight'] = torch.cat(
56 |             [loaded['conv1.weight']] * num_input_images, 1) / num_input_images
57 |         model.load_state_dict(loaded)
58 |     return model
59 | 
60 | 
61 | class ResnetEncoder(nn.Module):
62 |     """Pytorch module for a resnet encoder
63 |     """
64 |     def __init__(self, num_layers, pretrained, num_input_images=1):
65 |         super(ResnetEncoder, self).__init__()
66 | 
67 |         self.num_ch_enc = np.array([64, 64, 128, 256, 512])
68 | 
69 |         resnets = {18: models.resnet18,
70 |                    34: models.resnet34,
71 |                    50: models.resnet50,
72 |                    101: models.resnet101,
73 |                    152: models.resnet152}
74 | 
75 |         if num_layers not in resnets:
76 |             raise ValueError("{} is not a valid number of resnet layers".format(num_layers))
77 | 
78 |         if num_input_images > 1:
79 |             self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images)
80 |         else:
81 |             self.encoder = resnets[num_layers](pretrained)
82 | 
83 |         if num_layers > 34:
84 |             self.num_ch_enc[1:] *= 4
85 | 
86 |     def forward(self, input_image):
87 |         self.features = []
88 |         x = (input_image - 0.45) / 0.225
89 |         x = self.encoder.conv1(x)
90 |         x = self.encoder.bn1(x)
91 |         self.features.append(self.encoder.relu(x))
92 |         self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
93 |         self.features.append(self.encoder.layer2(self.features[-1]))
94 |         self.features.append(self.encoder.layer3(self.features[-1]))
95 |         self.features.append(self.encoder.layer4(self.features[-1]))
96 | 
97 |         return self.features
98 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/pose/PoseNet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | # Adapted from SfmLearner
 4 | # https://github.com/ClementPinard/SfmLearner-Pytorch/blob/master/models/PoseExpNet.py
 5 | 
 6 | import torch
 7 | import torch.nn as nn
 8 | 
 9 | ########################################################################################################################
10 | 
11 | def conv_gn(in_planes, out_planes, kernel_size=3):
12 |     """
13 |     Convolutional block with GroupNorm
14 | 
15 |     Parameters
16 |     ----------
17 |     in_planes : int
18 |         Number of input channels
19 |     out_planes : int
20 |         Number of output channels
21 |     kernel_size : int
22 |         Convolutional kernel size
23 | 
24 |     Returns
25 |     -------
26 |     layers : nn.Sequential
27 |         Sequence of Conv2D + GroupNorm + ReLU
28 |     """
29 |     return nn.Sequential(
30 |         nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
31 |                   padding=(kernel_size - 1) // 2, stride=2),
32 |         nn.GroupNorm(16, out_planes),
33 |         nn.ReLU(inplace=True)
34 |     )
35 | 
36 | ########################################################################################################################
37 | 
38 | class PoseNet(nn.Module):
39 |     """Pose network """
40 | 
41 |     def __init__(self, nb_ref_imgs=2, rotation_mode='euler', **kwargs):
42 |         super().__init__()
43 |         self.nb_ref_imgs = nb_ref_imgs
44 |         self.rotation_mode = rotation_mode
45 | 
46 |         conv_channels = [16, 32, 64, 128, 256, 256, 256]
47 |         self.conv1 = conv_gn(3 * (1 + self.nb_ref_imgs), conv_channels[0], kernel_size=7)
48 |         self.conv2 = conv_gn(conv_channels[0], conv_channels[1], kernel_size=5)
49 |         self.conv3 = conv_gn(conv_channels[1], conv_channels[2])
50 |         self.conv4 = conv_gn(conv_channels[2], conv_channels[3])
51 |         self.conv5 = conv_gn(conv_channels[3], conv_channels[4])
52 |         self.conv6 = conv_gn(conv_channels[4], conv_channels[5])
53 |         self.conv7 = conv_gn(conv_channels[5], conv_channels[6])
54 | 
55 |         self.pose_pred = nn.Conv2d(conv_channels[6], 6 * self.nb_ref_imgs,
56 |                                    kernel_size=1, padding=0)
57 | 
58 |         self.init_weights()
59 | 
60 |     def init_weights(self):
61 |         for m in self.modules():
62 |             if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
63 |                 nn.init.xavier_uniform_(m.weight.data)
64 |                 if m.bias is not None:
65 |                     m.bias.data.zero_()
66 | 
67 |     def forward(self, image, context):
68 |         assert (len(context) == self.nb_ref_imgs)
69 |         input = [image]
70 |         input.extend(context)
71 |         input = torch.cat(input, 1)
72 |         out_conv1 = self.conv1(input)
73 |         out_conv2 = self.conv2(out_conv1)
74 |         out_conv3 = self.conv3(out_conv2)
75 |         out_conv4 = self.conv4(out_conv3)
76 |         out_conv5 = self.conv5(out_conv4)
77 |         out_conv6 = self.conv6(out_conv5)
78 |         out_conv7 = self.conv7(out_conv6)
79 | 
80 |         pose = self.pose_pred(out_conv7)
81 |         pose = pose.mean(3).mean(2)
82 |         pose = 0.01 * pose.view(pose.size(0), self.nb_ref_imgs, 6)
83 | 
84 |         return pose
85 | 
86 | ########################################################################################################################
87 | 


--------------------------------------------------------------------------------
/packnet_sfm/networks/pose/PoseResNet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | from packnet_sfm.networks.layers.resnet.resnet_encoder import ResnetEncoder
 7 | from packnet_sfm.networks.layers.resnet.pose_decoder import PoseDecoder
 8 | 
 9 | ########################################################################################################################
10 | 
11 | class PoseResNet(nn.Module):
12 |     """
13 |     Pose network based on the ResNet architecture.
14 | 
15 |     Parameters
16 |     ----------
17 |     version : str
18 |         Has a XY format, where:
19 |         X is the number of residual layers [18, 34, 50] and
20 |         Y is an optional ImageNet pretrained flag added by the "pt" suffix
21 |         Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch
22 |     kwargs : dict
23 |         Extra parameters
24 |     """
25 |     def __init__(self, version=None, **kwargs):
26 |         super().__init__()
27 |         assert version is not None, "PoseResNet needs a version"
28 | 
29 |         num_layers = int(version[:2])       # First two characters are the number of layers
30 |         pretrained = version[2:] == 'pt'    # If the last characters are "pt", use ImageNet pretraining
31 |         assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers)
32 | 
33 |         self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained, num_input_images=2)
34 |         self.decoder = PoseDecoder(self.encoder.num_ch_enc, num_input_features=1, num_frames_to_predict_for=2)
35 | 
36 |     def forward(self, target_image, ref_imgs):
37 |         """
38 |         Runs the network and returns predicted poses
39 |         (1 for each reference image).
40 |         """
41 |         outputs = []
42 |         for i, ref_img in enumerate(ref_imgs):
43 |             inputs = torch.cat([target_image, ref_img], 1)
44 |             axisangle, translation = self.decoder([self.encoder(inputs)])
45 |             outputs.append(torch.cat([translation[:, 0], axisangle[:, 0]], 2))
46 |         pose = torch.cat(outputs, 1)
47 |         return pose
48 | 
49 | ########################################################################################################################
50 | 
51 | 


--------------------------------------------------------------------------------
/packnet_sfm/trainers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Trainers
 3 | ========
 4 | 
 5 | Trainer classes providing an easy way to train and evaluate SfM models
 6 | when wrapped in a ModelWrapper.
 7 | 
 8 | Inspired by pytorch-lightning.
 9 | 
10 | """
11 | 
12 | from packnet_sfm.trainers.horovod_trainer import HorovodTrainer
13 | 
14 | __all__ = ["HorovodTrainer"]


--------------------------------------------------------------------------------
/packnet_sfm/trainers/base_trainer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import torch
 4 | from tqdm import tqdm
 5 | from packnet_sfm.utils.logging import prepare_dataset_prefix
 6 | 
 7 | 
 8 | def sample_to_cuda(data, dtype=None):
 9 |     if isinstance(data, str):
10 |         return data
11 |     elif isinstance(data, dict):
12 |         return {key: sample_to_cuda(data[key], dtype) for key in data.keys()}
13 |     elif isinstance(data, list):
14 |         return [sample_to_cuda(val, dtype) for val in data]
15 |     else:
16 |         # only convert floats (e.g., to half), otherwise preserve (e.g, ints)
17 |         dtype = dtype if torch.is_floating_point(data) else None
18 |         return data.to('cuda', dtype=dtype)
19 | 
20 | 
21 | class BaseTrainer:
22 |     def __init__(self, min_epochs=0, max_epochs=50,
23 |                  validate_first=False, checkpoint=None, **kwargs):
24 | 
25 |         self.min_epochs = min_epochs
26 |         self.max_epochs = max_epochs
27 |         self.validate_first = validate_first
28 | 
29 |         self.checkpoint = checkpoint
30 |         self.module = None
31 | 
32 |     @property
33 |     def proc_rank(self):
34 |         raise NotImplementedError('Not implemented for BaseTrainer')
35 | 
36 |     @property
37 |     def world_size(self):
38 |         raise NotImplementedError('Not implemented for BaseTrainer')
39 | 
40 |     @property
41 |     def is_rank_0(self):
42 |         return self.proc_rank == 0
43 | 
44 |     def check_and_save(self, module, output):
45 |         if self.checkpoint:
46 |             self.checkpoint.check_and_save(module, output)
47 | 
48 |     def train_progress_bar(self, dataloader, config, ncols=120):
49 |         return tqdm(enumerate(dataloader, 0),
50 |                     unit=' images', unit_scale=self.world_size * config.batch_size,
51 |                     total=len(dataloader), smoothing=0,
52 |                     disable=not self.is_rank_0, ncols=ncols,
53 |                     )
54 | 
55 |     def val_progress_bar(self, dataloader, config, n=0, ncols=120):
56 |         return tqdm(enumerate(dataloader, 0),
57 |                     unit=' images', unit_scale=self.world_size * config.batch_size,
58 |                     total=len(dataloader), smoothing=0,
59 |                     disable=not self.is_rank_0, ncols=ncols,
60 |                     desc=prepare_dataset_prefix(config, n)
61 |                     )
62 | 
63 |     def test_progress_bar(self, dataloader, config, n=0, ncols=120):
64 |         return tqdm(enumerate(dataloader, 0),
65 |                     unit=' images', unit_scale=self.world_size * config.batch_size,
66 |                     total=len(dataloader), smoothing=0,
67 |                     disable=not self.is_rank_0, ncols=ncols,
68 |                     desc=prepare_dataset_prefix(config, n)
69 |                     )
70 | 


--------------------------------------------------------------------------------
/packnet_sfm/trainers/horovod_trainer.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import os
  4 | import torch
  5 | import horovod.torch as hvd
  6 | from packnet_sfm.trainers.base_trainer import BaseTrainer, sample_to_cuda
  7 | from packnet_sfm.utils.config import prep_logger_and_checkpoint
  8 | from packnet_sfm.utils.logging import print_config
  9 | from packnet_sfm.utils.logging import AvgMeter
 10 | 
 11 | 
 12 | class HorovodTrainer(BaseTrainer):
 13 |     def __init__(self, **kwargs):
 14 |         super().__init__(**kwargs)
 15 | 
 16 |         hvd.init()
 17 |         torch.set_num_threads(int(os.environ.get("OMP_NUM_THREADS", 1)))
 18 |         torch.cuda.set_device(hvd.local_rank())
 19 |         torch.backends.cudnn.benchmark = True
 20 | 
 21 |         self.avg_loss = AvgMeter(50)
 22 |         self.dtype = kwargs.get("dtype", None)  # just for test for now
 23 | 
 24 |     @property
 25 |     def proc_rank(self):
 26 |         return hvd.rank()
 27 | 
 28 |     @property
 29 |     def world_size(self):
 30 |         return hvd.size()
 31 | 
 32 |     def fit(self, module):
 33 | 
 34 |         # Prepare module for training
 35 |         module.trainer = self
 36 |         # Update and print module configuration
 37 |         prep_logger_and_checkpoint(module)
 38 |         print_config(module.config)
 39 | 
 40 |         # Send module to GPU
 41 |         module = module.to('cuda')
 42 |         # Configure optimizer and scheduler
 43 |         module.configure_optimizers()
 44 | 
 45 |         # Create distributed optimizer
 46 |         compression = hvd.Compression.none
 47 |         optimizer = hvd.DistributedOptimizer(module.optimizer,
 48 |             named_parameters=module.named_parameters(), compression=compression)
 49 |         scheduler = module.scheduler
 50 | 
 51 |         # Get train and val dataloaders
 52 |         train_dataloader = module.train_dataloader()
 53 |         val_dataloaders = module.val_dataloader()
 54 | 
 55 |         # Validate before training if requested
 56 |         if self.validate_first:
 57 |             validation_output = self.validate(val_dataloaders, module)
 58 |             self.check_and_save(module, validation_output)
 59 | 
 60 |         # Epoch loop
 61 |         for epoch in range(module.current_epoch, self.max_epochs):
 62 |             # Train
 63 |             self.train(train_dataloader, module, optimizer)
 64 |             # Validation
 65 |             validation_output = self.validate(val_dataloaders, module)
 66 |             # Check and save model
 67 |             self.check_and_save(module, validation_output)
 68 |             # Update current epoch
 69 |             module.current_epoch += 1
 70 |             # Take a scheduler step
 71 |             scheduler.step()
 72 | 
 73 |     def train(self, dataloader, module, optimizer):
 74 |         # Set module to train
 75 |         module.train()
 76 |         # Shuffle dataloader sampler
 77 |         if hasattr(dataloader.sampler, "set_epoch"):
 78 |             dataloader.sampler.set_epoch(module.current_epoch)
 79 |         # Prepare progress bar
 80 |         progress_bar = self.train_progress_bar(
 81 |             dataloader, module.config.datasets.train)
 82 |         # Start training loop
 83 |         outputs = []
 84 |         # For all batches
 85 |         for i, batch in progress_bar:
 86 |             # Reset optimizer
 87 |             optimizer.zero_grad()
 88 |             # Send samples to GPU and take a training step
 89 |             batch = sample_to_cuda(batch)
 90 |             output = module.training_step(batch, i)
 91 |             # Backprop through loss and take an optimizer step
 92 |             output['loss'].backward()
 93 |             optimizer.step()
 94 |             # Append output to list of outputs
 95 |             output['loss'] = output['loss'].detach()
 96 |             outputs.append(output)
 97 |             # Update progress bar if in rank 0
 98 |             if self.is_rank_0:
 99 |                 progress_bar.set_description(
100 |                     'Epoch {} | Avg.Loss {:.4f}'.format(
101 |                         module.current_epoch, self.avg_loss(output['loss'].item())))
102 |         # Return outputs for epoch end
103 |         return module.training_epoch_end(outputs)
104 | 
105 |     def validate(self, dataloaders, module):
106 |         # Set module to eval
107 |         module.eval()
108 |         # Start validation loop
109 |         all_outputs = []
110 |         # For all validation datasets
111 |         for n, dataloader in enumerate(dataloaders):
112 |             # Prepare progress bar for that dataset
113 |             progress_bar = self.val_progress_bar(
114 |                 dataloader, module.config.datasets.validation, n)
115 |             outputs = []
116 |             # For all batches
117 |             for i, batch in progress_bar:
118 |                 # Send batch to GPU and take a validation step
119 |                 batch = sample_to_cuda(batch)
120 |                 output = module.validation_step(batch, i, n)
121 |                 # Append output to list of outputs
122 |                 outputs.append(output)
123 |             # Append dataset outputs to list of all outputs
124 |             all_outputs.append(outputs)
125 |         # Return all outputs for epoch end
126 |         return module.validation_epoch_end(all_outputs)
127 | 
128 |     def test(self, module):
129 |         # Send module to GPU
130 |         module = module.to('cuda', dtype=self.dtype)
131 |         # Get test dataloaders
132 |         test_dataloaders = module.test_dataloader()
133 |         # Run evaluation
134 |         self.evaluate(test_dataloaders, module)
135 | 
136 |     @torch.no_grad()
137 |     def evaluate(self, dataloaders, module):
138 |         # Set module to eval
139 |         module.eval()
140 |         # Start evaluation loop
141 |         all_outputs = []
142 |         # For all test datasets
143 |         for n, dataloader in enumerate(dataloaders):
144 |             # Prepare progress bar for that dataset
145 |             progress_bar = self.val_progress_bar(
146 |                 dataloader, module.config.datasets.test, n)
147 |             outputs = []
148 |             # For all batches
149 |             for i, batch in progress_bar:
150 |                 # Send batch to GPU and take a test step
151 |                 batch = sample_to_cuda(batch, self.dtype)
152 |                 output = module.test_step(batch, i, n)
153 |                 # Append output to list of outputs
154 |                 outputs.append(output)
155 |             # Append dataset outputs to list of all outputs
156 |             all_outputs.append(outputs)
157 |         # Return all outputs for epoch end
158 |         return module.test_epoch_end(all_outputs)
159 | 


--------------------------------------------------------------------------------
/packnet_sfm/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TRI-ML/packnet-sfm/de53b310533ff6b01eaa23a8ba5ac01bac5587b1/packnet_sfm/utils/__init__.py


--------------------------------------------------------------------------------
/packnet_sfm/utils/horovod.py:
--------------------------------------------------------------------------------
 1 | 
 2 | try:
 3 |     import horovod.torch as hvd
 4 |     HAS_HOROVOD = True
 5 | except ImportError:
 6 |     HAS_HOROVOD = False
 7 | 
 8 | 
 9 | def hvd_init():
10 |     if HAS_HOROVOD:
11 |         hvd.init()
12 |     return HAS_HOROVOD
13 | 
14 | def on_rank_0(func):
15 |     def wrapper(*args, **kwargs):
16 |         if rank() == 0:
17 |             func(*args, **kwargs)
18 |     return wrapper
19 | 
20 | def rank():
21 |     return hvd.rank() if HAS_HOROVOD else 0
22 | 
23 | def world_size():
24 |     return hvd.size() if HAS_HOROVOD else 1
25 | 
26 | @on_rank_0
27 | def print0(string='\n'):
28 |     print(string)
29 | 
30 | def reduce_value(value, average, name):
31 |     """
32 |     Reduce the mean value of a tensor from all GPUs
33 | 
34 |     Parameters
35 |     ----------
36 |     value : torch.Tensor
37 |         Value to be reduced
38 |     average : bool
39 |         Whether values will be averaged or not
40 |     name : str
41 |         Value name
42 | 
43 |     Returns
44 |     -------
45 |     value : torch.Tensor
46 |         reduced value
47 |     """
48 |     return hvd.allreduce(value, average=average, name=name)
49 | 


--------------------------------------------------------------------------------
/packnet_sfm/utils/logging.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import os
  4 | from termcolor import colored
  5 | from functools import partial
  6 | 
  7 | from packnet_sfm.utils.horovod import on_rank_0
  8 | 
  9 | 
 10 | def pcolor(string, color, on_color=None, attrs=None):
 11 |     """
 12 |     Produces a colored string for printing
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     string : str
 17 |         String that will be colored
 18 |     color : str
 19 |         Color to use
 20 |     on_color : str
 21 |         Background color to use
 22 |     attrs : list of str
 23 |         Different attributes for the string
 24 | 
 25 |     Returns
 26 |     -------
 27 |     string: str
 28 |         Colored string
 29 |     """
 30 |     return colored(string, color, on_color, attrs)
 31 | 
 32 | 
 33 | def prepare_dataset_prefix(config, dataset_idx):
 34 |     """
 35 |     Concatenates dataset path and split for metrics logging
 36 | 
 37 |     Parameters
 38 |     ----------
 39 |     config : CfgNode
 40 |         Dataset configuration
 41 |     dataset_idx : int
 42 |         Dataset index for multiple datasets
 43 | 
 44 |     Returns
 45 |     -------
 46 |     prefix : str
 47 |         Dataset prefix for metrics logging
 48 |     """
 49 |     # Path is always available
 50 |     prefix = '{}'.format(os.path.splitext(config.path[dataset_idx].split('/')[-1])[0])
 51 |     # If split is available and does not contain { character
 52 |     if config.split[dataset_idx] != '' and '{' not in config.split[dataset_idx]:
 53 |         prefix += '-{}'.format(os.path.splitext(os.path.basename(config.split[dataset_idx]))[0])
 54 |     # If depth type is available
 55 |     if config.depth_type[dataset_idx] != '':
 56 |         prefix += '-{}'.format(config.depth_type[dataset_idx])
 57 |     # If we are using specific cameras
 58 |     if len(config.cameras[dataset_idx]) == 1:  # only allows single cameras
 59 |         prefix += '-{}'.format(config.cameras[dataset_idx][0])
 60 |     # Return full prefix
 61 |     return prefix
 62 | 
 63 | 
 64 | def s3_url(config):
 65 |     """
 66 |     Generate the s3 url where the models will be saved
 67 | 
 68 |     Parameters
 69 |     ----------
 70 |     config : CfgNode
 71 |         Model configuration
 72 | 
 73 |     Returns
 74 |     -------
 75 |     url : str
 76 |         String containing the URL pointing to the s3 bucket
 77 |     """
 78 |     return 'https://s3.console.aws.amazon.com/s3/buckets/{}/{}'.format(
 79 |         config.checkpoint.s3_path[5:], config.name)
 80 | 
 81 | 
 82 | @on_rank_0
 83 | def print_config(config, color=('blue', 'red', 'cyan'), attrs=('bold', 'dark')):
 84 |     """
 85 |     Prints header for model configuration
 86 | 
 87 |     Parameters
 88 |     ----------
 89 |     config : CfgNode
 90 |         Model configuration
 91 |     color : list of str
 92 |         Color pallete for the header
 93 |     attrs :
 94 |         Colored string attributes
 95 |     """
 96 |     # Recursive print function
 97 |     def print_recursive(rec_args, n=2, l=0):
 98 |         if l == 0:
 99 |             print(pcolor('config:', color[1], attrs=attrs))
100 |         for key, val in rec_args.items():
101 |             if isinstance(val, dict):
102 |                 print(pcolor('{} {}:'.format('-' * n, key), color[1], attrs=attrs))
103 |                 print_recursive(val, n + 2, l + 1)
104 |             else:
105 |                 print('{}: {}'.format(pcolor('{} {}'.format('-' * n, key), color[2]), val))
106 | 
107 |     # Color partial functions
108 |     pcolor1 = partial(pcolor, color='blue', attrs=['bold', 'dark'])
109 |     pcolor2 = partial(pcolor, color='blue', attrs=['bold'])
110 |     # Config and name
111 |     line = pcolor1('#' * 120)
112 |     path = pcolor1('### Config: ') + \
113 |            pcolor2('{}'.format(config.default.replace('/', '.'))) + \
114 |            pcolor1(' -> ') + \
115 |            pcolor2('{}'.format(config.config.replace('/', '.')))
116 |     name = pcolor1('### Name: ') + \
117 |            pcolor2('{}'.format(config.name))
118 |     # Add wandb link if available
119 |     if not config.wandb.dry_run:
120 |         name += pcolor1(' -> ') + \
121 |                 pcolor2('{}'.format(config.wandb.url))
122 |     # Add s3 link if available
123 |     if config.checkpoint.s3_path is not '':
124 |         name += pcolor1('\n### s3:') + \
125 |                 pcolor2(' {}'.format(config.checkpoint.s3_url))
126 |     # Create header string
127 |     header = '%s\n%s\n%s\n%s' % (line, path, name, line)
128 | 
129 |     # Print header, config and header again
130 |     print()
131 |     print(header)
132 |     print_recursive(config)
133 |     print(header)
134 |     print()
135 | 
136 | 
137 | class AvgMeter:
138 |     """Average meter for logging"""
139 |     def __init__(self, n_max=100):
140 |         """
141 |         Initializes a AvgMeter object.
142 | 
143 |         Parameters
144 |         ----------
145 |         n_max : int
146 |             Number of steps to average over
147 |         """
148 |         self.n_max = n_max
149 |         self.values = []
150 | 
151 |     def __call__(self, value):
152 |         """Appends new value and returns average"""
153 |         self.values.append(value)
154 |         if len(self.values) > self.n_max:
155 |             self.values.pop(0)
156 |         return self.get()
157 | 
158 |     def get(self):
159 |         """Get current average"""
160 |         return sum(self.values) / len(self.values)
161 | 
162 |     def reset(self):
163 |         """Reset meter"""
164 |         self.values.clear()
165 | 
166 |     def get_and_reset(self):
167 |         """Gets current average and resets"""
168 |         average = self.get()
169 |         self.reset()
170 |         return average
171 | 


--------------------------------------------------------------------------------
/packnet_sfm/utils/misc.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | from packnet_sfm.utils.types import is_list, is_int
  4 | 
  5 | ########################################################################################################################
  6 | 
  7 | def filter_dict(dictionary, keywords):
  8 |     """
  9 |     Returns only the keywords that are part of a dictionary
 10 | 
 11 |     Parameters
 12 |     ----------
 13 |     dictionary : dict
 14 |         Dictionary for filtering
 15 |     keywords : list of str
 16 |         Keywords that will be filtered
 17 | 
 18 |     Returns
 19 |     -------
 20 |     keywords : list of str
 21 |         List containing the keywords that are keys in dictionary
 22 |     """
 23 |     return [key for key in keywords if key in dictionary]
 24 | 
 25 | ########################################################################################################################
 26 | 
 27 | def make_list(var, n=None):
 28 |     """
 29 |     Wraps the input into a list, and optionally repeats it to be size n
 30 | 
 31 |     Parameters
 32 |     ----------
 33 |     var : Any
 34 |         Variable to be wrapped in a list
 35 |     n : int
 36 |         How much the wrapped variable will be repeated
 37 | 
 38 |     Returns
 39 |     -------
 40 |     var_list : list
 41 |         List generated from var
 42 |     """
 43 |     var = var if is_list(var) else [var]
 44 |     if n is None:
 45 |         return var
 46 |     else:
 47 |         assert len(var) == 1 or len(var) == n, 'Wrong list length for make_list'
 48 |         return var * n if len(var) == 1 else var
 49 | 
 50 | ########################################################################################################################
 51 | 
 52 | def same_shape(shape1, shape2):
 53 |     """
 54 |     Checks if two shapes are the same
 55 | 
 56 |     Parameters
 57 |     ----------
 58 |     shape1 : tuple
 59 |         First shape
 60 |     shape2 : tuple
 61 |         Second shape
 62 | 
 63 |     Returns
 64 |     -------
 65 |     flag : bool
 66 |         True if both shapes are the same (same length and dimensions)
 67 |     """
 68 |     if len(shape1) != len(shape2):
 69 |         return False
 70 |     for i in range(len(shape1)):
 71 |         if shape1[i] != shape2[i]:
 72 |             return False
 73 |     return True
 74 | 
 75 | ########################################################################################################################
 76 | 
 77 | def parse_crop_borders(borders, shape):
 78 |     """
 79 |     Calculate borders for cropping.
 80 | 
 81 |     Parameters
 82 |     ----------
 83 |     borders : tuple
 84 |         Border input for parsing. Can be one of the following forms:
 85 |         (int, int, int, int): y, height, x, width
 86 |         (int, int): y, x --> y, height = image_height - y, x, width = image_width - x
 87 |         Negative numbers are taken from image borders, according to the shape argument
 88 |         Float numbers for y and x are treated as percentage, according to the shape argument,
 89 |             and in this case height and width are centered at that point.
 90 |     shape : tuple
 91 |         Image shape (image_height, image_width), used to determine negative crop boundaries
 92 | 
 93 |     Returns
 94 |     -------
 95 |     borders : tuple (left, top, right, bottom)
 96 |         Parsed borders for cropping
 97 |     """
 98 |     # Return full image if there are no borders to crop
 99 |     if len(borders) == 0:
100 |         return 0, 0, shape[1], shape[0]
101 |     # Copy borders for modification
102 |     borders = list(borders).copy()
103 |     # If borders are 4-dimensional
104 |     if len(borders) == 4:
105 |         borders = [borders[2], borders[0], borders[3], borders[1]]
106 |         if is_int(borders[0]):
107 |             # If horizontal cropping is integer (regular cropping)
108 |             borders[0] += shape[1] if borders[0] < 0 else 0
109 |             borders[2] += shape[1] if borders[2] <= 0 else borders[0]
110 |         else:
111 |             # If horizontal cropping is float (center cropping)
112 |             center_w, half_w = borders[0] * shape[1], borders[2] / 2
113 |             borders[0] = int(center_w - half_w)
114 |             borders[2] = int(center_w + half_w)
115 |         if is_int(borders[1]):
116 |             # If vertical cropping is integer (regular cropping)
117 |             borders[1] += shape[0] if borders[1] < 0 else 0
118 |             borders[3] += shape[0] if borders[3] <= 0 else borders[1]
119 |         else:
120 |             # If vertical cropping is float (center cropping)
121 |             center_h, half_h = borders[1] * shape[0], borders[3] / 2
122 |             borders[1] = int(center_h - half_h)
123 |             borders[3] = int(center_h + half_h)
124 |     # If borders are 2-dimensional
125 |     elif len(borders) == 2:
126 |         borders = [borders[1], borders[0]]
127 |         if is_int(borders[0]):
128 |             # If cropping is integer (regular cropping)
129 |             borders = (max(0, borders[0]),
130 |                        max(0, borders[1]),
131 |                        shape[1] + min(0, borders[0]),
132 |                        shape[0] + min(0, borders[1]))
133 |         else:
134 |             # If cropping is float (center cropping)
135 |             center_w, half_w = borders[0] * shape[1], borders[1] / 2
136 |             center_h, half_h = borders[0] * shape[0], borders[1] / 2
137 |             borders = (int(center_w - half_w), int(center_h - half_h),
138 |                        int(center_w + half_w), int(center_h + half_h))
139 |     # Otherwise, invalid
140 |     else:
141 |         raise NotImplementedError('Crop tuple must have 2 or 4 values.')
142 |     # Assert that borders are valid
143 |     assert 0 <= borders[0] < borders[2] <= shape[1] and \
144 |            0 <= borders[1] < borders[3] <= shape[0], 'Crop borders {} are invalid'.format(borders)
145 |     # Return updated borders
146 |     return borders


--------------------------------------------------------------------------------
/packnet_sfm/utils/save.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import numpy as np
 4 | import os
 5 | 
 6 | from packnet_sfm.utils.image import write_image
 7 | from packnet_sfm.utils.depth import write_depth, inv2depth, viz_inv_depth
 8 | from packnet_sfm.utils.logging import prepare_dataset_prefix
 9 | 
10 | 
11 | def save_depth(batch, output, args, dataset, save):
12 |     """
13 |     Save depth predictions in various ways
14 | 
15 |     Parameters
16 |     ----------
17 |     batch : dict
18 |         Batch from dataloader
19 |     output : dict
20 |         Output from model
21 |     args : tuple
22 |         Step arguments
23 |     dataset : CfgNode
24 |         Dataset configuration
25 |     save : CfgNode
26 |         Save configuration
27 |     """
28 |     # If there is no save folder, don't save
29 |     if save.folder is '':
30 |         return
31 | 
32 |     # If we want to save
33 |     if save.depth.rgb or save.depth.viz or save.depth.npz or save.depth.png:
34 |         # Retrieve useful tensors
35 |         rgb = batch['rgb']
36 |         pred_inv_depth = output['inv_depth']
37 | 
38 |         # Prepare path strings
39 |         filename = batch['filename']
40 |         dataset_idx = 0 if len(args) == 1 else args[1]
41 |         save_path = os.path.join(save.folder, 'depth',
42 |                                  prepare_dataset_prefix(dataset, dataset_idx),
43 |                                  os.path.basename(save.pretrained).split('.')[0])
44 |         # Create folder
45 |         os.makedirs(save_path, exist_ok=True)
46 | 
47 |         # For each image in the batch
48 |         length = rgb.shape[0]
49 |         for i in range(length):
50 |             # Save numpy depth maps
51 |             if save.depth.npz:
52 |                 write_depth('{}/{}_depth.npz'.format(save_path, filename[i]),
53 |                             depth=inv2depth(pred_inv_depth[i]),
54 |                             intrinsics=batch['intrinsics'][i] if 'intrinsics' in batch else None)
55 |             # Save png depth maps
56 |             if save.depth.png:
57 |                 write_depth('{}/{}_depth.png'.format(save_path, filename[i]),
58 |                             depth=inv2depth(pred_inv_depth[i]))
59 |             # Save rgb images
60 |             if save.depth.rgb:
61 |                 rgb_i = rgb[i].permute(1, 2, 0).detach().cpu().numpy() * 255
62 |                 write_image('{}/{}_rgb.png'.format(save_path, filename[i]), rgb_i)
63 |             # Save inverse depth visualizations
64 |             if save.depth.viz:
65 |                 viz_i = viz_inv_depth(pred_inv_depth[i]) * 255
66 |                 write_image('{}/{}_viz.png'.format(save_path, filename[i]), viz_i)
67 | 


--------------------------------------------------------------------------------
/packnet_sfm/utils/types.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import yacs
 4 | import numpy as np
 5 | import torch
 6 | 
 7 | ########################################################################################################################
 8 | 
 9 | def is_numpy(data):
10 |     """Checks if data is a numpy array."""
11 |     return isinstance(data, np.ndarray)
12 | 
13 | def is_tensor(data):
14 |     """Checks if data is a torch tensor."""
15 |     return type(data) == torch.Tensor
16 | 
17 | def is_tuple(data):
18 |     """Checks if data is a tuple."""
19 |     return isinstance(data, tuple)
20 | 
21 | def is_list(data):
22 |     """Checks if data is a list."""
23 |     return isinstance(data, list)
24 | 
25 | def is_dict(data):
26 |     """Checks if data is a dictionary."""
27 |     return isinstance(data, dict)
28 | 
29 | def is_str(data):
30 |     """Checks if data is a string."""
31 |     return isinstance(data, str)
32 | 
33 | def is_int(data):
34 |     """Checks if data is an integer."""
35 |     return isinstance(data, int)
36 | 
37 | def is_seq(data):
38 |     """Checks if data is a list or tuple."""
39 |     return is_tuple(data) or is_list(data)
40 | 
41 | def is_cfg(data):
42 |     """Checks if data is a configuration node"""
43 |     return type(data) == yacs.config.CfgNode
44 | 
45 | ########################################################################################################################


--------------------------------------------------------------------------------
/scripts/eval.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import argparse
 4 | import torch
 5 | 
 6 | from packnet_sfm.models.model_wrapper import ModelWrapper
 7 | from packnet_sfm.trainers.horovod_trainer import HorovodTrainer
 8 | from packnet_sfm.utils.config import parse_test_file
 9 | from packnet_sfm.utils.load import set_debug
10 | from packnet_sfm.utils.horovod import hvd_init
11 | 
12 | 
13 | def parse_args():
14 |     """Parse arguments for training script"""
15 |     parser = argparse.ArgumentParser(description='PackNet-SfM evaluation script')
16 |     parser.add_argument('--checkpoint', type=str, help='Checkpoint (.ckpt)')
17 |     parser.add_argument('--config', type=str, default=None, help='Configuration (.yaml)')
18 |     parser.add_argument('--half', action="store_true", help='Use half precision (fp16)')
19 |     args = parser.parse_args()
20 |     assert args.checkpoint.endswith('.ckpt'), \
21 |         'You need to provide a .ckpt file as checkpoint'
22 |     assert args.config is None or args.config.endswith('.yaml'), \
23 |         'You need to provide a .yaml file as configuration'
24 |     return args
25 | 
26 | 
27 | def test(ckpt_file, cfg_file, half):
28 |     """
29 |     Monocular depth estimation test script.
30 | 
31 |     Parameters
32 |     ----------
33 |     ckpt_file : str
34 |         Checkpoint path for a pretrained model
35 |     cfg_file : str
36 |         Configuration file
37 |     half: bool
38 |         use half precision (fp16)
39 |     """
40 |     # Initialize horovod
41 |     hvd_init()
42 | 
43 |     # Parse arguments
44 |     config, state_dict = parse_test_file(ckpt_file, cfg_file)
45 | 
46 |     # Set debug if requested
47 |     set_debug(config.debug)
48 | 
49 |     # Initialize monodepth model from checkpoint arguments
50 |     model_wrapper = ModelWrapper(config)
51 |     # Restore model state
52 |     model_wrapper.load_state_dict(state_dict)
53 | 
54 |     # change to half precision for evaluation if requested
55 |     config.arch["dtype"] = torch.float16 if half else None
56 | 
57 |     # Create trainer with args.arch parameters
58 |     trainer = HorovodTrainer(**config.arch)
59 | 
60 |     # Test model
61 |     trainer.test(model_wrapper)
62 | 
63 | 
64 | if __name__ == '__main__':
65 |     args = parse_args()
66 |     test(args.checkpoint, args.config, args.half)
67 | 


--------------------------------------------------------------------------------
/scripts/evaluate_depth_maps.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import argparse
 3 | import numpy as np
 4 | import os
 5 | import torch
 6 | 
 7 | from glob import glob
 8 | from argparse import Namespace
 9 | from packnet_sfm.utils.depth import load_depth
10 | from tqdm import tqdm
11 | 
12 | from packnet_sfm.utils.depth import load_depth, compute_depth_metrics
13 | 
14 | 
15 | def parse_args():
16 |     """Parse arguments for benchmark script"""
17 |     parser = argparse.ArgumentParser(description='PackNet-SfM benchmark script')
18 |     parser.add_argument('--pred_folder', type=str,
19 |                         help='Folder containing predicted depth maps (.npz with key "depth")')
20 |     parser.add_argument('--gt_folder', type=str,
21 |                         help='Folder containing ground-truth depth maps (.npz with key "depth")')
22 |     parser.add_argument('--use_gt_scale', action='store_true',
23 |                         help='Use ground-truth median scaling on predicted depth maps')
24 |     parser.add_argument('--min_depth', type=float, default=0.,
25 |                         help='Minimum distance to consider during evaluation')
26 |     parser.add_argument('--max_depth', type=float, default=80.,
27 |                         help='Maximum distance to consider during evaluation')
28 |     parser.add_argument('--crop', type=str, default='', choices=['', 'garg'],
29 |                         help='Which crop to use during evaluation')
30 |     args = parser.parse_args()
31 |     return args
32 | 
33 | 
34 | def main(args):
35 |     # Get and sort ground-truth and predicted files
36 |     exts = ('npz', 'png')
37 |     gt_files, pred_files = [], []
38 |     for ext in exts:
39 |         gt_files.extend(glob(os.path.join(args.gt_folder, '*.{}'.format(ext))))
40 |         pred_files.extend(glob(os.path.join(args.pred_folder, '*.{}'.format(ext))))
41 |     # Sort ground-truth and prediction
42 |     gt_files.sort()
43 |     pred_files.sort()
44 |     # Loop over all files
45 |     metrics = []
46 |     progress_bar = tqdm(zip(gt_files, pred_files), total=len(gt_files))
47 |     for gt, pred in progress_bar:
48 |         # Get and prepare ground-truth and predictions
49 |         gt = torch.tensor(load_depth(gt)).unsqueeze(0).unsqueeze(0)
50 |         pred = torch.tensor(load_depth(pred)).unsqueeze(0).unsqueeze(0)
51 |         # Calculate metrics
52 |         metrics.append(compute_depth_metrics(
53 |             args, gt, pred, use_gt_scale=args.use_gt_scale))
54 |     # Get and print average value
55 |     metrics = (sum(metrics) / len(metrics)).detach().cpu().numpy()
56 |     names = ['abs_rel', 'sqr_rel', 'rmse', 'rmse_log', 'a1', 'a2', 'a3']
57 |     for name, metric in zip(names, metrics):
58 |         print('{} = {}'.format(name, metric))
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     args = parse_args()
63 |     main(args)
64 | 


--------------------------------------------------------------------------------
/scripts/infer.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
  2 | 
  3 | import argparse
  4 | import numpy as np
  5 | import os
  6 | import torch
  7 | 
  8 | from glob import glob
  9 | from cv2 import imwrite
 10 | 
 11 | from packnet_sfm.models.model_wrapper import ModelWrapper
 12 | from packnet_sfm.datasets.augmentations import resize_image, to_tensor
 13 | from packnet_sfm.utils.horovod import hvd_init, rank, world_size, print0
 14 | from packnet_sfm.utils.image import load_image
 15 | from packnet_sfm.utils.config import parse_test_file
 16 | from packnet_sfm.utils.load import set_debug
 17 | from packnet_sfm.utils.depth import write_depth, inv2depth, viz_inv_depth
 18 | from packnet_sfm.utils.logging import pcolor
 19 | 
 20 | 
 21 | def is_image(file, ext=('.png', '.jpg',)):
 22 |     """Check if a file is an image with certain extensions"""
 23 |     return file.endswith(ext)
 24 | 
 25 | 
 26 | def parse_args():
 27 |     parser = argparse.ArgumentParser(description='PackNet-SfM inference of depth maps from images')
 28 |     parser.add_argument('--checkpoint', type=str, help='Checkpoint (.ckpt)')
 29 |     parser.add_argument('--input', type=str, help='Input file or folder')
 30 |     parser.add_argument('--output', type=str, help='Output file or folder')
 31 |     parser.add_argument('--image_shape', type=int, nargs='+', default=None,
 32 |                         help='Input and output image shape '
 33 |                              '(default: checkpoint\'s config.datasets.augmentation.image_shape)')
 34 |     parser.add_argument('--half', action="store_true", help='Use half precision (fp16)')
 35 |     parser.add_argument('--save', type=str, choices=['npz', 'png'], default=None,
 36 |                         help='Save format (npz or png). Default is None (no depth map is saved).')
 37 |     args = parser.parse_args()
 38 |     assert args.checkpoint.endswith('.ckpt'), \
 39 |         'You need to provide a .ckpt file as checkpoint'
 40 |     assert args.image_shape is None or len(args.image_shape) == 2, \
 41 |         'You need to provide a 2-dimensional tuple as shape (H,W)'
 42 |     assert (is_image(args.input) and is_image(args.output)) or \
 43 |            (not is_image(args.input) and not is_image(args.input)), \
 44 |         'Input and output must both be images or folders'
 45 |     return args
 46 | 
 47 | 
 48 | @torch.no_grad()
 49 | def infer_and_save_depth(input_file, output_file, model_wrapper, image_shape, half, save):
 50 |     """
 51 |     Process a single input file to produce and save visualization
 52 | 
 53 |     Parameters
 54 |     ----------
 55 |     input_file : str
 56 |         Image file
 57 |     output_file : str
 58 |         Output file, or folder where the output will be saved
 59 |     model_wrapper : nn.Module
 60 |         Model wrapper used for inference
 61 |     image_shape : Image shape
 62 |         Input image shape
 63 |     half: bool
 64 |         use half precision (fp16)
 65 |     save: str
 66 |         Save format (npz or png)
 67 |     """
 68 |     if not is_image(output_file):
 69 |         # If not an image, assume it's a folder and append the input name
 70 |         os.makedirs(output_file, exist_ok=True)
 71 |         output_file = os.path.join(output_file, os.path.basename(input_file))
 72 | 
 73 |     # change to half precision for evaluation if requested
 74 |     dtype = torch.float16 if half else None
 75 | 
 76 |     # Load image
 77 |     image = load_image(input_file)
 78 |     # Resize and to tensor
 79 |     image = resize_image(image, image_shape)
 80 |     image = to_tensor(image).unsqueeze(0)
 81 | 
 82 |     # Send image to GPU if available
 83 |     if torch.cuda.is_available():
 84 |         image = image.to('cuda:{}'.format(rank()), dtype=dtype)
 85 | 
 86 |     # Depth inference (returns predicted inverse depth)
 87 |     pred_inv_depth = model_wrapper.depth(image)['inv_depths'][0]
 88 | 
 89 |     if save == 'npz' or save == 'png':
 90 |         # Get depth from predicted depth map and save to different formats
 91 |         filename = '{}.{}'.format(os.path.splitext(output_file)[0], save)
 92 |         print('Saving {} to {}'.format(
 93 |             pcolor(input_file, 'cyan', attrs=['bold']),
 94 |             pcolor(filename, 'magenta', attrs=['bold'])))
 95 |         write_depth(filename, depth=inv2depth(pred_inv_depth))
 96 |     else:
 97 |         # Prepare RGB image
 98 |         rgb = image[0].permute(1, 2, 0).detach().cpu().numpy() * 255
 99 |         # Prepare inverse depth
100 |         viz_pred_inv_depth = viz_inv_depth(pred_inv_depth[0]) * 255
101 |         # Concatenate both vertically
102 |         image = np.concatenate([rgb, viz_pred_inv_depth], 0)
103 |         # Save visualization
104 |         print('Saving {} to {}'.format(
105 |             pcolor(input_file, 'cyan', attrs=['bold']),
106 |             pcolor(output_file, 'magenta', attrs=['bold'])))
107 |         imwrite(output_file, image[:, :, ::-1])
108 | 
109 | 
110 | def main(args):
111 | 
112 |     # Initialize horovod
113 |     hvd_init()
114 | 
115 |     # Parse arguments
116 |     config, state_dict = parse_test_file(args.checkpoint)
117 | 
118 |     # If no image shape is provided, use the checkpoint one
119 |     image_shape = args.image_shape
120 |     if image_shape is None:
121 |         image_shape = config.datasets.augmentation.image_shape
122 | 
123 |     # Set debug if requested
124 |     set_debug(config.debug)
125 | 
126 |     # Initialize model wrapper from checkpoint arguments
127 |     model_wrapper = ModelWrapper(config, load_datasets=False)
128 |     # Restore monodepth_model state
129 |     model_wrapper.load_state_dict(state_dict)
130 | 
131 |     # change to half precision for evaluation if requested
132 |     dtype = torch.float16 if args.half else None
133 | 
134 |     # Send model to GPU if available
135 |     if torch.cuda.is_available():
136 |         model_wrapper = model_wrapper.to('cuda:{}'.format(rank()), dtype=dtype)
137 | 
138 |     # Set to eval mode
139 |     model_wrapper.eval()
140 | 
141 |     if os.path.isdir(args.input):
142 |         # If input file is a folder, search for image files
143 |         files = []
144 |         for ext in ['png', 'jpg']:
145 |             files.extend(glob((os.path.join(args.input, '*.{}'.format(ext)))))
146 |         files.sort()
147 |         print0('Found {} files'.format(len(files)))
148 |     else:
149 |         # Otherwise, use it as is
150 |         files = [args.input]
151 | 
152 |     # Process each file
153 |     for fn in files[rank()::world_size()]:
154 |         infer_and_save_depth(
155 |             fn, args.output, model_wrapper, image_shape, args.half, args.save)
156 | 
157 | 
158 | if __name__ == '__main__':
159 |     args = parse_args()
160 |     main(args)
161 | 


--------------------------------------------------------------------------------
/scripts/train.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Toyota Research Institute.  All rights reserved.
 2 | 
 3 | import argparse
 4 | 
 5 | from packnet_sfm.models.model_wrapper import ModelWrapper
 6 | from packnet_sfm.models.model_checkpoint import ModelCheckpoint
 7 | from packnet_sfm.trainers.horovod_trainer import HorovodTrainer
 8 | from packnet_sfm.utils.config import parse_train_file
 9 | from packnet_sfm.utils.load import set_debug, filter_args_create
10 | from packnet_sfm.utils.horovod import hvd_init, rank
11 | from packnet_sfm.loggers import WandbLogger
12 | 
13 | 
14 | def parse_args():
15 |     """Parse arguments for training script"""
16 |     parser = argparse.ArgumentParser(description='PackNet-SfM training script')
17 |     parser.add_argument('file', type=str, help='Input file (.ckpt or .yaml)')
18 |     args = parser.parse_args()
19 |     assert args.file.endswith(('.ckpt', '.yaml')), \
20 |         'You need to provide a .ckpt of .yaml file'
21 |     return args
22 | 
23 | 
24 | def train(file):
25 |     """
26 |     Monocular depth estimation training script.
27 | 
28 |     Parameters
29 |     ----------
30 |     file : str
31 |         Filepath, can be either a
32 |         **.yaml** for a yacs configuration file or a
33 |         **.ckpt** for a pre-trained checkpoint file.
34 |     """
35 |     # Initialize horovod
36 |     hvd_init()
37 | 
38 |     # Produce configuration and checkpoint from filename
39 |     config, ckpt = parse_train_file(file)
40 | 
41 |     # Set debug if requested
42 |     set_debug(config.debug)
43 | 
44 |     # Wandb Logger
45 |     logger = None if config.wandb.dry_run or rank() > 0 \
46 |         else filter_args_create(WandbLogger, config.wandb)
47 | 
48 |     # model checkpoint
49 |     checkpoint = None if config.checkpoint.filepath is '' or rank() > 0 else \
50 |         filter_args_create(ModelCheckpoint, config.checkpoint)
51 | 
52 |     # Initialize model wrapper
53 |     model_wrapper = ModelWrapper(config, resume=ckpt, logger=logger)
54 | 
55 |     # Create trainer with args.arch parameters
56 |     trainer = HorovodTrainer(**config.arch, checkpoint=checkpoint)
57 | 
58 |     # Train model
59 |     trainer.fit(model_wrapper)
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     args = parse_args()
64 |     train(args.file)
65 | 


--------------------------------------------------------------------------------