├── .gitignore ├── .gitkeep ├── .isort.cfg ├── .pylintrc ├── .style.yapf ├── LICENSE.md ├── Makefile ├── README.md ├── configs ├── common │ ├── augmentation.yaml │ ├── optimizer.yaml │ ├── test.yaml │ ├── test_dataloader.yaml │ └── train_dataloader.yaml ├── defaults.yaml ├── evaluators │ ├── kitti_3d.yaml │ └── nuscenes.yaml ├── experiments │ ├── dd3d_kitti_dla34.yaml │ ├── dd3d_kitti_dla34_overfit.yaml │ ├── dd3d_kitti_omninets.yaml │ ├── dd3d_kitti_regnety_006_bifpn.yaml │ ├── dd3d_kitti_v99.yaml │ ├── dd3d_nusc_dla34.yaml │ └── dd3d_nusc_v99.yaml ├── feature_extractors │ ├── d2_fpn.yaml │ ├── dla34_fpn.yaml │ ├── omninet_big.yaml │ ├── omninet_small.yaml │ └── v2_99_fpn.yaml ├── meta_arch │ └── dd3d.yaml ├── models │ ├── dd3d.yaml │ └── depth_head.yaml ├── test_datasets │ ├── base_test_dataset.yaml │ ├── kitti_3d.yaml │ └── nuscenes.yaml ├── train_datasets │ ├── base_train_dataset.yaml │ ├── kitti_3d.yaml │ └── nuscenes.yaml ├── visualize_dataloader.yaml └── visualizers │ ├── base_visualizer.yaml │ ├── box3d.yaml │ ├── common.yaml │ └── d2.yaml ├── docker ├── Dockerfile └── Dockerfile-cu111 ├── media └── figs │ ├── demo_dd3d_kitti_val_short.gif │ └── tri-logo.png ├── scripts ├── train.py └── visualize_dataloader.py └── tridet ├── __init__.py ├── data ├── __init__.py ├── augmentations │ ├── __init__.py │ ├── build.py │ ├── color_transform.py │ ├── crop_transform.py │ ├── flip_transform.py │ └── resize_transform.py ├── build.py ├── dataset_mappers │ ├── __init__.py │ ├── dataset_mapper.py │ └── nuscenes_mapper.py ├── datasets │ ├── __init__.py │ ├── kitti_3d │ │ ├── __init__.py │ │ └── build.py │ └── nuscenes │ │ ├── __init__.py │ │ └── build.py ├── samplers │ ├── __init__.py │ └── group_sampler.py └── transform_utils.py ├── evaluators ├── __init__.py ├── kitti_3d_evaluator.py ├── nuscenes_evaluator.py └── rotate_iou.py ├── layers ├── __init__.py ├── bev_nms.py ├── conv_bn_fpn_layers.py ├── iou_loss.py ├── normalization.py ├── separable_conv2d.py └── smooth_l1_loss.py ├── modeling ├── __init__.py ├── backbone │ └── omni_scripts │ │ ├── __init__.py │ │ ├── act.py │ │ ├── backbone_with_fpn.py │ │ ├── fpn.py │ │ ├── fused_mb_nets.py │ │ ├── norm.py │ │ ├── omninet_w1.0.py │ │ ├── omninet_w1.3.py │ │ ├── ops.py │ │ └── utils.py ├── dd3d │ ├── __init__.py │ ├── core.py │ ├── dense_depth.py │ ├── dense_depth_loss.py │ ├── depth.py │ ├── disentangled_box3d_loss.py │ ├── fcos2d.py │ ├── fcos3d.py │ ├── nuscenes_dd3d.py │ ├── nuscenes_dd3d_tta.py │ ├── postprocessing.py │ ├── prepare_targets.py │ ├── test_time_augmentation.py │ └── utils.py └── feature_extractor │ ├── __init__.py │ ├── dla.py │ └── vovnet.py ├── structures ├── __init__.py ├── boxes3d.py ├── image_list.py └── pose.py ├── utils ├── coco.py ├── comm.py ├── events.py ├── geometry.py ├── hydra │ └── callbacks.py ├── s3.py ├── setup.py ├── tasks.py ├── tensor2d.py ├── train.py ├── visualization.py └── wandb.py └── visualizers ├── __init__.py ├── bev.py ├── box3d_visualizer.py └── d2_visualizer.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | outputs/ 3 | wandb/ 4 | 5 | # cluster hostfiles 6 | hostfiles/ 7 | 8 | # Raw files 9 | *.jpg 10 | *.png 11 | *.txt 12 | 13 | # compilation and distribution 14 | __pycache__ 15 | _ext 16 | *.pyc 17 | *.so 18 | build/ 19 | dist/ 20 | 21 | # pytorch/python/numpy formats 22 | *.pth 23 | *.pkl 24 | *.npy 25 | 26 | # ipython/jupyter notebooks 27 | *.ipynb 28 | **/.ipynb_checkpoints/ 29 | 30 | # Editor temporaries 31 | *.swn 32 | *.swo 33 
| *.swp 34 | *~ 35 | -------------------------------------------------------------------------------- /.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/dd3d/56fad8ec9eb7fbd373953f49f9372120b4cd740c/.gitkeep -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output=6 3 | line_length=120 4 | 5 | sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER,myself 6 | known_third_party=torch, pandas, numpy, matplotlib, cv2, mpi4py, tqdm, pyquaternion, click, scipy, hydra, fvcore, seaborn, pycocotools, diskcache, xarray, pytorch3d, nuscenes, pyquaternion, iopath, wandb 7 | 8 | known_myself=tridet 9 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | # Copyright 2016-2018 Toyota Research Institute. All rights reserved. 2 | # pylintrc config file based on driving/src/utils/pylintrc 3 | [MASTER] 4 | accept-no-param-doc=no 5 | accept-no-return-doc=yes 6 | accept-no-yields-doc=yes 7 | 8 | [REPORTS] 9 | reports=no 10 | # Make errors emacs-compatible 11 | msg-template='{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}' 12 | 13 | [TYPECHECK] 14 | ignored-classes= 15 | PurePath 16 | 17 | [MESSAGES CONTROL] 18 | disable= 19 | # We do not want lazy logging, as error during lazy logging are caught and 20 | # ignored. Not a good recipe for reliable logging. 21 | logging-not-lazy, 22 | # also allow .format in logging calls 23 | logging-format-interpolation, 24 | # Do not enforce "refactor" rules 25 | R, 26 | C, duplicate-code, 27 | # Temporary disable complexity checks. 28 | too-many-instance-attributes, too-many-branches, too-many-statements, 29 | too-many-arguments, too-many-locals, 30 | # Do not complain on fixme/TODO's 31 | fixme, 32 | # Do not complain if we locally disabled a rule 33 | locally-disabled, 34 | # We do not care if we have too few public methods. 35 | too-few-public-methods, 36 | # We do not care if overridden methods use different arguments. 37 | arguments-differ, 38 | # Since this doesn't check control-flow, it has lots of false positives. 39 | invalid-unary-operand-type, 40 | not-callable, 41 | no-member, 42 | protected-access, 43 | attribute-defined-outside-init, 44 | global-statement, 45 | W0123, 46 | W1401, 47 | # allow multiple arguments for string formatting 48 | E1305, 49 | # bypass import error in setup.py 50 | E0401, 51 | E0611, 52 | # suppress Lambda warnings 53 | W0108, 54 | # allow un-implemented abstract method in sub-classes 55 | W0223, 56 | # allow explicit return in __init__ 57 | E0101, 58 | # (dennis.park) Use f-strings in logging. 
59 | W1203 60 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | indent_width = 4 4 | column_limit = 120 5 | arithmetic_precedence_indication = false 6 | spaces_before_comment = 2 7 | split_complex_comprehension = true 8 | split_penalty_comprehension = 2100 9 | blank_line_before_nested_class_or_def = false 10 | align_closing_bracket_with_visual_indent = true 11 | dedent_closing_brackets = true 12 | coalesce_brackets = true 13 | join_multiple_lines = false -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Toyota Research Institute (TRI) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT = dd3d 2 | WORKSPACE = /workspace/$(PROJECT) 3 | DOCKER_IMAGE = $(PROJECT):latest 4 | DOCKERFILE ?= Dockerfile 5 | 6 | DOCKER_OPTS = \ 7 | -it \ 8 | --rm \ 9 | -e DISPLAY=${DISPLAY} \ 10 | -v /data:/data \ 11 | -v /tmp:/tmp \ 12 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 13 | -v /mnt/fsx:/mnt/fsx \ 14 | -v /root/.ssh:/root/.ssh \ 15 | -v ~/.aws:/root/.aws \ 16 | --shm-size=1G \ 17 | --ipc=host \ 18 | --network=host \ 19 | --privileged 20 | 21 | DOCKER_BUILD_ARGS = \ 22 | --build-arg WORKSPACE=$(WORKSPACE) \ 23 | --build-arg AWS_ACCESS_KEY_ID \ 24 | --build-arg AWS_SECRET_ACCESS_KEY \ 25 | --build-arg AWS_DEFAULT_REGION \ 26 | --build-arg WANDB_ENTITY \ 27 | --build-arg WANDB_API_KEY \ 28 | 29 | NGPUS ?= $(shell nvidia-smi -L | wc -l) 30 | MASTER_ADDR ?= 127.0.0.1 31 | MPI_HOSTS ?= localhost:${NGPUS} 32 | MPI_CMD=mpirun \ 33 | -x LD_LIBRARY_PATH \ 34 | -x PYTHONPATH \ 35 | -x MASTER_ADDR=${MASTER_ADDR} \ 36 | -x NCCL_LL_THRESHOLD=0 \ 37 | -x AWS_ACCESS_KEY_ID \ 38 | -x AWS_SECRET_ACCESS_KEY \ 39 | -x WANDB_ENTITY \ 40 | -x WANDB_API_KEY \ 41 | -np ${NGPUS} \ 42 | -H ${MPI_HOSTS} \ 43 | -x NCCL_SOCKET_IFNAME=^docker0,lo \ 44 | --mca btl_tcp_if_exclude docker0,lo \ 45 | -mca plm_rsh_args 'p 12345' \ 46 | --allow-run-as-root 47 | 48 | docker-build: 49 | docker build \ 50 | $(DOCKER_BUILD_ARGS) \ 51 | -f ./docker/$(DOCKERFILE) \ 52 | -t $(DOCKER_IMAGE) . 
53 | 54 | docker-dev: 55 | nvidia-docker run --name $(PROJECT) \ 56 | $(DOCKER_OPTS) \ 57 | -v $(PWD):$(WORKSPACE) \ 58 | $(DOCKER_IMAGE) bash 59 | 60 | dist-run: 61 | nvidia-docker run --name $(PROJECT) --rm \ 62 | -e DISPLAY=${DISPLAY} \ 63 | -v ~/.torch:/root/.torch \ 64 | ${DOCKER_OPTS} \ 65 | -v $(PWD):$(WORKSPACE) \ 66 | ${DOCKER_IMAGE} \ 67 | ${COMMAND} 68 | 69 | docker-run: docker-build 70 | nvidia-docker run --name $(PROJECT) --rm \ 71 | ${DOCKER_OPTS} \ 72 | ${DOCKER_IMAGE} \ 73 | ${COMMAND} 74 | 75 | docker-run-mpi: docker-build 76 | nvidia-docker run ${DOCKER_OPTS} -v $(PWD)/outputs:$(WORKSPACE)/outputs ${DOCKER_IMAGE} \ 77 | bash -c "${MPI_CMD} ${COMMAND}" 78 | 79 | clean: 80 | find . -name '"*.pyc' | xargs sudo rm -f && \ 81 | find . -name '__pycache__' | xargs sudo rm -rf 82 | -------------------------------------------------------------------------------- /configs/common/augmentation.yaml: -------------------------------------------------------------------------------- 1 | # If `True`, then selectively enable data augmentation. 2 | # If `False`, then disable the entire data augmentation. 3 | AUG_ENABLED: True 4 | 5 | # 1) Resize 6 | RESIZE: 7 | ENABLED: True 8 | # Size of the smallest side of the image during training 9 | MIN_SIZE_TRAIN: ??? 10 | # Sample size of smallest side by choice or random selection from range give by 11 | MIN_SIZE_TRAIN_SAMPLING: "choice" 12 | # Maximum size of the side of the image during training 13 | MAX_SIZE_TRAIN: ??? 14 | # Size of the smallest side of the image during testing. Set to zero to disable resize in testing. 15 | MIN_SIZE_TEST: ??? 16 | # Maximum size of the side of the image during testing 17 | MAX_SIZE_TEST: ??? 18 | 19 | # 2) Crop 20 | CROP: 21 | # `True` if cropping is used for data augmentation during training 22 | ENABLED: False 23 | # Cropping type: 24 | # - "relative" crop (H * CROP.SIZE[0], W * CROP.SIZE[1]) part of an input of size (H, W) 25 | # - "relative_range" uniformly sample relative crop size from between [CROP.SIZE[0], [CROP.SIZE[1]]. 26 | # and [1, 1] and use it as in "relative" scenario. 27 | # - "absolute" crop part of an input with absolute size: (CROP.SIZE[0], CROP.SIZE[1]). 28 | # - "absolute_range", for an input of size (H, W), uniformly sample H_crop in 29 | # [CROP.SIZE[0], min(H, CROP.SIZE[1])] and W_crop in [CROP.SIZE[0], min(W, CROP.SIZE[1])] 30 | TYPE: "relative_range" 31 | # Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of 32 | # pixels if CROP.TYPE is "absolute" 33 | SIZE: [0.9, 0.9] 34 | 35 | # 3) Flip. 36 | RANDOM_FLIP: 37 | # NOTE: Unlike d2, RandomFlip is configurable 38 | ENABLED: True 39 | HORIZONTAL: True 40 | VERTICAL: False 41 | 42 | # 4) Color jittering 43 | COLOR_JITTER: 44 | ENABLED: True 45 | BRIGHTNESS: [0.2, 0.2] 46 | SATURATION: [0.2, 0.2] 47 | CONTRAST: [0.2, 0.2] 48 | -------------------------------------------------------------------------------- /configs/common/optimizer.yaml: -------------------------------------------------------------------------------- 1 | # Number of images per batch across all machines. 2 | # If we have 16 GPUs and IMS_PER_BATCH = 32, 3 | # each GPU will see 2 images per batch. 4 | # May be adjusted automatically if REFERENCE_WORLD_SIZE is set. 
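(Aside on configs/common/augmentation.yaml above.) The CROP block documents several crop types; a minimal Python sketch of that documented behavior, assuming a KITTI-sized input of (H, W) = (370, 1224) and CROP.SIZE = [0.9, 0.9], is below. This is illustrative only, not the detectron2 implementation.

import random

def sample_crop_hw(h, w, crop_type="relative_range", size=(0.9, 0.9)):
    if crop_type == "relative":
        return int(h * size[0]), int(w * size[1])
    if crop_type == "relative_range":
        # uniformly sample a relative size between CROP.SIZE and (1, 1), then use it as in "relative"
        rh = random.uniform(size[0], 1.0)
        rw = random.uniform(size[1], 1.0)
        return int(h * rh), int(w * rw)
    if crop_type == "absolute":
        return size[0], size[1]   # interpreted directly as pixels
    raise ValueError(crop_type)

sample_crop_hw(370, 1224)   # e.g. (351, 1162): each side is kept between 90% and 100% of the original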
5 | IMS_PER_BATCH: 16 6 | 7 | # Update scheme of torch.optim.SGD: 8 | # https://github.com/pytorch/pytorch/blob/master/torch/optim/sgd.py#L34 9 | BASE_LR: 0.001 10 | MOMENTUM: 0.9 11 | 12 | NESTEROV: False 13 | 14 | WEIGHT_DECAY: 0.0001 15 | # The weight decay that's applied to parameters of normalization layers 16 | # (typically the affine transformation) 17 | WEIGHT_DECAY_NORM: 0.0 18 | 19 | # Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for 20 | # biases. This is not useful (at least for recent models). You should avoid 21 | # changing these and they exist only to reproduce Detectron v1 training if 22 | # desired. 23 | BIAS_LR_FACTOR: 1.0 24 | WEIGHT_DECAY_BIAS: ${.WEIGHT_DECAY} 25 | 26 | GAMMA: 0.1 27 | 28 | # See detectron2/solver/build.py for LR scheduler options 29 | LR_SCHEDULER_NAME: WarmupMultiStepLR 30 | # The iteration number to decrease learning rate by GAMMA. 31 | STEPS: [30000] 32 | 33 | WARMUP_FACTOR: 0.0001 34 | WARMUP_ITERS: 2000 35 | WARMUP_METHOD: "linear" 36 | 37 | # Gradient clipping 38 | CLIP_GRADIENTS: 39 | ENABLED: False 40 | # Type of gradient clipping, currently 2 values are supported: 41 | # - "value": the absolute values of elements of each gradients are clipped 42 | # - "norm": the norm of the gradient for each parameter is clipped thus 43 | # affecting all elements in the parameter 44 | CLIP_TYPE: "value" 45 | # Maximum absolute value used for clipping gradients 46 | CLIP_VALUE: 1.0 47 | # Floating point number p for L-p norm to be used with the "norm" 48 | # gradient clipping type; for L-inf, please specify .inf 49 | NORM_TYPE: 2.0 50 | 51 | # Save a checkpoint after every this number of iterations 52 | CHECKPOINT_PERIOD: 5000 53 | 54 | # Support mixed precision training. 55 | MIXED_PRECISION_ENABLED: False 56 | 57 | # If any parameters might not be used in forward pass, turn on this to avoid error in DDP. 58 | # See "Internal Design" -> "Forward Pass": https://pytorch.org/docs/stable/notes/ddp.html 59 | DDP_FIND_UNUSED_PARAMETERS: False 60 | 61 | # Run multiple batches of size IMS_PER_BATCH before doing a backward pass. 62 | # The effective batch size: IMS_PER_BATCH x ACCUMULATE_GRAD_BATCHES 63 | ACCUMULATE_GRAD_BATCHES: 1 64 | 65 | # If True, then SyncBN use only workers of the same machine to compute batch stats used in batchnorm. 66 | # If False, then SyncBN uses all workers across all machines. 67 | SYNCBN_USE_LOCAL_WORKERS: False 68 | -------------------------------------------------------------------------------- /configs/common/test.yaml: -------------------------------------------------------------------------------- 1 | ENABLED: True 2 | 3 | # The period (in terms of steps) to evaluate the model during training. 4 | EVAL_PERIOD: 1000 5 | EVAL_ON_START: False 6 | ADDITIONAL_EVAL_STEPS: [] 7 | 8 | # (dennis.park) detectron2 hardcodes # ims per gpu to 1. 9 | IMS_PER_BATCH: 16 10 | -------------------------------------------------------------------------------- /configs/common/test_dataloader.yaml: -------------------------------------------------------------------------------- 1 | # Number of data loading threads 2 | NUM_WORKERS: 4 3 | 4 | # (dennis.park) Options: InferenceSampler, InferenceGroupSampler 5 | # If using `InferenceGroupSampler`, the user must specify `NUM_IMAGES_PER_GROUP` somewhere else. 
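(Aside on configs/common/optimizer.yaml above.) The batch-size knobs interact as described in the comments; a small worked example of the arithmetic, with a hypothetical 8-GPU machine, is below.

num_gpus = 8                   # hypothetical world size
ims_per_batch = 16             # SOLVER.IMS_PER_BATCH, split across all GPUs
accumulate = 1                 # SOLVER.ACCUMULATE_GRAD_BATCHES

images_per_gpu = ims_per_batch // num_gpus       # 2 images on each GPU per forward pass
effective_batch = ims_per_batch * accumulate     # 16 images contribute to each optimizer update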
6 | SAMPLER: "InferenceSampler" 7 | -------------------------------------------------------------------------------- /configs/common/train_dataloader.yaml: -------------------------------------------------------------------------------- 1 | # Number of data loading threads 2 | NUM_WORKERS: 4 3 | 4 | FILTER_EMPTY_ANNOTATIONS: True 5 | 6 | # Options: TrainingSampler, RepeatFactorTrainingSampler 7 | SAMPLER: TrainingSampler 8 | # Repeat threshold for RepeatFactorTrainingSampler 9 | REPEAT_THRESHOLD: 0.4 10 | 11 | # If True, each batch should contain only images for which the aspect ratio 12 | # is compatible. This groups portrait images together, and landscape images 13 | # are not batched with portrait images. 14 | # NOTE (dennis.park): This is set to True in detectron2. 15 | ASPECT_RATIO_GROUPING: False 16 | -------------------------------------------------------------------------------- /configs/defaults.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - train_datasets@DATASETS.TRAIN: 4 | - test_datasets@DATASETS.TEST: 5 | - feature_extractors@FE: 6 | - meta_arch@: 7 | - common/train_dataloader@DATALOADER.TRAIN 8 | - common/test_dataloader@DATALOADER.TEST 9 | - common/augmentation@INPUT 10 | - common/optimizer@SOLVER 11 | - common/test@TEST 12 | 13 | WANDB: 14 | ENABLED: False 15 | # If True, then it will not upload to the W&B server. 16 | DRYRUN: False 17 | PROJECT: dd3d 18 | GROUP: 19 | TAGS: [] 20 | 21 | EVAL_ONLY: False 22 | EVAL_ON_START: False 23 | 24 | ONLY_REGISTER_DATASETS: False 25 | 26 | OUTPUT_ROOT: './outputs' 27 | 28 | SYNC_OUTPUT_DIR_S3: 29 | ENABLED: False 30 | # The root path in S3 to cache working directories. Must start with 's3://' 31 | ROOT_IN_S3: ??? 32 | # How frequently (in training steps) to sync the working directory. 
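(Aside on configs/common/train_dataloader.yaml above.) REPEAT_THRESHOLD only matters when SAMPLER is RepeatFactorTrainingSampler; a minimal sketch of its effect, assuming detectron2's repeat-factor semantics (an image is repeated according to its rarest category), is below.

import math

def repeat_factor(category_frequency: float, repeat_threshold: float = 0.4) -> float:
    # images containing a category with frequency f < t are repeated ~sqrt(t / f) times per epoch
    return max(1.0, math.sqrt(repeat_threshold / max(category_frequency, 1e-12)))

repeat_factor(0.9)   # 1.0  -> common categories are not oversampled
repeat_factor(0.1)   # 2.0  -> images with rare categories are sampled about twice as often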
33 | PERIOD: 1000 34 | 35 | DATASET_ROOT: /data/datasets/ 36 | TMP_DIR: /tmp/ 37 | 38 | hydra: 39 | callbacks: 40 | distributed_callback: 41 | _target_: tridet.utils.hydra.callbacks.SetupDistributedCallback 42 | wandb_callback: 43 | _target_: tridet.utils.hydra.callbacks.WandbInitCallback 44 | output_dir_callback: 45 | _target_: tridet.utils.hydra.callbacks.SyncOutputDirCallback 46 | d2_logger_callback: 47 | _target_: tridet.utils.hydra.callbacks.D2LoggerCallback 48 | ckpt_path_callback: 49 | _target_: tridet.utils.hydra.callbacks.CkptPathResolverCallback 50 | sync_output_s3_end_callback: 51 | _target_: tridet.utils.hydra.callbacks.SyncOutputS3BeforeEnd 52 | verbose: False 53 | -------------------------------------------------------------------------------- /configs/evaluators/kitti_3d.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------- 2 | # KITTI3D evaluator (3D bounding box) 3 | # ----------------------------------- 4 | IOU_THRESHOLDS: [0.5, 0.7] 5 | ONLY_PREPARE_SUBMISSION: False 6 | -------------------------------------------------------------------------------- /configs/evaluators/nuscenes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/dd3d/56fad8ec9eb7fbd373953f49f9372120b4cd740c/configs/evaluators/nuscenes.yaml -------------------------------------------------------------------------------- /configs/experiments/dd3d_kitti_dla34.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - /evaluators/kitti_3d@EVALUATORS.KITTI3D 4 | - override /meta_arch: dd3d 5 | - override /train_datasets@DATASETS.TRAIN: kitti_3d 6 | - override /test_datasets@DATASETS.TEST: kitti_3d 7 | - override /feature_extractors@FE: dla34_fpn 8 | 9 | MODEL: 10 | # from-coco, IODA-pretrained. 11 | CKPT: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_dla34-y1urdmir-20210422_165446-model_final-remapped.pth 12 | 13 | FE: 14 | BACKBONE: 15 | NORM: FrozenBN 16 | FPN: 17 | NORM: FrozenBN 18 | OUT_FEATURES: ${.FPN.OUT_FEATURES} 19 | 20 | DD3D: 21 | FCOS2D: 22 | NORM: BN 23 | INFERENCE: 24 | NMS_THRESH: 0.75 25 | 26 | FCOS3D: 27 | NORM: FrozenBN 28 | 29 | INPUT: 30 | RESIZE: 31 | # KITTI images are (370, 1224) 32 | MIN_SIZE_TRAIN: [288, 304, 320, 336, 352, 368, 384, 400, 416, 448, 480, 512, 544, 576] 33 | MAX_SIZE_TRAIN: 10000 34 | MIN_SIZE_TEST: 384 35 | MAX_SIZE_TEST: 100000 36 | 37 | SOLVER: 38 | IMS_PER_BATCH: 64 # need at least 128 GPU mem (with fp16). 
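(Aside on configs/defaults.yaml above.) The defaults list uses Hydra package overrides: an entry such as `feature_extractors@FE:` places the chosen group config at cfg.FE, and experiment files (marked `@package _global_`) override those choices. A minimal sketch using the Hydra 1.1 compose API is below; the `+experiments=<name>` override is an assumption about how experiments are selected, and the path passed to initialize() is hypothetical.

from hydra import compose, initialize

with initialize(config_path="configs"):   # relative path, assuming the caller sits at the repo root
    cfg = compose(config_name="defaults", overrides=["+experiments=dd3d_kitti_dla34"])

print(cfg.FE.BACKBONE.NAME)       # "DLA-34", placed at cfg.FE by `feature_extractors@FE: dla34_fpn`
print(cfg.DATASETS.TRAIN.NAME)    # "kitti_3d_train", from `train_datasets@DATASETS.TRAIN: kitti_3d`
print(cfg.SOLVER.IMS_PER_BATCH)   # 64, set by the experiment file on top of configs/common/optimizer.yaml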
39 | BASE_LR: 0.002 40 | MAX_ITER: 25000 41 | STEPS: [21500, 24000] 42 | WARMUP_ITERS: 2000 43 | MIXED_PRECISION_ENABLED: True 44 | CHECKPOINT_PERIOD: 2000 45 | 46 | TEST: 47 | IMS_PER_BATCH: 80 48 | EVAL_PERIOD: 2000 49 | AUG: 50 | ENABLED: True 51 | MIN_SIZES: [320, 384, 448, 512, 576] 52 | MAX_SIZE: 100000 53 | FLIP: True 54 | 55 | DATALOADER: 56 | TRAIN: 57 | NUM_WORKERS: 12 58 | SAMPLER: RepeatFactorTrainingSampler 59 | REPEAT_THRESHOLD: 0.4 60 | 61 | WANDB: 62 | TAGS: [kitti-val, dla34, bn] 63 | -------------------------------------------------------------------------------- /configs/experiments/dd3d_kitti_dla34_overfit.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - dd3d_kitti_dla34 4 | 5 | DATASETS: 6 | TRAIN: 7 | NAME: kitti_3d_overfit 8 | TEST: 9 | NAME: kitti_3d_overfit 10 | 11 | INPUT: 12 | AUG_ENABLED: False 13 | 14 | SOLVER: 15 | IMS_PER_BATCH: 8 16 | BASE_LR: 0.0001 17 | MAX_ITER: 1500 18 | STEPS: [1200] 19 | WARMUP_ITERS: 100 20 | 21 | TEST: 22 | EVAL_PERIOD: 500 23 | AUG: 24 | ENABLED: False 25 | -------------------------------------------------------------------------------- /configs/experiments/dd3d_kitti_omninets.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - /evaluators/kitti_3d@EVALUATORS.KITTI3D 4 | - override /meta_arch: dd3d 5 | - override /train_datasets@DATASETS.TRAIN: kitti_3d 6 | - override /test_datasets@DATASETS.TEST: kitti_3d 7 | - override /feature_extractors@FE: dla34_fpn 8 | 9 | MODEL: 10 | # from-coco, IODA-pretrained. 11 | backbone_with_fpn: 12 | width_mult: 1.0 13 | depth_mult: 1.0 14 | 15 | CKPT: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_omninet-small-3nxjur71.pth 16 | 17 | FE: 18 | BACKBONE: 19 | NORM: FrozenBN 20 | FPN: 21 | NORM: FrozenBN 22 | OUT_FEATURES: ${.FPN.OUT_FEATURES} 23 | 24 | DD3D: 25 | FCOS2D: 26 | NORM: BN 27 | INFERENCE: 28 | NMS_THRESH: 0.75 29 | 30 | FCOS3D: 31 | NORM: FrozenBN 32 | 33 | INPUT: 34 | RESIZE: 35 | # KITTI images are (370, 1224) 36 | MIN_SIZE_TRAIN: [288, 304, 320, 336, 352, 368, 384, 400, 416, 448, 480, 512, 544, 576] 37 | MAX_SIZE_TRAIN: 10000 38 | MIN_SIZE_TEST: 384 39 | MAX_SIZE_TEST: 100000 40 | 41 | SOLVER: 42 | IMS_PER_BATCH: 64 # need at least 128 GPU mem (with fp16). 43 | BASE_LR: 0.002 44 | MAX_ITER: 25000 45 | STEPS: [21500, 24000] 46 | WARMUP_ITERS: 2000 47 | MIXED_PRECISION_ENABLED: True 48 | CHECKPOINT_PERIOD: 2000 49 | 50 | TEST: 51 | IMS_PER_BATCH: 80 52 | EVAL_PERIOD: 2000 53 | AUG: 54 | ENABLED: True 55 | MIN_SIZES: [320, 384, 448, 512, 576] 56 | MAX_SIZE: 100000 57 | FLIP: True 58 | 59 | DATALOADER: 60 | TRAIN: 61 | NUM_WORKERS: 12 62 | SAMPLER: RepeatFactorTrainingSampler 63 | REPEAT_THRESHOLD: 0.4 64 | 65 | WANDB: 66 | TAGS: [kitti-val, dla34, bn] 67 | -------------------------------------------------------------------------------- /configs/experiments/dd3d_kitti_regnety_006_bifpn.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - /evaluators/kitti_3d@EVALUATORS.KITTI3D 4 | - override /meta_arch: dd3d 5 | - override /train_datasets@DATASETS.TRAIN: kitti_3d 6 | - override /test_datasets@DATASETS.TEST: kitti_3d 7 | - override /feature_extractors@FE: regnety_006_bifpn 8 | 9 | MODEL: 10 | # from-coco, IODA-pretrained. 
11 | CKPT: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_dla34-y1urdmir-20210422_165446-model_final-remapped.pth 12 | 13 | FE: 14 | NORM: FrozenBN 15 | FPN: 16 | NORM: FrozenBN 17 | 18 | DD3D: 19 | FCOS2D: 20 | NORM: BN 21 | INFERENCE: 22 | NMS_THRESH: 0.75 23 | 24 | FCOS3D: 25 | NORM: FrozenBN 26 | 27 | INPUT: 28 | RESIZE: 29 | # KITTI images are (370, 1224) 30 | MIN_SIZE_TRAIN: [288, 304, 320, 336, 352, 368, 384, 400, 416, 448, 480, 512, 544, 576] 31 | MAX_SIZE_TRAIN: 10000 32 | MIN_SIZE_TEST: 384 33 | MAX_SIZE_TEST: 100000 34 | 35 | SOLVER: 36 | IMS_PER_BATCH: 64 37 | BASE_LR: 0.002 38 | MAX_ITER: 25000 39 | STEPS: [21500, 24000] 40 | WARMUP_ITERS: 2000 41 | MIXED_PRECISION_ENABLED: True 42 | CHECKPOINT_PERIOD: 2000 43 | 44 | TEST: 45 | IMS_PER_BATCH: 80 46 | EVAL_PERIOD: 2000 47 | AUG: 48 | ENABLED: True 49 | MIN_SIZES: [320, 384, 448, 512, 576] 50 | MAX_SIZE: 100000 51 | FLIP: True 52 | 53 | DATALOADER: 54 | TRAIN: 55 | NUM_WORKERS: 12 56 | SAMPLER: RepeatFactorTrainingSampler 57 | REPEAT_THRESHOLD: 0.4 58 | 59 | WANDB: 60 | TAGS: [kitti-val, dla34, bn] 61 | -------------------------------------------------------------------------------- /configs/experiments/dd3d_kitti_v99.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - /evaluators/kitti_3d@EVALUATORS.KITTI3D 4 | - override /meta_arch: dd3d 5 | - override /train_datasets@DATASETS.TRAIN: kitti_3d 6 | - override /test_datasets@DATASETS.TEST: kitti_3d 7 | - override /feature_extractors@FE: v2_99_fpn 8 | 9 | MODEL: 10 | # from-coco, IODA-pretrained. 11 | CKPT: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_v99-3jlw0p36-20210423_010520-model_final-remapped.pth 12 | 13 | FE: 14 | BACKBONE: 15 | NORM: FrozenBN 16 | FPN: 17 | NORM: FrozenBN 18 | OUT_FEATURES: ${.FPN.OUT_FEATURES} 19 | 20 | DD3D: 21 | FCOS2D: 22 | NORM: BN 23 | INFERENCE: 24 | NMS_THRESH: 0.75 25 | 26 | FCOS3D: 27 | NORM: FrozenBN 28 | 29 | INPUT: 30 | RESIZE: 31 | # KITTI images are (370, 1224) 32 | MIN_SIZE_TRAIN: [288, 304, 320, 336, 352, 368, 384, 400, 416, 448, 480, 512, 544, 576] 33 | MAX_SIZE_TRAIN: 10000 34 | MIN_SIZE_TEST: 384 35 | MAX_SIZE_TEST: 100000 36 | 37 | SOLVER: 38 | IMS_PER_BATCH: 64 # need at least 256 GPU mem (with fp16). 39 | BASE_LR: 0.002 40 | MAX_ITER: 25000 41 | STEPS: [21500, 24000] 42 | WARMUP_ITERS: 2000 # ~35 epochs 43 | MIXED_PRECISION_ENABLED: True 44 | CHECKPOINT_PERIOD: 2000 45 | 46 | TEST: 47 | IMS_PER_BATCH: 80 48 | EVAL_PERIOD: 2000 49 | AUG: 50 | ENABLED: True 51 | MIN_SIZES: [320, 384, 448, 512, 576] 52 | MAX_SIZE: 100000 53 | FLIP: True 54 | 55 | DATALOADER: 56 | TRAIN: 57 | NUM_WORKERS: 12 58 | SAMPLER: RepeatFactorTrainingSampler 59 | REPEAT_THRESHOLD: 0.4 60 | 61 | WANDB: 62 | TAGS: [kitti-val, v2-99, bn] 63 | -------------------------------------------------------------------------------- /configs/experiments/dd3d_nusc_dla34.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - /evaluators/nuscenes@EVALUATORS.NUSCENES 4 | - override /meta_arch: dd3d 5 | - override /train_datasets@DATASETS.TRAIN: nuscenes 6 | - override /test_datasets@DATASETS.TEST: nuscenes 7 | - override /feature_extractors@FE: dla34_fpn 8 | 9 | MODEL: 10 | META_ARCHITECTURE: NuscenesDD3D 11 | # from-coco, IODA-pretrained. 
12 | CKPT: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_dla34-y1urdmir-20210422_165446-model_final-remapped.pth 13 | 14 | FE: 15 | BACKBONE: 16 | NORM: FrozenBN 17 | FPN: 18 | NORM: FrozenBN 19 | OUT_FEATURES: ${.FPN.OUT_FEATURES} 20 | 21 | DD3D: 22 | FCOS2D: 23 | NORM: BN 24 | INFERENCE: 25 | NMS_THRESH: 0.75 26 | 27 | FCOS3D: 28 | NORM: FrozenBN 29 | 30 | NUSC: 31 | LOSS: 32 | WEIGHT_ATTR: 0.2 33 | WEIGHT_SPEED: 0.2 34 | INFERENCE: 35 | NUM_IMAGES_PER_SAMPLE: ${DATASETS.TEST.NUM_IMAGES_PER_SAMPLE} 36 | MAX_NUM_DETS_PER_SAMPLE: 500 37 | 38 | INPUT: 39 | RESIZE: 40 | # Nuscens images are (900, 1600) 41 | MIN_SIZE_TRAIN: [640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152] 42 | MAX_SIZE_TRAIN: 10000 43 | MIN_SIZE_TEST: 896 44 | MAX_SIZE_TEST: 100000 45 | 46 | SOLVER: 47 | IMS_PER_BATCH: 64 # need at least 128 GPU mem (with fp16). 48 | BASE_LR: 0.002 49 | MAX_ITER: 120000 50 | STEPS: [100000, 115000] 51 | WARMUP_ITERS: 2000 52 | MIXED_PRECISION_ENABLED: True 53 | CHECKPOINT_PERIOD: 2000 54 | 55 | TEST: 56 | IMS_PER_BATCH: 96 # 6 * 16 (must be multiple of 6 x #GPUs.) 57 | EVAL_PERIOD: 2000 58 | AUG: 59 | ENABLED: True 60 | MIN_SIZES: [640, 768, 896, 1024, 1152] 61 | MAX_SIZE: 100000 62 | FLIP: True 63 | 64 | DATALOADER: 65 | TRAIN: 66 | NUM_WORKERS: 12 67 | SAMPLER: RepeatFactorTrainingSampler 68 | REPEAT_THRESHOLD: 0.8 69 | TEST: 70 | SAMPLER: InferenceGroupSampler 71 | NUM_IMAGES_PER_GROUP: ${DATASETS.TEST.NUM_IMAGES_PER_SAMPLE} 72 | 73 | WANDB: 74 | TAGS: [nusc-val, v2-99, bn] 75 | -------------------------------------------------------------------------------- /configs/experiments/dd3d_nusc_v99.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - /evaluators/nuscenes@EVALUATORS.NUSCENES 4 | - override /meta_arch: dd3d 5 | - override /train_datasets@DATASETS.TRAIN: nuscenes 6 | - override /test_datasets@DATASETS.TEST: nuscenes 7 | - override /feature_extractors@FE: v2_99_fpn 8 | 9 | MODEL: 10 | META_ARCHITECTURE: NuscenesDD3D 11 | # from-coco, IODA-pretrained. 12 | CKPT: https://tri-ml-public.s3.amazonaws.com/github/dd3d/pretrained/depth_pretrained_v99-3jlw0p36-20210423_010520-model_final-remapped.pth 13 | 14 | FE: 15 | BACKBONE: 16 | NORM: FrozenBN 17 | FPN: 18 | NORM: FrozenBN 19 | OUT_FEATURES: ${.FPN.OUT_FEATURES} 20 | 21 | DD3D: 22 | FCOS2D: 23 | NORM: BN 24 | INFERENCE: 25 | NMS_THRESH: 0.75 26 | 27 | FCOS3D: 28 | NORM: FrozenBN 29 | 30 | NUSC: 31 | LOSS: 32 | WEIGHT_ATTR: 0.2 33 | WEIGHT_SPEED: 0.2 34 | INFERENCE: 35 | NUM_IMAGES_PER_SAMPLE: ${DATASETS.TEST.NUM_IMAGES_PER_SAMPLE} 36 | MAX_NUM_DETS_PER_SAMPLE: 500 37 | 38 | INPUT: 39 | RESIZE: 40 | # Nuscens images are (900, 1600) 41 | MIN_SIZE_TRAIN: [640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152] 42 | MAX_SIZE_TRAIN: 10000 43 | MIN_SIZE_TEST: 896 44 | MAX_SIZE_TEST: 100000 45 | 46 | SOLVER: 47 | IMS_PER_BATCH: 64 # need at least 400 GPU mem (with fp16). 48 | BASE_LR: 0.002 49 | MAX_ITER: 120000 50 | STEPS: [100000, 115000] 51 | WARMUP_ITERS: 2000 52 | MIXED_PRECISION_ENABLED: True 53 | CHECKPOINT_PERIOD: 2000 54 | 55 | TEST: 56 | IMS_PER_BATCH: 192 # 6 * 32 (must be multiple of 6 x #GPUs.) 
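(Aside on the nuScenes TEST.IMS_PER_BATCH comment above.) With InferenceGroupSampler, every nuScenes sample contributes NUM_IMAGES_PER_SAMPLE = 6 camera images that must stay on the same worker, hence the "multiple of 6 x #GPUs" constraint. A small illustrative sanity check:

num_gpus = 8                     # hypothetical world size
ims_per_batch = 192              # TEST.IMS_PER_BATCH
images_per_sample = 6            # DATASETS.TEST.NUM_IMAGES_PER_SAMPLE (surround-view cameras)

assert ims_per_batch % (images_per_sample * num_gpus) == 0, \
    "each GPU must receive whole samples, i.e. a multiple of 6 images"
samples_per_gpu = ims_per_batch // (images_per_sample * num_gpus)   # 4 full samples per GPU per batch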
57 | EVAL_PERIOD: 2000 58 | AUG: 59 | ENABLED: True 60 | MIN_SIZES: [640, 768, 896, 1024, 1152] 61 | MAX_SIZE: 100000 62 | FLIP: True 63 | 64 | DATALOADER: 65 | TRAIN: 66 | NUM_WORKERS: 12 67 | SAMPLER: RepeatFactorTrainingSampler 68 | REPEAT_THRESHOLD: 0.8 69 | TEST: 70 | SAMPLER: InferenceGroupSampler 71 | NUM_IMAGES_PER_GROUP: ${DATASETS.TEST.NUM_IMAGES_PER_SAMPLE} 72 | 73 | WANDB: 74 | TAGS: [nusc-val, v2-99, bn] 75 | -------------------------------------------------------------------------------- /configs/feature_extractors/d2_fpn.yaml: -------------------------------------------------------------------------------- 1 | IN_FEATURES: ${..BACKBONE.OUT_FEATURES} 2 | # By default ('None'), returns all features. 3 | OUT_FEATURES: 4 | 5 | OUT_CHANNELS: 256 6 | NORM: BN 7 | 8 | # Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" 9 | FUSE_TYPE: sum 10 | -------------------------------------------------------------------------------- /configs/feature_extractors/dla34_fpn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - d2_fpn@FPN 3 | 4 | BUILDER: build_fcos_dla_fpn_backbone_p67 5 | 6 | BACKBONE: 7 | NAME: DLA-34 8 | OUT_FEATURES: [level3, level4, level5] 9 | NORM: BN 10 | -------------------------------------------------------------------------------- /configs/feature_extractors/omninet_big.yaml: -------------------------------------------------------------------------------- 1 | _target_: tridet.modeling.backbone.omni_scripts.backbone_with_fpn.build_feature_extractor_all_fuse 2 | 3 | return_list: True 4 | width_mult: 1.3 5 | depth_mult: 1.0 6 | -------------------------------------------------------------------------------- /configs/feature_extractors/omninet_small.yaml: -------------------------------------------------------------------------------- 1 | _target_: tridet.modeling.backbone.omni_scripts.backbone_with_fpn.build_feature_extractor_all_fuse 2 | 3 | return_list: True 4 | width_mult: 1.0 5 | depth_mult: 1.0 6 | -------------------------------------------------------------------------------- /configs/feature_extractors/v2_99_fpn.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - d2_fpn@FPN 3 | 4 | BUILDER: build_fcos_vovnet_fpn_backbone_p6 5 | 6 | BACKBONE: 7 | NAME: V-99-eSE 8 | OUT_FEATURES: [stage2, stage3, stage4, stage5] 9 | NORM: BN 10 | -------------------------------------------------------------------------------- /configs/meta_arch/dd3d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - /models/dd3d@DD3D 3 | - /visualizers/common@VIS 4 | - /visualizers/d2@VIS.D2 5 | - /visualizers/box3d@VIS.BOX3D 6 | 7 | INPUT: 8 | FORMAT: BGR 9 | 10 | MODEL: 11 | DEVICE: cuda 12 | META_ARCHITECTURE: DD3D 13 | 14 | # Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). 15 | # To train on images of different number of channels, just set different mean & std. 16 | # Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] 17 | PIXEL_MEAN: [103.530, 116.280, 123.675] 18 | # NOTE (dennis.park): This is set to [1.0, 1.0, 1.0] in detectron2. 19 | PIXEL_STD: [57.375, 57.120, 58.395] 20 | 21 | # Path (a file path, or S3 URL like s3://... ) to a checkpoint file 22 | # to be loaded to the model. 
23 | CKPT: "" 24 | 25 | BOX2D_ON: True 26 | BOX3D_ON: True 27 | DEPTH_ON: False 28 | -------------------------------------------------------------------------------- /configs/models/dd3d.yaml: -------------------------------------------------------------------------------- 1 | IN_FEATURES: ${FE.OUT_FEATURES} 2 | 3 | NUM_CLASSES: ${DATASETS.TRAIN.NUM_CLASSES} 4 | 5 | # If None, then the feature location starts from (0, 0) 6 | # If "half", then it starts from the (stride / 2, stride / 2) 7 | FEATURE_LOCATIONS_OFFSET: none # "none" or "half" 8 | 9 | # Range of sizes that each FPN level is responsible for. 10 | SIZES_OF_INTEREST: [64, 128, 256, 512] 11 | 12 | INFERENCE: 13 | DO_NMS: True # 2D NMS 14 | DO_POSTPROCESS: True # Resize instances according to the original image size. 15 | DO_BEV_NMS: False # NMS in BEV space. 16 | BEV_NMS_IOU_THRESH: 0.3 17 | NUSC_SAMPLE_AGGREGATE: False 18 | 19 | FCOS2D: 20 | _VERSION: v2 21 | NORM: BN 22 | NUM_CLS_CONVS: 4 23 | NUM_BOX_CONVS: 4 24 | USE_DEFORMABLE: False 25 | USE_SCALE: True 26 | BOX2D_SCALE_INIT_FACTOR: 1.0 27 | 28 | LOSS: 29 | ALPHA: 0.25 30 | GAMMA: 2.0 31 | LOC_LOSS_TYPE: giou 32 | 33 | INFERENCE: 34 | THRESH_WITH_CTR: True 35 | PRE_NMS_THRESH: 0.05 36 | PRE_NMS_TOPK: 1000 37 | POST_NMS_TOPK: 100 38 | NMS_THRESH: 0.6 39 | 40 | FCOS3D: 41 | NORM: BN 42 | NUM_CONVS: 4 43 | USE_DEFORMABLE: False 44 | USE_SCALE: True 45 | DEPTH_SCALE_INIT_FACTOR: 0.3 46 | PROJ_CTR_SCALE_INIT_FACTOR: 1.0 47 | PER_LEVEL_PREDICTORS: False 48 | 49 | # If True, then the depth prediction is scaled using focal lengths; this enables camera-awareness. 50 | SCALE_DEPTH_BY_FOCAL_LENGTHS: True 51 | SCALE_DEPTH_BY_FOCAL_LENGTHS_FACTOR: 500. 52 | 53 | MEAN_DEPTH_PER_LEVEL: ${DATASETS.TRAIN.MEAN_DEPTH_PER_LEVEL} 54 | STD_DEPTH_PER_LEVEL: ${DATASETS.TRAIN.STD_DEPTH_PER_LEVEL} 55 | 56 | MIN_DEPTH: 0.1 57 | MAX_DEPTH: 80.0 58 | 59 | CANONICAL_BOX3D_SIZES: ${DATASETS.TRAIN.CANONICAL_BOX3D_SIZES} 60 | CLASS_AGNOSTIC_BOX3D: False 61 | 62 | # If True, then the network predicts allocentric (local) orientation. 63 | PREDICT_ALLOCENTRIC_ROT: True 64 | # If True, then the network predicts L2 distance between camera and box center; if False, then it predicts the z-value. 65 | PREDICT_DISTANCE: False 66 | 67 | LOSS: 68 | SMOOTH_L1_BETA: 0.05 69 | MAX_LOSS_PER_GROUP_DISENT: 20.0 70 | CONF_3D_TEMPERATURE: 1.0 71 | 72 | WEIGHT_BOX3D: 2.0 73 | WEIGHT_CONF3D: 1.0 74 | 75 | PREPARE_TARGET: 76 | CENTER_SAMPLE: True 77 | POS_RADIUS: 1.5 78 | -------------------------------------------------------------------------------- /configs/models/depth_head.yaml: -------------------------------------------------------------------------------- 1 | _target_: tridet.modeling.dd3d.depth.PacknetDepthHead 2 | _partial_: True # must provide 'input_shape 3 | 4 | net: 5 | _target_: tridet.layers.ConvBnFpnLayers 6 | _partial_: True # must provide 'input_shape'. 7 | 8 | num_layers: 4 9 | norm_kwargs: 10 | kernel_size: 3 11 | activation: 'gelu' 12 | groups: 1 13 | 14 | min_depth: 1.0 15 | max_depth: 80.0 16 | scale_depth_by_focal_length: 900.0 17 | -------------------------------------------------------------------------------- /configs/test_datasets/base_test_dataset.yaml: -------------------------------------------------------------------------------- 1 | NAME: ??? 
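(Aside on the FCOS3D depth options in configs/models/dd3d.yaml above.) A minimal sketch of how those options could combine, written as an assumption about the intent rather than the actual DD3D code: the per-level prediction is un-normalized with MEAN/STD_DEPTH_PER_LEVEL and, when SCALE_DEPTH_BY_FOCAL_LENGTHS is enabled, scaled by focal_length / SCALE_DEPTH_BY_FOCAL_LENGTHS_FACTOR, which makes the same network output map to a larger metric depth under a longer focal length (camera-awareness). Function and argument names are illustrative.

import torch

def decode_depth(x, level, focal_length,
                 mean=(32.594, 15.178, 8.424, 5.004, 4.662),   # KITTI MEAN_DEPTH_PER_LEVEL
                 std=(14.682, 7.139, 4.345, 2.399, 2.587),     # KITTI STD_DEPTH_PER_LEVEL
                 factor=500.0, min_depth=0.1, max_depth=80.0):
    d = mean[level] + std[level] * x        # un-normalize using per-FPN-level statistics
    d = d * focal_length / factor           # camera-aware scaling (if SCALE_DEPTH_BY_FOCAL_LENGTHS)
    return torch.clamp(d, min_depth, max_depth)

decode_depth(torch.zeros(1), level=0, focal_length=721.5)   # ~47 m for a zero prediction at the finest level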
2 | 3 | NUSC_SAMPLE_AGGREGATE_IN_INFERENCE: False 4 | 5 | DATASET_MAPPER: "default" 6 | -------------------------------------------------------------------------------- /configs/test_datasets/kitti_3d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_test_dataset 3 | 4 | NAME: kitti_3d_val 5 | -------------------------------------------------------------------------------- /configs/test_datasets/nuscenes.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_test_dataset 3 | 4 | NAME: nusc_val-subsample-8 5 | 6 | NUSC_SAMPLE_AGGREGATE_IN_INFERENCE: True 7 | NUM_IMAGES_PER_SAMPLE: 6 8 | -------------------------------------------------------------------------------- /configs/train_datasets/base_train_dataset.yaml: -------------------------------------------------------------------------------- 1 | NAME: ??? 2 | 3 | CANONICAL_BOX3D_SIZES: ??? 4 | 5 | DATASET_MAPPER: "default" 6 | -------------------------------------------------------------------------------- /configs/train_datasets/kitti_3d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_train_dataset 3 | 4 | NAME: kitti_3d_train 5 | 6 | CANONICAL_BOX3D_SIZES: [ 7 | # (width, length, height) 8 | [1.61876949, 3.89154523, 1.52969237], # Car 9 | [0.62806586, 0.82038497, 1.76784787], # Pedestrian 10 | [0.56898187, 1.77149234, 1.7237099], # Cyclist 11 | [1.9134491 , 5.15499603, 2.18998422], # Van 12 | [2.61168401, 9.22692319, 3.36492722], # Truck 13 | [0.5390196 , 1.08098042, 1.28392158], # Person_sitting 14 | [2.36044838, 15.56991038, 3.5289238], # Tram 15 | [1.24489164, 2.51495357, 1.61402478], # Misc 16 | ] 17 | 18 | NUM_CLASSES: 5 19 | 20 | MEAN_DEPTH_PER_LEVEL: [32.594, 15.178, 8.424, 5.004, 4.662] 21 | STD_DEPTH_PER_LEVEL: [14.682, 7.139, 4.345, 2.399, 2.587] 22 | -------------------------------------------------------------------------------- /configs/train_datasets/nuscenes.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_train_dataset 3 | 4 | NAME: nusc_train 5 | 6 | CANONICAL_BOX3D_SIZES: [ 7 | # (width, length, height) 8 | [2.3524184, 0.5062202, 1.0413622], # barrier 9 | [0.61416006, 1.7016163, 1.3054738], # bicycle 10 | [2.9139307, 10.725025, 3.2832346], # bus 11 | [1.9751819, 4.641267, 1.74352], # car 12 | [2.772134, 6.565072, 3.2474296], # construction vehicle 13 | [0.7800532, 2.138673, 1.4437162], # motorcycle 14 | [0.6667362, 0.7181772, 1.7616143], # pedestrian 15 | [0.40246472, 0.4027083, 1.0084083], # traffic cone 16 | [3.0059454, 12.8197, 4.1213827], # trailer 17 | [2.4986045, 6.9310856, 2.8382742] # truck 18 | ] 19 | 20 | NUM_CLASSES: 10 21 | 22 | MEAN_DEPTH_PER_LEVEL: [44.921, 20.252, 11.712, 7.166, 8.548] 23 | STD_DEPTH_PER_LEVEL: [24.331, 9.833, 6.223, 4.611, 8.275] 24 | 25 | DATASET_MAPPER: "nuscenes" 26 | 27 | MIN_NUM_LIDAR_PTS: 3 28 | MIN_BOX_VISIBILITY: 0.2 29 | -------------------------------------------------------------------------------- /configs/visualize_dataloader.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - defaults 3 | 4 | USE_TEST: False 5 | 6 | MODEL: 7 | CHECKPOINT: '' 8 | 9 | WANDB: 10 | ENABLED: False 11 | 12 | SOLVER: 13 | IMS_PER_BATCH: 4 14 | TEST: 15 | IMS_PER_BATCH: 4 16 | -------------------------------------------------------------------------------- /configs/visualizers/base_visualizer.yaml: 
-------------------------------------------------------------------------------- 1 | DATALOADER: 2 | ENABLED: True 3 | 4 | PREDICTIONS: 5 | ENABLED: True 6 | -------------------------------------------------------------------------------- /configs/visualizers/box3d.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_visualizer 3 | 4 | DATALOADER: 5 | SCALE: 1.0 6 | RENDER_LABELS: True 7 | 8 | PREDICTIONS: 9 | SCALE: 1.0 10 | RENDER_LABELS: True 11 | THRESHOLD: 0.5 12 | MIN_DEPTH_CENTER: 0. 13 | -------------------------------------------------------------------------------- /configs/visualizers/common.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER_ENABLED: True 2 | DATALOADER_PERIOD: 1000 3 | DATALOADER_MAX_NUM_SAMPLES: 10 4 | 5 | PREDICTIONS_ENABLED: True 6 | PREDICTIONS_MAX_NUM_SAMPLES: 20 7 | -------------------------------------------------------------------------------- /configs/visualizers/d2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base_visualizer 3 | 4 | DATALOADER: 5 | SCALE: 1.0 6 | COLOR_MODE: "image" 7 | 8 | PREDICTIONS: 9 | SCALE: 1.0 10 | COLOR_MODE: "image" 11 | THRESHOLD: 0.4 12 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-cudnn8-devel-ubuntu18.04 2 | 3 | ENV PYTHON_VERSION=3.8 4 | 5 | # ------------------------- 6 | # Optional: AWS credentials 7 | # ------------------------- 8 | ARG AWS_SECRET_ACCESS_KEY 9 | ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 10 | 11 | ARG AWS_ACCESS_KEY_ID 12 | ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 13 | 14 | ARG AWS_DEFAULT_REGION 15 | ENV AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 16 | 17 | # ------------------------- 18 | # Optional: W&B credentials 19 | # ------------------------- 20 | ARG WANDB_ENTITY 21 | ENV WANDB_ENTITY=${WANDB_ENTITY} 22 | 23 | ARG WANDB_API_KEY 24 | ENV WANDB_API_KEY=${WANDB_API_KEY} 25 | 26 | # ------------------------- 27 | # Install core APT packages. 28 | # ------------------------- 29 | ENV DEBIAN_FRONTEND=noninteractive 30 | RUN apt-get update && apt-get install -y \ 31 | # essential 32 | build-essential \ 33 | cmake \ 34 | ffmpeg \ 35 | g++-4.8 \ 36 | git \ 37 | curl \ 38 | docker.io \ 39 | vim \ 40 | wget \ 41 | unzip \ 42 | htop \ 43 | libjpeg-dev \ 44 | libpng-dev \ 45 | libavdevice-dev \ 46 | pkg-config \ 47 | # python 48 | python${PYTHON_VERSION} \ 49 | python${PYTHON_VERSION}-dev \ 50 | python3-tk \ 51 | python${PYTHON_VERSION}-distutils \ 52 | # opencv 53 | python3-opencv \ 54 | # set python 55 | && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \ 56 | && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 \ 57 | && rm -rf /var/lib/apt/lists/* 58 | 59 | # -------------------------------------------------- 60 | # We use 'mpirun' for launching distributed training. 
61 | # -------------------------------------------------- 62 | RUN mkdir /tmp/openmpi && \ 63 | cd /tmp/openmpi && \ 64 | wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.1.tar.gz && \ 65 | tar zxf openmpi-4.1.1.tar.gz && \ 66 | cd openmpi-4.1.1 && \ 67 | ./configure --enable-orterun-prefix-by-default && \ 68 | make -j $(nproc) all && \ 69 | make install && \ 70 | ldconfig && \ 71 | rm -rf /tmp/openmpi 72 | 73 | # Install OpenSSH for MPI to communicate between containers 74 | RUN apt-get update && apt-get install -y --no-install-recommends openssh-client openssh-server && \ 75 | mkdir -p /var/run/sshd 76 | 77 | # Allow OpenSSH to talk to containers without asking for confirmation 78 | RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ 79 | echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ 80 | mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config 81 | 82 | # ------------------------- 83 | # Install core PIP packages. 84 | # ------------------------- 85 | # Upgrade pip. 86 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ 87 | python get-pip.py && \ 88 | rm get-pip.py 89 | 90 | # Core tools. 91 | RUN pip install \ 92 | awscli==1.20.27 \ 93 | boto3==1.18.27 \ 94 | coloredlogs==15.0.1 \ 95 | hydra-core==1.1.1 \ 96 | matplotlib==3.4.3 \ 97 | mpi4py==3.1.1 \ 98 | numpy==1.20.3 \ 99 | pandas==1.3.2 \ 100 | requests==2.26.0 \ 101 | scikit-image==0.18.2 \ 102 | scipy==1.7.1 \ 103 | seaborn==0.11.2 \ 104 | tenacity==8.0.1 \ 105 | tqdm==4.62.2 \ 106 | wandb==0.12.0 107 | 108 | RUN pip install numba==0.54.0 Cython==0.29.24 pycocotools==2.0.2 nuscenes-devkit==1.1.7 109 | 110 | # Install pytorch 1.9+cu102 111 | RUN pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 -f https://download.pytorch.org/whl/torch_stable.html 112 | 113 | # Install fvcore and detectron2. 114 | ENV FVCORE_CACHE="/tmp" 115 | RUN pip install -U 'git+https://github.com/facebookresearch/fvcore' 116 | RUN python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html 117 | 118 | # Pre-built pytorch3d 119 | RUN pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu102_pyt190/download.html 120 | 121 | #----------------------- 122 | # Copy working directory 123 | #----------------------- 124 | ARG WORKSPACE 125 | COPY . ${WORKSPACE} 126 | 127 | ENV PYTHONPATH "${PYTHONPATH}:${WORKSPACE}/tridet/" 128 | 129 | WORKDIR ${WORKSPACE} 130 | -------------------------------------------------------------------------------- /docker/Dockerfile-cu111: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04 2 | 3 | ENV PYTHON_VERSION=3.8 4 | 5 | # ------------------------- 6 | # Optional: AWS credentials 7 | # ------------------------- 8 | ARG AWS_SECRET_ACCESS_KEY 9 | ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 10 | 11 | ARG AWS_ACCESS_KEY_ID 12 | ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 13 | 14 | ARG AWS_DEFAULT_REGION 15 | ENV AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} 16 | 17 | # ------------------------- 18 | # Optional: W&B credentials 19 | # ------------------------- 20 | ARG WANDB_ENTITY 21 | ENV WANDB_ENTITY=${WANDB_ENTITY} 22 | 23 | ARG WANDB_API_KEY 24 | ENV WANDB_API_KEY=${WANDB_API_KEY} 25 | 26 | # ------------------------- 27 | # Install core APT packages. 
28 | # ------------------------- 29 | ENV DEBIAN_FRONTEND=noninteractive 30 | RUN apt-get update && apt-get install -y \ 31 | # essential 32 | build-essential \ 33 | cmake \ 34 | ffmpeg \ 35 | g++-4.8 \ 36 | git \ 37 | curl \ 38 | docker.io \ 39 | vim \ 40 | wget \ 41 | unzip \ 42 | htop \ 43 | libjpeg-dev \ 44 | libpng-dev \ 45 | libavdevice-dev \ 46 | pkg-config \ 47 | # python 48 | python${PYTHON_VERSION} \ 49 | python${PYTHON_VERSION}-dev \ 50 | python3-tk \ 51 | python${PYTHON_VERSION}-distutils \ 52 | # opencv 53 | python3-opencv \ 54 | # set python 55 | && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \ 56 | && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 \ 57 | && rm -rf /var/lib/apt/lists/* 58 | 59 | # -------------------------------------------------- 60 | # We use 'mpirun' for launching distributed training. 61 | # -------------------------------------------------- 62 | RUN mkdir /tmp/openmpi && \ 63 | cd /tmp/openmpi && \ 64 | wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.1.tar.gz && \ 65 | tar zxf openmpi-4.1.1.tar.gz && \ 66 | cd openmpi-4.1.1 && \ 67 | ./configure --enable-orterun-prefix-by-default && \ 68 | make -j $(nproc) all && \ 69 | make install && \ 70 | ldconfig && \ 71 | rm -rf /tmp/openmpi 72 | 73 | # Install OpenSSH for MPI to communicate between containers 74 | RUN apt-get update && apt-get install -y --no-install-recommends openssh-client openssh-server && \ 75 | mkdir -p /var/run/sshd 76 | 77 | # Allow OpenSSH to talk to containers without asking for confirmation 78 | RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ 79 | echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ 80 | mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config 81 | 82 | # ------------------------- 83 | # Install core PIP packages. 84 | # ------------------------- 85 | # Upgrade pip. 86 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ 87 | python get-pip.py && \ 88 | rm get-pip.py 89 | 90 | # Core tools. 91 | RUN pip install \ 92 | awscli==1.20.27 \ 93 | boto3==1.18.27 \ 94 | coloredlogs==15.0.1 \ 95 | hydra-core==1.1.1 \ 96 | matplotlib==3.4.3 \ 97 | mpi4py==3.1.1 \ 98 | numpy==1.20.3 \ 99 | pandas==1.3.2 \ 100 | requests==2.26.0 \ 101 | scikit-image==0.18.2 \ 102 | scipy==1.7.1 \ 103 | seaborn==0.11.2 \ 104 | tenacity==8.0.1 \ 105 | tqdm==4.62.2 \ 106 | wandb==0.12.0 107 | 108 | RUN pip install numba==0.54.0 Cython==0.29.24 pycocotools==2.0.2 nuscenes-devkit==1.1.7 109 | 110 | # Install pytorch 1.9+cu111 111 | RUN pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html 112 | 113 | # Install fvcore and detectron2. 114 | ENV FVCORE_CACHE="/tmp" 115 | RUN pip install -U 'git+https://github.com/facebookresearch/fvcore' 116 | RUN python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.9/index.html 117 | 118 | # Pre-built pytorch3d 119 | RUN pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu111_pyt190/download.html 120 | 121 | #----------------------- 122 | # Copy working directory 123 | #----------------------- 124 | ARG WORKSPACE 125 | COPY . 
${WORKSPACE} 126 | 127 | ENV PYTHONPATH "${PYTHONPATH}:${WORKSPACE}/tridet/" 128 | 129 | WORKDIR ${WORKSPACE} 130 | -------------------------------------------------------------------------------- /media/figs/demo_dd3d_kitti_val_short.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/dd3d/56fad8ec9eb7fbd373953f49f9372120b4cd740c/media/figs/demo_dd3d_kitti_val_short.gif -------------------------------------------------------------------------------- /media/figs/tri-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/dd3d/56fad8ec9eb7fbd373953f49f9372120b4cd740c/media/figs/tri-logo.png -------------------------------------------------------------------------------- /scripts/visualize_dataloader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | import logging 4 | import sys 5 | from collections import defaultdict 6 | 7 | import cv2 8 | import hydra 9 | from tqdm import tqdm 10 | 11 | from detectron2.data import MetadataCatalog 12 | 13 | from tridet.data import build_test_dataloader, build_train_dataloader 14 | from tridet.data.dataset_mappers import get_dataset_mapper 15 | from tridet.data.datasets import register_datasets 16 | from tridet.utils.setup import setup 17 | from tridet.utils.visualization import mosaic 18 | from tridet.visualizers import get_dataloader_visualizer 19 | 20 | LOG = logging.getLogger('tridet') 21 | 22 | 23 | @hydra.main(config_path="../configs/", config_name="visualize_dataloader") 24 | def main(cfg): 25 | setup(cfg) 26 | dataset_names = register_datasets(cfg) 27 | if cfg.ONLY_REGISTER_DATASETS: 28 | return {}, cfg 29 | LOG.info(f"Registered {len(dataset_names)} datasets:" + '\n\t' + '\n\t'.join(dataset_names)) 30 | 31 | if cfg.USE_TEST: 32 | dataset_name = cfg.DATASETS.TEST.NAME 33 | mapper = get_dataset_mapper(cfg, is_train=False) 34 | dataloader, _ = build_test_dataloader(cfg, dataset_name, mapper=mapper) 35 | else: 36 | mapper = get_dataset_mapper(cfg, is_train=True) 37 | dataloader, _ = build_train_dataloader(cfg, mapper=mapper) 38 | 39 | visualizer_names = MetadataCatalog.get(cfg.DATASETS.TRAIN.NAME).loader_visualizers 40 | for batch_idx, batch in tqdm(enumerate(dataloader)): 41 | viz_images = defaultdict(dict) 42 | LOG.info("Press any key to continue, press 'q' to quit.") 43 | for viz_name in visualizer_names: 44 | viz = get_dataloader_visualizer(cfg, viz_name, cfg.DATASETS.TRAIN.NAME) 45 | for idx, x in enumerate(batch): 46 | viz_images[idx].update(viz.visualize(x)) 47 | 48 | for k in range(len(batch)): 49 | gt_viz = mosaic(list(viz_images[k].values())) 50 | cv2.imshow("dataloader", gt_viz[:, :, ::-1]) 51 | 52 | if cv2.waitKey(0) & 0xFF == ord('q'): 53 | sys.exit() 54 | 55 | 56 | if __name__ == '__main__': 57 | main() # pylint: disable=no-value-for-parameter 58 | -------------------------------------------------------------------------------- /tridet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | -------------------------------------------------------------------------------- /tridet/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from tridet.data.build import build_test_dataloader, build_train_dataloader, collect_dataset_dicts 3 | -------------------------------------------------------------------------------- /tridet/data/augmentations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | from tridet.data.augmentations.build import build_augmentation 3 | -------------------------------------------------------------------------------- /tridet/data/augmentations/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | # Adapted from detectron2: 4 | # https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py 5 | import logging 6 | 7 | from tridet.data.augmentations.color_transform import RandomBrightness, RandomContrast, RandomSaturation 8 | from tridet.data.augmentations.crop_transform import RandomCrop 9 | from tridet.data.augmentations.flip_transform import RandomFlip 10 | from tridet.data.augmentations.resize_transform import ResizeShortestEdge 11 | 12 | LOG = logging.getLogger(__name__) 13 | 14 | 15 | def build_augmentation(cfg, is_train): 16 | """ 17 | Changes from the original function: 18 | - Move `RandomCrop` augmentation here; it's originally in dataset_mapper 19 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/dataset_mapper.py#L89 20 | - `RandomFlip()` is configurable. This is mostly unused for now. 21 | - `RandomCrop` uses expanded version of `CropTransform`, which handles depth, intrinsics. 22 | - `ResizeShortestEdge` uses expanded version of `ResizeTransform`, which handles depth, intrinsics. 23 | """ 24 | if not cfg.INPUT.AUG_ENABLED: 25 | return [] 26 | augmentation = [] 27 | if cfg.INPUT.CROP.ENABLED and is_train: 28 | augmentation.append(RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) 29 | 30 | # Resize augmentation. 31 | if is_train: 32 | min_size = cfg.INPUT.RESIZE.MIN_SIZE_TRAIN 33 | max_size = cfg.INPUT.RESIZE.MAX_SIZE_TRAIN 34 | sample_style = cfg.INPUT.RESIZE.MIN_SIZE_TRAIN_SAMPLING 35 | else: 36 | min_size = cfg.INPUT.RESIZE.MIN_SIZE_TEST 37 | max_size = cfg.INPUT.RESIZE.MAX_SIZE_TEST 38 | sample_style = "choice" 39 | if min_size: 40 | augmentation.append(ResizeShortestEdge(min_size, max_size, sample_style)) 41 | 42 | if cfg.INPUT.RANDOM_FLIP.ENABLED and is_train: 43 | augmentation.append(RandomFlip()) 44 | 45 | if cfg.INPUT.COLOR_JITTER.ENABLED and is_train: 46 | brightness_lower, brightness_upper = cfg.INPUT.COLOR_JITTER.BRIGHTNESS 47 | brightness_min, brightness_max = 1. - brightness_lower, 1. + brightness_upper 48 | augmentation.append(RandomBrightness(brightness_min, brightness_max)) 49 | 50 | saturation_lower, saturation_upper = cfg.INPUT.COLOR_JITTER.SATURATION 51 | saturation_min, saturation_max = 1. - saturation_lower, 1. + saturation_upper 52 | augmentation.append(RandomSaturation(saturation_min, saturation_max)) 53 | 54 | contrast_lower, contrast_upper = cfg.INPUT.COLOR_JITTER.CONTRAST 55 | contrast_min, contrast_max = 1. - contrast_lower, 1. 
+ contrast_upper 56 | augmentation.append(RandomContrast(contrast_min, contrast_max)) 57 | 58 | if not augmentation: 59 | LOG.warning("No Augmentation!") 60 | return augmentation 61 | -------------------------------------------------------------------------------- /tridet/data/augmentations/color_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | # pylint: disable=unused-argument 3 | from fvcore.transforms.transform import BlendTransform 4 | 5 | from detectron2.data.transforms import RandomBrightness as _RandomBrightness 6 | from detectron2.data.transforms import RandomContrast as _RandomContrast 7 | from detectron2.data.transforms import RandomSaturation as _RandomSaturation 8 | 9 | 10 | def apply_no_op_intrinsics(blend_tfm, intrinsics): 11 | return intrinsics 12 | 13 | 14 | def apply_no_op_depth(blend_tfm, depth): 15 | return depth 16 | 17 | 18 | def apply_no_op_box3d(blend_tfm, box3d): 19 | return box3d 20 | 21 | 22 | # (dennis.park) Augment ResizeTransform to handle intrinsics, depth 23 | BlendTransform.register_type("intrinsics", apply_no_op_intrinsics) 24 | BlendTransform.register_type("depth", apply_no_op_depth) 25 | BlendTransform.register_type("box3d", apply_no_op_box3d) 26 | 27 | 28 | class RandomContrast(_RandomContrast): 29 | def get_transform(self, image): 30 | tfm = super().get_transform(image) 31 | return BlendTransform(tfm.src_image, tfm.src_weight, tfm.dst_weight) 32 | 33 | 34 | class RandomBrightness(_RandomBrightness): 35 | def get_transform(self, image): 36 | tfm = super().get_transform(image) 37 | return BlendTransform(tfm.src_image, tfm.src_weight, tfm.dst_weight) 38 | 39 | 40 | class RandomSaturation(_RandomSaturation): 41 | def get_transform(self, image): 42 | tfm = super().get_transform(image) 43 | return BlendTransform(tfm.src_image, tfm.src_weight, tfm.dst_weight) 44 | -------------------------------------------------------------------------------- /tridet/data/augmentations/crop_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
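# Extends detectron2's RandomCrop so that camera intrinsics and depth maps are cropped consistently with the image (see the CropTransform.register_type calls below).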
2 | import numpy as np 3 | from fvcore.transforms.transform import CropTransform 4 | 5 | from detectron2.data.transforms import RandomCrop as _RandomCrop 6 | 7 | 8 | def apply_imcrop_intrinsics(crop_tfm, intrinsics): 9 | assert intrinsics.shape == (3, 3) 10 | assert intrinsics[0, 1] == 0 # undistorted 11 | assert np.allclose(intrinsics, np.triu(intrinsics)) # check if upper triangular 12 | 13 | x0, y0 = crop_tfm.x0, crop_tfm.y0 14 | new_intrinsics = intrinsics.copy() 15 | new_intrinsics[0, 2] -= x0 16 | new_intrinsics[1, 2] -= y0 17 | 18 | return new_intrinsics 19 | 20 | 21 | def apply_imcrop_depth(crop_tfm, depth): 22 | assert len(depth.shape) == 2 23 | x0, y0, w, h = crop_tfm.x0, crop_tfm.y0, crop_tfm.w, crop_tfm.h 24 | return depth[y0:y0 + h, x0:x0 + w] 25 | 26 | 27 | def apply_imcrop_box3d(crop_tfm, box3d): # pylint: disable=unused-argument 28 | return box3d 29 | 30 | 31 | # (dennis.park) Augment ResizeTransform to handle intrinsics, depth 32 | CropTransform.register_type("intrinsics", apply_imcrop_intrinsics) 33 | CropTransform.register_type("depth", apply_imcrop_depth) 34 | CropTransform.register_type("box3d", apply_imcrop_box3d) 35 | 36 | 37 | class RandomCrop(_RandomCrop): 38 | def get_transform(self, image): 39 | tfm = super().get_transform(image) 40 | return CropTransform(tfm.x0, tfm.y0, tfm.w, tfm.h) 41 | -------------------------------------------------------------------------------- /tridet/data/augmentations/flip_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import numpy as np 3 | from fvcore.transforms.transform import HFlipTransform, NoOpTransform, VFlipTransform 4 | 5 | from detectron2.data.transforms import RandomFlip as _RandomFlip 6 | 7 | 8 | def apply_hflip_intrinsics(hflip_tfm, intrinsics): 9 | intrinsics[0, 2] = hflip_tfm.width - intrinsics[0, 2] 10 | return intrinsics 11 | 12 | 13 | def apply_vflip_intrinsics(vflip_tfm, intrinsics): 14 | intrinsics[1, 2] = vflip_tfm.height - intrinsics[1, 2] 15 | return intrinsics 16 | 17 | 18 | def apply_hflip_depth(hflip_tfm, depth): # pylint: disable=unused-argument 19 | assert depth.ndim == 2 20 | return np.flip(depth, axis=1).copy() 21 | 22 | 23 | def apply_vflip_depth(vflip_tfm, depth): # pylint: disable=unused-argument 24 | assert depth.ndim == 2 25 | return np.flip(depth, axis=0).copy() 26 | 27 | 28 | def apply_hflip_box3d(hflip_tfm, box3d): # pylint: disable=unused-argument 29 | """Horizontally flip 3D box. 30 | 31 | CAVEAT: This function makes assumption about the object symmetry wrt *y=0* plane. 32 | 33 | new quaternion: [quat.z, -quat.y, -quat.x, quat.w] 34 | https://stackoverflow.com/questions/32438252/efficient-way-to-apply-mirror-effect-on-quaternion-rotation 35 | 36 | Parameters 37 | ---------- 38 | hflip_tfm: HFlipTransform 39 | 40 | box3d: np.array 41 | 10D representation of 3D box. quaternion (4) + location (3) + dimension (3) 42 | 43 | Returns 44 | ------- 45 | np.array 46 | 10D representation of flipped 3D box. 
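    Notes
    -----
    Only the x-component of the translation is negated and the box dimensions are left unchanged; the quaternion is remapped according to the formula above.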
47 | """ 48 | quat, tvec, dims = box3d[:4], box3d[4:7], box3d[7:] 49 | 50 | quat_new = np.float32([quat[3], -quat[2], -quat[1], quat[0]]) 51 | tvec_new = tvec.copy() 52 | tvec_new[0] = -tvec_new[0] 53 | dims_new = dims.copy() 54 | return np.concatenate([quat_new, tvec_new, dims_new]) 55 | 56 | 57 | def apply_vflip_box3d(vflip_tfm, box3d): # pylint: disable=unused-argument 58 | # TODO 59 | raise NotImplementedError() 60 | 61 | 62 | HFlipTransform.register_type("intrinsics", apply_hflip_intrinsics) 63 | HFlipTransform.register_type("depth", apply_hflip_depth) 64 | HFlipTransform.register_type("box3d", apply_hflip_box3d) 65 | 66 | VFlipTransform.register_type("intrinsics", apply_vflip_intrinsics) 67 | VFlipTransform.register_type("depth", apply_vflip_depth) 68 | VFlipTransform.register_type("box3d", apply_vflip_box3d) 69 | 70 | 71 | class RandomFlip(_RandomFlip): 72 | def get_transform(self, image): 73 | tfm = super().get_transform(image) 74 | if isinstance(tfm, NoOpTransform): 75 | return tfm 76 | elif isinstance(tfm, HFlipTransform): 77 | return HFlipTransform(tfm.width) 78 | else: 79 | assert isinstance(tfm, VFlipTransform) 80 | return VFlipTransform(tfm.height) 81 | -------------------------------------------------------------------------------- /tridet/data/augmentations/resize_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | # pylint: disable=unused-argument 3 | import cv2 4 | import numpy as np 5 | 6 | from detectron2.data.transforms import ResizeShortestEdge as _ResizeShortestEdge 7 | from detectron2.data.transforms import ResizeTransform 8 | 9 | CV2_INTERPOLATION_MODES = {"nearest": cv2.INTER_NEAREST, "linear": cv2.INTER_LINEAR, "cubic": cv2.INTER_CUBIC} 10 | DEFAULT_DEPTH_INTERPOLOATION_MODE = "nearest" 11 | 12 | 13 | def apply_imresize_intrinsics(resize_tfm, intrinsics): 14 | assert intrinsics.shape == (3, 3) 15 | assert intrinsics[0, 1] == 0 # undistorted 16 | assert np.allclose(intrinsics, np.triu(intrinsics)) # check if upper triangular 17 | 18 | factor_x = resize_tfm.new_w / resize_tfm.w 19 | factor_y = resize_tfm.new_h / resize_tfm.h 20 | new_intrinsics = intrinsics * np.float32([factor_x, factor_y, 1]).reshape(3, 1) # pylint: disable=too-many-function-args 21 | return new_intrinsics 22 | 23 | 24 | def apply_imresize_depth(resize_tfm, depth): 25 | assert depth.shape == (resize_tfm.h, resize_tfm.w) 26 | interp = CV2_INTERPOLATION_MODES[DEFAULT_DEPTH_INTERPOLOATION_MODE] 27 | resized_depth = cv2.resize(depth, (resize_tfm.new_w, resize_tfm.new_h), interpolation=interp) 28 | return resized_depth 29 | 30 | 31 | def resize_depth_preserve(resize_tfm, depth): 32 | """ 33 | Adapted from: 34 | https://github.com/TRI-ML/packnet-sfm_internal/blob/919ab604ae2319e4554d3b588877acfddf877f9c/packnet_sfm/datasets/augmentations.py#L93 35 | 36 | ------------------------------------------------------------------------------------------------------------------- 37 | 38 | Resizes depth map preserving all valid depth pixels 39 | Multiple downsampled points can be assigned to the same pixel. 
40 | Parameters 41 | ---------- 42 | depth : np.array [h,w] 43 | Depth map 44 | shape : tuple (H,W) 45 | Output shape 46 | Returns 47 | ------- 48 | depth : np.array [H,W,1] 49 | Resized depth map 50 | """ 51 | assert depth.shape == (resize_tfm.h, resize_tfm.w) 52 | 53 | new_shape = (resize_tfm.new_h, resize_tfm.new_w) 54 | 55 | h, w = depth.shape 56 | x = depth.reshape(-1) 57 | # Create coordinate grid 58 | uv = np.mgrid[:h, :w].transpose(1, 2, 0).reshape(-1, 2) 59 | # Filters valid points 60 | idx = x > 0 61 | crd, val = uv[idx], x[idx] 62 | # Downsamples coordinates 63 | crd[:, 0] = (crd[:, 0] * (new_shape[0] / h)).astype(np.int32) 64 | crd[:, 1] = (crd[:, 1] * (new_shape[1] / w)).astype(np.int32) 65 | # Filters points inside image 66 | idx = (crd[:, 0] < new_shape[0]) & (crd[:, 1] < new_shape[1]) 67 | crd, val = crd[idx], val[idx] 68 | # Creates downsampled depth image and assigns points 69 | resized_depth = np.zeros(new_shape) 70 | resized_depth[crd[:, 0], crd[:, 1]] = val 71 | return resized_depth 72 | 73 | 74 | def apply_imresize_box3d(resize_tfm, box3d): 75 | return box3d 76 | 77 | 78 | # (dennis.park) Augment ResizeTransform to handle intrinsics, depth 79 | ResizeTransform.register_type("intrinsics", apply_imresize_intrinsics) 80 | # ResizeTransform.register_type("depth", apply_imresize_depth) 81 | ResizeTransform.register_type("depth", resize_depth_preserve) 82 | ResizeTransform.register_type("box3d", apply_imresize_box3d) 83 | 84 | 85 | class ResizeShortestEdge(_ResizeShortestEdge): 86 | def get_transform(self, image): 87 | tfm = super().get_transform(image) 88 | return ResizeTransform(tfm.h, tfm.w, tfm.new_h, tfm.new_w) 89 | -------------------------------------------------------------------------------- /tridet/data/dataset_mappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | # pylint: disable=no-value-for-parameter, redundant-keyword-arg 3 | from tridet.data.dataset_mappers.dataset_mapper import DefaultDatasetMapper 4 | from tridet.data.dataset_mappers.nuscenes_mapper import NuscenesDatasetMapper 5 | 6 | 7 | def get_dataset_mapper(cfg, is_train=True): 8 | if is_train: 9 | dataset_mapper_name = cfg.DATASETS.TRAIN.DATASET_MAPPER 10 | else: 11 | dataset_mapper_name = cfg.DATASETS.TEST.DATASET_MAPPER 12 | 13 | if dataset_mapper_name == "default": 14 | return DefaultDatasetMapper(cfg, is_train=is_train) 15 | elif dataset_mapper_name == "nuscenes": 16 | return NuscenesDatasetMapper(cfg, is_train=is_train) 17 | else: 18 | raise ValueError(f"Invalid dataset mapper: {dataset_mapper_name}") 19 | -------------------------------------------------------------------------------- /tridet/data/dataset_mappers/nuscenes_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import torch 3 | 4 | from tridet.data.dataset_mappers import DefaultDatasetMapper 5 | 6 | 7 | class NuscenesDatasetMapper(DefaultDatasetMapper): 8 | """ 9 | In addition to 2D / 3D boxes, each instance also has attribute and speed. 10 | 11 | Assumption: image transformation does not change attributes and speed. 
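    A minimal usage sketch of the extra fields added below (the content of 'dataset_dict' is assumed to follow the default mapper's output format):

        mapper = NuscenesDatasetMapper(cfg, is_train=True)
        out = mapper(dataset_dict)
        out['instances'].gt_attributes  # (N,) int64 tensor of per-instance attribute ids
        out['instances'].gt_speeds      # (N,) float32 tensor of per-instance speed magnitudes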
12 | """ 13 | def __call__(self, dataset_dict): 14 | dataset_dict = super().__call__(dataset_dict) 15 | 16 | annos = dataset_dict['annotations'] 17 | 18 | # NuScenes attributes 19 | attributes = [obj["attribute_id"] for obj in annos] 20 | attributes = torch.tensor(attributes, dtype=torch.int64) 21 | dataset_dict['instances'].gt_attributes = attributes 22 | 23 | # Speed (magnitude of velocity) 24 | speeds = [obj["speed"] for obj in annos] 25 | speeds = torch.tensor(speeds, dtype=torch.float32) 26 | dataset_dict['instances'].gt_speeds = speeds 27 | return dataset_dict 28 | -------------------------------------------------------------------------------- /tridet/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | import random 4 | from functools import partial 5 | 6 | from detectron2.data import DatasetCatalog 7 | 8 | from tridet.data.datasets.kitti_3d import register_kitti_3d_datasets 9 | from tridet.data.datasets.nuscenes import register_nuscenes_datasets 10 | 11 | 12 | def register_datasets(cfg): 13 | train_dataset_name = cfg.DATASETS.TRAIN.NAME 14 | test_dataset_name = cfg.DATASETS.TEST.NAME 15 | 16 | required_datasets = [train_dataset_name, test_dataset_name] 17 | 18 | dataset_names = [] 19 | dataset_names.extend(register_kitti_3d_datasets(required_datasets, cfg)) 20 | dataset_names.extend(register_nuscenes_datasets(required_datasets, cfg)) 21 | if cfg.ONLY_REGISTER_DATASETS: 22 | for name in dataset_names: 23 | DatasetCatalog.get(name) 24 | return dataset_names 25 | 26 | 27 | def random_sample_dataset_dicts(dataset_name, num_samples=10): 28 | dataset_dicts = DatasetCatalog.get(dataset_name) 29 | num_samples = min(num_samples, len(dataset_dicts)) 30 | random.seed(42) 31 | if num_samples > 0: 32 | inds = random.sample(range(len(dataset_dicts)), k=num_samples) 33 | else: 34 | # Use all dataset items. 35 | inds = list(range(len(dataset_dicts))) 36 | samples = [dataset_dicts[i] for i in inds] 37 | return samples, inds 38 | -------------------------------------------------------------------------------- /tridet/data/datasets/kitti_3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | import logging 3 | import os 4 | from functools import partial 5 | 6 | from detectron2.data import DatasetCatalog 7 | 8 | from tridet.data.datasets.kitti_3d.build import build_monocular_kitti3d_dataset, register_kitti_3d_metadata 9 | 10 | LOG = logging.getLogger(__name__) 11 | 12 | KITTI_ROOT = 'KITTI3D' 13 | 14 | DATASET_DICTS_BUILDER = { 15 | # Monocular datasets 16 | "kitti_3d_train": (build_monocular_kitti3d_dataset, dict(mv3d_split='train')), 17 | "kitti_3d_train_project_box3d": (build_monocular_kitti3d_dataset, dict(mv3d_split='train', box2d_from_box3d=True)), 18 | "kitti_3d_train_right_cam": (build_monocular_kitti3d_dataset, dict(mv3d_split='train', sensors=('camera_3', ))), 19 | "kitti_3d_train_both_cams": 20 | (build_monocular_kitti3d_dataset, dict(mv3d_split='train', sensors=('camera_2', 'camera_3'))), 21 | "kitti_3d_val": (build_monocular_kitti3d_dataset, dict(mv3d_split='val')), 22 | "kitti_3d_trainval": (build_monocular_kitti3d_dataset, dict(mv3d_split='trainval')), 23 | "kitti_3d_test": (build_monocular_kitti3d_dataset, dict(mv3d_split='test')), 24 | "kitti_3d_overfit": (build_monocular_kitti3d_dataset, dict(mv3d_split='train', max_num_items=32)), 25 | } 26 | 27 | METADATA_BUILDER = {name: (register_kitti_3d_metadata, {}) for name in DATASET_DICTS_BUILDER.keys()} 28 | 29 | 30 | def register_kitti_3d_datasets(required_datasets, cfg): 31 | kitti_3d_datasets = sorted(list(set(required_datasets).intersection(DATASET_DICTS_BUILDER.keys()))) 32 | if kitti_3d_datasets: 33 | LOG.info(f"KITTI-3D dataset(s): {', '.join(kitti_3d_datasets)} ") 34 | for name in kitti_3d_datasets: 35 | fn, kwargs = DATASET_DICTS_BUILDER[name] 36 | kwargs.update({'root_dir': os.path.join(cfg.DATASET_ROOT, KITTI_ROOT)}) 37 | DatasetCatalog.register(name, partial(fn, **kwargs)) 38 | 39 | fn, kwargs = METADATA_BUILDER[name] 40 | kwargs.update({'coco_cache_dir': cfg.TMP_DIR}) 41 | fn(name, **kwargs) 42 | return kitti_3d_datasets 43 | -------------------------------------------------------------------------------- /tridet/data/datasets/nuscenes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | import os 4 | from functools import partial 5 | 6 | from detectron2.data import DatasetCatalog 7 | from detectron2.utils.comm import get_world_size 8 | 9 | from tridet.data.datasets.nuscenes.build import build_nuscenes_dataset, register_nuscenes_metadata 10 | 11 | LOG = logging.getLogger(__name__) 12 | 13 | NUSCENES_ROOT = "nuScenes" 14 | 15 | NUSC_DATASET_NAMES = [ 16 | "nusc_train", 17 | "nusc_val", 18 | "nusc_val-subsample-8", 19 | "nusc_trainval", 20 | "nusc_test", 21 | "nusc_mini_train", 22 | "nusc_mini_val", 23 | ] 24 | 25 | DATASET_DICTS_BUILDER = {name: (build_nuscenes_dataset, dict(name=name)) for name in NUSC_DATASET_NAMES} 26 | 27 | METADATA_BUILDER = {name: (register_nuscenes_metadata, {}) for name in DATASET_DICTS_BUILDER.keys()} 28 | 29 | 30 | def register_nuscenes_datasets(required_datasets, cfg): 31 | if cfg.DATASETS.TEST.NAME in ("nusc_train", "nusc_val", "nusc_trainval", "nusc_test") and \ 32 | get_world_size() > 1: 33 | LOG.warning("The distributed evaluation does not work well with large test set for now. 
" \ 34 | f"If program hangs, consider using non-distributed evaluation: {cfg.DATASETS.TEST.NAME}") 35 | 36 | nusc_datasets = sorted(list(set(required_datasets).intersection(DATASET_DICTS_BUILDER.keys()))) 37 | if nusc_datasets: 38 | LOG.info(f"nuScenes-3D dataset(s): {', '.join(nusc_datasets)} ") 39 | for name in nusc_datasets: 40 | fn, kwargs = DATASET_DICTS_BUILDER[name] 41 | kwargs.update({ 42 | 'root_dir': os.path.join(cfg.DATASET_ROOT, NUSCENES_ROOT), 43 | 'min_num_lidar_points': cfg.DATASETS.TRAIN.MIN_NUM_LIDAR_PTS, 44 | 'min_box_visibility': cfg.DATASETS.TRAIN.MIN_BOX_VISIBILITY 45 | }) 46 | DatasetCatalog.register(name, partial(fn, **kwargs)) 47 | 48 | fn, kwargs = METADATA_BUILDER[name] 49 | fn(name, **kwargs) 50 | return nusc_datasets 51 | -------------------------------------------------------------------------------- /tridet/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | from tridet.data.samplers.group_sampler import InferenceGroupSampler 3 | -------------------------------------------------------------------------------- /tridet/data/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | from torch.utils.data.sampler import Sampler 3 | 4 | from detectron2.utils import comm 5 | 6 | 7 | class InferenceGroupSampler(Sampler): 8 | """ 9 | Assumptions: 10 | 1) The dataset consists of in-order groups, i.e. [*group-1-items, *group-2-items, ...] 11 | 2) In the dataloader, per-gpu batch size (i.e. total_batch_size / world_size) must be 12 | a multiple of the group size. CAVEAT: this may cause CUDA OOM. 13 | """ 14 | def __init__(self, total_size, group_size): 15 | """ 16 | Args: 17 | size (int): the total number of data of the underlying dataset to sample from 18 | """ 19 | assert total_size > 0 and group_size > 0 20 | assert total_size % group_size == 0, \ 21 | f"The total size must be divisible by group size: total size={total_size}, group size={group_size}" 22 | 23 | self._total_size = total_size 24 | self._group_size = group_size 25 | self._rank = comm.get_rank() 26 | self._world_size = comm.get_world_size() 27 | 28 | self._num_groups = total_size // group_size 29 | 30 | shard_size = ((self._num_groups - 1) // self._world_size + 1) * self._group_size 31 | 32 | # shard_size = (self._total_size - 1) // self._world_size + 1 33 | begin = shard_size * self._rank 34 | end = min(shard_size * (self._rank + 1), self._total_size) 35 | self._local_indices = range(begin, end) 36 | 37 | def __iter__(self): 38 | yield from self._local_indices 39 | 40 | def __len__(self): 41 | return len(self._local_indices) 42 | -------------------------------------------------------------------------------- /tridet/data/transform_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
3 | # Adapted from detectron2: 4 | # https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py 5 | import numpy as np 6 | import torch 7 | 8 | from detectron2.data import transforms as T 9 | from detectron2.structures import Boxes, BoxMode, Instances 10 | 11 | from tridet.structures.boxes3d import Boxes3D 12 | 13 | __all__ = ["transform_instance_annotations", "annotations_to_instances"] 14 | 15 | 16 | def transform_instance_annotations( 17 | annotation, 18 | transforms, 19 | image_size, 20 | ): 21 | """Adapted from: 22 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py#L254 23 | 24 | The changes from original: 25 | - The presence of 2D bounding box (i.e. "bbox" field) is assumed by default in d2; here it's optional. 26 | - Add optional 3D bounding box support. 27 | - If the instance mask annotation is in RLE, then it's decoded into polygons, not bitmask, to save memory. 28 | 29 | =============================================================================================================== 30 | 31 | Apply transforms to box, segmentation and keypoints annotations of a single instance. 32 | 33 | It will use `transforms.apply_box` for the box, and 34 | `transforms.apply_coords` for segmentation polygons & keypoints. 35 | If you need anything more specially designed for each data structure, 36 | you'll need to implement your own version of this function or the transforms. 37 | 38 | Args: 39 | annotation (dict): dict of instance annotations for a single instance. 40 | It will be modified in-place. 41 | transforms (TransformList or list[Transform]): 42 | image_size (tuple): the height, width of the transformed image 43 | keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. 44 | 45 | Returns: 46 | dict: 47 | the same input dict with fields "bbox", "segmentation", "keypoints" 48 | transformed according to `transforms`. 49 | The "bbox_mode" field will be set to XYXY_ABS. 50 | """ 51 | if isinstance(transforms, (tuple, list)): 52 | transforms = T.TransformList(transforms) 53 | # (dennis.park) Here 2D bounding box is optional. 54 | if "bbox" in annotation: 55 | assert "bbox_mode" in annotation, "'bbox' is present, but 'bbox_mode' is not." 56 | # bbox is 1d (per-instance bounding box) 57 | bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) 58 | bbox = transforms.apply_box(np.array([bbox]))[0] 59 | # clip transformed bbox to image size 60 | bbox = bbox.clip(min=0) 61 | bbox = np.minimum(bbox, list(image_size + image_size)[::-1]) 62 | annotation["bbox"] = bbox 63 | annotation["bbox_mode"] = BoxMode.XYXY_ABS 64 | 65 | # Vertical flipping is not implemented (`flip_transform.py`). TODO: implement if needed. 66 | if "bbox3d" in annotation: 67 | bbox3d = np.array(annotation["bbox3d"]) 68 | annotation['bbox3d'] = transforms.apply_box3d(bbox3d) 69 | 70 | return annotation 71 | 72 | 73 | def _create_empty_instances(image_size): 74 | target = Instances(image_size) 75 | 76 | target.gt_boxes = Boxes([]) 77 | target.gt_classes = torch.tensor([], dtype=torch.int64) 78 | target.gt_boxes3d = Boxes3D.from_vectors([], torch.eye(3, dtype=torch.float32)) 79 | 80 | return target 81 | 82 | 83 | def annotations_to_instances( 84 | annos, 85 | image_size, 86 | intrinsics=None, 87 | ): 88 | """ 89 | Create an :class:`Instances` object used by the models, 90 | from instance annotations in the dataset dict. 
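    Compared to the detectron2 original, this version also builds "gt_boxes3d" from the optional "bbox3d" annotation field, which requires 'intrinsics' to be provided.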
91 | 92 | Args: 93 | annos (list[dict]): a list of instance annotations in one image, each 94 | element for one instance. 95 | image_size (tuple): height, width 96 | 97 | Returns: 98 | Instances: 99 | It will contain fields "gt_boxes", "gt_classes", 100 | "gt_masks", "gt_keypoints", if they can be obtained from `annos`. 101 | This is the format that builtin models expect. 102 | """ 103 | if len(annos) == 0: 104 | return _create_empty_instances(image_size) 105 | 106 | boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] 107 | target = Instances(image_size) 108 | target.gt_boxes = Boxes(boxes) 109 | 110 | classes = [obj["category_id"] for obj in annos] 111 | classes = torch.tensor(classes, dtype=torch.int64) 112 | target.gt_classes = classes 113 | 114 | if len(annos) and "bbox3d" in annos[0]: 115 | assert intrinsics is not None 116 | target.gt_boxes3d = Boxes3D.from_vectors([anno['bbox3d'] for anno in annos], intrinsics) 117 | if len(target.gt_boxes3d) != target.gt_boxes.tensor.shape[0]: 118 | raise ValueError( 119 | f"The sizes of `gt_boxes3d` and `gt_boxes` do not match: a={len(target.gt_boxes3d)}, b={target.gt_boxes.tensor.shape[0]}." 120 | ) 121 | 122 | return target 123 | -------------------------------------------------------------------------------- /tridet/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import inspect 3 | import logging 4 | import os 5 | 6 | from detectron2.evaluation import COCOEvaluator, SemSegEvaluator 7 | 8 | from tridet.data.datasets.nuscenes import NUSCENES_ROOT 9 | from tridet.evaluators.kitti_3d_evaluator import KITTI3DEvaluator 10 | from tridet.evaluators.nuscenes_evaluator import NuscenesEvaluator 11 | from tridet.utils.comm import is_distributed 12 | 13 | LOG = logging.getLogger('tridet') 14 | 15 | AVAILABLE_EVALUATORS = ["coco_evaluator", "kitti3d_evaluator", "nuscenes_evaluator"] 16 | 17 | 18 | def get_evaluator(cfg, dataset_name, evaluator_name, output_dir): 19 | assert evaluator_name in AVAILABLE_EVALUATORS, f"Invalid evaluator name: {evaluator_name}." 20 | 21 | distributed = is_distributed() 22 | 23 | if evaluator_name == "coco_evaluator": 24 | tasks = [] 25 | assert cfg.MODEL.BOX2D_ON 26 | tasks.append('bbox') 27 | return COCOEvaluator(dataset_name, tuple(tasks), distributed=distributed, output_dir=output_dir) 28 | elif evaluator_name == "kitti3d_evaluator": 29 | return KITTI3DEvaluator( 30 | dataset_name=dataset_name, 31 | iou_thresholds=cfg.EVALUATORS.KITTI3D.IOU_THRESHOLDS, 32 | only_prepare_submission=cfg.EVALUATORS.KITTI3D.ONLY_PREPARE_SUBMISSION, 33 | distributed=distributed, 34 | output_dir=output_dir, 35 | ) 36 | elif evaluator_name == "nuscenes_evaluator": 37 | nusc_root = os.path.join(cfg.DATASET_ROOT, NUSCENES_ROOT) 38 | return NuscenesEvaluator(nusc_root=nusc_root, dataset_name=dataset_name, output_dir=output_dir) 39 | -------------------------------------------------------------------------------- /tridet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from tridet.layers.bev_nms import bev_nms 3 | from tridet.layers.iou_loss import IOULoss 4 | from tridet.layers.smooth_l1_loss import smooth_l1_loss 5 | -------------------------------------------------------------------------------- /tridet/layers/bev_nms.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | 4 | import numpy as np 5 | import torch 6 | from pytorch3d.transforms import transform3d as t3d 7 | 8 | from detectron2.layers.nms import batched_nms_rotated 9 | from detectron2.structures import RotatedBoxes 10 | 11 | from tridet.structures.pose import Pose 12 | 13 | LOG = logging.getLogger(__name__) 14 | 15 | # yapf: disable 16 | # ------------------------------- 17 | # Convention of reference frames. 18 | # ------------------------------- 19 | # Rotation from "camera" frame to "vehicle" frame. 20 | # |------------------|--------------------------------| 21 | # | Camera | Vehicle | Interpretation in Vehicle frame| 22 | # |------------------|--------------------------------| 23 | # | z | x | forward | 24 | # | x | -y | right | 25 | # | y | -z | down | 26 | # |------------------|--------------------------------| 27 | CAMERA_TO_VEHICLE_ROTATION = Pose.from_matrix(np.float32([ 28 | [ 0, 0, 1, 0], 29 | [-1, 0, 0, 0], 30 | [ 0, -1, 0, 0], 31 | [ 0, 0, 0, 1] 32 | ])) 33 | 34 | # Rotation from "vehicle" frame to "bev" frame. 35 | # |------------------|---------------------------------| 36 | # | Vehicle | BEV | Interpretation in Vehicle frame | 37 | # |------------------|---------------------------------| 38 | # | x | -y | forward | 39 | # | y | -x | left | 40 | # | z | -z | up | 41 | # |------------------|---------------------------------| 42 | VEHICLE_TO_BEV_ROTATION = Pose.from_matrix(np.float32([ 43 | [ 0, -1, 0, 0], 44 | [-1, 0, 0, 0], 45 | [ 0, 0, -1, 0], 46 | [ 0, 0, 0, 1] 47 | ])) 48 | # yapf: enable 49 | 50 | 51 | def boxes3d_to_rotated_boxes( 52 | boxes3d, pose_cam_global=CAMERA_TO_VEHICLE_ROTATION, pose_global_bev=VEHICLE_TO_BEV_ROTATION, use_top_surface=True 53 | ): 54 | """ 55 | 56 | Parameters 57 | ---------- 58 | boxes3d: Boxes3D 59 | 3D boxes in camera frame. 60 | pose_cam_global: Pose 61 | Transformation from sensor (camera) frame to global frame. Depending on the context, global frame can be 62 | "vehicle" frame which moves along with the vehicle, or "world" frame which is fixed in the world. 63 | By default, it is an axis-swapping rotation that convert pinhole camera frame to Vehicle frame, i.e. 64 | x: forward, y: left, z: up (see above for detail.) 65 | with no translation (i.e. moves along with camera). 66 | pose_global_bev: Pose 67 | Transformation from global frame to bird-eye-view frame. By default, "forward" matches with "up" of BEV image, 68 | By default, it is an axis-swapping rotation that converts Vehicle frame to BEV frame (see above for detail.) 69 | with no translation. 70 | """ 71 | if use_top_surface: 72 | vertice_inds = [0, 1, 5, 4] # (front-left, front-right, back-right, back-left) of top surface. 73 | else: 74 | # use bottom surface. 75 | vertice_inds = [3, 2, 6, 7] # (front-left, front-right, back-right, back-left) of bottom surface. 76 | 77 | surface = boxes3d.corners[:, vertice_inds, :] 78 | pose_cam_bev = pose_global_bev * pose_cam_global 79 | cam_to_bev = t3d.Transform3d(matrix=surface.new_tensor(pose_cam_bev.matrix.T)) # Need to transpose! 80 | # Assumpiton: this is close to rectangles. TODO: assert it? 
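    # Project the selected box corners into the BEV plane and keep only (x, y); the BEV z-coordinate is dropped.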
81 | rot_boxes_bev = cam_to_bev.transform_points(surface)[:, :, :2] 82 | 83 | # length/width of objects are equivalent to "height"/width of RotatedBoxes 84 | length = torch.norm(rot_boxes_bev[:, 0, :] - rot_boxes_bev[:, 3, :], dim=1).abs() 85 | width = torch.norm(rot_boxes_bev[:, 0, :] - rot_boxes_bev[:, 1, :], dim=1).abs() 86 | 87 | center = torch.mean(rot_boxes_bev[:, [0, 2], :], dim=1) 88 | center_x, center_y = center[:, 0], center[:, 1] 89 | 90 | forward = rot_boxes_bev[:, 0, :] - rot_boxes_bev[:, 3, :] 91 | # CCW-angle, i.e. rotation wrt -z (or "up") in BEV frame. 92 | angle = torch.atan2(forward[:, 0], forward[:, 1]) 93 | angle = 180. / np.pi * angle 94 | 95 | rot_boxes = RotatedBoxes(torch.stack([center_x, center_y, width, length, angle], dim=1)) 96 | return rot_boxes 97 | 98 | 99 | def bev_nms( 100 | boxes3d, scores, iou_threshold, pose_cam_global=CAMERA_TO_VEHICLE_ROTATION, class_idxs=None, class_agnostic=False 101 | ): 102 | """ 103 | 104 | Parameters 105 | ---------- 106 | boxes3d: Boxes3D 107 | 3D boxes in camera frame. 108 | 109 | scores: Tensor 110 | 1D score vector. Must be of same size 'boxes3d' 111 | 112 | iou_threshold: float 113 | Two rotated boxes in BEV frame cannot overlap (according to IoU) more than this threshold. 114 | 115 | class_idxs: Tensor or None 116 | If not None, 1D integer vector. Must be of same size 'boxes3d' 117 | 118 | class_agnostic: bool 119 | If True, then category ID is not considered in NMS. 120 | If False, then NMS is performed per-cateogry ('class_idxs' must not be None.) 121 | 122 | Returns 123 | ------- 124 | keep: Tensor 125 | 1D integer vector that contains filtered indices to 'boxes3d' to keep after NMS. 126 | """ 127 | rot_boxes = boxes3d_to_rotated_boxes(boxes3d, pose_cam_global=pose_cam_global) 128 | if class_agnostic: 129 | class_idxs = torch.zeros_like(scores, dtype=torch.int64) 130 | else: 131 | assert class_idxs is not None 132 | keep = batched_nms_rotated(rot_boxes.tensor, scores, class_idxs, iou_threshold) 133 | return keep 134 | -------------------------------------------------------------------------------- /tridet/layers/conv_bn_fpn_layers.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from detectron2.layers import Conv2d, ShapeSpec 5 | from torch import nn 6 | 7 | from tridet.layers.normalization import get_norm 8 | from tridet.layers.separable_conv2d import ACTIVATIONS 9 | from tridet.modeling.dd3d.utils import get_fpn_out_channels 10 | 11 | 12 | class ConvBnFpnLayers(nn.Module): 13 | """ 14 | """ 15 | def __init__( 16 | self, 17 | num_layers, 18 | input_shape, 19 | norm_kwargs={}, 20 | kernel_size=3, 21 | activation='gelu', 22 | groups=1, 23 | extra_input_dim=0, 24 | use_input_dim=True, 25 | output_dim=None, 26 | ): 27 | super().__init__() 28 | assert kernel_size % 2 == 1, "'kernel_size' must be odd." 29 | self._input_shape = input_shape 30 | self._extra_input_dim = extra_input_dim 31 | num_levels = len(input_shape) 32 | channels = get_fpn_out_channels(input_shape) 33 | 34 | if not use_input_dim: 35 | assert output_dim is not None, "'output_dim' must be given, if 'use_input_dim=False'." 
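        # 'extra_input_dim' widens only the first conv layer's input; the remaining layers take 'out_channels' channels (see the loop below).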
36 | input_dim = channels + extra_input_dim 37 | out_channels = input_dim if use_input_dim else output_dim 38 | self._out_channels = out_channels 39 | 40 | conv_layers = [] 41 | for l in range(num_layers): 42 | in_channels = input_dim if l == 0 else out_channels 43 | # Build convolution layers 44 | conv_kwargs = dict( 45 | in_channels=in_channels, 46 | out_channels=out_channels, 47 | kernel_size=kernel_size, 48 | stride=1, 49 | padding=kernel_size // 2, 50 | bias=False, # BN is applied manually in forward() 51 | norm=None, 52 | activation=None, # activation is applied manually in forward() 53 | groups=groups 54 | ) 55 | conv_layers.append(Conv2d(**conv_kwargs)) 56 | self.conv_layers = nn.ModuleList(conv_layers) 57 | 58 | # Define a BN layer per each (level, layer). 59 | self.bn_layers = nn.ModuleList() 60 | norm_kwargs = norm_kwargs or {} 61 | for _ in range(num_levels): 62 | self.bn_layers.append(nn.ModuleList([get_norm('BN', out_channels, norm_kwargs) for _ in range(num_layers)])) 63 | 64 | # Activation 65 | self.act = ACTIVATIONS[activation] 66 | 67 | self.init_weights() 68 | 69 | def output_shape(self): 70 | return [ 71 | ShapeSpec(channels=self._out_channels, height=x.height, width=x.width, stride=x.stride) 72 | for x in self._input_shape 73 | ] 74 | 75 | def init_weights(self): 76 | for conv in self.conv_layers: 77 | nn.init.kaiming_normal_(conv.weight) # mode = 'fan_in' 78 | 79 | def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: 80 | out = [] 81 | for level, _bn_layers in enumerate(self.bn_layers): # iterating over first bn dim first makes TS happy 82 | x_level = x[level] 83 | for conv, bn in zip(self.conv_layers, _bn_layers): 84 | x_level = conv(x_level) 85 | x_level = bn(x_level) 86 | x_level = self.act(x_level) 87 | out.append(x_level) 88 | return out 89 | -------------------------------------------------------------------------------- /tridet/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | # Adapted from AdelaiDet: 3 | # https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/iou_loss.py 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class IOULoss(nn.Module): 9 | """ 10 | Intersetion Over Union (IoU) loss which supports three 11 | different IoU computations: 12 | 13 | * IoU 14 | * Linear IoU 15 | * gIoU 16 | """ 17 | def __init__(self, loc_loss_type='iou'): 18 | super(IOULoss, self).__init__() 19 | self.loc_loss_type = loc_loss_type 20 | 21 | def forward(self, pred, target, weight=None): 22 | """ 23 | Args: 24 | pred: Nx4 predicted bounding boxes 25 | target: Nx4 target bounding boxes 26 | weight: N loss weight for each instance 27 | """ 28 | pred_left = pred[:, 0] 29 | pred_top = pred[:, 1] 30 | pred_right = pred[:, 2] 31 | pred_bottom = pred[:, 3] 32 | 33 | target_left = target[:, 0] 34 | target_top = target[:, 1] 35 | target_right = target[:, 2] 36 | target_bottom = target[:, 3] 37 | 38 | target_aera = (target_left + target_right) * \ 39 | (target_top + target_bottom) 40 | pred_aera = (pred_left + pred_right) * \ 41 | (pred_top + pred_bottom) 42 | 43 | w_intersect = torch.min(pred_left, target_left) + \ 44 | torch.min(pred_right, target_right) 45 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 46 | torch.min(pred_top, target_top) 47 | 48 | g_w_intersect = torch.max(pred_left, target_left) + \ 49 | torch.max(pred_right, target_right) 50 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 51 | torch.max(pred_top, target_top) 52 | ac_uion = g_w_intersect * g_h_intersect 53 | 54 | area_intersect = w_intersect * h_intersect 55 | area_union = target_aera + pred_aera - area_intersect 56 | 57 | ious = (area_intersect + 1.0) / (area_union + 1.0) 58 | gious = ious - (ac_uion - area_union) / ac_uion 59 | if self.loc_loss_type == 'iou': 60 | losses = -torch.log(ious) 61 | elif self.loc_loss_type == 'linear_iou': 62 | losses = 1 - ious 63 | elif self.loc_loss_type == 'giou': 64 | losses = 1 - gious 65 | else: 66 | raise NotImplementedError 67 | 68 | if weight is not None: 69 | return (losses * weight).sum() 70 | else: 71 | return losses.sum() 72 | -------------------------------------------------------------------------------- /tridet/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | # Adapted from AdelaiDet 3 | # https://github.com/aim-uofa/AdelaiDet/ 4 | import logging 5 | from functools import partial 6 | 7 | import torch 8 | from torch import nn 9 | 10 | LOG = logging.getLogger(__name__) 11 | 12 | 13 | class Scale(nn.Module): 14 | def __init__(self, init_value=1.0): 15 | super(Scale, self).__init__() 16 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 17 | 18 | def forward(self, input): 19 | return input * self.scale 20 | 21 | 22 | class Offset(nn.Module): 23 | def __init__(self, init_value=0.): 24 | super(Offset, self).__init__() 25 | self.bias = nn.Parameter(torch.FloatTensor([init_value])) 26 | 27 | def forward(self, input): 28 | return input + self.bias 29 | 30 | 31 | class ModuleListDial(nn.ModuleList): 32 | def __init__(self, modules=None): 33 | super(ModuleListDial, self).__init__(modules) 34 | self.cur_position = 0 35 | 36 | def forward(self, x): 37 | result = self[self.cur_position](x) 38 | self.cur_position += 1 39 | if self.cur_position >= len(self): 40 | self.cur_position = 0 41 | return result 42 | 43 | class DialableModules(nn.ModuleList): 44 | """ 45 | Dialable modules. 
Typically used with hierarchical output from FPN feature extractors. 46 | Separate modules are applied to each FPN layer. 47 | """ 48 | def __init__(self, modules=None): 49 | super(DialableModules, self).__init__(modules) 50 | self.cur_position = 0 51 | 52 | def forward(self, x): 53 | result = self[self.cur_position](x) 54 | self.cur_position += 1 55 | if self.cur_position >= len(self): 56 | self.cur_position = 0 57 | return result 58 | 59 | 60 | class DialableBN(DialableModules): 61 | """ 62 | Dialable batch-norm layers. Typical use case: all FPN layers shares a 2D convolutional decoder, but 63 | the batch-norm layers are not shared. That is, each FPN layers has its own shift and scale parameters, and keeps 64 | its own batch statistics (mean, scale). 65 | """ 66 | def __init__(self, out_channels, num_bn_modules, **bn_kwargs): 67 | LOG.info(f"Initializing DialableBN with `num_bn_modules`={num_bn_modules}") 68 | bn_modules = [nn.BatchNorm2d(out_channels, **bn_kwargs) for _ in range(num_bn_modules)] 69 | super().__init__(bn_modules) 70 | 71 | def get_norm(norm, out_channels, norm_kwargs={}): 72 | if not norm: 73 | return None 74 | 75 | norm_mapping = { 76 | "BN": nn.BatchNorm2d, 77 | "DialableBN": DialableBN, 78 | "GN": nn.GroupNorm, 79 | } 80 | 81 | norm_fn = partial(norm_mapping[norm], **norm_kwargs) 82 | if norm == "BN": 83 | return norm_fn(num_features=out_channels) 84 | elif norm == "DialableBN": 85 | return norm_fn(out_channels=out_channels) 86 | elif norm == "GN": 87 | return norm_fn(num_channels=out_channels) 88 | -------------------------------------------------------------------------------- /tridet/layers/separable_conv2d.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from detectron2.layers import Conv2d 5 | from torch import nn 6 | from torch.nn import functional as F 7 | from torch.nn.init import _calculate_correct_fan, calculate_gain 8 | 9 | from tridet.layers.normalization import get_norm 10 | 11 | ACTIVATIONS = { 12 | 'relu': F.relu, 13 | 'gelu': F.gelu, 14 | } 15 | 16 | 17 | def kaiming_uniform_groups_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu', groups=1): 18 | """'torch.nn.init.kaiming_uniform_()' with 'groups'. 19 | 20 | If 'mode=="fan_out"', fan is divided by 'groups', yielding larger std of weights. 21 | """ 22 | if 0 in tensor.shape: 23 | return tensor 24 | fan = _calculate_correct_fan(tensor, mode) 25 | if mode == 'fan_out': 26 | fan //= groups 27 | gain = calculate_gain(nonlinearity, a) 28 | std = gain / math.sqrt(fan) 29 | bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation 30 | with torch.no_grad(): 31 | return tensor.uniform_(-bound, bound) 32 | 33 | 34 | def kaiming_normal_groups_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu', groups=1): 35 | """'torch.nn.init.kaiming_normal_()' with 'groups'. 36 | 37 | If 'mode=="fan_out"', fan is divided by 'groups', yielding larger std of weights. 
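    With grouped convolution, each input channel feeds only 'out_channels / groups' filters, so PyTorch's default fan-out overestimates the effective fan by a factor of 'groups'; dividing by 'groups' compensates for this.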
38 | """ 39 | if 0 in tensor.shape: 40 | return tensor 41 | fan = _calculate_correct_fan(tensor, mode) 42 | if mode == 'fan_out': 43 | fan //= groups 44 | gain = calculate_gain(nonlinearity, a) 45 | std = gain / math.sqrt(fan) 46 | with torch.no_grad(): 47 | return tensor.normal_(0, std) 48 | 49 | 50 | class SeparableConv2d(nn.Module): 51 | """ Separable Conv 52 | """ 53 | def __init__( 54 | self, 55 | in_channels, 56 | out_channels, 57 | kernel_size=3, 58 | stride=1, 59 | dilation=1, 60 | bias=None, 61 | channel_multiplier=1.0, 62 | num_in_channels_per_group=1, # depth-separable conv. 63 | norm='BN', 64 | norm_kwargs={}, 65 | activation=None, 66 | ): 67 | super().__init__() 68 | assert kernel_size % 2 == 1, "kernel_size must be odd." 69 | assert in_channels % num_in_channels_per_group == 0, "'in_channels' must be divisible by 'num_in_channels_per_group'" 70 | hidden_channels = int(in_channels * channel_multiplier) 71 | groups = in_channels // num_in_channels_per_group 72 | self.conv_dw = Conv2d( 73 | in_channels, 74 | hidden_channels, 75 | kernel_size=kernel_size, 76 | stride=stride, 77 | padding=kernel_size // 2, 78 | dilation=dilation, 79 | bias=False, 80 | norm=None, 81 | activation=None, 82 | groups=groups 83 | ) 84 | 85 | norm_kwargs = norm_kwargs or {} 86 | norm_layer = get_norm(norm, hidden_channels, norm_kwargs=norm_kwargs) if isinstance(norm, str) else norm 87 | if bias is None: 88 | bias = norm_layer is None 89 | act = ACTIVATIONS[activation] if isinstance(activation, str) else activation 90 | self.conv_pw = Conv2d( 91 | hidden_channels, out_channels, kernel_size=1, stride=1, bias=bias, norm=norm_layer, activation=act 92 | ) 93 | 94 | self.groups = in_channels 95 | self.init_weights() 96 | 97 | def init_weights(self): 98 | # This seems important to make the network output roughly zero-mean, unit-std. 99 | kaiming_normal_groups_(self.conv_dw.weight, mode='fan_out', nonlinearity='linear', groups=self.groups) 100 | kaiming_normal_groups_(self.conv_pw.weight, mode='fan_out', nonlinearity='relu', groups=1) 101 | if self.conv_pw.bias is not None: 102 | nn.init.constant_(self.conv_pw.bias, 0) 103 | 104 | def forward(self, x): 105 | return self.conv_pw(self.conv_dw(x)) 106 | -------------------------------------------------------------------------------- /tridet/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | # Adapted from fvcore: 4 | # https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 5 | 6 | import torch 7 | 8 | 9 | def smooth_l1_loss(input: torch.Tensor, target: torch.Tensor, beta: float, reduction: str = "none") -> torch.Tensor: 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 
32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 57 | """ 58 | # (dennis.park) Make it work with mixed precision training. 59 | beta = torch.as_tensor(beta).to(input.dtype) 60 | if beta < 1e-5: 61 | # if beta == 0, then torch.where will result in nan gradients when 62 | # the chain rule is applied due to pytorch implementation details 63 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 64 | # zeros, rather than "no gradient"). To avoid this issue, we define 65 | # small values of beta to be exactly l1 loss. 66 | loss = torch.abs(input - target) 67 | else: 68 | n = torch.abs(input - target) 69 | cond = n < beta 70 | a = 0.5 * n**2 71 | b = n - 0.5 * beta 72 | a, b = a.to(input.dtype), b.to(input.dtype) 73 | loss = torch.where(cond, a, b) 74 | # loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 75 | 76 | if reduction == "mean": 77 | loss = loss.mean() 78 | elif reduction == "sum": 79 | loss = loss.sum() 80 | return loss 81 | -------------------------------------------------------------------------------- /tridet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import tridet.modeling.dd3d 3 | from tridet.modeling import feature_extractor 4 | from tridet.modeling.dd3d import DD3DWithTTA, NuscenesDD3DWithTTA 5 | 6 | TTA_MODELS = { 7 | "DD3D": DD3DWithTTA, 8 | "NuscenesDD3D": NuscenesDD3DWithTTA, 9 | } 10 | 11 | 12 | def build_tta_model(cfg, model): 13 | meta_arch = cfg.MODEL.META_ARCHITECTURE 14 | assert meta_arch in TTA_MODELS, f"Test-time augmentation model is not available: {meta_arch}" 15 | return TTA_MODELS[meta_arch](cfg, model) 16 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/act.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Tuple, Type, Union 2 | 3 | from torch import nn 4 | 5 | __all__ = ["build_activation"] 6 | 7 | # register activation function here 8 | # name: module, kwargs with default values 9 | REGISTERED_ACT_DICT: Dict[str, Tuple[Type, Dict[str, Any]]] = { 10 | "relu": (nn.ReLU, {"inplace": True}), 11 | "relu6": (nn.ReLU6, {"inplace": True}), 12 | "leaky_relu": (nn.LeakyReLU, {"inplace": True, "negative_slope": 0.1}), 13 | "h_swish": (nn.Hardswish, {"inplace": True}), 14 | "h_sigmoid": (nn.Hardsigmoid, {"inplace": True}), 15 | "swish": (nn.SiLU, {"inplace": True}), 16 | "silu": (nn.SiLU, {"inplace": True}), 17 | "tanh": (nn.Tanh, {}), 18 | "sigmoid": (nn.Sigmoid, {}), 19 | "gelu": (nn.GELU, {}), 20 | "mish": (nn.Mish, {"inplace": True}), 21 | } 22 | 23 | 24 | def build_activation(act_func_name: Union[str, nn.Module], **kwargs) -> Optional[nn.Module]: 25 | if isinstance(act_func_name, nn.Module): 26 | return act_func_name 27 | if act_func_name in REGISTERED_ACT_DICT: 28 | act_module, default_args = REGISTERED_ACT_DICT[act_func_name] 29 | for key in default_args: 30 | if key in kwargs: 31 | default_args[key] = kwargs[key] 32 | return act_module(**default_args) 33 | elif act_func_name is None or act_func_name.lower() == "none": 34 | return None 35 | else: 36 | raise ValueError("do not support: %s" % act_func_name) 37 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/backbone_with_fpn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from detectron2.layers import ShapeSpec 3 | 4 | from tridet.modeling.backbone.omni_scripts.utils import make_divisible 5 | from tridet.modeling.backbone.omni_scripts.fused_mb_nets import MixFusedMobileNetV2 6 | from tridet.modeling.backbone.omni_scripts.fpn import FPN 7 | from tridet.modeling.backbone.omni_scripts.ops import ConvLayer 8 | 9 | __all__ = ["BackboneFPN", "build_feature_extractor_all_fuse"] 10 | 11 | 12 | def build_feature_extractor_all_fuse(return_list=False, width_mult=1.0, depth_mult=1.0): 13 | 14 | stage_width_list = [32, 16, 32, 56, 104, 120, 320] 15 | depth_list = [2, 2, 5, 4, 3] 16 | for i, width in enumerate(stage_width_list): 17 | stage_width_list[i] = make_divisible(width * width_mult, 8) 18 | for i, depth in enumerate(depth_list): 19 | depth_list[i] = int(depth * depth_mult) 20 | backbone = MixFusedMobileNetV2( 21 | width_mult=1.0, 22 | ks=[3, 3, 3, 3, 3], 23 | expand_ratio=[4, 4, 4, 4, 4], 24 | depth=depth_list, # 2, 3, 5, 5, 5 25 | block_type_list=["fmb", "fmb", "fmb", "fmb", "fmb", "fmb"], 26 | stage_width_list=stage_width_list, 27 | channel_att_list=[None, None, None, None, None], 28 | act_func="relu", 29 | ) 30 | fpn_width_mult = 0.7 31 | output_width = make_divisible(128 * width_mult, 8) 32 | 33 | fpn = FPN( 34 | inputs=[ 35 | ("ex_stage2", stage_width_list[-1], make_divisible(stage_width_list[-1] * fpn_width_mult, 8), 128), 36 | ("ex_stage1", stage_width_list[-1], make_divisible(stage_width_list[-1] * fpn_width_mult, 8), 64), 37 | ("stage5", stage_width_list[-1], make_divisible(stage_width_list[-1] * fpn_width_mult, 8), 32), 38 | ("stage4", stage_width_list[-2], make_divisible(stage_width_list[-2] * fpn_width_mult, 8), 16), 39 | ("stage2", 
stage_width_list[-4], make_divisible(stage_width_list[-4] * fpn_width_mult, 8), 8), 40 | ], 41 | input_mode="cat_conv", 42 | middle_config={ 43 | "all": ["fmb_e@4_k@3", "fmb_e@4_k@3"], 44 | 8: ["fmb_e@4_k@3"], 45 | }, 46 | channel_att=None, 47 | prefix="fpn", 48 | act_func="relu", 49 | spp_size=[3, 5, 7], 50 | use_pan=True, 51 | output_width=output_width, 52 | ) 53 | model = BackboneFPN( 54 | backbone, fpn, 55 | n_extra_stage=2, last_channels=stage_width_list[-1], act_func="relu", return_list=return_list, 56 | ) 57 | return model 58 | 59 | 60 | class BackboneFPN(nn.Module): 61 | def __init__( 62 | self, backbone: nn.Module, fpn: FPN, last_channels: int, act_func="relu", n_extra_stage=0, 63 | return_list=False, 64 | ): 65 | super(BackboneFPN, self).__init__() 66 | self.backbone = backbone 67 | self.fpn = fpn 68 | self.extra_stage = nn.ModuleList([ 69 | # PoolingLayer("avg", kernel_size=2, stride=2) 70 | ConvLayer(last_channels, last_channels, 3, 2, act_func=act_func) 71 | # FusedMBV2Block(last_channels, last_channels, 3, 2, expand_ratio=4, act_func=(act_func, None)) 72 | for _ in range(n_extra_stage) 73 | ]) 74 | self.return_list = return_list 75 | 76 | @property 77 | def n_extra_stage(self): 78 | return len(self.extra_stage) 79 | 80 | @property 81 | def size_divisibility(self): 82 | return 32 * (2 ** self.n_extra_stage) 83 | 84 | def output_shape(self): 85 | out_list = [] 86 | for i, (key, in_channel, mid_channel, stride) in enumerate(self.fpn.inputs): 87 | channels = self.fpn.output_width or mid_channel 88 | out_list.append((f"{self.fpn.prefix}_out{i + 1}", ShapeSpec(channels=channels, stride=stride))) 89 | out_list = out_list[::-1] 90 | out_dict = {} 91 | for key, shape in out_list: 92 | out_dict[key] = shape 93 | return out_dict 94 | 95 | def forward(self, x): 96 | feed_dict = self.backbone(x) 97 | x = feed_dict["output"] 98 | for i, extra_stage in enumerate(self.extra_stage): 99 | feed_dict[f"ex_stage{i + 1}"] = x = extra_stage(x) 100 | feed_dict = self.fpn(feed_dict) 101 | if self.return_list: 102 | out_list = [feed_dict[key] for key in self.output_shape()] 103 | return out_list 104 | else: 105 | return feed_dict 106 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/fpn.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | from tridet.modeling.backbone.omni_scripts.utils import make_divisible 6 | from tridet.modeling.backbone.omni_scripts.ops import ( 7 | ConvLayer, PoolingLayer, SPPBlock, UpSampleLayer, MBV2Block, FusedMBV2Block, ResidualBlock, DAGOp 8 | ) 9 | 10 | __all__ = ["FPN"] 11 | 12 | 13 | def build_block( 14 | block_str: str, in_channels: int, out_channels: int, channel_att: Optional[str], act_func: str 15 | ) -> nn.Module: 16 | block_config = {"e": 4, "k": 3} 17 | block_str = block_str.split("_") 18 | block_config["name"] = block_str[0] 19 | for hparam in block_str[1:]: 20 | if hparam.startswith("e@"): 21 | block_config["e"] = float(hparam[2:]) 22 | elif hparam.startswith("k@"): 23 | block_config["k"] = int(hparam[2:]) 24 | 25 | mid_channels = make_divisible(in_channels * block_config["e"], 8) 26 | if channel_att is not None: 27 | raise NotImplementedError 28 | else: 29 | channel_att = None 30 | 31 | if block_config["name"] == "mb": 32 | block = MBV2Block( 33 | in_channels, 34 | out_channels, 35 | block_config["k"], 36 | expand_ratio=block_config["e"], 37 | 
act_func=(act_func, act_func, None), 38 | ) 39 | if channel_att is not None: 40 | block = nn.Sequential( 41 | block.inverted_conv, 42 | block.depth_conv, 43 | channel_att, 44 | block.point_conv 45 | ) 46 | elif block_config["name"] == "fmb": 47 | block = FusedMBV2Block( 48 | in_channels, 49 | out_channels, 50 | block_config["k"], 51 | expand_ratio=block_config["e"], 52 | act_func=(act_func, None), 53 | ) 54 | if channel_att is not None: 55 | block = nn.Sequential( 56 | block.spatial_conv, 57 | channel_att, 58 | block.point_conv, 59 | ) 60 | else: 61 | raise NotImplementedError 62 | 63 | if in_channels == out_channels: 64 | block = ResidualBlock( 65 | block, 66 | nn.Identity(), 67 | ) 68 | return block 69 | 70 | 71 | class FPN(nn.Module): 72 | """Vanilla FPN and PAN""" 73 | 74 | def __init__( 75 | self, 76 | # inputs 77 | inputs: List[Tuple[str, int, int, int]], 78 | input_mode="cat_conv", 79 | # middle 80 | middle_config: Optional[Dict] = None, 81 | channel_att: Optional[str] = None, 82 | # general 83 | prefix="fpn", 84 | act_func="relu", 85 | spp_size: Optional[List] = None, 86 | use_pan=True, 87 | output_width: Optional[int] = None, 88 | ): 89 | super(FPN, self).__init__() 90 | middle_config = middle_config or {} 91 | if "all" not in middle_config: 92 | middle_config["all"] = ["mbv2_e@4_k@5", "mbv2_e@4_k@5"] 93 | 94 | # sort inputs by stride 95 | inputs = sorted(inputs, key=lambda tup: tup[-1], reverse=True) 96 | self.inputs = inputs 97 | self.prefix = prefix 98 | self.output_width = output_width 99 | 100 | blocks = [] 101 | extra_input = [] 102 | for idx, (feature_id, in_channels, mid_channels, stride) in enumerate(inputs): 103 | # inputs 104 | dag_inputs, dag_merge_mode, dag_post_input_op = self.build_input( 105 | feature_id, 106 | in_channels, 107 | extra_input, 108 | input_mode, 109 | mid_channels, 110 | act_func, 111 | ) 112 | # middle 113 | dag_middle_blocks = [] 114 | if idx == 0 and spp_size is not None: 115 | spp_block = ResidualBlock( 116 | SPPBlock( 117 | mid_channels, 118 | pool_size=spp_size, 119 | pool_type="avg", 120 | act_func=act_func, 121 | ), 122 | nn.Identity(), 123 | ) 124 | dag_middle_blocks.append(spp_block) 125 | for block_str in middle_config.get(stride, middle_config["all"]): 126 | dag_middle_blocks.append( 127 | build_block( 128 | block_str, 129 | mid_channels, 130 | mid_channels, 131 | channel_att, 132 | act_func, 133 | ) 134 | ) 135 | # output 136 | if use_pan or self.output_width is None: 137 | output_module = nn.Identity() 138 | else: 139 | output_module = ConvLayer( 140 | mid_channels, 141 | self.output_width, 142 | 1, 143 | act_func=act_func, 144 | ) 145 | dag_outputs = { 146 | f"{prefix}_{'inner' if use_pan else 'out'}{idx + 1}": output_module 147 | } 148 | if idx < len(inputs) - 1: 149 | up_factor = stride // inputs[idx + 1][3] 150 | dag_outputs[f"{prefix}_up{idx + 1}"] = nn.Sequential( 151 | ConvLayer( 152 | mid_channels, inputs[idx + 1][2], 1, act_func=act_func 153 | ), 154 | UpSampleLayer( 155 | factor=up_factor, 156 | mode="bilinear", 157 | align_corners=False, 158 | ) 159 | if up_factor > 1 160 | else None, 161 | ) 162 | extra_input = [(f"{prefix}_up{idx + 1}", inputs[idx + 1][2])] 163 | 164 | blocks.append( 165 | DAGOp( 166 | inputs=dag_inputs, 167 | merge_mode=dag_merge_mode, 168 | post_input_op=dag_post_input_op, 169 | middle=nn.Sequential(*dag_middle_blocks), 170 | outputs=dag_outputs, 171 | ) 172 | ) 173 | if use_pan: 174 | for idx in range(len(inputs) - 1, -1, -1): 175 | _, _, mid_channels, stride = inputs[idx] 176 | if idx < len(inputs) - 1: 
177 | extra_input = [(f"{prefix}_down{idx + 1}", mid_channels)] 178 | else: 179 | extra_input = [] 180 | dag_inputs, dag_merge_mode, dag_post_input_op = self.build_input( 181 | f"{prefix}_inner{idx + 1}", 182 | mid_channels, 183 | extra_input, 184 | input_mode, 185 | mid_channels, 186 | act_func, 187 | ) 188 | # middle 189 | dag_middle_blocks = [] 190 | for block_str in middle_config.get(stride, middle_config["all"]): 191 | dag_middle_blocks.append( 192 | build_block( 193 | block_str, 194 | mid_channels, 195 | mid_channels, 196 | channel_att, 197 | act_func, 198 | ) 199 | ) 200 | # output 201 | if self.output_width is None: 202 | output_module = nn.Identity() 203 | else: 204 | output_module = ConvLayer( 205 | mid_channels, 206 | self.output_width, 207 | 1, 208 | act_func=act_func, 209 | ) 210 | dag_outputs = {f"{prefix}_out{idx + 1}": output_module} 211 | if idx != 0: 212 | down_factor = inputs[idx - 1][3] // stride 213 | downsample = PoolingLayer( 214 | pool_type="avg", 215 | kernel_size=down_factor, 216 | stride=down_factor, 217 | ) 218 | dag_outputs[f"{prefix}_down{idx}"] = nn.Sequential( 219 | downsample, 220 | ConvLayer( 221 | mid_channels, inputs[idx - 1][2], 1, act_func=act_func, 222 | ), 223 | ) 224 | blocks.append( 225 | DAGOp( 226 | inputs=dag_inputs, 227 | merge_mode=dag_merge_mode, 228 | post_input_op=dag_post_input_op, 229 | middle=nn.Sequential(*dag_middle_blocks), 230 | outputs=dag_outputs, 231 | ) 232 | ) 233 | 234 | self.blocks = nn.ModuleList(blocks) 235 | 236 | @staticmethod 237 | def build_input( 238 | feature_id: str, 239 | in_channels: int, 240 | extra_input: List[Tuple[str, int]], 241 | input_mode: str, 242 | mid_channels: int, 243 | act_func: str, 244 | ) -> Tuple[Dict[str, nn.Module], str, Optional[nn.Module]]: 245 | if input_mode == "cat_conv": 246 | merge_mode = "cat" 247 | inputs = {feature_id: nn.Identity()} 248 | for extra_id, extra_in_channels in extra_input: 249 | inputs[extra_id] = nn.Identity() 250 | post_input_op = ConvLayer( 251 | in_channels=sum([in_channels] + [extra_c for _, extra_c in extra_input]), 252 | out_channels=mid_channels, 253 | kernel_size=1, 254 | act_func=act_func, 255 | ) 256 | elif input_mode == "add": 257 | merge_mode = "add" 258 | inputs = {feature_id: nn.Identity()} 259 | for extra_id, extra_in_channels in extra_input: 260 | inputs[extra_id] = nn.Identity() 261 | post_input_op = None 262 | else: 263 | raise NotImplementedError 264 | return inputs, merge_mode, post_input_op 265 | 266 | def forward(self, feature_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 267 | for block in self.blocks: 268 | feature_dict = block(feature_dict) 269 | return feature_dict 270 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/fused_mb_nets.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | import torch.nn as nn 3 | 4 | from tridet.modeling.backbone.omni_scripts.utils import make_divisible, val2list 5 | from tridet.modeling.backbone.omni_scripts.ops import ConvLayer, MBV1Block, MBV2Block, FusedMBV2Block, ResidualBlock, SeqBackbone 6 | 7 | __all__ = ["MixFusedMobileNetV2"] 8 | 9 | 10 | class MixFusedMobileNetV2(SeqBackbone): 11 | def __init__( 12 | self, 13 | width_mult=1.0, 14 | channel_divisor=8, 15 | ks: Union[int, List[int], None] = None, 16 | expand_ratio: Union[int, List[int], None] = None, 17 | depth: Union[int, List[int], None] = None, 18 | stage_width_list: Optional[List[int]] = 
None, 19 | act_func=None, 20 | block_type_list: Optional[List[str]] = None, 21 | channel_att_list: Union[None, str, List[Optional[str]]] = None, 22 | ): 23 | 24 | ks = val2list(ks or 3, 5) 25 | expand_ratio = val2list(expand_ratio or 6, 5) 26 | depth = val2list(depth, 5) 27 | act_func = act_func or "relu" 28 | block_type_list = block_type_list or ["fmb", "fmb", "fmb", "mb", "mb", "mb"] 29 | channel_att_list = val2list(channel_att_list, 5) 30 | 31 | block_configs = [ 32 | # t, n, s 33 | [expand_ratio[0], depth[0] or 2, ks[0], 2], 34 | [expand_ratio[1], depth[1] or 3, ks[1], 2], 35 | [expand_ratio[2], depth[2] or 4, ks[2], 2], 36 | [expand_ratio[3], depth[3] or 3, ks[3], 1], 37 | [expand_ratio[4], depth[4] or 3, ks[4], 2], 38 | ] 39 | 40 | stage_width_list = stage_width_list or [32, 16, 24, 32, 64, 96, 160] 41 | for i, w in enumerate(stage_width_list): 42 | stage_width_list[i] = make_divisible(w * width_mult, channel_divisor) 43 | 44 | # input stem 45 | input_stem = nn.Sequential( 46 | ConvLayer(3, stage_width_list[0], 3, 2, act_func=act_func, first_layer=True), 47 | (FusedMBV2Block if block_type_list[0] == "fmb" else MBV1Block)( 48 | stage_width_list[0], 49 | stage_width_list[1], 50 | kernel_size=3, 51 | stride=1, 52 | act_func=(act_func, None), 53 | **({"expand_ratio": 1} if block_type_list[0] == "fmb" else {}), 54 | ), 55 | ) 56 | 57 | # stages 58 | stages = [] 59 | in_channels = stage_width_list[1] 60 | for (t, n, k, s), c, block_type, channel_att_type in zip( 61 | block_configs, stage_width_list[2:], block_type_list[1:], channel_att_list, 62 | ): 63 | blocks = [] 64 | for i in range(n): 65 | stride = s if i == 0 else 1 66 | mb_conv = (FusedMBV2Block if block_type == "fmb" else MBV2Block)( 67 | in_channels, 68 | c, 69 | k, 70 | stride, 71 | expand_ratio=t, 72 | act_func=(act_func, None) if block_type == "fmb" else (act_func, act_func, None), 73 | ) 74 | if channel_att_type is None: 75 | channel_att = None 76 | elif channel_att_type.startswith("se"): 77 | raise NotImplementedError 78 | elif channel_att_type.startswith("ca"): 79 | raise NotImplementedError 80 | else: 81 | channel_att = None 82 | if channel_att is not None: 83 | if isinstance(mb_conv, FusedMBV2Block): 84 | mb_conv = nn.Sequential( 85 | mb_conv.spatial_conv, 86 | channel_att, 87 | mb_conv.point_conv, 88 | ) 89 | else: 90 | mb_conv = nn.Sequential( 91 | mb_conv.inverted_conv, 92 | mb_conv.depth_conv, 93 | channel_att, 94 | mb_conv.point_conv 95 | ) 96 | if i != 0: 97 | mb_conv = ResidualBlock( 98 | mb_conv, 99 | nn.Identity(), 100 | ) 101 | blocks.append(mb_conv) 102 | in_channels = c 103 | stages.append(nn.Sequential(*blocks)) 104 | super(MixFusedMobileNetV2, self).__init__(input_stem, stages) 105 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/norm.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Tuple, Type 2 | 3 | import torch.nn as nn 4 | 5 | __all__ = ["REGISTERED_NORMALIZATION_DICT", "build_norm"] 6 | 7 | # register normalization function here 8 | # name: module, kwargs with default values 9 | REGISTERED_NORMALIZATION_DICT: Dict[str, Tuple[Type, Dict[str, Any]]] = { 10 | "bn_3d": (nn.BatchNorm3d, {"num_features": None, "eps": 1e-5, "momentum": 0.1}), 11 | "bn_2d": (nn.BatchNorm2d, {"num_features": None, "eps": 1e-5, "momentum": 0.1}), 12 | "bn_1d": (nn.BatchNorm1d, {"num_features": None, "eps": 1e-5, "momentum": 0.1}), 13 | "sync_bn": (nn.SyncBatchNorm, 
{"num_features": None, "eps": 1e-5, "momentum": 0.1}), 14 | "gn": (nn.GroupNorm, {"num_groups": None, "num_channels": None, "eps": 1e-5}), 15 | "ln": (nn.LayerNorm, {"normalized_shape": None, "eps": 1e-5}), 16 | } 17 | 18 | 19 | def build_norm(norm_name="bn_2d", num_features=None, **kwargs) -> Optional[nn.Module]: 20 | if norm_name == "gn": 21 | kwargs["num_channels"] = num_features 22 | elif norm_name == "ln": 23 | kwargs["normalized_shape"] = num_features 24 | else: 25 | kwargs["num_features"] = num_features 26 | if norm_name in REGISTERED_NORMALIZATION_DICT: 27 | norm_module, default_args = REGISTERED_NORMALIZATION_DICT[norm_name] 28 | for key in default_args: 29 | if key in kwargs: 30 | default_args[key] = kwargs[key] 31 | return norm_module(**default_args) 32 | elif norm_name is None or norm_name.lower() == "none": 33 | return None 34 | else: 35 | raise ValueError("do not support: %s" % norm_name) 36 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/omninet_w1.0.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tridet.modeling.backbone.omni_scripts.backbone_with_fpn import build_feature_extractor_all_fuse 3 | 4 | omninet_w10 = build_feature_extractor_all_fuse( 5 | return_list=False, width_mult=1.0, depth_mult=1.0, 6 | ) 7 | 8 | checkpoint = torch.load( 9 | "omninet-small", 10 | map_location="cpu" 11 | ) 12 | checkpoint = checkpoint["state_dict"] 13 | omninet_w10.load_state_dict(checkpoint) 14 | print(omninet_w10) 15 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/omninet_w1.3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tridet.modeling.backbone.omni_scripts.backbone_with_fpn import build_feature_extractor_all_fuse 3 | 4 | omninet_w13 = build_feature_extractor_all_fuse( 5 | return_list=False, width_mult=1.3, depth_mult=1.0, 6 | ) 7 | 8 | checkpoint = torch.load( 9 | "omninet-big", 10 | map_location="cpu" 11 | ) 12 | checkpoint = checkpoint["state_dict"] 13 | omninet_w13.load_state_dict(checkpoint) 14 | print(omninet_w13) 15 | -------------------------------------------------------------------------------- /tridet/modeling/backbone/omni_scripts/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List, Tuple, Any, Optional 2 | 3 | __all__ = ["list_sum", "val2list", "squeeze_list", "make_divisible", "get_same_padding"] 4 | 5 | 6 | def list_sum(x: List) -> Any: 7 | """Return the sum of a list of objects. 8 | 9 | can be int, float, torch.Tensor, np.ndarray, etc 10 | can be used for adding losses 11 | """ 12 | return x[0] if len(x) == 1 else x[0] + list_sum(x[1:]) 13 | 14 | 15 | def val2list(val: Union[List, Tuple, Any], repeat_time=1) -> List: 16 | """Repeat `val` for `repeat_time` times and return the list or val if list/tuple.""" 17 | if isinstance(val, (list, tuple)): 18 | return list(val) 19 | return [val for _ in range(repeat_time)] 20 | 21 | 22 | def squeeze_list(src_list: Optional[List]) -> Union[List, Any]: 23 | """Return the first item of the given list if the list only contains one item. 
24 | 25 | usually used in args parsing 26 | """ 27 | if src_list is not None and len(src_list) == 1: 28 | return src_list[0] 29 | else: 30 | return src_list 31 | 32 | 33 | def make_divisible(v: Union[int, float], divisor: Optional[int], min_val=None) -> Union[int, float]: 34 | """This function is taken from the original tf repo. 35 | 36 | It ensures that all layers have a channel number that is divisible by 8 37 | It can be seen here: 38 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 39 | :param v: 40 | :param divisor: 41 | :param min_val: 42 | :return: 43 | """ 44 | if divisor is None: 45 | return v 46 | 47 | if min_val is None: 48 | min_val = divisor 49 | new_v = max(min_val, int(v + divisor / 2) // divisor * divisor) 50 | # Make sure that round down does not go down by more than 10%. 51 | if new_v < 0.9 * v: 52 | new_v += divisor 53 | return new_v 54 | 55 | 56 | def get_same_padding(kernel_size: Union[int, Tuple[int, int]]) -> Union[int, tuple]: 57 | if isinstance(kernel_size, tuple): 58 | assert len(kernel_size) == 2, f"invalid kernel size: {kernel_size}" 59 | p1 = get_same_padding(kernel_size[0]) 60 | p2 = get_same_padding(kernel_size[1]) 61 | return p1, p2 62 | else: 63 | assert isinstance(kernel_size, int), "kernel size should be either `int` or `tuple`" 64 | assert kernel_size % 2 > 0, "kernel size should be odd number" 65 | return kernel_size // 2 66 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | from tridet.modeling.dd3d.core import DD3D 3 | from tridet.modeling.dd3d.nuscenes_dd3d import NuscenesDD3D 4 | from tridet.modeling.dd3d.nuscenes_dd3d_tta import NuscenesDD3DWithTTA 5 | from tridet.modeling.dd3d.test_time_augmentation import DD3DWithTTA 6 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/core.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
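# DD3D meta-architecture: a single-stage, FCOS-style monocular 3D detector. The backbone/FPN
# produces multi-scale features; FCOS2DHead predicts class logits, 2D boxes and centerness per
# location, and FCOS3DHead adds the 3D box parameters (orientation quaternion, projected center,
# depth, size, confidence) plus an optional dense depth map. forward() assembles the training
# losses or, at inference time, runs NMS / optional BEV NMS and resizes instances to the input
# resolution.
# A rough usage sketch (assuming a Detectron2-style config object; the actual cfg values come
# from the Hydra configs under configs/):
#   model = META_ARCH_REGISTRY.get("DD3D")(cfg)
#   losses = model(batched_inputs)    # training mode: dict of scalar loss tensors
#   results = model(batched_inputs)   # eval mode: list of {"instances": Instances}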
2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 6 | from detectron2.modeling.postprocessing import detector_postprocess as resize_instances 7 | from detectron2.structures import Instances 8 | 9 | from tridet.modeling.dd3d.fcos2d import FCOS2DHead, FCOS2DInference, FCOS2DLoss 10 | from tridet.modeling.dd3d.fcos3d import FCOS3DHead, FCOS3DInference, FCOS3DLoss 11 | from tridet.modeling.dd3d.postprocessing import nuscenes_sample_aggregate 12 | from tridet.modeling.dd3d.prepare_targets import DD3DTargetPreparer 13 | from tridet.modeling.feature_extractor import build_feature_extractor 14 | from tridet.structures.image_list import ImageList 15 | from tridet.utils.tensor2d import compute_features_locations as compute_locations_per_level 16 | from tridet.modeling.backbone.omni_scripts.backbone_with_fpn import build_feature_extractor_all_fuse 17 | 18 | 19 | @META_ARCH_REGISTRY.register() 20 | class DD3D(nn.Module): 21 | def __init__(self, cfg): 22 | super().__init__() 23 | if "backbone_with_fpn" in cfg.MODEL: 24 | self.backbone = build_feature_extractor_all_fuse( 25 | width_mult=cfg.MODEL.width_mult, depth_mult=cfg.MODEL.depth_mult, 26 | ) 27 | else: 28 | self.backbone = build_feature_extractor(cfg) 29 | 30 | backbone_output_shape = self.backbone.output_shape() 31 | self.in_features = cfg.DD3D.IN_FEATURES or list(backbone_output_shape.keys()) 32 | self.backbone_output_shape = [backbone_output_shape[f] for f in self.in_features] 33 | 34 | self.feature_locations_offset = cfg.DD3D.FEATURE_LOCATIONS_OFFSET 35 | 36 | self.fcos2d_head = FCOS2DHead(cfg, self.backbone_output_shape) 37 | self.fcos2d_loss = FCOS2DLoss(cfg) 38 | self.fcos2d_inference = FCOS2DInference(cfg) 39 | 40 | if cfg.MODEL.BOX3D_ON: 41 | self.fcos3d_head = FCOS3DHead(cfg, self.backbone_output_shape) 42 | self.fcos3d_loss = FCOS3DLoss(cfg) 43 | self.fcos3d_inference = FCOS3DInference(cfg) 44 | self.only_box2d = False 45 | else: 46 | self.only_box2d = True 47 | 48 | self.prepare_targets = DD3DTargetPreparer(cfg, self.backbone_output_shape) 49 | 50 | self.postprocess_in_inference = cfg.DD3D.INFERENCE.DO_POSTPROCESS 51 | 52 | self.do_nms = cfg.DD3D.INFERENCE.DO_NMS 53 | self.do_bev_nms = cfg.DD3D.INFERENCE.DO_BEV_NMS 54 | self.bev_nms_iou_thresh = cfg.DD3D.INFERENCE.BEV_NMS_IOU_THRESH 55 | 56 | # nuScenes inference aggregates detections over all 6 cameras. 
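# Grouping detections by sample and the cross-camera BEV NMS are implemented in
# tridet/modeling/dd3d/postprocessing.py (nuscenes_sample_aggregate).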
57 | self.nusc_sample_aggregate_in_inference = cfg.DD3D.INFERENCE.NUSC_SAMPLE_AGGREGATE 58 | self.num_classes = cfg.DD3D.NUM_CLASSES 59 | 60 | self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) 61 | self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) 62 | 63 | @property 64 | def device(self): 65 | return self.pixel_mean.device 66 | 67 | def preprocess_image(self, x): 68 | return (x - self.pixel_mean) / self.pixel_std 69 | 70 | def forward(self, batched_inputs): 71 | images = [x["image"].to(self.device) for x in batched_inputs] 72 | images = [self.preprocess_image(x) for x in images] 73 | 74 | if 'intrinsics' in batched_inputs[0]: 75 | intrinsics = [x['intrinsics'].to(self.device) for x in batched_inputs] 76 | else: 77 | intrinsics = None 78 | images = ImageList.from_tensors(images, self.backbone.size_divisibility, intrinsics=intrinsics) 79 | 80 | gt_dense_depth = None 81 | if 'depth' in batched_inputs[0]: 82 | gt_dense_depth = [x["depth"].to(self.device) for x in batched_inputs] 83 | gt_dense_depth = ImageList.from_tensors( 84 | gt_dense_depth, self.backbone.size_divisibility, intrinsics=intrinsics 85 | ) 86 | 87 | features = self.backbone(images.tensor) 88 | features = [features[f] for f in self.in_features] 89 | 90 | if "instances" in batched_inputs[0]: 91 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 92 | else: 93 | gt_instances = None 94 | 95 | locations = self.compute_locations(features) 96 | logits, box2d_reg, centerness, _ = self.fcos2d_head(features) 97 | if not self.only_box2d: 98 | box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth = self.fcos3d_head(features) 99 | inv_intrinsics = images.intrinsics.inverse() if images.intrinsics is not None else None 100 | 101 | if self.training: 102 | assert gt_instances is not None 103 | feature_shapes = [x.shape[-2:] for x in features] 104 | training_targets = self.prepare_targets(locations, gt_instances, feature_shapes) 105 | if gt_dense_depth is not None: 106 | training_targets.update({"dense_depth": gt_dense_depth}) 107 | 108 | losses = {} 109 | fcos2d_loss, fcos2d_info = self.fcos2d_loss(logits, box2d_reg, centerness, training_targets) 110 | losses.update(fcos2d_loss) 111 | 112 | if not self.only_box2d: 113 | fcos3d_loss = self.fcos3d_loss( 114 | box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth, inv_intrinsics, 115 | fcos2d_info, training_targets 116 | ) 117 | losses.update(fcos3d_loss) 118 | return losses 119 | else: 120 | pred_instances, fcos2d_info = self.fcos2d_inference( 121 | logits, box2d_reg, centerness, locations, images.image_sizes 122 | ) 123 | if not self.only_box2d: 124 | # This adds 'pred_boxes3d' and 'scores_3d' to Instances in 'pred_instances' in place. 125 | self.fcos3d_inference( 126 | box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, inv_intrinsics, pred_instances, 127 | fcos2d_info 128 | ) 129 | 130 | # 3D score == 2D score x confidence. 131 | score_key = "scores_3d" 132 | else: 133 | score_key = "scores" 134 | 135 | # Transpose to "image-first", i.e. (B, L) 136 | pred_instances = list(zip(*pred_instances)) 137 | pred_instances = [Instances.cat(instances) for instances in pred_instances] 138 | 139 | # 2D NMS and pick top-K. 140 | if self.do_nms: 141 | pred_instances = self.fcos2d_inference.nms_and_top_k(pred_instances, score_key) 142 | 143 | if not self.only_box2d and self.do_bev_nms: 144 | # Bird-eye-view NMS. 
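# Each image is treated as its own group here; grouping the cameras of one nuScenes sample
# by `sample_token` is handled by NuscenesDD3D and the TTA wrapper (nuscenes_dd3d_tta.py).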
145 | dummy_group_idxs = {i: [i] for i, _ in enumerate(pred_instances)} 146 | if 'pose' in batched_inputs[0]: 147 | poses = [x['pose'] for x in batched_inputs] 148 | else: 149 | poses = [x['extrinsics'] for x in batched_inputs] 150 | pred_instances = nuscenes_sample_aggregate( 151 | pred_instances, 152 | dummy_group_idxs, 153 | self.num_classes, 154 | poses, 155 | iou_threshold=self.bev_nms_iou_thresh, 156 | include_boxes3d_global=False 157 | ) 158 | 159 | if self.postprocess_in_inference: 160 | processed_results = [] 161 | for results_per_image, input_per_image, image_size in \ 162 | zip(pred_instances, batched_inputs, images.image_sizes): 163 | height = input_per_image.get("height", image_size[0]) 164 | width = input_per_image.get("width", image_size[1]) 165 | r = resize_instances(results_per_image, height, width) 166 | processed_results.append({"instances": r}) 167 | else: 168 | processed_results = [{"instances": x} for x in pred_instances] 169 | 170 | return processed_results 171 | 172 | def compute_locations(self, features): 173 | locations = [] 174 | in_strides = [x.stride for x in self.backbone_output_shape] 175 | for level, feature in enumerate(features): 176 | h, w = feature.size()[-2:] 177 | locations_per_level = compute_locations_per_level( 178 | h, w, in_strides[level], feature.dtype, feature.device, offset=self.feature_locations_offset 179 | ) 180 | locations.append(locations_per_level) 181 | return locations 182 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/dense_depth.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from detectron2.layers import Conv2d, get_norm 8 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 9 | 10 | from tridet.layers.normalization import ModuleListDial, Offset, Scale 11 | from tridet.modeling.dd3d.dense_depth_loss import build_dense_depth_loss 12 | from tridet.modeling.feature_extractor import build_feature_extractor 13 | from tridet.structures.image_list import ImageList 14 | from tridet.utils.tensor2d import aligned_bilinear 15 | 16 | 17 | class DD3DDenseDepthHead(nn.Module): 18 | def __init__(self, cfg, input_shape): 19 | super().__init__() 20 | self.in_strides = [shape.stride for shape in input_shape] 21 | self.num_levels = len(input_shape) 22 | assert self.in_strides == [shape.stride for shape in input_shape] 23 | 24 | self.mean_depth_per_level = torch.FloatTensor(cfg.DD3D.FCOS3D.MEAN_DEPTH_PER_LEVEL) 25 | self.std_depth_per_level = torch.FloatTensor(cfg.DD3D.FCOS3D.STD_DEPTH_PER_LEVEL) 26 | 27 | self.scale_depth_by_focal_lengths_factor = cfg.DD3D.FCOS3D.SCALE_DEPTH_BY_FOCAL_LENGTHS_FACTOR 28 | 29 | self.use_scale = cfg.DD3D.FCOS3D.USE_SCALE 30 | self.depth_scale_init_factor = cfg.DD3D.FCOS3D.DEPTH_SCALE_INIT_FACTOR 31 | 32 | box3d_tower = [] 33 | in_channels = input_shape[0].channels 34 | 35 | num_convs = cfg.DD3D.FCOS3D.NUM_CONVS 36 | use_deformable = cfg.DD3D.FCOS3D.USE_DEFORMABLE 37 | norm = cfg.DD3D.FCOS3D.NORM 38 | 39 | if use_deformable: 40 | raise ValueError("Not supported yet.") 41 | 42 | for i in range(num_convs): 43 | if norm in ("BN", "FrozenBN"): 44 | # Each FPN level has its own batchnorm layer. 
45 | # "BN" is converted to "SyncBN" in distributed training (see train.py) 46 | norm_layer = ModuleListDial([get_norm(norm, in_channels) for _ in range(self.num_levels)]) 47 | else: 48 | norm_layer = get_norm(norm, in_channels) 49 | box3d_tower.append( 50 | Conv2d( 51 | in_channels, 52 | in_channels, 53 | kernel_size=3, 54 | stride=1, 55 | padding=1, 56 | bias=norm_layer is None, 57 | norm=norm_layer, 58 | activation=F.relu 59 | ) 60 | ) 61 | self.add_module('box3d_tower', nn.Sequential(*box3d_tower)) 62 | 63 | # Each FPN level has its own predictor layer. 64 | self.dense_depth = nn.ModuleList([ 65 | Conv2d(in_channels, 1, kernel_size=3, stride=1, padding=1, bias=(not cfg.DD3D.FCOS3D.USE_SCALE)) 66 | for _ in range(self.num_levels) 67 | ]) 68 | 69 | if self.use_scale: 70 | self.scales_depth = nn.ModuleList([ 71 | Scale(init_value=sigma * self.depth_scale_init_factor) for sigma in self.std_depth_per_level 72 | ]) 73 | self.offsets_depth = nn.ModuleList([Offset(init_value=b) for b in self.mean_depth_per_level]) 74 | 75 | self._init_weights() 76 | 77 | def _init_weights(self): 78 | 79 | for l in self.box3d_tower.modules(): 80 | if isinstance(l, nn.Conv2d): 81 | torch.nn.init.kaiming_normal_(l.weight, mode='fan_out', nonlinearity='relu') 82 | if l.bias is not None: 83 | torch.nn.init.constant_(l.bias, 0) 84 | 85 | for l in self.dense_depth.modules(): 86 | if isinstance(l, nn.Conv2d): 87 | torch.nn.init.kaiming_uniform_(l.weight, a=1) 88 | if l.bias is not None: # depth head may not have bias. 89 | torch.nn.init.constant_(l.bias, 0) 90 | 91 | def forward(self, x): 92 | assert len(x) == self.num_levels 93 | dense_depth = [] 94 | for l, features in enumerate(x): 95 | box3d_tower_out = self.box3d_tower(features) 96 | dense_depth_lvl = self.dense_depth[l](box3d_tower_out) 97 | if self.use_scale: 98 | dense_depth_lvl = self.offsets_depth[l](self.scales_depth[l](dense_depth_lvl)) 99 | dense_depth.append(dense_depth_lvl) 100 | return dense_depth 101 | 102 | 103 | @META_ARCH_REGISTRY.register() 104 | class DD3DDenseDepth(nn.Module): 105 | def __init__(self, cfg): 106 | super().__init__() 107 | self.in_features = cfg.DD3D.IN_FEATURES 108 | self.feature_locations_offset = cfg.DD3D.FEATURE_LOCATIONS_OFFSET 109 | 110 | self.backbone = build_feature_extractor(cfg) 111 | backbone_output_shape = self.backbone.output_shape() 112 | backbone_output_shape = [backbone_output_shape[f] for f in self.in_features] 113 | self.in_strides = [shape.stride for shape in backbone_output_shape] 114 | self.fcos3d_head = DD3DDenseDepthHead(cfg, backbone_output_shape) 115 | self.depth_loss = build_dense_depth_loss(cfg) 116 | 117 | self.scale_depth_by_focal_lengths = cfg.DD3D.FCOS3D.SCALE_DEPTH_BY_FOCAL_LENGTHS 118 | self.scale_depth_by_focal_lengths_factor = cfg.DD3D.FCOS3D.SCALE_DEPTH_BY_FOCAL_LENGTHS_FACTOR 119 | 120 | self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) 121 | self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) 122 | 123 | @property 124 | def device(self): 125 | return self.pixel_mean.device 126 | 127 | def preprocess_image(self, x): 128 | return (x - self.pixel_mean) / self.pixel_std 129 | 130 | def forward(self, batched_inputs): 131 | images = [x["image"].to(self.device) for x in batched_inputs] 132 | images = [self.preprocess_image(x) for x in images] 133 | 134 | if 'intrinsics' in batched_inputs[0]: 135 | intrinsics = [x['intrinsics'].to(self.device) for x in batched_inputs] 136 | else: 137 | intrinsics = None 138 | images = ImageList.from_tensors(images, self.backbone.size_divisibility,
intrinsics=intrinsics) 139 | 140 | gt_dense_depth = None 141 | if 'depth' in batched_inputs[0]: 142 | gt_dense_depth = [x["depth"].to(self.device) for x in batched_inputs] 143 | gt_dense_depth = ImageList.from_tensors( 144 | gt_dense_depth, self.backbone.size_divisibility, intrinsics=intrinsics 145 | ) 146 | 147 | features = self.backbone(images.tensor) 148 | features = [features[f] for f in self.in_features] 149 | dense_depth = self.fcos3d_head(features) 150 | 151 | inv_intrinsics = images.intrinsics.inverse() if images.intrinsics is not None else None 152 | 153 | # Upsample. 154 | dense_depth = [ 155 | aligned_bilinear(x, factor=stride, offset=self.feature_locations_offset).squeeze(1) 156 | for x, stride in zip(dense_depth, self.in_strides) 157 | ] 158 | 159 | if self.scale_depth_by_focal_lengths: 160 | assert inv_intrinsics is not None 161 | pixel_size = torch.norm(torch.stack([inv_intrinsics[:, 0, 0], inv_intrinsics[:, 1, 1]], dim=-1), dim=-1) 162 | scaled_pixel_size = (pixel_size * self.scale_depth_by_focal_lengths_factor).reshape(-1, 1, 1) 163 | dense_depth = [x / scaled_pixel_size for x in dense_depth] 164 | 165 | if self.training: 166 | losses = {} 167 | for lvl, x in enumerate(dense_depth): 168 | loss_lvl = self.depth_loss(x, gt_dense_depth.tensor)["loss_dense_depth"] 169 | loss_lvl = loss_lvl / (np.sqrt(2)**lvl) # Is sqrt(2) good? 170 | losses.update({f"loss_dense_depth_lvl_{lvl}": loss_lvl}) 171 | return losses 172 | else: 173 | raise NotImplementedError() 174 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/dense_depth_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.config.config import configurable 6 | 7 | from tridet.layers import smooth_l1_loss 8 | 9 | 10 | class DenseDepthL1Loss(nn.Module): 11 | @configurable 12 | def __init__(self, beta, min_depth=0., max_depth=100., loss_weight=1.0): 13 | super().__init__() 14 | self.beta = beta 15 | self.min_depth = min_depth 16 | self.max_depth = max_depth 17 | self.loss_weight = loss_weight 18 | 19 | @classmethod 20 | def from_config(cls, cfg): 21 | return { 22 | "beta": cfg.DD3D.FCOS3D.LOSS.SMOOTH_L1_BETA, 23 | "min_depth": cfg.DD3D.FCOS3D.MIN_DEPTH, 24 | "max_depth": cfg.DD3D.FCOS3D.MAX_DEPTH, 25 | "loss_weight": cfg.DD3D.FCOS3D.DEPTH_HEAD.LOSS_WEIGHT 26 | } 27 | 28 | def forward(self, depth_pred, depth_gt, masks=None): 29 | M = (depth_gt < self.min_depth).to(torch.float32) + (depth_gt > self.max_depth).to(torch.float32) 30 | if masks is not None: 31 | M += (1. - masks).to(torch.float32) 32 | 33 | M = M == 0. 
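# After inversion, M marks pixels with valid supervision: ground-truth depth inside
# [min_depth, max_depth] and, if `masks` is given, inside the mask. The smooth-L1 loss
# below is averaged over those pixels only.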
34 | loss = smooth_l1_loss(depth_pred[M], depth_gt[M], beta=self.beta, reduction='mean') 35 | 36 | return {"loss_dense_depth": self.loss_weight * loss} 37 | 38 | 39 | def build_dense_depth_loss(cfg): 40 | if cfg.DD3D.FCOS3D.DEPTH_HEAD.LOSS_TYPE == "L1": 41 | return DenseDepthL1Loss(cfg) 42 | else: 43 | raise ValueError(f"Unsupported depth loss: {cfg.DD3D.FCOS3D.DEPTH_HEAD.LOSS_TYPE}") 44 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/depth.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from detectron2.layers import Conv2d 5 | from tridet.utils.geometry import get_pixel_sizes_perspective_cams 6 | from tridet.modeling.dd3d.utils import get_fpn_out_channels 7 | 8 | class PacknetDepthHead(nn.Module): 9 | def __init__( 10 | self, 11 | net, 12 | input_shape, 13 | min_depth, 14 | max_depth, 15 | scale_depth_by_focal_length=None, # NOTE: when used as depth-as-input, disable this and do the scaling online. 16 | ): 17 | super().__init__() 18 | 19 | self.net = net(input_shape=input_shape) 20 | 21 | input_shape = self.net.output_shape() 22 | in_channels = get_fpn_out_channels(input_shape) 23 | 24 | # Predictor 25 | conv_kwargs = dict( 26 | in_channels=in_channels, 27 | out_channels=1, 28 | kernel_size=3, 29 | stride=1, 30 | bias=True, 31 | padding=1, 32 | norm=None, 33 | activation=F.sigmoid 34 | ) 35 | self.predictor = Conv2d(**conv_kwargs) 36 | 37 | self.min_depth = min_depth 38 | self.max_depth = max_depth 39 | self.scale_depth_by_focal_length = scale_depth_by_focal_length 40 | 41 | def forward(self, x, cams): 42 | net_out = self.net(x) 43 | depth = [self.predictor(x) for x in net_out] 44 | 45 | if self.scale_depth_by_focal_length is not None: 46 | pixel_size = get_pixel_sizes_perspective_cams(cams) 47 | depth = [x / (pixel_size * self.scale_depth_by_focal_length).view(-1, 1, 1, 1) for x in depth] 48 | 49 | m, M = self.min_depth, self.max_depth 50 | depth = [(M - m) * x + m for x in depth] 51 | depth = [x.clamp(min=m, max=M) for x in depth] 52 | return {'depth': depth, 'depth_head_net_out': net_out} 53 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/disentangled_box3d_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved.
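# Disentangled corner loss: for each parameter group (quaternion, projected center, depth,
# size), the predicted values are substituted into a copy of the ground-truth box and the
# smooth-L1 distance between the resulting 3D box corners and the target corners is
# penalized, so each loss term isolates the error of a single group of parameters.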
2 | import logging 3 | 4 | import torch 5 | 6 | from detectron2.config import configurable 7 | 8 | from tridet.layers import smooth_l1_loss 9 | 10 | LOG = logging.getLogger(__name__) 11 | 12 | 13 | class DisentangledBox3DLoss(): 14 | @configurable 15 | def __init__(self, smooth_l1_loss_beta, max_loss_per_group): 16 | self.smooth_l1_loss_beta = smooth_l1_loss_beta 17 | self.max_loss_per_group = max_loss_per_group 18 | 19 | @classmethod 20 | def from_config(cls, cfg): 21 | return { 22 | "smooth_l1_loss_beta": cfg.DD3D.FCOS3D.LOSS.SMOOTH_L1_BETA, 23 | "max_loss_per_group": cfg.DD3D.FCOS3D.LOSS.MAX_LOSS_PER_GROUP_DISENT 24 | } 25 | 26 | def __call__(self, box3d_pred, box3d_targets, locations, weights=None): 27 | 28 | box3d_pred = box3d_pred.to(torch.float32) 29 | box3d_targets = box3d_targets.to(torch.float32) 30 | 31 | target_corners = box3d_targets.corners 32 | 33 | disentangled_losses = {} 34 | for component_key in ["quat", "proj_ctr", "depth", "size"]: 35 | disentangled_boxes = box3d_targets.clone() 36 | setattr(disentangled_boxes, component_key, getattr(box3d_pred, component_key)) 37 | pred_corners = disentangled_boxes.to(torch.float32).corners 38 | 39 | loss = smooth_l1_loss(pred_corners, target_corners, beta=self.smooth_l1_loss_beta) 40 | 41 | # Bound the loss 42 | loss = loss.clamp(max=self.max_loss_per_group) 43 | 44 | if weights is not None: 45 | # loss = torch.sum(loss.reshape(-1, 24) * weights.unsqueeze(-1)) 46 | loss = torch.sum(loss.reshape(-1, 24).mean(dim=1) * weights) 47 | else: 48 | loss = loss.reshape(-1, 24).mean() 49 | 50 | disentangled_losses["loss_box3d_" + component_key] = loss 51 | 52 | entangled_l1_dist = (target_corners - box3d_pred.corners).detach().abs().reshape(-1, 24).mean(dim=1) 53 | 54 | return disentangled_losses, entangled_l1_dist 55 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/nuscenes_dd3d_tta.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import copy 3 | 4 | import numpy as np 5 | import torch 6 | from torch import nn 7 | from torch.nn.parallel import DistributedDataParallel 8 | 9 | from detectron2.data.detection_utils import read_image 10 | from detectron2.layers import batched_nms 11 | from detectron2.structures import Boxes, Instances 12 | from detectron2.utils.comm import get_world_size 13 | 14 | from tridet.layers import bev_nms 15 | from tridet.modeling.dd3d.nuscenes_dd3d import NuscenesDD3D 16 | from tridet.modeling.dd3d.postprocessing import get_group_idxs, nuscenes_sample_aggregate 17 | from tridet.modeling.dd3d.test_time_augmentation import DatasetMapperTTA 18 | from tridet.structures.boxes3d import Boxes3D 19 | 20 | 21 | class NuscenesDD3DWithTTA(nn.Module): 22 | def __init__(self, cfg, model, tta_mapper=None): 23 | super().__init__() 24 | if isinstance(model, DistributedDataParallel): 25 | model = model.module 26 | assert isinstance(model, NuscenesDD3D), \ 27 | "NuscenesDD3DWithTTA only supports NuscenesDD3D. Got a model of type {}".format(type(model)) 28 | 29 | assert not model.postprocess_in_inference, "To use test-time augmentation, `postprocess_in_inference` must be False."
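# With the wrapped model's post-processing disabled, its predictions stay in the augmented
# input frame, so the inverse transforms in _get_augmented_instances() can map boxes back
# to the original image before merging and running NMS.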
30 | self.cfg = cfg.copy() 31 | 32 | self.model = model 33 | self.nms_thresh = cfg.DD3D.FCOS2D.INFERENCE.NMS_THRESH 34 | 35 | if tta_mapper is None: 36 | tta_mapper = DatasetMapperTTA(cfg) 37 | self.tta_mapper = tta_mapper 38 | self.batch_size = cfg.TEST.IMS_PER_BATCH // get_world_size() 39 | 40 | def __call__(self, batched_inputs): 41 | """ 42 | Same input/output format as :meth:`NuscenesDD3D` 43 | """ 44 | def _maybe_read_image(dataset_dict): 45 | ret = copy.copy(dataset_dict) 46 | if "image" not in ret: 47 | image = read_image(ret.pop("file_name"), self.tta_mapper.image_format) 48 | image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW 49 | ret["image"] = image 50 | if "height" not in ret and "width" not in ret: 51 | ret["height"] = image.shape[1] 52 | ret["width"] = image.shape[2] 53 | return ret 54 | 55 | instances_per_image = [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] 56 | 57 | # ---------------------------------------------------------- 58 | # NuScenes specific: cross-image (i.e. sample-level) BEV NMS. 59 | # ---------------------------------------------------------- 60 | sample_tokens = [x['sample_token'] for x in batched_inputs] 61 | group_idxs = get_group_idxs(sample_tokens, self.model.num_images_per_sample) 62 | global_poses = [x['pose'] for x in batched_inputs] 63 | 64 | filtered_instances = nuscenes_sample_aggregate( 65 | instances_per_image, 66 | group_idxs, 67 | self.model.num_classes, 68 | global_poses, 69 | self.model.bev_nms_iou_thresh, 70 | max_num_dets_per_sample=self.model.max_num_dets_per_sample 71 | ) 72 | 73 | return [{'instances': instances} for instances in filtered_instances] 74 | 75 | def _inference_one_image(self, x): 76 | """ 77 | Args: 78 | x (dict): one dataset dict with "image" field being a CHW tensor 79 | 80 | Returns: 81 | dict: one output dict 82 | """ 83 | orig_shape = (x["height"], x["width"]) 84 | augmented_inputs, tfms = self._get_augmented_inputs(x) 85 | merged_instances = self._get_augmented_instances(augmented_inputs, tfms, orig_shape) 86 | if len(merged_instances) > 0: 87 | if self.model.do_nms: 88 | # Multiclass NMS. 89 | keep = batched_nms( 90 | merged_instances.pred_boxes.tensor, merged_instances.scores_3d, merged_instances.pred_classes, 91 | self.nms_thresh 92 | ) 93 | merged_instances = merged_instances[keep] 94 | 95 | if not self.model.only_box2d and self.model.do_bev_nms > 0: 96 | # Bird-eye-view NMS. 
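# Duplicates coming from different augmentations of the same object can survive 2D NMS while
# still overlapping on the ground plane; class-aware BEV NMS removes them using the 3D scores.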
97 | keep = bev_nms( 98 | merged_instances.pred_boxes3d, 99 | merged_instances.scores_3d, 100 | self.model.bev_nms_iou_thresh, 101 | class_idxs=merged_instances.pred_classes, 102 | class_agnostic=False 103 | ) 104 | merged_instances = merged_instances[keep] 105 | 106 | return merged_instances 107 | 108 | def _get_augmented_inputs(self, x): 109 | augmented_inputs = self.tta_mapper(x) 110 | tfms = [x.pop("transforms") for x in augmented_inputs] 111 | return augmented_inputs, tfms 112 | 113 | def _get_augmented_instances(self, augmented_inputs, tfms, orig_shape): 114 | # 1: forward with all augmented images 115 | outputs = self._batch_inference(augmented_inputs) 116 | # 2: union the results 117 | all_boxes = [] 118 | all_boxes3d = [] 119 | 120 | for input, output, tfm in zip(augmented_inputs, outputs, tfms): 121 | # Need to inverse the transforms on boxes, to obtain results on original image 122 | inv_tfm = tfm.inverse() 123 | 124 | # 2D boxes 125 | pred_boxes = output.pred_boxes.tensor 126 | orig_pred_boxes = inv_tfm.apply_box(pred_boxes.cpu().numpy()) 127 | orig_pred_boxes = torch.from_numpy(orig_pred_boxes).to(pred_boxes.device) 128 | all_boxes.append(Boxes(orig_pred_boxes)) 129 | 130 | # 3D boxes 131 | pred_boxes_3d = output.pred_boxes3d 132 | vectorized_boxes_3d = pred_boxes_3d.vectorize().cpu().numpy() 133 | orig_vec_pred_boxes_3d = [inv_tfm.apply_box3d(box3d_as_vec) for box3d_as_vec in vectorized_boxes_3d] 134 | 135 | # intrinsics 136 | orig_intrinsics = inv_tfm.apply_intrinsics(input['intrinsics'].cpu().numpy()) 137 | orig_pred_boxes_3d = Boxes3D.from_vectors( 138 | orig_vec_pred_boxes_3d, orig_intrinsics, device=pred_boxes_3d.device 139 | ) 140 | all_boxes3d.append(orig_pred_boxes_3d) 141 | 142 | all_boxes = Boxes.cat(all_boxes) 143 | all_boxes3d = Boxes3D.cat(all_boxes3d) 144 | 145 | all_scores = torch.cat([x.scores for x in outputs]) 146 | all_scores_3d = torch.cat([x.scores_3d for x in outputs]) 147 | all_classes = torch.cat([x.pred_classes for x in outputs]) 148 | 149 | all_attributes = torch.cat([x.pred_attributes for x in outputs]) 150 | all_speeds = torch.cat([x.pred_speeds for x in outputs]) 151 | 152 | return Instances( 153 | image_size=orig_shape, 154 | pred_boxes=all_boxes, 155 | pred_boxes3d=all_boxes3d, 156 | pred_classes=all_classes, 157 | scores=all_scores, 158 | scores_3d=all_scores_3d, 159 | pred_attributes=all_attributes, 160 | pred_speeds=all_speeds 161 | ) 162 | 163 | def _batch_inference(self, batched_inputs): 164 | """ 165 | Execute inference on a list of inputs, 166 | using batch size = self.batch_size, instead of the length of the list. 167 | 168 | Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` 169 | """ 170 | outputs = [] 171 | inputs = [] 172 | for idx, x in enumerate(batched_inputs): 173 | inputs.append(x) 174 | if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: 175 | # This runs NMS (box and optionally bev) per each augmented image. 176 | outputs.extend([res['instances'] for res in self.model(inputs)]) 177 | inputs = [] 178 | return outputs 179 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
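# Sample-level aggregation for nuScenes: per-camera detections are lifted into a shared global
# frame with each camera's pose and de-duplicated with class-aware BEV NMS.
# A minimal sketch of the grouping helper below, with hypothetical sample tokens:
#   get_group_idxs(["t0", "t0", "t1", "t1"], num_images_per_sample=2)
#   # -> {"t0": [0, 1], "t1": [2, 3]}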
2 | from collections import OrderedDict, defaultdict 3 | from pprint import pprint 4 | 5 | import torch 6 | from pytorch3d.transforms import transform3d as t3d 7 | from pytorch3d.transforms.rotation_conversions import matrix_to_quaternion, quaternion_to_matrix 8 | 9 | from detectron2.structures import Instances 10 | 11 | from tridet.layers import bev_nms 12 | from tridet.structures.boxes3d import GenericBoxes3D 13 | from tridet.structures.pose import Pose 14 | 15 | 16 | def _indices_to_mask(indices, size): 17 | mask = indices.new_zeros(size, dtype=torch.bool) 18 | mask[indices] = True 19 | return mask 20 | 21 | 22 | def sample_bev_nms(instances, poses, category_key='pred_classes', iou_threshold=0.3): 23 | boxes3d_global = [] 24 | for _instances, pose in zip(instances, poses): 25 | # pose_SO 26 | box3d_vec = _instances.pred_boxes3d.vectorize() 27 | quat, tvec, wlh = box3d_vec[:, :4], box3d_vec[:, 4:7], box3d_vec[:, 7:10] 28 | R = quaternion_to_matrix(quat) 29 | rotation = t3d.Rotate(R=R.transpose(1, 2), device=quat.device) 30 | translation = t3d.Translate(tvec, device=quat.device) 31 | tfm_SO = rotation.compose(translation) 32 | 33 | # pose_WS 34 | quat, tvec = quat.new(pose.quat.elements), tvec.new(pose.tvec) 35 | R = quaternion_to_matrix(quat) 36 | rotation = t3d.Rotate(R=R.transpose(0, 1), device=quat.device) 37 | translation = t3d.Translate(tvec.unsqueeze(0), device=quat.device) 38 | tfm_WS = rotation.compose(translation) 39 | 40 | # boxes in global frame. 41 | tfm_WO = tfm_SO.compose(tfm_WS) 42 | pose_WO = tfm_WO.get_matrix().transpose(1, 2) 43 | rotation = pose_WO[:, :3, :3] 44 | quat = matrix_to_quaternion(rotation) 45 | tvec = pose_WO[:, :3, -1] 46 | 47 | boxes3d_global.append(torch.hstack([quat, tvec, wlh])) 48 | 49 | boxes3d_global = torch.vstack(boxes3d_global) 50 | boxes3d_global = GenericBoxes3D(boxes3d_global[:, :4], boxes3d_global[:, 4:7], boxes3d_global[:, 7:]) 51 | 52 | _ids = torch.cat([x.get(category_key) for x in instances]) 53 | scores = torch.cat([x.scores_3d for x in instances]) 54 | keep = bev_nms(boxes3d_global, scores, iou_threshold, pose_cam_global=Pose(), class_idxs=_ids) 55 | return keep, boxes3d_global 56 | 57 | 58 | def nuscenes_sample_aggregate( 59 | instances, 60 | group_idxs, 61 | num_classes, 62 | global_poses, 63 | iou_threshold, 64 | include_boxes3d_global=True, 65 | max_num_dets_per_sample=None 66 | ): 67 | """ 68 | Parameters 69 | ---------- 70 | instances: List[Instances] 71 | Predicted instances. 72 | 73 | group_idxs: dict 74 | Mapping from nuScene's `sample_token` to a list of indices of `instances.` 75 | 76 | num_classes: int 77 | Number of classes. 78 | 79 | pose_global: List[Pose] 80 | List of global poses for each image (or Instances) 81 | """ 82 | num_images = len(instances) 83 | for group_idx, (_, idxs) in enumerate(group_idxs.items()): 84 | group_id = group_idx * num_classes 85 | for idx in idxs: 86 | instances[idx].image_id = torch.ones_like(instances[idx].pred_classes) * idx 87 | instances[idx].sample_category_id = instances[idx].pred_classes + group_id 88 | keep, boxes3d_global = sample_bev_nms( 89 | instances, global_poses, category_key='sample_category_id', iou_threshold=iou_threshold 90 | ) 91 | 92 | # NOTE: NuScenes allow max. 
500 detections per sample 93 | if max_num_dets_per_sample: 94 | keep = keep[:max_num_dets_per_sample] 95 | 96 | instances = Instances.cat(instances) 97 | if include_boxes3d_global: 98 | instances.pred_boxes3d_global = boxes3d_global 99 | instances.remove('sample_category_id') 100 | 101 | mask = _indices_to_mask(keep, len(instances)) 102 | _filtered_instances = instances[mask] 103 | filtered_instances = [] 104 | for image_id in range(num_images): 105 | _instances = _filtered_instances[_filtered_instances.image_id == image_id] 106 | _instances.remove('image_id') 107 | filtered_instances.append(_instances) 108 | return filtered_instances 109 | 110 | 111 | def get_group_idxs(sample_tokens, num_images_per_sample, inverse=False): 112 | grouped_idxs = defaultdict(list) 113 | for idx, token in enumerate(sample_tokens): 114 | grouped_idxs[token].append(idx) 115 | group_sizes = {token: len(idxs) for token, idxs in grouped_idxs.items()} 116 | 117 | if not all([siz == num_images_per_sample for siz in group_sizes.values()]): 118 | pprint(group_sizes) 119 | raise ValueError("Group sizes does not match with 'num_images_per_sample'.") 120 | 121 | token_to_idxs = OrderedDict(grouped_idxs) 122 | if not inverse: 123 | return token_to_idxs 124 | else: 125 | idx_to_token = OrderedDict() 126 | for token, idxs in token_to_idxs.items(): 127 | for idx in idxs: 128 | idx_to_token[idx] = token 129 | return idx_to_token 130 | -------------------------------------------------------------------------------- /tridet/modeling/dd3d/utils.py: -------------------------------------------------------------------------------- 1 | 2 | def get_fpn_out_channels(output_shape): 3 | out_channels = [] 4 | if isinstance(output_shape, list): 5 | out_channels = [x.channels for x in output_shape] 6 | elif isinstance(output_shape, dict): 7 | out_channels = [x.channels for x in output_shape.values()] 8 | assert len(set(out_channels)) == 1, "The feature extractor must produce same channels of features for all levels." 9 | out_channels = out_channels[0] 10 | return out_channels 11 | -------------------------------------------------------------------------------- /tridet/modeling/feature_extractor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling.backbone import BACKBONE_REGISTRY, Backbone 4 | 5 | from tridet.modeling.feature_extractor.dla import ( 6 | build_dla_backbone, build_dla_fpn_backbone, build_fcos_dla_fpn_backbone_p6, build_fcos_dla_fpn_backbone_p67 7 | ) 8 | from tridet.modeling.feature_extractor.vovnet import ( 9 | build_fcos_vovnet_fpn_backbone_p6, build_vovnet_backbone, build_vovnet_fpn_backbone 10 | ) 11 | 12 | 13 | def build_feature_extractor(cfg, input_shape=None): 14 | """ 15 | Build a backbone from `cfg.FE.BUILDER` 16 | 17 | Returns: 18 | an instance of :class:`Backbone` 19 | """ 20 | if input_shape is None: 21 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 22 | 23 | builder_name = cfg.FE.BUILDER 24 | feature_extractor = BACKBONE_REGISTRY.get(builder_name)(cfg, input_shape) 25 | assert isinstance(feature_extractor, Backbone) 26 | return feature_extractor 27 | -------------------------------------------------------------------------------- /tridet/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | from tridet.structures.image_list import ImageList 3 | -------------------------------------------------------------------------------- /tridet/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | from __future__ import division 4 | 5 | from typing import Any, List, Sequence, Tuple 6 | 7 | import torch 8 | from torch import device 9 | from torch.nn import functional as F 10 | 11 | from detectron2.utils.env import TORCH_VERSION 12 | 13 | 14 | def _as_tensor(x: Tuple[int, int]) -> torch.Tensor: 15 | """ 16 | An equivalent of `torch.as_tensor`, but works under tracing if input 17 | is a list of tensor. `torch.as_tensor` will record a constant in tracing, 18 | but this function will use `torch.stack` instead. 19 | """ 20 | if torch.jit.is_scripting(): 21 | return torch.as_tensor(x) 22 | if isinstance(x, (list, tuple)) and all([isinstance(t, torch.Tensor) for t in x]): 23 | return torch.stack(x) 24 | return torch.as_tensor(x) 25 | 26 | 27 | class ImageList(object): 28 | """ 29 | Adapted from detectron2: 30 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/structures/image_list.py) 31 | 32 | Key differences: 33 | - add optional intrinsics 34 | - add optional image path (useful for debugging) 35 | ================================================================================================================== 36 | 37 | Structure that holds a list of images (of possibly 38 | varying sizes) as a single tensor. 39 | This works by padding the images to the same size, 40 | and storing in a field the original sizes of each image 41 | 42 | Attributes: 43 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 44 | """ 45 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]], intrinsics=None, image_paths=None): 46 | """ 47 | Arguments: 48 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 49 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can 50 | be smaller than (H, W) due to padding. 51 | """ 52 | self.tensor = tensor 53 | self.image_sizes = image_sizes 54 | self._intrinsics = intrinsics 55 | self._image_paths = image_paths 56 | 57 | @property 58 | def intrinsics(self): 59 | if torch.allclose(self._intrinsics[0], torch.eye(3, device=self._intrinsics.device)): 60 | # TODO: torch.inverse(images.intrinsics) often return identity, when it shouldn't. Is it pytorch bug? 61 | raise ValueError("Intrinsics is Identity.") 62 | return self._intrinsics 63 | 64 | @property 65 | def image_paths(self): 66 | return self._image_paths 67 | 68 | def __len__(self) -> int: 69 | return len(self.image_sizes) 70 | 71 | def __getitem__(self, idx) -> torch.Tensor: 72 | """ 73 | Access the individual image in its original size. 
74 | 75 | Args: 76 | idx: int or slice 77 | 78 | Returns: 79 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 80 | """ 81 | size = self.image_sizes[idx] 82 | return self.tensor[idx, ..., :size[0], :size[1]] 83 | 84 | @torch.jit.unused 85 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 86 | cast_tensor = self.tensor.to(*args, **kwargs) 87 | return ImageList(cast_tensor, self.image_sizes, intrinsics=self.intrinsics) 88 | 89 | @property 90 | def device(self) -> device: 91 | return self.tensor.device 92 | 93 | @staticmethod 94 | def from_tensors( 95 | tensors: List[torch.Tensor], 96 | size_divisibility: int = 0, 97 | pad_value: float = 0.0, 98 | intrinsics=None, 99 | image_paths=None 100 | ) -> "ImageList": 101 | """ 102 | Args: 103 | tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or 104 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded 105 | to the same shape with `pad_value`. 106 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure 107 | the common height and width is divisible by `size_divisibility`. 108 | This depends on the model and many models need a divisibility of 32. 109 | pad_value (float): value to pad 110 | 111 | Returns: 112 | an `ImageList`. 113 | """ 114 | assert len(tensors) > 0 115 | assert isinstance(tensors, (tuple, list)) 116 | for t in tensors: 117 | assert isinstance(t, torch.Tensor), type(t) 118 | assert t.shape[:-2] == tensors[0].shape[:-2], t.shape 119 | 120 | image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] 121 | image_sizes_tensor = [_as_tensor(x) for x in image_sizes] 122 | max_size = torch.stack(image_sizes_tensor).max(0).values 123 | 124 | if size_divisibility > 1: 125 | stride = size_divisibility 126 | # the last two dims are H,W, both subject to divisibility requirement 127 | max_size = torch.div(max_size + (stride - 1), stride, rounding_mode='floor') * stride 128 | 129 | # handle weirdness of scripting and tracing ... 130 | if torch.jit.is_scripting(): 131 | max_size: List[int] = max_size.to(dtype=torch.long).tolist() 132 | else: 133 | # https://github.com/pytorch/pytorch/issues/42448 134 | if TORCH_VERSION >= (1, 7) and torch.jit.is_tracing(): 135 | image_sizes = image_sizes_tensor 136 | 137 | if len(tensors) == 1: 138 | # This seems slightly (2%) faster. 
139 | # TODO: check whether it's faster for multiple images as well 140 | image_size = image_sizes[0] 141 | padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] 142 | batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) 143 | else: 144 | # max_size can be a tensor in tracing mode, therefore convert to list 145 | batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) 146 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 147 | for img, pad_img in zip(tensors, batched_imgs): 148 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 149 | 150 | if intrinsics is not None: 151 | assert isinstance(intrinsics, (tuple, list)) 152 | assert len(intrinsics) == len(tensors) 153 | intrinsics = torch.stack(intrinsics, dim=0) 154 | 155 | if image_paths is not None: 156 | assert len(image_paths) == len(tensors) 157 | 158 | return ImageList(batched_imgs.contiguous(), image_sizes, intrinsics, image_paths) 159 | -------------------------------------------------------------------------------- /tridet/structures/pose.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import numpy as np 3 | from pyquaternion import Quaternion 4 | 5 | 6 | class Pose: 7 | """SE(3) rigid transform class that allows compounding of 6-DOF poses 8 | and provides common transformations that are commonly seen in geometric problems. 9 | """ 10 | def __init__(self, wxyz=np.float32([1., 0., 0., 0.]), tvec=np.float32([0., 0., 0.])): 11 | """Initialize a Pose with Quaternion and 3D Position 12 | 13 | Parameters 14 | ---------- 15 | wxyz: np.float32 or Quaternion (default: np.float32([1,0,0,0])) 16 | Quaternion/Rotation (wxyz) 17 | 18 | tvec: np.float32 (default: np.float32([0,0,0])) 19 | Translation (xyz) 20 | """ 21 | assert isinstance(wxyz, (np.ndarray, Quaternion)) 22 | assert isinstance(tvec, np.ndarray) 23 | 24 | if isinstance(wxyz, np.ndarray): 25 | assert np.abs(1.0 - np.linalg.norm(wxyz)) < 1.0e-3 26 | 27 | self.quat = Quaternion(wxyz) 28 | self.tvec = tvec 29 | 30 | def __repr__(self): 31 | formatter = {'float_kind': lambda x: '%.2f' % x} 32 | tvec_str = np.array2string(self.tvec, formatter=formatter) 33 | return 'wxyz: {}, tvec: ({})'.format(self.quat, tvec_str) 34 | 35 | def copy(self): 36 | """Return a copy of this pose object. 37 | 38 | Returns 39 | ---------- 40 | result: Pose 41 | Copied pose object. 42 | """ 43 | return self.__class__(Quaternion(self.quat), self.tvec.copy()) 44 | 45 | def __mul__(self, other): 46 | """Left-multiply Pose with another Pose or 3D-Points. 47 | 48 | Parameters 49 | ---------- 50 | other: Pose or np.ndarray 51 | 1. Pose: Identical to oplus operation. 52 | (i.e. self_pose * other_pose) 53 | 2. ndarray: transform [N x 3] point set 54 | (i.e. 
X' = self_pose * X) 55 | 56 | Returns 57 | ---------- 58 | result: Pose or np.ndarray 59 | Transformed pose or point cloud 60 | """ 61 | if isinstance(other, Pose): 62 | assert isinstance(other, self.__class__) 63 | t = self.quat.rotate(other.tvec) + self.tvec 64 | q = self.quat * other.quat 65 | return self.__class__(q, t) 66 | elif isinstance(other, np.ndarray): 67 | assert other.shape[-1] == 3, 'Point cloud is not 3-dimensional' 68 | X = np.hstack([other, np.ones((len(other), 1))]).T 69 | return (np.dot(self.matrix, X).T)[:, :3] 70 | else: 71 | return NotImplemented 72 | 73 | def __rmul__(self, other): 74 | raise NotImplementedError('Right multiply not implemented yet!') 75 | 76 | def inverse(self): 77 | """Returns a new Pose that corresponds to the 78 | inverse of this one. 79 | 80 | Returns 81 | ---------- 82 | result: Pose 83 | Inverted pose 84 | """ 85 | qinv = self.quat.inverse 86 | return self.__class__(qinv, qinv.rotate(-self.tvec)) 87 | 88 | @property 89 | def matrix(self): 90 | """Returns a 4x4 homogeneous matrix of the form [R t; 0 1] 91 | 92 | Returns 93 | ---------- 94 | result: np.ndarray 95 | 4x4 homogeneous matrix 96 | """ 97 | result = self.quat.transformation_matrix 98 | result[:3, 3] = self.tvec 99 | return result 100 | 101 | @property 102 | def rotation_matrix(self): 103 | """Returns the 3x3 rotation matrix (R) 104 | 105 | Returns 106 | ---------- 107 | result: np.ndarray 108 | 3x3 rotation matrix 109 | """ 110 | result = self.quat.transformation_matrix 111 | return result[:3, :3] 112 | 113 | @property 114 | def rotation(self): 115 | """Return the rotation component of the pose as a Quaternion object. 116 | 117 | Returns 118 | ---------- 119 | self.quat: Quaternion 120 | Rotation component of the Pose object. 121 | """ 122 | return self.quat 123 | 124 | @property 125 | def translation(self): 126 | """Return the translation component of the pose as a np.ndarray. 127 | 128 | Returns 129 | ---------- 130 | self.tvec: np.ndarray 131 | Translation component of the Pose object. 132 | """ 133 | return self.tvec 134 | 135 | @classmethod 136 | def from_matrix(cls, transformation_matrix): 137 | """Initialize pose from 4x4 transformation matrix 138 | 139 | Parameters 140 | ---------- 141 | transformation_matrix: np.ndarray 142 | 4x4 containing rotation/translation 143 | 144 | Returns 145 | ------- 146 | Pose 147 | """ 148 | return cls(wxyz=Quaternion(matrix=transformation_matrix[:3, :3]), tvec=np.float32(transformation_matrix[:3, 3])) 149 | 150 | @classmethod 151 | def from_rotation_translation(cls, rotation_matrix, tvec): 152 | """Initialize pose from rotation matrix and translation vector. 153 | 154 | Parameters 155 | ---------- 156 | rotation_matrix : np.ndarray 157 | 3x3 rotation matrix 158 | tvec : np.ndarray 159 | length-3 translation vector 160 | """ 161 | return cls(wxyz=Quaternion(matrix=rotation_matrix), tvec=np.float64(tvec)) 162 | 163 | def __eq__(self, other): 164 | return self.quat == other.quat and (self.tvec == other.tvec).all() 165 | -------------------------------------------------------------------------------- /tridet/utils/comm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
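# Distributed-training helpers built on detectron2.utils.comm and MPI: decorators that run a
# function only on the master process (optionally broadcasting its result to every worker),
# plus utilities to gather or sum values across workers.
# For example (hypothetical per-rank dict), if every rank calls
#   gather_dict({f"shard_{rank}": metrics})
# rank 0 receives the merged dict and all other ranks get None.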
2 | import logging 3 | from functools import wraps 4 | 5 | import torch.distributed as dist 6 | from mpi4py import MPI # pylint: disable=unused-import 7 | 8 | from detectron2.utils import comm as d2_comm 9 | 10 | LOG = logging.getLogger(__name__) 11 | 12 | _NESTED_BROADCAST_FROM_MASTER = False 13 | 14 | 15 | def is_distributed(): 16 | return d2_comm.get_world_size() > 1 17 | 18 | 19 | def broadcast_from_master(fn): 20 | """If distributed, only the master executes the function and broadcast the results to other workers. 21 | 22 | Usage: 23 | @broadcast_from_master 24 | def foo(a, b): ... 25 | """ 26 | @wraps(fn) 27 | def wrapper(*args, **kwargs): # pylint: disable=unused-argument 28 | global _NESTED_BROADCAST_FROM_MASTER 29 | 30 | if not is_distributed(): 31 | return fn(*args, **kwargs) 32 | 33 | if _NESTED_BROADCAST_FROM_MASTER: 34 | assert d2_comm.is_main_process() 35 | LOG.warning(f"_NESTED_BROADCAST_FROM_MASTER = True, {fn.__name__}") 36 | return fn(*args, **kwargs) 37 | 38 | if d2_comm.is_main_process(): 39 | _NESTED_BROADCAST_FROM_MASTER = True 40 | ret = fn(*args, **kwargs) 41 | _NESTED_BROADCAST_FROM_MASTER = False 42 | else: 43 | ret = None 44 | 45 | ret = MPI.COMM_WORLD.bcast(ret, root=0) 46 | 47 | assert ret is not None 48 | return ret 49 | 50 | return wrapper 51 | 52 | 53 | def master_only(fn): 54 | """If distributed, only the master executes the function. 55 | 56 | Usage: 57 | @master_only 58 | def foo(a, b): ... 59 | """ 60 | @wraps(fn) 61 | def wrapped_fn(*args, **kwargs): 62 | if d2_comm.is_main_process(): 63 | ret = fn(*args, **kwargs) 64 | d2_comm.synchronize() 65 | if d2_comm.is_main_process(): 66 | return ret 67 | 68 | return wrapped_fn 69 | 70 | 71 | def gather_dict(dikt): 72 | """Gather python dictionaries from all workers to the rank=0 worker. 73 | 74 | Assumption: the keys of `dikt` are disjoint across all workers. 75 | 76 | If rank = 0, then returned aggregated dict. 77 | If rank > 0, then return `None`. 78 | """ 79 | dict_lst = d2_comm.gather(dikt, dst=0) 80 | if d2_comm.is_main_process(): 81 | gathered_dict = {} 82 | for dic in dict_lst: 83 | for k in dic.keys(): 84 | assert k not in gathered_dict, f"Dictionary key overlaps: {k}" 85 | gathered_dict.update(dic) 86 | return gathered_dict 87 | else: 88 | return None 89 | 90 | 91 | def reduce_sum(tensor): 92 | """ 93 | Adapted from AdelaiDet: 94 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 95 | """ 96 | if not is_distributed(): 97 | return tensor 98 | tensor = tensor.clone() 99 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 100 | return tensor 101 | -------------------------------------------------------------------------------- /tridet/utils/events.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
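#
# Usage sketch for `WandbEventStorage` (defined below). `setup()` in tridet/utils/setup.py
# pushes an instance onto detectron2's event-storage stack, so callers use the usual
# detectron2 idiom; the metric name and value here are hypothetical.
#
#     from detectron2.utils.events import get_event_storage
#     get_event_storage().put_scalar("train/total_loss", 0.123, wandb_log=True)
#
# On the master process this also mirrors the scalar to W&B when a run is active.
#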
3 | # Adapted from detectron2: 4 | # https://github.com/facebookresearch/detectron2/blob/master/detectron2/utils/events.py 5 | import wandb 6 | from detectron2.utils.events import EventStorage 7 | 8 | from tridet.utils.comm import master_only 9 | 10 | 11 | class WandbEventStorage(EventStorage): 12 | 13 | @master_only 14 | def put_scalar(self, name, value, smoothing_hint=True, wandb_log=True): 15 | super().put_scalar(name, value, smoothing_hint=smoothing_hint) 16 | 17 | # Add W&B logging 18 | name = self._current_prefix + name 19 | value = float(value) 20 | if wandb_log and wandb.run: 21 | wandb.log({name: value}, step=self.iter) 22 | -------------------------------------------------------------------------------- /tridet/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | from pytorch3d.transforms.rotation_conversions import matrix_to_quaternion, quaternion_to_matrix 8 | 9 | LOG = logging.getLogger(__name__) 10 | 11 | PI = 3.14159265358979323846 12 | EPS = 1e-7 13 | 14 | 15 | def allocentric_to_egocentric(quat, proj_ctr, inv_intrinsics): 16 | """ 17 | Parameters 18 | ---------- 19 | quat: Tensor 20 | (N, 4). Batch of (allocentric) quaternions. 21 | 22 | proj_ctr: Tensor 23 | (N, 2). Projected centers. xy coordinates. 24 | 25 | inv_intrinsics: Tensor 26 | (N, 3, 3). Inverted intrinsics. 27 | """ 28 | R_obj_to_local = quaternion_to_matrix(quat) 29 | 30 | # ray == z-axis in local orientation 31 | ray = unproject_points2d(proj_ctr, inv_intrinsics) 32 | z = ray / ray.norm(dim=1, keepdim=True) 33 | 34 | # Gram-Schmidt process: local_y = global_y - (global_y \dot local_z) * local_z 35 | y = z.new_tensor([[0., 1., 0.]]) - z[:, 1:2] * z 36 | y = y / y.norm(dim=1, keepdim=True) 37 | x = torch.cross(y, z, dim=1) 38 | 39 | # local -> global 40 | R_local_to_global = torch.stack([x, y, z], dim=-1) 41 | 42 | # obj -> global 43 | R_obj_to_global = torch.bmm(R_local_to_global, R_obj_to_local) 44 | 45 | egocentric_quat = matrix_to_quaternion(R_obj_to_global) 46 | 47 | # Make sure it's unit norm. 48 | quat_norm = egocentric_quat.norm(dim=1, keepdim=True) 49 | if not torch.allclose(quat_norm, torch.as_tensor(1.), atol=1e-3): 50 | LOG.warning( 51 | f"Some of the input quaternions are not unit norm: min={quat_norm.min()}, max={quat_norm.max()}; therefore normalizing." 52 | ) 53 | egocentric_quat = egocentric_quat / quat_norm.clamp(min=EPS) 54 | 55 | return egocentric_quat 56 | 57 | 58 | def homogenize_points(xy): 59 | """ 60 | Parameters 61 | ---------- 62 | xy: Tensor 63 | xy coordinates. shape=(N, ..., 2) 64 | E.g., (N, 2) or (N, K, 2) or (N, H, W, 2) 65 | 66 | Returns 67 | ------- 68 | Tensor: 69 | 1. is appended to the last dimension. shape=(N, ..., 3) 70 | E.g., (N, 3) or (N, K, 3) or (N, H, W, 3). 71 | """ 72 | # NOTE: this seems to work for an arbitrary number of input dimensions. 73 | pad = torch.nn.ConstantPad1d(padding=(0, 1), value=1.) 74 | return pad(xy) 75 | 76 | 77 | def project_points3d(Xw, K): 78 | _, C = Xw.shape 79 | assert C == 3 80 | uv, _ = cv2.projectPoints( 81 | Xw, np.zeros((3, 1), dtype=np.float32), np.zeros(3, dtype=np.float32), K, np.zeros(5, dtype=np.float32) 82 | ) 83 | return uv.reshape(-1, 2) 84 | 85 | 86 | def unproject_points2d(points2d, inv_K, scale=1.0): 87 | """ 88 | Parameters 89 | ---------- 90 | points2d: Tensor 91 | xy coordinates.
shape=(N, ..., 2) 92 | E.g., (N, 2) or (N, K, 2) or (N, H, W, 2) 93 | 94 | inv_K: Tensor 95 | Inverted intrinsics; shape=(N, 3, 3) 96 | 97 | scale: float, default: 1.0 98 | Scaling factor. 99 | 100 | Returns 101 | ------- 102 | Tensor: 103 | Unprojected 3D point. shape=(N, ..., 3) 104 | E.g., (N, 3) or (N, K, 3) or (N, H, W, 3) 105 | """ 106 | points2d = homogenize_points(points2d) 107 | siz = points2d.size() 108 | points2d = points2d.view(-1, 3).unsqueeze(-1) # (N, 3, 1) 109 | unprojected = torch.matmul(inv_K, points2d) # (N, 3, 3) x (N, 3, 1) -> (N, 3, 1) 110 | unprojected = unprojected.view(siz) 111 | 112 | return unprojected * scale 113 | 114 | 115 | def get_pixel_sizes_perspective_cams(cams): 116 | """Get physical pixel size of pinhole cameras. 117 | 118 | ((1 / fx) ** 2 + (1 / fy) ** 2)).sqrt() 119 | 120 | Parameters 121 | ---------- 122 | cams: PerspectiveCameras 123 | [description] 124 | """ 125 | inv_intrinsics = cams.get_projection_transform().inverse().get_matrix() 126 | return inv_intrinsics.diagonal(dim1=1, dim2=2).norm(dim=1) 127 | -------------------------------------------------------------------------------- /tridet/utils/hydra/callbacks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from hydra.experimental.callback import Callback 5 | from mpi4py import MPI 6 | 7 | from detectron2.utils import comm as d2_comm 8 | from detectron2.utils.logger import setup_logger 9 | 10 | from tridet.utils.s3 import aws_credential_is_available, maybe_download_ckpt_from_url, sync_output_dir_s3 11 | from tridet.utils.setup import setup_distributed 12 | from tridet.utils.wandb import derive_output_dir_from_wandb_id, init_wandb, wandb_credential_is_available 13 | 14 | LOG = logging.getLogger(__name__) 15 | 16 | 17 | class SetupDistributedCallback(Callback): 18 | """ 19 | """ 20 | def on_run_start(self, config, **kwargs): # pylint: disable=unused-argument 21 | world_size = MPI.COMM_WORLD.Get_size() 22 | distributed = world_size > 1 23 | if distributed: 24 | rank = MPI.COMM_WORLD.Get_rank() 25 | setup_distributed(world_size, rank) 26 | 27 | def on_job_start(self, config, **kwargs): # pylint: disable=unused-argument 28 | world_size = d2_comm.get_world_size() 29 | rank = d2_comm.get_rank() 30 | LOG.info("Rank of current process: {}. World size: {}".format(rank, world_size)) 31 | 32 | 33 | class WandbInitCallback(Callback): 34 | """If W&B is enabled, then 35 | 1) initialize W&B, 36 | 2) derive the path of output directory using W&B ID, and 37 | 3) set it as hydra working directory. 38 | """ 39 | def on_run_start(self, config, **kwargs): # pylint: disable=unused-argument 40 | if not config.WANDB.ENABLED: 41 | return 42 | if not wandb_credential_is_available(): 43 | LOG.warning( 44 | "W&B credential must be defined in environment variables." 45 | "Use `WANDB.ENABLED=False` to suppress this warning. " 46 | "Skipping `WandbInitCallback`..." 47 | ) 48 | return 49 | 50 | init_wandb(config) 51 | output_dir = derive_output_dir_from_wandb_id(config) 52 | if output_dir: 53 | config.hydra.run.dir = output_dir 54 | 55 | 56 | class SyncOutputDirCallback(Callback): 57 | def on_run_start(self, config, **kwargs): # pylint: disable=unused-argument 58 | if d2_comm.is_main_process(): 59 | output_dir = config.hydra.run.dir 60 | else: 61 | output_dir = None 62 | output_dir = MPI.COMM_WORLD.bcast(output_dir, root=0) 63 | 64 | if output_dir != config.hydra.run.dir: 65 | LOG.warning("Hydra run dir is not synced. 
Overwriting from rank=0.") 66 | config.hydra.run.dir = output_dir 67 | 68 | 69 | class D2LoggerCallback(Callback): 70 | def on_run_start(self, config, **kwargs): # pylint: disable=unused-argument 71 | rank = d2_comm.get_rank() 72 | log_output_dir = os.path.join(config.hydra.run.dir, 'logs') 73 | setup_logger(log_output_dir, distributed_rank=rank, name="hydra") 74 | setup_logger(log_output_dir, distributed_rank=rank, name="detectron2", abbrev_name="d2") 75 | setup_logger(log_output_dir, distributed_rank=rank, name="tridet") 76 | setup_logger(log_output_dir, distributed_rank=rank, name="fvcore") 77 | 78 | logging.getLogger('numba').setLevel(logging.ERROR) # too much logs 79 | 80 | 81 | class CkptPathResolverCallback(Callback): 82 | """ 83 | If the checkpoint (`config.model.CKPT`) is an S3 path, then downloaded it and replace the path with 84 | local path. 85 | """ 86 | def on_run_start(self, config, **kwargs): # pylint: disable=unused-argument 87 | if config.MODEL.CKPT: 88 | new_ckpt_path = maybe_download_ckpt_from_url(config) 89 | new_ckpt_path = os.path.abspath(new_ckpt_path) 90 | config.MODEL.CKPT = new_ckpt_path 91 | 92 | 93 | class SyncOutputS3BeforeEnd(Callback): 94 | """ 95 | """ 96 | def on_run_start(self, config, **kwargs): # pylint: disable=unused-argument 97 | if config.SYNC_OUTPUT_DIR_S3.ENABLED and not aws_credential_is_available(): 98 | raise ValueError(f"\n\nAWS credential must be set in environment variables (rank={d2_comm.get_rank()}).\n") 99 | 100 | def on_run_end(self, config, **kwargs): # pylint: disable=unused-argument 101 | """ 102 | """ 103 | if config.SYNC_OUTPUT_DIR_S3.ENABLED: 104 | sync_output_dir_s3(config, output_dir=config.hydra.run.dir) 105 | -------------------------------------------------------------------------------- /tridet/utils/s3.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | import os 4 | import subprocess 5 | import tempfile 6 | import time 7 | 8 | import requests 9 | from hydra.utils import to_absolute_path 10 | from tqdm import tqdm 11 | 12 | import wandb 13 | from detectron2.utils import comm 14 | 15 | from tridet.utils.comm import broadcast_from_master 16 | from tridet.utils.wandb import wandb_is_initialized 17 | 18 | LOG = logging.getLogger(__name__) 19 | 20 | 21 | @broadcast_from_master 22 | def maybe_download_ckpt_from_url(cfg): 23 | """If the checkpoint is an S3 or https path, the main process download the weight under, by default, `/tmp/`. 24 | 25 | NOTE: All workers must update `cfg.MODEL.CKPT` to use the new path. 
26 | """ 27 | ckpt_path = cfg.MODEL.CKPT 28 | 29 | if ckpt_path.startswith("s3://") or ckpt_path.startswith("https://"): 30 | os.makedirs(cfg.TMP_DIR, exist_ok=True) 31 | _, ext = os.path.splitext(ckpt_path) 32 | tmp_path = tempfile.NamedTemporaryFile(dir=cfg.TMP_DIR, suffix=ext).name 33 | 34 | LOG.info("Downloading initial weights:") 35 | LOG.info(f" src: {ckpt_path}") 36 | LOG.info(f" dst: {tmp_path}") 37 | 38 | if ckpt_path.startswith("s3://"): 39 | if not aws_credential_is_available(): 40 | raise ValueError('AWS credentials are undefined in environment variables.') 41 | s3_copy(ckpt_path, tmp_path) 42 | else: # https:// 43 | req = requests.get(ckpt_path) 44 | with open(tmp_path, 'wb') as f: 45 | for chunk in tqdm(req.iter_content(100000)): 46 | f.write(chunk) 47 | return tmp_path 48 | 49 | else: 50 | return ckpt_path 51 | 52 | 53 | def aws_credential_is_available(): 54 | AWS_CREDENTIALS = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] 55 | for x in AWS_CREDENTIALS: 56 | if not os.environ.get(x, None): 57 | return False 58 | return True 59 | 60 | 61 | def s3_copy(source_path, target_path, verbose=True): 62 | """Copy single file from local to s3, s3 to local, or s3 to s3. 63 | 64 | Parameters 65 | ---------- 66 | source_path: str 67 | Path of file to copy 68 | 69 | target_path: str 70 | Path to copy file to 71 | 72 | verbose: bool, default: True 73 | If True print some helpful messages 74 | 75 | Returns 76 | ------- 77 | bool: True if successful 78 | """ 79 | if verbose: 80 | logging.getLogger().setLevel(logging.DEBUG) 81 | 82 | success = False 83 | command_str = "aws s3 cp --acl bucket-owner-full-control {} {}".format(source_path, target_path) 84 | try: 85 | subprocess.check_output(command_str, shell=True) 86 | success = True 87 | except subprocess.CalledProcessError as e: 88 | success = False 89 | LOG.error("{} failed with error code {}".format(command_str, e.returncode)) 90 | LOG.error(e.output) 91 | if verbose: 92 | LOG.info("Done copying file") 93 | 94 | return success 95 | 96 | 97 | def sync_dir(source, target, verbose=True, excludes=None): 98 | """ 99 | Sync a directory from source to target (either local to s3, s3 to s3, s3 to local) 100 | 101 | Parameters 102 | ---------- 103 | source: str 104 | Directory from which we want to sync files 105 | 106 | target: str 107 | Directory to which all files will be synced 108 | 109 | verbose: bool, default: True 110 | If True, log some helpful messages 111 | """ 112 | assert source.startswith('s3://') or target.startswith('s3://') 113 | command_str = "aws s3 sync --quiet --acl bucket-owner-full-control {} {}".format(source, target) 114 | if excludes: 115 | for exclude in excludes: 116 | command_str += f" --exclude '{exclude}'" 117 | if verbose: 118 | LOG.info("Syncing with '{}'".format(command_str)) 119 | try: 120 | subprocess.check_output(command_str, shell=True) 121 | except subprocess.CalledProcessError as e: 122 | LOG.error("{} failed with error code {}".format(command_str, e.returncode)) 123 | LOG.error(e.output) 124 | if verbose: 125 | LOG.info("Done syncing") 126 | 127 | 128 | def sync_output_dir_s3(cfg, output_dir=None): 129 | output_dir = output_dir or os.getcwd() 130 | output_dir = os.path.abspath(os.path.normpath(output_dir)) 131 | output_root = to_absolute_path(cfg.OUTPUT_ROOT) 132 | 133 | assert os.path.commonprefix([output_dir, output_root]) == output_root, f'{output_dir}, {output_root}' 134 | tar_output_dir = os.path.join(cfg.SYNC_OUTPUT_DIR_S3.ROOT_IN_S3, output_dir[len(output_root) + 1:]) 135 | 136 | if 
comm.is_main_process(): 137 | LOG.info(f"Syncing output_dir: {output_dir} -> {tar_output_dir}") 138 | sync_dir(output_dir, tar_output_dir) 139 | 140 | if wandb_is_initialized(): 141 | tar_wandb_run_dir = os.path.join(tar_output_dir, 'wandb') 142 | LOG.info(f"Syncing W&B run dir: {wandb.run.dir} -> {tar_wandb_run_dir}") 143 | sync_dir(wandb.run.dir, tar_wandb_run_dir) 144 | 145 | elif comm.get_local_rank() == 0 and os.path.exists(os.path.join(output_dir, 'logs')): 146 | # local master -- only sync the log files 147 | log_output_dir, log_tar_output_dir = os.path.join(output_dir, 'logs'), os.path.join(tar_output_dir, 'logs') 148 | LOG.info(f"Syncing log output_dir: {log_output_dir} -> {log_tar_output_dir}") 149 | sync_dir(log_output_dir, log_tar_output_dir) 150 | 151 | 152 | def maybe_download_from_s3(src_path): 153 | if not src_path.startswith("s3://"): 154 | return src_path 155 | 156 | extension = os.path.splitext(src_path)[-1] 157 | if not extension: 158 | extension = None 159 | tmp_path = tempfile.NamedTemporaryFile(suffix=extension).name 160 | suceeded = s3_copy(src_path, tmp_path) 161 | if not suceeded: 162 | raise RuntimeError("`s3_copy` failed.") 163 | return tmp_path 164 | 165 | 166 | def maybe_sync_dir_from_s3(src_path, excludes=None): 167 | if not src_path.startswith("s3://"): 168 | return src_path 169 | 170 | tmp_dir = tempfile.NamedTemporaryFile().name 171 | os.makedirs(tmp_dir) 172 | LOG.info(f"Syncing {src_path} to {tmp_dir}") 173 | st = time.time() 174 | sync_dir(src_path, tmp_dir, excludes=excludes) 175 | LOG.info(f"Done. ({time.time() - st}s)") 176 | return tmp_dir 177 | -------------------------------------------------------------------------------- /tridet/utils/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | import json 4 | import logging 5 | import os 6 | import resource 7 | from datetime import datetime 8 | 9 | import torch 10 | import torch.distributed as dist 11 | from omegaconf import OmegaConf 12 | 13 | import detectron2.utils.comm as d2_comm 14 | from detectron2.utils.env import seed_all_rng 15 | from detectron2.utils.events import _CURRENT_STORAGE_STACK 16 | 17 | from tridet.utils.comm import broadcast_from_master 18 | from tridet.utils.events import WandbEventStorage 19 | 20 | LOG = logging.getLogger(__name__) 21 | 22 | 23 | def setup_distributed(world_size, rank): 24 | """ 25 | Adapted from detectron2: 26 | https://github.com/facebookresearch/detectron2/blob/master/detectron2/engine/launch.py#L85 27 | """ 28 | host = os.environ["MASTER_ADDR"] if "MASTER_ADDR" in os.environ else "127.0.0.1" 29 | port = 12345 30 | dist_url = f"tcp://{host}:{port}" 31 | try: 32 | dist.init_process_group(backend='NCCL', init_method=dist_url, world_size=world_size, rank=rank) 33 | except Exception as e: 34 | logging.error("Process group URL: %s", dist_url) 35 | raise e 36 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 37 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 38 | d2_comm.synchronize() 39 | 40 | # Assumption: all machines have the same number of GPUs. 
41 | num_gpus_per_machine = torch.cuda.device_count() 42 | machine_rank = rank // num_gpus_per_machine 43 | 44 | # Setup the local process group (which contains ranks within the same machine) 45 | assert d2_comm._LOCAL_PROCESS_GROUP is None 46 | num_machines = world_size // num_gpus_per_machine 47 | for i in range(num_machines): 48 | ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 49 | pg = dist.new_group(ranks_on_i) 50 | if i == machine_rank: 51 | d2_comm._LOCAL_PROCESS_GROUP = pg 52 | 53 | # Declare GPU device. 54 | local_rank = rank % num_gpus_per_machine 55 | torch.cuda.set_device(local_rank) 56 | 57 | # Multi-node training often fails with "received 0 items of ancdata" error. 58 | # https://github.com/fastai/fastai/issues/23#issuecomment-345091054 59 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 60 | resource.setrlimit(resource.RLIMIT_NOFILE, (8192, rlimit[1])) 61 | 62 | 63 | @broadcast_from_master 64 | def get_random_seed(): 65 | """Adapted from d2.utils.env:seed_all_rng()""" 66 | seed = os.getpid() + int(datetime.now().strftime("%S%f")) + int.from_bytes(os.urandom(2), "big") 67 | return seed 68 | 69 | 70 | def setup(cfg): 71 | assert torch.cuda.is_available(), "cuda is not available." 72 | 73 | # Seed random number generators. If distributed, then sync the random seed over all GPUs. 74 | seed = get_random_seed() 75 | seed_all_rng(seed) 76 | 77 | LOG.info("Working Directory: {}".format(os.getcwd())) 78 | LOG.info("Full config:\n{}".format(json.dumps(OmegaConf.to_container(cfg, resolve=True), indent=2))) 79 | 80 | # Set up EventStorage 81 | storage = WandbEventStorage() 82 | _CURRENT_STORAGE_STACK.append(storage) 83 | 84 | # After this, the cfg is immutable. 85 | OmegaConf.set_readonly(cfg, True) 86 | -------------------------------------------------------------------------------- /tridet/utils/tasks.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | from collections import OrderedDict 3 | 4 | from detectron2.config import configurable 5 | 6 | 7 | class Task(): 8 | def __init__(self, name, is_detection_task, is_dense_prediction_task): 9 | self.name = name 10 | self.is_detection_task = is_detection_task 11 | self.is_dense_prediction_task = is_dense_prediction_task 12 | 13 | 14 | # yapf: disable 15 | TASKS = [ 16 | Task( 17 | name="box2d", 18 | is_detection_task=True, 19 | is_dense_prediction_task=False, 20 | ), 21 | Task( 22 | name="box3d", 23 | is_detection_task=True, 24 | is_dense_prediction_task=False, 25 | ), 26 | Task( 27 | name="depth", 28 | is_detection_task=False, 29 | is_dense_prediction_task=True, 30 | ) 31 | ] 32 | # yapf: enable 33 | 34 | NAME_TO_TASK = OrderedDict([(task.name, task) for task in TASKS]) 35 | 36 | 37 | class TaskManager(): 38 | @configurable 39 | def __init__(self, box2d_on=False, box3d_on=False, depth_on=False): 40 | """ 41 | configurable is experimental. 
42 | """ 43 | self._box2d_on = self._mask2d_on = self._box3d_on = self._semseg2d_on = self._depth_on = False 44 | tasks = [] 45 | if box2d_on: 46 | tasks.append(NAME_TO_TASK['box2d']) 47 | self._box2d_on = True 48 | if box3d_on: 49 | tasks.append(NAME_TO_TASK['box3d']) 50 | self._box3d_on = True 51 | if depth_on: 52 | tasks.append(NAME_TO_TASK['depth']) 53 | self._depth_on = True 54 | 55 | if not tasks: 56 | raise ValueError("No task specified.") 57 | 58 | self._tasks = tasks 59 | 60 | @property 61 | def tasks(self): 62 | return self._tasks 63 | 64 | @classmethod 65 | def from_config(cls, cfg): 66 | # yapf: disable 67 | return OrderedDict( 68 | box2d_on = cfg.MODEL.BOX2D_ON, 69 | box3d_on = cfg.MODEL.BOX3D_ON, 70 | depth_on = cfg.MODEL.DEPTH_ON, 71 | ) 72 | # yapf: enable 73 | 74 | # Indicators that tells if each task is enabled. 75 | @property 76 | def box2d_on(self): 77 | return self._box2d_on 78 | 79 | @property 80 | def box3d_on(self): 81 | return self._box3d_on 82 | 83 | @property 84 | def depth_on(self): 85 | return self._depth_on 86 | 87 | @property 88 | def has_dense_prediction_task(self): 89 | return any([task.is_dense_prediction_task for task in self.tasks]) 90 | 91 | @property 92 | def has_detection_task(self): 93 | return any([task.is_detection_task for task in self.tasks]) 94 | 95 | @property 96 | def task_names(self): 97 | return [task.name for task in self.tasks] 98 | -------------------------------------------------------------------------------- /tridet/utils/tensor2d.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def compute_features_locations(h, w, stride, dtype=torch.float32, device='cpu', offset="none"): 7 | """Adapted from AdelaiDet: 8 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 9 | 10 | Key differnece: offset is configurable. 11 | """ 12 | shifts_x = torch.arange(0, w * stride, step=stride, dtype=dtype, device=device) 13 | shifts_y = torch.arange(0, h * stride, step=stride, dtype=dtype, device=device) 14 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 15 | shift_x = shift_x.reshape(-1) 16 | shift_y = shift_y.reshape(-1) 17 | # (dennis.park) 18 | # locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 19 | locations = torch.stack((shift_x, shift_y), dim=1) 20 | if offset == "half": 21 | locations += stride // 2 22 | else: 23 | assert offset == "none" 24 | 25 | return locations 26 | 27 | 28 | def aligned_bilinear(tensor, factor, offset="none"): 29 | """Adapted from AdelaiDet: 30 | https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py 31 | """ 32 | assert tensor.dim() == 4 33 | assert factor >= 1 34 | assert int(factor) == factor 35 | 36 | if factor == 1: 37 | return tensor 38 | 39 | h, w = tensor.size()[2:] 40 | tensor = F.pad(tensor, pad=(0, 1, 0, 1), mode="replicate") 41 | oh = factor * h + 1 42 | ow = factor * w + 1 43 | tensor = F.interpolate(tensor, size=(oh, ow), mode='bilinear', align_corners=True) 44 | if offset == "half": 45 | tensor = F.pad(tensor, pad=(factor // 2, 0, factor // 2, 0), mode="replicate") 46 | 47 | return tensor[:, :, :oh - 1, :ow - 1] 48 | -------------------------------------------------------------------------------- /tridet/utils/train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
2 | import logging 3 | import os 4 | 5 | from tabulate import tabulate 6 | from termcolor import colored 7 | 8 | from detectron2.utils.events import get_event_storage 9 | 10 | LOG = logging.getLogger(__name__) 11 | 12 | 13 | def get_inference_output_dir(dataset_name, is_last=False, use_tta=False, root_output_dir=None): 14 | if not root_output_dir: 15 | root_output_dir = os.getcwd() # hydra 16 | step = get_event_storage().iter 17 | if is_last: 18 | result_dirname = "final" 19 | else: 20 | result_dirname = f"step{step:07d}" 21 | if use_tta: 22 | result_dirname += "-tta" 23 | output_dir = os.path.join(root_output_dir, "inference", result_dirname, dataset_name) 24 | return output_dir 25 | 26 | 27 | def print_test_results(test_results): 28 | metric_table = tabulate( 29 | [(k, v) for k, v in test_results.items()], 30 | headers=["metric", "value"], 31 | tablefmt="pipe", 32 | numalign="left", 33 | stralign="left", 34 | ) 35 | LOG.info("Test results:\n" + colored(metric_table, "cyan")) 36 | -------------------------------------------------------------------------------- /tridet/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | import colorsys 4 | import os 5 | 6 | import cv2 7 | import matplotlib.colors as mplc 8 | import numpy as np 9 | from PIL import Image, ImageDraw 10 | 11 | from tridet.utils.wandb import flatten_dict 12 | 13 | 14 | def fill_color_polygon(image, polygon, color, alpha=0.5): 15 | """Color the interior of a polygon with alpha-blending. This function modifies the input in place. 16 | """ 17 | _mask = Image.new('L', (image.shape[1], image.shape[0]), 0) 18 | ImageDraw.Draw(_mask).polygon(polygon, outline=1, fill=1) 19 | mask = np.array(_mask, dtype=bool) 20 | for c in range(3): 21 | channel = image[:, :, c] 22 | channel[mask] = channel[mask] * (1. - alpha) + color[c] * alpha 23 | 24 | 25 | def save_vis(np_arrays_dict, output_dir, filename, step=None): 26 | np_arrays_dict = flatten_dict(np_arrays_dict) 27 | npz_filename = os.path.join(output_dir, '' if step is None else f"step{step:06d}", filename) 28 | os.makedirs(os.path.dirname(npz_filename), exist_ok=True) 29 | np.savez_compressed(npz_filename, **np_arrays_dict) 30 | 31 | 32 | def change_color_brightness(color, brightness_factor): 33 | """ 34 | Copied from detectron2.utils.visualizer.py 35 | ------------------------------------------- 36 | 37 | Depending on the brightness_factor, gives a lighter or darker color i.e. a color with 38 | less or more saturation than the original color. 39 | 40 | Args: 41 | color: color of the polygon. Refer to `matplotlib.colors` for a full list of 42 | formats that are accepted. 43 | brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of 44 | 0 will correspond to no change, a factor in [-1.0, 0) range will result in 45 | a darker color and a factor in (0, 1.0] range will result in a lighter color. 46 | 47 | Returns: 48 | modified_color (tuple[double]): a tuple containing the RGB values of the 49 | modified color. Each value in the tuple is in the [0.0, 1.0] range.
50 | """ 51 | assert brightness_factor >= -1.0 and brightness_factor <= 1.0 52 | color = mplc.to_rgb(color) 53 | polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) 54 | modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) 55 | modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness 56 | modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness 57 | modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) 58 | return modified_color 59 | 60 | 61 | def draw_text(ax, text, position, *, font_size, color="g", horizontal_alignment="center", rotation=0): 62 | """ 63 | Copied from Visualizer.draw_text() 64 | ----------------------------------- 65 | 66 | Args: 67 | text (str): class label 68 | position (tuple): a tuple of the x and y coordinates to place text on image. 69 | font_size (int, optional): font of the text. If not provided, a font size 70 | proportional to the image width is calculated and used. 71 | color: color of the text. Refer to `matplotlib.colors` for full list 72 | of formats that are accepted. 73 | horizontal_alignment (str): see `matplotlib.text.Text` 74 | rotation: rotation angle in degrees CCW 75 | 76 | Returns: 77 | output (VisImage): image object with text drawn. 78 | """ 79 | # since the text background is dark, we don't want the text to be dark 80 | color = np.maximum(list(mplc.to_rgb(color)), 0.2) 81 | color[np.argmax(color)] = max(0.8, np.max(color)) 82 | 83 | x, y = position 84 | ax.text( 85 | x, 86 | y, 87 | text, 88 | size=font_size, 89 | family="sans-serif", 90 | bbox={ 91 | "facecolor": "black", 92 | "alpha": 0.8, 93 | "pad": 0.7, 94 | "edgecolor": "none" 95 | }, 96 | verticalalignment="top", 97 | horizontalalignment=horizontal_alignment, 98 | color=color, 99 | zorder=10, 100 | rotation=rotation, 101 | ) 102 | return ax 103 | 104 | 105 | def float_to_uint8_color(float_clr): 106 | assert all([c >= 0. for c in float_clr]) 107 | assert all([c <= 1. for c in float_clr]) 108 | return [int(c * 255.) for c in float_clr] 109 | 110 | 111 | def mosaic(items, scale=1.0, pad=3, grid_width=None): 112 | """Creates a mosaic from list of images. 113 | 114 | Parameters 115 | ---------- 116 | items: list of np.ndarray 117 | List of images to mosaic. 118 | 119 | scale: float, default=1.0 120 | Scale factor applied to images. scale > 1.0 enlarges images. 121 | 122 | pad: int, default=3 123 | Padding size of the images before mosaic 124 | 125 | grid_width: int, default=None 126 | Mosaic width or grid width of the mosaic 127 | 128 | Returns 129 | ------- 130 | image: np.array of shape (H, W, 3) 131 | Image mosaic 132 | """ 133 | # Determine tile width and height 134 | N = len(items) 135 | assert N > 0, 'No items to mosaic!' 136 | grid_width = grid_width if grid_width else np.ceil(np.sqrt(N)).astype(int) 137 | grid_height = np.ceil(N * 1. 
/ grid_width).astype(int) 138 | input_size = items[0].shape[:2] 139 | target_shape = (int(input_size[1] * scale), int(input_size[0] * scale)) 140 | mosaic_items = [] 141 | for j in range(grid_width * grid_height): 142 | if j < N: 143 | # Resize every tile to the target shape, which is derived from 144 | # the shape of the first item and the `scale` factor. 145 | im = cv2.resize(items[j], dsize=target_shape) 146 | mosaic_items.append(im) 147 | else: 148 | mosaic_items.append(np.zeros_like(mosaic_items[-1])) 149 | 150 | # Stack W tiles horizontally first, then vertically 151 | im_pad = lambda im: cv2.copyMakeBorder(im, pad, pad, pad, pad, cv2.BORDER_CONSTANT, 0) 152 | mosaic_items = [im_pad(im) for im in mosaic_items] 153 | hstack = [np.hstack(mosaic_items[j:j + grid_width]) for j in range(0, len(mosaic_items), grid_width)] 154 | mosaic_viz = np.vstack(hstack) if len(hstack) > 1 \ 155 | else hstack[0] 156 | return mosaic_viz 157 | -------------------------------------------------------------------------------- /tridet/utils/wandb.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 2 | import logging 3 | import os 4 | from collections import OrderedDict 5 | from collections.abc import Mapping 6 | from functools import wraps 7 | 8 | import wandb 9 | from detectron2.utils.events import get_event_storage 10 | from omegaconf import OmegaConf 11 | 12 | from tridet.utils.comm import broadcast_from_master, master_only 13 | 14 | LOG = logging.getLogger(__name__) 15 | 16 | 17 | def wandb_credential_is_available(): 18 | if os.environ.get('WANDB_API_KEY', None): 19 | return True 20 | else: 21 | return False 22 | 23 | 24 | @master_only 25 | def init_wandb(cfg): 26 | if not wandb_credential_is_available(): 27 | LOG.warning( 28 | "W&B credential must be defined in environment variables. " 29 | "Use `WANDB.ENABLED=False` to suppress this warning. " 30 | "Skipping `init_wandb`..."
31 | ) 32 | return 33 | 34 | if cfg.WANDB.DRYRUN: 35 | os.environ['WANDB_MODE'] = 'dryrun' 36 | 37 | _cfg = cfg.copy() 38 | del _cfg.hydra 39 | cfg_as_dict = OmegaConf.to_container(_cfg, resolve=True) 40 | wandb.init(project=cfg.WANDB.PROJECT, config=cfg_as_dict, tags=cfg.WANDB.TAGS, group=cfg.WANDB.GROUP) 41 | 42 | 43 | def wandb_is_initialized(): 44 | try: 45 | wandb.run.id # pylint: disable=pointless-statement 46 | initialized = True 47 | except AttributeError: 48 | initialized = False 49 | return initialized 50 | 51 | 52 | def if_wandb_initialized(fn): 53 | @wraps(fn) 54 | def wrapped_fn(*args, **kwargs): 55 | if wandb_is_initialized(): 56 | return fn(*args, **kwargs) 57 | else: 58 | return None 59 | 60 | return wrapped_fn 61 | 62 | 63 | @broadcast_from_master 64 | def derive_output_dir_from_wandb_id(cfg): 65 | assert wandb_is_initialized() 66 | wandb_run_dir = wandb.run.dir 67 | if wandb_run_dir.endswith('/files'): # wandb 0.10.x 68 | wandb_run_dir = wandb_run_dir[:-6] 69 | datetime_str, wandb_run_id = wandb_run_dir.split('-')[-2:] 70 | assert wandb_run_id == wandb.run.id 71 | 72 | output_dir = os.path.join(cfg.OUTPUT_ROOT, '-'.join([wandb_run_id, datetime_str])) 73 | return output_dir 74 | 75 | 76 | @master_only 77 | @if_wandb_initialized 78 | def log_nested_dict(dikt): 79 | storage = get_event_storage() 80 | step = storage.iter 81 | 82 | wandb.log(flatten_dict(dikt), step=step) 83 | 84 | 85 | def flatten_dict(results): 86 | """ 87 | Almost identical to detectron2.evaluation.testing:flatten_result_dict()', but using 'OrderedDict' 88 | -------------------------------------------------------------------------------------------------- 89 | 90 | Expand a hierarchical dict of scalars into a flat dict of scalars. 91 | If results[k1][k2][k3] = v, the returned dict will have the entry 92 | {"k1/k2/k3": v}. 93 | 94 | Args: 95 | results (dict): 96 | """ 97 | r = OrderedDict() 98 | for k, v in results.items(): 99 | k = str(k) 100 | if isinstance(v, Mapping): 101 | v = flatten_dict(v) 102 | for kk, vv in v.items(): 103 | r[k + "/" + kk] = vv 104 | else: 105 | r[k] = v 106 | return r 107 | -------------------------------------------------------------------------------- /tridet/visualizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Toyota Research Institute. All rights reserved. 
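#
# Usage sketch for the factory functions defined below; `cfg` is the resolved Hydra config
# and the dataset name is hypothetical. Any visualizer name other than "d2_visualizer" or
# "box3d_visualizer" raises a ValueError.
#
#     viz = get_dataloader_visualizer(cfg, "d2_visualizer", "kitti_3d_train")
#     viz_images = viz.visualize(sample)  # one dataloader sample -> dict of RGB visualizations
#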
2 | from detectron2.data.catalog import DatasetCatalog, MetadataCatalog 3 | 4 | from tridet.visualizers.box3d_visualizer import Box3DDataloaderVisualizer, Box3DPredictionVisualizer 5 | from tridet.visualizers.d2_visualizer import D2DataloaderVisualizer, D2PredictionVisualizer 6 | 7 | 8 | def get_predictions_visualizer(cfg, visualizer_name, dataset_name, inference_output_dir): 9 | if visualizer_name == 'd2_visualizer': 10 | return D2PredictionVisualizer(cfg, dataset_name, inference_output_dir) 11 | elif visualizer_name == "box3d_visualizer": 12 | return Box3DPredictionVisualizer(cfg, dataset_name, inference_output_dir) 13 | else: 14 | raise ValueError(f"Invalid visualizer: {visualizer_name}") 15 | 16 | 17 | def get_dataloader_visualizer(cfg, visualizer_name, dataset_name): 18 | if visualizer_name == 'd2_visualizer': 19 | return D2DataloaderVisualizer(cfg, dataset_name) 20 | elif visualizer_name == "box3d_visualizer": 21 | return Box3DDataloaderVisualizer(cfg, dataset_name) 22 | else: 23 | raise ValueError(f"Invalid visualizer: {visualizer_name}") 24 | -------------------------------------------------------------------------------- /tridet/visualizers/d2_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Copyright 2021 Toyota Research Institute. All rights reserved. 3 | import json 4 | import logging 5 | import os 6 | from collections import OrderedDict, defaultdict 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from detectron2.data import DatasetCatalog, MetadataCatalog 12 | from detectron2.data import detection_utils as d2_utils 13 | from detectron2.structures import Boxes, BoxMode, Instances 14 | from detectron2.utils.visualizer import ColorMode, Visualizer 15 | 16 | DETECTION_RESULT_FILE = "coco_instances_results.json" 17 | SEMSEG_RESULT_FILE = "sem_seg_predictions.json" 18 | 19 | D2_COLORMODE_MAPPING = { 20 | "image": ColorMode.IMAGE, 21 | "segm": ColorMode.SEGMENTATION, 22 | "image_bw": ColorMode.IMAGE_BW, 23 | } 24 | 25 | LOG = logging.getLogger(__name__) 26 | 27 | 28 | def get_tasks_from_cfg(cfg): 29 | tasks = [] 30 | if cfg.MODEL.BOX2D_ON: 31 | tasks.append('bbox2d') 32 | assert len(tasks) > 0, "Empty task." 33 | return tasks 34 | 35 | 36 | def create_instances(predictions, image_size, score_threshold, metadata, score_key="score"): 37 | ret = Instances(image_size) 38 | 39 | # score = np.asarray([x["score"] for x in predictions]) 40 | score = np.asarray([x[score_key] for x in predictions]) 41 | chosen = (score > score_threshold).nonzero()[0] 42 | score = score[chosen] 43 | bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4) 44 | bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) 45 | 46 | # dataset_id_map = metadata.thing_dataset_id_to_contiguous_id 47 | if not hasattr(metadata, 'thing_dataset_id_to_contiguous_id'): 48 | # (dennis.park) Assume the `category_id` is already a contiguous IDs starting at 0. 
49 | dataset_id_map = {idx: idx for idx, _ in enumerate(metadata.thing_classes)} 50 | else: 51 | dataset_id_map = metadata.thing_dataset_id_to_contiguous_id 52 | labels = np.asarray([dataset_id_map[predictions[i]["category_id"]] for i in chosen]) 53 | 54 | ret.scores = score 55 | ret.pred_boxes = Boxes(bbox) 56 | ret.pred_classes = labels 57 | 58 | # Add bbox3d 59 | try: 60 | ret.pred_boxes3d = torch.as_tensor([predictions[i]["bbox3d"] for i in chosen]) 61 | except KeyError: 62 | pass 63 | return ret 64 | 65 | 66 | class D2PredictionVisualizer(): 67 | """ 68 | Adapted from detectron2: 69 | detectron2.utils.visualizer 70 | 71 | Key difference: load inference results on disk generated by COCOEvaluator 72 | """ 73 | def __init__(self, cfg, dataset_name, inference_output_dir): 74 | self._metadata = MetadataCatalog.get(dataset_name) 75 | self._input_format = cfg.INPUT.FORMAT 76 | self._scale = cfg.VIS.D2.PREDICTIONS.SCALE 77 | self._d2_viz_color_mode = D2_COLORMODE_MAPPING[cfg.VIS.D2.PREDICTIONS.COLOR_MODE] 78 | 79 | tasks = get_tasks_from_cfg(cfg) 80 | dataset_dicts = DatasetCatalog.get(dataset_name) 81 | 82 | # Per-image predicted instances 83 | self.pred_instances_by_image = None 84 | if "bbox2d" in tasks: 85 | with open(os.path.join(inference_output_dir, DETECTION_RESULT_FILE), 'r') as f: 86 | instance_predictions = json.load(f) 87 | 88 | pred_instances_by_image = defaultdict(list) 89 | for p in instance_predictions: 90 | # 'p' is key'ed by 'image_id'. 91 | image_id = p['image_id'] 92 | pred_instances_by_image[image_id].append(p) 93 | 94 | # det2d_threshold = cfg.VIS.PREDICTIONS.DET2D_THRESHOLD 95 | det2d_threshold = cfg.VIS.D2.PREDICTIONS.THRESHOLD 96 | # This handles images with no predictions. 97 | for dataset_dict in dataset_dicts: 98 | image_id = dataset_dict['image_id'] 99 | img_shape = (dataset_dict['height'], dataset_dict['width']) 100 | pred_instances_by_image[image_id] = create_instances( 101 | pred_instances_by_image[image_id], img_shape, det2d_threshold, self._metadata 102 | ) 103 | 104 | self.pred_instances_by_image = pred_instances_by_image 105 | LOG.info( 106 | f"Found 2D detection predictions (bbox2d and/or mask2d) for {len(pred_instances_by_image)} images." 107 | ) 108 | 109 | def visualize(self, x): 110 | """ 111 | Parameters 112 | ---------- 113 | x: Dict 114 | One 'dataset_dict'. 115 | 116 | Returns 117 | ------- 118 | viz_images: Dict[np.array] 119 | Visualizations as RGB images. 120 | """ 121 | # Load image. 122 | img = d2_utils.read_image(x["file_name"], format=self._input_format) 123 | img = d2_utils.convert_image_to_rgb(img, self._input_format) 124 | 125 | viz_images = OrderedDict() 126 | 127 | # d2 groundtruth instances viz (2D box, mask, keypoints) 128 | if 'annotations' in x: 129 | # Visualizer.draw_datset_dict() renders various types of annotations. 130 | # But here we only use its capability to render *instance() annotations. 
131 | _x = {'annotations': x['annotations']} 132 | viz = Visualizer(img, self._metadata, scale=self._scale, instance_mode=self._d2_viz_color_mode) 133 | viz_image = viz.draw_dataset_dict(_x).get_image() 134 | viz_images["viz_gt_instances_d2"] = viz_image 135 | 136 | # d2 instance predictions viz (2D box, mask, keypoints) 137 | if self.pred_instances_by_image is not None: 138 | pred_instances = self.pred_instances_by_image[x['image_id']] 139 | viz = Visualizer(img, self._metadata, scale=self._scale, instance_mode=self._d2_viz_color_mode) 140 | viz_image = viz.draw_instance_predictions(pred_instances).get_image() 141 | viz_images["viz_pred_instance_d2"] = viz_image 142 | 143 | return viz_images 144 | 145 | 146 | def draw_gt_instances_d2(gt_instances, img, metadata, scale, instance_mode): 147 | """Wrapper of D2's 'Visualizer.draw_instance_predictions()' to render GT instances. 148 | """ 149 | # Rename instance fields to work with Visualizer.draw_instance_predictions() of detectron2. 150 | field_remapping = { 151 | 'gt_boxes': 'pred_boxes', 152 | 'gt_classes': 'pred_classes', 153 | } 154 | fields = {} 155 | for k, v in gt_instances._fields.items(): 156 | new_k = field_remapping.get(k, None) 157 | k = new_k or k 158 | fields[k] = v 159 | 160 | instances = Instances(image_size=gt_instances._image_size, **fields) 161 | viz = Visualizer(img, metadata, scale=scale, instance_mode=instance_mode) 162 | viz_image = viz.draw_instance_predictions(instances).get_image() 163 | return viz_image 164 | 165 | 166 | class D2DataloaderVisualizer(): 167 | def __init__(self, cfg, dataset_name): 168 | self._metadata = MetadataCatalog.get(dataset_name) 169 | self._input_format = cfg.INPUT.FORMAT 170 | self._scale = cfg.VIS.D2.DATALOADER.SCALE 171 | self._d2_viz_color_mode = D2_COLORMODE_MAPPING[cfg.VIS.D2.DATALOADER.COLOR_MODE] 172 | 173 | def visualize(self, x): 174 | # Assumption: dataloader produce CHW images. 175 | img = d2_utils.convert_image_to_rgb(x['image'].permute(1, 2, 0), self._input_format) 176 | 177 | viz_images = OrderedDict() 178 | 179 | # d2 instance viz (2D box, mask, keypoints) 180 | gt_instances = x['instances'] 181 | viz_image = draw_gt_instances_d2(gt_instances, img, self._metadata, self._scale, self._d2_viz_color_mode) 182 | viz_images['viz_gt_instances_d2'] = viz_image 183 | 184 | return viz_images 185 | --------------------------------------------------------------------------------
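A minimal sketch of how `create_instances` from tridet/visualizers/d2_visualizer.py consumes COCO-style prediction dicts, assuming detectron2 and this repository are importable; the class names, boxes, scores, image size, and threshold below are made up for illustration.

    from types import SimpleNamespace
    from tridet.visualizers.d2_visualizer import create_instances

    # Metadata without `thing_dataset_id_to_contiguous_id`, so category ids are used as-is.
    metadata = SimpleNamespace(thing_classes=["Car", "Pedestrian"])
    predictions = [
        {"image_id": 0, "category_id": 0, "bbox": [10.0, 20.0, 50.0, 30.0], "score": 0.9},
        {"image_id": 0, "category_id": 1, "bbox": [5.0, 5.0, 15.0, 40.0], "score": 0.2},
    ]
    instances = create_instances(predictions, (375, 1242), score_threshold=0.5, metadata=metadata)
    # Only the first prediction survives the threshold; its box is converted from
    # XYWH_ABS to XYXY_ABS and stored in `instances.pred_boxes`.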