├── .gitignore
├── CONTRIBUTING.md
├── DeepLab_COCO_Demo.ipynb
├── DeepLab_Cityscapes_Demo.ipynb
├── LICENSE
├── README.md
├── ViP_DeepLab_Demo.ipynb
├── __init__.py
├── common.py
├── common_test.py
├── compile.sh
├── config.proto
├── configs
│   ├── ade20k
│   │   └── kmax_deeplab
│   │       ├── kmax_meta_convnext_large_os32.textproto
│   │       ├── kmax_meta_convnext_large_os32_res1281.textproto
│   │       ├── kmax_meta_r50_os32.textproto
│   │       └── kmax_meta_r50_os32_res1281.textproto
│   ├── cityscapes
│   │   ├── axial_deeplab
│   │   │   ├── axial_swidernet_1_1_1_os16.textproto
│   │   │   ├── axial_swidernet_1_1_3_os16.textproto
│   │   │   ├── axial_swidernet_1_1_4.5_os16.textproto
│   │   │   ├── max_deeplab_l_backbone_os16.textproto
│   │   │   └── max_deeplab_s_backbone_os16.textproto
│   │   ├── kmax_deeplab
│   │   │   ├── kmax_meta_axial_r50_os32.textproto
│   │   │   ├── kmax_meta_convnext_base_os32.textproto
│   │   │   ├── kmax_meta_convnext_large_os32.textproto
│   │   │   ├── kmax_meta_convnext_small_os32.textproto
│   │   │   ├── kmax_meta_convnext_tiny_os32.textproto
│   │   │   └── kmax_meta_r50_os32.textproto
│   │   └── panoptic_deeplab
│   │       ├── mobilenet_v3_large_os16.textproto
│   │       ├── mobilenet_v3_large_os32.textproto
│   │       ├── mobilenet_v3_small_os16.textproto
│   │       ├── mobilenet_v3_small_os32.textproto
│   │       ├── resnet50_beta_os32.textproto
│   │       ├── resnet50_os32_merge_with_pure_tf_func.textproto
│   │       ├── resnet50_os32_semseg.textproto
│   │       ├── swidernet_sac_1_1_1_os16.textproto
│   │       ├── swidernet_sac_1_1_3_os16.textproto
│   │       ├── swidernet_sac_1_1_4.5_os16.textproto
│   │       └── wide_resnet41_os16.textproto
│   ├── cityscapes_dvps
│   │   └── vip_deeplab
│   │       ├── resnet50_beta_os32.textproto
│   │       └── wide_resnet41_os16.textproto
│   ├── coco
│   │   ├── kmax_deeplab
│   │   │   ├── kmax_meta_axial_r50_os32.textproto
│   │   │   ├── kmax_meta_convnext_base_os32.textproto
│   │   │   ├── kmax_meta_convnext_large_os32.textproto
│   │   │   ├── kmax_meta_convnext_small_os32.textproto
│   │   │   ├── kmax_meta_convnext_tiny_os32.textproto
│   │   │   └── kmax_meta_r50_os32.textproto
│   │   ├── max_deeplab
│   │   │   ├── max_deeplab_l_os16_res1025_100k.textproto
│   │   │   ├── max_deeplab_l_os16_res1025_200k.textproto
│   │   │   ├── max_deeplab_l_os16_res1025_400k.textproto
│   │   │   ├── max_deeplab_s_os16_res1025_100k.textproto
│   │   │   ├── max_deeplab_s_os16_res1025_200k.textproto
│   │   │   ├── max_deeplab_s_os16_res1025_400k.textproto
│   │   │   ├── max_deeplab_s_os16_res641_100k.textproto
│   │   │   ├── max_deeplab_s_os16_res641_200k.textproto
│   │   │   └── max_deeplab_s_os16_res641_400k.textproto
│   │   └── panoptic_deeplab
│   │       ├── resnet50_beta_os16.textproto
│   │       ├── resnet50_beta_os32.textproto
│   │       ├── resnet50_os16.textproto
│   │       └── resnet50_os32.textproto
│   ├── example
│   │   ├── example_cityscapes_deeplabv3.textproto
│   │   ├── example_cityscapes_deeplabv3_mv3l.textproto
│   │   ├── example_cityscapes_deeplabv3plus.textproto
│   │   ├── example_cityscapes_panoptic_deeplab.textproto
│   │   ├── example_cityscapes_panoptic_deeplab_mv3l.textproto
│   │   ├── example_coco_kmax_meta_convnext.textproto
│   │   ├── example_coco_max_deeplab.textproto
│   │   └── example_kitti-step_motion_deeplab.textproto
│   ├── kitti
│   │   ├── motion_deeplab
│   │   │   ├── resnet50_os32.textproto
│   │   │   └── resnet50_os32_trainval.textproto
│   │   └── panoptic_deeplab
│   │       ├── resnet50_os32.textproto
│   │       └── resnet50_os32_trainval.textproto
│   ├── motchallenge
│   │   ├── motion_deeplab
│   │   │   └── resnet50_os32.textproto
│   │   └── panoptic_deeplab
│   │       └── resnet50_os32.textproto
│   └── semkitti_dvps
│       └── vip_deeplab
│           └── resnet50_beta_os32.textproto
├── data
│   ├── __init__.py
│   ├── ade20k_constants.py
│   ├── build_ade20k_data.py
│   ├── build_cityscapes_data.py
│   ├── build_cityscapes_data_test.py
│   ├── build_coco_data.py
│   ├── build_coco_data_test.py
│   ├── build_dvps_data.py
│   ├── build_step_data.py
│   ├── build_step_data_test.py
│   ├── coco_constants.py
│   ├── data_utils.py
│   ├── data_utils_test.py
│   ├── dataloader
│   │   ├── __init__.py
│   │   ├── input_reader.py
│   │   └── multicamera_input_reader.py
│   ├── dataset.py
│   ├── dataset_utils.py
│   ├── dataset_utils_test.py
│   ├── multicamera_data_utils.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── autoaugment_policy.py
│   │   ├── autoaugment_policy_test.py
│   │   ├── autoaugment_utils.py
│   │   ├── autoaugment_utils_test.py
│   │   ├── input_preprocessing.py
│   │   ├── input_preprocessing_test.py
│   │   ├── preprocess_utils.py
│   │   └── preprocess_utils_test.py
│   ├── sample_generator.py
│   ├── sample_generator_test.py
│   ├── testdata
│   │   ├── create_test_data.py
│   │   ├── dummy_gt_for_vps.png
│   │   ├── dummy_prediction.png
│   │   ├── gtFine
│   │   │   ├── cityscapes_panoptic_dummy_trainId.json
│   │   │   └── cityscapes_panoptic_dummy_trainId
│   │   │       └── dummy_000000_000000_gtFine_panoptic.png
│   │   ├── leftImg8bit
│   │   │   └── dummy_000000_000000_leftImg8bit.png
│   │   └── targets
│   │       ├── center_target.png
│   │       ├── center_weights.png
│   │       ├── eval_is_crowd.npy
│   │       ├── eval_panoptic_target.npy
│   │       ├── eval_semantic_target.png
│   │       ├── is_crowd.npy
│   │       ├── offset_target.npy
│   │       ├── offset_weights.png
│   │       ├── panoptic_target.npy
│   │       ├── panoptic_target.png
│   │       ├── rgb_target.png
│   │       ├── semantic_target.png
│   │       ├── semantic_weights.png
│   │       ├── thing_id_class_target.npy
│   │       └── thing_id_mask_target.npy
│   ├── utils
│   │   ├── __init__.py
│   │   └── create_step_panoptic_maps.py
│   └── waymo_constants.py
├── dataset.proto
├── evaluation
│   ├── __init__.py
│   ├── coco_instance_ap.py
│   ├── coco_instance_ap_test.py
│   ├── depth_aware_segmentation_and_tracking_quality.py
│   ├── depth_aware_segmentation_and_tracking_quality_test.py
│   ├── depth_metrics.py
│   ├── depth_metrics_test.py
│   ├── numpy
│   │   ├── __init__.py
│   │   ├── segmentation_and_tracking_quality.py
│   │   └── segmentation_and_tracking_quality_test.py
│   ├── panoptic_quality.py
│   ├── panoptic_quality_test.py
│   ├── segmentation_and_tracking_quality.py
│   ├── segmentation_and_tracking_quality_test.py
│   ├── test_utils.py
│   ├── test_utils_test.py
│   ├── testdata
│   │   ├── README.md
│   │   ├── bird_gt.png
│   │   ├── bird_pred_class.png
│   │   ├── bird_pred_instance.png
│   │   ├── cat_gt.png
│   │   ├── cat_pred_class.png
│   │   ├── cat_pred_instance.png
│   │   ├── team_gt_instance.png
│   │   ├── team_pred_class.png
│   │   └── team_pred_instance.png
│   └── video_panoptic_quality.py
├── evaluator.proto
├── export_model.py
├── g3doc
│   ├── faq.md
│   ├── img
│   │   ├── axial_deeplab
│   │   │   ├── axial_block.png
│   │   │   ├── nonlocal_block.png
│   │   │   └── position_sensitive_axial_block.png
│   │   ├── kmax_deeplab
│   │   │   ├── clustering_view_of_mask_transformer.png
│   │   │   └── kmax_decoder.png
│   │   ├── max_deeplab
│   │   │   ├── overview.png
│   │   │   └── overview_simple.png
│   │   ├── moat
│   │   │   ├── moat_block.png
│   │   │   ├── moat_imagenet1k_224.png
│   │   │   └── moat_imagenet22k_384.png
│   │   ├── panoptic_deeplab.png
│   │   ├── step
│   │   │   └── kitti_step_annotation.png
│   │   └── vip_deeplab
│   │       └── demo.gif
│   ├── projects
│   │   ├── axial_deeplab.md
│   │   ├── imagenet_pretrained_checkpoints.md
│   │   ├── kmax_deeplab.md
│   │   ├── max_deeplab.md
│   │   ├── moat_imagenet_pretrained_checkpoints.md
│   │   ├── motion_deeplab.md
│   │   ├── panoptic_deeplab.md
│   │   ├── vip_deeplab.md
│   │   └── wod_pvps.md
│   └── setup
│       ├── ade20k.md
│       ├── cityscapes.md
│       ├── cityscapes_test_server_evaluation.md
│       ├── coco.md
│       ├── coco_test_server_evaluation.md
│       ├── getting_started.md
│       ├── installation.md
│       ├── kitti_step.md
│       ├── motchallenge_step.md
│       └── your_own_dataset.md
├── model.proto
├── model
│   ├── __init__.py
│   ├── builder.py
│   ├── builder_test.py
│   ├── decoder
│   │   ├── __init__.py
│   │   ├── aspp.py
│   │   ├── aspp_test.py
│   │   ├── deeplabv3.py
│   │   ├── deeplabv3_test.py
│   │   ├── deeplabv3plus.py
│   │   ├── deeplabv3plus_test.py
│   │   ├── max_deeplab.py
│   │   ├── max_deeplab_test.py
│   │   ├── motion_deeplab_decoder.py
│   │   ├── panoptic_deeplab.py
│   │   ├── panoptic_deeplab_test.py
│   │   └── vip_deeplab_decoder.py
│   ├── deeplab.py
│   ├── deeplab_test.py
│   ├── encoder
│   │   ├── __init__.py
│   │   ├── atrous_consistency_test.py
│   │   ├── axial_resnet.py
│   │   ├── axial_resnet_instances.py
│   │   ├── axial_resnet_instances_test.py
│   │   ├── axial_resnet_test.py
│   │   ├── mobilenet.py
│   │   ├── mobilenet_test.py
│   │   └── model_export_test.py
│   ├── kmax_deeplab.py
│   ├── kmax_deeplab_test.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── activations.py
│   │   ├── activations_test.py
│   │   ├── axial_block_groups.py
│   │   ├── axial_block_groups_test.py
│   │   ├── axial_blocks.py
│   │   ├── axial_blocks_test.py
│   │   ├── axial_layers.py
│   │   ├── axial_layers_test.py
│   │   ├── blocks.py
│   │   ├── blocks_test.py
│   │   ├── convolutions.py
│   │   ├── convolutions_test.py
│   │   ├── drop_path.py
│   │   ├── drop_path_test.py
│   │   ├── dual_path_transformer.py
│   │   ├── dual_path_transformer_test.py
│   │   ├── moat_attention.py
│   │   ├── moat_attention_test.py
│   │   ├── moat_blocks.py
│   │   ├── moat_blocks_test.py
│   │   ├── positional_encodings.py
│   │   ├── positional_encodings_test.py
│   │   ├── recompute_grad.py
│   │   ├── recompute_grad_test.py
│   │   ├── resized_fuse.py
│   │   ├── resized_fuse_test.py
│   │   ├── squeeze_and_excite.py
│   │   ├── squeeze_and_excite_test.py
│   │   ├── stems.py
│   │   └── stems_test.py
│   ├── loss
│   │   ├── base_loss.py
│   │   ├── base_loss_test.py
│   │   ├── loss_builder.py
│   │   ├── loss_builder_test.py
│   │   ├── matchers_ops.py
│   │   ├── matchers_ops_test.py
│   │   ├── max_deeplab_loss.py
│   │   └── max_deeplab_loss_test.py
│   ├── pixel_decoder
│   │   ├── kmax.py
│   │   └── kmax_test.py
│   ├── pixel_encoder
│   │   ├── axial_resnet.py
│   │   ├── axial_resnet_test.py
│   │   ├── convnext.py
│   │   ├── convnext_test.py
│   │   ├── moat.py
│   │   └── moat_test.py
│   ├── post_processor
│   │   ├── __init__.py
│   │   ├── max_deeplab.py
│   │   ├── max_deeplab_test.py
│   │   ├── motion_deeplab.py
│   │   ├── panoptic_deeplab.py
│   │   ├── panoptic_deeplab_test.py
│   │   ├── post_processor_builder.py
│   │   ├── post_processor_builder_test.py
│   │   ├── vip_deeplab.py
│   │   └── vip_deeplab_test.py
│   ├── test_utils.py
│   ├── test_utils_test.py
│   ├── transformer_decoder
│   │   ├── kmax.py
│   │   └── kmax_test.py
│   ├── utils.py
│   └── utils_test.py
├── tensorflow_ops
│   ├── kernels
│   │   ├── merge_semantic_and_instance_maps_op.cc
│   │   ├── merge_semantic_and_instance_maps_op_kernel.cc
│   │   ├── merge_semantic_and_instance_maps_op_kernel.cu.cc
│   │   └── merge_semantic_and_instance_maps_op_kernel.h
│   └── python
│       ├── kernel_tests
│       │   ├── __init__.py
│       │   └── merge_semantic_and_instance_maps_op_test.py
│       └── ops
│           ├── __init__.py
│           └── merge_semantic_and_instance_maps_op.py
├── tracker
│   ├── __init__.py
│   ├── iou_tracker.py
│   └── optical_flow_utils.py
├── trainer.proto
├── trainer
│   ├── __init__.py
│   ├── distribution_utils.py
│   ├── evaluator.py
│   ├── evaluator_test.py
│   ├── runner_utils.py
│   ├── runner_utils_test.py
│   ├── train.py
│   ├── train_lib.py
│   ├── trainer.py
│   ├── trainer_utils.py
│   ├── vis.py
│   └── vis_utils.py
├── utils
│   ├── __init__.py
│   ├── coco_tools.py
│   ├── coco_tools_test.py
│   ├── create_images_json_for_cityscapes.py
│   ├── hparam_configs.py
│   ├── hparam_configs_test.py
│   ├── net_surgery_convert_last_layer.py
│   ├── panoptic_cityscapes_categories.json
│   ├── panoptic_instances.py
│   ├── panoptic_instances_test.py
│   └── test_utils.py
└── video
    ├── __init__.py
    ├── motion_deeplab.py
    └── vip_deeplab.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Caching.
2 | __pycache__/
3 |
4 | # IDE settings.
5 | .vscode/
6 | .idea/
7 | .env
8 | .config/
9 |
10 | # Generated proto files.
11 | *_pb2.py
12 |
13 | # For mac
14 | *.DS_Store
15 |
16 | # Generated files
17 | *.o
18 | *.so
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/common_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for common.py."""
17 | import tensorflow as tf
18 |
19 | from deeplab2 import common
20 |
21 |
22 | class CommonTest(tf.test.TestCase):
23 |
24 | def test_constants_keys(self):
25 | self.assertEqual(common.PRED_PANOPTIC_KEY, 'panoptic_pred')
26 | self.assertEqual(common.PRED_SEMANTIC_KEY, 'semantic_pred')
27 | self.assertEqual(common.PRED_INSTANCE_CENTER_KEY, 'instance_center_pred')
28 | self.assertEqual(common.PRED_INSTANCE_KEY, 'instance_pred')
29 |
30 | self.assertEqual(common.PRED_SEMANTIC_LOGITS_KEY, 'semantic_logits')
31 | self.assertEqual(common.PRED_CENTER_HEATMAP_KEY, 'center_heatmap')
32 | self.assertEqual(common.PRED_OFFSET_MAP_KEY, 'offset_map')
33 | self.assertEqual(common.PRED_FRAME_OFFSET_MAP_KEY, 'frame_offset_map')
34 |
35 | self.assertEqual(common.GT_PANOPTIC_KEY, 'panoptic_gt')
36 | self.assertEqual(common.GT_SEMANTIC_KEY, 'semantic_gt')
37 | self.assertEqual(common.GT_INSTANCE_CENTER_KEY, 'instance_center_gt')
38 | self.assertEqual(common.GT_FRAME_OFFSET_KEY, 'frame_offset_gt')
39 | self.assertEqual(common.GT_INSTANCE_REGRESSION_KEY,
40 | 'instance_regression_gt')
41 | self.assertEqual(common.GT_PANOPTIC_RAW, 'panoptic_raw')
42 | self.assertEqual(common.GT_SEMANTIC_RAW, 'semantic_raw')
43 | self.assertEqual(common.GT_SIZE_RAW, 'size_raw')
44 |
45 | self.assertEqual(common.SEMANTIC_LOSS_WEIGHT_KEY, 'semantic_loss_weight')
46 | self.assertEqual(common.CENTER_LOSS_WEIGHT_KEY, 'center_loss_weight')
47 | self.assertEqual(common.REGRESSION_LOSS_WEIGHT_KEY,
48 | 'regression_loss_weight')
49 | self.assertEqual(common.FRAME_REGRESSION_LOSS_WEIGHT_KEY,
50 | 'frame_regression_loss_weight')
51 |
52 | self.assertEqual(common.RESIZED_IMAGE, 'resized_image')
53 | self.assertEqual(common.IMAGE, 'image')
54 | self.assertEqual(common.IMAGE_NAME, 'image_name')
55 | self.assertEqual(common.SEQUENCE_ID, 'sequence_id')
56 | self.assertEqual(common.FRAME_ID, 'frame_id')
57 |
58 | self.assertEqual(common.KEY_FRAME_ID, 'video/frame_id')
59 | self.assertEqual(common.KEY_SEQUENCE_ID, 'video/sequence_id')
60 | self.assertEqual(common.KEY_LABEL_FORMAT, 'image/segmentation/class/format')
61 | self.assertEqual(common.KEY_ENCODED_PREV_LABEL,
62 | 'prev_image/segmentation/class/encoded')
63 | self.assertEqual(common.KEY_ENCODED_LABEL,
64 | 'image/segmentation/class/encoded')
65 | self.assertEqual(common.KEY_IMAGE_CHANNELS, 'image/channels')
66 | self.assertEqual(common.KEY_IMAGE_WIDTH, 'image/width')
67 | self.assertEqual(common.KEY_IMAGE_HEIGHT, 'image/height')
68 | self.assertEqual(common.KEY_IMAGE_FORMAT, 'image/format')
69 | self.assertEqual(common.KEY_IMAGE_FILENAME, 'image/filename')
70 | self.assertEqual(common.KEY_ENCODED_PREV_IMAGE, 'prev_image/encoded')
71 | self.assertEqual(common.KEY_ENCODED_IMAGE, 'image/encoded')
72 |
73 | def test_multicamera_keys(self):
74 | test_camera_name = 'front'
75 | expected = {
76 | common.KEY_PER_CAMERA_ENCODED_IMAGE:
77 | 'image/encoded/%s',
78 | common.KEY_PER_CAMERA_ENCODED_NEXT_IMAGE:
79 | 'next_image/encoded/%s',
80 | common.KEY_PER_CAMERA_IMAGE_HEIGHT:
81 | 'image/height/%s',
82 | common.KEY_PER_CAMERA_IMAGE_WIDTH:
83 | 'image/width/%s',
84 | common.KEY_PER_CAMERA_ENCODED_LABEL:
85 | 'image/segmentation/class/encoded/%s',
86 | common.KEY_PER_CAMERA_ENCODED_NEXT_LABEL:
87 | 'next_image/segmentation/class/encoded/%s',
88 | common.KEY_PER_CAMERA_ENCODED_DEPTH:
89 | 'image/depth/encoded/%s',
90 | }
91 | for key, val in expected.items():
92 | self.assertEqual(key % test_camera_name, val % test_camera_name)
93 |
94 |
95 | if __name__ == '__main__':
96 | tf.test.main()
97 |
--------------------------------------------------------------------------------
/compile.sh:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The Deeplab2 Authors.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Quick-start command line to set up deeplab2 (Linux only).
16 | # Example command to run:
17 | # deeplab2/compile.sh ${PATH_TO_PROTOC}
18 | #
19 | # This script assumes the following folder structure:
20 | #
21 | # + root
22 | # + deeplab2
23 | # + models
24 | # + orbit
25 | # + cocoapi
26 | # + PythonAPI
27 | #
28 | # The script also assumes that `protoc` is accessible from the command
29 | # line.
30 |
31 | #!/bin/bash
32 |
33 | set -e
34 |
35 | # cpu or gpu
36 | CONFIG="cpu"
37 |
38 | function tolower() {
39 | echo "${1,,}"
40 | }
41 |
42 | if [[ ! -z "$1" ]]
43 | then
44 | echo "Setting configuration from argument($1)..."
45 | CONFIG=$(tolower "$1")
46 | if [ "$CONFIG" != "cpu" ] && [ "$CONFIG" != "gpu" ]
47 | then
48 | echo "Configuration must be either \"cpu\" or \"gpu\", exiting..."
49 | exit 1
50 | fi
51 | fi
52 |
53 | echo "Running configuration with $CONFIG."
54 |
55 | # Protobuf compilation
56 | # Replace `protoc` with `${PATH_TO_PROTOC}` if the protobuf compiler was
57 | # downloaded from the web.
58 | echo "-----------------------------------------------------------------------"
59 | echo "Compiling protobuf..."
60 | echo "-----------------------------------------------------------------------"
61 | protoc deeplab2/*.proto --python_out=.
62 |
63 | # Compile custom ops
64 | # See details in https://www.tensorflow.org/guide/create_op#compile_the_op_using_your_system_compiler_tensorflow_binary_installation
65 | TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
66 | TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
67 | OP_NAME='deeplab2/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op'
68 |
69 | if [ "$CONFIG" == "cpu" ]
70 | then
71 | # CPU
72 | echo "-----------------------------------------------------------------------"
73 | echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (CPU)..."
74 | echo "-----------------------------------------------------------------------"
75 | g++ -std=c++14 -shared \
76 | ${OP_NAME}.cc ${OP_NAME}_kernel.cc -o ${OP_NAME}.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2
77 | else
78 | # GPU
79 | # (https://www.tensorflow.org/guide/create_op#compiling_the_kernel_for_the_gpu_device)
80 | echo "-----------------------------------------------------------------------"
81 | echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (GPU)..."
82 | echo "-----------------------------------------------------------------------"
83 | nvcc -std=c++14 -c -o ${OP_NAME}_kernel.cu.o \
84 | ${OP_NAME}_kernel.cu.cc \
85 | ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr
86 |
87 | g++ -std=c++14 -shared -o ${OP_NAME}.so ${OP_NAME}.cc ${OP_NAME}_kernel.cc \
88 | ${OP_NAME}_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]}
89 | fi
90 |
91 | # PYTHONPATH
92 | export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/models:`pwd`/cocoapi/PythonAPI
93 |
94 | # Running tests
95 | echo "-----------------------------------------------------------------------"
96 | echo "Running tests for merge_semantic_and_instance_maps_op..."
97 | echo "-----------------------------------------------------------------------"
98 | python deeplab2/tensorflow_ops/python/kernel_tests/merge_semantic_and_instance_maps_op_test.py
99 |
100 | # End-to-end tests
101 | echo "-----------------------------------------------------------------------"
102 | echo "Running end-to-end tests..."
103 | echo "-----------------------------------------------------------------------"
104 |
105 | # Model training test (tests custom ops and protobuf)
106 | python deeplab2/model/deeplab_test.py
107 |
108 | # Model evaluation test (test for other packages such as orbit, cocoapi, etc)
109 | python deeplab2/trainer/evaluator_test.py
110 |
111 | echo "------------------------"
112 | echo "Done with configuration!"
113 | echo "------------------------"
114 |
115 |
--------------------------------------------------------------------------------
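
Once compile.sh has built `merge_semantic_and_instance_maps_op.so`, the library is loaded into TensorFlow at runtime. The snippet below is a minimal sketch, not taken from the repository (the in-tree wrapper lives in `tensorflow_ops/python/ops/merge_semantic_and_instance_maps_op.py`); only the `.so` path produced by the script is assumed.

import tensorflow as tf

# Path written by compile.sh; adjust it to your checkout location.
_SO_PATH = ('deeplab2/tensorflow_ops/kernels/'
            'merge_semantic_and_instance_maps_op.so')

# tf.load_op_library registers the kernels from the shared library and
# returns a module whose attributes expose the registered ops.
merge_ops = tf.load_op_library(_SO_PATH)
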
/config.proto:
--------------------------------------------------------------------------------
1 | // Copyright 2023 The Deeplab2 Authors.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | syntax = "proto2";
16 |
17 | package deeplab2;
18 |
19 | import public 'deeplab2/dataset.proto';
20 | import public 'deeplab2/evaluator.proto';
21 | import public 'deeplab2/model.proto';
22 | import public 'deeplab2/trainer.proto';
23 |
24 | option java_multiple_files = true;
25 |
26 | // Configure experiment options.
27 | message ExperimentOptions {
28 | // Set the experiment name.
29 | optional string experiment_name = 1;
30 | // Set the options for the model.
31 | optional ModelOptions model_options = 2;
32 | // Set the options for the trainer.
33 | optional TrainerOptions trainer_options = 3;
34 | // Set the options for the training dataset.
35 | optional DatasetOptions train_dataset_options = 4;
36 | // Set the options for the evaluator.
37 | optional EvaluatorOptions evaluator_options = 5;
38 | // Set the options for the validation dataset.
39 | optional DatasetOptions eval_dataset_options = 6;
40 | }
41 |
--------------------------------------------------------------------------------
/configs/cityscapes/panoptic_deeplab/resnet50_os32_semseg.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 | #
4 | # Panoptic-DeepLab with ResNet-50 and output stride 32.
5 | #
6 | ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7 | # Before using this config, you need to update the following fields:
8 | # - experiment_name: Use a unique experiment name for each experiment.
9 | # - initial_checkpoint: Update the path to the initial checkpoint.
10 | # - train_dataset_options.file_pattern: Update the path to the
11 | # training set. e.g., your_dataset/train*.tfrecord
12 | # - eval_dataset_options.file_pattern: Update the path to the
13 | # validation set, e.g., your_dataset/eval*.tfrecord
14 | #########################################################################
15 | #
16 | # This config provides an example of training Panoptic-DeepLab with ONLY
17 | # semantic segmentation (i.e., the instance/panoptic segmentation is not
18 | # trained). This could be used for some datasets that provide only
19 | # semantic segmentation annotations.
20 | #
21 | # For ResNet, see
22 | # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
23 | # In CVPR, 2016.
24 | # For Panoptic-DeepLab, see
25 | # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
26 | # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
27 |
28 | # Use a unique experiment_name for each experiment.
29 | experiment_name: "${EXPERIMENT_NAME}"
30 | model_options {
31 | # Update the path to the initial checkpoint (e.g., ImageNet
32 | # pretrained checkpoint).
33 | initial_checkpoint: "${INIT_CHECKPOINT}"
34 | backbone {
35 | name: "resnet50"
36 | output_stride: 32
37 | }
38 | decoder {
39 | feature_key: "res5"
40 | decoder_channels: 256
41 | aspp_channels: 256
42 | atrous_rates: 3
43 | atrous_rates: 6
44 | atrous_rates: 9
45 | }
46 | panoptic_deeplab {
47 | low_level {
48 | feature_key: "res3"
49 | channels_project: 64
50 | }
51 | low_level {
52 | feature_key: "res2"
53 | channels_project: 32
54 | }
55 | instance {
56 | enable: false
57 | }
58 | semantic_head {
59 | output_channels: 19
60 | head_channels: 256
61 | }
62 | }
63 | }
64 | trainer_options {
65 | save_checkpoints_steps: 1000
66 | save_summaries_steps: 100
67 | steps_per_loop: 100
68 | loss_options {
69 | semantic_loss {
70 | name: "softmax_cross_entropy"
71 | weight: 1.0
72 | top_k_percent: 0.2
73 | }
74 | }
75 | solver_options {
76 | base_learning_rate: 0.0005
77 | training_number_of_steps: 60000
78 | }
79 | }
80 | train_dataset_options {
81 | dataset: "cityscapes_panoptic"
82 | # Update the path to training set.
83 | file_pattern: "${TRAIN_SET}"
84 | # Adjust the batch_size to better fit your GPU/TPU memory.
85 | # Also see Q1 in g3doc/faq.md.
86 | batch_size: 8
87 | crop_size: 1025
88 | crop_size: 2049
89 | # Skip resizing.
90 | min_resize_value: 0
91 | max_resize_value: 0
92 | augmentations {
93 | min_scale_factor: 0.5
94 | max_scale_factor: 2.0
95 | scale_factor_step_size: 0.1
96 | }
97 | }
98 | eval_dataset_options {
99 | dataset: "cityscapes_panoptic"
100 | # Update the path to validation set.
101 | file_pattern: "${VAL_SET}"
102 | batch_size: 1
103 | crop_size: 1025
104 | crop_size: 2049
105 | # Skip resizing.
106 | min_resize_value: 0
107 | max_resize_value: 0
108 | }
109 | evaluator_options {
110 | continuous_eval_timeout: -1
111 | save_predictions: true
112 | save_raw_predictions: false
113 | }
114 |
--------------------------------------------------------------------------------
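
For reference, a config like the one above is parsed into the `ExperimentOptions` message defined in config.proto. The following is a minimal sketch, assuming `protoc` has already generated the `*_pb2` modules (the protobuf-compilation step in compile.sh) and that the working directory sits one level above `deeplab2/`.

from google.protobuf import text_format

from deeplab2 import config_pb2

# Parse the textproto into an ExperimentOptions message.
config_path = ('deeplab2/configs/cityscapes/panoptic_deeplab/'
               'resnet50_os32_semseg.textproto')
with open(config_path) as f:
  config = text_format.Parse(f.read(), config_pb2.ExperimentOptions())

print(config.train_dataset_options.batch_size)  # 8 for this config.
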
/configs/example/example_cityscapes_deeplabv3.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "res5"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "resnet50"
14 | }
15 |
16 | # Example for cityscapes.
17 | deeplab_v3 {
18 | num_classes: 19
19 | }
20 | }
21 |
22 | train_dataset_options {
23 | crop_size: 1025
24 | crop_size: 2049
25 | }
26 |
--------------------------------------------------------------------------------
/configs/example/example_cityscapes_deeplabv3_mv3l.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "res5"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "mobilenet_v3_large"
14 | use_squeeze_and_excite: true
15 | }
16 |
17 | # Example for cityscapes.
18 | deeplab_v3 {
19 | num_classes: 19
20 | }
21 | }
22 |
23 | train_dataset_options {
24 | crop_size: 1025
25 | crop_size: 2049
26 | }
27 |
--------------------------------------------------------------------------------
/configs/example/example_cityscapes_deeplabv3plus.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "res5"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "resnet50"
14 | }
15 |
16 | deeplab_v3_plus {
17 | low_level {
18 | feature_key: "res2"
19 | channels_project: 48
20 | }
21 | # Example for cityscapes.
22 | num_classes: 19
23 | }
24 | }
25 |
26 | train_dataset_options {
27 | crop_size: 1025
28 | crop_size: 2049
29 | }
30 |
--------------------------------------------------------------------------------
/configs/example/example_cityscapes_panoptic_deeplab.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "res5"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "resnet50"
14 | }
15 |
16 | panoptic_deeplab {
17 | low_level {
18 | feature_key: "res3"
19 | channels_project: 64
20 | }
21 | low_level {
22 | feature_key: "res2"
23 | channels_project: 32
24 | }
25 | semantic_head {
26 | # Example for cityscapes.
27 | output_channels: 19
28 | head_channels: 256
29 | }
30 | instance {
31 | instance_decoder_override {
32 | feature_key: "res5"
33 | decoder_channels: 128
34 | atrous_rates: 6
35 | atrous_rates: 12
36 | atrous_rates: 18
37 | }
38 | low_level_override {
39 | feature_key: "res3"
40 | channels_project: 32
41 | }
42 | low_level_override {
43 | feature_key: "res2"
44 | channels_project: 16
45 | }
46 | center_head {
47 | output_channels: 1
48 | head_channels: 32
49 | }
50 | regression_head {
51 | output_channels: 2
52 | head_channels: 32
53 | }
54 | }
55 | }
56 | }
57 |
58 | train_dataset_options {
59 | crop_size: 1025
60 | crop_size: 2049
61 | }
62 |
--------------------------------------------------------------------------------
/configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "res5"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "mobilenet_v3_large"
14 | use_squeeze_and_excite: true
15 | }
16 |
17 | panoptic_deeplab {
18 | low_level {
19 | feature_key: "res3"
20 | channels_project: 64
21 | }
22 | low_level {
23 | feature_key: "res2"
24 | channels_project: 32
25 | }
26 | semantic_head {
27 | # Example for cityscapes.
28 | output_channels: 19
29 | head_channels: 256
30 | }
31 | instance {
32 | instance_decoder_override {
33 | feature_key: "res5"
34 | decoder_channels: 128
35 | atrous_rates: 6
36 | atrous_rates: 12
37 | atrous_rates: 18
38 | }
39 | low_level_override {
40 | feature_key: "res3"
41 | channels_project: 32
42 | }
43 | low_level_override {
44 | feature_key: "res2"
45 | channels_project: 16
46 | }
47 | center_head {
48 | output_channels: 1
49 | head_channels: 32
50 | }
51 | regression_head {
52 | output_channels: 2
53 | head_channels: 32
54 | }
55 | }
56 | }
57 | }
58 |
59 | train_dataset_options {
60 | crop_size: 1025
61 | crop_size: 2049
62 | }
63 |
--------------------------------------------------------------------------------
/configs/example/example_coco_kmax_meta_convnext.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "feature_semantic"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "kmax_convnext_base"
14 | drop_path_keep_prob: 0.5
15 | }
16 |
17 | max_deeplab {
18 | pixel_space_head {
19 | output_channels: 128
20 | head_channels: 256
21 | }
22 | auxiliary_low_level {
23 | feature_key: "res3"
24 | channels_project: 64
25 | }
26 | auxiliary_low_level {
27 | feature_key: "res2"
28 | channels_project: 32
29 | }
30 | auxiliary_semantic_head {
31 | # Example for COCO.
32 | output_channels: 134
33 | head_channels: 256
34 | }
35 | }
36 | }
37 |
38 | train_dataset_options {
39 | crop_size: 65
40 | crop_size: 65
41 | }
42 |
--------------------------------------------------------------------------------
/configs/example/example_coco_max_deeplab.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/config.proto
2 | # proto-message: ExperimentOptions
3 |
4 | model_options {
5 | decoder {
6 | feature_key: "feature_semantic"
7 | atrous_rates: 6
8 | atrous_rates: 12
9 | atrous_rates: 18
10 | }
11 |
12 | backbone {
13 | name: "max_deeplab_s"
14 | output_stride: 16
15 | }
16 |
17 | max_deeplab {
18 | pixel_space_head {
19 | output_channels: 128
20 | head_channels: 256
21 | }
22 | auxiliary_low_level {
23 | feature_key: "res3"
24 | channels_project: 64
25 | }
26 | auxiliary_low_level {
27 | feature_key: "res2"
28 | channels_project: 32
29 | }
30 | auxiliary_semantic_head {
31 | # Example for COCO.
32 | output_channels: 134
33 | head_channels: 256
34 | }
35 | }
36 | }
37 |
38 | train_dataset_options {
39 | crop_size: 65
40 | crop_size: 65
41 | }
42 |
--------------------------------------------------------------------------------
/configs/example/example_kitti-step_motion_deeplab.textproto:
--------------------------------------------------------------------------------
1 | # proto-file: deeplab2/model.proto
2 | # proto-message: ModelOptions
3 |
4 | decoder {
5 | feature_key: "res5"
6 | atrous_rates: 6
7 | atrous_rates: 12
8 | atrous_rates: 18
9 | }
10 |
11 | backbone {
12 | name: "resnet50"
13 | }
14 |
15 | # Motion-Deeplab adopts Panoptic-Deeplab for the task of Video Panoptic
16 | # Segmentation or Segmenting and Tracking Every Pixel (STEP).
17 | motion_deeplab {
18 | low_level {
19 | feature_key: "res3"
20 | channels_project: 64
21 | }
22 | low_level {
23 | feature_key: "res2"
24 | channels_project: 32
25 | }
26 | semantic_head {
27 | # Example for KITTI-STEP.
28 | output_channels: 19
29 | head_channels: 256
30 | }
31 | instance {
32 | instance_decoder_override {
33 | feature_key: "res5"
34 | decoder_channels: 128
35 | atrous_rates: 6
36 | atrous_rates: 12
37 | atrous_rates: 18
38 | }
39 | low_level_override {
40 | feature_key: "res3"
41 | channels_project: 32
42 | }
43 | low_level_override {
44 | feature_key: "res2"
45 | channels_project: 16
46 | }
47 | center_head {
48 | output_channels: 1
49 | head_channels: 32
50 | }
51 | regression_head {
52 | output_channels: 2
53 | head_channels: 32
54 | }
55 | }
56 | motion_head {
57 | output_channels: 2
58 | head_channels: 32
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/data/build_cityscapes_data_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for build_cityscapes_data."""
17 |
18 | import os
19 |
20 | from absl import flags
21 | import numpy as np
22 | from PIL import Image
23 | import tensorflow as tf
24 |
25 | from deeplab2.data import build_cityscapes_data
26 |
27 |
28 | FLAGS = flags.FLAGS
29 | _TEST_DATA_DIR = 'deeplab2/data/testdata'
30 | _TEST_FILE_PREFIX = 'dummy_000000_000000'
31 |
32 |
33 | class BuildCityscapesDataTest(tf.test.TestCase):
34 |
35 | def test_read_segments(self):
36 | cityscapes_root = os.path.join(_TEST_DATA_DIR)
37 | segments_dict = build_cityscapes_data._read_segments(
38 | cityscapes_root, dataset_split='dummy')
39 | self.assertIn(_TEST_FILE_PREFIX, segments_dict)
40 | _, segments = segments_dict[_TEST_FILE_PREFIX]
41 | self.assertLen(segments, 10)
42 |
43 | def test_generate_panoptic_label(self):
44 | FLAGS.treat_crowd_as_ignore = False # Test a more complicated setting
45 | cityscapes_root = os.path.join(_TEST_DATA_DIR)
46 | segments_dict = build_cityscapes_data._read_segments(
47 | cityscapes_root, dataset_split='dummy')
48 | annotation_file_name, segments = segments_dict[_TEST_FILE_PREFIX]
49 | panoptic_annotation_file = build_cityscapes_data._get_panoptic_annotation(
50 | cityscapes_root, dataset_split='dummy',
51 | annotation_file_name=annotation_file_name)
52 | panoptic_label = build_cityscapes_data._generate_panoptic_label(
53 | panoptic_annotation_file, segments)
54 |
55 | # Check panoptic label matches golden file.
56 | golden_file_path = os.path.join(_TEST_DATA_DIR,
57 | 'dummy_gt_for_vps.png')
58 | with tf.io.gfile.GFile(golden_file_path, 'rb') as f:
59 | golden_label = Image.open(f)
60 | # The PNG file is encoded by:
61 | # color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
62 | golden_label = np.dot(np.asarray(golden_label), [1, 256, 256 * 256])
63 |
64 | np.testing.assert_array_equal(panoptic_label, golden_label)
65 |
66 | if __name__ == '__main__':
67 | tf.test.main()
68 |
--------------------------------------------------------------------------------
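
For reference, the PNG id encoding referenced by the test above round-trips as in this standalone sketch (values are made up; PNG channels are uint8, so the middle channel is implicitly taken mod 256).

import numpy as np

segment_id = 26001  # e.g. category 26, instance 1.
color = [segment_id % 256, (segment_id // 256) % 256,
         segment_id // 256 // 256]
# Decoding is the dot product used in the test.
assert int(np.dot(color, [1, 256, 256 * 256])) == segment_id
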
/data/data_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for data_utils."""
17 |
18 | import io
19 | import numpy as np
20 | from PIL import Image
21 | import tensorflow as tf
22 |
23 | from deeplab2.data import data_utils
24 |
25 |
26 | def _encode_png_image(image):
27 | """Helper method to encode input image in PNG format."""
28 | buffer = io.BytesIO()
29 | Image.fromarray(image).save(buffer, format='png')
30 | return buffer.getvalue()
31 |
32 |
33 | class DataUtilsTest(tf.test.TestCase):
34 |
35 | def _create_test_image(self, height, width):
36 | rng = np.random.RandomState(319281498)
37 | return rng.randint(0, 255, size=(height, width, 3), dtype=np.uint8)
38 |
39 | def test_encode_and_decode(self):
40 | """Checks decode created tf.Example for semantic segmentation."""
41 | test_image_height = 20
42 | test_image_width = 15
43 | filename = 'dummy'
44 |
45 | image = self._create_test_image(test_image_height, test_image_width)
46 | # Take the first channel as a dummy label.
47 | label = image[..., 0]
48 |
49 | example = data_utils.create_tfexample(
50 | image_data=_encode_png_image(image),
51 | image_format='png', filename=filename,
52 | label_data=_encode_png_image(label), label_format='png')
53 |
54 | # Parse created example, expect getting identical results.
55 | parser = data_utils.SegmentationDecoder(is_panoptic_dataset=False)
56 | parsed_tensors = parser(example.SerializeToString())
57 |
58 | self.assertIn('image', parsed_tensors)
59 | self.assertIn('image_name', parsed_tensors)
60 | self.assertIn('label', parsed_tensors)
61 | self.assertEqual(filename, parsed_tensors['image_name'])
62 | np.testing.assert_array_equal(image, parsed_tensors['image'].numpy())
63 | # Decoded label is a 3-D array with last dimension of 1.
64 | decoded_label = parsed_tensors['label'].numpy()
65 | np.testing.assert_array_equal(label, decoded_label[..., 0])
66 |
67 | def test_encode_and_decode_panoptic(self):
68 | test_image_height = 31
69 | test_image_width = 17
70 | filename = 'dummy'
71 |
72 | image = self._create_test_image(test_image_height, test_image_width)
73 | # Create dummy panoptic label in np.int32 dtype.
74 | label = np.dot(image.astype(np.int32), [1, 256, 256 * 256]).astype(np.int32)
75 | example = data_utils.create_tfexample(
76 | image_data=_encode_png_image(image),
77 | image_format='png', filename=filename,
78 | label_data=label.tobytes(), label_format='raw')
79 |
80 | parser = data_utils.SegmentationDecoder(is_panoptic_dataset=True)
81 | parsed_tensors = parser(example.SerializeToString())
82 |
83 | self.assertIn('image', parsed_tensors)
84 | self.assertIn('image_name', parsed_tensors)
85 | self.assertIn('label', parsed_tensors)
86 | self.assertEqual(filename, parsed_tensors['image_name'])
87 | np.testing.assert_array_equal(image, parsed_tensors['image'].numpy())
88 | # Decoded label is a 3-D array with last dimension of 1.
89 | decoded_label = parsed_tensors['label'].numpy()
90 | np.testing.assert_array_equal(label, decoded_label[..., 0])
91 |
92 |
93 | if __name__ == '__main__':
94 | tf.test.main()
95 |
--------------------------------------------------------------------------------
/data/dataloader/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/data/dataset_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This file contains utility function for handling the dataset."""
17 |
18 | import tensorflow as tf
19 |
20 |
21 | def get_semantic_and_panoptic_label(dataset_info, label, ignore_label):
22 | """Helper function to get semantic and panoptic label from panoptic label.
23 |
24 | This function gets the semantic and panoptic label from a panoptic label for
25 | different datasets. The labels must be encoded as semantic_label *
26 | label_divisor + instance_id. For thing classes, instance ID 0 is reserved
27 | for crowd regions. Note that in the returned panoptic label, crowd regions
28 | are replaced with ignore regions, whereas the semantic label still makes
29 | use of these regions.
30 |
31 | Args:
32 | dataset_info: A dictionary storing dataset information.
33 | label: A Tensor of panoptic label.
34 | ignore_label: An integer specifying the ignore_label.
35 |
36 | Returns:
37 | semantic_label: A Tensor of semantic segmentation label.
38 | panoptic_label: A Tensor of panoptic segmentation label, which follows the
39 | Cityscapes annotation where
40 | panoptic_label = semantic_label * panoptic_label_divisor + instance_id.
41 | thing_mask: A boolean Tensor specifying the thing regions. Zero if no thing.
42 | crowd_region: A boolean Tensor specifying crowd region. Zero if no crowd
43 | annotation.
44 |
45 | Raises:
46 | ValueError: An error occurs when the ignore_label is not in the range
47 | [0, panoptic_label_divisor).
48 | """
49 | panoptic_label_divisor = dataset_info['panoptic_label_divisor']
50 | if ignore_label >= panoptic_label_divisor or ignore_label < 0:
51 | raise ValueError('The ignore_label must be in [0, panoptic_label_divisor).')
52 |
53 | semantic_label = label // panoptic_label_divisor
54 | # Find iscrowd region if any and set to ignore for panoptic labels.
55 | # 1. Find thing mask.
56 | thing_mask = tf.zeros_like(semantic_label, tf.bool)
57 | for thing_id in dataset_info['class_has_instances_list']:
58 | thing_mask = tf.logical_or(
59 | thing_mask,
60 | tf.equal(semantic_label, thing_id))
61 | # 2. Find crowd region (thing label that have instance_id == 0).
62 | crowd_region = tf.logical_and(
63 | thing_mask,
64 | tf.equal(label % panoptic_label_divisor, 0))
65 | # 3. Set crowd region to ignore label.
66 | panoptic_label = tf.where(
67 | crowd_region,
68 | tf.ones_like(label) * ignore_label * panoptic_label_divisor,
69 | label)
70 |
71 | return semantic_label, panoptic_label, thing_mask, crowd_region
72 |
--------------------------------------------------------------------------------
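
The encoding that `get_semantic_and_panoptic_label` relies on is plain integer arithmetic; the standalone sketch below illustrates it with made-up values.

# panoptic_label = semantic_label * panoptic_label_divisor + instance_id,
# where instance_id == 0 marks crowd regions of thing classes.
label_divisor = 1000  # e.g. a typical panoptic_label_divisor.
semantic_id, instance_id = 13, 7
panoptic_id = semantic_id * label_divisor + instance_id  # 13007

assert panoptic_id // label_divisor == semantic_id  # Recover semantic label.
assert panoptic_id % label_divisor == instance_id   # Recover instance id.

# A crowd pixel of the same class encodes as 13 * 1000 + 0 = 13000; the
# function above rewrites such pixels to ignore_label * label_divisor.
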
/data/dataset_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for dataset_utils."""
17 |
18 | import numpy as np
19 | import tensorflow as tf
20 |
21 | from deeplab2.data import dataset_utils
22 |
23 |
24 | class DatasetUtilsTest(tf.test.TestCase):
25 |
26 | def _get_test_labels(self, num_classes, shape, label_divisor):
27 | num_ids_per_class = 35
28 | semantic_labels = np.random.randint(num_classes, size=shape)
29 | panoptic_labels = np.random.randint(
30 | num_ids_per_class, size=shape) + semantic_labels * label_divisor
31 |
32 | semantic_labels = tf.convert_to_tensor(semantic_labels, dtype=tf.int32)
33 | panoptic_labels = tf.convert_to_tensor(panoptic_labels, dtype=tf.int32)
34 |
35 | return panoptic_labels, semantic_labels
36 |
37 | def setUp(self):
38 | super().setUp()
39 | self._first_thing_class = 9
40 | self._num_classes = 19
41 | self._dataset_info = {
42 | 'panoptic_label_divisor': 1000,
43 | 'class_has_instances_list': tf.range(self._first_thing_class,
44 | self._num_classes)
45 | }
46 | self._num_ids = 37
47 | self._labels, self._semantic_classes = self._get_test_labels(
48 | self._num_classes, [2, 33, 33],
49 | self._dataset_info['panoptic_label_divisor'])
50 |
51 | def test_get_panoptic_and_semantic_label(self):
52 | # Note: self._labels contains one crowd instance per class.
53 | (returned_sem_labels, returned_pan_labels, returned_thing_mask,
54 | returned_crowd_region) = (
55 | dataset_utils.get_semantic_and_panoptic_label(
56 | self._dataset_info, self._labels, ignore_label=255))
57 |
58 | expected_semantic_labels = self._semantic_classes
59 | condition = self._labels % self._dataset_info['panoptic_label_divisor'] == 0
60 | condition = tf.logical_and(
61 | condition,
62 | tf.math.greater_equal(expected_semantic_labels,
63 | self._first_thing_class))
64 | expected_crowd_labels = tf.where(condition, 1.0, 0.0)
65 | expected_pan_labels = tf.where(
66 | condition, 255 * self._dataset_info['panoptic_label_divisor'],
67 | self._labels)
68 | expected_thing_mask = tf.where(
69 | tf.math.greater_equal(expected_semantic_labels,
70 | self._first_thing_class), 1.0, 0.0)
71 |
72 | self.assertListEqual(returned_sem_labels.shape.as_list(),
73 | expected_semantic_labels.shape.as_list())
74 | self.assertListEqual(returned_pan_labels.shape.as_list(),
75 | expected_pan_labels.shape.as_list())
76 | self.assertListEqual(returned_crowd_region.shape.as_list(),
77 | expected_crowd_labels.shape.as_list())
78 | self.assertListEqual(returned_thing_mask.shape.as_list(),
79 | expected_thing_mask.shape.as_list())
80 | np.testing.assert_equal(returned_sem_labels.numpy(),
81 | expected_semantic_labels.numpy())
82 | np.testing.assert_equal(returned_pan_labels.numpy(),
83 | expected_pan_labels.numpy())
84 | np.testing.assert_equal(returned_crowd_region.numpy(),
85 | expected_crowd_labels.numpy())
86 | np.testing.assert_equal(returned_thing_mask.numpy(),
87 | expected_thing_mask.numpy())
88 |
89 | if __name__ == '__main__':
90 | tf.test.main()
91 |
--------------------------------------------------------------------------------
/data/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/data/preprocessing/autoaugment_policy.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """AutoAugment policy file.
17 |
18 | This file contains the discovered AutoAugment policies.
19 |
20 | Please cite or refer to the following papers for details:
21 | - Ekin D Cubuk, Barret Zoph, Dandelion Mane, Vijay Vasudevan, and Quoc V Le.
22 | "Autoaugment: Learning augmentation policies from data." In CVPR, 2019.
23 |
24 | - Ekin D Cubuk, Barret Zoph, Jonathon Shlens, and Quoc V Le.
25 | "Randaugment: Practical automated data augmentation with a reduced search
26 | space." In CVPR, 2020.
27 | """
28 |
29 | # Reduced augmentation operation space.
30 | augmentation_reduced_operations = (
31 | 'AutoContrast', 'Equalize', 'Invert', 'Posterize',
32 | 'Solarize', 'Color', 'Contrast', 'Brightness', 'Sharpness')
33 |
34 | augmentation_probabilities = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
35 |
36 |
37 | def convert_policy(policy,
38 | search_space=augmentation_reduced_operations,
39 | probability_scale=1.0,
40 | magnitude_scale=1):
41 | """Converts policy from a list of numbers."""
42 | if len(policy) % 6:
43 | raise ValueError('Policy length must be a multiple of 6.')
44 | num_policies = len(policy) // 6
45 | policy_list = [[] for _ in range(num_policies)]
46 | for n in range(num_policies):
47 | for i in range(2):
48 | operation_id, prob_id, magnitude = (
49 | policy[6 * n + i * 3 : 6 * n + (i + 1) * 3])
50 | policy_name = search_space[operation_id]
51 | policy_prob = (
52 | augmentation_probabilities[prob_id] * probability_scale)
53 | policy_list[n].append((policy_name,
54 | policy_prob,
55 | magnitude * magnitude_scale))
56 | return policy_list
57 |
58 |
59 | simple_classification_policy = [8, 2, 7, 7, 1, 10,
60 | 1, 0, 9, 6, 1, 10,
61 | 8, 1, 9, 5, 1, 9,
62 | 4, 1, 7, 1, 3, 9,
63 | 8, 1, 1, 1, 1, 7]
64 |
65 | # All available policies.
66 | available_policies = {
67 | 'simple_classification_policy_magnitude_scale_0.2': convert_policy(
68 | simple_classification_policy,
69 | augmentation_reduced_operations,
70 | magnitude_scale=0.2),
71 | 'simple_classification_policy': convert_policy(
72 | simple_classification_policy,
73 | augmentation_reduced_operations,
74 | magnitude_scale=1),
75 | }
76 |
--------------------------------------------------------------------------------
/data/preprocessing/autoaugment_policy_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for autoaugment_policy.py."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.data.preprocessing import autoaugment_policy
21 |
22 |
23 | class AutoaugmentPolicyTest(tf.test.TestCase):
24 |
25 | def testConvertPolicy(self):
26 | policy = [5, 1, 10, 5, 3, 4,
27 | 6, 3, 7, 3, 3, 9,
28 | 2, 2, 8, 8, 2, 8,
29 | 1, 4, 9, 4, 5, 7,
30 | 6, 4, 1, 1, 3, 4]
31 | expected = [
32 | [('Color', 0.2, 10), ('Color', 0.6, 4)],
33 | [('Contrast', 0.6, 7), ('Posterize', 0.6, 9)],
34 | [('Invert', 0.4, 8), ('Sharpness', 0.4, 8)],
35 | [('Equalize', 0.8, 9), ('Solarize', 1.0, 7)],
36 | [('Contrast', 0.8, 1), ('Equalize', 0.6, 4)],
37 | ]
38 | policy_list = autoaugment_policy.convert_policy(policy)
39 | self.assertAllEqual(policy_list, expected)
40 |
41 |
42 | if __name__ == '__main__':
43 | tf.test.main()
44 |
--------------------------------------------------------------------------------
/data/preprocessing/autoaugment_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for autoaugment_utils.py."""
17 |
18 | import numpy as np
19 | import tensorflow as tf
20 |
21 | from deeplab2.data.preprocessing import autoaugment_utils
22 |
23 |
24 | class AutoaugmentUtilsTest(tf.test.TestCase):
25 |
26 | def testAugmentWithNamedPolicy(self):
27 | num_classes = 3
28 | np_image = np.random.randint(256, size=(13, 13, 3))
29 | image = tf.constant(np_image, dtype=tf.uint8)
30 | np_label = np.random.randint(num_classes, size=(13, 13, 1))
31 | label = tf.constant(np_label, dtype=tf.int32)
32 | image, label = autoaugment_utils.distort_image_with_autoaugment(
33 | image, label, ignore_label=255,
34 | augmentation_name='simple_classification_policy')
35 | self.assertTrue(image.numpy().any())
36 | self.assertTrue(label.numpy().any())
37 |
38 |
39 | if __name__ == '__main__':
40 | tf.test.main()
41 |
--------------------------------------------------------------------------------
/data/testdata/dummy_gt_for_vps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/dummy_gt_for_vps.png
--------------------------------------------------------------------------------
/data/testdata/dummy_prediction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/dummy_prediction.png
--------------------------------------------------------------------------------
/data/testdata/gtFine/cityscapes_panoptic_dummy_trainId.json:
--------------------------------------------------------------------------------
1 | {
2 | "annotations": [
3 | {
4 | "file_name": "dummy_000000_000000_gtFine_panoptic.png",
5 | "image_id": "dummy_000000_000000",
6 | "segments_info": [
7 | {
8 | "area": 958,
9 | "category_id": 13,
10 | "id": 26000,
11 | "iscrowd": 0
12 | },
13 | {
14 | "area": 6178,
15 | "category_id": 13,
16 | "id": 26,
17 | "iscrowd": 1
18 | },
19 | {
20 | "area": 10496,
21 | "category_id": 13,
22 | "id": 26001,
23 | "iscrowd": 0
24 | },
25 | {
26 | "area": 5534,
27 | "category_id": 13,
28 | "id": 26002,
29 | "iscrowd": 0
30 | },
31 | {
32 | "area": 32768,
33 | "category_id": 13,
34 | "id": 26003,
35 | "iscrowd": 0
36 | },
37 | {
38 | "area": 19906,
39 | "category_id": 13,
40 | "id": 26004,
41 | "iscrowd": 0
42 | },
43 | {
44 | "area": 15940,
45 | "category_id": 8,
46 | "id": 21,
47 | "iscrowd": 0
48 | },
49 | {
50 | "area": 278754,
51 | "category_id": 10,
52 | "id": 23,
53 | "iscrowd": 0
54 | },
55 | {
56 | "area": 222420,
57 | "category_id": 2,
58 | "id": 11,
59 | "iscrowd": 0
60 | },
61 | {
62 | "area": 46475,
63 | "category_id": 0,
64 | "id": 7,
65 | "iscrowd": 0
66 | }
67 | ]
68 | }
69 | ]
70 | }
--------------------------------------------------------------------------------
/data/testdata/gtFine/cityscapes_panoptic_dummy_trainId/dummy_000000_000000_gtFine_panoptic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/gtFine/cityscapes_panoptic_dummy_trainId/dummy_000000_000000_gtFine_panoptic.png
--------------------------------------------------------------------------------
/data/testdata/leftImg8bit/dummy_000000_000000_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/leftImg8bit/dummy_000000_000000_leftImg8bit.png
--------------------------------------------------------------------------------
/data/testdata/targets/center_target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/center_target.png
--------------------------------------------------------------------------------
/data/testdata/targets/center_weights.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/center_weights.png
--------------------------------------------------------------------------------
/data/testdata/targets/eval_is_crowd.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_is_crowd.npy
--------------------------------------------------------------------------------
/data/testdata/targets/eval_panoptic_target.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_panoptic_target.npy
--------------------------------------------------------------------------------
/data/testdata/targets/eval_semantic_target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_semantic_target.png
--------------------------------------------------------------------------------
/data/testdata/targets/is_crowd.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/is_crowd.npy
--------------------------------------------------------------------------------
/data/testdata/targets/offset_target.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/offset_target.npy
--------------------------------------------------------------------------------
/data/testdata/targets/offset_weights.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/offset_weights.png
--------------------------------------------------------------------------------
/data/testdata/targets/panoptic_target.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/panoptic_target.npy
--------------------------------------------------------------------------------
/data/testdata/targets/panoptic_target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/panoptic_target.png
--------------------------------------------------------------------------------
/data/testdata/targets/rgb_target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/rgb_target.png
--------------------------------------------------------------------------------
/data/testdata/targets/semantic_target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/semantic_target.png
--------------------------------------------------------------------------------
/data/testdata/targets/semantic_weights.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/semantic_weights.png
--------------------------------------------------------------------------------
/data/testdata/targets/thing_id_class_target.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/thing_id_class_target.npy
--------------------------------------------------------------------------------
/data/testdata/targets/thing_id_mask_target.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/thing_id_mask_target.npy
--------------------------------------------------------------------------------
/data/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/data/waymo_constants.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Meta info of Waymo Open Dataset: Panoramic Video Panoptic Segmentation.
17 |
18 | Dataset website: https://waymo.com/open/
19 | GitHub: https://github.com/waymo-research/waymo-open-dataset
20 |
21 | References:
22 |
23 | - Jieru Mei, Alex Zihao Zhu, Xinchen Yan, Hang Yan, Siyuan Qiao, Yukun Zhu,
24 | Liang-Chieh Chen, Henrik Kretzschmar, Dragomir Anguelov. "Waymo Open Dataset:
25 | Panoramic Video Panoptic Segmentation." In ECCV, 2022.
26 | """
27 |
28 | from typing import Any, Sequence
29 |
30 | import immutabledict
31 |
32 | COLORMAP = "waymo"
33 | PANOPTIC_LABEL_DIVISOR = 100000
34 |
35 | IGNORE_LABEL_NAME = "unknown"
36 | IGNORE_LABEL = 0
37 |
38 | _WAYMO_COLORS = immutabledict.immutabledict({
39 | "bicycle": [119, 11, 32],
40 | "bird": [127, 96, 0],
41 | "building": [70, 70, 70],
42 | "bus": [0, 60, 100],
43 | "car": [0, 0, 142],
44 | "construction_cone_pole": [230, 145, 56],
45 | "cyclist": [255, 0, 0],
46 | "dynamic": [102, 102, 102],
47 | "ground": [102, 102, 102],
48 | "ground_animal": [91, 15, 0],
49 | "lane_marker": [234, 209, 220],
50 | "motorcycle": [0, 0, 230],
51 | "motorcyclist": [180, 0, 0],
52 | "other_large_vehicle": [61, 133, 198],
53 | "other_pedestrian_object": [234, 153, 153],
54 | "person": [220, 20, 60],
55 | "pole": [153, 153, 153],
56 | "road": [128, 64, 128],
57 | "road_marker": [217, 210, 233],
58 | "sdc": [102, 102, 102],
59 | "sidewalk": [244, 35, 232],
60 | "sign": [246, 178, 107],
61 | "sky": [70, 130, 180],
62 | "static": [102, 102, 102],
63 | "traffic_light": [250, 170, 30],
64 | "trailer": [111, 168, 220],
65 | "truck": [0, 0, 70],
66 | "unknown": [102, 102, 102],
67 | "vegetation": [107, 142, 35],
68 | })
69 |
70 | _WAYMO_CLASS_NAMES = [
71 | "unknown",
72 | "sdc",
73 | "car",
74 | "truck",
75 | "bus",
76 | "other_large_vehicle",
77 | "bicycle",
78 | "motorcycle",
79 | "trailer",
80 | "person",
81 | "cyclist",
82 | "motorcyclist",
83 | "bird",
84 | "ground_animal",
85 | "construction_cone_pole",
86 | "pole",
87 | "other_pedestrian_object",
88 | "sign",
89 | "traffic_light",
90 | "building",
91 | "road",
92 | "lane_marker",
93 | "road_marker",
94 | "sidewalk",
95 | "vegetation",
96 | "sky",
97 | "ground",
98 | "dynamic",
99 | "static",
100 | ]
101 |
102 | _IS_THINGS = [
103 | "car", "truck", "bus", "other_large_vehicle", "trailer", "person",
104 | "cyclist", "motorcyclist"
105 | ]
106 |
107 |
108 | def get_waymo_meta() -> Sequence[Any]:
109 | """Gets the meta info for waymo dataset."""
110 | meta = []
111 | for name_id, name in enumerate(_WAYMO_CLASS_NAMES):
112 | item = {
113 | "color": _WAYMO_COLORS[name],
114 | "name": name,
115 | "id": name_id,
116 | "isthing": int(name in _IS_THINGS)
117 | }
118 | meta.append(item)
119 | return meta
120 |
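# Usage sketch (illustrative; not part of the original file): each returned
# entry pairs a class id with its name, color, and thing/stuff flag, e.g.
#
#   meta = get_waymo_meta()
#   meta[0] == {"color": [102, 102, 102], "name": "unknown", "id": 0,
#               "isthing": 0}
#   meta[2] == {"color": [0, 0, 142], "name": "car", "id": 2, "isthing": 1}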
--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/evaluation/depth_metrics_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for depth metrics."""
17 | import numpy as np
18 | import tensorflow as tf
19 |
20 | from deeplab2.evaluation import depth_metrics
21 |
22 |
23 | class DepthMetricsTest(tf.test.TestCase):
24 |
25 | def test_depth_metrics_on_single_image(self):
26 | gt = np.array([[5.44108091, 53.30197697, 61.06181767, 14.36723114],
27 | [0, 39.68081126, 58.77974067, 0],
28 | [40.57883826, 22.15134852, 31.46813478, 13.52603324]])
29 | pred = np.array([[4.87694111, 50.09085582, 55.74533641, 10.13579195],
30 | [13.76178147, 41.62431592, 56.97362032, 81.48369608],
31 | [43.12005689, 15.54622258, 24.1993478, 12.14451783]])
32 | depth_obj = depth_metrics.DepthMetrics()
33 | depth_obj.update_state(gt, pred)
34 | result = depth_obj.result().numpy()
35 | # The following numbers are manually computed.
36 | self.assertAlmostEqual(result[0], 14.154233, places=4)
37 | self.assertAlmostEqual(result[1], 0.0268667, places=4)
38 | self.assertAlmostEqual(result[2], 0.13191505, places=4)
39 | self.assertAlmostEqual(result[3], 0.7, places=4)
40 |
41 | def test_depth_metrics_on_multiple_images(self):
42 | depth_obj = depth_metrics.DepthMetrics()
43 | gt_1 = np.array([[5.44108091, 53.30197697, 61.06181767, 14.36723114],
44 | [0, 39.68081126, 58.77974067, 0],
45 | [40.57883826, 22.15134852, 31.46813478, 13.52603324]])
46 | pred_1 = np.array([[4.87694111, 50.09085582, 55.74533641, 10.13579195],
47 | [13.76178147, 41.62431592, 56.97362032, 81.48369608],
48 | [43.12005689, 15.54622258, 24.1993478, 12.14451783]])
49 | depth_obj.update_state(gt_1, pred_1)
50 | gt_2 = np.array(
51 | [[79.56192404, 25.68145225, 0, 39.88486608, 68.91602466],
52 | [79.53460057, 2.55741031, 36.05057241, 68.04747416, 3.7783227],
53 | [0, 0, 72.47336778, 59.02611644, 66.07499008],
54 | [25.88578395, 58.2202574, 27.39066477, 29.83094038, 37.99239669]])
55 | pred_2 = np.array(
56 | [[83.80952145, 27.23367361, 72.52687468, 35.28400183, 72.41126444],
57 | [77.62373864, 0.87004049, 32.1619225, 66.91361903, 2.60688436],
58 | [15.30294603, 9.76419241, 68.61650198, 57.14559324, 66.88452603],
59 | [24.54818109, 61.60855251, 31.50312052, 26.02325866, 36.4019569]])
60 | depth_obj.update_state(gt_2, pred_2)
61 | gt_3 = np.array([[50.80100791, 0.41130084, 58.85031668],
62 | [29.44932853, 23.48806627, 30.17890056]])
63 | pred_3 = np.array([[49.66563966, 0.62070026, 58.84231026],
64 | [32.26735775, 28.07405648, 33.7131882]])
65 | depth_obj.update_state(gt_3, pred_3)
66 | result = depth_obj.result().numpy()
67 | # The following numbers are manually computed.
68 | self.assertAlmostEqual(result[0], 18.442057, places=4)
69 | self.assertAlmostEqual(result[1], 0.0388692, places=4)
70 | self.assertAlmostEqual(result[2], 0.13392223, places=4)
71 | self.assertAlmostEqual(result[3], 0.8052287, places=4)
72 |
73 |
74 | if __name__ == '__main__':
75 | tf.test.main()
76 |
--------------------------------------------------------------------------------
/evaluation/numpy/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/evaluation/test_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for test_utils."""
17 | import numpy as np
18 | import tensorflow as tf
19 |
20 | from deeplab2.evaluation import test_utils
21 |
22 |
23 | class TestUtilsTest(tf.test.TestCase):
24 |
25 | def test_read_test_image(self):
26 | image_array = test_utils.read_test_image('team_pred_class.png')
27 | self.assertSequenceEqual(image_array.shape, (231, 345, 4))
28 |
29 | def test_reads_segmentation_with_color_map(self):
30 | rgb_to_semantic_label = {(0, 0, 0): 0, (0, 0, 255): 1, (255, 0, 0): 23}
31 | labels = test_utils.read_segmentation_with_rgb_color_map(
32 | 'team_pred_class.png', rgb_to_semantic_label)
33 |
34 | input_image = test_utils.read_test_image('team_pred_class.png')
35 | np.testing.assert_array_equal(
36 | labels == 0,
37 | np.logical_and(input_image[:, :, 0] == 0, input_image[:, :, 2] == 0))
38 | np.testing.assert_array_equal(labels == 1, input_image[:, :, 2] == 255)
39 | np.testing.assert_array_equal(labels == 23, input_image[:, :, 0] == 255)
40 |
41 | def test_reads_gt_segmentation(self):
42 | instance_label_to_semantic_label = {
43 | 0: 0,
44 | 47: 1,
45 | 97: 1,
46 | 133: 1,
47 | 150: 1,
48 | 174: 1,
49 | 198: 23,
50 | 215: 1,
51 | 244: 1,
52 | 255: 1,
53 | }
54 | instances, classes = test_utils.panoptic_segmentation_with_class_map(
55 | 'team_gt_instance.png', instance_label_to_semantic_label)
56 |
57 | expected_label_shape = (231, 345)
58 | self.assertSequenceEqual(instances.shape, expected_label_shape)
59 | self.assertSequenceEqual(classes.shape, expected_label_shape)
60 | np.testing.assert_array_equal(instances == 0, classes == 0)
61 | np.testing.assert_array_equal(instances == 198, classes == 23)
62 | np.testing.assert_array_equal(
63 | np.logical_and(instances != 0, instances != 198), classes == 1)
64 |
65 |
66 | if __name__ == '__main__':
67 | tf.test.main()
68 |
--------------------------------------------------------------------------------
/evaluation/testdata/README.md:
--------------------------------------------------------------------------------
1 | # Segmentation Evaluation Test Data
2 |
3 | ## Source Images
4 |
5 | * [team_input.png](team_input.png) \
6 | Source:
7 | https://ai.googleblog.com/2018/03/semantic-image-segmentation-with.html
8 | * [cat_input.jpg](cat_input.jpg) \
9 | Source: https://www.flickr.com/photos/magdalena_b/4995858743
10 | * [bird_input.jpg](bird_input.jpg) \
11 | Source: https://www.flickr.com/photos/chivinskia/40619099560
12 |
--------------------------------------------------------------------------------
/evaluation/testdata/bird_gt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_gt.png
--------------------------------------------------------------------------------
/evaluation/testdata/bird_pred_class.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_pred_class.png
--------------------------------------------------------------------------------
/evaluation/testdata/bird_pred_instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_pred_instance.png
--------------------------------------------------------------------------------
/evaluation/testdata/cat_gt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_gt.png
--------------------------------------------------------------------------------
/evaluation/testdata/cat_pred_class.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_pred_class.png
--------------------------------------------------------------------------------
/evaluation/testdata/cat_pred_instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_pred_instance.png
--------------------------------------------------------------------------------
/evaluation/testdata/team_gt_instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_gt_instance.png
--------------------------------------------------------------------------------
/evaluation/testdata/team_pred_class.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_pred_class.png
--------------------------------------------------------------------------------
/evaluation/testdata/team_pred_instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_pred_instance.png
--------------------------------------------------------------------------------
/g3doc/img/axial_deeplab/axial_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/axial_block.png
--------------------------------------------------------------------------------
/g3doc/img/axial_deeplab/nonlocal_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/nonlocal_block.png
--------------------------------------------------------------------------------
/g3doc/img/axial_deeplab/position_sensitive_axial_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/position_sensitive_axial_block.png
--------------------------------------------------------------------------------
/g3doc/img/kmax_deeplab/clustering_view_of_mask_transformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/kmax_deeplab/clustering_view_of_mask_transformer.png
--------------------------------------------------------------------------------
/g3doc/img/kmax_deeplab/kmax_decoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/kmax_deeplab/kmax_decoder.png
--------------------------------------------------------------------------------
/g3doc/img/max_deeplab/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/max_deeplab/overview.png
--------------------------------------------------------------------------------
/g3doc/img/max_deeplab/overview_simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/max_deeplab/overview_simple.png
--------------------------------------------------------------------------------
/g3doc/img/moat/moat_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_block.png
--------------------------------------------------------------------------------
/g3doc/img/moat/moat_imagenet1k_224.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_imagenet1k_224.png
--------------------------------------------------------------------------------
/g3doc/img/moat/moat_imagenet22k_384.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_imagenet22k_384.png
--------------------------------------------------------------------------------
/g3doc/img/panoptic_deeplab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/panoptic_deeplab.png
--------------------------------------------------------------------------------
/g3doc/img/step/kitti_step_annotation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/step/kitti_step_annotation.png
--------------------------------------------------------------------------------
/g3doc/img/vip_deeplab/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/vip_deeplab/demo.gif
--------------------------------------------------------------------------------
/g3doc/projects/wod_pvps.md:
--------------------------------------------------------------------------------
1 | # Panoramic Video Panoptic Segmentation
2 |
3 | Waymo Open Dataset: Panoramic Video Panoptic Segmentation (WOD-PVPS) [1] is a
4 | large-scale dataset that offers high-quality multi-camera video panoptic
5 | segmentation labels for autonomous driving. The labels are consistent over time
6 | for video processing and consistent across multiple cameras mounted on the
7 | vehicles for full panoramic scene understanding.
8 |
9 | The new task of Panoramic Video Panoptic Segmentation requires generating dense
10 | panoptic segmentation predictions that are consistent both over time and across
11 | multiple cameras. To build a baseline for this challenging task, we extend
12 | ViP-DeepLab [2] to the multi-camera setting.
13 |
14 | ## Prerequisite
15 |
16 | 1. Make sure the software is properly [installed](../setup/installation.md).
17 |
18 | 2. Make sure the
19 | [target dataset](https://waymo.com/open/data/perception/#2d-video-panoptic-segmentation)
20 | is correctly prepared.
21 |
22 | ## Model Zoo
23 |
24 | ## Citing WOD-PVPS
25 |
26 | If you find this code helpful in your research or wish to refer to the baseline
27 | results, please use the following BibTeX entry.
28 |
29 | * Waymo Open Dataset: Panoramic Video Panoptic Segmentation:
30 |
31 | ```
32 | @article{mei2022waymo,
33 | title={Waymo Open Dataset: Panoramic Video Panoptic Segmentation},
34 | author={Mei, Jieru and Zhu, Alex Zihao and Yan, Xinchen and Yan, Hang and Qiao, Siyuan and Zhu, Yukun and Chen, Liang-Chieh and Kretzschmar, Henrik and Anguelov, Dragomir},
35 | journal={arXiv preprint arXiv:2206.07704},
36 | year={2022}
37 | }
38 |
39 | ```
40 |
41 | * ViP-DeepLab:
42 |
43 | ```
44 | @inproceedings{vip_deeplab_2021,
45 | author={Siyuan Qiao and Yukun Zhu and Hartwig Adam and Alan Yuille and Liang-Chieh Chen},
46 | title={{ViP-DeepLab}: Learning Visual Perception with Depth-aware Video Panoptic Segmentation},
47 | booktitle={CVPR},
48 | year={2021}
49 | }
50 |
51 | ```
52 |
53 | * Panoptic-DeepLab:
54 |
55 | ```
56 | @inproceedings{panoptic_deeplab_2020,
57 | author={Bowen Cheng and Maxwell D Collins and Yukun Zhu and Ting Liu and Thomas S Huang and Hartwig Adam and Liang-Chieh Chen},
58 | title={{Panoptic-DeepLab}: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation},
59 | booktitle={CVPR},
60 | year={2020}
61 | }
62 |
63 | ```
64 |
65 | ### References
66 |
67 | 1. Jieru Mei, Alex Zihao Zhu, Xinchen Yan, Hang Yan, Siyuan Qiao, Yukun Zhu,
68 | Liang-Chieh Chen, Henrik Kretzschmar, Dragomir Anguelov. "Waymo Open
69 |     Dataset: Panoramic Video Panoptic Segmentation." In ECCV, 2022.
70 |
71 | 2. Siyuan Qiao, Yukun Zhu, Hartwig Adam, Alan Yuille, and Liang-Chieh Chen.
72 | "ViP-DeepLab: Learning Visual Perception with Depth-aware Video Panoptic
73 | Segmentation." In CVPR, 2021.
74 |
--------------------------------------------------------------------------------
/g3doc/setup/ade20k.md:
--------------------------------------------------------------------------------
1 | # Run DeepLab2 on ADE20K dataset
2 |
3 | This page walks through the steps required to generate
4 | [ADE20K](https://groups.csail.mit.edu/vision/datasets/ADE20K/) panoptic
5 | segmentation data for DeepLab2.
6 |
7 | ## Prework
8 |
9 | Before running any Deeplab2 scripts, the users should (1) access the
10 | [ADE20K dataset website](https://groups.csail.mit.edu/vision/datasets/ADE20K/)
11 | to download the dataset, and (2) prepare the panoptic annotation using
12 | [Mask2Former's script](https://github.com/facebookresearch/Mask2Former/blob/main/datasets/prepare_ade20k_pan_seg.py).
13 |
14 | After finishing the above steps, the expected directory structure should be as
15 | follows:
16 |
17 | ```
18 | .(ADE20K_ROOT)
19 | +-- images
20 | |
21 | |-- annotations
22 | |
23 | |-- objectInfo150.txt
24 | |
25 | |-- annotations_instance
26 | |
27 | |-- ade20k_panoptic_{train,val}.json
28 | |
29 | +-- ade20k_panoptic_{train,val}
30 | ```
31 |
32 | ## Convert prepared dataset to TFRecord
33 |
34 | Use the following commandline to generate ADE20K TFRecords:
35 |
36 | ```bash
37 | # For generating data for panoptic segmentation task
38 | python deeplab2/data/build_ade20k_data.py \
39 | --ade20k_root=${ADE20K_ROOT} \
40 | --output_dir=${OUTPUT_DIR}
41 | ```
42 |
43 | The command above will output two sharded tfrecord files:
44 | `{train|val}@1000.tfrecord`. For both the `train` and `val` sets, the tfrecords
45 | contain the RGB image pixels as well as the corresponding annotations. These
46 | files will be used as the input for model training and evaluation.
47 |
48 | ### TFExample proto format for ADE20K
49 |
50 | The Example proto contains the following fields:
51 |
52 | * `image/encoded`: encoded image content.
53 | * `image/filename`: image filename.
54 | * `image/format`: image file format.
55 | * `image/height`: image height.
56 | * `image/width`: image width.
57 | * `image/channels`: image channels.
58 | * `image/segmentation/class/encoded`: encoded segmentation content.
59 | * `image/segmentation/class/format`: segmentation encoding format.
60 |
61 | For panoptic segmentation, the encoded segmentation map will be the raw bytes of
62 | an int32 panoptic map, where each pixel is assigned to a panoptic ID, which is
63 | computed by:
64 |
65 | ```
66 | panoptic ID = semantic ID * label divisor + instance ID
67 | ```
68 |
69 | where semantic ID will be:
70 |
71 | * ignore label (0) for pixels not belonging to any segment
72 | * for segments associated with `iscrowd` label:
73 | * (default): ignore label (0)
74 | * `category_id` for other segments
75 |
76 | The instance ID will be 0 for pixels belonging to
77 |
78 | * `stuff` class
79 | * `thing` class with `iscrowd` label
80 | * pixels with ignore label
81 |
82 | and `[1, label divisor)` otherwise.
83 |
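As a concrete illustration, a minimal sketch of this encoding (the
`label_divisor` value below is assumed for illustration only; the real value is
defined per dataset in `data/dataset.py`):

```python
label_divisor = 1000  # assumed for illustration only

def encode_panoptic(semantic_id, instance_id):
  # panoptic ID = semantic ID * label divisor + instance ID
  return semantic_id * label_divisor + instance_id

def decode_panoptic(panoptic_id):
  return panoptic_id // label_divisor, panoptic_id % label_divisor

assert encode_panoptic(7, 2) == 7002
assert decode_panoptic(7002) == (7, 2)
```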
--------------------------------------------------------------------------------
/g3doc/setup/cityscapes_test_server_evaluation.md:
--------------------------------------------------------------------------------
1 | # Test Server Evaluation on Cityscapes dataset
2 |
3 | This page walks through the steps required to convert DeepLab2 predictions for
4 | test server evaluation on [Cityscapes](https://www.cityscapes-dataset.com/).
5 |
6 | A high-level overview of the whole process:
7 |
8 | 1. Save raw panoptic prediction in the two-channel format.
9 |
10 | 2. Create images json file.
11 |
12 | 3. Convert predictions in the two-channel format to the panoptic COCO format.
13 |
14 | 4. Run local validation set evaluation or prepare test set evaluation.
15 |
16 | We also define some environmental variables for simplicity and convenience:
17 |
18 | `BASE_MODEL_DIRECTORY`: a variable set in the textproto file, which defines where all
19 | checkpoints and results are saved.
20 |
21 | `DATA_ROOT`: where the original Cityscapes dataset is located.
22 |
23 | `PATH_TO_SAVE`: where the converted results should be saved.
24 |
25 | `IMAGES_SPLIT`: *val* or *test* depending on the target split.
26 |
27 | ## Save Raw Panoptic Prediction
28 |
29 | Save the raw panoptic predictions in the
30 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) by ensuring
31 | the following fields are set properly in the textproto config file.
32 |
33 | ```
34 | eval_dataset_options.decode_groundtruth_label = false
35 | evaluator_options.save_predictions = true
36 | evaluator_options.save_raw_predictions = true
37 | evaluator_options.convert_raw_to_eval_ids = true
38 | ```
39 |
40 | Then run the model in evaluation mode (with `--mode=eval`); the results will be
41 | saved at:
42 |
43 | *semantic segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_semantic/\*.png
44 |
45 | *instance segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_instance/\*
46 |
47 | *panoptic segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_panoptic/\*.png
48 |
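For reference, each prediction in the two-channel format is a PNG whose first
channel stores the semantic class ID and whose second channel stores the
instance ID. DeepLab2 writes these files itself when `save_raw_predictions` is
enabled, so the following is only an illustrative sketch (assuming `numpy`,
`Pillow`, and instance IDs below 256):

```python
import numpy as np
from PIL import Image


def save_two_channel_png(semantic, instance, path):
  # Channel 0: semantic class id; channel 1: instance id; channel 2 unused.
  out = np.zeros(semantic.shape + (3,), dtype=np.uint8)
  out[..., 0] = semantic
  out[..., 1] = instance
  Image.fromarray(out).save(path)
```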
49 | ## Create Images JSON
50 |
51 | Create images json file by running the following commands.
52 |
53 | ```bash
54 | python deeplab2/utils/create_images_json_for_cityscapes.py \
55 | --image_dir=${DATA_ROOT}/leftImg8bit/${IMAGES_SPLIT} \
56 | --output_json_path=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \
57 | --only_basename \
58 | --include_image_type_suffix=false
59 | ```
60 |
61 | ## Convert the Prediction Format
62 |
63 | Convert prediction results saved in the
64 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) to the
65 | panoptic COCO format.
66 |
67 | ```bash
68 | python panopticapi/converters/2channels2panoptic_coco_format.py \
69 | --source_folder=${BASE_MODEL_DIRECTORY}/vis/raw_panoptic \
70 |   --images_json_file=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \
71 | --categories_json_file=deeplab2/utils/panoptic_cityscapes_categories.json \
72 | --segmentations_folder=${PATH_TO_SAVE}/panoptic_cocoformat \
73 | --predictions_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json
74 | ```
75 |
76 | ## Run Local Evaluation Scripts (for *validation* set)
77 |
78 | Run the [official scripts](https://github.com/mcordts/cityscapesScripts) to
79 | evaluate validation set results.
80 |
81 | For *semantic segmentation*:
82 |
83 | ```bash
84 | CITYSCAPES_RESULTS=${BASE_MODEL_DIRECTORY}/vis/raw_semantic/ \
85 | CITYSCAPES_DATASET=${DATA_ROOT} \
86 | CITYSCAPES_EXPORT_DIR=${PATH_TO_SAVE} \
87 | python cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py
88 | ```
89 |
90 | For *instance segmentation*:
91 |
92 | ```bash
93 | CITYSCAPES_RESULTS=${BASE_MODEL_DIRECTORY}/vis/raw_instance/ \
94 | CITYSCAPES_DATASET=${DATA_ROOT} \
95 | python cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py
96 | ```
97 |
98 | For *panoptic segmentation*:
99 |
100 | ```bash
101 | python cityscapesscripts/evaluation/evalPanopticSemanticLabeling.py \
102 | --prediction-json-file=${PATH_TO_SAVE}/panoptic_cocoformat.json \
103 | --prediction-folder=${PATH_TO_SAVE}/panoptic_cocoformat \
104 | --gt-json-file=${DATA_ROOT}/gtFine/cityscapes_panoptic_val.json \
105 | --gt-folder=${DATA_ROOT}/gtFine/cityscapes_panoptic_val
106 | ```
107 |
108 | Please note that our prediction format does not support the instance
109 | segmentation submission format yet.
110 |
111 | ## Prepare Submission Files (for *test* set)
112 |
113 | Run the following commands to prepare submission files for test server
114 | evaluation.
115 |
116 | ```bash
117 | zip -r cityscapes_test_submission_semantic.zip ${BASE_MODEL_DIRECTORY}/vis/raw_semantic
118 | zip -r cityscapes_test_submission_instance.zip ${BASE_MODEL_DIRECTORY}/vis/raw_instance
119 | zip -r cityscapes_test_submission_panoptic.zip ${PATH_TO_SAVE}/panoptic_cocoformat ${PATH_TO_SAVE}/panoptic_cocoformat.json
120 | ```
121 |
--------------------------------------------------------------------------------
/g3doc/setup/coco.md:
--------------------------------------------------------------------------------
1 | # Run DeepLab2 on COCO dataset
2 |
3 | This page walks through the steps required to generate
4 | [COCO](https://cocodataset.org/) panoptic segmentation data for DeepLab2.
5 | DeepLab2 uses sharded TFRecords for efficient processing of the data.
6 |
7 | ## Prework
8 |
9 | Before running any Deeplab2 scripts, the users should (1) access the
10 | [COCO dataset website](https://cocodataset.org/) to download the dataset,
11 | including [2017 Train images](http://images.cocodataset.org/zips/train2017.zip),
12 | [2017 Val images](http://images.cocodataset.org/zips/val2017.zip),
13 | [2017 Test images](http://images.cocodataset.org/zips/test2017.zip), and
14 | [2017 Panoptic Train/Val annotations](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip),
15 | and (2) unzip the downloaded files.
16 |
17 | After finishing the above steps, the expected directory structure should be as
18 | follows:
19 |
20 | ```
21 | .(COCO_ROOT)
22 | +-- train2017
23 | | |
24 | | +-- *.jpg
25 | |
26 | |-- val2017
27 | | |
28 | | +-- *.jpg
29 | |
30 | |-- test2017
31 | | |
32 | | +-- *.jpg
33 | |
34 | +-- annotations
35 | |
36 | +-- panoptic_{train|val}2017.json
37 | +-- panoptic_{train|val}2017
38 | ```
39 |
40 | ## Convert prepared dataset to TFRecord
41 |
42 | Use the following commandline to generate COCO TFRecords:
43 |
44 | ```bash
45 | # For generating data for panoptic segmentation task
46 | python deeplab2/data/build_coco_data.py \
47 | --coco_root=${COCO_ROOT} \
48 | --output_dir=${OUTPUT_DIR}
49 | ```
50 |
51 | The command above will output three sharded tfrecord files:
52 | `{train|val|test}@1000.tfrecord`. For the `train` and `val` sets, the tfrecords
53 | contain the RGB image pixels as well as the corresponding annotations. For the
54 | `test` set, they contain RGB images only. These files will be used as the input
55 | for model training and evaluation.
56 |
57 | Note that we map the class IDs to contiguous IDs. Specifically, we map the
58 | original label IDs, which range from 1 to 200, to contiguous ones ranging
59 | from 1 to 133.
60 |
61 | ### TFExample proto format for COCO
62 |
63 | The Example proto contains the following fields:
64 |
65 | * `image/encoded`: encoded image content.
66 | * `image/filename`: image filename.
67 | * `image/format`: image file format.
68 | * `image/height`: image height.
69 | * `image/width`: image width.
70 | * `image/channels`: image channels.
71 | * `image/segmentation/class/encoded`: encoded segmentation content.
72 | * `image/segmentation/class/format`: segmentation encoding format.
73 |
74 | For panoptic segmentation, the encoded segmentation map will be the raw bytes of
75 | an int32 panoptic map, where each pixel is assigned to a panoptic ID, which is
76 | computed by:
77 |
78 | ```
79 | panoptic ID = semantic ID * label divisor + instance ID
80 | ```
81 |
82 | where semantic ID will be:
83 |
84 | * ignore label (0) for pixels not belonging to any segment
85 | * for segments associated with `iscrowd` label:
86 | * (default): ignore label (0)
87 | * (if set `--treat_crowd_as_ignore=false` while running
88 | `build_coco_data.py`): `category_id`
89 | * `category_id` for other segments
90 |
91 | The instance ID will be 0 for pixels belonging to
92 |
93 | * `stuff` class
94 | * `thing` class with `iscrowd` label
95 | * pixels with ignore label
96 |
97 | and `[1, label divisor)` otherwise.
98 |
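To recover the semantic and instance maps from a decoded int32 panoptic map,
the division above can be applied per pixel. A minimal numpy sketch (the
`label_divisor` value is assumed for illustration only):

```python
import numpy as np

label_divisor = 256  # assumed for illustration only

panoptic_map = np.array([[0, 3 * 256 + 1],
                         [3 * 256 + 2, 5 * 256]], dtype=np.int32)
semantic_map = panoptic_map // label_divisor  # per-pixel semantic ID
instance_map = panoptic_map % label_divisor   # 0 for stuff/crowd/ignore pixels
```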
--------------------------------------------------------------------------------
/g3doc/setup/coco_test_server_evaluation.md:
--------------------------------------------------------------------------------
1 | # Test Server Evaluation on COCO dataset
2 |
3 | This page walks through the steps required to convert DeepLab2 predictions for
4 | test server evaluation on [COCO](https://cocodataset.org/).
5 |
6 | A high-level overview of the whole process:
7 |
8 | 1. Save raw panoptic prediction in the two-channel format.
9 |
10 | 2. Convert predictions in the two-channel format to the panoptic COCO format.
11 |
12 | 3. Run local validation set evaluation or prepare test set evaluation.
13 |
14 | We also define some environmental variables for simplicity and convenience:
15 |
16 | `BASE_MODEL_DIRECTORY`: a variable set in the textproto file, which defines where all
17 | checkpoints and results are saved.
18 |
19 | `DATA_ROOT`: where the original COCO dataset is located.
20 |
21 | `PATH_TO_SAVE`: where the converted results should be saved.
22 |
23 | ## Save Raw Panoptic Prediction
24 |
25 | Save the raw panoptic predictions in the
26 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) by ensuring
27 | the following fields are set properly in the textproto config file.
28 |
29 | ```
30 | eval_dataset_options.decode_groundtruth_label = false
31 | evaluator_options.save_predictions = true
32 | evaluator_options.save_raw_predictions = true
33 | evaluator_options.convert_raw_to_eval_ids = true
34 | ```
35 |
36 | Then run the model in evaluation mode (with `--mode=eval`), and the results
37 | will be saved at ${BASE_MODEL_DIRECTORY}/vis/raw_panoptic/\*.png.
38 |
39 | ## Convert the Prediction Format
40 |
41 | Convert prediction results saved in the
42 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) to the
43 | panoptic COCO format.
44 |
45 | ```bash
46 | python panopticapi/converters/2channels2panoptic_coco_format.py \
47 | --source_folder=${BASE_MODEL_DIRECTORY}/vis/raw_panoptic \
48 | --images_json_file=${DATA_ROOT}/annotations/IMG_JSON \
49 | --categories_json_file=panopticapi/panoptic_coco_categories.json \
50 | --segmentations_folder=${PATH_TO_SAVE}/panoptic_cocoformat \
51 | --predictions_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json
52 | ```
53 |
54 | The `IMG_JSON` refers to `panoptic_val2017.json` for *val* set and
55 | `image_info_test-dev2017.json` for *test-dev* set.
56 |
57 | ## Run Local Evaluation Scripts (for *validation* set)
58 |
59 | Run the [official scripts](https://github.com/cocodataset/panopticapi) to
60 | evaluate validation set results.
61 |
62 | ```bash
63 | python panopticapi/evaluation.py \
64 | --pred_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json \
65 | --pred_folder=${PATH_TO_SAVE}/panoptic_cocoformat \
66 | --gt_json_file=${DATA_ROOT}/annotations/panoptic_val2017.json \
67 | --gt_folder=${DATA_ROOT}/annotations/panoptic_val2017
68 | ```
69 |
70 | ## Prepare Submission Files (for *test* set)
71 |
72 | Run the following command to prepare a submission file for test server
73 | evaluation.
74 |
75 | ```bash
76 | zip -r coco_test_submission_panoptic.zip ${PATH_TO_SAVE}/panoptic_cocoformat ${PATH_TO_SAVE}/panoptic_cocoformat.json
77 | ```
78 |
--------------------------------------------------------------------------------
/g3doc/setup/motchallenge_step.md:
--------------------------------------------------------------------------------
1 | # Run DeepLab2 on MOTChallenge-STEP dataset
2 |
3 | ## MOTChallenge-STEP dataset
4 |
5 | MOTChallenge-STEP extends the existing [MOTChallenge](https://motchallenge.net/)
6 | dataset with spatially and temporally dense annotations.
7 |
8 | ### Label Map
9 |
10 | The MOTChallenge-STEP dataset follows the same annotation and label policy as
11 | the [KITTI-STEP dataset](./kitti_step.md). From the
12 | [MOTChallenge](https://motchallenge.net/) dataset, 4 outdoor sequences are
13 | annotated for MOTChallenge-STEP. In particular, these sequences are split
14 | into 2 for training and 2 for testing. This dataset contains only 7 semantic
15 | classes, as not all of
16 | [Cityscapes](https://www.cityscapes-dataset.com/dataset-overview/#class-definitions)'
17 | 19 semantic classes are present.
18 |
19 | Label Name | Label ID
20 | -------------- | --------
21 | sidewalk | 0
22 | building | 1
23 | vegetation | 2
24 | sky | 3
25 | person† | 4
26 | rider | 5
27 | bicycle | 6
28 | void | 255
29 |
30 | †: Single instance annotations are available.
31 |
32 | ### Prepare MOTChallenge-STEP for Training and Evaluation
33 |
34 | In the following, we provide a step-by-step walkthrough to prepare the data.
35 |
36 | 1. Create the MOTChallenge-STEP directory:
37 |
38 | ```bash
39 | mkdir ${MOTCHALLENGE_STEP_ROOT}/images
40 | cd ${MOTCHALLENGE_STEP_ROOT}/images
41 | ```
42 |
43 | 2. Download MOTChallenge images from https://motchallenge.net/data/MOTS.zip and
44 | unzip.
45 |
46 | ```bash
47 | wget ${MOTCHALLENGE_LINK}
48 | unzip ${MOTCHALLENGE_IMAGES}.zip
49 | ```
50 |
51 | 3. Move and rename the data:
52 |
53 | ```bash
54 | # Create directories.
55 | mkdir train
56 | mkdir train/0002
57 | mkdir train/0009
58 | mkdir test
59 | mkdir test/0001
60 | mkdir test/0007
61 |
62 | # Copy data.
63 | cp -r MOTS/train/MOTS20-02/img1/* train/0002/
64 | cp -r MOTS/train/MOTS20-09/img1/* train/0009/
65 | cp -r MOTS/test/MOTS20-01/img1/* test/0001/
66 | cp -r MOTS/test/MOTS20-07/img1/* test/0007/
67 |
68 | # Clean up.
69 | rm -r MOTS
70 | ```
71 |
72 | 4. Download groundtruth MOTChallenge-STEP panoptic maps from
73 | https://motchallenge.net/data/motchallenge-step.tar.gz
74 |
75 | ```bash
76 | cd ${MOTCHALLENGE_STEP_ROOT}
77 | wget ${MOTCHALLENGE_GT_LINK}
78 |     tar -xzvf ${MOTCHALLENGE_GT}.tar.gz
79 | ```
80 |
81 | The groundtruth panoptic map is encoded in the same way as described in
82 | [KITTI-STEP dataset](./kitti_step.md).
83 |
84 | DeepLab2 requires the dataset to be converted to TFRecords for efficient reading
85 | and prefetching. To create the dataset for training and evaluation, run the
86 | following command:
87 |
88 | ```bash
89 | python deeplab2/data/build_step_data.py \
90 | --step_root=${MOTCHALLENGE_STEP_ROOT} \
91 | --output_dir=${OUTPUT_DIR}
92 | ```
93 |
94 | This script outputs two sharded tfrecord files: `{train|test}@10.tfrecord`. For
95 | the `train` set, the tfrecords contain the RGB image pixels as well as their
96 | panoptic maps. For the `test` set, they contain RGB images only. These files
97 | will be used as the input for model training and evaluation.
98 |
99 | Optionally, you can also specify `--use_two_frames` to encode two
100 | consecutive frames into the tfrecord files.
101 |
102 | ## Citing MOTChallenge-STEP
103 |
104 | If you find this dataset helpful in your research, please use the following
105 | BibTeX entry.
106 |
107 | ```
108 | @article{step_2021,
109 | author = {Weber, Mark and Xie, Jun and Collins, Maxwell and Zhu, Yukun and Voigtlaender, Paul and Adam, Hartwig and Green, Bradley and Geiger, Andreas and Leibe, Bastian and Cremers, Daniel and O\v{s}ep, Aljo\v{s}a and Leal-Taix\'{e}, Laura and Chen, Liang-Chieh},
110 | journal = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks},
111 | title = {{STEP}: Segmenting and Tracking Every Pixel},
112 | year = {2021}
113 | }
114 | ```
115 |
--------------------------------------------------------------------------------
/g3doc/setup/your_own_dataset.md:
--------------------------------------------------------------------------------
1 | # Convert your own dataset for DeepLab2 framework
2 |
3 | You may want to train DeepLab2 on your own dataset. Here, we provide some
4 | guidance that will hopefully facilitate the preparation process.
5 |
6 | 1. Prepare your own dataset.
7 | * **Images** should be stored either in `jpg` or `png` format.
8 | * **Annotations** should be stored either in `png` or `json` format. The
9 | DeepLab2 framework assumes the panoptic label format (i.e.,
10 | `panoptic_label = semantic_label * label_divisor + instance_id`, where
11 | the `label_divisor` should be larger than the maximum number of
12 | instances per image).
13 |         *   The `png` format refers to the case where we split the semantic
14 |             label and instance id across the RGB channels. For example, the
15 |             R-channel stores the semantic label, while the G- and B-channels
16 |             store the instance id (G: instance_id // 256 and B: instance_id % 256); see the sketch after this list.
17 | * The `json` format refers to the
18 | [COCO panoptic json format](https://cocodataset.org/#format-data).
19 | 2. Convert the dataset to TFRecord.
20 |
21 | * Update our provided example code (e.g.,
22 | [build_step_data.py](../../data/build_step_data.py) for `png` format,
23 | and [build_coco_data.py](../../data/build_coco_data.py) for `json`
24 | format) to convert your dataset to TFRecord.
25 | * Alternatively, if you are using your own binary to create TFRecords,
26 | make sure to include the same fields in the proto as what our example
27 | code creates.
28 |
29 | 3. Modify the `dataset.py` (path: `${DEEPLAB2}/data/dataset.py`) to provide
30 | your dataset information.
31 |
32 | * Set the `panoptic_label_divisor` (i.e., the `label_divisor` above)
33 | correctly. Its value should be larger than the maximum number of
34 | instances that could appear per image in your dataset.
35 | * Set the `ignore_label` properly. Pixels annotated with `ignore_label`
36 | are not used during both training and evaluation. If your dataset does
37 | not contain the `ignore_label` annotations, you could simply set it to
38 | be a large value (e.g., 255 as for
39 | [Cityscapes](https://www.cityscapes-dataset.com/)).
40 |     *   Set the `class_has_instances_list` properly. The variable specifies which
41 | class belongs to the `thing` class (i.e., countable objects such as
42 | people, cars).
43 | * Set the colormap (for visualization) properly. You may also need to
44 | define your own colormap (see `${DEEPLAB2}/trainer/vis_utils.py`).
45 |
46 | 4. Prepare the experiment config.
47 |
48 | * Update our provided example configs (path:
49 | `${DEEPLAB2}/configs/${DATASET}/${MODEL}/${BACKBONE}`) for your use
50 |         case. A few things that may be worth your attention:
51 | * Set the `crop_size` correctly for both training and evaluation. See
52 | Q2 in [FAQ](../faq.md) for more details.
53 | * Tune the config flags for your dataset (e.g., `base_learning_rate`,
54 | `training_number_of_step`, and so on).
55 |
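As referenced in step 1, a minimal sketch of decoding the `png` annotation
layout into a panoptic label (the function name and the `label_divisor` value
are illustrative only):

```python
import numpy as np

label_divisor = 256 * 256  # illustrative; must exceed max instances per image


def rgb_annotation_to_panoptic(rgb):
  # R channel: semantic label; G and B channels: instance id.
  rgb = rgb.astype(np.int32)
  semantic = rgb[..., 0]
  instance = rgb[..., 1] * 256 + rgb[..., 2]
  return semantic * label_divisor + instance
```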
56 | Finally, if your dataset only contains semantic segmentation annotations,
57 | you could still use the DeepLab2 framework with some minor changes:
58 |
59 | 1. Since the code only reads panoptic data at the moment, you need to set
60 | `panoptic_label_divisor = k`, where k is any positive integer,
61 | `instance_id = 0`, and `class_has_instances_list = []` (i.e., we treat the
62 | dataset as the one that contains only `stuff` classes), when you are (1)
63 | converting the dataset to TFRecord (e.g.,
64 | [build_step_data.py](../../data/build_step_data.py)),
65 | and (2) adding dataset information in dataset.py.
66 | 2. Have a config similar to
67 |     `${DEEPLAB2}/configs/cityscapes/panoptic_deeplab/resnet50_os32_semseg.textproto`,
68 |     where the instance branch is not
69 |     instantiated.
70 |
71 | At this point, you are good to go! Enjoy training DeepLab2!
72 |
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/model/builder_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for model.builder."""
17 |
18 | import os
19 | from absl.testing import parameterized
20 |
21 | import tensorflow as tf
22 |
23 | from google.protobuf import text_format
24 | from deeplab2 import config_pb2
25 | from deeplab2.model import builder
26 | from deeplab2.model.decoder import motion_deeplab_decoder
27 | from deeplab2.model.encoder import axial_resnet_instances
28 | from deeplab2.model.encoder import mobilenet
29 | # resources dependency
30 |
31 |
32 | _CONFIG_PATH = 'deeplab2/configs/example'
33 |
34 |
35 | def _read_proto_file(filename, proto):
36 | filename = filename # OSS: removed internal filename loading.
37 | with tf.io.gfile.GFile(filename, 'r') as proto_file:
38 | return text_format.ParseLines(proto_file, proto)
39 |
40 |
41 | class BuilderTest(tf.test.TestCase, parameterized.TestCase):
42 |
43 | def test_resnet50_encoder_creation(self):
44 | backbone_options = config_pb2.ModelOptions.BackboneOptions(
45 | name='resnet50', output_stride=32)
46 | encoder = builder.create_encoder(
47 | backbone_options,
48 | tf.keras.layers.experimental.SyncBatchNormalization)
49 | self.assertIsInstance(encoder, axial_resnet_instances.ResNet50)
50 |
51 | @parameterized.parameters('mobilenet_v3_large', 'mobilenet_v3_small')
52 | def test_mobilenet_encoder_creation(self, model_name):
53 | backbone_options = config_pb2.ModelOptions.BackboneOptions(
54 | name=model_name, use_squeeze_and_excite=True, output_stride=32)
55 | encoder = builder.create_encoder(
56 | backbone_options,
57 | tf.keras.layers.experimental.SyncBatchNormalization)
58 | self.assertIsInstance(encoder, mobilenet.MobileNet)
59 |
60 | def test_resnet_encoder_creation(self):
61 | backbone_options = config_pb2.ModelOptions.BackboneOptions(
62 | name='max_deeplab_s', output_stride=32)
63 | encoder = builder.create_resnet_encoder(
64 | backbone_options,
65 | bn_layer=tf.keras.layers.experimental.SyncBatchNormalization)
66 | self.assertIsInstance(encoder, axial_resnet_instances.MaXDeepLabS)
67 |
68 | def test_decoder_creation(self):
69 | proto_filename = os.path.join(
70 | _CONFIG_PATH, 'example_kitti-step_motion_deeplab.textproto')
71 | model_options = _read_proto_file(proto_filename, config_pb2.ModelOptions())
72 | motion_decoder = builder.create_decoder(
73 | model_options, tf.keras.layers.experimental.SyncBatchNormalization,
74 | ignore_label=255)
75 | self.assertIsInstance(motion_decoder,
76 | motion_deeplab_decoder.MotionDeepLabDecoder)
77 |
78 |
79 | if __name__ == '__main__':
80 | tf.test.main()
81 |
--------------------------------------------------------------------------------
/model/decoder/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/model/decoder/aspp_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for aspp."""
17 | import tensorflow as tf
18 |
19 | from deeplab2.model.decoder import aspp
20 | from deeplab2.utils import test_utils
21 |
22 |
23 | class AsppTest(tf.test.TestCase):
24 |
25 | def test_aspp_pool_error(self):
26 | pool = aspp.ASPPPool(output_channels=64, name='')
27 |
28 | # Should pass without an error.
29 | pool.set_pool_size((None, None))
30 |
31 | with self.assertRaises(ValueError):
32 | # Should raise an error.
33 | pool.set_pool_size((2, None))
34 |
35 | def test_aspp_conv_atrous_rate_shape(self):
36 | atrous_rates = [2, 6, 12, 18]
37 | for rate in atrous_rates:
38 | conv = aspp.ASPPConv(output_channels=64, atrous_rate=rate, name='')
39 | input_tensor = tf.random.uniform(shape=(2, 12, 12, 3))
40 |
41 | output = conv(input_tensor)
42 | expected_shape = [2, 12, 12, 64]
43 | self.assertListEqual(output.shape.as_list(), expected_shape)
44 |
45 | def test_aspp_conv_non_negative(self):
46 | conv = aspp.ASPPConv(output_channels=12, atrous_rate=2, name='')
47 | input_tensor = tf.random.uniform(shape=(2, 17, 17, 3))
48 |
49 | output = conv(input_tensor)
50 | self.assertTrue((output.numpy() >= 0.0).all())
51 |
52 | def test_aspp_pool_shape(self):
53 | pool = aspp.ASPPPool(output_channels=64, name='')
54 | input_tensor = tf.random.uniform(shape=(2, 12, 12, 3))
55 |
56 | output = pool(input_tensor)
57 | expected_shape = [2, 12, 12, 64]
58 | self.assertListEqual(output.shape.as_list(), expected_shape)
59 |
60 | def test_aspp_pool_non_negative(self):
61 | pool = aspp.ASPPPool(output_channels=12, name='')
62 | input_tensor = tf.random.uniform(shape=(2, 17, 17, 3))
63 |
64 | output = pool(input_tensor)
65 | self.assertTrue((output.numpy() >= 0.0).all())
66 |
67 | def test_aspp_wrong_atrous_rate(self):
68 | with self.assertRaises(ValueError):
69 | _ = aspp.ASPP(output_channels=64, atrous_rates=[1, 2, 3, 4])
70 |
71 | @test_utils.test_all_strategies
72 | def test_aspp_shape(self, strategy):
73 | with strategy.scope():
74 | for bn_layer in test_utils.NORMALIZATION_LAYERS:
75 | aspp_layer = aspp.ASPP(
76 | output_channels=64, atrous_rates=[6, 12, 18], bn_layer=bn_layer)
77 | input_tensor = tf.random.uniform(shape=(2, 32, 32, 3))
78 |
79 | output = aspp_layer(input_tensor)
80 | expected_shape = [2, 32, 32, 64]
81 | self.assertListEqual(output.shape.as_list(), expected_shape)
82 |
83 | def test_aspp_non_negative(self):
84 | aspp_layer = aspp.ASPP(output_channels=32, atrous_rates=[4, 8, 16])
85 | input_tensor = tf.random.uniform(shape=(2, 32, 32, 3))
86 |
87 | output = aspp_layer(input_tensor)
88 | self.assertTrue((output.numpy() >= 0.0).all())
89 |
90 | if __name__ == '__main__':
91 | tf.test.main()
92 |
--------------------------------------------------------------------------------
/model/decoder/deeplabv3.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This file contains code to build a DeepLabV3.
17 |
18 | Reference:
19 | - [Rethinking Atrous Convolution for Semantic Image Segmentation](
20 | https://arxiv.org/pdf/1706.05587.pdf)
21 | """
22 | import tensorflow as tf
23 |
24 | from deeplab2 import common
25 | from deeplab2.model.decoder import aspp
26 | from deeplab2.model.layers import convolutions
27 |
28 |
29 | layers = tf.keras.layers
30 |
31 |
32 | class DeepLabV3(layers.Layer):
33 | """A DeepLabV3 model.
34 |
35 | This model takes in features from an encoder and performs multi-scale context
36 | aggregation with the help of an ASPP layer. Finally, a classification head is
37 | used to predict a semantic segmentation.
38 | """
39 |
40 | def __init__(self,
41 | decoder_options,
42 | deeplabv3_options,
43 | bn_layer=tf.keras.layers.BatchNormalization):
44 | """Creates a DeepLabV3 decoder of type layers.Layer.
45 |
46 | Args:
47 | decoder_options: Decoder options as defined in config_pb2.DecoderOptions.
48 | deeplabv3_options: Model options as defined in
49 | config_pb2.ModelOptions.DeeplabV3Options.
50 | bn_layer: An optional tf.keras.layers.Layer that computes the
51 | normalization (default: tf.keras.layers.BatchNormalization).
52 | """
53 | super(DeepLabV3, self).__init__(name='DeepLabV3')
54 |
55 | self._feature_name = decoder_options.feature_key
56 | self._aspp = aspp.ASPP(decoder_options.aspp_channels,
57 | decoder_options.atrous_rates,
58 | bn_layer=bn_layer)
59 |
60 | self._classifier_conv_bn_act = convolutions.Conv2DSame(
61 | decoder_options.decoder_channels,
62 | kernel_size=3,
63 | name='classifier_conv_bn_act',
64 | use_bias=False,
65 | use_bn=True,
66 | bn_layer=bn_layer,
67 | activation='relu')
68 |
69 | self._final_conv = convolutions.Conv2DSame(
70 | deeplabv3_options.num_classes, kernel_size=1, name='final_conv')
71 |
72 | def set_pool_size(self, pool_size):
73 | """Sets the pooling size of the ASPP pooling layer.
74 |
75 | Args:
76 | pool_size: A tuple specifying the pooling size of the ASPP pooling layer.
77 | """
78 | self._aspp.set_pool_size(pool_size)
79 |
80 | def get_pool_size(self):
81 | return self._aspp.get_pool_size()
82 |
83 | def reset_pooling_layer(self):
84 | """Resets the ASPP pooling layer to global average pooling."""
85 | self._aspp.reset_pooling_layer()
86 |
87 | def call(self, features, training=False):
88 | """Performs a forward pass.
89 |
90 | Args:
91 | features: A single input tf.Tensor or an input dict of tf.Tensor with
92 | shape [batch, height, width, channels]. If passed a dict, different keys
93 | should point to different features extracted by the encoder, e.g.
94 | low-level or high-level features.
95 | training: A boolean flag indicating whether training behavior should be
96 | used (default: False).
97 |
98 | Returns:
99 | A dictionary containing the semantic prediction under key
100 | common.PRED_SEMANTIC_LOGITS_KEY.
101 | """
102 | if isinstance(features, tf.Tensor):
103 | feature = features
104 | else:
105 | feature = features[self._feature_name]
106 |
107 | x = self._aspp(feature, training=training)
108 |
109 | x = self._classifier_conv_bn_act(x, training=training)
110 |
111 | return {common.PRED_SEMANTIC_LOGITS_KEY: self._final_conv(x)}
112 |
113 | @property
114 | def checkpoint_items(self):
115 | items = {
116 | common.CKPT_DEEPLABV3_ASPP: self._aspp,
117 | common.CKPT_DEEPLABV3_CLASSIFIER_CONV_BN_ACT:
118 | self._classifier_conv_bn_act,
119 | common.CKPT_SEMANTIC_LAST_LAYER: self._final_conv,
120 | }
121 | return items
122 |
--------------------------------------------------------------------------------
/model/decoder/max_deeplab_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for max_deeplab."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2 import common
21 | from deeplab2 import config_pb2
22 | from deeplab2.model.decoder import max_deeplab
23 |
24 |
25 | def _create_max_deeplab_example_proto(num_non_void_classes=19):
26 | semantic_decoder = config_pb2.DecoderOptions(
27 | feature_key='feature_semantic', atrous_rates=[6, 12, 18])
28 | auxiliary_semantic_head = config_pb2.HeadOptions(
29 | output_channels=num_non_void_classes, head_channels=256)
30 | pixel_space_head = config_pb2.HeadOptions(
31 | output_channels=128, head_channels=256)
32 | max_deeplab_options = config_pb2.ModelOptions.MaXDeepLabOptions(
33 | pixel_space_head=pixel_space_head,
34 | auxiliary_semantic_head=auxiliary_semantic_head)
35 | # Add features from lowest to highest.
36 | max_deeplab_options.auxiliary_low_level.add(
37 | feature_key='res3', channels_project=64)
38 | max_deeplab_options.auxiliary_low_level.add(
39 | feature_key='res2', channels_project=32)
40 | return config_pb2.ModelOptions(
41 | decoder=semantic_decoder, max_deeplab=max_deeplab_options)
42 |
43 |
44 | class MaXDeeplabTest(tf.test.TestCase):
45 |
46 | def test_max_deeplab_decoder_output_shape(self):
47 | num_non_void_classes = 19
48 | num_mask_slots = 127
49 | model_options = _create_max_deeplab_example_proto(
50 | num_non_void_classes=num_non_void_classes)
51 | decoder = max_deeplab.MaXDeepLab(
52 | max_deeplab_options=model_options.max_deeplab,
53 | ignore_label=255,
54 | decoder_options=model_options.decoder)
55 |
56 | input_dict = {
57 | 'res2':
58 | tf.random.uniform([2, 17, 17, 256]),
59 | 'res3':
60 | tf.random.uniform([2, 9, 9, 512]),
61 | 'transformer_class_feature':
62 | tf.random.uniform([2, num_mask_slots, 256]),
63 | 'transformer_mask_feature':
64 | tf.random.uniform([2, num_mask_slots, 256]),
65 | 'feature_panoptic':
66 | tf.random.uniform([2, 17, 17, 256]),
67 | 'feature_semantic':
68 | tf.random.uniform([2, 5, 5, 2048])
69 | }
70 | resulting_dict = decoder(input_dict)
71 | self.assertListEqual(
72 | resulting_dict[common.PRED_SEMANTIC_LOGITS_KEY].shape.as_list(),
73 | [2, 17, 17, 19]) # Stride 4
74 | self.assertListEqual(
75 | resulting_dict[
76 | common.PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY].shape.as_list(),
77 | [2, 17, 17, 128]) # Stride 4
78 | self.assertListEqual(
79 | resulting_dict[
80 | common.PRED_TRANSFORMER_CLASS_LOGITS_KEY].shape.as_list(),
81 | # Non-void classes and a void class.
82 | [2, num_mask_slots, num_non_void_classes + 1])
83 | self.assertListEqual(
84 | resulting_dict[common.PRED_PIXEL_SPACE_MASK_LOGITS_KEY].shape.as_list(),
85 | [2, 17, 17, num_mask_slots]) # Stride 4.
86 |
87 |
88 | if __name__ == '__main__':
89 | tf.test.main()
90 |
--------------------------------------------------------------------------------
/model/encoder/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/model/encoder/atrous_consistency_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests of atrous consistencies for axial_resnet_instances."""
17 |
18 | from absl.testing import parameterized
19 | import tensorflow as tf
20 |
21 | from deeplab2.model import test_utils
22 | from deeplab2.model.encoder import axial_resnet_instances
23 |
24 |
25 | class AtrousConsistencyTest(tf.test.TestCase, parameterized.TestCase):
26 |
27 | @parameterized.product(
28 | (dict(model_name='resnet50', backbone_layer_multiplier=1),
29 | dict(model_name='resnet50_beta', backbone_layer_multiplier=1),
30 | dict(model_name='wide_resnet41', backbone_layer_multiplier=1),
31 | dict(model_name='swidernet', backbone_layer_multiplier=2)),
32 | output_stride=[8, 16, 32])
33 | def test_model_atrous_consistency_with_output_stride_four(
34 | self, model_name, backbone_layer_multiplier, output_stride):
35 | tf.random.set_seed(0)
36 |
37 | # Create the input.
38 | pixel_inputs = test_utils.create_test_input(1, 225, 225, 3)
39 |
40 | # Create the model and the weights.
41 | model_1 = axial_resnet_instances.get_model(
42 | model_name,
43 | # Test with small models only.
44 | num_blocks=[2, 2, 2, 2],
45 | backbone_layer_multiplier=backbone_layer_multiplier,
46 | bn_layer=tf.keras.layers.BatchNormalization,
47 | conv_kernel_weight_decay=0.0001,
48 | output_stride=4)
49 |
50 | # Create the weights.
51 | model_1(pixel_inputs, training=False)
52 |
53 | # Set the batch norm gamma as non-zero so that the 3x3 convolution affects
54 | # the output.
55 | for weight in model_1.trainable_weights:
56 | if '/gamma:0' in weight.name:
57 | weight.assign(tf.ones_like(weight))
58 |
59 | # Dense feature extraction followed by subsampling.
60 | pixel_outputs = model_1(pixel_inputs, training=False)['res5']
61 | downsampling_stride = output_stride // 4
62 | expected = pixel_outputs[:, ::downsampling_stride, ::downsampling_stride, :]
63 |
64 | # Feature extraction at the nominal network rate.
65 | model_2 = axial_resnet_instances.get_model(
66 | model_name,
67 | # Test with small models only.
68 | num_blocks=[2, 2, 2, 2],
69 | backbone_layer_multiplier=backbone_layer_multiplier,
70 | bn_layer=tf.keras.layers.BatchNormalization,
71 | conv_kernel_weight_decay=0.0001,
72 | output_stride=output_stride)
73 | # Create the weights.
74 | model_2(pixel_inputs, training=False)
75 | # Make the two networks use the same weights.
76 | model_2.set_weights(model_1.get_weights())
77 | output = model_2(pixel_inputs, training=False)['res5']
78 |
79 | # Normalize the outputs. Since we set batch_norm gamma to 1, the output
80 | # activations can explode to a large standard deviation, which sometimes
81 |     # causes numerical errors beyond the tolerances.
82 | normalizing_factor = tf.math.reduce_std(expected)
83 | # Compare normalized outputs.
84 | self.assertAllClose(output / normalizing_factor,
85 | expected / normalizing_factor,
86 | atol=1e-4, rtol=1e-4)
87 |
88 |
89 | if __name__ == '__main__':
90 | tf.test.main()
91 |
--------------------------------------------------------------------------------
/model/encoder/axial_resnet_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for axial_resnet."""
17 |
18 | import numpy as np
19 | import tensorflow as tf
20 |
21 | from deeplab2.model.encoder import axial_resnet
22 |
23 |
24 | class AxialResNetTest(tf.test.TestCase):
25 |
26 | def test_axial_resnet_correct_output_shape(self):
27 | model = axial_resnet.AxialResNet('max_deeplab_s')
28 | endpoints = model(tf.zeros([2, 65, 65, 3]), training=False)
29 | self.assertListEqual(endpoints['backbone_output'].get_shape().as_list(),
30 | [2, 5, 5, 2048])
31 | self.assertListEqual(
32 | endpoints['transformer_class_feature'].get_shape().as_list(),
33 | [2, 128, 256])
34 | self.assertListEqual(
35 | endpoints['transformer_mask_feature'].get_shape().as_list(),
36 | [2, 128, 256])
37 | self.assertListEqual(endpoints['feature_panoptic'].get_shape().as_list(),
38 | [2, 17, 17, 256])
39 | self.assertListEqual(endpoints['feature_semantic'].get_shape().as_list(),
40 | [2, 5, 5, 2048])
41 | num_params = np.sum(
42 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights])
43 | self.assertEqual(num_params, 61726624)
44 |
45 | if __name__ == '__main__':
46 | tf.test.main()
47 |
--------------------------------------------------------------------------------
/model/encoder/model_export_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests of model exports for axial_resnet_instances."""
17 |
18 | import os
19 |
20 | from absl import flags
21 | from absl.testing import parameterized
22 | import tensorflow as tf
23 |
24 | from deeplab2.model.encoder import axial_resnet_instances
25 |
26 | FLAGS = flags.FLAGS
27 |
28 |
29 | class ModelExportTest(tf.test.TestCase, parameterized.TestCase):
30 |
31 | @parameterized.parameters(
32 | ('resnet50',),
33 | ('resnet50_beta',),
34 | ('max_deeplab_s_backbone',),
35 | ('max_deeplab_l_backbone',),
36 | ('axial_resnet_s',),
37 | ('axial_resnet_l',),
38 | ('axial_deeplab_s',),
39 | ('axial_deeplab_l',),
40 | ('swidernet',),
41 | ('axial_swidernet',),
42 | )
43 | def test_model_export(self, model_name):
44 | model = axial_resnet_instances.get_model(
45 | model_name,
46 | output_stride=16,
47 | backbone_layer_multiplier=1.0,
48 | bn_layer=tf.keras.layers.BatchNormalization,
49 | conv_kernel_weight_decay=0.0001,
50 | # Test with small models only.
51 | num_blocks=[2, 2, 2, 2],
52 | # Disable drop path as it is not compatible with model exporting.
53 | block_group_config={'drop_path_keep_prob': 1.0})
54 | model(tf.keras.Input([257, 257, 3], batch_size=1), training=False)
55 | export_dir = os.path.join(
56 | FLAGS.test_tmpdir, 'test_model_export', model_name)
57 | model.save(export_dir)
58 |
59 |
60 | if __name__ == '__main__':
61 | tf.test.main()
62 |
--------------------------------------------------------------------------------
/model/kmax_deeplab_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for kmax_deeplab."""
17 |
18 | import os
19 |
20 | import numpy as np
21 | import tensorflow as tf
22 |
23 | from google.protobuf import text_format
24 | from deeplab2 import common
25 | from deeplab2 import config_pb2
26 | from deeplab2.data import dataset
27 | from deeplab2.model import kmax_deeplab
28 | from deeplab2.model import utils
29 | # resources dependency
30 |
31 | _CONFIG_PATH = 'deeplab2/configs/example'
32 |
33 |
34 | def _read_proto_file(filename, proto):
35 | filename = filename # OSS: removed internal filename loading.
36 | with tf.io.gfile.GFile(filename, 'r') as proto_file:
37 | return text_format.ParseLines(proto_file, proto)
38 |
39 |
40 | def _create_model_from_test_proto(file_name,
41 | dataset_name='cityscapes_panoptic'):
42 | proto_filename = os.path.join(_CONFIG_PATH, file_name)
43 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions())
44 | return kmax_deeplab.KMaXDeepLab(
45 | config,
46 | dataset.MAP_NAME_TO_DATASET_INFO[dataset_name]), config
47 |
48 |
49 | class DeeplabTest(tf.test.TestCase):
50 |
51 | def test_deeplab_with_kmax_convnext_base(self):
52 | model, experiment_options = _create_model_from_test_proto(
53 | 'example_coco_kmax_meta_convnext.textproto',
54 | dataset_name='coco_panoptic')
55 | train_crop_size = tuple(experiment_options.train_dataset_options.crop_size)
56 | input_tensor = tf.random.uniform(
57 | shape=(2, train_crop_size[0], train_crop_size[1], 3))
58 | stride_4_size = utils.scale_mutable_sequence(train_crop_size, 0.25)
59 | expected_semantic_shape = [
60 | 2, stride_4_size[0], stride_4_size[1], experiment_options.model_options.
61 | max_deeplab.auxiliary_semantic_head.output_channels]
62 | expected_transformer_class_logits_shape = [
63 | 2, 128, experiment_options.model_options.
64 | max_deeplab.auxiliary_semantic_head.output_channels]
65 | expected_pixel_space_normalized_feature_shape = [
66 | 2, stride_4_size[0], stride_4_size[1], experiment_options.model_options.
67 | max_deeplab.pixel_space_head.output_channels]
68 | expected_pixel_space_mask_logits_shape = [
69 | 2, stride_4_size[0], stride_4_size[1], 128]
70 | resulting_dict = model(input_tensor, training=True)
71 | self.assertListEqual(
72 | resulting_dict[common.PRED_SEMANTIC_LOGITS_KEY].shape.as_list(),
73 | expected_semantic_shape)
74 | self.assertListEqual(
75 | resulting_dict[
76 | common.PRED_TRANSFORMER_CLASS_LOGITS_KEY].shape.as_list(),
77 | expected_transformer_class_logits_shape)
78 | self.assertListEqual(
79 | resulting_dict[
80 | common.PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY].shape.as_list(),
81 | expected_pixel_space_normalized_feature_shape)
82 | self.assertListEqual(
83 | resulting_dict[common.PRED_PIXEL_SPACE_MASK_LOGITS_KEY].shape.as_list(),
84 | expected_pixel_space_mask_logits_shape)
85 | num_params = 0
86 | for v in model.trainable_weights:
87 | params = np.prod(v.get_shape().as_list())
88 | # Exclude the auxiliary semantic head.
89 | if 'auxiliary_semantic' not in v.name:
90 | num_params += params
91 | self.assertEqual(num_params, 121513304)
92 |
93 |
94 | if __name__ == '__main__':
95 | tf.test.main()
96 |
--------------------------------------------------------------------------------
/model/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/model/layers/activations.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Defines a set of useful activation functions."""
17 | import functools
18 | import tensorflow as tf
19 |
20 |
21 | def gelu(input_tensor, approximate=False):
22 | """Gaussian Error Linear Unit.
23 |
24 | Reference:
25 | Gaussian Error Linear Units (GELUs), Dan Hendrycks, Kevin Gimpel, arXiv 2016.
26 |
27 | Args:
28 | input_tensor: A tensor with an arbitrary shape.
29 | approximate: A boolean, whether to enable approximation.
30 |
31 | Returns:
32 | The activated input tensor.
33 | """
34 | return tf.keras.activations.gelu(input_tensor, approximate=approximate)
35 |
36 |
37 | def hard_sigmoid(input_tensor):
38 | """Hard sigmoid activation function.
39 |
40 | Args:
41 | input_tensor: A tensor with an arbitrary shape.
42 |
43 | Returns:
44 | The activated input tensor.
45 | """
46 | input_tensor = tf.convert_to_tensor(input_tensor)
47 | return tf.nn.relu6(input_tensor + tf.constant(3.)) * 0.16667
48 |
49 |
50 | def relu6(input_tensor):
51 | """Relu6 activation function.
52 |
53 | Args:
54 | input_tensor: A tensor with an arbitrary shape.
55 |
56 | Returns:
57 | The activated input tensor.
58 | """
59 | input_tensor = tf.convert_to_tensor(input_tensor)
60 | return tf.nn.relu6(input_tensor)
61 |
62 |
63 | def swish(input_tensor):
64 | """Swish or SiLU activation function.
65 |
66 | Args:
67 | input_tensor: A tensor with an arbitrary shape.
68 |
69 | Returns:
70 | The activated input tensor.
71 | """
72 | input_tensor = tf.convert_to_tensor(input_tensor)
73 | return tf.nn.silu(input_tensor)
74 |
75 |
76 | def hard_swish(input_tensor):
77 | """Hard Swish function.
78 |
79 | Args:
80 | input_tensor: A tensor with an arbitrary shape.
81 |
82 | Returns:
83 | The activated input tensor.
84 | """
85 | input_tensor = tf.convert_to_tensor(input_tensor)
86 | return input_tensor * tf.nn.relu6(
87 | input_tensor + tf.constant(3.)) * (1. / 6.)
88 |
89 |
90 | def identity(input_tensor):
91 | """Identity function.
92 |
93 | Useful for helping in quantization.
94 |
95 | Args:
96 | input_tensor: A tensor with an arbitrary shape.
97 |
98 | Returns:
99 | The activated input tensor.
100 | """
101 | input_tensor = tf.convert_to_tensor(input_tensor)
102 | return tf.identity(input_tensor)
103 |
104 |
105 | def get_activation(identifier):
106 | """Gets activation function via input identifier.
107 |
108 | This function returns the specified customized activation function, if there
109 | is any. Otherwise, tf.keras.activations.get is called.
110 |
111 | Args:
112 | identifier: A string, name of the activation function.
113 |
114 | Returns:
115 | The specified activation function.
116 | """
117 | if isinstance(identifier, str):
118 | name_to_fn = {
119 | 'gelu': functools.partial(gelu, approximate=False),
120 | 'approximated_gelu': functools.partial(gelu, approximate=True),
121 | 'silu': swish,
122 | 'swish': swish,
123 | 'hard_swish': hard_swish,
124 | 'relu6': relu6,
125 | 'hard_sigmoid': hard_sigmoid,
126 | 'identity': identity,
127 | 'none': identity,
128 | }
129 | identifier = str(identifier).lower()
130 | if identifier in name_to_fn:
131 | return name_to_fn[identifier]
132 | return tf.keras.activations.get(identifier)
133 |
--------------------------------------------------------------------------------
/model/layers/activations_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for activations.py."""
17 | import tensorflow as tf
18 |
19 | from deeplab2.model.layers import activations
20 |
21 |
22 | class ActivationsTest(tf.test.TestCase):
23 |
24 | def test_gelu(self):
25 | expected_data = [[0.14967535, 0., -0.10032465],
26 | [-0.15880796, -0.04540223, 2.9963627]]
27 | gelu_data = activations.gelu([[.25, 0, -.25], [-1, -2, 3]],
28 | approximate=True)
29 | self.assertAllClose(expected_data, gelu_data)
30 | gelu_data_via_get_activation = activations.get_activation(
31 | 'approximated_gelu')([[.25, 0, -.25], [-1, -2, 3]])
32 | self.assertAllClose(expected_data, gelu_data_via_get_activation)
33 |
34 |
35 | if __name__ == '__main__':
36 | tf.test.main()
37 |
--------------------------------------------------------------------------------
/model/layers/axial_blocks_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for axial_blocks."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.layers import axial_blocks
21 |
22 |
23 | class AxialBlocksTest(tf.test.TestCase):
24 |
25 | def test_conv_basic_block_correct_output_shape(self):
26 | layer = axial_blocks.AxialBlock(
27 | filters_list=[256, 256],
28 | strides=2)
29 | float_training_tensor = tf.constant(0.0, dtype=tf.float32)
30 | output = layer((tf.zeros([2, 65, 65, 32]),
31 | float_training_tensor))
32 | self.assertListEqual(output.get_shape().as_list(), [2, 33, 33, 256])
33 |
34 | def test_conv_bottleneck_block_correct_output_shape(self):
35 | layer = axial_blocks.AxialBlock(
36 | filters_list=[64, 64, 256],
37 | strides=1)
38 | float_training_tensor = tf.constant(0.0, dtype=tf.float32)
39 | output = layer((tf.zeros([2, 65, 65, 32]),
40 | float_training_tensor))
41 | self.assertListEqual(output.get_shape().as_list(), [2, 65, 65, 256])
42 |
43 | def test_axial_block_correct_output_shape(self):
44 | layer = axial_blocks.AxialBlock(
45 | filters_list=[128, 64, 256],
46 | strides=2,
47 | attention_type='axial')
48 | float_training_tensor = tf.constant(0.0, dtype=tf.float32)
49 | output = layer((tf.zeros([2, 65, 65, 32]),
50 | float_training_tensor))
51 | self.assertListEqual(output.get_shape().as_list(), [2, 33, 33, 256])
52 |
53 | if __name__ == '__main__':
54 | tf.test.main()
55 |
--------------------------------------------------------------------------------
/model/layers/axial_layers_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for axial_layers."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.layers import axial_layers
21 |
22 |
23 | class AxialLayersTest(tf.test.TestCase):
24 |
25 | def test_default_axial_attention_layer_output_shape(self):
26 | layer = axial_layers.AxialAttention()
27 | output = layer(tf.zeros([10, 5, 32]))
28 | self.assertListEqual(output.get_shape().as_list(), [10, 5, 1024])
29 |
30 | def test_axial_attention_2d_layer_output_shape(self):
31 | layer = axial_layers.AxialAttention2D()
32 | output = layer(tf.zeros([2, 5, 5, 32]))
33 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 1024])
34 |
35 | def test_change_filters_output_shape(self):
36 | layer = axial_layers.AxialAttention2D(filters=32)
37 | output = layer(tf.zeros([2, 5, 5, 32]))
38 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 64])
39 |
40 | def test_value_expansion_output_shape(self):
41 | layer = axial_layers.AxialAttention2D(value_expansion=1)
42 | output = layer(tf.zeros([2, 5, 5, 32]))
43 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 512])
44 |
45 | def test_global_attention_output_shape(self):
46 | layer = axial_layers.GlobalAttention2D()
47 | output = layer(tf.zeros([2, 5, 5, 32]))
48 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 1024])
49 |
50 | def test_stride_two_output_shape(self):
51 | layer = axial_layers.AxialAttention2D(strides=2)
52 | output = layer(tf.zeros([2, 5, 5, 32]))
53 | self.assertListEqual(output.get_shape().as_list(), [2, 3, 3, 1024])
54 |
55 | if __name__ == '__main__':
56 | tf.test.main()
57 |
--------------------------------------------------------------------------------
/model/layers/blocks_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for blocks.py."""
17 | import tensorflow as tf
18 |
19 | from deeplab2.model.layers import blocks
20 |
21 |
22 | class BlocksTest(tf.test.TestCase):
23 |
24 | def test_inverted_bottleneck_block_output_shape(self):
25 | batch, height, width, input_channels = 2, 17, 17, 4
26 | output_channels = 6
27 | input_tensor = tf.random.uniform(
28 | shape=(batch, height, width, input_channels))
29 | ivb_block = blocks.InvertedBottleneckBlock(
30 | in_filters=input_channels,
31 | out_filters=output_channels,
32 | expand_ratio=2,
33 | strides=1,
34 | name='inverted_bottleneck',
35 | )
36 | output_tensor, _ = ivb_block(input_tensor)
37 | self.assertListEqual(output_tensor.get_shape().as_list(),
38 | [batch, height, width, output_channels])
39 |
40 | def test_inverted_bottleneck_block_feature_map_alignment(self):
41 | batch, height, width, input_channels = 2, 17, 17, 128
42 | output_channels = 256
43 | input_tensor = tf.random.uniform(
44 | shape=(batch, height, width, input_channels))
45 | ivb_block1 = blocks.InvertedBottleneckBlock(
46 | in_filters=input_channels,
47 | out_filters=output_channels,
48 | expand_ratio=2,
49 | strides=2,
50 | name='inverted_bottleneck1',
51 | )
52 | ivb_block1(input_tensor, False)
53 | weights = ivb_block1.get_weights()
54 | output_tensor, _ = ivb_block1(input_tensor, False)
55 |
56 | ivb_block2 = blocks.InvertedBottleneckBlock(
57 | in_filters=input_channels,
58 | out_filters=output_channels,
59 | expand_ratio=2,
60 | strides=1,
61 | name='inverted_bottleneck2',
62 | )
63 | ivb_block2(input_tensor, False)
64 | ivb_block2.set_weights(weights)
65 | expected = ivb_block2(input_tensor, False)[0][:, ::2, ::2, :]
66 |
67 | self.assertAllClose(ivb_block1.get_weights(), ivb_block2.get_weights(),
68 | atol=1e-4, rtol=1e-4)
69 | self.assertAllClose(output_tensor, expected, atol=1e-4, rtol=1e-4)
70 |
71 | if __name__ == '__main__':
72 | tf.test.main()
73 |
--------------------------------------------------------------------------------
/model/layers/drop_path_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Test for drop_path.py."""
17 | import numpy as np
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.layers import drop_path
21 |
22 | # Set a fixed random seed.
23 | tf.random.set_seed(1)
24 |
25 |
26 | class DropPathTest(tf.test.TestCase):
27 |
28 | def test_drop_path_keep_prob_one(self):
29 | # Test drop_path_keep_prob = 1, where output should be equal to input.
30 | drop_path_keep_prob = 1.0
31 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32))
32 | layer_op = drop_path.DropPath(drop_path_keep_prob)
33 | output_tensor = layer_op(input_tensor, training=True)
34 | np.testing.assert_equal(input_tensor.numpy(), output_tensor.numpy())
35 |
36 | def test_not_training_mode(self):
37 | # Test not training mode, where output should be equal to input.
38 | drop_path_keep_prob = 0.8
39 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32))
40 | layer_op = drop_path.DropPath(drop_path_keep_prob)
41 | output_tensor = layer_op(input_tensor, training=False)
42 | np.testing.assert_equal(input_tensor.numpy(), output_tensor.numpy())
43 |
44 | def test_drop_path(self):
45 | drop_path_keep_prob = 0.8
46 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32))
47 | layer_op = drop_path.DropPath(drop_path_keep_prob)
48 | output_tensor = layer_op(input_tensor, training=True)
49 | self.assertFalse(np.array_equal(input_tensor.numpy(),
50 | output_tensor.numpy()))
51 |
52 | def test_constant_drop_path_schedule(self):
53 | keep_prob_for_last_stage = 0.8
54 | current_stage_keep_prob = drop_path.get_drop_path_keep_prob(
55 | keep_prob_for_last_stage,
56 | schedule='constant',
57 | current_stage=2,
58 | num_stages=5)
59 | self.assertEqual(current_stage_keep_prob, keep_prob_for_last_stage)
60 |
61 | def test_linear_drop_path_schedule(self):
62 | keep_prob_for_last_stage = 0.8
63 | current_stage_keep_prob = drop_path.get_drop_path_keep_prob(
64 | keep_prob_for_last_stage,
65 | schedule='linear',
66 | current_stage=1,
67 | num_stages=4)
68 | self.assertEqual(current_stage_keep_prob, 0.95)
69 |
70 | def test_unknown_drop_path_schedule(self):
71 | with self.assertRaises(ValueError):
72 | _ = drop_path.get_drop_path_keep_prob(0.8, 'unknown', 1, 4)
73 |
74 |
75 | if __name__ == '__main__':
76 | tf.test.main()
77 |
--------------------------------------------------------------------------------
/model/layers/moat_attention_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for moat_attention."""
17 |
18 | from absl import logging
19 | from absl.testing import parameterized
20 | import numpy as np
21 | import tensorflow as tf
22 | from deeplab2.model.layers import moat_attention
23 |
24 |
25 | class MOATAttentionTest(tf.test.TestCase, parameterized.TestCase):
26 |
27 | def _log_param_specs(self, layer):
28 | num_params = sum([
29 | np.prod(var.get_shape().as_list()) for var in layer.trainable_weights
30 | ])
31 | format_str = '{{:<{0}s}}\t{{:<{1}s}}'.format(
32 | max([len(v.name) for v in layer.trainable_weights]),
33 | max([len('{}'.format(v.get_shape())) for v in
34 | layer.trainable_weights]))
35 | format_str = ' >> ' + format_str + '\t{:>5.2f}%'
36 |
37 | for v in layer.trainable_weights:
38 | v_shape = v.get_shape().as_list()
39 | logging.info(format_str.format(v.name, '{}'.format(v_shape),
40 | np.prod(v_shape) / num_params * 100))
41 |
42 | @parameterized.named_parameters(
43 | ('attention', None),
44 | ('attention_with_relative_position_embedding', '2d_multi_head'),
45 | )
46 | def test_attention(self, relative_position_embedding_type):
47 | batch_size = 8
48 | height = 8
49 | width = 10
50 | hidden_size = 16
51 | head_size = 8
52 | query = tf.random.normal(shape=[batch_size, height, width, hidden_size],
53 | dtype=tf.float32)
54 |
55 | attention_layer = moat_attention.Attention(
56 | hidden_size=hidden_size,
57 | head_size=head_size,
58 | relative_position_embedding_type=relative_position_embedding_type)
59 | attention_output = attention_layer(query, training=True)
60 | self._log_param_specs(attention_layer)
61 |
62 | self.assertEqual(attention_output.shape.as_list(),
63 | [batch_size, height * width, hidden_size])
64 |
65 | if __name__ == '__main__':
66 | tf.test.main()
67 |
--------------------------------------------------------------------------------
/model/layers/moat_blocks_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for moat_blocks."""
17 |
18 | from absl import logging
19 | from absl.testing import parameterized
20 | import numpy as np
21 | import tensorflow as tf
22 | from deeplab2.model.layers import moat_blocks
23 |
24 |
25 | class MOATBlocksTest(tf.test.TestCase, parameterized.TestCase):
26 |
27 | def _log_param_specs(self, layer):
28 | num_params = sum([
29 | np.prod(var.get_shape().as_list()) for var in layer.trainable_weights
30 | ])
31 | format_str = '{{:<{0}s}}\t{{:<{1}s}}'.format(
32 | max([len(v.name) for v in layer.trainable_weights]),
33 | max([len('{}'.format(v.get_shape())) for v in
34 | layer.trainable_weights]))
35 | format_str = ' >> ' + format_str + '\t{:>5.2f}%'
36 |
37 | for v in layer.trainable_weights:
38 | v_shape = v.get_shape().as_list()
39 | logging.info(format_str.format(v.name, '{}'.format(v_shape),
40 | np.prod(v_shape) / num_params * 100))
41 |
42 | @parameterized.named_parameters(
43 | ('standard', 1),
44 | ('downsample', 2),
45 | )
46 | def test_mbconv_block(self, stride):
47 | batch_size = 8
48 | height, width = 8, 8
49 | input_size = 16
50 | hidden_size = input_size * stride
51 | inputs = tf.random.normal(shape=[batch_size, height, width, input_size],
52 | dtype=tf.float32)
53 | block = moat_blocks.MBConvBlock(hidden_size=hidden_size,
54 | block_stride=stride,)
55 | output = block(inputs, training=True)
56 | self._log_param_specs(block)
57 |
58 | self.assertEqual(output.shape.as_list(),
59 | [batch_size, height // stride, width // stride,
60 | hidden_size])
61 |
62 | @parameterized.named_parameters(
63 | ('standard', 1, False),
64 | ('downsample', 2, False),
65 | ('checkpointing', 1, True),
66 | )
67 | def test_moat_block(self, stride, use_checkpointing):
68 | batch_size = 8
69 | height, width = 8, 8
70 | input_size = 16
71 | hidden_size = input_size * stride
72 | inputs = tf.random.normal(shape=[batch_size, height, width, input_size],
73 | dtype=tf.float32)
74 | block = moat_blocks.MOATBlock(hidden_size=hidden_size,
75 | block_stride=stride,
76 | window_size=[height//stride, width//stride],
77 | use_checkpointing=use_checkpointing)
78 | output = block(inputs, training=True)
79 | self._log_param_specs(block)
80 |
81 | self.assertEqual(output.shape.as_list(),
82 | [batch_size, height // stride, width // stride,
83 | hidden_size])
84 |
85 |
86 | if __name__ == '__main__':
87 | tf.test.main()
88 |
--------------------------------------------------------------------------------
/model/layers/positional_encodings_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for positional_encodings."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.layers import positional_encodings
21 |
22 |
23 | class PositionalEncodingsTest(tf.test.TestCase):
24 |
25 | def test_compute_relative_distance_matrix_output_shape(self):
26 | output = positional_encodings._compute_relative_distance_matrix(33, 97)
27 | self.assertListEqual(output.get_shape().as_list(), [33, 97])
28 |
29 | def test_relative_positional_encoding_output_shape(self):
30 | layer = positional_encodings.RelativePositionalEncoding(
31 | 33, 97, 32, 'rpe')
32 | output = layer(None)
33 | self.assertListEqual(output.get_shape().as_list(), [33, 97, 32])
34 |
35 | def test_add_absolute_positional_encoding_1d_output_shape(self):
36 | layer = positional_encodings.AddAbsolutePositionalEncoding(
37 | 'ape1d', positional_encoding_type='1d')
38 | shape = [2, 5, 5, 3]
39 | output = layer(tf.zeros(shape))
40 | self.assertEqual(len(layer.get_weights()), 10)
41 | self.assertListEqual(output.get_shape().as_list(), shape)
42 |
43 | def test_add_absolute_positional_encoding_2d_output_shape(self):
44 | layer = positional_encodings.AddAbsolutePositionalEncoding(
45 | 'ape2d', positional_encoding_type='2d')
46 | shape = [2, 5, 5, 3]
47 | output = layer(tf.zeros(shape))
48 | self.assertEqual(len(layer.get_weights()), 5)
49 | self.assertListEqual(output.get_shape().as_list(), shape)
50 |
51 | def test_add_absolute_positional_encoding_none_output_shape(self):
52 | layer = positional_encodings.AddAbsolutePositionalEncoding(
53 | 'none', positional_encoding_type='none')
54 | shape = [2, 5, 5, 3]
55 | output = layer(tf.zeros(shape))
56 | self.assertEqual(len(layer.get_weights()), 0)
57 | self.assertListEqual(output.get_shape().as_list(), shape)
58 |
59 | if __name__ == '__main__':
60 | tf.test.main()
61 |
--------------------------------------------------------------------------------
/model/layers/resized_fuse_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for resized_fuse."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.layers import resized_fuse
21 |
22 |
23 | class ResizedFuseTest(tf.test.TestCase):
24 |
25 | def test_resize_and_fuse_features(self):
26 | batch, height, width, channels = 2, 11, 11, 6
27 | smaller_height, smaller_width, smaller_channels = 6, 6, 3
28 | larger_height1, larger_width1 = 21, 21 # Stride 2 conv.
29 | larger_height2, larger_width2 = 22, 22 # Stride 2 conv.
30 | larger_height3, larger_width3 = 23, 23 # Conv and resize.
31 |
32 | feature_list = []
33 | feature_list.append(tf.zeros([batch, smaller_height, smaller_width,
34 | smaller_channels]))
35 | feature_list.append(tf.zeros([batch, smaller_height, smaller_width,
36 | channels]))
37 | feature_list.append(tf.zeros([batch, height, width, smaller_channels]))
38 | feature_list.append(tf.zeros([batch, height, width, channels]))
39 | feature_list.append(tf.zeros([batch, larger_height1, larger_width1,
40 | channels]))
41 | feature_list.append(tf.zeros([batch, larger_height1, larger_width1,
42 | smaller_channels]))
43 | feature_list.append(tf.zeros([batch, larger_height2, larger_width2,
44 | smaller_channels]))
45 | feature_list.append(tf.zeros([batch, larger_height3, larger_width3,
46 | smaller_channels]))
47 | layer = resized_fuse.ResizedFuse(name='fuse',
48 | height=height,
49 | width=width,
50 | num_channels=channels)
51 | output = layer(feature_list)
52 | self.assertEqual(output.get_shape().as_list(), [batch, height, width,
53 | channels])
54 |
55 | if __name__ == '__main__':
56 | tf.test.main()
57 |
--------------------------------------------------------------------------------
/model/layers/squeeze_and_excite_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for squeeze_and_excite.py."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.layers import squeeze_and_excite
21 |
22 |
23 | class SqueezeAndExciteTest(tf.test.TestCase):
24 |
25 | def test_simplified_squeeze_and_excite_input_output_shape(self):
26 | # Test the shape of input and output of SimplifiedSqueezeAndExcite.
27 | channels = 32
28 | input_tensor = tf.random.uniform(shape=(3, 65, 65, channels))
29 | layer_op = squeeze_and_excite.SimplifiedSqueezeAndExcite(
30 | channels)
31 | output_tensor = layer_op(input_tensor)
32 | self.assertListEqual(input_tensor.get_shape().as_list(),
33 | output_tensor.get_shape().as_list())
34 |
35 | def test_squeeze_and_excite_input_output_shape(self):
36 | # Test the shape of input and output of SqueezeAndExcite.
37 | channels = 32
38 | input_tensor = tf.random.uniform(shape=(3, 65, 65, channels))
39 | layer_op = squeeze_and_excite.SqueezeAndExcite(
40 | in_filters=channels,
41 | out_filters=channels,
42 | se_ratio=8,
43 | name='se')
44 | output_tensor = layer_op(input_tensor)
45 | self.assertListEqual(input_tensor.get_shape().as_list(),
46 | output_tensor.get_shape().as_list())
47 |
48 |
49 | if __name__ == '__main__':
50 | tf.test.main()
51 |
--------------------------------------------------------------------------------
/model/layers/stems.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This script contains STEMs for neural networks.
17 |
18 | The `STEM` is defined as the first few convolutions that process the input
19 | image to a spatially smaller feature map (e.g., output stride = 2).
20 |
21 |
22 | Reference code:
23 | https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
24 | """
25 | import tensorflow as tf
26 |
27 | from deeplab2.model.layers import convolutions
28 |
29 | layers = tf.keras.layers
30 |
31 |
32 | class InceptionSTEM(tf.keras.layers.Layer):
33 | """A InceptionSTEM layer.
34 |
35 | This class builds an InceptionSTEM layer which can be used to as the first
36 | few layers in a neural network. In particular, InceptionSTEM contains three
37 | consecutive 3x3 colutions.
38 |
39 | Reference:
40 | - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, and Alexander Alemi.
41 | "Inception-v4, inception-resnet and the impact of residual connections on
42 | learning." In AAAI, 2017.
43 | """
44 |
45 | def __init__(self,
46 | bn_layer=tf.keras.layers.BatchNormalization,
47 | width_multiplier=1.0,
48 | conv_kernel_weight_decay=0.0,
49 | activation='relu'):
50 | """Creates the InceptionSTEM layer.
51 |
52 | Args:
53 | bn_layer: An optional tf.keras.layers.Layer that computes the
54 | normalization (default: tf.keras.layers.BatchNormalization).
55 | width_multiplier: A float multiplier, controlling the value of
56 | convolution output channels.
57 | conv_kernel_weight_decay: A float, the weight decay for convolution
58 | kernels.
59 | activation: A string specifying an activation function to be used in this
60 | stem.
61 | """
62 | super(InceptionSTEM, self).__init__(name='stem')
63 |
64 | self._conv1_bn_act = convolutions.Conv2DSame(
65 | output_channels=int(64 * width_multiplier),
66 | kernel_size=3,
67 | name='conv1_bn_act',
68 | strides=2,
69 | use_bias=False,
70 | use_bn=True,
71 | bn_layer=bn_layer,
72 | activation=activation,
73 | conv_kernel_weight_decay=conv_kernel_weight_decay)
74 |
75 | self._conv2_bn_act = convolutions.Conv2DSame(
76 | output_channels=int(64 * width_multiplier),
77 | kernel_size=3,
78 | name='conv2_bn_act',
79 | strides=1,
80 | use_bias=False,
81 | use_bn=True,
82 | bn_layer=bn_layer,
83 | activation=activation,
84 | conv_kernel_weight_decay=conv_kernel_weight_decay)
85 |
86 | self._conv3_bn = convolutions.Conv2DSame(
87 | output_channels=int(128 * width_multiplier),
88 | kernel_size=3,
89 | strides=1,
90 | use_bias=False,
91 | use_bn=True,
92 | bn_layer=bn_layer,
93 | activation='none',
94 | name='conv3_bn',
95 | conv_kernel_weight_decay=conv_kernel_weight_decay)
96 |
97 | def call(self, input_tensor, training=False):
98 | """Performs a forward pass.
99 |
100 | Args:
101 | input_tensor: An input tensor of type tf.Tensor with shape [batch, height,
102 | width, channels].
103 | training: A boolean flag indicating whether training behavior should be
104 | used (default: False).
105 |
106 | Returns:
107 | The output tensor of the stem, which is not activated since the last
108 | convolution is only followed by batch normalization.
109 | """
110 | x = self._conv1_bn_act(input_tensor, training=training)
111 | x = self._conv2_bn_act(x, training=training)
112 | x = self._conv3_bn(x, training=training)
113 | return x
114 |
--------------------------------------------------------------------------------
/model/layers/stems_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for resnet_utils."""
17 | import tensorflow as tf
18 |
19 | from deeplab2.model.layers import stems
20 | from deeplab2.utils import test_utils
21 |
22 |
23 | class StemsTest(tf.test.TestCase):
24 |
25 | def test_inception_stem_output_shape(self):
26 | batch = 2
27 | height, width = 65, 65
28 | input_tensor = test_utils.create_test_input(batch, height, width, 3)
29 | model = stems.InceptionSTEM()
30 | output_tensor = model(input_tensor)
31 | expected_height = (height - 1) // 2 + 1
32 | expected_width = (width - 1) // 2 + 1
33 | expected_channels = 128
34 | self.assertListEqual(
35 | output_tensor.get_shape().as_list(),
36 | [batch, expected_height, expected_width, expected_channels])
37 |
38 |
39 | if __name__ == '__main__':
40 | tf.test.main()
41 |
--------------------------------------------------------------------------------
/model/pixel_decoder/kmax_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for kMaX pixel decoder."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.pixel_decoder import kmax
21 |
22 |
23 | class KMaXPixelDecoderTest(tf.test.TestCase):
24 |
25 | def test_model_output_shape(self):
26 | model = kmax.KMaXPixelDecoder(name='kmax_pixel_decoder')
27 | output = model({
28 | 'stage1': tf.keras.Input(shape=(321, 321, 64)),
29 | 'stage2': tf.keras.Input(shape=(161, 161, 128)),
30 | 'stage3': tf.keras.Input(shape=(81, 81, 256)),
31 | 'stage4': tf.keras.Input(shape=(41, 41, 512)),
32 | 'stage5': tf.keras.Input(shape=(21, 21, 1024)),
33 | })
34 |
35 | self.assertListEqual(output['decoder_stage1'].get_shape().as_list(),
36 | [None, 21, 21, 2048])
37 | self.assertListEqual(output['decoder_stage2'].get_shape().as_list(),
38 | [None, 41, 41, 1024])
39 | self.assertListEqual(output['decoder_stage3'].get_shape().as_list(),
40 | [None, 81, 81, 512])
41 | self.assertListEqual(output['decoder_output'].get_shape().as_list(),
42 | [None, 161, 161, 256])
43 |
44 |
45 | if __name__ == '__main__':
46 | tf.test.main()
47 |
--------------------------------------------------------------------------------
/model/pixel_encoder/axial_resnet.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Implements ResNets[1] and Axial-ResNets [2, 3] as pixel encoders.
17 |
18 | [1] Deep residual learning for image recognition.
19 | CVPR 2016.
20 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
21 |
22 | [2] Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
23 | ECCV 2020.
24 | Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
25 | Liang-Chieh Chen.
26 |
27 | [3] MaX-DeepLab: End-to-End Panoptic Segmentation with Mask Transformers,
28 | CVPR 2021.
29 | Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen.
30 | """
31 |
32 | import functools
33 |
34 | import tensorflow as tf
35 |
36 | from deeplab2.model.encoder import axial_resnet
37 |
38 | resnet50 = functools.partial(
39 | axial_resnet.AxialResNet,
40 | output_stride=32,
41 | classification_mode=True,
42 | backbone_type="resnet",
43 | use_axial_beyond_stride=0,
44 | backbone_use_transformer_beyond_stride=0,
45 | activation="relu")
46 |
47 | # This is the same backbone as MaX-S, which uses Inception Stem and
48 | # incorporates Axial-Attention in the last two stages of ResNet-50.
49 | axial_resnet50 = functools.partial(
50 | axial_resnet.AxialResNet,
51 | output_stride=32,
52 | classification_mode=True,
53 | backbone_type="resnet_beta",
54 | use_axial_beyond_stride=16,
55 | backbone_use_transformer_beyond_stride=0,
56 | activation="gelu")
57 |
58 |
59 | def get_model(model_name, input_shape, drop_path_keep_prob=1.0, **kwargs):
60 | """Gets an (Axial-)ResNet model."""
61 | block_group_config = {
62 | "drop_path_schedule": "linear",
63 | "drop_path_keep_prob": drop_path_keep_prob
64 | }
65 | model_name = model_name.lower()
66 | if model_name == "resnet50":
67 | model = resnet50(
68 | name=model_name, block_group_config=block_group_config, **kwargs)
69 | elif model_name == "axial_resnet50":
70 | model = axial_resnet50(
71 | name=model_name, block_group_config=block_group_config, **kwargs)
72 | else:
73 | raise ValueError("Unsupported backbone %s!" % model_name)
74 |
75 | # Build the model.
76 | model(tf.keras.Input(shape=input_shape))
77 |
78 | return model
79 |
--------------------------------------------------------------------------------
/model/pixel_encoder/axial_resnet_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for (Axial-)ResNets."""
17 |
18 |
19 | from absl.testing import parameterized
20 | import numpy as np
21 | import tensorflow as tf
22 |
23 | from deeplab2.model.pixel_encoder import axial_resnet
24 |
25 |
26 | class AxialResNetTest(tf.test.TestCase, parameterized.TestCase):
27 |
28 | # The parameter count does not include the classification head.
29 | @parameterized.parameters(
30 | ('resnet50', 23508032),
31 | ('axial_resnet50', 41343424),
32 | )
33 | def test_model_output_shape_and_num_params(self, model_name,
34 | expected_num_params):
35 | model = axial_resnet.get_model(model_name,
36 | input_shape=(224, 224, 3))
37 | output = model(tf.keras.Input(shape=(224, 224, 3)))
38 |
39 | if model_name == 'resnet50':
40 | dims = [64, 256, 512, 1024, 2048]
41 | elif model_name == 'axial_resnet50':
42 | dims = [128, 256, 512, 1024, 2048]
43 |
44 | self.assertListEqual(output['stage1'].get_shape().as_list(),
45 | [None, 112, 112, dims[0]])
46 | self.assertListEqual(output['stage2'].get_shape().as_list(),
47 | [None, 56, 56, dims[1]])
48 | self.assertListEqual(output['stage3'].get_shape().as_list(),
49 | [None, 28, 28, dims[2]])
50 | self.assertListEqual(output['stage4'].get_shape().as_list(),
51 | [None, 14, 14, dims[3]])
52 | self.assertListEqual(output['stage5'].get_shape().as_list(),
53 | [None, 7, 7, dims[4]])
54 |
55 | num_params = np.sum(
56 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights])
57 | self.assertEqual(num_params, expected_num_params)
58 |
59 |
60 | if __name__ == '__main__':
61 | tf.test.main()
62 |
--------------------------------------------------------------------------------
/model/pixel_encoder/convnext_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for ConvNeXt."""
17 |
18 |
19 | from absl.testing import parameterized
20 | import numpy as np
21 | import tensorflow as tf
22 |
23 | from deeplab2.model.pixel_encoder import convnext
24 |
25 |
26 | class ConvNeXtTest(tf.test.TestCase, parameterized.TestCase):
27 |
28 | # The parameter count does not include the classification head.
29 | @parameterized.parameters(
30 | ('convnext_tiny', 27818592),
31 | ('convnext_small', 49453152),
32 | ('convnext_base', 87564416),
33 | ('convnext_large', 196227264),
34 | ('convnext_xlarge', 348143872),
35 | )
36 | def test_model_output_shape_and_num_params(self, model_name,
37 | expected_num_params):
38 | model = convnext.get_model(model_name,
39 | input_shape=(224, 224, 3))
40 | output = model(tf.keras.Input(shape=(224, 224, 3)))
41 |
42 | if model_name.lower() in ['convnext_tiny', 'convnext_small']:
43 | dims = [96, 192, 384, 768]
44 | elif model_name.lower() in ['convnext_base',]:
45 | dims = [128, 256, 512, 1024]
46 | elif model_name.lower() in ['convnext_large',]:
47 | dims = [192, 384, 768, 1536]
48 | elif model_name.lower() in ['convnext_xlarge',]:
49 | dims = [256, 512, 1024, 2048]
50 |
51 | self.assertListEqual(output['stage1'].get_shape().as_list(),
52 | [None, 56, 56, dims[0]])
53 | self.assertListEqual(output['stage2'].get_shape().as_list(),
54 | [None, 56, 56, dims[0]])
55 | self.assertListEqual(output['stage3'].get_shape().as_list(),
56 | [None, 28, 28, dims[1]])
57 | self.assertListEqual(output['stage4'].get_shape().as_list(),
58 | [None, 14, 14, dims[2]])
59 | self.assertListEqual(output['stage5'].get_shape().as_list(),
60 | [None, 7, 7, dims[3]])
61 |
62 | num_params = np.sum(
63 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights])
64 | self.assertEqual(num_params, expected_num_params)
65 |
66 | @parameterized.parameters(
67 | ('convnext_tiny', 224, 4383527995),
68 | ('convnext_small', 224, 8563618819),
69 | ('convnext_base', 224, 15194596739),
70 | ('convnext_large', 224, 34121222275),
71 | ('convnext_xlarge', 224, 60600740739),
72 | )
73 | def test_model_flops(self,
74 | model_name,
75 | input_resolution,
76 | expected_multiply_adds):
77 | input_shape = [1, input_resolution, input_resolution, 3]
78 | model = convnext.get_model(model_name,
79 | input_shape=input_shape[1:])
80 | model(tf.keras.Input(shape=input_shape[1:]))
81 |
82 | forward_pass = tf.function(
83 | model.call,
84 | input_signature=[tf.TensorSpec(shape=input_shape)])
85 |
86 | graph_info = tf.compat.v1.profiler.profile(
87 | forward_pass.get_concrete_function().graph,
88 | options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation())
89 | multiply_adds = graph_info.total_float_ops // 2
90 | self.assertEqual(multiply_adds, expected_multiply_adds)
91 |
92 | if __name__ == '__main__':
93 | tf.test.main()
94 |
--------------------------------------------------------------------------------
/model/post_processor/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/model/post_processor/post_processor_builder.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This file contains a post-processor builder used in the DeepLab model."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2 import common
21 | from deeplab2 import config_pb2
22 | from deeplab2.data import dataset
23 | from deeplab2.model import utils
24 | from deeplab2.model.post_processor import max_deeplab
25 | from deeplab2.model.post_processor import panoptic_deeplab
26 |
27 |
28 | def get_post_processor(
29 | config: config_pb2.ExperimentOptions,
30 | dataset_descriptor: dataset.DatasetDescriptor) -> tf.keras.layers.Layer:
31 | """Initializes a DeepLab post-processor.
32 |
33 | Args:
34 | config: A config_pb2.ExperimentOptions configuration.
35 | dataset_descriptor: A dataset.DatasetDescriptor.
36 |
37 | Returns:
38 | PostProcessor: A post-processor depending on the configuration.
39 | """
40 | supported_tasks = utils.get_supported_tasks(config)
41 | if config.model_options.WhichOneof('meta_architecture') == 'max_deeplab':
42 | return max_deeplab.PostProcessor(config, dataset_descriptor)
43 | if common.TASK_PANOPTIC_SEGMENTATION in supported_tasks:
44 | return panoptic_deeplab.PostProcessor(config, dataset_descriptor)
45 | return panoptic_deeplab.SemanticOnlyPostProcessor()
46 |
--------------------------------------------------------------------------------
/model/post_processor/post_processor_builder_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for post_processor_builder.py."""
17 |
18 | import tensorflow as tf
19 |
20 | from google.protobuf import text_format
21 | from deeplab2 import common
22 | from deeplab2 import config_pb2
23 | from deeplab2.data import dataset
24 | from deeplab2.model.post_processor import post_processor_builder
25 |
26 |
27 | class EvaluatorTest(tf.test.TestCase):
28 |
29 | def test_evaluates_panoptic_deeplab_model(self):
30 | experiment_options_textproto = """
31 | experiment_name: "evaluation_test"
32 | eval_dataset_options {
33 | dataset: "cityscapes_panoptic"
34 | file_pattern: "EMPTY"
35 | batch_size: 1
36 | crop_size: 1025
37 | crop_size: 2049
38 | # Skip resizing.
39 | min_resize_value: 0
40 | max_resize_value: 0
41 | }
42 | evaluator_options {
43 | continuous_eval_timeout: -1
44 | stuff_area_limit: 2048
45 | center_score_threshold: 0.1
46 | nms_kernel: 13
47 | save_predictions: true
48 | save_raw_predictions: false
49 | }
50 | """
51 | config = text_format.Parse(experiment_options_textproto,
52 | config_pb2.ExperimentOptions())
53 | config.model_options.panoptic_deeplab.instance.enable = True
54 | post_processor = post_processor_builder.get_post_processor(
55 | config, dataset.CITYSCAPES_PANOPTIC_INFORMATION)
56 |
57 | result_dict = {
58 | common.PRED_SEMANTIC_PROBS_KEY:
59 | tf.zeros([1, 1025, 2049, 19], dtype=tf.float32),
60 | common.PRED_CENTER_HEATMAP_KEY:
61 | tf.zeros([1, 1025, 2049, 1], dtype=tf.float32),
62 | common.PRED_OFFSET_MAP_KEY:
63 | tf.zeros([1, 1025, 2049, 2], dtype=tf.float32)
64 | }
65 | processed_dict = post_processor(result_dict)
66 | expected_keys = {
67 | common.PRED_PANOPTIC_KEY,
68 | common.PRED_SEMANTIC_KEY,
69 | common.PRED_INSTANCE_KEY,
70 | common.PRED_INSTANCE_CENTER_KEY,
71 | common.PRED_INSTANCE_SCORES_KEY
72 | }
73 | self.assertCountEqual(processed_dict.keys(), expected_keys)
74 |
75 |
76 | if __name__ == '__main__':
77 | tf.test.main()
78 |
--------------------------------------------------------------------------------
/model/post_processor/vip_deeplab_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Test for vip_deeplab.py."""
17 | import numpy as np
18 | import tensorflow as tf
19 |
20 | from deeplab2.model.post_processor import vip_deeplab
21 |
22 |
23 | class PostProcessingTest(tf.test.TestCase):
24 |
25 | def test_stitch_video_panoptic_prediction(self):
26 | concat_semantic = np.array(
27 | [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]],
28 | dtype=np.int32)
29 | concat_instance = np.array(
30 | [[[1, 1, 2, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]],
31 | dtype=np.int32)
32 | next_semantic = np.array(
33 | [[[0, 1, 1, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]],
34 | dtype=np.int32)
35 | next_instance = np.array(
36 | [[[2, 0, 0, 1], [2, 0, 0, 1], [2, 4, 4, 1], [5, 5, 3, 3]]],
37 | dtype=np.int32)
38 | label_divisor = 1000
39 | concat_panoptic = concat_semantic * label_divisor + concat_instance
40 | next_panoptic = next_semantic * label_divisor + next_instance
41 | new_panoptic = vip_deeplab.stitch_video_panoptic_prediction(
42 | concat_panoptic, next_panoptic, label_divisor)
43 | # The expected instance is manually computed. It should receive the IDs
44 | # propagated from concat_instance by IoU matching between concat_panoptic
45 | # and next_panoptic.
46 | expected_semantic = next_semantic
47 | expected_instance = np.array(
48 | [[[1, 0, 0, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]],
49 | dtype=np.int32)
50 | expected_panoptic = expected_semantic * label_divisor + expected_instance
51 | np.testing.assert_array_equal(expected_panoptic, new_panoptic)
52 |
53 | def test_tf_video_panoptic_prediction_stitcher(self):
54 | concat_semantic = np.array(
55 | [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]],
56 | dtype=np.int32)
57 | concat_instance = np.array(
58 | [[[1, 1, 2, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]],
59 | dtype=np.int32)
60 | next_semantic = np.array(
61 | [[[0, 1, 1, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]],
62 | dtype=np.int32)
63 | next_instance = np.array(
64 | [[[2, 0, 0, 1], [2, 0, 0, 1], [2, 4, 4, 1], [5, 5, 3, 3]]],
65 | dtype=np.int32)
66 | label_divisor = 1000
67 | concat_panoptic = concat_semantic * label_divisor + concat_instance
68 | next_panoptic = next_semantic * label_divisor + next_instance
69 | stitcher = vip_deeplab.VideoPanopticPredictionStitcher(label_divisor)
70 | new_panoptic = stitcher(
71 | tf.convert_to_tensor(concat_panoptic),
72 | tf.convert_to_tensor(next_panoptic)).numpy()
73 | # The expected instance is manually computed. It should receive the IDs
74 | # propagated from concat_instance by IoU matching between concat_panoptic
75 | # and next_panoptic.
76 | expected_semantic = next_semantic
77 | expected_instance = np.array(
78 | [[[1, 0, 0, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]],
79 | dtype=np.int32)
80 | expected_panoptic = expected_semantic * label_divisor + expected_instance
81 | np.testing.assert_array_equal(expected_panoptic, new_panoptic)
82 |
83 |
84 | if __name__ == '__main__':
85 | tf.test.main()
86 |
--------------------------------------------------------------------------------
/model/test_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This file contains utility functions for the model tests."""
17 | import numpy as np
18 | import tensorflow as tf
19 |
20 |
21 | def create_test_input(batch, height, width, channels):
22 | """Creates test input tensor."""
23 | input_tensor = np.tile(
24 | np.reshape(
25 | np.reshape(np.arange(height), [height, 1]) +
26 | np.reshape(np.arange(width), [1, width]),
27 | [1, height, width, 1]),
28 | [batch, 1, 1, channels])
29 | # Normalize the input tensor so that the outputs are not too large.
30 | input_tensor = (input_tensor * 2 / np.max(input_tensor)) - 1
31 | return tf.cast(input_tensor, tf.float32)
32 |
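For reference, the ramp produced by create_test_input is easy to verify by hand. A minimal standalone NumPy sketch of the same arithmetic (not part of the library):

import numpy as np

height, width = 2, 3
# Row index plus column index yields a deterministic ramp:
# [[0, 1, 2],
#  [1, 2, 3]]
ramp = (np.reshape(np.arange(height), [height, 1]) +
        np.reshape(np.arange(width), [1, width]))
# Scaling by 2 / max and shifting by -1 maps the values into [-1, 1]:
# [[-1, -1/3, 1/3],
#  [-1/3, 1/3, 1]]
normalized = (ramp * 2 / np.max(ramp)) - 1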
--------------------------------------------------------------------------------
/model/test_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for test_utils."""
17 |
18 | import tensorflow as tf
19 |
20 | from deeplab2.model import test_utils
21 |
22 |
23 | class TestUtilsTest(tf.test.TestCase):
24 |
25 | def test_create_test_input(self):
26 | input_shape = [1, 2, 3, 4]
27 | input_tensor = test_utils.create_test_input(*input_shape)
28 | self.assertListEqual(input_tensor.get_shape().as_list(), input_shape)
29 |
30 |
31 | if __name__ == '__main__':
32 | tf.test.main()
33 |
--------------------------------------------------------------------------------
/model/transformer_decoder/kmax_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for kMaX transformer decoder."""
17 |
18 | import functools
19 | import os
20 |
21 | import tensorflow as tf
22 |
23 | from google.protobuf import text_format
24 | from deeplab2 import config_pb2
25 | from deeplab2.data import dataset
26 | from deeplab2.model import builder
27 | from deeplab2.model.transformer_decoder import kmax
28 | # resources dependency
29 |
30 |
31 | class KMaXTransformerDecoderTest(tf.test.TestCase):
32 |
33 | def test_model_output_shape(self):
34 | config_path = 'deeplab2/configs/example'
35 | def _read_proto_file(filename, proto):
36 | filename = filename # OSS: removed internal filename loading.
37 | with tf.io.gfile.GFile(filename, 'r') as proto_file:
38 | return text_format.ParseLines(proto_file, proto)
39 | proto_filename = os.path.join(config_path,
40 | 'example_coco_max_deeplab.textproto')
41 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions())
42 | dataset_descriptor = dataset.MAP_NAME_TO_DATASET_INFO['coco_panoptic']
43 | auxiliary_predictor_func = functools.partial(
44 | builder.create_decoder,
45 | model_options=config.model_options,
46 | bn_layer=tf.keras.layers.BatchNormalization,
47 | ignore_label=dataset_descriptor.ignore_label,
48 | use_auxiliary_semantic_head=False)
49 |
50 | model = kmax.KMaXTransformerDecoder(
51 | name='kmax_pixel_decoder',
52 | auxiliary_predictor_func=auxiliary_predictor_func)
53 | output = model({
54 | 'stage1': tf.keras.Input(shape=(321, 321, 64)),
55 | 'stage2': tf.keras.Input(shape=(161, 161, 128)),
56 | 'stage3': tf.keras.Input(shape=(81, 81, 256)),
57 | 'stage4': tf.keras.Input(shape=(41, 41, 512)),
58 | 'stage5': tf.keras.Input(shape=(21, 21, 1024)),
59 | 'decoder_stage1': tf.keras.Input(shape=(21, 21, 2048)),
60 | 'decoder_stage2': tf.keras.Input(shape=(41, 41, 1024)),
61 | 'decoder_stage3': tf.keras.Input(shape=(81, 81, 512)),
62 | 'decoder_output': tf.keras.Input(shape=(161, 161, 256)),
63 | })
64 |
65 | self.assertListEqual(
66 | output['transformer_class_feature'].get_shape().as_list(),
67 | [None, 128, 256])
68 | self.assertListEqual(
69 | output['transformer_mask_feature'].get_shape().as_list(),
70 | [None, 128, 256])
71 | self.assertListEqual(output['feature_panoptic'].get_shape().as_list(),
72 | [None, 161, 161, 256])
73 | self.assertListEqual(output['feature_semantic'].get_shape().as_list(),
74 | [None, 21, 21, 1024])
75 |
76 |
77 | if __name__ == '__main__':
78 | tf.test.main()
79 |
--------------------------------------------------------------------------------
/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2023 The Deeplab2 Authors.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #include /*third_party*/"tensorflow/core/framework/op.h"
16 | #include /*third_party*/"tensorflow/core/framework/shape_inference.h"
17 |
18 | namespace tensorflow_models {
19 | namespace deeplab {
20 | namespace deeplab2 {
21 |
22 | using tensorflow::shape_inference::DimensionHandle;
23 | using tensorflow::shape_inference::InferenceContext;
24 | using tensorflow::shape_inference::ShapeHandle;
25 |
26 | REGISTER_OP("MergeSemanticAndInstanceMaps")
27 | .Input("semantic_maps: int32")
28 | .Input("instance_maps: int32")
29 | .Input("thing_ids: int32")
30 | .Attr("label_divisor: int = 256")
31 | .Attr("stuff_area_limit: int = 0")
32 | .Attr("void_label: int = 0")
33 | .Output("parsing_maps: int32")
34 | .SetShapeFn([](InferenceContext* c) {
35 | ShapeHandle semantic_maps;
36 | ShapeHandle instance_maps;
37 | ShapeHandle thing_ids;
38 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &semantic_maps));
39 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &instance_maps));
40 | TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &thing_ids));
41 | DimensionHandle batch = c->Dim(semantic_maps, 0);
42 | DimensionHandle height = c->Dim(semantic_maps, 1);
43 | DimensionHandle width = c->Dim(semantic_maps, 2);
44 | c->set_output(0, c->MakeShape({batch, height, width}));
45 | return tensorflow::OkStatus();
46 | })
47 | .Doc(R"doc(
48 | Generates parsing maps from semantic maps and instance maps.
49 |
50 | Parsing maps, or panoptic segmentation, are merged from the predicted semantic
51 | maps and class-agnostic instance maps. This function merges the maps in the
52 | following way:
53 |
54 | 1) If a pixel belongs to `stuff` class (e.g., sky), the function directly uses
55 | the semantic label from the semantic map and uses 0 as the instance label.
56 | 2) If a pixel belongs to `thing` class (e.g., person), it uses the instance
57 | label from the instance map and uses the majority of the semantic labels of
58 | the same instance as the final semantic label.
59 | 3) The function relabels each instance, so that the instance label of each
60 | semantic class is in the range of [1, num_instances_of_the_semantic_class].
61 |
62 | Note that this operation is first proposed in the DeeperLab paper and adopted
63 | by the Panoptic-DeepLab framework.
64 | - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
65 | - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
66 |
67 | semantic_maps: An int32 Tensor with shape `[batch, height, width]` whose value
68 | indicates the predicted semantic label of each pixel.
69 | instance_maps: An int32 Tensor with shape `[batch, height, width]` whose value
70 | indicates the predicted instance label of each pixel.
71 | thing_ids: An int32 Tensor with shape `[num_thing_ids]` whose value refers to
72 | the semantic ids of the thing classes.
73 | label_divisor: An integer. The value used to combine the semantic and instance
74 | map to generate the parsing map. In particular, the value of a pixel in the
75 | parsing map is equal to its corresponding semantic label times label_divisor
76 | plus instance label (i.e., semantic_label * label_divisor + instance_label).
77 | stuff_area_limit: An integer. Predicted stuff segments whose areas are smaller
78 | than this threshold are assigned to VOID label.
79 | void_label: An integer, specifying the VOID label.
80 | parsing_maps: An int32 Tensor with shape `[batch, height, width]` whose value
81 | indicates the merged semantic and instance label of each pixel.
82 | )doc");
83 |
84 | } // namespace deeplab2
85 | } // namespace deeplab
86 | } // namespace tensorflow_models
87 |
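To make the documented merging rule concrete, here is a minimal NumPy sketch of the same logic for a single image (an illustration of the documented behavior, not the C++ kernel itself; it omits the stuff_area_limit and void_label handling):

import numpy as np

def merge_maps_sketch(semantic, instance, thing_ids, label_divisor=256):
  is_thing = np.isin(semantic, thing_ids)
  # Rule 1: stuff pixels keep their semantic label with instance label 0.
  parsing = np.where(is_thing, 0, semantic * label_divisor)
  # Rules 2 and 3: each predicted instance takes the majority semantic label
  # among its pixels, and instances are relabeled from 1 per semantic class.
  next_instance_id = {}
  for instance_id in np.unique(instance[is_thing]):
    mask = (instance == instance_id) & is_thing
    labels, counts = np.unique(semantic[mask], return_counts=True)
    majority = labels[np.argmax(counts)]
    next_instance_id[majority] = next_instance_id.get(majority, 0) + 1
    parsing[mask] = majority * label_divisor + next_instance_id[majority]
  return parsing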
--------------------------------------------------------------------------------
/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op_kernel.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 The Deeplab2 Authors.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #ifndef DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
16 | #define DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
17 | #include <cstdint>
18 |
19 | #include <unordered_set>
20 |
21 | #include /*third_party*/"tensorflow/core/framework/numeric_types.h"
22 | #include /*third_party*/"tensorflow/core/framework/op_kernel.h"
23 | #include /*third_party*/"tensorflow/core/framework/tensor.h"
24 | #include /*third_party*/"tensorflow/core/framework/tensor_types.h"
25 |
26 | namespace tensorflow_models {
27 | namespace deeplab {
28 | namespace deeplab2 {
29 | namespace functor {
30 |
31 | template <typename Device>
32 | struct MergeSemanticAndInstanceMaps {
33 | // Functor that merges semantic and instance maps.
34 | void operator()(
35 | const Device& d,
36 | typename tensorflow::TTypes<int32_t, 3>::ConstTensor semantic_maps,
37 | typename tensorflow::TTypes<int32_t, 3>::ConstTensor instance_maps,
38 | const std::unordered_set<int32_t>& thing_ids_set, int label_divisor,
39 | int stuff_area_limit, int void_label,
40 | typename tensorflow::TTypes<int32_t, 3>::Tensor parsing_maps);
41 | };
42 |
43 | // Helper method to convert a list of thing IDs into a hashset.
44 | template <typename Device>
45 | std::unordered_set<int32_t> Convert1DInt32TensorToSet(
46 | const Device& d, const tensorflow::Tensor& tensor);
47 |
48 | } // namespace functor
49 | } // namespace deeplab2
50 | } // namespace deeplab
51 | } // namespace tensorflow_models
52 |
53 | #endif // DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
54 |
--------------------------------------------------------------------------------
/tensorflow_ops/python/kernel_tests/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/tensorflow_ops/python/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/tensorflow_ops/python/ops/merge_semantic_and_instance_maps_op.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Utility function for the C++ TensorFlow MergeSemanticAndInstanceMaps op."""
17 |
18 | import tensorflow as tf
19 |
20 | # Make the TensorFlow MergeSemanticAndInstanceMaps op accessible by importing
21 | # merge_semantic_and_instance_maps_op.py.
22 | from tensorflow.python.framework import load_library
23 | from tensorflow.python.platform import resource_loader
24 | gen_merge_semantic_and_instance_maps_op = load_library.load_op_library(resource_loader.get_path_to_datafile('../../kernels/merge_semantic_and_instance_maps_op.so'))
25 |
26 | merge_semantic_and_instance_maps = gen_merge_semantic_and_instance_maps_op.merge_semantic_and_instance_maps
27 |
28 | tf.no_gradient('MergeSemanticAndInstanceMaps')
29 |
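A small usage sketch for the wrapper above (toy inputs; it assumes the custom op has been compiled into merge_semantic_and_instance_maps_op.so as set up by compile.sh):

import tensorflow as tf

# Toy 1x2x2 maps; class 7 is a thing class, class 0 is stuff.
semantic_maps = tf.constant([[[0, 7], [7, 7]]], dtype=tf.int32)
instance_maps = tf.constant([[[0, 1], [1, 2]]], dtype=tf.int32)
thing_ids = tf.constant([7], dtype=tf.int32)

# Each output pixel encodes semantic_label * label_divisor + instance_label.
parsing_maps = merge_semantic_and_instance_maps(
    semantic_maps, instance_maps, thing_ids,
    label_divisor=256, stuff_area_limit=0, void_label=0)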
--------------------------------------------------------------------------------
/tracker/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/tracker/optical_flow_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Utility functions for optical flow."""
17 | import cv2
18 | import numpy as np
19 |
20 |
21 | def warp_flow(img: np.ndarray, flow_tensor: np.ndarray) -> np.ndarray:
22 | """Warps an image with the given optical flow via backward mapping."""
23 | flow = flow_tensor.copy()
24 | h, w = flow.shape[:2]
25 | # Turn relative flow vectors into an absolute sampling grid for cv2.remap.
26 | flow[..., 0] += np.arange(w)
27 | flow[..., 1] += np.arange(h)[:, np.newaxis]
28 | res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
29 | return res
30 |
31 |
32 | def remove_occlusions(warped_binary_img: np.ndarray,
33 | occlusion_map: np.ndarray) -> np.ndarray:
34 | """Removes pixels of a warped binary mask that are marked as occluded."""
35 | return warped_binary_img.astype(bool) & (1 - occlusion_map).astype(bool)
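A brief usage sketch for the helpers above (synthetic inputs; a constant one-pixel shift stands in for a real optical-flow estimate):

import numpy as np

# A 4x4 binary mask and a flow field whose x-component is -1 everywhere,
# which makes cv2.remap sample from x - 1, i.e. shifts content to the right.
mask = np.zeros((4, 4), dtype=np.float32)
mask[1:3, 1:3] = 1.0
flow = np.zeros((4, 4, 2), dtype=np.float32)
flow[..., 0] = -1.0

warped = warp_flow(mask, flow)
# Suppress warped pixels flagged as occluded (here: no occlusions).
occlusion = np.zeros((4, 4), dtype=np.float32)
visible = remove_occlusions(warped, occlusion)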
33 |
--------------------------------------------------------------------------------
/trainer/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/trainer/distribution_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This file contains helper functions to run training in a distributed way."""
17 |
18 | from typing import Text, Optional
19 |
20 | import tensorflow as tf
21 |
22 |
23 | def tpu_initialize(tpu_address: Text):
24 | """Initializes TPU for TF 2.x training.
25 |
26 | Args:
27 | tpu_address: string, bns address of master TPU worker.
28 |
29 | Returns:
30 | A TPUClusterResolver.
31 | """
32 | cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
33 | tpu=tpu_address)
34 | if tpu_address not in ('', 'local'):
35 | tf.config.experimental_connect_to_cluster(cluster_resolver)
36 | tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
37 | return cluster_resolver
38 |
39 |
40 | def create_strategy(tpu_address: Optional[Text],
41 | num_gpus: int = 0) -> tf.distribute.Strategy:
42 | """Creates a strategy based on the given parameters.
43 |
44 | The strategies are created based on the following criteria and order:
45 | 1. If tpu_address is not None, a TPUStrategy is used.
46 | 2. If num_gpus > 1, a MirroredStrategy is used, which replicates the model
47 | on each GPU.
48 | 3. If num_gpus == 1, a OneDeviceStrategy is used on the GPU.
49 | 4. If num_gpus == 0, a OneDeviceStrategy is used on the CPU.
50 |
51 | Args:
52 | tpu_address: The optional name or address of the TPU to connect to or None.
53 | num_gpus: A non-negative integer specifying the number of GPUs.
54 |
55 | Returns:
56 | A tf.distribute.Strategy.
57 |
58 | Raises:
59 | ValueError: If `num_gpus` is negative and tpu_address is None.
60 | """
61 | if tpu_address is not None:
62 | resolver = tpu_initialize(tpu_address)
63 | return tf.distribute.TPUStrategy(resolver)
64 | else:
65 | if num_gpus < 0:
66 | raise ValueError('`num_gpus` must not be negative.')
67 | elif num_gpus == 0:
68 | devices = ['device:CPU:0']
69 | else:
70 | devices = ['device:GPU:%d' % i for i in range(num_gpus)]
71 | if len(devices) == 1:
72 | return tf.distribute.OneDeviceStrategy(devices[0])
73 | return tf.distribute.MirroredStrategy(devices)
74 |
--------------------------------------------------------------------------------
/trainer/runner_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for runner_utils.py."""
17 |
18 | import os
19 |
20 | import numpy as np
21 | import tensorflow as tf
22 |
23 | from google.protobuf import text_format
24 | from deeplab2 import config_pb2
25 | from deeplab2.data import dataset
26 | from deeplab2.model import deeplab
27 | from deeplab2.trainer import runner_utils
28 | # resources dependency
29 |
30 | _CONFIG_PATH = 'deeplab2/configs/example'
31 |
32 |
33 | def _read_proto_file(filename, proto):
34 | filename = filename # OSS: removed internal filename loading.
35 | with tf.io.gfile.GFile(filename, 'r') as proto_file:
36 | return text_format.ParseLines(proto_file, proto)
37 |
38 |
39 | def _create_model_from_test_proto(file_name,
40 | dataset_name='coco_panoptic'):
41 | proto_filename = os.path.join(_CONFIG_PATH, file_name)
42 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions())
43 | return deeplab.DeepLab(config,
44 | dataset.MAP_NAME_TO_DATASET_INFO[dataset_name]
45 | ), config
46 |
47 |
48 | class RunnerUtilsTest(tf.test.TestCase):
49 |
50 | def test_check_if_variable_in_backbone_with_max_deeplab(self):
51 | model, experiment_options = _create_model_from_test_proto(
52 | 'example_coco_max_deeplab.textproto', dataset_name='coco_panoptic')
53 | train_crop_size = tuple(
54 | experiment_options.train_dataset_options.crop_size)
55 | input_tensor = tf.random.uniform(
56 | shape=(2, train_crop_size[0], train_crop_size[1], 3))
57 | _ = model(input_tensor, training=True)
58 |
59 | encoder = model.checkpoint_items['encoder']
60 | encoder_variable_names = [x.name for x in encoder.trainable_variables]
61 | encoder_name = experiment_options.model_options.backbone.name
62 |
63 | num_backbone_params = 0
64 | backbone_optimizer_inputs = []
65 | for variable in model.trainable_weights:
66 | if runner_utils.check_if_variable_in_backbone(variable, encoder_name,
67 | encoder_variable_names):
68 | backbone_optimizer_inputs.append(variable)
69 | num_backbone_params += np.prod(variable.get_shape().as_list())
70 | # The number of Tensors in the backbone. We use this number in addition to
71 | # the number of parameters as a check of correctness.
72 | self.assertLen(backbone_optimizer_inputs, 301)
73 | # The same number of parameters as max_deeplab_s_backbone.
74 | self.assertEqual(num_backbone_params, 41343424)
75 |
76 |
77 | if __name__ == '__main__':
78 | tf.test.main()
79 |
--------------------------------------------------------------------------------
/trainer/train.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """This file contains code to run a model."""
17 |
18 | import os
19 | from absl import app
20 | from absl import flags
21 | from absl import logging
22 | import tensorflow as tf
23 |
24 | from google.protobuf import text_format
25 | from deeplab2 import config_pb2
26 | from deeplab2.trainer import train_lib
27 |
28 | flags.DEFINE_enum(
29 | 'mode',
30 | default=None,
31 | enum_values=['train', 'eval', 'train_and_eval', 'continuous_eval'],
32 | help='Mode to run: `train`, `eval`, `train_and_eval`, `continuous_eval`.')
33 |
34 | flags.DEFINE_string(
35 | 'model_dir',
36 | default=None,
37 | help='The base directory where the model and training/evaluation summaries '
38 | 'are stored. The path will be combined with the `experiment_name` defined '
39 | 'in the config file to create a folder under which all files are stored.')
40 |
41 | flags.DEFINE_string(
42 | 'config_file',
43 | default=None,
44 | help='Proto file which specifies the experiment configuration. The proto '
45 | 'definition of ExperimentOptions is specified in config.proto.')
46 |
47 | flags.DEFINE_string(
48 | 'master',
49 | default=None,
50 | help='The Cloud TPU to use for training. This should be either the name '
51 | 'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 '
52 | 'url.')
53 |
54 | flags.DEFINE_integer(
55 | 'num_gpus',
56 | default=0,
57 | help='The number of GPUs to use for training. If the `master` flag is not '
58 | 'set, this parameter specifies whether GPUs should be used and how many '
59 | 'of them (default: 0).')
60 |
61 | FLAGS = flags.FLAGS
62 |
63 |
64 | def main(_):
65 | logging.info('Reading the config file.')
66 | with tf.io.gfile.GFile(FLAGS.config_file, 'r') as proto_file:
67 | config = text_format.ParseLines(proto_file, config_pb2.ExperimentOptions())
68 |
69 | logging.info('Starting the experiment.')
70 | combined_model_dir = os.path.join(FLAGS.model_dir, config.experiment_name)
71 | train_lib.run_experiment(FLAGS.mode, config, combined_model_dir, FLAGS.master,
72 | FLAGS.num_gpus)
73 |
74 |
75 | if __name__ == '__main__':
76 | app.run(main)
77 |
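For reference, a typical invocation might look like the following (paths and the chosen config are placeholders):

python trainer/train.py \
    --config_file=configs/example/example_cityscapes_panoptic_deeplab.textproto \
    --mode=train \
    --model_dir=${PATH_TO_MODEL_DIR} \
    --num_gpus=1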
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------
/utils/create_images_json_for_cityscapes.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # pylint: disable=line-too-long
17 | # pyformat: disable
18 | r"""Creates a JSON file with info for a split of Cityscapes images.
19 |
20 | This single-purpose version has special handling for the directory structure of
21 | the Cityscapes dataset and the expected output ids.
22 |
23 | Sample commands:
24 |
25 | python create_images_json_for_cityscapes.py \
26 | --image_dir=${DATA_ROOT}/leftImg8bit/${IMAGES_SPLIT} \
27 | --output_json_path=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \
28 | --only_basename \
29 | --include_image_type_suffix=false
30 | """
31 | # pyformat: enable
32 | # pylint: enable=line-too-long
33 |
34 | from __future__ import absolute_import
35 | from __future__ import division
36 | from __future__ import print_function
37 |
38 | import json
39 | import os
40 | import re
41 |
42 | from absl import app
43 | from absl import flags
44 |
45 | import tensorflow as tf
46 |
47 | FLAGS = flags.FLAGS
48 |
49 | flags.DEFINE_string(
50 | 'image_dir', None,
51 | 'The top-level directory of image files to be included in the set.')
52 |
53 | flags.DEFINE_list(
54 | 'keep_cities', None,
55 | 'Comma-separated list of strings specifying cities to be processed.')
56 |
57 | flags.DEFINE_string('output_json_path', None,
58 | 'Output path to which the image info JSON is written.')
59 |
60 | flags.DEFINE_boolean(
61 | 'only_basename', True,
62 | 'If set, the included "file_name" properties of the images in the JSON '
63 | 'file will only include the base name and not the city directory. Used for '
64 | 'tools that do not support nested directories.')
65 |
66 | flags.DEFINE_boolean(
67 | 'include_image_type_suffix', True,
68 | 'If set, will include the suffix of the image type (e.g. "_leftImg8bit") '
69 | 'in the "file_name" properties of the image.')
70 |
71 |
72 | def _create_images_json(image_dir, output_json_path, only_basename=False,
73 | include_image_type_suffix=True, keep_cities=None):
74 | """Lists the images in image_dir and writes out the info JSON for them."""
75 | images_info_array = []
76 | for city_dir in tf.io.gfile.listdir(image_dir):
77 | if keep_cities and city_dir not in keep_cities:
78 | continue
79 | image_id_re = r'%s_[0-9]+_[0-9]+' % city_dir
80 | image_id_re = re.compile(image_id_re)
81 | for image_basename in tf.io.gfile.listdir(
82 | os.path.join(image_dir, city_dir)):
83 | match = image_id_re.match(image_basename)
84 | image_id = match.group(0)
85 | if include_image_type_suffix:
86 | file_name = image_basename
87 | else:
88 | file_name = image_id + os.path.splitext(image_basename)[1]
89 | if not only_basename:
90 | file_name = os.path.join(city_dir, file_name)
91 | image_info_dict = {'id': image_id, 'file_name': file_name}
92 | images_info_array.append(image_info_dict)
93 |
94 | info_dict = {'images': images_info_array}
95 |
96 | with tf.io.gfile.GFile(output_json_path, 'w+') as json_file:
97 | json.dump(info_dict, json_file)
98 |
99 |
100 | def main(argv):
101 | if len(argv) > 1:
102 | raise app.UsageError('Too many command-line arguments.')
103 | keep_cities = None
104 | if FLAGS.keep_cities:
105 | keep_cities = [str(x) for x in FLAGS.keep_cities]
106 | _create_images_json(
107 | FLAGS.image_dir,
108 | FLAGS.output_json_path,
109 | only_basename=FLAGS.only_basename,
110 | include_image_type_suffix=FLAGS.include_image_type_suffix,
111 | keep_cities=keep_cities)
112 |
113 |
114 | if __name__ == '__main__':
115 | flags.mark_flags_as_required(['image_dir', 'output_json_path'])
116 | app.run(main)
117 |
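To make the script's behavior concrete: for a hypothetical input file
${DATA_ROOT}/leftImg8bit/val/frankfurt/frankfurt_000000_000294_leftImg8bit.png,
the sample command above (--only_basename with --include_image_type_suffix=false)
writes a JSON file containing:

    {"images": [{"id": "frankfurt_000000_000294",
                 "file_name": "frankfurt_000000_000294.png"}]}
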
--------------------------------------------------------------------------------
/utils/hparam_configs_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Tests for hparam_configs."""
17 |
18 | import os
19 | import tempfile
20 | from absl import logging
21 | import tensorflow.compat.v1 as tf
22 | import yaml
23 | from deeplab2.utils import hparam_configs
24 |
25 |
26 | class HparamConfigsTest(tf.test.TestCase):
27 |
28 | def test_config_override(self):
29 | c = hparam_configs.Config({'a': 1, 'b': 2})
30 | self.assertEqual(c.as_dict(), {'a': 1, 'b': 2})
31 |
32 | c.update({'a': 10})
33 | self.assertEqual(c.as_dict(), {'a': 10, 'b': 2})
34 |
35 | c.b = 20
36 | self.assertEqual(c.as_dict(), {'a': 10, 'b': 20})
37 |
38 | c.override('a=true,b=ss')
39 | self.assertEqual(c.as_dict(), {'a': True, 'b': 'ss'})
40 |
41 | c.override('a=100,,,b=2.3,') # Extra ',' is fine.
42 | self.assertEqual(c.as_dict(), {'a': 100, 'b': 2.3})
43 |
44 | c.override('a=2x3,b=50')  # '2x3' is the special image-size format; it stays a string.
45 | self.assertEqual(c.as_dict(), {'a': '2x3', 'b': 50})
46 |
47 | # Overriding string must be in the format xx=yy.
48 | with self.assertRaises(ValueError):
49 | c.override('a=true,invalid_string')
50 |
51 | def test_config_yaml(self):
52 | tmpdir = tempfile.gettempdir()
53 | yaml_file_path = os.path.join(tmpdir, 'x.yaml')
54 | with open(yaml_file_path, 'w') as f:
55 | f.write("""
56 | x: 2
57 | y:
58 | z: 'test'
59 | """)
60 | c = hparam_configs.Config(dict(x=234, y=2342))
61 | c.override(yaml_file_path)
62 | self.assertEqual(c.as_dict(), {'x': 2, 'y': {'z': 'test'}})
63 |
64 | yaml_file_path2 = os.path.join(tmpdir, 'y.yaml')
65 | c.save_to_yaml(yaml_file_path2)
66 | with open(yaml_file_path2, 'r') as f:
67 | config_dict = yaml.load(f, Loader=yaml.FullLoader)
68 | self.assertEqual(config_dict, {'x': 2, 'y': {'z': 'test'}})
69 |
70 | def test_config_override_recursive(self):
71 | c = hparam_configs.Config({'x': 1})
72 | self.assertEqual(c.as_dict(), {'x': 1})
73 | c.override('y.y0=2,y.y1=3', allow_new_keys=True)
74 | self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 2, 'y1': 3}})
75 | c.update({'y': {'y0': 5, 'y1': {'y11': 100}}})
76 | self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 5, 'y1': {'y11': 100}}})
77 | self.assertEqual(c.y.y1.y11, 100)
78 |
79 | def test_config_override_list(self):
80 | c = hparam_configs.Config({'x': [1.0, 2.0]})
81 | self.assertEqual(c.as_dict(), {'x': [1.0, 2.0]})
82 | c.override('x=3.0|4.0|5.0')
83 | self.assertEqual(c.as_dict(), {'x': [3.0, 4.0, 5.0]})
84 |
85 | def test_registry_factory(self):
86 | registry = hparam_configs.RegistryFactory(prefix='test:')
87 |
88 | @registry.register()  # Uses the class name as the key by default.
89 | class A:
90 | pass
91 |
92 | @registry.register(name='special_b') # Use name as key if name is not None.
93 | class B:
94 | pass
95 |
96 | self.assertEqual(registry.lookup('A'), A)
97 | self.assertEqual(registry.lookup('special_b'), B)
98 | with self.assertRaises(KeyError):
99 | registry.lookup('B')
100 |
101 |
102 | if __name__ == '__main__':
103 | logging.set_verbosity(logging.WARNING)
104 | tf.test.main()
105 |
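The tests above pin down the override semantics without showing hparam_configs
itself. A minimal, self-contained sketch with the same kv-string behavior
(MiniConfig and _parse_value are illustrative names, not the real API) could
look like:

    import ast

    def _parse_value(text):
      """Parses a scalar override: bool first, then int/float, else string."""
      if text.lower() in ('true', 'false'):
        return text.lower() == 'true'
      try:
        return ast.literal_eval(text)  # Handles ints and floats.
      except (ValueError, SyntaxError):
        return text  # e.g. '2x3' stays a string.

    class MiniConfig(dict):
      """Dict with attribute access and kv-string overrides."""
      __getattr__ = dict.__getitem__
      __setattr__ = dict.__setitem__

      def override(self, kv_string, allow_new_keys=False):
        for item in kv_string.split(','):
          if not item:
            continue  # Extra ',' separators are tolerated.
          if '=' not in item:
            raise ValueError('Expected key=value, got: %r' % item)
          key, value = item.split('=', 1)
          # '|' separates list elements, e.g. 'x=3.0|4.0|5.0'.
          if '|' in value:
            parsed = [_parse_value(v) for v in value.split('|')]
          else:
            parsed = _parse_value(value)
          # Dotted keys such as 'y.y0' address nested configs.
          *parents, leaf = key.split('.')
          node = self
          for parent in parents:
            if parent not in node:
              if not allow_new_keys:
                raise KeyError(parent)
              node[parent] = MiniConfig()
            node = node[parent]
          if leaf not in node and not allow_new_keys:
            raise KeyError(leaf)
          node[leaf] = parsed

    c = MiniConfig({'x': 1})
    c.override('y.y0=2,y.y1=3', allow_new_keys=True)
    assert c.x == 1 and c.y.y0 == 2 and c.y.y1 == 3
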
--------------------------------------------------------------------------------
/utils/panoptic_cityscapes_categories.json:
--------------------------------------------------------------------------------
1 | [
2 | {"name": "road", "id": 7, "isthing": 0, "color": [128, 64, 128]},
3 | {"name": "sidewalk", "id": 8, "isthing": 0, "color": [244, 35, 232]},
4 | {"name": "building", "id": 11, "isthing": 0, "color": [ 70, 70, 70]},
5 | {"name": "wall", "id": 12, "isthing": 0, "color": [102, 102, 156]},
6 | {"name": "fence", "id": 13, "isthing": 0, "color": [190, 153, 153]},
7 | {"name": "pole", "id": 17, "isthing": 0, "color": [153, 153, 153]},
8 | {"name": "traffic light", "id": 19, "isthing": 0, "color": [250, 170, 30]},
9 | {"name": "traffic sign", "id": 20, "isthing": 0, "color": [220, 220, 0]},
10 | {"name": "vegetation", "id": 21, "isthing": 0, "color": [107, 142, 35]},
11 | {"name": "terrain", "id": 22, "isthing": 0, "color": [152, 251, 152]},
12 | {"name": "sky", "id": 23, "isthing": 0, "color": [ 70, 130, 180]},
13 | {"name": "person", "id": 24, "isthing": 1, "color": [220, 20, 60]},
14 | {"name": "rider", "id": 25, "isthing": 1, "color": [255, 0, 0]},
15 | {"name": "car", "id": 26, "isthing": 1, "color": [ 0, 0, 142]},
16 | {"name": "truck", "id": 27, "isthing": 1, "color": [ 0, 0, 70]},
17 | {"name": "bus", "id": 28, "isthing": 1, "color": [ 0, 60, 100]},
18 | {"name": "train", "id": 31, "isthing": 1, "color": [ 0, 80, 100]},
19 | {"name": "motorcycle", "id": 32, "isthing": 1, "color": [ 0, 0, 230]},
20 | {"name": "bicycle", "id": 33, "isthing": 1, "color": [119, 11, 32]}
21 | ]
22 |
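The "isthing" flag above is what separates Cityscapes "thing" classes
(countable instances) from "stuff" classes. A small sketch of consuming this
file (the path is relative to the repo root):

    import json

    with open('utils/panoptic_cityscapes_categories.json') as f:
      categories = json.load(f)

    thing_ids = sorted(c['id'] for c in categories if c['isthing'])
    stuff_ids = sorted(c['id'] for c in categories if not c['isthing'])
    # thing_ids == [24, 25, 26, 27, 28, 31, 32, 33]
    # stuff_ids == [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23]
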
--------------------------------------------------------------------------------
/utils/test_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Provide utility functions to write simple tests."""
17 | import functools
18 |
19 | import numpy as np
20 | import tensorflow as tf
21 |
22 |
23 | NORMALIZATION_LAYERS = (
24 | tf.keras.layers.experimental.SyncBatchNormalization,
25 | tf.keras.layers.BatchNormalization
26 | )
27 |
28 |
29 | def create_strategy():
30 | """Returns a strategy based on available devices.
31 |
32 | Does NOT work with local_multiworker_tpu_test tests!
33 | """
34 | tpus = tf.config.list_logical_devices(device_type='TPU')
35 | gpus = tf.config.list_logical_devices(device_type='GPU')
36 | if tpus:
37 | resolver = tf.distribute.cluster_resolver.TPUClusterResolver('')
38 | tf.config.experimental_connect_to_cluster(resolver)
39 | tf.tpu.experimental.initialize_tpu_system(resolver)
40 | return tf.distribute.TPUStrategy(resolver)
41 | elif gpus:
42 | return tf.distribute.OneDeviceStrategy('/gpu:0')
43 | else:
44 | return tf.distribute.OneDeviceStrategy('/cpu:0')
45 |
46 |
47 | def test_all_strategies(func):
48 | """Decorator to test CPU, GPU and TPU strategies."""
49 | @functools.wraps(func)
50 | def decorator(self):
51 | strategy = create_strategy()
52 | return func(self, strategy)
53 | return decorator
54 |
55 |
56 | def create_test_input(batch, height, width, channels):
57 | """Creates test input tensor."""
58 | return tf.convert_to_tensor(
59 | np.tile(
60 | np.reshape(
61 | np.reshape(np.arange(height), [height, 1]) +
62 | np.reshape(np.arange(width), [1, width]),
63 | [1, height, width, 1]),
64 | [batch, 1, 1, channels]), dtype=tf.float32)
65 |
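As the docstring notes, create_test_input fills position (b, h, w, c) with
h + w, so expected outputs are easy to compute by hand. Combined with the
decorator above, a test might look like this (MyOpTest is a hypothetical
example):

    class MyOpTest(tf.test.TestCase):

      @test_all_strategies
      def test_identity(self, strategy):
        with strategy.scope():
          x = create_test_input(batch=1, height=2, width=3, channels=1)
        # Each row is the height index plus the width indices 0, 1, 2.
        self.assertAllClose(x[0, :, :, 0], [[0., 1., 2.], [1., 2., 3.]])
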
--------------------------------------------------------------------------------
/video/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
--------------------------------------------------------------------------------