├── .gitignore ├── CONTRIBUTING.md ├── DeepLab_COCO_Demo.ipynb ├── DeepLab_Cityscapes_Demo.ipynb ├── LICENSE ├── README.md ├── ViP_DeepLab_Demo.ipynb ├── __init__.py ├── common.py ├── common_test.py ├── compile.sh ├── config.proto ├── configs ├── ade20k │ └── kmax_deeplab │ │ ├── kmax_meta_convnext_large_os32.textproto │ │ ├── kmax_meta_convnext_large_os32_res1281.textproto │ │ ├── kmax_meta_r50_os32.textproto │ │ └── kmax_meta_r50_os32_res1281.textproto ├── cityscapes │ ├── axial_deeplab │ │ ├── axial_swidernet_1_1_1_os16.textproto │ │ ├── axial_swidernet_1_1_3_os16.textproto │ │ ├── axial_swidernet_1_1_4.5_os16.textproto │ │ ├── max_deeplab_l_backbone_os16.textproto │ │ └── max_deeplab_s_backbone_os16.textproto │ ├── kmax_deeplab │ │ ├── kmax_meta_axial_r50_os32.textproto │ │ ├── kmax_meta_convnext_base_os32.textproto │ │ ├── kmax_meta_convnext_large_os32.textproto │ │ ├── kmax_meta_convnext_small_os32.textproto │ │ ├── kmax_meta_convnext_tiny_os32.textproto │ │ └── kmax_meta_r50_os32.textproto │ └── panoptic_deeplab │ │ ├── mobilenet_v3_large_os16.textproto │ │ ├── mobilenet_v3_large_os32.textproto │ │ ├── mobilenet_v3_small_os16.textproto │ │ ├── mobilenet_v3_small_os32.textproto │ │ ├── resnet50_beta_os32.textproto │ │ ├── resnet50_os32_merge_with_pure_tf_func.textproto │ │ ├── resnet50_os32_semseg.textproto │ │ ├── swidernet_sac_1_1_1_os16.textproto │ │ ├── swidernet_sac_1_1_3_os16.textproto │ │ ├── swidernet_sac_1_1_4.5_os16.textproto │ │ └── wide_resnet41_os16.textproto ├── cityscapes_dvps │ └── vip_deeplab │ │ ├── resnet50_beta_os32.textproto │ │ └── wide_resnet41_os16.textproto ├── coco │ ├── kmax_deeplab │ │ ├── kmax_meta_axial_r50_os32.textproto │ │ ├── kmax_meta_convnext_base_os32.textproto │ │ ├── kmax_meta_convnext_large_os32.textproto │ │ ├── kmax_meta_convnext_small_os32.textproto │ │ ├── kmax_meta_convnext_tiny_os32.textproto │ │ └── kmax_meta_r50_os32.textproto │ ├── max_deeplab │ │ ├── max_deeplab_l_os16_res1025_100k.textproto │ │ ├── max_deeplab_l_os16_res1025_200k.textproto │ │ ├── max_deeplab_l_os16_res1025_400k.textproto │ │ ├── max_deeplab_s_os16_res1025_100k.textproto │ │ ├── max_deeplab_s_os16_res1025_200k.textproto │ │ ├── max_deeplab_s_os16_res1025_400k.textproto │ │ ├── max_deeplab_s_os16_res641_100k.textproto │ │ ├── max_deeplab_s_os16_res641_200k.textproto │ │ └── max_deeplab_s_os16_res641_400k.textproto │ └── panoptic_deeplab │ │ ├── resnet50_beta_os16.textproto │ │ ├── resnet50_beta_os32.textproto │ │ ├── resnet50_os16.textproto │ │ └── resnet50_os32.textproto ├── example │ ├── example_cityscapes_deeplabv3.textproto │ ├── example_cityscapes_deeplabv3_mv3l.textproto │ ├── example_cityscapes_deeplabv3plus.textproto │ ├── example_cityscapes_panoptic_deeplab.textproto │ ├── example_cityscapes_panoptic_deeplab_mv3l.textproto │ ├── example_coco_kmax_meta_convnext.textproto │ ├── example_coco_max_deeplab.textproto │ └── example_kitti-step_motion_deeplab.textproto ├── kitti │ ├── motion_deeplab │ │ ├── resnet50_os32.textproto │ │ └── resnet50_os32_trainval.textproto │ └── panoptic_deeplab │ │ ├── resnet50_os32.textproto │ │ └── resnet50_os32_trainval.textproto ├── motchallenge │ ├── motion_deeplab │ │ └── resnet50_os32.textproto │ └── panoptic_deeplab │ │ └── resnet50_os32.textproto └── semkitti_dvps │ └── vip_deeplab │ └── resnet50_beta_os32.textproto ├── data ├── __init__.py ├── ade20k_constants.py ├── build_ade20k_data.py ├── build_cityscapes_data.py ├── build_cityscapes_data_test.py ├── build_coco_data.py ├── build_coco_data_test.py ├── build_dvps_data.py ├── 
build_step_data.py ├── build_step_data_test.py ├── coco_constants.py ├── data_utils.py ├── data_utils_test.py ├── dataloader │ ├── __init__.py │ ├── input_reader.py │ └── multicamera_input_reader.py ├── dataset.py ├── dataset_utils.py ├── dataset_utils_test.py ├── multicamera_data_utils.py ├── preprocessing │ ├── __init__.py │ ├── autoaugment_policy.py │ ├── autoaugment_policy_test.py │ ├── autoaugment_utils.py │ ├── autoaugment_utils_test.py │ ├── input_preprocessing.py │ ├── input_preprocessing_test.py │ ├── preprocess_utils.py │ └── preprocess_utils_test.py ├── sample_generator.py ├── sample_generator_test.py ├── testdata │ ├── create_test_data.py │ ├── dummy_gt_for_vps.png │ ├── dummy_prediction.png │ ├── gtFine │ │ ├── cityscapes_panoptic_dummy_trainId.json │ │ └── cityscapes_panoptic_dummy_trainId │ │ │ └── dummy_000000_000000_gtFine_panoptic.png │ ├── leftImg8bit │ │ └── dummy_000000_000000_leftImg8bit.png │ └── targets │ │ ├── center_target.png │ │ ├── center_weights.png │ │ ├── eval_is_crowd.npy │ │ ├── eval_panoptic_target.npy │ │ ├── eval_semantic_target.png │ │ ├── is_crowd.npy │ │ ├── offset_target.npy │ │ ├── offset_weights.png │ │ ├── panoptic_target.npy │ │ ├── panoptic_target.png │ │ ├── rgb_target.png │ │ ├── semantic_target.png │ │ ├── semantic_weights.png │ │ ├── thing_id_class_target.npy │ │ └── thing_id_mask_target.npy ├── utils │ ├── __init__.py │ └── create_step_panoptic_maps.py └── waymo_constants.py ├── dataset.proto ├── evaluation ├── __init__.py ├── coco_instance_ap.py ├── coco_instance_ap_test.py ├── depth_aware_segmentation_and_tracking_quality.py ├── depth_aware_segmentation_and_tracking_quality_test.py ├── depth_metrics.py ├── depth_metrics_test.py ├── numpy │ ├── __init__.py │ ├── segmentation_and_tracking_quality.py │ └── segmentation_and_tracking_quality_test.py ├── panoptic_quality.py ├── panoptic_quality_test.py ├── segmentation_and_tracking_quality.py ├── segmentation_and_tracking_quality_test.py ├── test_utils.py ├── test_utils_test.py ├── testdata │ ├── README.md │ ├── bird_gt.png │ ├── bird_pred_class.png │ ├── bird_pred_instance.png │ ├── cat_gt.png │ ├── cat_pred_class.png │ ├── cat_pred_instance.png │ ├── team_gt_instance.png │ ├── team_pred_class.png │ └── team_pred_instance.png └── video_panoptic_quality.py ├── evaluator.proto ├── export_model.py ├── g3doc ├── faq.md ├── img │ ├── axial_deeplab │ │ ├── axial_block.png │ │ ├── nonlocal_block.png │ │ └── position_sensitive_axial_block.png │ ├── kmax_deeplab │ │ ├── clustering_view_of_mask_transformer.png │ │ └── kmax_decoder.png │ ├── max_deeplab │ │ ├── overview.png │ │ └── overview_simple.png │ ├── moat │ │ ├── moat_block.png │ │ ├── moat_imagenet1k_224.png │ │ └── moat_imagenet22k_384.png │ ├── panoptic_deeplab.png │ ├── step │ │ └── kitti_step_annotation.png │ └── vip_deeplab │ │ └── demo.gif ├── projects │ ├── axial_deeplab.md │ ├── imagenet_pretrained_checkpoints.md │ ├── kmax_deeplab.md │ ├── max_deeplab.md │ ├── moat_imagenet_pretrained_checkpoints.md │ ├── motion_deeplab.md │ ├── panoptic_deeplab.md │ ├── vip_deeplab.md │ └── wod_pvps.md └── setup │ ├── ade20k.md │ ├── cityscapes.md │ ├── cityscapes_test_server_evaluation.md │ ├── coco.md │ ├── coco_test_server_evaluation.md │ ├── getting_started.md │ ├── installation.md │ ├── kitti_step.md │ ├── motchallenge_step.md │ └── your_own_dataset.md ├── model.proto ├── model ├── __init__.py ├── builder.py ├── builder_test.py ├── decoder │ ├── __init__.py │ ├── aspp.py │ ├── aspp_test.py │ ├── deeplabv3.py │ ├── deeplabv3_test.py │ ├── 
deeplabv3plus.py │ ├── deeplabv3plus_test.py │ ├── max_deeplab.py │ ├── max_deeplab_test.py │ ├── motion_deeplab_decoder.py │ ├── panoptic_deeplab.py │ ├── panoptic_deeplab_test.py │ └── vip_deeplab_decoder.py ├── deeplab.py ├── deeplab_test.py ├── encoder │ ├── __init__.py │ ├── atrous_consistency_test.py │ ├── axial_resnet.py │ ├── axial_resnet_instances.py │ ├── axial_resnet_instances_test.py │ ├── axial_resnet_test.py │ ├── mobilenet.py │ ├── mobilenet_test.py │ └── model_export_test.py ├── kmax_deeplab.py ├── kmax_deeplab_test.py ├── layers │ ├── __init__.py │ ├── activations.py │ ├── activations_test.py │ ├── axial_block_groups.py │ ├── axial_block_groups_test.py │ ├── axial_blocks.py │ ├── axial_blocks_test.py │ ├── axial_layers.py │ ├── axial_layers_test.py │ ├── blocks.py │ ├── blocks_test.py │ ├── convolutions.py │ ├── convolutions_test.py │ ├── drop_path.py │ ├── drop_path_test.py │ ├── dual_path_transformer.py │ ├── dual_path_transformer_test.py │ ├── moat_attention.py │ ├── moat_attention_test.py │ ├── moat_blocks.py │ ├── moat_blocks_test.py │ ├── positional_encodings.py │ ├── positional_encodings_test.py │ ├── recompute_grad.py │ ├── recompute_grad_test.py │ ├── resized_fuse.py │ ├── resized_fuse_test.py │ ├── squeeze_and_excite.py │ ├── squeeze_and_excite_test.py │ ├── stems.py │ └── stems_test.py ├── loss │ ├── base_loss.py │ ├── base_loss_test.py │ ├── loss_builder.py │ ├── loss_builder_test.py │ ├── matchers_ops.py │ ├── matchers_ops_test.py │ ├── max_deeplab_loss.py │ └── max_deeplab_loss_test.py ├── pixel_decoder │ ├── kmax.py │ └── kmax_test.py ├── pixel_encoder │ ├── axial_resnet.py │ ├── axial_resnet_test.py │ ├── convnext.py │ ├── convnext_test.py │ ├── moat.py │ └── moat_test.py ├── post_processor │ ├── __init__.py │ ├── max_deeplab.py │ ├── max_deeplab_test.py │ ├── motion_deeplab.py │ ├── panoptic_deeplab.py │ ├── panoptic_deeplab_test.py │ ├── post_processor_builder.py │ ├── post_processor_builder_test.py │ ├── vip_deeplab.py │ └── vip_deeplab_test.py ├── test_utils.py ├── test_utils_test.py ├── transformer_decoder │ ├── kmax.py │ └── kmax_test.py ├── utils.py └── utils_test.py ├── tensorflow_ops ├── kernels │ ├── merge_semantic_and_instance_maps_op.cc │ ├── merge_semantic_and_instance_maps_op_kernel.cc │ ├── merge_semantic_and_instance_maps_op_kernel.cu.cc │ └── merge_semantic_and_instance_maps_op_kernel.h └── python │ ├── kernel_tests │ ├── __init__.py │ └── merge_semantic_and_instance_maps_op_test.py │ └── ops │ ├── __init__.py │ └── merge_semantic_and_instance_maps_op.py ├── tracker ├── __init__.py ├── iou_tracker.py └── optical_flow_utils.py ├── trainer.proto ├── trainer ├── __init__.py ├── distribution_utils.py ├── evaluator.py ├── evaluator_test.py ├── runner_utils.py ├── runner_utils_test.py ├── train.py ├── train_lib.py ├── trainer.py ├── trainer_utils.py ├── vis.py └── vis_utils.py ├── utils ├── __init__.py ├── coco_tools.py ├── coco_tools_test.py ├── create_images_json_for_cityscapes.py ├── hparam_configs.py ├── hparam_configs_test.py ├── net_surgery_convert_last_layer.py ├── panoptic_cityscapes_categories.json ├── panoptic_instances.py ├── panoptic_instances_test.py └── test_utils.py └── video ├── __init__.py ├── motion_deeplab.py └── vip_deeplab.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Caching. 2 | __pycache__/ 3 | 4 | # IDE settings. 5 | .vscode/ 6 | .idea/ 7 | .env 8 | .config/ 9 | 10 | # Generated proto files. 
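# (compile.sh generates these via `protoc deeplab2/*.proto --python_out=.`.)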
11 | *_pb2.py 12 | 13 | # For Mac. 14 | *.DS_Store 15 | 16 | # Generated files. 17 | *.o 18 | *.so -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /common_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """Tests for common.py.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2 import common 20 | 21 | 22 | class CommonTest(tf.test.TestCase): 23 | 24 | def test_constants_keys(self): 25 | self.assertEqual(common.PRED_PANOPTIC_KEY, 'panoptic_pred') 26 | self.assertEqual(common.PRED_SEMANTIC_KEY, 'semantic_pred') 27 | self.assertEqual(common.PRED_INSTANCE_CENTER_KEY, 'instance_center_pred') 28 | self.assertEqual(common.PRED_INSTANCE_KEY, 'instance_pred') 29 | 30 | self.assertEqual(common.PRED_SEMANTIC_LOGITS_KEY, 'semantic_logits') 31 | self.assertEqual(common.PRED_CENTER_HEATMAP_KEY, 'center_heatmap') 32 | self.assertEqual(common.PRED_OFFSET_MAP_KEY, 'offset_map') 33 | self.assertEqual(common.PRED_FRAME_OFFSET_MAP_KEY, 'frame_offset_map') 34 | 35 | self.assertEqual(common.GT_PANOPTIC_KEY, 'panoptic_gt') 36 | self.assertEqual(common.GT_SEMANTIC_KEY, 'semantic_gt') 37 | self.assertEqual(common.GT_INSTANCE_CENTER_KEY, 'instance_center_gt') 38 | self.assertEqual(common.GT_FRAME_OFFSET_KEY, 'frame_offset_gt') 39 | self.assertEqual(common.GT_INSTANCE_REGRESSION_KEY, 40 | 'instance_regression_gt') 41 | self.assertEqual(common.GT_PANOPTIC_RAW, 'panoptic_raw') 42 | self.assertEqual(common.GT_SEMANTIC_RAW, 'semantic_raw') 43 | self.assertEqual(common.GT_SIZE_RAW, 'size_raw') 44 | 45 | self.assertEqual(common.SEMANTIC_LOSS_WEIGHT_KEY, 'semantic_loss_weight') 46 | self.assertEqual(common.CENTER_LOSS_WEIGHT_KEY, 'center_loss_weight') 47 | self.assertEqual(common.REGRESSION_LOSS_WEIGHT_KEY, 48 | 'regression_loss_weight') 49 | self.assertEqual(common.FRAME_REGRESSION_LOSS_WEIGHT_KEY, 50 | 'frame_regression_loss_weight') 51 | 52 | self.assertEqual(common.RESIZED_IMAGE, 'resized_image') 53 | self.assertEqual(common.IMAGE, 'image') 54 | self.assertEqual(common.IMAGE_NAME, 'image_name') 55 | self.assertEqual(common.SEQUENCE_ID, 'sequence_id') 56 | self.assertEqual(common.FRAME_ID, 'frame_id') 57 | 58 | self.assertEqual(common.KEY_FRAME_ID, 'video/frame_id') 59 | self.assertEqual(common.KEY_SEQUENCE_ID, 'video/sequence_id') 60 | self.assertEqual(common.KEY_LABEL_FORMAT, 'image/segmentation/class/format') 61 | self.assertEqual(common.KEY_ENCODED_PREV_LABEL, 62 | 'prev_image/segmentation/class/encoded') 63 | self.assertEqual(common.KEY_ENCODED_LABEL, 64 | 'image/segmentation/class/encoded') 65 | self.assertEqual(common.KEY_IMAGE_CHANNELS, 'image/channels') 66 | self.assertEqual(common.KEY_IMAGE_WIDTH, 'image/width') 67 | self.assertEqual(common.KEY_IMAGE_HEIGHT, 'image/height') 68 | self.assertEqual(common.KEY_IMAGE_FORMAT, 'image/format') 69 | self.assertEqual(common.KEY_IMAGE_FILENAME, 'image/filename') 70 | self.assertEqual(common.KEY_ENCODED_PREV_IMAGE, 'prev_image/encoded') 71 | self.assertEqual(common.KEY_ENCODED_IMAGE, 'image/encoded') 72 | 73 | def test_multicamera_keys(self): 74 | test_camera_name = 'front' 75 | expected = { 76 | common.KEY_PER_CAMERA_ENCODED_IMAGE: 77 | 'image/encoded/%s', 78 | common.KEY_PER_CAMERA_ENCODED_NEXT_IMAGE: 79 | 'next_image/encoded/%s', 80 | common.KEY_PER_CAMERA_IMAGE_HEIGHT: 81 | 'image/height/%s', 82 | common.KEY_PER_CAMERA_IMAGE_WIDTH: 83 | 'image/width/%s', 84 | common.KEY_PER_CAMERA_ENCODED_LABEL: 85 | 'image/segmentation/class/encoded/%s', 86 | common.KEY_PER_CAMERA_ENCODED_NEXT_LABEL: 87 | 'next_image/segmentation/class/encoded/%s', 88 | common.KEY_PER_CAMERA_ENCODED_DEPTH: 89 | 'image/depth/encoded/%s', 90 | } 91 | for key, val in expected.items(): 92 | self.assertEqual(key % test_camera_name, val % test_camera_name) 93 | 94 | 95 | if __name__ == 
'__main__': 96 | tf.test.main() 97 | -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The Deeplab2 Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Quick start command line to set up deeplab2 (Linux only). 16 | # Example command to run: 17 | # deeplab2/compile.sh [cpu|gpu] 18 | # 19 | # This script assumes the following folder structure: 20 | # 21 | # + root 22 | # + deeplab2 23 | # + models 24 | # + orbit 25 | # + cocoapi 26 | # + PythonAPI 27 | # 28 | # The script also assumes that `protoc` can be accessed from the command 29 | # line. 30 | 31 | #!/bin/bash 32 | 33 | set -e 34 | 35 | # cpu or gpu 36 | CONFIG="cpu" 37 | 38 | function tolower() { 39 | echo "${1,,}" 40 | } 41 | 42 | if [[ ! -z "$1" ]] 43 | then 44 | echo "Setting configuration from argument ($1)..." 45 | CONFIG=$(tolower "$1") 46 | if [ "$CONFIG" != "cpu" ] && [ "$CONFIG" != "gpu" ] 47 | then 48 | echo "Configuration must be either \"cpu\" or \"gpu\", exiting..." 49 | exit 1 50 | fi 51 | fi 52 | 53 | echo "Running configuration with $CONFIG." 54 | 55 | # Protobuf compilation 56 | # Replace `protoc` with `${PATH_TO_PROTOC}` if the protobuf compiler was 57 | # downloaded from the web. 58 | echo "-----------------------------------------------------------------------" 59 | echo "Compiling protobuf..." 60 | echo "-----------------------------------------------------------------------" 61 | protoc deeplab2/*.proto --python_out=. 62 | 63 | # Compile custom ops 64 | # See details in https://www.tensorflow.org/guide/create_op#compile_the_op_using_your_system_compiler_tensorflow_binary_installation 65 | TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) 66 | TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) 67 | OP_NAME='deeplab2/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op' 68 | 69 | if [ "$CONFIG" == "cpu" ] 70 | then 71 | # CPU 72 | echo "-----------------------------------------------------------------------" 73 | echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (CPU)..." 74 | echo "-----------------------------------------------------------------------" 75 | g++ -std=c++14 -shared \ 76 | ${OP_NAME}.cc ${OP_NAME}_kernel.cc -o ${OP_NAME}.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 77 | else 78 | # GPU 79 | # (https://www.tensorflow.org/guide/create_op#compiling_the_kernel_for_the_gpu_device) 80 | echo "-----------------------------------------------------------------------" 81 | echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (GPU)..."
82 | echo "-----------------------------------------------------------------------" 83 | nvcc -std=c++14 -c -o ${OP_NAME}_kernel.cu.o \ 84 | ${OP_NAME}_kernel.cu.cc \ 85 | ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 86 | 87 | g++ -std=c++14 -shared -o ${OP_NAME}.so ${OP_NAME}.cc ${OP_NAME}_kernel.cc \ 88 | ${OP_NAME}_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} 89 | fi 90 | 91 | # PYTHONPATH 92 | export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/models:`pwd`/cocoapi/PythonAPI 93 | 94 | # Running tests 95 | echo "-----------------------------------------------------------------------" 96 | echo "Running tests for merge_semantic_and_instance_maps_op..." 97 | echo "-----------------------------------------------------------------------" 98 | python deeplab2/tensorflow_ops/python/kernel_tests/merge_semantic_and_instance_maps_op_test.py 99 | 100 | # End-to-end tests 101 | echo "-----------------------------------------------------------------------" 102 | echo "Running end-to-end tests..." 103 | echo "-----------------------------------------------------------------------" 104 | 105 | # Model training test (tests custom ops and protobuf) 106 | python deeplab2/model/deeplab_test.py 107 | 108 | # Model evaluation test (tests other packages such as orbit, cocoapi, etc.) 109 | python deeplab2/trainer/evaluator_test.py 110 | 111 | echo "------------------------" 112 | echo "Done with configuration!" 113 | echo "------------------------" 114 | 115 | -------------------------------------------------------------------------------- /config.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The Deeplab2 Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | syntax = "proto2"; 16 | 17 | package deeplab2; 18 | 19 | import public 'deeplab2/dataset.proto'; 20 | import public 'deeplab2/evaluator.proto'; 21 | import public 'deeplab2/model.proto'; 22 | import public 'deeplab2/trainer.proto'; 23 | 24 | option java_multiple_files = true; 25 | 26 | // Configure experiment options. 27 | message ExperimentOptions { 28 | // Set the experiment name. 29 | optional string experiment_name = 1; 30 | // Set the options for the model. 31 | optional ModelOptions model_options = 2; 32 | // Set the options for the trainer. 33 | optional TrainerOptions trainer_options = 3; 34 | // Set the options for the training dataset. 35 | optional DatasetOptions train_dataset_options = 4; 36 | // Set the options for the evaluator. 37 | optional EvaluatorOptions evaluator_options = 5; 38 | // Set the options for the validation dataset.
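// (For fully populated ExperimentOptions messages, see the textproto configs under configs/, e.g. the resnet50_os32_semseg.textproto that follows.)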
39 | optional DatasetOptions eval_dataset_options = 6; 40 | } 41 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_deeplab/resnet50_os32_semseg.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | # 4 | # Panoptic-DeepLab with ResNet-50 and output stride 32. 5 | # 6 | ############### PLEASE READ THIS BEFORE USING THIS CONFIG ############### 7 | # Before using this config, you need to update the following fields: 8 | # - experiment_name: Use a unique experiment name for each experiment. 9 | # - initial_checkpoint: Update the path to the initial checkpoint. 10 | # - train_dataset_options.file_pattern: Update the path to the 11 | # training set. e.g., your_dataset/train*.tfrecord 12 | # - eval_dataset_options.file_pattern: Update the path to the 13 | # validation set, e.g., your_dataset/eval*.tfrecord 14 | ######################################################################### 15 | # 16 | # This config provides an example of training Panoptic-DeepLab with ONLY 17 | # semantic segmentation (i.e., the instance/panoptic segmentation is not 18 | # trained). This could be used for some datasets that provide only 19 | # semantic segmentation annotations. 20 | # 21 | # For ResNet, see 22 | # - Kaiming He, et al. "Deep Residual Learning for Image Recognition." 23 | # In CVPR, 2016. 24 | # For Panoptic-DeepLab, see 25 | # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline 26 | # for Bottom-Up Panoptic Segmentation." In CVPR, 2020. 27 | 28 | # Use a unique experiment_name for each experiment. 29 | experiment_name: "${EXPERIMENT_NAME}" 30 | model_options { 31 | # Update the path to the initial checkpoint (e.g., ImageNet 32 | # pretrained checkpoint). 33 | initial_checkpoint: "${INIT_CHECKPOINT}" 34 | backbone { 35 | name: "resnet50" 36 | output_stride: 32 37 | } 38 | decoder { 39 | feature_key: "res5" 40 | decoder_channels: 256 41 | aspp_channels: 256 42 | atrous_rates: 3 43 | atrous_rates: 6 44 | atrous_rates: 9 45 | } 46 | panoptic_deeplab { 47 | low_level { 48 | feature_key: "res3" 49 | channels_project: 64 50 | } 51 | low_level { 52 | feature_key: "res2" 53 | channels_project: 32 54 | } 55 | instance { 56 | enable: false 57 | } 58 | semantic_head { 59 | output_channels: 19 60 | head_channels: 256 61 | } 62 | } 63 | } 64 | trainer_options { 65 | save_checkpoints_steps: 1000 66 | save_summaries_steps: 100 67 | steps_per_loop: 100 68 | loss_options { 69 | semantic_loss { 70 | name: "softmax_cross_entropy" 71 | weight: 1.0 72 | top_k_percent: 0.2 73 | } 74 | } 75 | solver_options { 76 | base_learning_rate: 0.0005 77 | training_number_of_steps: 60000 78 | } 79 | } 80 | train_dataset_options { 81 | dataset: "cityscapes_panoptic" 82 | # Update the path to training set. 83 | file_pattern: "${TRAIN_SET}" 84 | # Adjust the batch_size accordingly to better fit your GPU/TPU memory. 85 | # Also see Q1 in g3doc/faq.md. 86 | batch_size: 8 87 | crop_size: 1025 88 | crop_size: 2049 89 | # Skip resizing. 90 | min_resize_value: 0 91 | max_resize_value: 0 92 | augmentations { 93 | min_scale_factor: 0.5 94 | max_scale_factor: 2.0 95 | scale_factor_step_size: 0.1 96 | } 97 | } 98 | eval_dataset_options { 99 | dataset: "cityscapes_panoptic" 100 | # Update the path to validation set. 101 | file_pattern: "${VAL_SET}" 102 | batch_size: 1 103 | crop_size: 1025 104 | crop_size: 2049 105 | # Skip resizing. 
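# (A min_resize_value/max_resize_value of 0 disables the resize step, so evaluation uses the raw 1024x2048 Cityscapes frames together with the 1025x2049 crop above.)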
106 | min_resize_value: 0 107 | max_resize_value: 0 108 | } 109 | evaluator_options { 110 | continuous_eval_timeout: -1 111 | save_predictions: true 112 | save_raw_predictions: false 113 | } 114 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_deeplabv3.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "resnet50" 14 | } 15 | 16 | # Example for cityscapes. 17 | deeplab_v3 { 18 | num_classes: 19 19 | } 20 | } 21 | 22 | train_dataset_options { 23 | crop_size: 1025 24 | crop_size: 2049 25 | } 26 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_deeplabv3_mv3l.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "mobilenet_v3_large" 14 | use_squeeze_and_excite: true 15 | } 16 | 17 | # Example for cityscapes. 18 | deeplab_v3 { 19 | num_classes: 19 20 | } 21 | } 22 | 23 | train_dataset_options { 24 | crop_size: 1025 25 | crop_size: 2049 26 | } 27 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_deeplabv3plus.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "resnet50" 14 | } 15 | 16 | deeplab_v3_plus { 17 | low_level { 18 | feature_key: "res2" 19 | channels_project: 48 20 | } 21 | # Example for cityscapes. 22 | num_classes: 19 23 | } 24 | } 25 | 26 | train_dataset_options { 27 | crop_size: 1025 28 | crop_size: 2049 29 | } 30 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_panoptic_deeplab.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "resnet50" 14 | } 15 | 16 | panoptic_deeplab { 17 | low_level { 18 | feature_key: "res3" 19 | channels_project: 64 20 | } 21 | low_level { 22 | feature_key: "res2" 23 | channels_project: 32 24 | } 25 | semantic_head { 26 | # Example for cityscapes. 
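# (Cityscapes defines 19 trainId classes, hence 19 output channels.)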
27 | output_channels: 19 28 | head_channels: 256 29 | } 30 | instance { 31 | instance_decoder_override { 32 | feature_key: "res5" 33 | decoder_channels: 128 34 | atrous_rates: 6 35 | atrous_rates: 12 36 | atrous_rates: 18 37 | } 38 | low_level_override { 39 | feature_key: "res3" 40 | channels_project: 32 41 | } 42 | low_level_override { 43 | feature_key: "res2" 44 | channels_project: 16 45 | } 46 | center_head { 47 | output_channels: 1 48 | head_channels: 32 49 | } 50 | regression_head { 51 | output_channels: 2 52 | head_channels: 32 53 | } 54 | } 55 | } 56 | } 57 | 58 | train_dataset_options { 59 | crop_size: 1025 60 | crop_size: 2049 61 | } 62 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "mobilenet_v3_large" 14 | use_squeeze_and_excite: true 15 | } 16 | 17 | panoptic_deeplab { 18 | low_level { 19 | feature_key: "res3" 20 | channels_project: 64 21 | } 22 | low_level { 23 | feature_key: "res2" 24 | channels_project: 32 25 | } 26 | semantic_head { 27 | # Example for cityscapes. 28 | output_channels: 19 29 | head_channels: 256 30 | } 31 | instance { 32 | instance_decoder_override { 33 | feature_key: "res5" 34 | decoder_channels: 128 35 | atrous_rates: 6 36 | atrous_rates: 12 37 | atrous_rates: 18 38 | } 39 | low_level_override { 40 | feature_key: "res3" 41 | channels_project: 32 42 | } 43 | low_level_override { 44 | feature_key: "res2" 45 | channels_project: 16 46 | } 47 | center_head { 48 | output_channels: 1 49 | head_channels: 32 50 | } 51 | regression_head { 52 | output_channels: 2 53 | head_channels: 32 54 | } 55 | } 56 | } 57 | } 58 | 59 | train_dataset_options { 60 | crop_size: 1025 61 | crop_size: 2049 62 | } 63 | -------------------------------------------------------------------------------- /configs/example/example_coco_kmax_meta_convnext.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "feature_semantic" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "kmax_convnext_base" 14 | drop_path_keep_prob: 0.5 15 | } 16 | 17 | max_deeplab { 18 | pixel_space_head { 19 | output_channels: 128 20 | head_channels: 256 21 | } 22 | auxiliary_low_level { 23 | feature_key: "res3" 24 | channels_project: 64 25 | } 26 | auxiliary_low_level { 27 | feature_key: "res2" 28 | channels_project: 32 29 | } 30 | auxiliary_semantic_head { 31 | # Example for COCO. 
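# (134 = 133 COCO panoptic classes plus one void class.)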
32 | output_channels: 134 33 | head_channels: 256 34 | } 35 | } 36 | } 37 | 38 | train_dataset_options { 39 | crop_size: 65 40 | crop_size: 65 41 | } 42 | -------------------------------------------------------------------------------- /configs/example/example_coco_max_deeplab.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "feature_semantic" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "max_deeplab_s" 14 | output_stride: 16 15 | } 16 | 17 | max_deeplab { 18 | pixel_space_head { 19 | output_channels: 128 20 | head_channels: 256 21 | } 22 | auxiliary_low_level { 23 | feature_key: "res3" 24 | channels_project: 64 25 | } 26 | auxiliary_low_level { 27 | feature_key: "res2" 28 | channels_project: 32 29 | } 30 | auxiliary_semantic_head { 31 | # Example for COCO. 32 | output_channels: 134 33 | head_channels: 256 34 | } 35 | } 36 | } 37 | 38 | train_dataset_options { 39 | crop_size: 65 40 | crop_size: 65 41 | } 42 | -------------------------------------------------------------------------------- /configs/example/example_kitti-step_motion_deeplab.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/model.proto 2 | # proto-message: ModelOptions 3 | 4 | decoder { 5 | feature_key: "res5" 6 | atrous_rates: 6 7 | atrous_rates: 12 8 | atrous_rates: 18 9 | } 10 | 11 | backbone { 12 | name: "resnet50" 13 | } 14 | 15 | # Motion-Deeplab adopts Panoptic-Deeplab for the task of Video Panoptic 16 | # Segmentation or Segmenting and Tracking Every Pixel (STEP). 17 | motion_deeplab { 18 | low_level { 19 | feature_key: "res3" 20 | channels_project: 64 21 | } 22 | low_level { 23 | feature_key: "res2" 24 | channels_project: 32 25 | } 26 | semantic_head { 27 | # Example for KITTI-STEP. 28 | output_channels: 19 29 | head_channels: 256 30 | } 31 | instance { 32 | instance_decoder_override { 33 | feature_key: "res5" 34 | decoder_channels: 128 35 | atrous_rates: 6 36 | atrous_rates: 12 37 | atrous_rates: 18 38 | } 39 | low_level_override { 40 | feature_key: "res3" 41 | channels_project: 32 42 | } 43 | low_level_override { 44 | feature_key: "res2" 45 | channels_project: 16 46 | } 47 | center_head { 48 | output_channels: 1 49 | head_channels: 32 50 | } 51 | regression_head { 52 | output_channels: 2 53 | head_channels: 32 54 | } 55 | } 56 | motion_head { 57 | output_channels: 2 58 | head_channels: 32 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /data/build_cityscapes_data_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for build_cityscapes_data.""" 17 | 18 | import os 19 | 20 | from absl import flags 21 | import numpy as np 22 | from PIL import Image 23 | import tensorflow as tf 24 | 25 | from deeplab2.data import build_cityscapes_data 26 | 27 | 28 | FLAGS = flags.FLAGS 29 | _TEST_DATA_DIR = 'deeplab2/data/testdata' 30 | _TEST_FILE_PREFIX = 'dummy_000000_000000' 31 | 32 | 33 | class BuildCityscapesDataTest(tf.test.TestCase): 34 | 35 | def test_read_segments(self): 36 | cityscapes_root = os.path.join(_TEST_DATA_DIR) 37 | segments_dict = build_cityscapes_data._read_segments( 38 | cityscapes_root, dataset_split='dummy') 39 | self.assertIn(_TEST_FILE_PREFIX, segments_dict) 40 | _, segments = segments_dict[_TEST_FILE_PREFIX] 41 | self.assertLen(segments, 10) 42 | 43 | def test_generate_panoptic_label(self): 44 | FLAGS.treat_crowd_as_ignore = False # Test a more complicated setting 45 | cityscapes_root = os.path.join(_TEST_DATA_DIR) 46 | segments_dict = build_cityscapes_data._read_segments( 47 | cityscapes_root, dataset_split='dummy') 48 | annotation_file_name, segments = segments_dict[_TEST_FILE_PREFIX] 49 | panoptic_annotation_file = build_cityscapes_data._get_panoptic_annotation( 50 | cityscapes_root, dataset_split='dummy', 51 | annotation_file_name=annotation_file_name) 52 | panoptic_label = build_cityscapes_data._generate_panoptic_label( 53 | panoptic_annotation_file, segments) 54 | 55 | # Check panoptic label matches golden file. 56 | golden_file_path = os.path.join(_TEST_DATA_DIR, 57 | 'dummy_gt_for_vps.png') 58 | with tf.io.gfile.GFile(golden_file_path, 'rb') as f: 59 | golden_label = Image.open(f) 60 | # The PNG file is encoded by: 61 | # color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256] 62 | golden_label = np.dot(np.asarray(golden_label), [1, 256, 256 * 256]) 63 | 64 | np.testing.assert_array_equal(panoptic_label, golden_label) 65 | 66 | if __name__ == '__main__': 67 | tf.test.main() 68 | -------------------------------------------------------------------------------- /data/data_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for data_utils.""" 17 | 18 | import io 19 | import numpy as np 20 | from PIL import Image 21 | import tensorflow as tf 22 | 23 | from deeplab2.data import data_utils 24 | 25 | 26 | def _encode_png_image(image): 27 | """Helper method to encode input image in PNG format.""" 28 | buffer = io.BytesIO() 29 | Image.fromarray(image).save(buffer, format='png') 30 | return buffer.getvalue() 31 | 32 | 33 | class DataUtilsTest(tf.test.TestCase): 34 | 35 | def _create_test_image(self, height, width): 36 | rng = np.random.RandomState(319281498) 37 | return rng.randint(0, 255, size=(height, width, 3), dtype=np.uint8) 38 | 39 | def test_encode_and_decode(self): 40 | """Checks decode created tf.Example for semantic segmentation.""" 41 | test_image_height = 20 42 | test_image_width = 15 43 | filename = 'dummy' 44 | 45 | image = self._create_test_image(test_image_height, test_image_width) 46 | # Take the last channel as dummy label. 47 | label = image[..., 0] 48 | 49 | example = data_utils.create_tfexample( 50 | image_data=_encode_png_image(image), 51 | image_format='png', filename=filename, 52 | label_data=_encode_png_image(label), label_format='png') 53 | 54 | # Parse created example, expect getting identical results. 55 | parser = data_utils.SegmentationDecoder(is_panoptic_dataset=False) 56 | parsed_tensors = parser(example.SerializeToString()) 57 | 58 | self.assertIn('image', parsed_tensors) 59 | self.assertIn('image_name', parsed_tensors) 60 | self.assertIn('label', parsed_tensors) 61 | self.assertEqual(filename, parsed_tensors['image_name']) 62 | np.testing.assert_array_equal(image, parsed_tensors['image'].numpy()) 63 | # Decoded label is a 3-D array with last dimension of 1. 64 | decoded_label = parsed_tensors['label'].numpy() 65 | np.testing.assert_array_equal(label, decoded_label[..., 0]) 66 | 67 | def test_encode_and_decode_panoptic(self): 68 | test_image_height = 31 69 | test_image_width = 17 70 | filename = 'dummy' 71 | 72 | image = self._create_test_image(test_image_height, test_image_width) 73 | # Create dummy panoptic label in np.int32 dtype. 74 | label = np.dot(image.astype(np.int32), [1, 256, 256 * 256]).astype(np.int32) 75 | example = data_utils.create_tfexample( 76 | image_data=_encode_png_image(image), 77 | image_format='png', filename=filename, 78 | label_data=label.tostring(), label_format='raw') 79 | 80 | parser = data_utils.SegmentationDecoder(is_panoptic_dataset=True) 81 | parsed_tensors = parser(example.SerializeToString()) 82 | 83 | self.assertIn('image', parsed_tensors) 84 | self.assertIn('image_name', parsed_tensors) 85 | self.assertIn('label', parsed_tensors) 86 | self.assertEqual(filename, parsed_tensors['image_name']) 87 | np.testing.assert_array_equal(image, parsed_tensors['image'].numpy()) 88 | # Decoded label is a 3-D array with last dimension of 1. 89 | decoded_label = parsed_tensors['label'].numpy() 90 | np.testing.assert_array_equal(label, decoded_label[..., 0]) 91 | 92 | 93 | if __name__ == '__main__': 94 | tf.test.main() 95 | -------------------------------------------------------------------------------- /data/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /data/dataset_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains a utility function for handling the dataset.""" 17 | 18 | import tensorflow as tf 19 | 20 | 21 | def get_semantic_and_panoptic_label(dataset_info, label, ignore_label): 22 | """Helper function to get semantic and panoptic label from panoptic label. 23 | 24 | This function gets the semantic and panoptic label from the panoptic label 25 | for different datasets. The labels must be encoded with semantic_label * 26 | label_divisor + instance_id. For thing classes, the instance ID 0 is reserved 27 | for crowd regions. Note that in the returned panoptic label, the crowd 28 | regions have been replaced with ignore regions, whereas the semantic label 29 | still makes use of these regions. 30 | 31 | Args: 32 | dataset_info: A dictionary storing dataset information. 33 | label: A Tensor of panoptic label. 34 | ignore_label: An integer specifying the ignore_label. 35 | 36 | Returns: 37 | semantic_label: A Tensor of semantic segmentation label. 38 | panoptic_label: A Tensor of panoptic segmentation label, which follows the 39 | Cityscapes annotation where 40 | panoptic_label = semantic_label * panoptic_label_divisor + instance_id. 41 | thing_mask: A boolean Tensor specifying the thing regions. Zero if no thing. 42 | crowd_region: A boolean Tensor specifying crowd region. Zero if no crowd 43 | annotation. 44 | 45 | Raises: 46 | ValueError: An error occurs when the ignore_label is not in range 47 | [0, label_divisor]. 48 | """ 49 | panoptic_label_divisor = dataset_info['panoptic_label_divisor'] 50 | if ignore_label >= panoptic_label_divisor or ignore_label < 0: 51 | raise ValueError('The ignore_label must be in [0, label_divisor].') 52 | 53 | semantic_label = label // panoptic_label_divisor 54 | # Find iscrowd region if any and set to ignore for panoptic labels. 55 | # 1. Find thing mask. 56 | thing_mask = tf.zeros_like(semantic_label, tf.bool) 57 | for thing_id in dataset_info['class_has_instances_list']: 58 | thing_mask = tf.logical_or( 59 | thing_mask, 60 | tf.equal(semantic_label, thing_id)) 61 | # 2. Find crowd regions (thing labels that have instance_id == 0). 62 | crowd_region = tf.logical_and( 63 | thing_mask, 64 | tf.equal(label % panoptic_label_divisor, 0)) 65 | # 3. Set crowd region to ignore label.
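# Crowd pixels receive the value ignore_label * panoptic_label_divisor, i.e. semantic class ignore_label with instance id 0, as computed by the tf.where below.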
66 | panoptic_label = tf.where( 67 | crowd_region, 68 | tf.ones_like(label) * ignore_label * panoptic_label_divisor, 69 | label) 70 | 71 | return semantic_label, panoptic_label, thing_mask, crowd_region 72 | -------------------------------------------------------------------------------- /data/dataset_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for dataset_utils.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from deeplab2.data import dataset_utils 22 | 23 | 24 | class DatasetUtilsTest(tf.test.TestCase): 25 | 26 | def _get_test_labels(self, num_classes, shape, label_divisor): 27 | num_ids_per_class = 35 28 | semantic_labels = np.random.randint(num_classes, size=shape) 29 | panoptic_labels = np.random.randint( 30 | num_ids_per_class, size=shape) + semantic_labels * label_divisor 31 | 32 | semantic_labels = tf.convert_to_tensor(semantic_labels, dtype=tf.int32) 33 | panoptic_labels = tf.convert_to_tensor(panoptic_labels, dtype=tf.int32) 34 | 35 | return panoptic_labels, semantic_labels 36 | 37 | def setUp(self): 38 | super().setUp() 39 | self._first_thing_class = 9 40 | self._num_classes = 19 41 | self._dataset_info = { 42 | 'panoptic_label_divisor': 1000, 43 | 'class_has_instances_list': tf.range(self._first_thing_class, 44 | self._num_classes) 45 | } 46 | self._num_ids = 37 47 | self._labels, self._semantic_classes = self._get_test_labels( 48 | self._num_classes, [2, 33, 33], 49 | self._dataset_info['panoptic_label_divisor']) 50 | 51 | def test_get_panoptic_and_semantic_label(self): 52 | # Note: self._labels contains one crowd instance per class. 
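# Crowd pixels (thing pixels with instance id 0) should be flagged in crowd_region and remapped to the ignore label in the returned panoptic label.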
53 | (returned_sem_labels, returned_pan_labels, returned_thing_mask, 54 | returned_crowd_region) = ( 55 | dataset_utils.get_semantic_and_panoptic_label( 56 | self._dataset_info, self._labels, ignore_label=255)) 57 | 58 | expected_semantic_labels = self._semantic_classes 59 | condition = self._labels % self._dataset_info['panoptic_label_divisor'] == 0 60 | condition = tf.logical_and( 61 | condition, 62 | tf.math.greater_equal(expected_semantic_labels, 63 | self._first_thing_class)) 64 | expected_crowd_labels = tf.where(condition, 1.0, 0.0) 65 | expected_pan_labels = tf.where( 66 | condition, 255 * self._dataset_info['panoptic_label_divisor'], 67 | self._labels) 68 | expected_thing_mask = tf.where( 69 | tf.math.greater_equal(expected_semantic_labels, 70 | self._first_thing_class), 1.0, 0.0) 71 | 72 | self.assertListEqual(returned_sem_labels.shape.as_list(), 73 | expected_semantic_labels.shape.as_list()) 74 | self.assertListEqual(returned_pan_labels.shape.as_list(), 75 | expected_pan_labels.shape.as_list()) 76 | self.assertListEqual(returned_crowd_region.shape.as_list(), 77 | expected_crowd_labels.shape.as_list()) 78 | self.assertListEqual(returned_thing_mask.shape.as_list(), 79 | expected_thing_mask.shape.as_list()) 80 | np.testing.assert_equal(returned_sem_labels.numpy(), 81 | expected_semantic_labels.numpy()) 82 | np.testing.assert_equal(returned_pan_labels.numpy(), 83 | expected_pan_labels.numpy()) 84 | np.testing.assert_equal(returned_crowd_region.numpy(), 85 | expected_crowd_labels.numpy()) 86 | np.testing.assert_equal(returned_thing_mask.numpy(), 87 | expected_thing_mask.numpy()) 88 | 89 | if __name__ == '__main__': 90 | tf.test.main() 91 | -------------------------------------------------------------------------------- /data/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /data/preprocessing/autoaugment_policy.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """AutoAugment policy file. 17 | 18 | This file contains the AutoAugment policy found via policy search.
19 | 20 | Please cite or refer to the following papers for details: 21 | - Ekin D Cubuk, Barret Zoph, Dandelion Mane, Vijay Vasudevan, and Quoc V Le. 22 | "Autoaugment: Learning augmentation policies from data." In CVPR, 2019. 23 | 24 | - Ekin D Cubuk, Barret Zoph, Jonathon Shlens, and Quoc V Le. 25 | "Randaugment: Practical automated data augmentation with a reduced search 26 | space." In CVPR, 2020. 27 | """ 28 | 29 | # Reduced augmentation operation space. 30 | augmentation_reduced_operations = ( 31 | 'AutoContrast', 'Equalize', 'Invert', 'Posterize', 32 | 'Solarize', 'Color', 'Contrast', 'Brightness', 'Sharpness') 33 | 34 | augmentation_probabilities = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] 35 | 36 | 37 | def convert_policy(policy, 38 | search_space=augmentation_reduced_operations, 39 | probability_scale=1.0, 40 | magnitude_scale=1): 41 | """Converts policy from a list of numbers.""" 42 | if len(policy) % 6: 43 | raise ValueError('Policy length must be a multiple of 6.') 44 | num_policies = len(policy) // 6 45 | policy_list = [[] for _ in range(num_policies)] 46 | for n in range(num_policies): 47 | for i in range(2): 48 | operation_id, prob_id, magnitude = ( 49 | policy[6 * n + i * 3 : 6 * n + (i + 1) * 3]) 50 | policy_name = search_space[operation_id] 51 | policy_prob = ( 52 | augmentation_probabilities[prob_id] * probability_scale) 53 | policy_list[n].append((policy_name, 54 | policy_prob, 55 | magnitude * magnitude_scale)) 56 | return policy_list 57 | 58 | 59 | simple_classification_policy = [8, 2, 7, 7, 1, 10, 60 | 1, 0, 9, 6, 1, 10, 61 | 8, 1, 9, 5, 1, 9, 62 | 4, 1, 7, 1, 3, 9, 63 | 8, 1, 1, 1, 1, 7] 64 | 65 | # All available policies. 66 | available_policies = { 67 | 'simple_classification_policy_magnitude_scale_0.2': convert_policy( 68 | simple_classification_policy, 69 | augmentation_reduced_operations, 70 | magnitude_scale=0.2), 71 | 'simple_classification_policy': convert_policy( 72 | simple_classification_policy, 73 | augmentation_reduced_operations, 74 | magnitude_scale=1), 75 | } 76 | -------------------------------------------------------------------------------- /data/preprocessing/autoaugment_policy_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for autoaugment_policy.py.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.data.preprocessing import autoaugment_policy 21 | 22 | 23 | class AutoaugmentPolicyTest(tf.test.TestCase): 24 | 25 | def testConvertPolicy(self): 26 | policy = [5, 1, 10, 5, 3, 4, 27 | 6, 3, 7, 3, 3, 9, 28 | 2, 2, 8, 8, 2, 8, 29 | 1, 4, 9, 4, 5, 7, 30 | 6, 4, 1, 1, 3, 4] 31 | expected = [ 32 | [('Color', 0.2, 10), ('Color', 0.6, 4)], 33 | [('Contrast', 0.6, 7), ('Posterize', 0.6, 9)], 34 | [('Invert', 0.4, 8), ('Sharpness', 0.4, 8)], 35 | [('Equalize', 0.8, 9), ('Solarize', 1.0, 7)], 36 | [('Contrast', 0.8, 1), ('Equalize', 0.6, 4)], 37 | ] 38 | policy_list = autoaugment_policy.convert_policy(policy) 39 | self.assertAllEqual(policy_list, expected) 40 | 41 | 42 | if __name__ == '__main__': 43 | tf.test.main() 44 | -------------------------------------------------------------------------------- /data/preprocessing/autoaugment_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for autoaugment_utils.py.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from deeplab2.data.preprocessing import autoaugment_utils 22 | 23 | 24 | class AutoaugmentUtilsTest(tf.test.TestCase): 25 | 26 | def testAugmentWithNamedPolicy(self): 27 | num_classes = 3 28 | np_image = np.random.randint(256, size=(13, 13, 3)) 29 | image = tf.constant(np_image, dtype=tf.uint8) 30 | np_label = np.random.randint(num_classes, size=(13, 13, 1)) 31 | label = tf.constant(np_label, dtype=tf.int32) 32 | image, label = autoaugment_utils.distort_image_with_autoaugment( 33 | image, label, ignore_label=255, 34 | augmentation_name='simple_classification_policy') 35 | self.assertTrue(image.numpy().any()) 36 | self.assertTrue(label.numpy().any()) 37 | 38 | 39 | if __name__ == '__main__': 40 | tf.test.main() 41 | -------------------------------------------------------------------------------- /data/testdata/dummy_gt_for_vps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/dummy_gt_for_vps.png -------------------------------------------------------------------------------- /data/testdata/dummy_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/dummy_prediction.png -------------------------------------------------------------------------------- /data/testdata/gtFine/cityscapes_panoptic_dummy_trainId.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": [ 3 | { 4 | "file_name": "dummy_000000_000000_gtFine_panoptic.png", 5 | "image_id": 
"dummy_000000_000000", 6 | "segments_info": [ 7 | { 8 | "area": 958, 9 | "category_id": 13, 10 | "id": 26000, 11 | "iscrowd": 0 12 | }, 13 | { 14 | "area": 6178, 15 | "category_id": 13, 16 | "id": 26, 17 | "iscrowd": 1 18 | }, 19 | { 20 | "area": 10496, 21 | "category_id": 13, 22 | "id": 26001, 23 | "iscrowd": 0 24 | }, 25 | { 26 | "area": 5534, 27 | "category_id": 13, 28 | "id": 26002, 29 | "iscrowd": 0 30 | }, 31 | { 32 | "area": 32768, 33 | "category_id": 13, 34 | "id": 26003, 35 | "iscrowd": 0 36 | }, 37 | { 38 | "area": 19906, 39 | "category_id": 13, 40 | "id": 26004, 41 | "iscrowd": 0 42 | }, 43 | { 44 | "area": 15940, 45 | "category_id": 8, 46 | "id": 21, 47 | "iscrowd": 0 48 | }, 49 | { 50 | "area": 278754, 51 | "category_id": 10, 52 | "id": 23, 53 | "iscrowd": 0 54 | }, 55 | { 56 | "area": 222420, 57 | "category_id": 2, 58 | "id": 11, 59 | "iscrowd": 0 60 | }, 61 | { 62 | "area": 46475, 63 | "category_id": 0, 64 | "id": 7, 65 | "iscrowd": 0 66 | } 67 | ] 68 | } 69 | ] 70 | } -------------------------------------------------------------------------------- /data/testdata/gtFine/cityscapes_panoptic_dummy_trainId/dummy_000000_000000_gtFine_panoptic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/gtFine/cityscapes_panoptic_dummy_trainId/dummy_000000_000000_gtFine_panoptic.png -------------------------------------------------------------------------------- /data/testdata/leftImg8bit/dummy_000000_000000_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/leftImg8bit/dummy_000000_000000_leftImg8bit.png -------------------------------------------------------------------------------- /data/testdata/targets/center_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/center_target.png -------------------------------------------------------------------------------- /data/testdata/targets/center_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/center_weights.png -------------------------------------------------------------------------------- /data/testdata/targets/eval_is_crowd.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_is_crowd.npy -------------------------------------------------------------------------------- /data/testdata/targets/eval_panoptic_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_panoptic_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/eval_semantic_target.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_semantic_target.png -------------------------------------------------------------------------------- /data/testdata/targets/is_crowd.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/is_crowd.npy -------------------------------------------------------------------------------- /data/testdata/targets/offset_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/offset_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/offset_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/offset_weights.png -------------------------------------------------------------------------------- /data/testdata/targets/panoptic_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/panoptic_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/panoptic_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/panoptic_target.png -------------------------------------------------------------------------------- /data/testdata/targets/rgb_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/rgb_target.png -------------------------------------------------------------------------------- /data/testdata/targets/semantic_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/semantic_target.png -------------------------------------------------------------------------------- /data/testdata/targets/semantic_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/semantic_weights.png -------------------------------------------------------------------------------- /data/testdata/targets/thing_id_class_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/thing_id_class_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/thing_id_mask_target.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/thing_id_mask_target.npy -------------------------------------------------------------------------------- /data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /data/waymo_constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Meta info of Waymo Open Dataset: Panoramic Video Panoptic Segmentation. 17 | 18 | Dataset website: https://waymo.com/open/ 19 | GitHub: https://github.com/waymo-research/waymo-open-dataset 20 | 21 | References: 22 | 23 | - Jieru Mei, Alex Zihao Zhu, Xinchen Yan, Hang Yan, Siyuan Qiao, Yukun Zhu, 24 | Liang-Chieh Chen, Henrik Kretzschmar, Dragomir Anguelov. "Waymo Open Dataset: 25 | Panoramic Video Panoptic Segmentation." In ECCV, 2022. 
26 | """ 27 | 28 | from typing import Any, Sequence 29 | 30 | import immutabledict 31 | 32 | COLORMAP = "waymo" 33 | PANOPTIC_LABEL_DIVISOR = 100000 34 | 35 | IGNORE_LABEL_NAME = "unknown" 36 | IGNORE_LABEL = 0 37 | 38 | _WAYMO_COLORS = immutabledict.immutabledict({ 39 | "bicycle": [119, 11, 32], 40 | "bird": [127, 96, 0], 41 | "building": [70, 70, 70], 42 | "bus": [0, 60, 100], 43 | "car": [0, 0, 142], 44 | "construction_cone_pole": [230, 145, 56], 45 | "cyclist": [255, 0, 0], 46 | "dynamic": [102, 102, 102], 47 | "ground": [102, 102, 102], 48 | "ground_animal": [91, 15, 0], 49 | "lane_marker": [234, 209, 220], 50 | "motorcycle": [0, 0, 230], 51 | "motorcyclist": [180, 0, 0], 52 | "other_large_vehicle": [61, 133, 198], 53 | "other_pedestrian_object": [234, 153, 153], 54 | "person": [220, 20, 60], 55 | "pole": [153, 153, 153], 56 | "road": [128, 64, 128], 57 | "road_marker": [217, 210, 233], 58 | "sdc": [102, 102, 102], 59 | "sidewalk": [244, 35, 232], 60 | "sign": [246, 178, 107], 61 | "sky": [70, 130, 180], 62 | "static": [102, 102, 102], 63 | "traffic_light": [250, 170, 30], 64 | "trailer": [111, 168, 220], 65 | "truck": [0, 0, 70], 66 | "unknown": [102, 102, 102], 67 | "vegetation": [107, 142, 35], 68 | }) 69 | 70 | _WAYMO_CLASS_NAMES = [ 71 | "unknown", 72 | "sdc", 73 | "car", 74 | "truck", 75 | "bus", 76 | "other_large_vehicle", 77 | "bicycle", 78 | "motorcycle", 79 | "trailer", 80 | "person", 81 | "cyclist", 82 | "motorcyclist", 83 | "bird", 84 | "ground_animal", 85 | "construction_cone_pole", 86 | "pole", 87 | "other_pedestrian_object", 88 | "sign", 89 | "traffic_light", 90 | "building", 91 | "road", 92 | "lane_marker", 93 | "road_marker", 94 | "sidewalk", 95 | "vegetation", 96 | "sky", 97 | "ground", 98 | "dynamic", 99 | "static", 100 | ] 101 | 102 | _IS_THINGS = [ 103 | "car", "truck", "bus", "other_large_vehicle", "trailer", "person", 104 | "cyclist", "motorcyclist" 105 | ] 106 | 107 | 108 | def get_waymo_meta() -> Sequence[Any]: 109 | """Gets the meta info for waymo dataset.""" 110 | meta = [] 111 | for name_id, name in enumerate(_WAYMO_CLASS_NAMES): 112 | item = { 113 | "color": _WAYMO_COLORS[name], 114 | "name": name, 115 | "id": name_id, 116 | "isthing": int(name in _IS_THINGS) 117 | } 118 | meta.append(item) 119 | return meta 120 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /evaluation/depth_metrics_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for depth metrics.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.evaluation import depth_metrics 21 | 22 | 23 | class DepthMetricsTest(tf.test.TestCase): 24 | 25 | def test_depth_metrics_on_single_image(self): 26 | gt = np.array([[5.44108091, 53.30197697, 61.06181767, 14.36723114], 27 | [0, 39.68081126, 58.77974067, 0], 28 | [40.57883826, 22.15134852, 31.46813478, 13.52603324]]) 29 | pred = np.array([[4.87694111, 50.09085582, 55.74533641, 10.13579195], 30 | [13.76178147, 41.62431592, 56.97362032, 81.48369608], 31 | [43.12005689, 15.54622258, 24.1993478, 12.14451783]]) 32 | depth_obj = depth_metrics.DepthMetrics() 33 | depth_obj.update_state(gt, pred) 34 | result = depth_obj.result().numpy() 35 | # The following numbers are manually computed. 36 | self.assertAlmostEqual(result[0], 14.154233, places=4) 37 | self.assertAlmostEqual(result[1], 0.0268667, places=4) 38 | self.assertAlmostEqual(result[2], 0.13191505, places=4) 39 | self.assertAlmostEqual(result[3], 0.7, places=4) 40 | 41 | def test_depth_metrics_on_multiple_images(self): 42 | depth_obj = depth_metrics.DepthMetrics() 43 | gt_1 = np.array([[5.44108091, 53.30197697, 61.06181767, 14.36723114], 44 | [0, 39.68081126, 58.77974067, 0], 45 | [40.57883826, 22.15134852, 31.46813478, 13.52603324]]) 46 | pred_1 = np.array([[4.87694111, 50.09085582, 55.74533641, 10.13579195], 47 | [13.76178147, 41.62431592, 56.97362032, 81.48369608], 48 | [43.12005689, 15.54622258, 24.1993478, 12.14451783]]) 49 | depth_obj.update_state(gt_1, pred_1) 50 | gt_2 = np.array( 51 | [[79.56192404, 25.68145225, 0, 39.88486608, 68.91602466], 52 | [79.53460057, 2.55741031, 36.05057241, 68.04747416, 3.7783227], 53 | [0, 0, 72.47336778, 59.02611644, 66.07499008], 54 | [25.88578395, 58.2202574, 27.39066477, 29.83094038, 37.99239669]]) 55 | pred_2 = np.array( 56 | [[83.80952145, 27.23367361, 72.52687468, 35.28400183, 72.41126444], 57 | [77.62373864, 0.87004049, 32.1619225, 66.91361903, 2.60688436], 58 | [15.30294603, 9.76419241, 68.61650198, 57.14559324, 66.88452603], 59 | [24.54818109, 61.60855251, 31.50312052, 26.02325866, 36.4019569]]) 60 | depth_obj.update_state(gt_2, pred_2) 61 | gt_3 = np.array([[50.80100791, 0.41130084, 58.85031668], 62 | [29.44932853, 23.48806627, 30.17890056]]) 63 | pred_3 = np.array([[49.66563966, 0.62070026, 58.84231026], 64 | [32.26735775, 28.07405648, 33.7131882]]) 65 | depth_obj.update_state(gt_3, pred_3) 66 | result = depth_obj.result().numpy() 67 | # The following numbers are manually computed. 68 | self.assertAlmostEqual(result[0], 18.442057, places=4) 69 | self.assertAlmostEqual(result[1], 0.0388692, places=4) 70 | self.assertAlmostEqual(result[2], 0.13392223, places=4) 71 | self.assertAlmostEqual(result[3], 0.8052287, places=4) 72 | 73 | 74 | if __name__ == '__main__': 75 | tf.test.main() 76 | -------------------------------------------------------------------------------- /evaluation/numpy/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /evaluation/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for test_utils.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.evaluation import test_utils 21 | 22 | 23 | class TestUtilsTest(tf.test.TestCase): 24 | 25 | def test_read_test_image(self): 26 | image_array = test_utils.read_test_image('team_pred_class.png') 27 | self.assertSequenceEqual(image_array.shape, (231, 345, 4)) 28 | 29 | def test_reads_segmentation_with_color_map(self): 30 | rgb_to_semantic_label = {(0, 0, 0): 0, (0, 0, 255): 1, (255, 0, 0): 23} 31 | labels = test_utils.read_segmentation_with_rgb_color_map( 32 | 'team_pred_class.png', rgb_to_semantic_label) 33 | 34 | input_image = test_utils.read_test_image('team_pred_class.png') 35 | np.testing.assert_array_equal( 36 | labels == 0, 37 | np.logical_and(input_image[:, :, 0] == 0, input_image[:, :, 2] == 0)) 38 | np.testing.assert_array_equal(labels == 1, input_image[:, :, 2] == 255) 39 | np.testing.assert_array_equal(labels == 23, input_image[:, :, 0] == 255) 40 | 41 | def test_reads_gt_segmentation(self): 42 | instance_label_to_semantic_label = { 43 | 0: 0, 44 | 47: 1, 45 | 97: 1, 46 | 133: 1, 47 | 150: 1, 48 | 174: 1, 49 | 198: 23, 50 | 215: 1, 51 | 244: 1, 52 | 255: 1, 53 | } 54 | instances, classes = test_utils.panoptic_segmentation_with_class_map( 55 | 'team_gt_instance.png', instance_label_to_semantic_label) 56 | 57 | expected_label_shape = (231, 345) 58 | self.assertSequenceEqual(instances.shape, expected_label_shape) 59 | self.assertSequenceEqual(classes.shape, expected_label_shape) 60 | np.testing.assert_array_equal(instances == 0, classes == 0) 61 | np.testing.assert_array_equal(instances == 198, classes == 23) 62 | np.testing.assert_array_equal( 63 | np.logical_and(instances != 0, instances != 198), classes == 1) 64 | 65 | 66 | if __name__ == '__main__': 67 | tf.test.main() 68 | -------------------------------------------------------------------------------- /evaluation/testdata/README.md: -------------------------------------------------------------------------------- 1 | # Segmentation Evaluation Test Data 2 | 3 | ## Source Images 4 | 5 | *
[team_input.png](team_input.png) \ 6 | Source: 7 | https://ai.googleblog.com/2018/03/semantic-image-segmentation-with.html 8 | * [cat_input.jpg](cat_input.jpg) \ 9 | Source: https://www.flickr.com/photos/magdalena_b/4995858743 10 | * [bird_input.jpg](bird_input.jpg) \ 11 | Source: https://www.flickr.com/photos/chivinskia/40619099560 12 | -------------------------------------------------------------------------------- /evaluation/testdata/bird_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_gt.png -------------------------------------------------------------------------------- /evaluation/testdata/bird_pred_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_pred_class.png -------------------------------------------------------------------------------- /evaluation/testdata/bird_pred_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_pred_instance.png -------------------------------------------------------------------------------- /evaluation/testdata/cat_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_gt.png -------------------------------------------------------------------------------- /evaluation/testdata/cat_pred_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_pred_class.png -------------------------------------------------------------------------------- /evaluation/testdata/cat_pred_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_pred_instance.png -------------------------------------------------------------------------------- /evaluation/testdata/team_gt_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_gt_instance.png -------------------------------------------------------------------------------- /evaluation/testdata/team_pred_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_pred_class.png -------------------------------------------------------------------------------- /evaluation/testdata/team_pred_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_pred_instance.png -------------------------------------------------------------------------------- /g3doc/img/axial_deeplab/axial_block.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/axial_block.png -------------------------------------------------------------------------------- /g3doc/img/axial_deeplab/nonlocal_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/nonlocal_block.png -------------------------------------------------------------------------------- /g3doc/img/axial_deeplab/position_sensitive_axial_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/position_sensitive_axial_block.png -------------------------------------------------------------------------------- /g3doc/img/kmax_deeplab/clustering_view_of_mask_transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/kmax_deeplab/clustering_view_of_mask_transformer.png -------------------------------------------------------------------------------- /g3doc/img/kmax_deeplab/kmax_decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/kmax_deeplab/kmax_decoder.png -------------------------------------------------------------------------------- /g3doc/img/max_deeplab/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/max_deeplab/overview.png -------------------------------------------------------------------------------- /g3doc/img/max_deeplab/overview_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/max_deeplab/overview_simple.png -------------------------------------------------------------------------------- /g3doc/img/moat/moat_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_block.png -------------------------------------------------------------------------------- /g3doc/img/moat/moat_imagenet1k_224.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_imagenet1k_224.png -------------------------------------------------------------------------------- /g3doc/img/moat/moat_imagenet22k_384.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_imagenet22k_384.png -------------------------------------------------------------------------------- /g3doc/img/panoptic_deeplab.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/panoptic_deeplab.png -------------------------------------------------------------------------------- /g3doc/img/step/kitti_step_annotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/step/kitti_step_annotation.png -------------------------------------------------------------------------------- /g3doc/img/vip_deeplab/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/vip_deeplab/demo.gif -------------------------------------------------------------------------------- /g3doc/projects/wod_pvps.md: -------------------------------------------------------------------------------- 1 | # Panoramic Video Panoptic Segmentation 2 | 3 | Waymo Open Dataset: Panoramic Video Panoptic Segmentation (WOD-PVPS) [1] is a 4 | large-scale dataset that offers high-quality multi-camera video panoptic 5 | segmentation labels for autonomous driving. The labels are consistent over time 6 | for video processing and consistent across multiple cameras mounted on the 7 | vehicles for full panoramic scene understanding. 8 | 9 | The new task of Panoramic Video Panoptic Segmentation requires generating dense 10 | panoptic segmentation predictions that are consistent across both time and 11 | cameras. To build a baseline for this challenging task, we extend ViP-DeepLab 12 | [2] to the multi-camera setting. 13 | 14 | ## Prerequisite 15 | 16 | 1. Make sure the software is properly [installed](../setup/installation.md). 17 | 18 | 2. Make sure the 19 | [target dataset](https://waymo.com/open/data/perception/#2d-video-panoptic-segmentation) 20 | is correctly prepared. 21 | 22 | ## Model Zoo 23 | 24 | ## Citing WOD-PVPS 25 | 26 | If you find this code helpful in your research or wish to refer to the baseline 27 | results, please use the following BibTeX entries. 28 | 29 | * Waymo Open Dataset: Panoramic Video Panoptic Segmentation: 30 | 31 | ``` 32 | @article{mei2022waymo, 33 | title={Waymo Open Dataset: Panoramic Video Panoptic Segmentation}, 34 | author={Mei, Jieru and Zhu, Alex Zihao and Yan, Xinchen and Yan, Hang and Qiao, Siyuan and Zhu, Yukun and Chen, Liang-Chieh and Kretzschmar, Henrik and Anguelov, Dragomir}, 35 | journal={arXiv preprint arXiv:2206.07704}, 36 | year={2022} 37 | } 38 | 39 | ``` 40 | 41 | * ViP-DeepLab: 42 | 43 | ``` 44 | @inproceedings{vip_deeplab_2021, 45 | author={Siyuan Qiao and Yukun Zhu and Hartwig Adam and Alan Yuille and Liang-Chieh Chen}, 46 | title={{ViP-DeepLab}: Learning Visual Perception with Depth-aware Video Panoptic Segmentation}, 47 | booktitle={CVPR}, 48 | year={2021} 49 | } 50 | 51 | ``` 52 | 53 | * Panoptic-DeepLab: 54 | 55 | ``` 56 | @inproceedings{panoptic_deeplab_2020, 57 | author={Bowen Cheng and Maxwell D Collins and Yukun Zhu and Ting Liu and Thomas S Huang and Hartwig Adam and Liang-Chieh Chen}, 58 | title={{Panoptic-DeepLab}: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation}, 59 | booktitle={CVPR}, 60 | year={2020} 61 | } 62 | 63 | ``` 64 | 65 | ### References 66 | 67 | 1.
Jieru Mei, Alex Zihao Zhu, Xinchen Yan, Hang Yan, Siyuan Qiao, Yukun Zhu, 68 | Liang-Chieh Chen, Henrik Kretzschmar, Dragomir Anguelov. "Waymo Open 69 | Dataset: Panoramic Video Panoptic Segmentation." In arXiv: 2206.07704, 2022. 70 | 71 | 2. Siyuan Qiao, Yukun Zhu, Hartwig Adam, Alan Yuille, and Liang-Chieh Chen. 72 | "ViP-DeepLab: Learning Visual Perception with Depth-aware Video Panoptic 73 | Segmentation." In CVPR, 2021. 74 | -------------------------------------------------------------------------------- /g3doc/setup/ade20k.md: -------------------------------------------------------------------------------- 1 | # Run DeepLab2 on ADE20K dataset 2 | 3 | This page walks through the steps required to generate 4 | [ADE20K](https://groups.csail.mit.edu/vision/datasets/ADE20K/) panoptic 5 | segmentation data for DeepLab2. 6 | 7 | ## Prework 8 | 9 | Before running any DeepLab2 scripts, users should (1) access the 10 | [ADE20K dataset website](https://groups.csail.mit.edu/vision/datasets/ADE20K/) 11 | to download the dataset, and (2) prepare the panoptic annotation using 12 | [Mask2Former's script](https://github.com/facebookresearch/Mask2Former/blob/main/datasets/prepare_ade20k_pan_seg.py). 13 | 14 | After finishing the above steps, the expected directory structure should be as 15 | follows: 16 | 17 | ``` 18 | .(ADE20K_ROOT) 19 | +-- images 20 | | 21 | |-- annotations 22 | | 23 | |-- objectInfo150.txt 24 | | 25 | |-- annotations_instance 26 | | 27 | |-- ade20k_panoptic_{train,val}.json 28 | | 29 | +-- ade20k_panoptic_{train,val} 30 | ``` 31 | 32 | ## Convert prepared dataset to TFRecord 33 | 34 | Use the following command line to generate ADE20K TFRecords: 35 | 36 | ```bash 37 | # For generating data for panoptic segmentation task 38 | python deeplab2/data/build_ade20k_data.py \ 39 | --ade20k_root=${ADE20K_ROOT} \ 40 | --output_dir=${OUTPUT_DIR} 41 | ``` 42 | 43 | The command above will output two sharded tfrecord files: 44 | `{train|val}@1000.tfrecord`. For both the `train` and `val` sets, the tfrecords 45 | contain the RGB image pixels as well as the corresponding annotations. These 46 | files will be used as the input for model training and evaluation. 47 | 48 | ### TFExample proto format for ADE20K 49 | 50 | The Example proto contains the following fields: 51 | 52 | * `image/encoded`: encoded image content. 53 | * `image/filename`: image filename. 54 | * `image/format`: image file format. 55 | * `image/height`: image height. 56 | * `image/width`: image width. 57 | * `image/channels`: image channels. 58 | * `image/segmentation/class/encoded`: encoded segmentation content. 59 | * `image/segmentation/class/format`: segmentation encoding format. 60 | 61 | For panoptic segmentation, the encoded segmentation map will be the raw bytes of 62 | an int32 panoptic map, where each pixel is assigned to a panoptic ID, which is 63 | computed by: 64 | 65 | ``` 66 | panoptic ID = semantic ID * label divisor + instance ID 67 | ``` 68 | 69 | where semantic ID will be: 70 | 71 | * ignore label (0) for pixels not belonging to any segment 72 | * for segments associated with `iscrowd` label: 73 | * (default): ignore label (0) 74 | * `category_id` for other segments 75 | 76 | The instance ID will be 0 for pixels belonging to 77 | 78 | * `stuff` class 79 | * `thing` class with `iscrowd` label 80 | * pixels with ignore label 81 | 82 | and `[1, label divisor)` otherwise.
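For clarity, here is a minimal NumPy sketch of how such a panoptic map can be decoded back into its semantic and instance components. The `label_divisor` value of 1000 below is an illustrative assumption; use the divisor the TFRecords were actually built with.

```python
import numpy as np

# Illustrative assumption: must match the label divisor used to build the data.
label_divisor = 1000

# A dummy 2x2 panoptic map, e.g. semantic ID 8 with instance ID 3 -> 8 * 1000 + 3.
panoptic_map = np.array([[8003, 8003], [21000, 0]], dtype=np.int32)

# Invert `panoptic ID = semantic ID * label divisor + instance ID`.
semantic_map = panoptic_map // label_divisor  # 0 is the ignore label.
instance_map = panoptic_map % label_divisor   # 0 for stuff/crowd/ignored pixels.

print(semantic_map)  # [[ 8  8] [21  0]]
print(instance_map)  # [[3 3] [0 0]]
```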
83 | -------------------------------------------------------------------------------- /g3doc/setup/cityscapes_test_server_evaluation.md: -------------------------------------------------------------------------------- 1 | # Test Server Evaluation on Cityscapes dataset 2 | 3 | This page walks through the steps required to convert DeepLab2 predictions for 4 | test server evaluation on [Cityscapes](https://www.cityscapes-dataset.com/). 5 | 6 | A high-level overview of the whole process: 7 | 8 | 1. Save raw panoptic predictions in the two-channel format. 9 | 10 | 2. Create the images json file. 11 | 12 | 3. Convert predictions in the two-channel format to the panoptic COCO format. 13 | 14 | 4. Run local validation set evaluation or prepare test set evaluation. 15 | 16 | We also define some environment variables for simplicity and convenience: 17 | 18 | `BASE_MODEL_DIRECTORY`: a variable set in the textproto file, which defines where 19 | all checkpoints and results are saved. 20 | 21 | `DATA_ROOT`: where the original Cityscapes dataset is located. 22 | 23 | `PATH_TO_SAVE`: where the converted results should be saved. 24 | 25 | `IMAGES_SPLIT`: *val* or *test* depending on the target split. 26 | 27 | ## Save Raw Panoptic Prediction 28 | 29 | Save the raw panoptic predictions in the 30 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) by ensuring 31 | the following fields are set properly in the textproto config file. 32 | 33 | ``` 34 | eval_dataset_options.decode_groundtruth_label = false 35 | evaluator_options.save_predictions = true 36 | evaluator_options.save_raw_predictions = true 37 | evaluator_options.convert_raw_to_eval_ids = true 38 | ``` 39 | 40 | Then run the model in evaluation mode (with `--mode=eval`); the results will be 41 | saved at 42 | 43 | *semantic segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_semantic/\*.png 44 | 45 | *instance segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_instance/\* 46 | 47 | *panoptic segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_panoptic/\*.png 48 | 49 | ## Create Images JSON 50 | 51 | Create the images json file by running the following command. 52 | 53 | ```bash 54 | python deeplab2/utils/create_images_json_for_cityscapes.py \ 55 | --image_dir=${DATA_ROOT}/leftImg8bit/${IMAGES_SPLIT} \ 56 | --output_json_path=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \ 57 | --only_basename \ 58 | --include_image_type_suffix=false 59 | ``` 60 | 61 | ## Convert the Prediction Format 62 | 63 | Convert prediction results saved in the 64 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) to the 65 | panoptic COCO format. 66 | 67 | ```bash 68 | python panopticapi/converters/2channels2panoptic_coco_format.py \ 69 | --source_folder=${BASE_MODEL_DIRECTORY}/vis/raw_panoptic \ 70 | --images_json_file=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \ 71 | --categories_json_file=deeplab2/utils/panoptic_cityscapes_categories.json \ 72 | --segmentations_folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 73 | --predictions_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json 74 | ``` 75 | 76 | ## Run Local Evaluation Scripts (for *validation* set) 77 | 78 | Run the [official scripts](https://github.com/mcordts/cityscapesScripts) to 79 | evaluate validation set results.
80 | 81 | For *semantic segmentation*: 82 | 83 | ```bash 84 | CITYSCAPES_RESULTS=${BASE_MODEL_DIRECTORY}/vis/raw_semantic/ \ 85 | CITYSCAPES_DATASET=${DATA_ROOT} \ 86 | CITYSCAPES_EXPORT_DIR=${PATH_TO_SAVE} \ 87 | python cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py 88 | ``` 89 | 90 | For *instance segmentation*: 91 | 92 | ```bash 93 | CITYSCAPES_RESULTS=${BASE_MODEL_DIRECTORY}/vis/raw_instance/ \ 94 | CITYSCAPES_DATASET=${DATA_ROOT} \ 95 | python cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py 96 | ``` 97 | 98 | For *panoptic segmentation*: 99 | 100 | ```bash 101 | python cityscapesscripts/evaluation/evalPanopticSemanticLabeling.py \ 102 | --prediction-json-file=${PATH_TO_SAVE}/panoptic_cocoformat.json \ 103 | --prediction-folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 104 | --gt-json-file=${DATA_ROOT}/gtFine/cityscapes_panoptic_val.json \ 105 | --gt-folder=${DATA_ROOT}/gtFine/cityscapes_panoptic_val 106 | ``` 107 | 108 | Please note that our raw prediction format does not support the instance 109 | segmentation submission format yet. 110 | 111 | ## Prepare Submission Files (for *test* set) 112 | 113 | Run the following commands to prepare the submission files for test server 114 | evaluation. 115 | 116 | ```bash 117 | zip -r cityscapes_test_submission_semantic.zip ${BASE_MODEL_DIRECTORY}/vis/raw_semantic 118 | zip -r cityscapes_test_submission_instance.zip ${BASE_MODEL_DIRECTORY}/vis/raw_instance 119 | zip -r cityscapes_test_submission_panoptic.zip ${PATH_TO_SAVE}/panoptic_cocoformat ${PATH_TO_SAVE}/panoptic_cocoformat.json 120 | ``` 121 | -------------------------------------------------------------------------------- /g3doc/setup/coco.md: -------------------------------------------------------------------------------- 1 | # Run DeepLab2 on COCO dataset 2 | 3 | This page walks through the steps required to generate 4 | [COCO](https://cocodataset.org/) panoptic segmentation data for DeepLab2. 5 | DeepLab2 uses sharded TFRecords for efficient processing of the data. 6 | 7 | ## Prework 8 | 9 | Before running any DeepLab2 scripts, users should (1) access the 10 | [COCO dataset website](https://cocodataset.org/) to download the dataset, 11 | including [2017 Train images](http://images.cocodataset.org/zips/train2017.zip), 12 | [2017 Val images](http://images.cocodataset.org/zips/val2017.zip), 13 | [2017 Test images](http://images.cocodataset.org/zips/test2017.zip), and 14 | [2017 Panoptic Train/Val annotations](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip), 15 | and (2) unzip the downloaded files. 16 | 17 | After finishing the above steps, the expected directory structure should be as 18 | follows: 19 | 20 | ``` 21 | .(COCO_ROOT) 22 | +-- train2017 23 | | | 24 | | +-- *.jpg 25 | | 26 | |-- val2017 27 | | | 28 | | +-- *.jpg 29 | | 30 | |-- test2017 31 | | | 32 | | +-- *.jpg 33 | | 34 | +-- annotations 35 | | 36 | +-- panoptic_{train|val}2017.json 37 | +-- panoptic_{train|val}2017 38 | ``` 39 | 40 | ## Convert prepared dataset to TFRecord 41 | 42 | Use the following command line to generate COCO TFRecords: 43 | 44 | ```bash 45 | # For generating data for panoptic segmentation task 46 | python deeplab2/data/build_coco_data.py \ 47 | --coco_root=${COCO_ROOT} \ 48 | --output_dir=${OUTPUT_DIR} 49 | ``` 50 | 51 | The command above will output three sharded tfrecord files: 52 | `{train|val|test}@1000.tfrecord`. For the `train` and `val` sets, the tfrecords 53 | contain the RGB image pixels as well as the corresponding annotations. For
the 54 | `test` set, the tfrecords contain RGB images only. These files will be used as 55 | the input for model training and evaluation. 56 | 57 | Note that we map the class IDs to contiguous IDs. Specifically, we map the 58 | original label IDs, which range from 1 to 200, to contiguous ones ranging 59 | from 1 to 133. 60 | 61 | ### TFExample proto format for COCO 62 | 63 | The Example proto contains the following fields: 64 | 65 | * `image/encoded`: encoded image content. 66 | * `image/filename`: image filename. 67 | * `image/format`: image file format. 68 | * `image/height`: image height. 69 | * `image/width`: image width. 70 | * `image/channels`: image channels. 71 | * `image/segmentation/class/encoded`: encoded segmentation content. 72 | * `image/segmentation/class/format`: segmentation encoding format. 73 | 74 | For panoptic segmentation, the encoded segmentation map will be the raw bytes of 75 | an int32 panoptic map, where each pixel is assigned to a panoptic ID, which is 76 | computed by: 77 | 78 | ``` 79 | panoptic ID = semantic ID * label divisor + instance ID 80 | ``` 81 | 82 | where semantic ID will be: 83 | 84 | * ignore label (0) for pixels not belonging to any segment 85 | * for segments associated with `iscrowd` label: 86 | * (default): ignore label (0) 87 | * (if set `--treat_crowd_as_ignore=false` while running 88 | `build_coco_data.py`): `category_id` 89 | * `category_id` for other segments 90 | 91 | The instance ID will be 0 for pixels belonging to 92 | 93 | * `stuff` class 94 | * `thing` class with `iscrowd` label 95 | * pixels with ignore label 96 | 97 | and `[1, label divisor)` otherwise. 98 | -------------------------------------------------------------------------------- /g3doc/setup/coco_test_server_evaluation.md: -------------------------------------------------------------------------------- 1 | # Test Server Evaluation on COCO dataset 2 | 3 | This page walks through the steps required to convert DeepLab2 predictions for 4 | test server evaluation on [COCO](https://cocodataset.org/). 5 | 6 | A high-level overview of the whole process: 7 | 8 | 1. Save raw panoptic predictions in the two-channel format. 9 | 10 | 2. Convert predictions in the two-channel format to the panoptic COCO format. 11 | 12 | 3. Run local validation set evaluation or prepare test set evaluation. 13 | 14 | We also define some environment variables for simplicity and convenience: 15 | 16 | `BASE_MODEL_DIRECTORY`: a variable set in the textproto file, which defines where 17 | all checkpoints and results are saved. 18 | 19 | `DATA_ROOT`: where the original COCO dataset is located. 20 | 21 | `PATH_TO_SAVE`: where the converted results should be saved. 22 | 23 | ## Save Raw Panoptic Prediction 24 | 25 | Save the raw panoptic predictions in the 26 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) by ensuring 27 | the following fields are set properly in the textproto config file. 28 | 29 | ``` 30 | eval_dataset_options.decode_groundtruth_label = false 31 | evaluator_options.save_predictions = true 32 | evaluator_options.save_raw_predictions = true 33 | evaluator_options.convert_raw_to_eval_ids = true 34 | ``` 35 | 36 | Then run the model in evaluation mode (with `--mode=eval`), and the results 37 | will be saved at ${BASE_MODEL_DIRECTORY}/vis/raw_panoptic/\*.png. 38 | 39 | ## Convert the Prediction Format 40 | 41 | Convert prediction results saved in the 42 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) to the 43 | panoptic COCO format.
44 | 45 | ```bash 46 | python panopticapi/converters/2channels2panoptic_coco_format.py \ 47 | --source_folder=${BASE_MODEL_DIRECTORY}/vis/raw_panoptic \ 48 | --images_json_file=${DATA_ROOT}/annotations/IMG_JSON \ 49 | --categories_json_file=panopticapi/panoptic_coco_categories.json \ 50 | --segmentations_folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 51 | --predictions_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json 52 | ``` 53 | 54 | The `IMG_JSON` refers to `panoptic_val2017.json` for the *val* set and 55 | `image_info_test-dev2017.json` for the *test-dev* set. 56 | 57 | ## Run Local Evaluation Scripts (for *validation* set) 58 | 59 | Run the [official scripts](https://github.com/cocodataset/panopticapi) to 60 | evaluate validation set results. 61 | 62 | ```bash 63 | python panopticapi/evaluation.py \ 64 | --pred_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json \ 65 | --pred_folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 66 | --gt_json_file=${DATA_ROOT}/annotations/panoptic_val2017.json \ 67 | --gt_folder=${DATA_ROOT}/annotations/panoptic_val2017 68 | ``` 69 | 70 | ## Prepare Submission Files (for *test* set) 71 | 72 | Run the following command to prepare a submission file for test server 73 | evaluation. 74 | 75 | ```bash 76 | zip -r coco_test_submission_panoptic.zip ${PATH_TO_SAVE}/panoptic_cocoformat ${PATH_TO_SAVE}/panoptic_cocoformat.json 77 | ``` 78 | -------------------------------------------------------------------------------- /g3doc/setup/motchallenge_step.md: -------------------------------------------------------------------------------- 1 | # Run DeepLab2 on MOTChallenge-STEP dataset 2 | 3 | ## MOTChallenge-STEP dataset 4 | 5 | MOTChallenge-STEP extends the existing [MOTChallenge](https://motchallenge.net/) 6 | dataset with spatially and temporally dense annotations. 7 | 8 | ### Label Map 9 | 10 | The MOTChallenge-STEP dataset follows the same annotation and label policy as 11 | the [KITTI-STEP dataset](./kitti_step.md). From the 12 | [MOTChallenge](https://motchallenge.net/) dataset, 4 outdoor sequences are 13 | annotated for MOTChallenge-STEP. In particular, these sequences are split 14 | into 2 for training and 2 for testing. This dataset contains only 7 semantic 15 | classes, as not all of 16 | [Cityscapes](https://www.cityscapes-dataset.com/dataset-overview/#class-definitions)' 17 | 19 semantic classes are present. 18 | 19 | Label Name | Label ID 20 | -------------- | -------- 21 | sidewalk | 0 22 | building | 1 23 | vegetation | 2 24 | sky | 3 25 | person† | 4 26 | rider | 5 27 | bicycle | 6 28 | void | 255 29 | 30 | †: Single instance annotations are available. 31 | 32 | ### Prepare MOTChallenge-STEP for Training and Evaluation 33 | 34 | In the following, we provide a step-by-step walkthrough to prepare the data. 35 | 36 | 1. Create the MOTChallenge-STEP directory: 37 | 38 | ```bash 39 | mkdir ${MOTCHALLENGE_STEP_ROOT}/images 40 | cd ${MOTCHALLENGE_STEP_ROOT}/images 41 | ``` 42 | 43 | 2. Download MOTChallenge images from https://motchallenge.net/data/MOTS.zip and 44 | unzip. 45 | 46 | ```bash 47 | wget ${MOTCHALLENGE_LINK} 48 | unzip ${MOTCHALLENGE_IMAGES}.zip 49 | ``` 50 | 51 | 3. Move and rename the data: 52 | 53 | ```bash 54 | # Create directories. 55 | mkdir train 56 | mkdir train/0002 57 | mkdir train/0009 58 | mkdir test 59 | mkdir test/0001 60 | mkdir test/0007 61 | 62 | # Copy data.
63 | cp -r MOTS/train/MOTS20-02/img1/* train/0002/ 64 | cp -r MOTS/train/MOTS20-09/img1/* train/0009/ 65 | cp -r MOTS/test/MOTS20-01/img1/* test/0001/ 66 | cp -r MOTS/test/MOTS20-07/img1/* test/0007/ 67 | 68 | # Clean up. 69 | rm -r MOTS 70 | ``` 71 | 72 | 4. Download the groundtruth MOTChallenge-STEP panoptic maps from 73 | https://motchallenge.net/data/motchallenge-step.tar.gz 74 | 75 | ```bash 76 | cd ${MOTCHALLENGE_STEP_ROOT} 77 | wget ${MOTCHALLENGE_GT_LINK} 78 | tar -xvf ${MOTCHALLENGE_GT}.tar.gz 79 | ``` 80 | 81 | The groundtruth panoptic map is encoded in the same way as described for the 82 | [KITTI-STEP dataset](./kitti_step.md). 83 | 84 | DeepLab2 requires the dataset to be converted to TFRecords for efficient reading 85 | and prefetching. To create the dataset for training and evaluation, run the 86 | following command: 87 | 88 | ```bash 89 | python deeplab2/data/build_step_data.py \ 90 | --step_root=${MOTCHALLENGE_STEP_ROOT} \ 91 | --output_dir=${OUTPUT_DIR} 92 | ``` 93 | 94 | This script outputs two sharded tfrecord files: `{train|test}@10.tfrecord`. For 95 | the `train` set, the tfrecords contain the RGB image pixels as well as their 96 | panoptic maps. For the `test` set, they contain RGB images only. These files 97 | will be used as the input for model training and evaluation. 98 | 99 | Optionally, you can also specify `--use_two_frames` to encode two 100 | consecutive frames into the tfrecord files. 101 | 102 | ## Citing MOTChallenge-STEP 103 | 104 | If you find this dataset helpful in your research, please use the following 105 | BibTeX entry. 106 | 107 | ``` 108 | @article{step_2021, 109 | author = {Weber, Mark and Xie, Jun and Collins, Maxwell and Zhu, Yukun and Voigtlaender, Paul and Adam, Hartwig and Green, Bradley and Geiger, Andreas and Leibe, Bastian and Cremers, Daniel and O\v{s}ep, Aljo\v{s}a and Leal-Taix\'{e}, Laura and Chen, Liang-Chieh}, 110 | journal = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks}, 111 | title = {{STEP}: Segmenting and Tracking Every Pixel}, 112 | year = {2021} 113 | } 114 | ``` 115 | -------------------------------------------------------------------------------- /g3doc/setup/your_own_dataset.md: -------------------------------------------------------------------------------- 1 | # Convert your own dataset for DeepLab2 framework 2 | 3 | You may want to train DeepLab2 on your own dataset. Here, we provide some 4 | guidance that will hopefully facilitate the preparation process. 5 | 6 | 1. Prepare your own dataset. 7 | * **Images** should be stored either in `jpg` or `png` format. 8 | * **Annotations** should be stored either in `png` or `json` format. The 9 | DeepLab2 framework assumes the panoptic label format (i.e., 10 | `panoptic_label = semantic_label * label_divisor + instance_id`, where 11 | the `label_divisor` should be larger than the maximum number of 12 | instances per image). 13 | * The `png` format refers to the case where we split the semantic 14 | label and instance id into RGB channels. For example, the R-channel 15 | stores the semantic label, while the G- and B-channels store the 16 | instance id (G: instance_id // 256 and B: instance_id % 256). 17 | * The `json` format refers to the 18 | [COCO panoptic json format](https://cocodataset.org/#format-data). 19 | 2. Convert the dataset to TFRecord.
20 | 21 | * Update our provided example code (e.g., 22 | [build_step_data.py](../../data/build_step_data.py) for `png` format, 23 | and [build_coco_data.py](../../data/build_coco_data.py) for `json` 24 | format) to convert your dataset to TFRecord. 25 | * Alternatively, if you are using your own binary to create TFRecords, 26 | make sure to include the same fields in the proto as what our example 27 | code creates. 28 | 29 | 3. Modify `dataset.py` (path: `${DEEPLAB2}/data/dataset.py`) to provide 30 | your dataset information. 31 | 32 | * Set the `panoptic_label_divisor` (i.e., the `label_divisor` above) 33 | correctly. Its value should be larger than the maximum number of 34 | instances that could appear per image in your dataset. 35 | * Set the `ignore_label` properly. Pixels annotated with `ignore_label` 36 | are not used during either training or evaluation. If your dataset does 37 | not contain the `ignore_label` annotations, you could simply set it to 38 | a large value (e.g., 255 as for 39 | [Cityscapes](https://www.cityscapes-dataset.com/)). 40 | * Set the `class_has_instances_list` properly. The variable specifies 41 | which classes belong to the `thing` category (i.e., countable objects 42 | such as people, cars). 43 | * Set the colormap (for visualization) properly. You may also need to 44 | define your own colormap (see `${DEEPLAB2}/trainer/vis_utils.py`). 45 | 46 | 4. Prepare the experiment config. 47 | 48 | * Update our provided example configs (path: 49 | `${DEEPLAB2}/configs/${DATASET}/${MODEL}/${BACKBONE}`) for your use 50 | case. A few things that may be worth your attention: 51 | * Set the `crop_size` correctly for both training and evaluation. See 52 | Q2 in [FAQ](../faq.md) for more details. 53 | * Tune the config flags for your dataset (e.g., `base_learning_rate`, 54 | `training_number_of_step`, and so on). 55 | 56 | Finally, if your dataset only contains semantic segmentation annotations, 57 | you could still use the DeepLab2 framework with some minor changes: 58 | 59 | 1. Since the code only reads panoptic data at the moment, you need to set 60 | `panoptic_label_divisor = k`, where k is any positive integer, 61 | `instance_id = 0`, and `class_has_instances_list = []` (i.e., we treat the 62 | dataset as one that contains only `stuff` classes), when you are (1) 63 | converting the dataset to TFRecord (e.g., 64 | [build_step_data.py](../../data/build_step_data.py)), 65 | and (2) adding dataset information in dataset.py. 66 | 2. Have a config similar to 67 | `${DEEPLAB2}/configs/cityscapes/panoptic_deeplab/resnet50_os32_semseg.textproto`, 68 | where the instance branch is not 69 | instantiated. 70 | 71 | At this point, you are good to go! Enjoy training DeepLab2! 72 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /model/builder_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for model.builder.""" 17 | 18 | import os 19 | from absl.testing import parameterized 20 | 21 | import tensorflow as tf 22 | 23 | from google.protobuf import text_format 24 | from deeplab2 import config_pb2 25 | from deeplab2.model import builder 26 | from deeplab2.model.decoder import motion_deeplab_decoder 27 | from deeplab2.model.encoder import axial_resnet_instances 28 | from deeplab2.model.encoder import mobilenet 29 | # resources dependency 30 | 31 | 32 | _CONFIG_PATH = 'deeplab2/configs/example' 33 | 34 | 35 | def _read_proto_file(filename, proto): 36 | filename = filename # OSS: removed internal filename loading. 37 | with tf.io.gfile.GFile(filename, 'r') as proto_file: 38 | return text_format.ParseLines(proto_file, proto) 39 | 40 | 41 | class BuilderTest(tf.test.TestCase, parameterized.TestCase): 42 | 43 | def test_resnet50_encoder_creation(self): 44 | backbone_options = config_pb2.ModelOptions.BackboneOptions( 45 | name='resnet50', output_stride=32) 46 | encoder = builder.create_encoder( 47 | backbone_options, 48 | tf.keras.layers.experimental.SyncBatchNormalization) 49 | self.assertIsInstance(encoder, axial_resnet_instances.ResNet50) 50 | 51 | @parameterized.parameters('mobilenet_v3_large', 'mobilenet_v3_small') 52 | def test_mobilenet_encoder_creation(self, model_name): 53 | backbone_options = config_pb2.ModelOptions.BackboneOptions( 54 | name=model_name, use_squeeze_and_excite=True, output_stride=32) 55 | encoder = builder.create_encoder( 56 | backbone_options, 57 | tf.keras.layers.experimental.SyncBatchNormalization) 58 | self.assertIsInstance(encoder, mobilenet.MobileNet) 59 | 60 | def test_resnet_encoder_creation(self): 61 | backbone_options = config_pb2.ModelOptions.BackboneOptions( 62 | name='max_deeplab_s', output_stride=32) 63 | encoder = builder.create_resnet_encoder( 64 | backbone_options, 65 | bn_layer=tf.keras.layers.experimental.SyncBatchNormalization) 66 | self.assertIsInstance(encoder, axial_resnet_instances.MaXDeepLabS) 67 | 68 | def test_decoder_creation(self): 69 | proto_filename = os.path.join( 70 | _CONFIG_PATH, 'example_kitti-step_motion_deeplab.textproto') 71 | model_options = _read_proto_file(proto_filename, config_pb2.ModelOptions()) 72 | motion_decoder = builder.create_decoder( 73 | model_options, tf.keras.layers.experimental.SyncBatchNormalization, 74 | ignore_label=255) 75 | self.assertIsInstance(motion_decoder, 76 | motion_deeplab_decoder.MotionDeepLabDecoder) 77 | 78 | 79 | if __name__ == '__main__': 80 | tf.test.main() 81 | 
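The tests above double as a reference for the public builder API. As a standalone illustration, the following minimal sketch constructs an encoder the same way `test_resnet50_encoder_creation` does; using plain `tf.keras.layers.BatchNormalization` instead of the synchronized variant is an assumption for single-device use.

```python
import tensorflow as tf

from deeplab2 import config_pb2
from deeplab2.model import builder

# Backbone configuration mirroring the ResNet-50 test case above.
backbone_options = config_pb2.ModelOptions.BackboneOptions(
    name='resnet50', output_stride=32)

# Assumption: plain BatchNormalization for single-device experimentation;
# the tests pass tf.keras.layers.experimental.SyncBatchNormalization instead.
encoder = builder.create_encoder(
    backbone_options, tf.keras.layers.BatchNormalization)

print(type(encoder).__name__)  # Expected: ResNet50
```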
-------------------------------------------------------------------------------- /model/decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /model/decoder/aspp_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for aspp.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2.model.decoder import aspp 20 | from deeplab2.utils import test_utils 21 | 22 | 23 | class AsppTest(tf.test.TestCase): 24 | 25 | def test_aspp_pool_error(self): 26 | pool = aspp.ASPPPool(output_channels=64, name='') 27 | 28 | # Should pass without an error. 29 | pool.set_pool_size((None, None)) 30 | 31 | with self.assertRaises(ValueError): 32 | # Should raise an error. 
33 | pool.set_pool_size((2, None)) 34 | 35 | def test_aspp_conv_atrous_rate_shape(self): 36 | atrous_rates = [2, 6, 12, 18] 37 | for rate in atrous_rates: 38 | conv = aspp.ASPPConv(output_channels=64, atrous_rate=rate, name='') 39 | input_tensor = tf.random.uniform(shape=(2, 12, 12, 3)) 40 | 41 | output = conv(input_tensor) 42 | expected_shape = [2, 12, 12, 64] 43 | self.assertListEqual(output.shape.as_list(), expected_shape) 44 | 45 | def test_aspp_conv_non_negative(self): 46 | conv = aspp.ASPPConv(output_channels=12, atrous_rate=2, name='') 47 | input_tensor = tf.random.uniform(shape=(2, 17, 17, 3)) 48 | 49 | output = conv(input_tensor) 50 | self.assertTrue((output.numpy() >= 0.0).all()) 51 | 52 | def test_aspp_pool_shape(self): 53 | pool = aspp.ASPPPool(output_channels=64, name='') 54 | input_tensor = tf.random.uniform(shape=(2, 12, 12, 3)) 55 | 56 | output = pool(input_tensor) 57 | expected_shape = [2, 12, 12, 64] 58 | self.assertListEqual(output.shape.as_list(), expected_shape) 59 | 60 | def test_aspp_pool_non_negative(self): 61 | pool = aspp.ASPPPool(output_channels=12, name='') 62 | input_tensor = tf.random.uniform(shape=(2, 17, 17, 3)) 63 | 64 | output = pool(input_tensor) 65 | self.assertTrue((output.numpy() >= 0.0).all()) 66 | 67 | def test_aspp_wrong_atrous_rate(self): 68 | with self.assertRaises(ValueError): 69 | _ = aspp.ASPP(output_channels=64, atrous_rates=[1, 2, 3, 4]) 70 | 71 | @test_utils.test_all_strategies 72 | def test_aspp_shape(self, strategy): 73 | with strategy.scope(): 74 | for bn_layer in test_utils.NORMALIZATION_LAYERS: 75 | aspp_layer = aspp.ASPP( 76 | output_channels=64, atrous_rates=[6, 12, 18], bn_layer=bn_layer) 77 | input_tensor = tf.random.uniform(shape=(2, 32, 32, 3)) 78 | 79 | output = aspp_layer(input_tensor) 80 | expected_shape = [2, 32, 32, 64] 81 | self.assertListEqual(output.shape.as_list(), expected_shape) 82 | 83 | def test_aspp_non_negative(self): 84 | aspp_layer = aspp.ASPP(output_channels=32, atrous_rates=[4, 8, 16]) 85 | input_tensor = tf.random.uniform(shape=(2, 32, 32, 3)) 86 | 87 | output = aspp_layer(input_tensor) 88 | self.assertTrue((output.numpy() >= 0.0).all()) 89 | 90 | if __name__ == '__main__': 91 | tf.test.main() 92 | -------------------------------------------------------------------------------- /model/decoder/deeplabv3.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains code to build a DeepLabV3. 17 | 18 | Reference: 19 | - [Rethinking Atrous Convolution for Semantic Image Segmentation]( 20 | https://arxiv.org/pdf/1706.05587.pdf) 21 | """ 22 | import tensorflow as tf 23 | 24 | from deeplab2 import common 25 | from deeplab2.model.decoder import aspp 26 | from deeplab2.model.layers import convolutions 27 | 28 | 29 | layers = tf.keras.layers 30 | 31 | 32 | class DeepLabV3(layers.Layer): 33 | """A DeepLabV3 model. 
34 | 35 | This model takes in features from an encoder and performs multi-scale context 36 | aggregation with the help of an ASPP layer. Finally, a classification head is 37 | used to predict a semantic segmentation. 38 | """ 39 | 40 | def __init__(self, 41 | decoder_options, 42 | deeplabv3_options, 43 | bn_layer=tf.keras.layers.BatchNormalization): 44 | """Creates a DeepLabV3 decoder of type layers.Layer. 45 | 46 | Args: 47 | decoder_options: Decoder options as defined in config_pb2.DecoderOptions. 48 | deeplabv3_options: Model options as defined in 49 | config_pb2.ModelOptions.DeeplabV3Options. 50 | bn_layer: An optional tf.keras.layers.Layer that computes the 51 | normalization (default: tf.keras.layers.BatchNormalization). 52 | """ 53 | super(DeepLabV3, self).__init__(name='DeepLabV3') 54 | 55 | self._feature_name = decoder_options.feature_key 56 | self._aspp = aspp.ASPP(decoder_options.aspp_channels, 57 | decoder_options.atrous_rates, 58 | bn_layer=bn_layer) 59 | 60 | self._classifier_conv_bn_act = convolutions.Conv2DSame( 61 | decoder_options.decoder_channels, 62 | kernel_size=3, 63 | name='classifier_conv_bn_act', 64 | use_bias=False, 65 | use_bn=True, 66 | bn_layer=bn_layer, 67 | activation='relu') 68 | 69 | self._final_conv = convolutions.Conv2DSame( 70 | deeplabv3_options.num_classes, kernel_size=1, name='final_conv') 71 | 72 | def set_pool_size(self, pool_size): 73 | """Sets the pooling size of the ASPP pooling layer. 74 | 75 | Args: 76 | pool_size: A tuple specifying the pooling size of the ASPP pooling layer. 77 | """ 78 | self._aspp.set_pool_size(pool_size) 79 | 80 | def get_pool_size(self): 81 | return self._aspp.get_pool_size() 82 | 83 | def reset_pooling_layer(self): 84 | """Resets the ASPP pooling layer to global average pooling.""" 85 | self._aspp.reset_pooling_layer() 86 | 87 | def call(self, features, training=False): 88 | """Performs a forward pass. 89 | 90 | Args: 91 | features: A single input tf.Tensor or an input dict of tf.Tensor with 92 | shape [batch, height, width, channels]. If passed a dict, different keys 93 | should point to different features extracted by the encoder, e.g. 94 | low-level or high-level features. 95 | training: A boolean flag indicating whether training behavior should be 96 | used (default: False). 97 | 98 | Returns: 99 | A dictionary containing the semantic prediction under key 100 | common.PRED_SEMANTIC_LOGITS_KEY. 101 | """ 102 | if isinstance(features, tf.Tensor): 103 | feature = features 104 | else: 105 | feature = features[self._feature_name] 106 | 107 | x = self._aspp(feature, training=training) 108 | 109 | x = self._classifier_conv_bn_act(x, training=training) 110 | 111 | return {common.PRED_SEMANTIC_LOGITS_KEY: self._final_conv(x)} 112 | 113 | @property 114 | def checkpoint_items(self): 115 | items = { 116 | common.CKPT_DEEPLABV3_ASPP: self._aspp, 117 | common.CKPT_DEEPLABV3_CLASSIFIER_CONV_BN_ACT: 118 | self._classifier_conv_bn_act, 119 | common.CKPT_SEMANTIC_LAST_LAYER: self._final_conv, 120 | } 121 | return items 122 | -------------------------------------------------------------------------------- /model/decoder/max_deeplab_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for max_deeplab.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2 import common 21 | from deeplab2 import config_pb2 22 | from deeplab2.model.decoder import max_deeplab 23 | 24 | 25 | def _create_max_deeplab_example_proto(num_non_void_classes=19): 26 | semantic_decoder = config_pb2.DecoderOptions( 27 | feature_key='feature_semantic', atrous_rates=[6, 12, 18]) 28 | auxiliary_semantic_head = config_pb2.HeadOptions( 29 | output_channels=num_non_void_classes, head_channels=256) 30 | pixel_space_head = config_pb2.HeadOptions( 31 | output_channels=128, head_channels=256) 32 | max_deeplab_options = config_pb2.ModelOptions.MaXDeepLabOptions( 33 | pixel_space_head=pixel_space_head, 34 | auxiliary_semantic_head=auxiliary_semantic_head) 35 | # Add features from lowest to highest. 36 | max_deeplab_options.auxiliary_low_level.add( 37 | feature_key='res3', channels_project=64) 38 | max_deeplab_options.auxiliary_low_level.add( 39 | feature_key='res2', channels_project=32) 40 | return config_pb2.ModelOptions( 41 | decoder=semantic_decoder, max_deeplab=max_deeplab_options) 42 | 43 | 44 | class MaXDeeplabTest(tf.test.TestCase): 45 | 46 | def test_max_deeplab_decoder_output_shape(self): 47 | num_non_void_classes = 19 48 | num_mask_slots = 127 49 | model_options = _create_max_deeplab_example_proto( 50 | num_non_void_classes=num_non_void_classes) 51 | decoder = max_deeplab.MaXDeepLab( 52 | max_deeplab_options=model_options.max_deeplab, 53 | ignore_label=255, 54 | decoder_options=model_options.decoder) 55 | 56 | input_dict = { 57 | 'res2': 58 | tf.random.uniform([2, 17, 17, 256]), 59 | 'res3': 60 | tf.random.uniform([2, 9, 9, 512]), 61 | 'transformer_class_feature': 62 | tf.random.uniform([2, num_mask_slots, 256]), 63 | 'transformer_mask_feature': 64 | tf.random.uniform([2, num_mask_slots, 256]), 65 | 'feature_panoptic': 66 | tf.random.uniform([2, 17, 17, 256]), 67 | 'feature_semantic': 68 | tf.random.uniform([2, 5, 5, 2048]) 69 | } 70 | resulting_dict = decoder(input_dict) 71 | self.assertListEqual( 72 | resulting_dict[common.PRED_SEMANTIC_LOGITS_KEY].shape.as_list(), 73 | [2, 17, 17, 19]) # Stride 4 74 | self.assertListEqual( 75 | resulting_dict[ 76 | common.PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY].shape.as_list(), 77 | [2, 17, 17, 128]) # Stride 4 78 | self.assertListEqual( 79 | resulting_dict[ 80 | common.PRED_TRANSFORMER_CLASS_LOGITS_KEY].shape.as_list(), 81 | # Non-void classes and a void class. 82 | [2, num_mask_slots, num_non_void_classes + 1]) 83 | self.assertListEqual( 84 | resulting_dict[common.PRED_PIXEL_SPACE_MASK_LOGITS_KEY].shape.as_list(), 85 | [2, 17, 17, num_mask_slots]) # Stride 4. 86 | 87 | 88 | if __name__ == '__main__': 89 | tf.test.main() 90 | -------------------------------------------------------------------------------- /model/encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /model/encoder/atrous_consistency_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests of atrous consistencies for axial_resnet_instances.""" 17 | 18 | from absl.testing import parameterized 19 | import tensorflow as tf 20 | 21 | from deeplab2.model import test_utils 22 | from deeplab2.model.encoder import axial_resnet_instances 23 | 24 | 25 | class AtrousConsistencyTest(tf.test.TestCase, parameterized.TestCase): 26 | 27 | @parameterized.product( 28 | (dict(model_name='resnet50', backbone_layer_multiplier=1), 29 | dict(model_name='resnet50_beta', backbone_layer_multiplier=1), 30 | dict(model_name='wide_resnet41', backbone_layer_multiplier=1), 31 | dict(model_name='swidernet', backbone_layer_multiplier=2)), 32 | output_stride=[8, 16, 32]) 33 | def test_model_atrous_consistency_with_output_stride_four( 34 | self, model_name, backbone_layer_multiplier, output_stride): 35 | tf.random.set_seed(0) 36 | 37 | # Create the input. 38 | pixel_inputs = test_utils.create_test_input(1, 225, 225, 3) 39 | 40 | # Create the model and the weights. 41 | model_1 = axial_resnet_instances.get_model( 42 | model_name, 43 | # Test with small models only. 44 | num_blocks=[2, 2, 2, 2], 45 | backbone_layer_multiplier=backbone_layer_multiplier, 46 | bn_layer=tf.keras.layers.BatchNormalization, 47 | conv_kernel_weight_decay=0.0001, 48 | output_stride=4) 49 | 50 | # Create the weights. 51 | model_1(pixel_inputs, training=False) 52 | 53 | # Set the batch norm gamma as non-zero so that the 3x3 convolution affects 54 | # the output. 55 | for weight in model_1.trainable_weights: 56 | if '/gamma:0' in weight.name: 57 | weight.assign(tf.ones_like(weight)) 58 | 59 | # Dense feature extraction followed by subsampling. 60 | pixel_outputs = model_1(pixel_inputs, training=False)['res5'] 61 | downsampling_stride = output_stride // 4 62 | expected = pixel_outputs[:, ::downsampling_stride, ::downsampling_stride, :] 63 | 64 | # Feature extraction at the nominal network rate. 65 | model_2 = axial_resnet_instances.get_model( 66 | model_name, 67 | # Test with small models only. 
68 |         num_blocks=[2, 2, 2, 2],
69 |         backbone_layer_multiplier=backbone_layer_multiplier,
70 |         bn_layer=tf.keras.layers.BatchNormalization,
71 |         conv_kernel_weight_decay=0.0001,
72 |         output_stride=output_stride)
73 |     # Create the weights.
74 |     model_2(pixel_inputs, training=False)
75 |     # Make the two networks use the same weights.
76 |     model_2.set_weights(model_1.get_weights())
77 |     output = model_2(pixel_inputs, training=False)['res5']
78 | 
79 |     # Normalize the outputs. Since we set batch_norm gamma to 1, the output
80 |     # activations can explode to a large standard deviation, which sometimes
81 |     # causes numerical errors beyond the tolerances.
82 |     normalizing_factor = tf.math.reduce_std(expected)
83 |     # Compare normalized outputs.
84 |     self.assertAllClose(output / normalizing_factor,
85 |                         expected / normalizing_factor,
86 |                         atol=1e-4, rtol=1e-4)
87 | 
88 | 
89 | if __name__ == '__main__':
90 |   tf.test.main()
91 | 
--------------------------------------------------------------------------------
/model/encoder/axial_resnet_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for axial_resnet."""
17 | 
18 | import numpy as np
19 | import tensorflow as tf
20 | 
21 | from deeplab2.model.encoder import axial_resnet
22 | 
23 | 
24 | class AxialResNetTest(tf.test.TestCase):
25 | 
26 |   def test_axial_resnet_correct_output_shape(self):
27 |     model = axial_resnet.AxialResNet('max_deeplab_s')
28 |     endpoints = model(tf.zeros([2, 65, 65, 3]), training=False)
29 |     self.assertListEqual(endpoints['backbone_output'].get_shape().as_list(),
30 |                          [2, 5, 5, 2048])
31 |     self.assertListEqual(
32 |         endpoints['transformer_class_feature'].get_shape().as_list(),
33 |         [2, 128, 256])
34 |     self.assertListEqual(
35 |         endpoints['transformer_mask_feature'].get_shape().as_list(),
36 |         [2, 128, 256])
37 |     self.assertListEqual(endpoints['feature_panoptic'].get_shape().as_list(),
38 |                          [2, 17, 17, 256])
39 |     self.assertListEqual(endpoints['feature_semantic'].get_shape().as_list(),
40 |                          [2, 5, 5, 2048])
41 |     num_params = np.sum(
42 |         [np.prod(v.get_shape().as_list()) for v in model.trainable_weights])
43 |     self.assertEqual(num_params, 61726624)
44 | 
45 | if __name__ == '__main__':
46 |   tf.test.main()
47 | 
--------------------------------------------------------------------------------
/model/encoder/model_export_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests of model exports for axial_resnet_instances.""" 17 | 18 | import os 19 | 20 | from absl import flags 21 | from absl.testing import parameterized 22 | import tensorflow as tf 23 | 24 | from deeplab2.model.encoder import axial_resnet_instances 25 | 26 | FLAGS = flags.FLAGS 27 | 28 | 29 | class ModelExportTest(tf.test.TestCase, parameterized.TestCase): 30 | 31 | @parameterized.parameters( 32 | ('resnet50',), 33 | ('resnet50_beta',), 34 | ('max_deeplab_s_backbone',), 35 | ('max_deeplab_l_backbone',), 36 | ('axial_resnet_s',), 37 | ('axial_resnet_l',), 38 | ('axial_deeplab_s',), 39 | ('axial_deeplab_l',), 40 | ('swidernet',), 41 | ('axial_swidernet',), 42 | ) 43 | def test_model_export(self, model_name): 44 | model = axial_resnet_instances.get_model( 45 | model_name, 46 | output_stride=16, 47 | backbone_layer_multiplier=1.0, 48 | bn_layer=tf.keras.layers.BatchNormalization, 49 | conv_kernel_weight_decay=0.0001, 50 | # Test with small models only. 51 | num_blocks=[2, 2, 2, 2], 52 | # Disable drop path as it is not compatible with model exporting. 53 | block_group_config={'drop_path_keep_prob': 1.0}) 54 | model(tf.keras.Input([257, 257, 3], batch_size=1), training=False) 55 | export_dir = os.path.join( 56 | FLAGS.test_tmpdir, 'test_model_export', model_name) 57 | model.save(export_dir) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /model/kmax_deeplab_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for kmax_deeplab.""" 17 | 18 | import os 19 | 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from google.protobuf import text_format 24 | from deeplab2 import common 25 | from deeplab2 import config_pb2 26 | from deeplab2.data import dataset 27 | from deeplab2.model import kmax_deeplab 28 | from deeplab2.model import utils 29 | # resources dependency 30 | 31 | _CONFIG_PATH = 'deeplab2/configs/example' 32 | 33 | 34 | def _read_proto_file(filename, proto): 35 | filename = filename # OSS: removed internal filename loading. 
36 | with tf.io.gfile.GFile(filename, 'r') as proto_file: 37 | return text_format.ParseLines(proto_file, proto) 38 | 39 | 40 | def _create_model_from_test_proto(file_name, 41 | dataset_name='cityscapes_panoptic'): 42 | proto_filename = os.path.join(_CONFIG_PATH, file_name) 43 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions()) 44 | return kmax_deeplab.KMaXDeepLab( 45 | config, 46 | dataset.MAP_NAME_TO_DATASET_INFO[dataset_name]), config 47 | 48 | 49 | class DeeplabTest(tf.test.TestCase): 50 | 51 | def test_deeplab_with_kmax_convnext_base(self): 52 | model, experiment_options = _create_model_from_test_proto( 53 | 'example_coco_kmax_meta_convnext.textproto', 54 | dataset_name='coco_panoptic') 55 | train_crop_size = tuple(experiment_options.train_dataset_options.crop_size) 56 | input_tensor = tf.random.uniform( 57 | shape=(2, train_crop_size[0], train_crop_size[1], 3)) 58 | stride_4_size = utils.scale_mutable_sequence(train_crop_size, 0.25) 59 | expected_semantic_shape = [ 60 | 2, stride_4_size[0], stride_4_size[1], experiment_options.model_options. 61 | max_deeplab.auxiliary_semantic_head.output_channels] 62 | expected_transformer_class_logits_shape = [ 63 | 2, 128, experiment_options.model_options. 64 | max_deeplab.auxiliary_semantic_head.output_channels] 65 | expected_pixel_space_normalized_feature_shape = [ 66 | 2, stride_4_size[0], stride_4_size[1], experiment_options.model_options. 67 | max_deeplab.pixel_space_head.output_channels] 68 | expected_pixel_space_mask_logits_shape = [ 69 | 2, stride_4_size[0], stride_4_size[1], 128] 70 | resulting_dict = model(input_tensor, training=True) 71 | self.assertListEqual( 72 | resulting_dict[common.PRED_SEMANTIC_LOGITS_KEY].shape.as_list(), 73 | expected_semantic_shape) 74 | self.assertListEqual( 75 | resulting_dict[ 76 | common.PRED_TRANSFORMER_CLASS_LOGITS_KEY].shape.as_list(), 77 | expected_transformer_class_logits_shape) 78 | self.assertListEqual( 79 | resulting_dict[ 80 | common.PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY].shape.as_list(), 81 | expected_pixel_space_normalized_feature_shape) 82 | self.assertListEqual( 83 | resulting_dict[common.PRED_PIXEL_SPACE_MASK_LOGITS_KEY].shape.as_list(), 84 | expected_pixel_space_mask_logits_shape) 85 | num_params = 0 86 | for v in model.trainable_weights: 87 | params = np.prod(v.get_shape().as_list()) 88 | # Exclude the auxiliary semantic head. 89 | if 'auxiliary_semantic' not in v.name: 90 | num_params += params 91 | self.assertEqual(num_params, 121513304) 92 | 93 | 94 | if __name__ == '__main__': 95 | tf.test.main() 96 | -------------------------------------------------------------------------------- /model/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /model/layers/activations.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Defines a set of useful activation functions.""" 17 | import functools 18 | import tensorflow as tf 19 | 20 | 21 | def gelu(input_tensor, approximate=False): 22 | """Gaussian Error Linear Unit. 23 | 24 | Reference: 25 | Gaussian Error Linear Units (GELUs), Dan Hendrycks, Kevin Gimpel, arXiv 2016. 26 | 27 | Args: 28 | input_tensor: A tensor with an arbitrary shape. 29 | approximate: A boolean, whether to enable approximation. 30 | 31 | Returns: 32 | The activated input tensor. 33 | """ 34 | return tf.keras.activations.gelu(input_tensor, approximate=approximate) 35 | 36 | 37 | def hard_sigmoid(input_tensor): 38 | """Hard sigmoid activation function. 39 | 40 | Args: 41 | input_tensor: A tensor with an arbitrary shape. 42 | 43 | Returns: 44 | The activated input tensor. 45 | """ 46 | input_tensor = tf.convert_to_tensor(input_tensor) 47 | return tf.nn.relu6(input_tensor + tf.constant(3.)) * 0.16667 48 | 49 | 50 | def relu6(input_tensor): 51 | """Relu6 activation function. 52 | 53 | Args: 54 | input_tensor: A tensor with an arbitrary shape. 55 | 56 | Returns: 57 | The activated input tensor. 58 | """ 59 | input_tensor = tf.convert_to_tensor(input_tensor) 60 | return tf.nn.relu6(input_tensor) 61 | 62 | 63 | def swish(input_tensor): 64 | """Swish or SiLU activation function. 65 | 66 | Args: 67 | input_tensor: A tensor with an arbitrary shape. 68 | 69 | Returns: 70 | The activated input tensor. 71 | """ 72 | input_tensor = tf.convert_to_tensor(input_tensor) 73 | return tf.nn.silu(input_tensor) 74 | 75 | 76 | def hard_swish(input_tensor): 77 | """Hard Swish function. 78 | 79 | Args: 80 | input_tensor: A tensor with an arbitrary shape. 81 | 82 | Returns: 83 | The activated input tensor. 84 | """ 85 | input_tensor = tf.convert_to_tensor(input_tensor) 86 | return input_tensor * tf.nn.relu6( 87 | input_tensor + tf.constant(3.)) * (1. / 6.) 88 | 89 | 90 | def identity(input_tensor): 91 | """Identity function. 92 | 93 | Useful for helping in quantization. 94 | 95 | Args: 96 | input_tensor: A tensor with an arbitrary shape. 97 | 98 | Returns: 99 | The activated input tensor. 100 | """ 101 | input_tensor = tf.convert_to_tensor(input_tensor) 102 | return tf.identity(input_tensor) 103 | 104 | 105 | def get_activation(identifier): 106 | """Gets activation function via input identifier. 107 | 108 | This function returns the specified customized activation function, if there 109 | is any. Otherwise, tf.keras.activations.get is called. 110 | 111 | Args: 112 | identifier: A string, name of the activation function. 113 | 114 | Returns: 115 | The specified activation function. 
116 | """ 117 | if isinstance(identifier, str): 118 | name_to_fn = { 119 | 'gelu': functools.partial(gelu, approximate=False), 120 | 'approximated_gelu': functools.partial(gelu, approximate=True), 121 | 'silu': swish, 122 | 'swish': swish, 123 | 'hard_swish': hard_swish, 124 | 'relu6': relu6, 125 | 'hard_sigmoid': hard_sigmoid, 126 | 'identity': identity, 127 | 'none': identity, 128 | } 129 | identifier = str(identifier).lower() 130 | if identifier in name_to_fn: 131 | return name_to_fn[identifier] 132 | return tf.keras.activations.get(identifier) 133 | -------------------------------------------------------------------------------- /model/layers/activations_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for activations.py.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2.model.layers import activations 20 | 21 | 22 | class ActivationsTest(tf.test.TestCase): 23 | 24 | def test_gelu(self): 25 | expected_data = [[0.14967535, 0., -0.10032465], 26 | [-0.15880796, -0.04540223, 2.9963627]] 27 | gelu_data = activations.gelu([[.25, 0, -.25], [-1, -2, 3]], 28 | approximate=True) 29 | self.assertAllClose(expected_data, gelu_data) 30 | gelu_data_via_get_activation = activations.get_activation( 31 | 'approximated_gelu')([[.25, 0, -.25], [-1, -2, 3]]) 32 | self.assertAllClose(expected_data, gelu_data_via_get_activation) 33 | 34 | 35 | if __name__ == '__main__': 36 | tf.test.main() 37 | -------------------------------------------------------------------------------- /model/layers/axial_blocks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for axial_blocks.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import axial_blocks 21 | 22 | 23 | class AxialBlocksTest(tf.test.TestCase): 24 | 25 | def test_conv_basic_block_correct_output_shape(self): 26 | layer = axial_blocks.AxialBlock( 27 | filters_list=[256, 256], 28 | strides=2) 29 | float_training_tensor = tf.constant(0.0, dtype=tf.float32) 30 | output = layer((tf.zeros([2, 65, 65, 32]), 31 | float_training_tensor)) 32 | self.assertListEqual(output.get_shape().as_list(), [2, 33, 33, 256]) 33 | 34 | def test_conv_bottleneck_block_correct_output_shape(self): 35 | layer = axial_blocks.AxialBlock( 36 | filters_list=[64, 64, 256], 37 | strides=1) 38 | float_training_tensor = tf.constant(0.0, dtype=tf.float32) 39 | output = layer((tf.zeros([2, 65, 65, 32]), 40 | float_training_tensor)) 41 | self.assertListEqual(output.get_shape().as_list(), [2, 65, 65, 256]) 42 | 43 | def test_axial_block_correct_output_shape(self): 44 | layer = axial_blocks.AxialBlock( 45 | filters_list=[128, 64, 256], 46 | strides=2, 47 | attention_type='axial') 48 | float_training_tensor = tf.constant(0.0, dtype=tf.float32) 49 | output = layer((tf.zeros([2, 65, 65, 32]), 50 | float_training_tensor)) 51 | self.assertListEqual(output.get_shape().as_list(), [2, 33, 33, 256]) 52 | 53 | if __name__ == '__main__': 54 | tf.test.main() 55 | -------------------------------------------------------------------------------- /model/layers/axial_layers_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for axial_layers.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import axial_layers 21 | 22 | 23 | class AxialLayersTest(tf.test.TestCase): 24 | 25 | def test_default_axial_attention_layer_output_shape(self): 26 | layer = axial_layers.AxialAttention() 27 | output = layer(tf.zeros([10, 5, 32])) 28 | self.assertListEqual(output.get_shape().as_list(), [10, 5, 1024]) 29 | 30 | def test_axial_attention_2d_layer_output_shape(self): 31 | layer = axial_layers.AxialAttention2D() 32 | output = layer(tf.zeros([2, 5, 5, 32])) 33 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 1024]) 34 | 35 | def test_change_filters_output_shape(self): 36 | layer = axial_layers.AxialAttention2D(filters=32) 37 | output = layer(tf.zeros([2, 5, 5, 32])) 38 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 64]) 39 | 40 | def test_value_expansion_output_shape(self): 41 | layer = axial_layers.AxialAttention2D(value_expansion=1) 42 | output = layer(tf.zeros([2, 5, 5, 32])) 43 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 512]) 44 | 45 | def test_global_attention_output_shape(self): 46 | layer = axial_layers.GlobalAttention2D() 47 | output = layer(tf.zeros([2, 5, 5, 32])) 48 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 1024]) 49 | 50 | def test_stride_two_output_shape(self): 51 | layer = axial_layers.AxialAttention2D(strides=2) 52 | output = layer(tf.zeros([2, 5, 5, 32])) 53 | self.assertListEqual(output.get_shape().as_list(), [2, 3, 3, 1024]) 54 | 55 | if __name__ == '__main__': 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /model/layers/blocks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for blocks.py.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2.model.layers import blocks 20 | 21 | 22 | class BlocksTest(tf.test.TestCase): 23 | 24 | def test_inverted_bottleneck_block_output_shape(self): 25 | batch, height, width, input_channels = 2, 17, 17, 4 26 | output_channels = 6 27 | input_tensor = tf.random.uniform( 28 | shape=(batch, height, width, input_channels)) 29 | ivb_block = blocks.InvertedBottleneckBlock( 30 | in_filters=input_channels, 31 | out_filters=output_channels, 32 | expand_ratio=2, 33 | strides=1, 34 | name='inverted_bottleneck', 35 | ) 36 | output_tensor, _ = ivb_block(input_tensor) 37 | self.assertListEqual(output_tensor.get_shape().as_list(), 38 | [batch, height, width, output_channels]) 39 | 40 | def test_inverted_bottleneck_block_feature_map_alignment(self): 41 | batch, height, width, input_channels = 2, 17, 17, 128 42 | output_channels = 256 43 | input_tensor = tf.random.uniform( 44 | shape=(batch, height, width, input_channels)) 45 | ivb_block1 = blocks.InvertedBottleneckBlock( 46 | in_filters=input_channels, 47 | out_filters=output_channels, 48 | expand_ratio=2, 49 | strides=2, 50 | name='inverted_bottleneck1', 51 | ) 52 | ivb_block1(input_tensor, False) 53 | weights = ivb_block1.get_weights() 54 | output_tensor, _ = ivb_block1(input_tensor, False) 55 | 56 | ivb_block2 = blocks.InvertedBottleneckBlock( 57 | in_filters=input_channels, 58 | out_filters=output_channels, 59 | expand_ratio=2, 60 | strides=1, 61 | name='inverted_bottleneck2', 62 | ) 63 | ivb_block2(input_tensor, False) 64 | ivb_block2.set_weights(weights) 65 | expected = ivb_block2(input_tensor, False)[0][:, ::2, ::2, :] 66 | 67 | self.assertAllClose(ivb_block1.get_weights(), ivb_block2.get_weights(), 68 | atol=1e-4, rtol=1e-4) 69 | self.assertAllClose(output_tensor, expected, atol=1e-4, rtol=1e-4) 70 | 71 | if __name__ == '__main__': 72 | tf.test.main() 73 | -------------------------------------------------------------------------------- /model/layers/drop_path_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Test for drop_path.py.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import drop_path 21 | 22 | # Set a fixed random seed. 23 | tf.random.set_seed(1) 24 | 25 | 26 | class DropPathTest(tf.test.TestCase): 27 | 28 | def test_drop_path_keep_prob_one(self): 29 | # Test drop_path_keep_prob = 1, where output should be equal to input. 30 | drop_path_keep_prob = 1.0 31 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32)) 32 | layer_op = drop_path.DropPath(drop_path_keep_prob) 33 | output_tensor = layer_op(input_tensor, training=True) 34 | np.testing.assert_equal(input_tensor.numpy(), output_tensor.numpy()) 35 | 36 | def test_not_training_mode(self): 37 | # Test not training mode, where output should be equal to input. 
38 | drop_path_keep_prob = 0.8 39 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32)) 40 | layer_op = drop_path.DropPath(drop_path_keep_prob) 41 | output_tensor = layer_op(input_tensor, training=False) 42 | np.testing.assert_equal(input_tensor.numpy(), output_tensor.numpy()) 43 | 44 | def test_drop_path(self): 45 | drop_path_keep_prob = 0.8 46 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32)) 47 | layer_op = drop_path.DropPath(drop_path_keep_prob) 48 | output_tensor = layer_op(input_tensor, training=True) 49 | self.assertFalse(np.array_equal(input_tensor.numpy(), 50 | output_tensor.numpy())) 51 | 52 | def test_constant_drop_path_schedule(self): 53 | keep_prob_for_last_stage = 0.8 54 | current_stage_keep_prob = drop_path.get_drop_path_keep_prob( 55 | keep_prob_for_last_stage, 56 | schedule='constant', 57 | current_stage=2, 58 | num_stages=5) 59 | self.assertEqual(current_stage_keep_prob, keep_prob_for_last_stage) 60 | 61 | def test_linear_drop_path_schedule(self): 62 | keep_prob_for_last_stage = 0.8 63 | current_stage_keep_prob = drop_path.get_drop_path_keep_prob( 64 | keep_prob_for_last_stage, 65 | schedule='linear', 66 | current_stage=1, 67 | num_stages=4) 68 | self.assertEqual(current_stage_keep_prob, 0.95) 69 | 70 | def test_unknown_drop_path_schedule(self): 71 | with self.assertRaises(ValueError): 72 | _ = drop_path.get_drop_path_keep_prob(0.8, 'unknown', 1, 4) 73 | 74 | 75 | if __name__ == '__main__': 76 | tf.test.main() 77 | -------------------------------------------------------------------------------- /model/layers/moat_attention_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for moat_attention.""" 17 | 18 | from absl import logging 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | from deeplab2.model.layers import moat_attention 23 | 24 | 25 | class MOATAttentionTest(tf.test.TestCase, parameterized.TestCase): 26 | 27 | def _log_param_specs(self, layer): 28 | num_params = sum([ 29 | np.prod(var.get_shape().as_list()) for var in layer.trainable_weights 30 | ]) 31 | format_str = '{{:<{0}s}}\t{{:<{1}s}}'.format( 32 | max([len(v.name) for v in layer.trainable_weights]), 33 | max([len('{}'.format(v.get_shape())) for v in 34 | layer.trainable_weights])) 35 | format_str = ' >> ' + format_str + '\t{:>5.2f}%' 36 | 37 | for v in layer.trainable_weights: 38 | v_shape = v.get_shape().as_list() 39 | logging.info(format_str.format(v.name, '{}'.format(v_shape), 40 | np.prod(v_shape) / num_params * 100)) 41 | 42 | @parameterized.named_parameters( 43 | ('attention', None), 44 | ('attention_with_relative_position_embedding', '2d_multi_head'), 45 | ) 46 | def test_attention(self, relative_position_embedding_type): 47 | batch_size = 8 48 | height = 8 49 | width = 10 50 | hidden_size = 16 51 | head_size = 8 52 | query = tf.random.normal(shape=[batch_size, height, width, hidden_size], 53 | dtype=tf.float32) 54 | 55 | attention_layer = moat_attention.Attention( 56 | hidden_size=hidden_size, 57 | head_size=head_size, 58 | relative_position_embedding_type=relative_position_embedding_type) 59 | attention_output = attention_layer(query, training=True) 60 | self._log_param_specs(attention_layer) 61 | 62 | self.assertEqual(attention_output.shape.as_list(), 63 | [batch_size, height * width, hidden_size]) 64 | 65 | if __name__ == '__main__': 66 | tf.test.main() 67 | -------------------------------------------------------------------------------- /model/layers/moat_blocks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for moat_blocks.""" 17 | 18 | from absl import logging 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | from deeplab2.model.layers import moat_blocks 23 | 24 | 25 | class MOATBlocksTest(tf.test.TestCase, parameterized.TestCase): 26 | 27 | def _log_param_specs(self, layer): 28 | num_params = sum([ 29 | np.prod(var.get_shape().as_list()) for var in layer.trainable_weights 30 | ]) 31 | format_str = '{{:<{0}s}}\t{{:<{1}s}}'.format( 32 | max([len(v.name) for v in layer.trainable_weights]), 33 | max([len('{}'.format(v.get_shape())) for v in 34 | layer.trainable_weights])) 35 | format_str = ' >> ' + format_str + '\t{:>5.2f}%' 36 | 37 | for v in layer.trainable_weights: 38 | v_shape = v.get_shape().as_list() 39 | logging.info(format_str.format(v.name, '{}'.format(v_shape), 40 | np.prod(v_shape) / num_params * 100)) 41 | 42 | @parameterized.named_parameters( 43 | ('standard', 1), 44 | ('downsample', 2), 45 | ) 46 | def test_mbconv_block(self, stride): 47 | batch_size = 8 48 | height, width = 8, 8 49 | input_size = 16 50 | hidden_size = input_size * stride 51 | inputs = tf.random.normal(shape=[batch_size, height, width, input_size], 52 | dtype=tf.float32) 53 | block = moat_blocks.MBConvBlock(hidden_size=hidden_size, 54 | block_stride=stride,) 55 | output = block(inputs, training=True) 56 | self._log_param_specs(block) 57 | 58 | self.assertEqual(output.shape.as_list(), 59 | [batch_size, height // stride, width // stride, 60 | hidden_size]) 61 | 62 | @parameterized.named_parameters( 63 | ('standard', 1, False), 64 | ('downsample', 2, False), 65 | ('checkpointing', 1, True), 66 | ) 67 | def test_moat_block(self, stride, use_checkpointing): 68 | batch_size = 8 69 | height, width = 8, 8 70 | input_size = 16 71 | hidden_size = input_size * stride 72 | inputs = tf.random.normal(shape=[batch_size, height, width, input_size], 73 | dtype=tf.float32) 74 | block = moat_blocks.MOATBlock(hidden_size=hidden_size, 75 | block_stride=stride, 76 | window_size=[height//stride, width//stride], 77 | use_checkpointing=use_checkpointing) 78 | output = block(inputs, training=True) 79 | self._log_param_specs(block) 80 | 81 | self.assertEqual(output.shape.as_list(), 82 | [batch_size, height // stride, width // stride, 83 | hidden_size]) 84 | 85 | 86 | if __name__ == '__main__': 87 | tf.test.main() 88 | -------------------------------------------------------------------------------- /model/layers/positional_encodings_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for positional_encodings.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import positional_encodings 21 | 22 | 23 | class PositionalEncodingsTest(tf.test.TestCase): 24 | 25 | def test_compute_relative_distance_matrix_output_shape(self): 26 | output = positional_encodings._compute_relative_distance_matrix(33, 97) 27 | self.assertListEqual(output.get_shape().as_list(), [33, 97]) 28 | 29 | def test_relative_positional_encoding_output_shape(self): 30 | layer = positional_encodings.RelativePositionalEncoding( 31 | 33, 97, 32, 'rpe') 32 | output = layer(None) 33 | self.assertListEqual(output.get_shape().as_list(), [33, 97, 32]) 34 | 35 | def test_add_absolute_positional_encoding_1d_output_shape(self): 36 | layer = positional_encodings.AddAbsolutePositionalEncoding( 37 | 'ape1d', positional_encoding_type='1d') 38 | shape = [2, 5, 5, 3] 39 | output = layer(tf.zeros(shape)) 40 | self.assertEqual(len(layer.get_weights()), 10) 41 | self.assertListEqual(output.get_shape().as_list(), shape) 42 | 43 | def test_add_absolute_positional_encoding_2d_output_shape(self): 44 | layer = positional_encodings.AddAbsolutePositionalEncoding( 45 | 'ape2d', positional_encoding_type='2d') 46 | shape = [2, 5, 5, 3] 47 | output = layer(tf.zeros(shape)) 48 | self.assertEqual(len(layer.get_weights()), 5) 49 | self.assertListEqual(output.get_shape().as_list(), shape) 50 | 51 | def test_add_absolute_positional_encoding_none_output_shape(self): 52 | layer = positional_encodings.AddAbsolutePositionalEncoding( 53 | 'none', positional_encoding_type='none') 54 | shape = [2, 5, 5, 3] 55 | output = layer(tf.zeros(shape)) 56 | self.assertEqual(len(layer.get_weights()), 0) 57 | self.assertListEqual(output.get_shape().as_list(), shape) 58 | 59 | if __name__ == '__main__': 60 | tf.test.main() 61 | -------------------------------------------------------------------------------- /model/layers/resized_fuse_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for resized_fuse.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import resized_fuse 21 | 22 | 23 | class ResizedFuseTest(tf.test.TestCase): 24 | 25 | def test_resize_and_fuse_features(self): 26 | batch, height, width, channels = 2, 11, 11, 6 27 | smaller_height, smaller_width, smaller_channels = 6, 6, 3 28 | larger_height1, larger_width1 = 21, 21 # Stride 2 conv. 29 | larger_height2, larger_width2 = 22, 22 # Stride 2 conv. 30 | larger_height3, larger_width3 = 23, 23 # Conv and resize. 
31 | 
32 |     feature_list = []
33 |     feature_list.append(tf.zeros([batch, smaller_height, smaller_width,
34 |                                   smaller_channels]))
35 |     feature_list.append(tf.zeros([batch, smaller_height, smaller_width,
36 |                                   channels]))
37 |     feature_list.append(tf.zeros([batch, height, width, smaller_channels]))
38 |     feature_list.append(tf.zeros([batch, height, width, channels]))
39 |     feature_list.append(tf.zeros([batch, larger_height1, larger_width1,
40 |                                   channels]))
41 |     feature_list.append(tf.zeros([batch, larger_height1, larger_width1,
42 |                                   smaller_channels]))
43 |     feature_list.append(tf.zeros([batch, larger_height2, larger_width2,
44 |                                   smaller_channels]))
45 |     feature_list.append(tf.zeros([batch, larger_height3, larger_width3,
46 |                                   smaller_channels]))
47 |     layer = resized_fuse.ResizedFuse(name='fuse',
48 |                                      height=height,
49 |                                      width=width,
50 |                                      num_channels=channels)
51 |     output = layer(feature_list)
52 |     self.assertEqual(output.get_shape().as_list(), [batch, height, width,
53 |                                                     channels])
54 | 
55 | if __name__ == '__main__':
56 |   tf.test.main()
57 | 
--------------------------------------------------------------------------------
/model/layers/squeeze_and_excite_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for squeeze_and_excite.py."""
17 | 
18 | import tensorflow as tf
19 | 
20 | from deeplab2.model.layers import squeeze_and_excite
21 | 
22 | 
23 | class SqueezeAndExciteTest(tf.test.TestCase):
24 | 
25 |   def test_simplified_squeeze_and_excite_input_output_shape(self):
26 |     # Test the shape of input and output of SimplifiedSqueezeAndExcite.
27 |     channels = 32
28 |     input_tensor = tf.random.uniform(shape=(3, 65, 65, channels))
29 |     layer_op = squeeze_and_excite.SimplifiedSqueezeAndExcite(
30 |         channels)
31 |     output_tensor = layer_op(input_tensor)
32 |     self.assertListEqual(input_tensor.get_shape().as_list(),
33 |                          output_tensor.get_shape().as_list())
34 | 
35 |   def test_squeeze_and_excite_input_output_shape(self):
36 |     # Test the shape of input and output of SqueezeAndExcite.
37 |     channels = 32
38 |     input_tensor = tf.random.uniform(shape=(3, 65, 65, channels))
39 |     layer_op = squeeze_and_excite.SqueezeAndExcite(
40 |         in_filters=channels,
41 |         out_filters=channels,
42 |         se_ratio=8,
43 |         name='se')
44 |     output_tensor = layer_op(input_tensor)
45 |     self.assertListEqual(input_tensor.get_shape().as_list(),
46 |                          output_tensor.get_shape().as_list())
47 | 
48 | 
49 | if __name__ == '__main__':
50 |   tf.test.main()
51 | 
--------------------------------------------------------------------------------
/model/layers/stems.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """This script contains STEMs for neural networks.
17 | 
18 | The `STEM` is defined as the first few convolutions that process the input
19 | image to a spatially smaller feature map (e.g., output stride = 2).
20 | 
21 | 
22 | Reference code:
23 | https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
24 | """
25 | import tensorflow as tf
26 | 
27 | from deeplab2.model.layers import convolutions
28 | 
29 | layers = tf.keras.layers
30 | 
31 | 
32 | class InceptionSTEM(tf.keras.layers.Layer):
33 |   """An InceptionSTEM layer.
34 | 
35 |   This class builds an InceptionSTEM layer which can be used as the first
36 |   few layers in a neural network. In particular, InceptionSTEM contains three
37 |   consecutive 3x3 convolutions.
38 | 
39 |   Reference:
40 |   - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, and Alexander Alemi.
41 |   "Inception-v4, inception-resnet and the impact of residual connections on
42 |   learning." In AAAI, 2017.
43 |   """
44 | 
45 |   def __init__(self,
46 |                bn_layer=tf.keras.layers.BatchNormalization,
47 |                width_multiplier=1.0,
48 |                conv_kernel_weight_decay=0.0,
49 |                activation='relu'):
50 |     """Creates the InceptionSTEM layer.
51 | 
52 |     Args:
53 |       bn_layer: An optional tf.keras.layers.Layer that computes the
54 |         normalization (default: tf.keras.layers.BatchNormalization).
55 |       width_multiplier: A float multiplier, controlling the value of
56 |         convolution output channels.
57 |       conv_kernel_weight_decay: A float, the weight decay for convolution
58 |         kernels.
59 |       activation: A string specifying an activation function to be used in this
60 |         stem.
61 |     """
62 |     super(InceptionSTEM, self).__init__(name='stem')
63 | 
64 |     self._conv1_bn_act = convolutions.Conv2DSame(
65 |         output_channels=int(64 * width_multiplier),
66 |         kernel_size=3,
67 |         name='conv1_bn_act',
68 |         strides=2,
69 |         use_bias=False,
70 |         use_bn=True,
71 |         bn_layer=bn_layer,
72 |         activation=activation,
73 |         conv_kernel_weight_decay=conv_kernel_weight_decay)
74 | 
75 |     self._conv2_bn_act = convolutions.Conv2DSame(
76 |         output_channels=int(64 * width_multiplier),
77 |         kernel_size=3,
78 |         name='conv2_bn_act',
79 |         strides=1,
80 |         use_bias=False,
81 |         use_bn=True,
82 |         bn_layer=bn_layer,
83 |         activation=activation,
84 |         conv_kernel_weight_decay=conv_kernel_weight_decay)
85 | 
86 |     self._conv3_bn = convolutions.Conv2DSame(
87 |         output_channels=int(128 * width_multiplier),
88 |         kernel_size=3,
89 |         strides=1,
90 |         use_bias=False,
91 |         use_bn=True,
92 |         bn_layer=bn_layer,
93 |         activation='none',
94 |         name='conv3_bn',
95 |         conv_kernel_weight_decay=conv_kernel_weight_decay)
96 | 
97 |   def call(self, input_tensor, training=False):
98 |     """Performs a forward pass.
99 | 
100 |     Args:
101 |       input_tensor: An input tensor of type tf.Tensor with shape [batch, height,
102 |         width, channels].
103 |       training: A boolean flag indicating whether training behavior should be
104 |         used (default: False).
105 | 
106 |     Returns:
107 |       A single output tensor, which is not activated: the final `conv3_bn`
108 |       applies batch normalization without an activation function.
109 |     """
110 |     x = self._conv1_bn_act(input_tensor, training=training)
111 |     x = self._conv2_bn_act(x, training=training)
112 |     x = self._conv3_bn(x, training=training)
113 |     return x
114 | 
--------------------------------------------------------------------------------
/model/layers/stems_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for stems.py."""
17 | import tensorflow as tf
18 | 
19 | from deeplab2.model.layers import stems
20 | from deeplab2.utils import test_utils
21 | 
22 | 
23 | class StemsTest(tf.test.TestCase):
24 | 
25 |   def test_inception_stem_output_shape(self):
26 |     batch = 2
27 |     height, width = 65, 65
28 |     input_tensor = test_utils.create_test_input(batch, height, width, 3)
29 |     model = stems.InceptionSTEM()
30 |     output_tensor = model(input_tensor)
31 |     expected_height = (height - 1) // 2 + 1
32 |     expected_width = (width - 1) // 2 + 1
33 |     expected_channels = 128
34 |     self.assertListEqual(
35 |         output_tensor.get_shape().as_list(),
36 |         [batch, expected_height, expected_width, expected_channels])
37 | 
38 | 
39 | if __name__ == '__main__':
40 |   tf.test.main()
41 | 
--------------------------------------------------------------------------------
/model/pixel_decoder/kmax_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
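
A note on the arithmetic these tests rely on: the expected stem output size above and the stage resolutions in the pixel-decoder test below both follow the 'SAME'-padding output-size rule ceil(size / stride). A minimal sketch (plain Python, illustrative only, not part of the original sources):

    def same_padding_output_size(size, stride=2):
      # Output size of a strided convolution with 'SAME' padding:
      # ceil(size / stride), written with integer arithmetic.
      return (size - 1) // stride + 1

    assert same_padding_output_size(65) == 33   # The InceptionSTEM test above.
    resolutions = [321, 161, 81, 41, 21]        # stage1 ... stage5 below.
    for finer, coarser in zip(resolutions, resolutions[1:]):
      assert same_padding_output_size(finer) == coarser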
15 | 16 | """Tests for kMaX pixel decoder.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.pixel_decoder import kmax 21 | 22 | 23 | class KMaXPixelDecoderTest(tf.test.TestCase): 24 | 25 | def test_model_output_shape(self): 26 | model = kmax.KMaXPixelDecoder(name='kmax_pixel_decoder') 27 | output = model({ 28 | 'stage1': tf.keras.Input(shape=(321, 321, 64)), 29 | 'stage2': tf.keras.Input(shape=(161, 161, 128)), 30 | 'stage3': tf.keras.Input(shape=(81, 81, 256)), 31 | 'stage4': tf.keras.Input(shape=(41, 41, 512)), 32 | 'stage5': tf.keras.Input(shape=(21, 21, 1024)), 33 | }) 34 | 35 | self.assertListEqual(output['decoder_stage1'].get_shape().as_list(), 36 | [None, 21, 21, 2048]) 37 | self.assertListEqual(output['decoder_stage2'].get_shape().as_list(), 38 | [None, 41, 41, 1024]) 39 | self.assertListEqual(output['decoder_stage3'].get_shape().as_list(), 40 | [None, 81, 81, 512]) 41 | self.assertListEqual(output['decoder_output'].get_shape().as_list(), 42 | [None, 161, 161, 256]) 43 | 44 | 45 | if __name__ == '__main__': 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /model/pixel_encoder/axial_resnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Implements ResNets[1] and Axial-ResNets [2, 3] as pixel encoders. 17 | 18 | [1] Deep residual learning for image recognition. 19 | CVPR 2016. 20 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. 21 | 22 | [2] Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation, 23 | ECCV 2020. 24 | Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille, 25 | Liang-Chieh Chen. 26 | 27 | [3] MaX-DeepLab: End-to-End Panoptic Segmentation with Mask Transformers, 28 | CVPR 2021. 29 | Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen. 30 | """ 31 | 32 | import functools 33 | 34 | import tensorflow as tf 35 | 36 | from deeplab2.model.encoder import axial_resnet 37 | 38 | resnet50 = functools.partial( 39 | axial_resnet.AxialResNet, 40 | output_stride=32, 41 | classification_mode=True, 42 | backbone_type="resnet", 43 | use_axial_beyond_stride=0, 44 | backbone_use_transformer_beyond_stride=0, 45 | activation="relu") 46 | 47 | # This is the same backbone as MaX-S, which uses Inception Stem and 48 | # incorporates Axial-Attention in the last two stages of ResNet-50. 
49 | axial_resnet50 = functools.partial( 50 | axial_resnet.AxialResNet, 51 | output_stride=32, 52 | classification_mode=True, 53 | backbone_type="resnet_beta", 54 | use_axial_beyond_stride=16, 55 | backbone_use_transformer_beyond_stride=0, 56 | activation="gelu") 57 | 58 | 59 | def get_model(model_name, input_shape, drop_path_keep_prob=1.0, **kwargs): 60 | """Gets an (Axial-)ResNet model.""" 61 | block_group_config = { 62 | "drop_path_schedule": "linear", 63 | "drop_path_keep_prob": drop_path_keep_prob 64 | } 65 | model_name = model_name.lower() 66 | if model_name == "resnet50": 67 | model = resnet50( 68 | name=model_name, block_group_config=block_group_config, **kwargs) 69 | elif model_name == "axial_resnet50": 70 | model = axial_resnet50( 71 | name=model_name, block_group_config=block_group_config, **kwargs) 72 | else: 73 | raise ValueError("Unsupported backbone %s!" % model_name) 74 | 75 | # Build the model. 76 | model(tf.keras.Input(shape=input_shape)) 77 | 78 | return model 79 | -------------------------------------------------------------------------------- /model/pixel_encoder/axial_resnet_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for (Axial-)ResNets.""" 17 | 18 | 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from deeplab2.model.pixel_encoder import axial_resnet 24 | 25 | 26 | class AxialResNetTest(tf.test.TestCase, parameterized.TestCase): 27 | 28 | # The parameter count does not include the classification head. 
29 | @parameterized.parameters( 30 | ('resnet50', 23508032), 31 | ('axial_resnet50', 41343424), 32 | ) 33 | def test_model_output_shape_and_num_params(self, model_name, 34 | expected_num_params): 35 | model = axial_resnet.get_model(model_name, 36 | input_shape=(224, 224, 3)) 37 | output = model(tf.keras.Input(shape=(224, 224, 3))) 38 | 39 | if model_name == 'resnet50': 40 | dims = [64, 256, 512, 1024, 2048] 41 | elif model_name == 'axial_resnet50': 42 | dims = [128, 256, 512, 1024, 2048] 43 | 44 | self.assertListEqual(output['stage1'].get_shape().as_list(), 45 | [None, 112, 112, dims[0]]) 46 | self.assertListEqual(output['stage2'].get_shape().as_list(), 47 | [None, 56, 56, dims[1]]) 48 | self.assertListEqual(output['stage3'].get_shape().as_list(), 49 | [None, 28, 28, dims[2]]) 50 | self.assertListEqual(output['stage4'].get_shape().as_list(), 51 | [None, 14, 14, dims[3]]) 52 | self.assertListEqual(output['stage5'].get_shape().as_list(), 53 | [None, 7, 7, dims[4]]) 54 | 55 | num_params = np.sum( 56 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights]) 57 | self.assertEqual(num_params, expected_num_params) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /model/pixel_encoder/convnext_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for ConvNeXt.""" 17 | 18 | 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from deeplab2.model.pixel_encoder import convnext 24 | 25 | 26 | class ConvNeXtTest(tf.test.TestCase, parameterized.TestCase): 27 | 28 | # The parameter count does not include the classification head. 
29 | @parameterized.parameters( 30 | ('convnext_tiny', 27818592), 31 | ('convnext_small', 49453152), 32 | ('convnext_base', 87564416), 33 | ('convnext_large', 196227264), 34 | ('convnext_xlarge', 348143872), 35 | ) 36 | def test_model_output_shape_and_num_params(self, model_name, 37 | expected_num_params): 38 | model = convnext.get_model(model_name, 39 | input_shape=(224, 224, 3)) 40 | output = model(tf.keras.Input(shape=(224, 224, 3))) 41 | 42 | if model_name.lower() in ['convnext_tiny', 'convnext_small']: 43 | dims = [96, 192, 384, 768] 44 | elif model_name.lower() in ['convnext_base',]: 45 | dims = [128, 256, 512, 1024] 46 | elif model_name.lower() in ['convnext_large',]: 47 | dims = [192, 384, 768, 1536] 48 | elif model_name.lower() in ['convnext_xlarge',]: 49 | dims = [256, 512, 1024, 2048] 50 | 51 | self.assertListEqual(output['stage1'].get_shape().as_list(), 52 | [None, 56, 56, dims[0]]) 53 | self.assertListEqual(output['stage2'].get_shape().as_list(), 54 | [None, 56, 56, dims[0]]) 55 | self.assertListEqual(output['stage3'].get_shape().as_list(), 56 | [None, 28, 28, dims[1]]) 57 | self.assertListEqual(output['stage4'].get_shape().as_list(), 58 | [None, 14, 14, dims[2]]) 59 | self.assertListEqual(output['stage5'].get_shape().as_list(), 60 | [None, 7, 7, dims[3]]) 61 | 62 | num_params = np.sum( 63 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights]) 64 | self.assertEqual(num_params, expected_num_params) 65 | 66 | @parameterized.parameters( 67 | ('convnext_tiny', 224, 4383527995), 68 | ('convnext_small', 224, 8563618819), 69 | ('convnext_base', 224, 15194596739), 70 | ('convnext_large', 224, 34121222275), 71 | ('convnext_xlarge', 224, 60600740739), 72 | ) 73 | def test_model_flops(self, 74 | model_name, 75 | input_resolution, 76 | expected_multiply_adds): 77 | input_shape = [1, input_resolution, input_resolution, 3] 78 | model = convnext.get_model(model_name, 79 | input_shape=input_shape[1:]) 80 | model(tf.keras.Input(shape=input_shape[1:])) 81 | 82 | forward_pass = tf.function( 83 | model.call, 84 | input_signature=[tf.TensorSpec(shape=input_shape)]) 85 | 86 | graph_info = tf.compat.v1.profiler.profile( 87 | forward_pass.get_concrete_function().graph, 88 | options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()) 89 | multiply_adds = graph_info.total_float_ops // 2 90 | self.assertEqual(multiply_adds, expected_multiply_adds) 91 | 92 | if __name__ == '__main__': 93 | tf.test.main() 94 | -------------------------------------------------------------------------------- /model/post_processor/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /model/post_processor/post_processor_builder.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains a post-processor builder used in the DeepLab model.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2 import common 21 | from deeplab2 import config_pb2 22 | from deeplab2.data import dataset 23 | from deeplab2.model import utils 24 | from deeplab2.model.post_processor import max_deeplab 25 | from deeplab2.model.post_processor import panoptic_deeplab 26 | 27 | 28 | def get_post_processor( 29 | config: config_pb2.ExperimentOptions, 30 | dataset_descriptor: dataset.DatasetDescriptor) -> tf.keras.layers.Layer: 31 | """Initializes a DeepLab post-processor. 32 | 33 | Args: 34 | config: A config_pb2.ExperimentOptions configuration. 35 | dataset_descriptor: A dataset.DatasetDescriptor. 36 | 37 | Returns: 38 | PostProcessor: A post-processor depending on the configuration. 39 | """ 40 | supported_tasks = utils.get_supported_tasks(config) 41 | if config.model_options.WhichOneof('meta_architecture') == 'max_deeplab': 42 | return max_deeplab.PostProcessor(config, dataset_descriptor) 43 | if common.TASK_PANOPTIC_SEGMENTATION in supported_tasks: 44 | return panoptic_deeplab.PostProcessor(config, dataset_descriptor) 45 | return panoptic_deeplab.SemanticOnlyPostProcessor() 46 | -------------------------------------------------------------------------------- /model/post_processor/post_processor_builder_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
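
The builder above dispatches on two signals: the meta_architecture oneof and the set of supported tasks. A compact, runnable restatement of that decision order (the task string 'panoptic_segmentation' is a stand-in for common.TASK_PANOPTIC_SEGMENTATION, whose exact value is not shown in this listing):

    def choose_post_processor(meta_architecture, supported_tasks):
      # Mirrors the order of the checks in get_post_processor above.
      if meta_architecture == 'max_deeplab':
        return 'max_deeplab.PostProcessor'
      if 'panoptic_segmentation' in supported_tasks:
        return 'panoptic_deeplab.PostProcessor'
      return 'panoptic_deeplab.SemanticOnlyPostProcessor'

    assert choose_post_processor('max_deeplab', set()) == 'max_deeplab.PostProcessor'
    assert (choose_post_processor('panoptic_deeplab', set())
            == 'panoptic_deeplab.SemanticOnlyPostProcessor')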
15 | 16 | """Tests for post_processor_builder.py.""" 17 | 18 | import tensorflow as tf 19 | 20 | from google.protobuf import text_format 21 | from deeplab2 import common 22 | from deeplab2 import config_pb2 23 | from deeplab2.data import dataset 24 | from deeplab2.model.post_processor import post_processor_builder 25 | 26 | 27 | class EvaluatorTest(tf.test.TestCase): 28 | 29 | def test_evaluates_panoptic_deeplab_model(self): 30 | experiment_options_textproto = """ 31 | experiment_name: "evaluation_test" 32 | eval_dataset_options { 33 | dataset: "cityscapes_panoptic" 34 | file_pattern: "EMPTY" 35 | batch_size: 1 36 | crop_size: 1025 37 | crop_size: 2049 38 | # Skip resizing. 39 | min_resize_value: 0 40 | max_resize_value: 0 41 | } 42 | evaluator_options { 43 | continuous_eval_timeout: -1 44 | stuff_area_limit: 2048 45 | center_score_threshold: 0.1 46 | nms_kernel: 13 47 | save_predictions: true 48 | save_raw_predictions: false 49 | } 50 | """ 51 | config = text_format.Parse(experiment_options_textproto, 52 | config_pb2.ExperimentOptions()) 53 | config.model_options.panoptic_deeplab.instance.enable = True 54 | post_processor = post_processor_builder.get_post_processor( 55 | config, dataset.CITYSCAPES_PANOPTIC_INFORMATION) 56 | 57 | result_dict = { 58 | common.PRED_SEMANTIC_PROBS_KEY: 59 | tf.zeros([1, 1025, 2049, 19], dtype=tf.float32), 60 | common.PRED_CENTER_HEATMAP_KEY: 61 | tf.zeros([1, 1025, 2049, 1], dtype=tf.float32), 62 | common.PRED_OFFSET_MAP_KEY: 63 | tf.zeros([1, 1025, 2049, 2], dtype=tf.float32) 64 | } 65 | processed_dict = post_processor(result_dict) 66 | expected_keys = { 67 | common.PRED_PANOPTIC_KEY, 68 | common.PRED_SEMANTIC_KEY, 69 | common.PRED_INSTANCE_KEY, 70 | common.PRED_INSTANCE_CENTER_KEY, 71 | common.PRED_INSTANCE_SCORES_KEY 72 | } 73 | self.assertCountEqual(processed_dict.keys(), expected_keys) 74 | 75 | 76 | if __name__ == '__main__': 77 | tf.test.main() 78 | -------------------------------------------------------------------------------- /model/post_processor/vip_deeplab_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Test for vip_deeplab.py.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.post_processor import vip_deeplab 21 | 22 | 23 | class PostProcessingTest(tf.test.TestCase): 24 | 25 | def test_stitch_video_panoptic_prediction(self): 26 | concat_semantic = np.array( 27 | [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 28 | dtype=np.int32) 29 | concat_instance = np.array( 30 | [[[1, 1, 2, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 31 | dtype=np.int32) 32 | next_semantic = np.array( 33 | [[[0, 1, 1, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 34 | dtype=np.int32) 35 | next_instance = np.array( 36 | [[[2, 0, 0, 1], [2, 0, 0, 1], [2, 4, 4, 1], [5, 5, 3, 3]]], 37 | dtype=np.int32) 38 | label_divisor = 1000 39 | concat_panoptic = concat_semantic * label_divisor + concat_instance 40 | next_panoptic = next_semantic * label_divisor + next_instance 41 | new_panoptic = vip_deeplab.stitch_video_panoptic_prediction( 42 | concat_panoptic, next_panoptic, label_divisor) 43 | # The expected instance is manually computed. It should receive the IDs 44 | # propagated from concat_instance by IoU matching between concat_panoptic 45 | # and next_panoptic. 46 | expected_semantic = next_semantic 47 | expected_instance = np.array( 48 | [[[1, 0, 0, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 49 | dtype=np.int32) 50 | expected_panoptic = expected_semantic * label_divisor + expected_instance 51 | np.testing.assert_array_equal(expected_panoptic, new_panoptic) 52 | 53 | def test_tf_video_panoptic_prediction_stitcher(self): 54 | concat_semantic = np.array( 55 | [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 56 | dtype=np.int32) 57 | concat_instance = np.array( 58 | [[[1, 1, 2, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 59 | dtype=np.int32) 60 | next_semantic = np.array( 61 | [[[0, 1, 1, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 62 | dtype=np.int32) 63 | next_instance = np.array( 64 | [[[2, 0, 0, 1], [2, 0, 0, 1], [2, 4, 4, 1], [5, 5, 3, 3]]], 65 | dtype=np.int32) 66 | label_divisor = 1000 67 | concat_panoptic = concat_semantic * label_divisor + concat_instance 68 | next_panoptic = next_semantic * label_divisor + next_instance 69 | stitcher = vip_deeplab.VideoPanopticPredictionStitcher(label_divisor) 70 | new_panoptic = stitcher( 71 | tf.convert_to_tensor(concat_panoptic), 72 | tf.convert_to_tensor(next_panoptic)).numpy() 73 | # The expected instance is manually computed. It should receive the IDs 74 | # propagated from concat_instance by IoU matching between concat_panoptic 75 | # and next_panoptic. 76 | expected_semantic = next_semantic 77 | expected_instance = np.array( 78 | [[[1, 0, 0, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 79 | dtype=np.int32) 80 | expected_panoptic = expected_semantic * label_divisor + expected_instance 81 | np.testing.assert_array_equal(expected_panoptic, new_panoptic) 82 | 83 | 84 | if __name__ == '__main__': 85 | tf.test.main() 86 | -------------------------------------------------------------------------------- /model/test_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains utility functions for the model tests.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | 21 | def create_test_input(batch, height, width, channels): 22 | """Creates test input tensor.""" 23 | input_tensor = np.tile( 24 | np.reshape( 25 | np.reshape(np.arange(height), [height, 1]) + 26 | np.reshape(np.arange(width), [1, width]), 27 | [1, height, width, 1]), 28 | [batch, 1, 1, channels]) 29 | # Normalize the input tensor so that the outputs are not too large. 30 | input_tensor = (input_tensor * 2 / np.max(input_tensor)) - 1 31 | return tf.cast(input_tensor, tf.float32) 32 | -------------------------------------------------------------------------------- /model/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for test_utils.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model import test_utils 21 | 22 | 23 | class TestUtilsTest(tf.test.TestCase): 24 | 25 | def test_create_test_input(self): 26 | input_shape = [1, 2, 3, 4] 27 | input_tensor = test_utils.create_test_input(*input_shape) 28 | self.assertListEqual(input_tensor.get_shape().as_list(), input_shape) 29 | 30 | 31 | if __name__ == '__main__': 32 | tf.test.main() 33 | -------------------------------------------------------------------------------- /model/transformer_decoder/kmax_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
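
A quick illustration of the deterministic ramp that model/test_utils.create_test_input above builds, which keeps test outputs reproducible (plain NumPy, shown for a 1x2x3x1 request; the final tf.cast is omitted):

    import numpy as np

    height, width = 2, 3
    ramp = np.reshape(np.arange(height), [height, 1]) + np.reshape(
        np.arange(width), [1, width])
    # ramp == [[0, 1, 2],
    #          [1, 2, 3]]
    normalized = ramp * 2 / np.max(ramp) - 1  # Rescaled into [-1, 1].
    # normalized == [[-1., -1/3, 1/3],
    #                [-1/3, 1/3, 1.]]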
15 | 16 | """Tests for kMaX transformer decoder.""" 17 | 18 | import functools 19 | import os 20 | 21 | import tensorflow as tf 22 | 23 | from google.protobuf import text_format 24 | from deeplab2 import config_pb2 25 | from deeplab2.data import dataset 26 | from deeplab2.model import builder 27 | from deeplab2.model.transformer_decoder import kmax 28 | # resources dependency 29 | 30 | 31 | class KMaXTransformerDecoderTest(tf.test.TestCase): 32 | 33 | def test_model_output_shape(self): 34 | config_path = 'deeplab2/configs/example' 35 | def _read_proto_file(filename, proto): 36 | filename = filename # OSS: removed internal filename loading. 37 | with tf.io.gfile.GFile(filename, 'r') as proto_file: 38 | return text_format.ParseLines(proto_file, proto) 39 | proto_filename = os.path.join(config_path, 40 | 'example_coco_max_deeplab.textproto') 41 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions()) 42 | dataset_descriptor = dataset.MAP_NAME_TO_DATASET_INFO['coco_panoptic'] 43 | auxiliary_predictor_func = functools.partial( 44 | builder.create_decoder, 45 | model_options=config.model_options, 46 | bn_layer=tf.keras.layers.BatchNormalization, 47 | ignore_label=dataset_descriptor.ignore_label, 48 | use_auxiliary_semantic_head=False) 49 | 50 | model = kmax.KMaXTransformerDecoder( 51 | name='kmax_pixel_decoder', 52 | auxiliary_predictor_func=auxiliary_predictor_func) 53 | output = model({ 54 | 'stage1': tf.keras.Input(shape=(321, 321, 64)), 55 | 'stage2': tf.keras.Input(shape=(161, 161, 128)), 56 | 'stage3': tf.keras.Input(shape=(81, 81, 256)), 57 | 'stage4': tf.keras.Input(shape=(41, 41, 512)), 58 | 'stage5': tf.keras.Input(shape=(21, 21, 1024)), 59 | 'decoder_stage1': tf.keras.Input(shape=(21, 21, 2048)), 60 | 'decoder_stage2': tf.keras.Input(shape=(41, 41, 1024)), 61 | 'decoder_stage3': tf.keras.Input(shape=(81, 81, 512)), 62 | 'decoder_output': tf.keras.Input(shape=(161, 161, 256)), 63 | }) 64 | 65 | self.assertListEqual( 66 | output['transformer_class_feature'].get_shape().as_list(), 67 | [None, 128, 256]) 68 | self.assertListEqual( 69 | output['transformer_mask_feature'].get_shape().as_list(), 70 | [None, 128, 256]) 71 | self.assertListEqual(output['feature_panoptic'].get_shape().as_list(), 72 | [None, 161, 161, 256]) 73 | self.assertListEqual(output['feature_semantic'].get_shape().as_list(), 74 | [None, 21, 21, 1024]) 75 | 76 | 77 | if __name__ == '__main__': 78 | tf.test.main() 79 | -------------------------------------------------------------------------------- /tensorflow_ops/kernels/merge_semantic_and_instance_maps_op.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The Deeplab2 Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 
15 | #include /*third_party*/"tensorflow/core/framework/op.h"
16 | #include /*third_party*/"tensorflow/core/framework/shape_inference.h"
17 | 
18 | namespace tensorflow_models {
19 | namespace deeplab {
20 | namespace deeplab2 {
21 | 
22 | using tensorflow::shape_inference::DimensionHandle;
23 | using tensorflow::shape_inference::InferenceContext;
24 | using tensorflow::shape_inference::ShapeHandle;
25 | 
26 | REGISTER_OP("MergeSemanticAndInstanceMaps")
27 |     .Input("semantic_maps: int32")
28 |     .Input("instance_maps: int32")
29 |     .Input("thing_ids: int32")
30 |     .Attr("label_divisor: int = 256")
31 |     .Attr("stuff_area_limit: int = 0")
32 |     .Attr("void_label: int = 0")
33 |     .Output("parsing_maps: int32")
34 |     .SetShapeFn([](InferenceContext* c) {
35 |       ShapeHandle semantic_maps;
36 |       ShapeHandle instance_maps;
37 |       ShapeHandle thing_ids;
38 |       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &semantic_maps));
39 |       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &instance_maps));
40 |       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &thing_ids));
41 |       DimensionHandle batch = c->Dim(semantic_maps, 0);
42 |       DimensionHandle height = c->Dim(semantic_maps, 1);
43 |       DimensionHandle width = c->Dim(semantic_maps, 2);
44 |       c->set_output(0, c->MakeShape({batch, height, width}));
45 |       return tensorflow::OkStatus();
46 |     })
47 |     .Doc(R"doc(
48 | Generates parsing maps from semantic maps and instance maps.
49 | 
50 | Parsing maps, or panoptic segmentation, are merged from the predicted semantic
51 | maps and class-agnostic instance maps. This function merges the maps in the
52 | following way:
53 | 
54 | 1) If a pixel belongs to a `stuff` class (e.g., sky), the function directly
55 | uses the semantic label from the semantic map and uses 0 as the instance label.
56 | 2) If a pixel belongs to a `thing` class (e.g., person), it uses the instance
57 | label from the instance map and uses the majority vote of the semantic labels
58 | of the same instance as the final semantic label.
59 | 3) The function relabels each instance, so that the instance label of each
60 | semantic class is in the range of [1, num_instances_of_the_semantic_class].
61 | 
62 | Note that this operation was first proposed in the DeeperLab paper and adopted
63 | by the Panoptic-DeepLab framework.
64 | - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
65 | - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
66 | 
67 | semantic_maps: An int32 Tensor with shape `[batch, height, width]` whose value
68 |   indicates the predicted semantic label of each pixel.
69 | instance_maps: An int32 Tensor with shape `[batch, height, width]` whose value
70 |   indicates the predicted instance label of each pixel.
71 | thing_ids: An int32 Tensor with shape `[num_thing_ids]` whose value refers to
72 |   the semantic ids of the thing classes.
73 | label_divisor: An integer. The value used to combine the semantic and instance
74 |   map to generate the parsing map. In particular, the value of a pixel in the
75 |   parsing map is equal to its corresponding semantic label times label_divisor
76 |   plus instance label (i.e., semantic_label * label_divisor + instance_label).
77 | stuff_area_limit: An integer. Predicted stuff segments whose areas are smaller
78 |   than this threshold are assigned to the VOID label.
79 | void_label: An integer, specifying the VOID label.
80 | parsing_maps: An int32 Tensor with shape `[batch, height, width]` whose value
81 |   indicates the merged semantic and instance label of each pixel.
82 | )doc");
83 | 
84 | }  // namespace deeplab2
85 | }  // namespace deeplab
86 | }  // namespace tensorflow_models
87 | 
--------------------------------------------------------------------------------
/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op_kernel.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 The Deeplab2 Authors.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | //     http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
16 | #define DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
17 | #include <cstdint>
18 | 
19 | #include <unordered_set>
20 | 
21 | #include /*third_party*/"tensorflow/core/framework/numeric_types.h"
22 | #include /*third_party*/"tensorflow/core/framework/op_kernel.h"
23 | #include /*third_party*/"tensorflow/core/framework/tensor.h"
24 | #include /*third_party*/"tensorflow/core/framework/tensor_types.h"
25 | 
26 | namespace tensorflow_models {
27 | namespace deeplab {
28 | namespace deeplab2 {
29 | namespace functor {
30 | 
31 | template <typename Device>
32 | struct MergeSemanticAndInstanceMaps {
33 |   // Functor that merges semantic and instance maps.
34 |   void operator()(
35 |       const Device& d,
36 |       typename tensorflow::TTypes<int32_t, 3>::ConstTensor semantic_maps,
37 |       typename tensorflow::TTypes<int32_t, 3>::ConstTensor instance_maps,
38 |       const std::unordered_set<int32_t>& thing_ids_set, int label_divisor,
39 |       int stuff_area_limit, int void_label,
40 |       typename tensorflow::TTypes<int32_t, 3>::Tensor parsing_maps);
41 | };
42 | 
43 | // Helper method to convert a list of thing IDs into a hashset.
44 | template <typename Device>
45 | std::unordered_set<int32_t> Convert1DInt32TensorToSet(
46 |     const Device& d, const tensorflow::Tensor& tensor);
47 | 
48 | }  // namespace functor
49 | }  // namespace deeplab2
50 | }  // namespace deeplab
51 | }  // namespace tensorflow_models
52 | 
53 | #endif  // DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
54 | 
--------------------------------------------------------------------------------
/tensorflow_ops/python/kernel_tests/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
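
The kernel tests in this directory exercise the op registered above. A minimal NumPy sketch of the panoptic encoding it emits, assuming the default label_divisor of 256 (class ids 24 = person and 7 = road are taken from the Cityscapes categories JSON later in this listing; illustrative only):

    import numpy as np

    label_divisor = 256
    semantic = np.array([[24, 7]], dtype=np.int32)  # person (thing), road (stuff).
    instance = np.array([[1, 0]], dtype=np.int32)   # Stuff pixels use instance 0.
    parsing = semantic * label_divisor + instance   # -> [[6145, 1792]]

    # The encoding is invertible, which downstream evaluation relies on.
    assert np.array_equal(parsing // label_divisor, semantic)
    assert np.array_equal(parsing % label_divisor, instance)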
15 | 
16 | 
--------------------------------------------------------------------------------
/tensorflow_ops/python/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
--------------------------------------------------------------------------------
/tensorflow_ops/python/ops/merge_semantic_and_instance_maps_op.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Utility function for the C++ TensorFlow MergeSemanticAndInstanceMaps op."""
17 | 
18 | import tensorflow as tf
19 | 
20 | # Make the TensorFlow MergeSemanticAndInstanceMaps op accessible by importing
21 | # merge_semantic_and_instance_maps_op.py.
22 | from tensorflow.python.framework import load_library
23 | from tensorflow.python.platform import resource_loader
24 | gen_merge_semantic_and_instance_maps_op = load_library.load_op_library(
25 |     resource_loader.get_path_to_datafile(
26 |         '../../kernels/merge_semantic_and_instance_maps_op.so'))
27 | 
28 | merge_semantic_and_instance_maps = (
29 |     gen_merge_semantic_and_instance_maps_op.merge_semantic_and_instance_maps)
30 | 
31 | tf.no_gradient('MergeSemanticAndInstanceMaps')
32 | 
--------------------------------------------------------------------------------
/tracker/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
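
A hedged usage sketch for the Python wrapper above: the input and attribute names follow the REGISTER_OP declaration earlier in this listing, the thing_ids values are the Cityscapes thing classes, and the attribute values are illustrative assumptions rather than recommended settings:

    import tensorflow as tf
    from deeplab2.tensorflow_ops.python.ops import merge_semantic_and_instance_maps_op

    semantic_maps = tf.zeros([1, 65, 65], dtype=tf.int32)
    instance_maps = tf.zeros([1, 65, 65], dtype=tf.int32)
    thing_ids = tf.constant([24, 25, 26, 27, 28, 31, 32, 33], dtype=tf.int32)
    parsing_maps = merge_semantic_and_instance_maps_op.merge_semantic_and_instance_maps(
        semantic_maps, instance_maps, thing_ids,
        label_divisor=256, stuff_area_limit=2048, void_label=0)
    # parsing_maps: int32 [1, 65, 65]; per pixel, semantic * label_divisor + instance.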
15 | 
16 | 
--------------------------------------------------------------------------------
/tracker/optical_flow_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Utility functions for optical flow."""
17 | import cv2
18 | import numpy as np
19 | 
20 | 
21 | def warp_flow(img: np.ndarray, flow_tensor: np.ndarray) -> np.ndarray:
22 |   """Warps an image along a dense optical flow field.
23 | 
24 |   The per-pixel (dx, dy) flow offsets are converted into an absolute
25 |   sampling grid, which cv2.remap uses to bilinearly sample the image.
26 |   """
27 |   flow = flow_tensor.copy()
28 |   h, w = flow.shape[:2]
29 |   # Turn relative offsets into absolute source coordinates.
30 |   flow[..., 0] += np.arange(w)
31 |   flow[..., 1] += np.arange(h)[:, np.newaxis]
32 |   res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
33 |   return res
34 | 
35 | 
36 | def remove_occlusions(warped_binary_img: np.ndarray,
37 |                       occlusion_map: np.ndarray) -> np.ndarray:
38 |   """Zeroes out pixels of a warped binary mask that are marked as occluded."""
39 |   return warped_binary_img.astype(bool) & (1 - occlusion_map).astype(bool)
40 | 
--------------------------------------------------------------------------------
/trainer/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
--------------------------------------------------------------------------------
/trainer/distribution_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """This file contains helper functions to run training in a distributed way."""
17 | 
18 | from typing import Text, Optional
19 | 
20 | import tensorflow as tf
21 | 
22 | 
23 | def tpu_initialize(tpu_address: Text):
24 |   """Initializes TPU for TF 2.x training.
25 | 
26 |   Args:
27 |     tpu_address: string, bns address of master TPU worker.
28 | 
29 |   Returns:
30 |     A TPUClusterResolver.
31 |   """
32 |   cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
33 |       tpu=tpu_address)
34 |   if tpu_address not in ('', 'local'):
35 |     tf.config.experimental_connect_to_cluster(cluster_resolver)
36 |   tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
37 |   return cluster_resolver
38 | 
39 | 
40 | def create_strategy(tpu_address: Optional[Text],
41 |                     num_gpus: int = 0) -> tf.distribute.Strategy:
42 |   """Creates a strategy based on the given parameters.
43 | 
44 |   The strategies are created based on the following criteria and order:
45 |   1. If `tpu_address` is not None, a TPUStrategy is used.
46 |   2. If num_gpus > 1, a MirroredStrategy is used, which replicates the model on
47 |      each GPU.
48 |   3. If num_gpus == 1, a OneDeviceStrategy is used on the GPU.
49 |   4. If num_gpus == 0, a OneDeviceStrategy is used on the CPU.
50 | 
51 |   Args:
52 |     tpu_address: The optional name or address of the TPU to connect to or None.
53 |     num_gpus: A non-negative integer specifying the number of GPUs.
54 | 
55 |   Returns:
56 |     A tf.distribute.Strategy.
57 | 
58 |   Raises:
59 |     ValueError: If `num_gpus` is negative and tpu_address is None.
60 |   """
61 |   if tpu_address is not None:
62 |     resolver = tpu_initialize(tpu_address)
63 |     return tf.distribute.TPUStrategy(resolver)
64 |   else:
65 |     if num_gpus < 0:
66 |       raise ValueError('`num_gpus` must not be negative.')
67 |     elif num_gpus == 0:
68 |       devices = ['device:CPU:0']
69 |     else:
70 |       devices = ['device:GPU:%d' % i for i in range(num_gpus)]
71 |     if len(devices) == 1:
72 |       return tf.distribute.OneDeviceStrategy(devices[0])
73 |     return tf.distribute.MirroredStrategy(devices)
74 | 
--------------------------------------------------------------------------------
/trainer/runner_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for runner_utils.py."""
17 | 
18 | import os
19 | 
20 | import numpy as np
21 | import tensorflow as tf
22 | 
23 | from google.protobuf import text_format
24 | from deeplab2 import config_pb2
25 | from deeplab2.data import dataset
26 | from deeplab2.model import deeplab
27 | from deeplab2.trainer import runner_utils
28 | # resources dependency
29 | 
30 | _CONFIG_PATH = 'deeplab2/configs/example'
31 | 
32 | 
33 | def _read_proto_file(filename, proto):
34 |   filename = filename  # OSS: removed internal filename loading.
35 |   with tf.io.gfile.GFile(filename, 'r') as proto_file:
36 |     return text_format.ParseLines(proto_file, proto)
37 | 
38 | 
39 | def _create_model_from_test_proto(file_name,
40 |                                   dataset_name='coco_panoptic'):
41 |   proto_filename = os.path.join(_CONFIG_PATH, file_name)
42 |   config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions())
43 |   return deeplab.DeepLab(config,
44 |                          dataset.MAP_NAME_TO_DATASET_INFO[dataset_name]
45 |                          ), config
46 | 
47 | 
48 | class RunnerUtilsTest(tf.test.TestCase):
49 | 
50 |   def test_check_if_variable_in_backbone_with_max_deeplab(self):
51 |     model, experiment_options = _create_model_from_test_proto(
52 |         'example_coco_max_deeplab.textproto', dataset_name='coco_panoptic')
53 |     train_crop_size = tuple(
54 |         experiment_options.train_dataset_options.crop_size)
55 |     input_tensor = tf.random.uniform(
56 |         shape=(2, train_crop_size[0], train_crop_size[1], 3))
57 |     _ = model(input_tensor, training=True)
58 | 
59 |     encoder = model.checkpoint_items['encoder']
60 |     encoder_variable_names = [x.name for x in encoder.trainable_variables]
61 |     encoder_name = experiment_options.model_options.backbone.name
62 | 
63 |     num_backbone_params = 0
64 |     backbone_optimizer_inputs = []
65 |     for variable in model.trainable_weights:
66 |       if runner_utils.check_if_variable_in_backbone(variable, encoder_name,
67 |                                                     encoder_variable_names):
68 |         backbone_optimizer_inputs.append(variable)
69 |         num_backbone_params += np.prod(variable.get_shape().as_list())
70 |     # The number of Tensors in the backbone. We use this number in addition to
71 |     # the number of parameters as a check of correctness.
72 |     self.assertLen(backbone_optimizer_inputs, 301)
73 |     # The same number of parameters as max_deeplab_s_backbone.
74 |     self.assertEqual(num_backbone_params, 41343424)
75 | 
76 | 
77 | if __name__ == '__main__':
78 |   tf.test.main()
79 | 
--------------------------------------------------------------------------------
/trainer/train.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """This file contains the code to run (train and evaluate) a DeepLab model."""
17 | 
18 | import os
19 | from absl import app
20 | from absl import flags
21 | from absl import logging
22 | import tensorflow as tf
23 | 
24 | from google.protobuf import text_format
25 | from deeplab2 import config_pb2
26 | from deeplab2.trainer import train_lib
27 | 
28 | flags.DEFINE_enum(
29 |     'mode',
30 |     default=None,
31 |     enum_values=['train', 'eval', 'train_and_eval', 'continuous_eval'],
32 |     help='Mode to run: `train`, `eval`, `train_and_eval`, `continuous_eval`.')
33 | 
34 | flags.DEFINE_string(
35 |     'model_dir',
36 |     default=None,
37 |     help='The base directory where the model and training/evaluation summaries '
38 |     'are stored. The path will be combined with the `experiment_name` defined '
39 |     'in the config file to create a folder under which all files are stored.')
40 | 
41 | flags.DEFINE_string(
42 |     'config_file',
43 |     default=None,
44 |     help='Proto file which specifies the experiment configuration. The proto '
45 |     'definition of ExperimentOptions is specified in config.proto.')
46 | 
47 | flags.DEFINE_string(
48 |     'master',
49 |     default=None,
50 |     help='The Cloud TPU to use for training. This should be either the name '
51 |     'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 '
52 |     'url.')
53 | 
54 | flags.DEFINE_integer(
55 |     'num_gpus',
56 |     default=0,
57 |     help='The number of GPUs to use. If the `master` flag is not set, this '
58 |     'parameter specifies whether GPUs should be used and how many of them '
59 |     '(default: 0).')
60 | 
61 | FLAGS = flags.FLAGS
62 | 
63 | 
64 | def main(_):
65 |   logging.info('Reading the config file.')
66 |   with tf.io.gfile.GFile(FLAGS.config_file, 'r') as proto_file:
67 |     config = text_format.ParseLines(proto_file, config_pb2.ExperimentOptions())
68 | 
69 |   logging.info('Starting the experiment.')
70 |   combined_model_dir = os.path.join(FLAGS.model_dir, config.experiment_name)
71 |   train_lib.run_experiment(FLAGS.mode, config, combined_model_dir,
72 |                            FLAGS.master, FLAGS.num_gpus)
73 | 
74 | 
75 | if __name__ == '__main__':
76 |   app.run(main)
77 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
--------------------------------------------------------------------------------
/utils/create_images_json_for_cityscapes.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # pylint: disable=line-too-long
17 | # pyformat: disable
18 | r"""Creates a JSON file with info for a split of Cityscapes images.
19 | 
20 | This single-purpose version has special handling for the directory structure of
21 | the Cityscapes dataset and the expected output ids.
22 | 23 | Sample commands: 24 | 25 | python create_images_json_for_cityscapes.py \ 26 | --image_dir=${DATA_ROOT}/leftImg8bit/${IMAGES_SPLIT} \ 27 | --output_json_path=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \ 28 | --only_basename \ 29 | --include_image_type_suffix=false 30 | """ 31 | # pyformat: enable 32 | # pylint: enable=line-too-long 33 | 34 | from __future__ import absolute_import 35 | from __future__ import division 36 | from __future__ import print_function 37 | 38 | import json 39 | import os 40 | import re 41 | 42 | from absl import app 43 | from absl import flags 44 | 45 | import tensorflow as tf 46 | 47 | FLAGS = flags.FLAGS 48 | 49 | flags.DEFINE_string( 50 | 'image_dir', None, 51 | 'The top-level directory of image files to be included in the set.') 52 | 53 | flags.DEFINE_list( 54 | 'keep_cities', None, 55 | 'Comma-separated list of strings specifying cities to be processed.') 56 | 57 | flags.DEFINE_string('output_json_path', None, 58 | 'Output path to which is written the image info JSON.') 59 | 60 | flags.DEFINE_boolean( 61 | 'only_basename', True, 62 | 'If set, the included "file_name" properties of the images in the JSON ' 63 | 'file will only include the base name and not the city directory. Used for ' 64 | 'tools that do not support nested directories.') 65 | 66 | flags.DEFINE_boolean( 67 | 'include_image_type_suffix', True, 68 | 'If set, will include the suffix of the image type (e.g. "_leftImg8bit") ' 69 | 'in the "file_name" properties of the image.') 70 | 71 | 72 | def _create_images_json(image_dir, output_json_path, only_basename=False, 73 | include_image_type_suffix=True, keep_cities=None): 74 | """Lists the images in image_dir and writes out the info JSON for them.""" 75 | images_info_array = [] 76 | for city_dir in tf.io.gfile.listdir(image_dir): 77 | if keep_cities and city_dir not in keep_cities: 78 | continue 79 | image_id_re = r'%s_[0-9]+_[0-9]+' % city_dir 80 | image_id_re = re.compile(image_id_re) 81 | for image_basename in tf.io.gfile.listdir( 82 | os.path.join(image_dir, city_dir)): 83 | match = image_id_re.match(image_basename) 84 | image_id = image_basename[match.start():match.end()] 85 | if include_image_type_suffix: 86 | file_name = image_basename 87 | else: 88 | file_name = image_id + os.path.splitext(image_basename)[1] 89 | if not only_basename: 90 | file_name = os.path.join(city_dir, file_name) 91 | image_info_dict = {'id': image_id, 'file_name': file_name} 92 | images_info_array.append(image_info_dict) 93 | 94 | info_dict = {'images': images_info_array} 95 | 96 | with tf.io.gfile.GFile(output_json_path, 'w+') as json_file: 97 | json.dump(info_dict, json_file) 98 | 99 | 100 | def main(argv): 101 | if len(argv) > 1: 102 | raise app.UsageError('Too many command-line arguments.') 103 | keep_cities = None 104 | if FLAGS.keep_cities: 105 | keep_cities = [str(x) for x in FLAGS.keep_cities] 106 | _create_images_json( 107 | FLAGS.image_dir, 108 | FLAGS.output_json_path, 109 | only_basename=FLAGS.only_basename, 110 | include_image_type_suffix=FLAGS.include_image_type_suffix, 111 | keep_cities=keep_cities) 112 | 113 | 114 | if __name__ == '__main__': 115 | flags.mark_flags_as_required(['image_dir', 'output_json_path']) 116 | app.run(main) 117 | -------------------------------------------------------------------------------- /utils/hparam_configs_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for hparam_configs."""
17 | 
18 | import os
19 | import tempfile
20 | from absl import logging
21 | import tensorflow.compat.v1 as tf
22 | import yaml
23 | from deeplab2.utils import hparam_configs
24 | 
25 | 
26 | class HparamConfigsTest(tf.test.TestCase):
27 | 
28 |   def test_config_override(self):
29 |     c = hparam_configs.Config({'a': 1, 'b': 2})
30 |     self.assertEqual(c.as_dict(), {'a': 1, 'b': 2})
31 | 
32 |     c.update({'a': 10})
33 |     self.assertEqual(c.as_dict(), {'a': 10, 'b': 2})
34 | 
35 |     c.b = 20
36 |     self.assertEqual(c.as_dict(), {'a': 10, 'b': 20})
37 | 
38 |     c.override('a=true,b=ss')
39 |     self.assertEqual(c.as_dict(), {'a': True, 'b': 'ss'})
40 | 
41 |     c.override('a=100,,,b=2.3,')  # Extra ',' is fine.
42 |     self.assertEqual(c.as_dict(), {'a': 100, 'b': 2.3})
43 | 
44 |     c.override('a=2x3,b=50')  # a is a special format for image size.
45 |     self.assertEqual(c.as_dict(), {'a': '2x3', 'b': 50})
46 | 
47 |     # Overriding strings must be in the format of xx=yy.
48 |     with self.assertRaises(ValueError):
49 |       c.override('a=true,invalid_string')
50 | 
51 |   def test_config_yaml(self):
52 |     tmpdir = tempfile.gettempdir()
53 |     yaml_file_path = os.path.join(tmpdir, 'x.yaml')
54 |     with open(yaml_file_path, 'w') as f:
55 |       f.write("""
56 |         x: 2
57 |         y:
58 |           z: 'test'
59 |       """)
60 |     c = hparam_configs.Config(dict(x=234, y=2342))
61 |     c.override(yaml_file_path)
62 |     self.assertEqual(c.as_dict(), {'x': 2, 'y': {'z': 'test'}})
63 | 
64 |     yaml_file_path2 = os.path.join(tmpdir, 'y.yaml')
65 |     c.save_to_yaml(yaml_file_path2)
66 |     with open(yaml_file_path2, 'r') as f:
67 |       config_dict = yaml.load(f, Loader=yaml.FullLoader)
68 |     self.assertEqual(config_dict, {'x': 2, 'y': {'z': 'test'}})
69 | 
70 |   def test_config_override_recursive(self):
71 |     c = hparam_configs.Config({'x': 1})
72 |     self.assertEqual(c.as_dict(), {'x': 1})
73 |     c.override('y.y0=2,y.y1=3', allow_new_keys=True)
74 |     self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 2, 'y1': 3}})
75 |     c.update({'y': {'y0': 5, 'y1': {'y11': 100}}})
76 |     self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 5, 'y1': {'y11': 100}}})
77 |     self.assertEqual(c.y.y1.y11, 100)
78 | 
79 |   def test_config_override_list(self):
80 |     c = hparam_configs.Config({'x': [1.0, 2.0]})
81 |     self.assertEqual(c.as_dict(), {'x': [1.0, 2.0]})
82 |     c.override('x=3.0|4.0|5.0')
83 |     self.assertEqual(c.as_dict(), {'x': [3.0, 4.0, 5.0]})
84 | 
85 |   def test_registry_factory(self):
86 |     registry = hparam_configs.RegistryFactor(prefix='test:')
87 | 
88 |     @registry.register()  # Uses the class name as the key by default.
89 |     class A:
90 |       pass
91 | 
92 |     @registry.register(name='special_b')  # Uses `name` as the key if set.
93 |     class B:
94 |       pass
95 | 
96 |     self.assertEqual(registry.lookup('A'), A)
97 |     self.assertEqual(registry.lookup('special_b'), B)
98 |     with self.assertRaises(KeyError):
99 |       registry.lookup('B')
100 | 
101 | 
102 | if __name__ == '__main__':
103 |   logging.set_verbosity(logging.WARNING)
104 |   tf.test.main()
105 | 
--------------------------------------------------------------------------------
/utils/panoptic_cityscapes_categories.json:
--------------------------------------------------------------------------------
1 | [
2 |   {"name": "road", "id": 7, "isthing": 0, "color": [128, 64, 128]},
3 |   {"name": "sidewalk", "id": 8, "isthing": 0, "color": [244, 35, 232]},
4 |   {"name": "building", "id": 11, "isthing": 0, "color": [ 70, 70, 70]},
5 |   {"name": "wall", "id": 12, "isthing": 0, "color": [102, 102, 156]},
6 |   {"name": "fence", "id": 13, "isthing": 0, "color": [190, 153, 153]},
7 |   {"name": "pole", "id": 17, "isthing": 0, "color": [153, 153, 153]},
8 |   {"name": "traffic light", "id": 19, "isthing": 0, "color": [250, 170, 30]},
9 |   {"name": "traffic sign", "id": 20, "isthing": 0, "color": [220, 220, 0]},
10 |   {"name": "vegetation", "id": 21, "isthing": 0, "color": [107, 142, 35]},
11 |   {"name": "terrain", "id": 22, "isthing": 0, "color": [152, 251, 152]},
12 |   {"name": "sky", "id": 23, "isthing": 0, "color": [ 70, 130, 180]},
13 |   {"name": "person", "id": 24, "isthing": 1, "color": [220, 20, 60]},
14 |   {"name": "rider", "id": 25, "isthing": 1, "color": [255, 0, 0]},
15 |   {"name": "car", "id": 26, "isthing": 1, "color": [ 0, 0, 142]},
16 |   {"name": "truck", "id": 27, "isthing": 1, "color": [ 0, 0, 70]},
17 |   {"name": "bus", "id": 28, "isthing": 1, "color": [ 0, 60, 100]},
18 |   {"name": "train", "id": 31, "isthing": 1, "color": [ 0, 80, 100]},
19 |   {"name": "motorcycle", "id": 32, "isthing": 1, "color": [ 0, 0, 230]},
20 |   {"name": "bicycle", "id": 33, "isthing": 1, "color": [119, 11, 32]}
21 | ]
22 | 
--------------------------------------------------------------------------------
/utils/test_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Provide utility functions to write simple tests."""
17 | import functools
18 | 
19 | import numpy as np
20 | import tensorflow as tf
21 | 
22 | 
23 | NORMALIZATION_LAYERS = (
24 |     tf.keras.layers.experimental.SyncBatchNormalization,
25 |     tf.keras.layers.BatchNormalization
26 | )
27 | 
28 | 
29 | def create_strategy():
30 |   """Returns a strategy based on available devices.
31 | 
32 |   Does NOT work with local_multiworker_tpu_test tests!
33 | """ 34 | tpus = tf.config.list_logical_devices(device_type='TPU') 35 | gpus = tf.config.list_logical_devices(device_type='GPU') 36 | if tpus: 37 | resolver = tf.distribute.cluster_resolver.TPUClusterResolver('') 38 | tf.config.experimental_connect_to_cluster(resolver) 39 | tf.tpu.experimental.initialize_tpu_system(resolver) 40 | return tf.distribute.TPUStrategy(resolver) 41 | elif gpus: 42 | return tf.distribute.OneDeviceStrategy('/gpu:0') 43 | else: 44 | return tf.distribute.OneDeviceStrategy('/cpu:0') 45 | 46 | 47 | def test_all_strategies(func): 48 | """Decorator to test CPU, GPU and TPU strategies.""" 49 | @functools.wraps(func) 50 | def decorator(self): 51 | strategy = create_strategy() 52 | return func(self, strategy) 53 | return decorator 54 | 55 | 56 | def create_test_input(batch, height, width, channels): 57 | """Creates test input tensor.""" 58 | return tf.convert_to_tensor( 59 | np.tile( 60 | np.reshape( 61 | np.reshape(np.arange(height), [height, 1]) + 62 | np.reshape(np.arange(width), [1, width]), 63 | [1, height, width, 1]), 64 | [batch, 1, 1, channels]), dtype=tf.float32) 65 | -------------------------------------------------------------------------------- /video/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | --------------------------------------------------------------------------------