├── .gitignore ├── CONTRIBUTING.md ├── DeepLab_COCO_Demo.ipynb ├── DeepLab_Cityscapes_Demo.ipynb ├── LICENSE ├── README.md ├── ViP_DeepLab_Demo.ipynb ├── __init__.py ├── common.py ├── common_test.py ├── compile.sh ├── config.proto ├── configs ├── ade20k │ └── kmax_deeplab │ │ ├── kmax_meta_convnext_large_os32.textproto │ │ ├── kmax_meta_convnext_large_os32_res1281.textproto │ │ ├── kmax_meta_r50_os32.textproto │ │ └── kmax_meta_r50_os32_res1281.textproto ├── cityscapes │ ├── axial_deeplab │ │ ├── axial_swidernet_1_1_1_os16.textproto │ │ ├── axial_swidernet_1_1_3_os16.textproto │ │ ├── axial_swidernet_1_1_4.5_os16.textproto │ │ ├── max_deeplab_l_backbone_os16.textproto │ │ └── max_deeplab_s_backbone_os16.textproto │ ├── kmax_deeplab │ │ ├── kmax_meta_axial_r50_os32.textproto │ │ ├── kmax_meta_convnext_base_os32.textproto │ │ ├── kmax_meta_convnext_large_os32.textproto │ │ ├── kmax_meta_convnext_small_os32.textproto │ │ ├── kmax_meta_convnext_tiny_os32.textproto │ │ └── kmax_meta_r50_os32.textproto │ └── panoptic_deeplab │ │ ├── mobilenet_v3_large_os16.textproto │ │ ├── mobilenet_v3_large_os32.textproto │ │ ├── mobilenet_v3_small_os16.textproto │ │ ├── mobilenet_v3_small_os32.textproto │ │ ├── resnet50_beta_os32.textproto │ │ ├── resnet50_os32_merge_with_pure_tf_func.textproto │ │ ├── resnet50_os32_semseg.textproto │ │ ├── swidernet_sac_1_1_1_os16.textproto │ │ ├── swidernet_sac_1_1_3_os16.textproto │ │ ├── swidernet_sac_1_1_4.5_os16.textproto │ │ └── wide_resnet41_os16.textproto ├── cityscapes_dvps │ └── vip_deeplab │ │ ├── resnet50_beta_os32.textproto │ │ └── wide_resnet41_os16.textproto ├── coco │ ├── kmax_deeplab │ │ ├── kmax_meta_axial_r50_os32.textproto │ │ ├── kmax_meta_convnext_base_os32.textproto │ │ ├── kmax_meta_convnext_large_os32.textproto │ │ ├── kmax_meta_convnext_small_os32.textproto │ │ ├── kmax_meta_convnext_tiny_os32.textproto │ │ └── kmax_meta_r50_os32.textproto │ ├── max_deeplab │ │ ├── max_deeplab_l_os16_res1025_100k.textproto │ │ ├── max_deeplab_l_os16_res1025_200k.textproto │ │ ├── max_deeplab_l_os16_res1025_400k.textproto │ │ ├── max_deeplab_s_os16_res1025_100k.textproto │ │ ├── max_deeplab_s_os16_res1025_200k.textproto │ │ ├── max_deeplab_s_os16_res1025_400k.textproto │ │ ├── max_deeplab_s_os16_res641_100k.textproto │ │ ├── max_deeplab_s_os16_res641_200k.textproto │ │ └── max_deeplab_s_os16_res641_400k.textproto │ └── panoptic_deeplab │ │ ├── resnet50_beta_os16.textproto │ │ ├── resnet50_beta_os32.textproto │ │ ├── resnet50_os16.textproto │ │ └── resnet50_os32.textproto ├── example │ ├── example_cityscapes_deeplabv3.textproto │ ├── example_cityscapes_deeplabv3_mv3l.textproto │ ├── example_cityscapes_deeplabv3plus.textproto │ ├── example_cityscapes_panoptic_deeplab.textproto │ ├── example_cityscapes_panoptic_deeplab_mv3l.textproto │ ├── example_coco_kmax_meta_convnext.textproto │ ├── example_coco_max_deeplab.textproto │ └── example_kitti-step_motion_deeplab.textproto ├── kitti │ ├── motion_deeplab │ │ ├── resnet50_os32.textproto │ │ └── resnet50_os32_trainval.textproto │ └── panoptic_deeplab │ │ ├── resnet50_os32.textproto │ │ └── resnet50_os32_trainval.textproto ├── motchallenge │ ├── motion_deeplab │ │ └── resnet50_os32.textproto │ └── panoptic_deeplab │ │ └── resnet50_os32.textproto └── semkitti_dvps │ └── vip_deeplab │ └── resnet50_beta_os32.textproto ├── data ├── __init__.py ├── ade20k_constants.py ├── build_ade20k_data.py ├── build_cityscapes_data.py ├── build_cityscapes_data_test.py ├── build_coco_data.py ├── build_coco_data_test.py ├── build_dvps_data.py ├── 
build_step_data.py ├── build_step_data_test.py ├── coco_constants.py ├── data_utils.py ├── data_utils_test.py ├── dataloader │ ├── __init__.py │ ├── input_reader.py │ └── multicamera_input_reader.py ├── dataset.py ├── dataset_utils.py ├── dataset_utils_test.py ├── multicamera_data_utils.py ├── preprocessing │ ├── __init__.py │ ├── autoaugment_policy.py │ ├── autoaugment_policy_test.py │ ├── autoaugment_utils.py │ ├── autoaugment_utils_test.py │ ├── input_preprocessing.py │ ├── input_preprocessing_test.py │ ├── preprocess_utils.py │ └── preprocess_utils_test.py ├── sample_generator.py ├── sample_generator_test.py ├── testdata │ ├── create_test_data.py │ ├── dummy_gt_for_vps.png │ ├── dummy_prediction.png │ ├── gtFine │ │ ├── cityscapes_panoptic_dummy_trainId.json │ │ └── cityscapes_panoptic_dummy_trainId │ │ │ └── dummy_000000_000000_gtFine_panoptic.png │ ├── leftImg8bit │ │ └── dummy_000000_000000_leftImg8bit.png │ └── targets │ │ ├── center_target.png │ │ ├── center_weights.png │ │ ├── eval_is_crowd.npy │ │ ├── eval_panoptic_target.npy │ │ ├── eval_semantic_target.png │ │ ├── is_crowd.npy │ │ ├── offset_target.npy │ │ ├── offset_weights.png │ │ ├── panoptic_target.npy │ │ ├── panoptic_target.png │ │ ├── rgb_target.png │ │ ├── semantic_target.png │ │ ├── semantic_weights.png │ │ ├── thing_id_class_target.npy │ │ └── thing_id_mask_target.npy ├── utils │ ├── __init__.py │ └── create_step_panoptic_maps.py └── waymo_constants.py ├── dataset.proto ├── evaluation ├── __init__.py ├── coco_instance_ap.py ├── coco_instance_ap_test.py ├── depth_aware_segmentation_and_tracking_quality.py ├── depth_aware_segmentation_and_tracking_quality_test.py ├── depth_metrics.py ├── depth_metrics_test.py ├── numpy │ ├── __init__.py │ ├── segmentation_and_tracking_quality.py │ └── segmentation_and_tracking_quality_test.py ├── panoptic_quality.py ├── panoptic_quality_test.py ├── segmentation_and_tracking_quality.py ├── segmentation_and_tracking_quality_test.py ├── test_utils.py ├── test_utils_test.py ├── testdata │ ├── README.md │ ├── bird_gt.png │ ├── bird_pred_class.png │ ├── bird_pred_instance.png │ ├── cat_gt.png │ ├── cat_pred_class.png │ ├── cat_pred_instance.png │ ├── team_gt_instance.png │ ├── team_pred_class.png │ └── team_pred_instance.png └── video_panoptic_quality.py ├── evaluator.proto ├── export_model.py ├── g3doc ├── faq.md ├── img │ ├── axial_deeplab │ │ ├── axial_block.png │ │ ├── nonlocal_block.png │ │ └── position_sensitive_axial_block.png │ ├── kmax_deeplab │ │ ├── clustering_view_of_mask_transformer.png │ │ └── kmax_decoder.png │ ├── max_deeplab │ │ ├── overview.png │ │ └── overview_simple.png │ ├── moat │ │ ├── moat_block.png │ │ ├── moat_imagenet1k_224.png │ │ └── moat_imagenet22k_384.png │ ├── panoptic_deeplab.png │ ├── step │ │ └── kitti_step_annotation.png │ └── vip_deeplab │ │ └── demo.gif ├── projects │ ├── axial_deeplab.md │ ├── imagenet_pretrained_checkpoints.md │ ├── kmax_deeplab.md │ ├── max_deeplab.md │ ├── moat_imagenet_pretrained_checkpoints.md │ ├── motion_deeplab.md │ ├── panoptic_deeplab.md │ ├── vip_deeplab.md │ └── wod_pvps.md └── setup │ ├── ade20k.md │ ├── cityscapes.md │ ├── cityscapes_test_server_evaluation.md │ ├── coco.md │ ├── coco_test_server_evaluation.md │ ├── getting_started.md │ ├── installation.md │ ├── kitti_step.md │ ├── motchallenge_step.md │ └── your_own_dataset.md ├── model.proto ├── model ├── __init__.py ├── builder.py ├── builder_test.py ├── decoder │ ├── __init__.py │ ├── aspp.py │ ├── aspp_test.py │ ├── deeplabv3.py │ ├── deeplabv3_test.py │ ├── 
deeplabv3plus.py │ ├── deeplabv3plus_test.py │ ├── max_deeplab.py │ ├── max_deeplab_test.py │ ├── motion_deeplab_decoder.py │ ├── panoptic_deeplab.py │ ├── panoptic_deeplab_test.py │ └── vip_deeplab_decoder.py ├── deeplab.py ├── deeplab_test.py ├── encoder │ ├── __init__.py │ ├── atrous_consistency_test.py │ ├── axial_resnet.py │ ├── axial_resnet_instances.py │ ├── axial_resnet_instances_test.py │ ├── axial_resnet_test.py │ ├── mobilenet.py │ ├── mobilenet_test.py │ └── model_export_test.py ├── kmax_deeplab.py ├── kmax_deeplab_test.py ├── layers │ ├── __init__.py │ ├── activations.py │ ├── activations_test.py │ ├── axial_block_groups.py │ ├── axial_block_groups_test.py │ ├── axial_blocks.py │ ├── axial_blocks_test.py │ ├── axial_layers.py │ ├── axial_layers_test.py │ ├── blocks.py │ ├── blocks_test.py │ ├── convolutions.py │ ├── convolutions_test.py │ ├── drop_path.py │ ├── drop_path_test.py │ ├── dual_path_transformer.py │ ├── dual_path_transformer_test.py │ ├── moat_attention.py │ ├── moat_attention_test.py │ ├── moat_blocks.py │ ├── moat_blocks_test.py │ ├── positional_encodings.py │ ├── positional_encodings_test.py │ ├── recompute_grad.py │ ├── recompute_grad_test.py │ ├── resized_fuse.py │ ├── resized_fuse_test.py │ ├── squeeze_and_excite.py │ ├── squeeze_and_excite_test.py │ ├── stems.py │ └── stems_test.py ├── loss │ ├── base_loss.py │ ├── base_loss_test.py │ ├── loss_builder.py │ ├── loss_builder_test.py │ ├── matchers_ops.py │ ├── matchers_ops_test.py │ ├── max_deeplab_loss.py │ └── max_deeplab_loss_test.py ├── pixel_decoder │ ├── kmax.py │ └── kmax_test.py ├── pixel_encoder │ ├── axial_resnet.py │ ├── axial_resnet_test.py │ ├── convnext.py │ ├── convnext_test.py │ ├── moat.py │ └── moat_test.py ├── post_processor │ ├── __init__.py │ ├── max_deeplab.py │ ├── max_deeplab_test.py │ ├── motion_deeplab.py │ ├── panoptic_deeplab.py │ ├── panoptic_deeplab_test.py │ ├── post_processor_builder.py │ ├── post_processor_builder_test.py │ ├── vip_deeplab.py │ └── vip_deeplab_test.py ├── test_utils.py ├── test_utils_test.py ├── transformer_decoder │ ├── kmax.py │ └── kmax_test.py ├── utils.py └── utils_test.py ├── tensorflow_ops ├── kernels │ ├── merge_semantic_and_instance_maps_op.cc │ ├── merge_semantic_and_instance_maps_op_kernel.cc │ ├── merge_semantic_and_instance_maps_op_kernel.cu.cc │ └── merge_semantic_and_instance_maps_op_kernel.h └── python │ ├── kernel_tests │ ├── __init__.py │ └── merge_semantic_and_instance_maps_op_test.py │ └── ops │ ├── __init__.py │ └── merge_semantic_and_instance_maps_op.py ├── tracker ├── __init__.py ├── iou_tracker.py └── optical_flow_utils.py ├── trainer.proto ├── trainer ├── __init__.py ├── distribution_utils.py ├── evaluator.py ├── evaluator_test.py ├── runner_utils.py ├── runner_utils_test.py ├── train.py ├── train_lib.py ├── trainer.py ├── trainer_utils.py ├── vis.py └── vis_utils.py ├── utils ├── __init__.py ├── coco_tools.py ├── coco_tools_test.py ├── create_images_json_for_cityscapes.py ├── hparam_configs.py ├── hparam_configs_test.py ├── net_surgery_convert_last_layer.py ├── panoptic_cityscapes_categories.json ├── panoptic_instances.py ├── panoptic_instances_test.py └── test_utils.py └── video ├── __init__.py ├── motion_deeplab.py └── vip_deeplab.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Caching. 2 | __pycache__/ 3 | 4 | # IDE settings. 5 | .vscode/ 6 | .idea/ 7 | .env 8 | .config/ 9 | 10 | # Generated proto files. 
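# (compile.sh generates these via `protoc deeplab2/*.proto --python_out=.`.)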
11 | *_pb2.py 12 | 13 | # For Mac. 14 | *.DS_Store 15 | 16 | # Generated files. 17 | *.o 18 | *.so -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /common_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """Tests for common.py.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2 import common 20 | 21 | 22 | class CommonTest(tf.test.TestCase): 23 | 24 | def test_constants_keys(self): 25 | self.assertEqual(common.PRED_PANOPTIC_KEY, 'panoptic_pred') 26 | self.assertEqual(common.PRED_SEMANTIC_KEY, 'semantic_pred') 27 | self.assertEqual(common.PRED_INSTANCE_CENTER_KEY, 'instance_center_pred') 28 | self.assertEqual(common.PRED_INSTANCE_KEY, 'instance_pred') 29 | 30 | self.assertEqual(common.PRED_SEMANTIC_LOGITS_KEY, 'semantic_logits') 31 | self.assertEqual(common.PRED_CENTER_HEATMAP_KEY, 'center_heatmap') 32 | self.assertEqual(common.PRED_OFFSET_MAP_KEY, 'offset_map') 33 | self.assertEqual(common.PRED_FRAME_OFFSET_MAP_KEY, 'frame_offset_map') 34 | 35 | self.assertEqual(common.GT_PANOPTIC_KEY, 'panoptic_gt') 36 | self.assertEqual(common.GT_SEMANTIC_KEY, 'semantic_gt') 37 | self.assertEqual(common.GT_INSTANCE_CENTER_KEY, 'instance_center_gt') 38 | self.assertEqual(common.GT_FRAME_OFFSET_KEY, 'frame_offset_gt') 39 | self.assertEqual(common.GT_INSTANCE_REGRESSION_KEY, 40 | 'instance_regression_gt') 41 | self.assertEqual(common.GT_PANOPTIC_RAW, 'panoptic_raw') 42 | self.assertEqual(common.GT_SEMANTIC_RAW, 'semantic_raw') 43 | self.assertEqual(common.GT_SIZE_RAW, 'size_raw') 44 | 45 | self.assertEqual(common.SEMANTIC_LOSS_WEIGHT_KEY, 'semantic_loss_weight') 46 | self.assertEqual(common.CENTER_LOSS_WEIGHT_KEY, 'center_loss_weight') 47 | self.assertEqual(common.REGRESSION_LOSS_WEIGHT_KEY, 48 | 'regression_loss_weight') 49 | self.assertEqual(common.FRAME_REGRESSION_LOSS_WEIGHT_KEY, 50 | 'frame_regression_loss_weight') 51 | 52 | self.assertEqual(common.RESIZED_IMAGE, 'resized_image') 53 | self.assertEqual(common.IMAGE, 'image') 54 | self.assertEqual(common.IMAGE_NAME, 'image_name') 55 | self.assertEqual(common.SEQUENCE_ID, 'sequence_id') 56 | self.assertEqual(common.FRAME_ID, 'frame_id') 57 | 58 | self.assertEqual(common.KEY_FRAME_ID, 'video/frame_id') 59 | self.assertEqual(common.KEY_SEQUENCE_ID, 'video/sequence_id') 60 | self.assertEqual(common.KEY_LABEL_FORMAT, 'image/segmentation/class/format') 61 | self.assertEqual(common.KEY_ENCODED_PREV_LABEL, 62 | 'prev_image/segmentation/class/encoded') 63 | self.assertEqual(common.KEY_ENCODED_LABEL, 64 | 'image/segmentation/class/encoded') 65 | self.assertEqual(common.KEY_IMAGE_CHANNELS, 'image/channels') 66 | self.assertEqual(common.KEY_IMAGE_WIDTH, 'image/width') 67 | self.assertEqual(common.KEY_IMAGE_HEIGHT, 'image/height') 68 | self.assertEqual(common.KEY_IMAGE_FORMAT, 'image/format') 69 | self.assertEqual(common.KEY_IMAGE_FILENAME, 'image/filename') 70 | self.assertEqual(common.KEY_ENCODED_PREV_IMAGE, 'prev_image/encoded') 71 | self.assertEqual(common.KEY_ENCODED_IMAGE, 'image/encoded') 72 | 73 | def test_multicamera_keys(self): 74 | test_camera_name = 'front' 75 | expected = { 76 | common.KEY_PER_CAMERA_ENCODED_IMAGE: 77 | 'image/encoded/%s', 78 | common.KEY_PER_CAMERA_ENCODED_NEXT_IMAGE: 79 | 'next_image/encoded/%s', 80 | common.KEY_PER_CAMERA_IMAGE_HEIGHT: 81 | 'image/height/%s', 82 | common.KEY_PER_CAMERA_IMAGE_WIDTH: 83 | 'image/width/%s', 84 | common.KEY_PER_CAMERA_ENCODED_LABEL: 85 | 'image/segmentation/class/encoded/%s', 86 | common.KEY_PER_CAMERA_ENCODED_NEXT_LABEL: 87 | 'next_image/segmentation/class/encoded/%s', 88 | common.KEY_PER_CAMERA_ENCODED_DEPTH: 89 | 'image/depth/encoded/%s', 90 | } 91 | for key, val in expected.items(): 92 | self.assertEqual(key % test_camera_name, val % test_camera_name) 93 | 94 | 95 | if __name__ == 
'__main__': 96 | tf.test.main() 97 | -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The Deeplab2 Authors. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Quick start command line to set up deeplab2 (Linux only). 16 | # Example command to run: 17 | # deeplab2/compile.sh [cpu|gpu] 18 | # 19 | # This script assumes the following folder structure: 20 | # 21 | # + root 22 | # + deeplab2 23 | # + models 24 | # + orbit 25 | # + cocoapi 26 | # + PythonAPI 27 | # 28 | # The script also assumes that `protoc` can be accessed from the command 29 | # line. 30 | 31 | #!/bin/bash 32 | 33 | set -e 34 | 35 | # cpu or gpu 36 | CONFIG="cpu" 37 | 38 | function tolower() { 39 | echo "${1,,}" 40 | } 41 | 42 | if [[ ! -z "$1" ]] 43 | then 44 | echo "Setting configuration from argument ($1)..." 45 | CONFIG=$(tolower "$1") 46 | if [ "$CONFIG" != "cpu" ] && [ "$CONFIG" != "gpu" ] 47 | then 48 | echo "Configuration must be either \"cpu\" or \"gpu\", exiting..." 49 | exit 1 50 | fi 51 | fi 52 | 53 | echo "Running configuration with $CONFIG." 54 | 55 | # Protobuf compilation 56 | # Replace `protoc` with `${PATH_TO_PROTOC}` if the protobuf compiler was 57 | # downloaded from the web. 58 | echo "-----------------------------------------------------------------------" 59 | echo "Compiling protobuf..." 60 | echo "-----------------------------------------------------------------------" 61 | protoc deeplab2/*.proto --python_out=. 62 | 63 | # Compile custom ops 64 | # See details in https://www.tensorflow.org/guide/create_op#compile_the_op_using_your_system_compiler_tensorflow_binary_installation 65 | TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) 66 | TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) 67 | OP_NAME='deeplab2/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op' 68 | 69 | if [ "$CONFIG" == "cpu" ] 70 | then 71 | # CPU 72 | echo "-----------------------------------------------------------------------" 73 | echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (CPU)..." 74 | echo "-----------------------------------------------------------------------" 75 | g++ -std=c++14 -shared \ 76 | ${OP_NAME}.cc ${OP_NAME}_kernel.cc -o ${OP_NAME}.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 77 | else 78 | # GPU 79 | # (https://www.tensorflow.org/guide/create_op#compiling_the_kernel_for_the_gpu_device) 80 | echo "-----------------------------------------------------------------------" 81 | echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (GPU)..."
82 | echo "-----------------------------------------------------------------------" 83 | nvcc -std=c++14 -c -o ${OP_NAME}_kernel.cu.o \ 84 | ${OP_NAME}_kernel.cu.cc \ 85 | ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr 86 | 87 | g++ -std=c++14 -shared -o ${OP_NAME}.so ${OP_NAME}.cc ${OP_NAME}_kernel.cc \ 88 | ${OP_NAME}_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} 89 | fi 90 | 91 | # PYTHONPATH 92 | export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/models:`pwd`/cocoapi/PythonAPI 93 | 94 | # Running tests 95 | echo "-----------------------------------------------------------------------" 96 | echo "Running tests for merge_semantic_and_instance_maps_op..." 97 | echo "-----------------------------------------------------------------------" 98 | python deeplab2/tensorflow_ops/python/kernel_tests/merge_semantic_and_instance_maps_op_test.py 99 | 100 | # End-to-end tests 101 | echo "-----------------------------------------------------------------------" 102 | echo "Running end-to-end tests..." 103 | echo "-----------------------------------------------------------------------" 104 | 105 | # Model training test (tests custom ops and protobuf) 106 | python deeplab2/model/deeplab_test.py 107 | 108 | # Model evaluation test (tests other packages such as orbit, cocoapi, etc.) 109 | python deeplab2/trainer/evaluator_test.py 110 | 111 | echo "------------------------" 112 | echo "Done with configuration!" 113 | echo "------------------------" 114 | 115 | -------------------------------------------------------------------------------- /config.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The Deeplab2 Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | syntax = "proto2"; 16 | 17 | package deeplab2; 18 | 19 | import public 'deeplab2/dataset.proto'; 20 | import public 'deeplab2/evaluator.proto'; 21 | import public 'deeplab2/model.proto'; 22 | import public 'deeplab2/trainer.proto'; 23 | 24 | option java_multiple_files = true; 25 | 26 | // Configure experiment options. 27 | message ExperimentOptions { 28 | // Set the experiment name. 29 | optional string experiment_name = 1; 30 | // Set the options for the model. 31 | optional ModelOptions model_options = 2; 32 | // Set the options for the trainer. 33 | optional TrainerOptions trainer_options = 3; 34 | // Set the options for the training dataset. 35 | optional DatasetOptions train_dataset_options = 4; 36 | // Set the options for the evaluator. 37 | optional EvaluatorOptions evaluator_options = 5; 38 | // Set the options for the validation dataset.
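// (For fully populated ExperimentOptions messages, see the textproto configs under configs/, e.g. the resnet50_os32_semseg.textproto that follows.)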
39 | optional DatasetOptions eval_dataset_options = 6; 40 | } 41 | -------------------------------------------------------------------------------- /configs/cityscapes/panoptic_deeplab/resnet50_os32_semseg.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | # 4 | # Panoptic-DeepLab with ResNet-50 and output stride 32. 5 | # 6 | ############### PLEASE READ THIS BEFORE USING THIS CONFIG ############### 7 | # Before using this config, you need to update the following fields: 8 | # - experiment_name: Use a unique experiment name for each experiment. 9 | # - initial_checkpoint: Update the path to the initial checkpoint. 10 | # - train_dataset_options.file_pattern: Update the path to the 11 | # training set. e.g., your_dataset/train*.tfrecord 12 | # - eval_dataset_options.file_pattern: Update the path to the 13 | # validation set, e.g., your_dataset/eval*.tfrecord 14 | ######################################################################### 15 | # 16 | # This config provides an example of training Panoptic-DeepLab with ONLY 17 | # semantic segmentation (i.e., the instance/panoptic segmentation is not 18 | # trained). This could be used for some datasets that provide only 19 | # semantic segmentation annotations. 20 | # 21 | # For ResNet, see 22 | # - Kaiming He, et al. "Deep Residual Learning for Image Recognition." 23 | # In CVPR, 2016. 24 | # For Panoptic-DeepLab, see 25 | # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline 26 | # for Bottom-Up Panoptic Segmentation." In CVPR, 2020. 27 | 28 | # Use a unique experiment_name for each experiment. 29 | experiment_name: "${EXPERIMENT_NAME}" 30 | model_options { 31 | # Update the path to the initial checkpoint (e.g., ImageNet 32 | # pretrained checkpoint). 33 | initial_checkpoint: "${INIT_CHECKPOINT}" 34 | backbone { 35 | name: "resnet50" 36 | output_stride: 32 37 | } 38 | decoder { 39 | feature_key: "res5" 40 | decoder_channels: 256 41 | aspp_channels: 256 42 | atrous_rates: 3 43 | atrous_rates: 6 44 | atrous_rates: 9 45 | } 46 | panoptic_deeplab { 47 | low_level { 48 | feature_key: "res3" 49 | channels_project: 64 50 | } 51 | low_level { 52 | feature_key: "res2" 53 | channels_project: 32 54 | } 55 | instance { 56 | enable: false 57 | } 58 | semantic_head { 59 | output_channels: 19 60 | head_channels: 256 61 | } 62 | } 63 | } 64 | trainer_options { 65 | save_checkpoints_steps: 1000 66 | save_summaries_steps: 100 67 | steps_per_loop: 100 68 | loss_options { 69 | semantic_loss { 70 | name: "softmax_cross_entropy" 71 | weight: 1.0 72 | top_k_percent: 0.2 73 | } 74 | } 75 | solver_options { 76 | base_learning_rate: 0.0005 77 | training_number_of_steps: 60000 78 | } 79 | } 80 | train_dataset_options { 81 | dataset: "cityscapes_panoptic" 82 | # Update the path to training set. 83 | file_pattern: "${TRAIN_SET}" 84 | # Adjust the batch_size accordingly to better fit your GPU/TPU memory. 85 | # Also see Q1 in g3doc/faq.md. 86 | batch_size: 8 87 | crop_size: 1025 88 | crop_size: 2049 89 | # Skip resizing. 90 | min_resize_value: 0 91 | max_resize_value: 0 92 | augmentations { 93 | min_scale_factor: 0.5 94 | max_scale_factor: 2.0 95 | scale_factor_step_size: 0.1 96 | } 97 | } 98 | eval_dataset_options { 99 | dataset: "cityscapes_panoptic" 100 | # Update the path to validation set. 101 | file_pattern: "${VAL_SET}" 102 | batch_size: 1 103 | crop_size: 1025 104 | crop_size: 2049 105 | # Skip resizing. 
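# (A min_resize_value/max_resize_value of 0 disables the resize step, so evaluation uses the raw 1024x2048 Cityscapes frames together with the 1025x2049 crop above.)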
106 | min_resize_value: 0 107 | max_resize_value: 0 108 | } 109 | evaluator_options { 110 | continuous_eval_timeout: -1 111 | save_predictions: true 112 | save_raw_predictions: false 113 | } 114 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_deeplabv3.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "resnet50" 14 | } 15 | 16 | # Example for cityscapes. 17 | deeplab_v3 { 18 | num_classes: 19 19 | } 20 | } 21 | 22 | train_dataset_options { 23 | crop_size: 1025 24 | crop_size: 2049 25 | } 26 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_deeplabv3_mv3l.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "mobilenet_v3_large" 14 | use_squeeze_and_excite: true 15 | } 16 | 17 | # Example for cityscapes. 18 | deeplab_v3 { 19 | num_classes: 19 20 | } 21 | } 22 | 23 | train_dataset_options { 24 | crop_size: 1025 25 | crop_size: 2049 26 | } 27 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_deeplabv3plus.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "resnet50" 14 | } 15 | 16 | deeplab_v3_plus { 17 | low_level { 18 | feature_key: "res2" 19 | channels_project: 48 20 | } 21 | # Example for cityscapes. 22 | num_classes: 19 23 | } 24 | } 25 | 26 | train_dataset_options { 27 | crop_size: 1025 28 | crop_size: 2049 29 | } 30 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_panoptic_deeplab.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "resnet50" 14 | } 15 | 16 | panoptic_deeplab { 17 | low_level { 18 | feature_key: "res3" 19 | channels_project: 64 20 | } 21 | low_level { 22 | feature_key: "res2" 23 | channels_project: 32 24 | } 25 | semantic_head { 26 | # Example for cityscapes. 
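# (Cityscapes defines 19 trainId classes, hence 19 output channels.)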
27 | output_channels: 19 28 | head_channels: 256 29 | } 30 | instance { 31 | instance_decoder_override { 32 | feature_key: "res5" 33 | decoder_channels: 128 34 | atrous_rates: 6 35 | atrous_rates: 12 36 | atrous_rates: 18 37 | } 38 | low_level_override { 39 | feature_key: "res3" 40 | channels_project: 32 41 | } 42 | low_level_override { 43 | feature_key: "res2" 44 | channels_project: 16 45 | } 46 | center_head { 47 | output_channels: 1 48 | head_channels: 32 49 | } 50 | regression_head { 51 | output_channels: 2 52 | head_channels: 32 53 | } 54 | } 55 | } 56 | } 57 | 58 | train_dataset_options { 59 | crop_size: 1025 60 | crop_size: 2049 61 | } 62 | -------------------------------------------------------------------------------- /configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "res5" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "mobilenet_v3_large" 14 | use_squeeze_and_excite: true 15 | } 16 | 17 | panoptic_deeplab { 18 | low_level { 19 | feature_key: "res3" 20 | channels_project: 64 21 | } 22 | low_level { 23 | feature_key: "res2" 24 | channels_project: 32 25 | } 26 | semantic_head { 27 | # Example for cityscapes. 28 | output_channels: 19 29 | head_channels: 256 30 | } 31 | instance { 32 | instance_decoder_override { 33 | feature_key: "res5" 34 | decoder_channels: 128 35 | atrous_rates: 6 36 | atrous_rates: 12 37 | atrous_rates: 18 38 | } 39 | low_level_override { 40 | feature_key: "res3" 41 | channels_project: 32 42 | } 43 | low_level_override { 44 | feature_key: "res2" 45 | channels_project: 16 46 | } 47 | center_head { 48 | output_channels: 1 49 | head_channels: 32 50 | } 51 | regression_head { 52 | output_channels: 2 53 | head_channels: 32 54 | } 55 | } 56 | } 57 | } 58 | 59 | train_dataset_options { 60 | crop_size: 1025 61 | crop_size: 2049 62 | } 63 | -------------------------------------------------------------------------------- /configs/example/example_coco_kmax_meta_convnext.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "feature_semantic" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "kmax_convnext_base" 14 | drop_path_keep_prob: 0.5 15 | } 16 | 17 | max_deeplab { 18 | pixel_space_head { 19 | output_channels: 128 20 | head_channels: 256 21 | } 22 | auxiliary_low_level { 23 | feature_key: "res3" 24 | channels_project: 64 25 | } 26 | auxiliary_low_level { 27 | feature_key: "res2" 28 | channels_project: 32 29 | } 30 | auxiliary_semantic_head { 31 | # Example for COCO. 
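# (134 = 133 COCO panoptic classes plus one void class.)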
32 | output_channels: 134 33 | head_channels: 256 34 | } 35 | } 36 | } 37 | 38 | train_dataset_options { 39 | crop_size: 65 40 | crop_size: 65 41 | } 42 | -------------------------------------------------------------------------------- /configs/example/example_coco_max_deeplab.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/config.proto 2 | # proto-message: ExperimentOptions 3 | 4 | model_options { 5 | decoder { 6 | feature_key: "feature_semantic" 7 | atrous_rates: 6 8 | atrous_rates: 12 9 | atrous_rates: 18 10 | } 11 | 12 | backbone { 13 | name: "max_deeplab_s" 14 | output_stride: 16 15 | } 16 | 17 | max_deeplab { 18 | pixel_space_head { 19 | output_channels: 128 20 | head_channels: 256 21 | } 22 | auxiliary_low_level { 23 | feature_key: "res3" 24 | channels_project: 64 25 | } 26 | auxiliary_low_level { 27 | feature_key: "res2" 28 | channels_project: 32 29 | } 30 | auxiliary_semantic_head { 31 | # Example for COCO. 32 | output_channels: 134 33 | head_channels: 256 34 | } 35 | } 36 | } 37 | 38 | train_dataset_options { 39 | crop_size: 65 40 | crop_size: 65 41 | } 42 | -------------------------------------------------------------------------------- /configs/example/example_kitti-step_motion_deeplab.textproto: -------------------------------------------------------------------------------- 1 | # proto-file: deeplab2/model.proto 2 | # proto-message: ModelOptions 3 | 4 | decoder { 5 | feature_key: "res5" 6 | atrous_rates: 6 7 | atrous_rates: 12 8 | atrous_rates: 18 9 | } 10 | 11 | backbone { 12 | name: "resnet50" 13 | } 14 | 15 | # Motion-Deeplab adopts Panoptic-Deeplab for the task of Video Panoptic 16 | # Segmentation or Segmenting and Tracking Every Pixel (STEP). 17 | motion_deeplab { 18 | low_level { 19 | feature_key: "res3" 20 | channels_project: 64 21 | } 22 | low_level { 23 | feature_key: "res2" 24 | channels_project: 32 25 | } 26 | semantic_head { 27 | # Example for KITTI-STEP. 28 | output_channels: 19 29 | head_channels: 256 30 | } 31 | instance { 32 | instance_decoder_override { 33 | feature_key: "res5" 34 | decoder_channels: 128 35 | atrous_rates: 6 36 | atrous_rates: 12 37 | atrous_rates: 18 38 | } 39 | low_level_override { 40 | feature_key: "res3" 41 | channels_project: 32 42 | } 43 | low_level_override { 44 | feature_key: "res2" 45 | channels_project: 16 46 | } 47 | center_head { 48 | output_channels: 1 49 | head_channels: 32 50 | } 51 | regression_head { 52 | output_channels: 2 53 | head_channels: 32 54 | } 55 | } 56 | motion_head { 57 | output_channels: 2 58 | head_channels: 32 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /data/build_cityscapes_data_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for build_cityscapes_data.""" 17 | 18 | import os 19 | 20 | from absl import flags 21 | import numpy as np 22 | from PIL import Image 23 | import tensorflow as tf 24 | 25 | from deeplab2.data import build_cityscapes_data 26 | 27 | 28 | FLAGS = flags.FLAGS 29 | _TEST_DATA_DIR = 'deeplab2/data/testdata' 30 | _TEST_FILE_PREFIX = 'dummy_000000_000000' 31 | 32 | 33 | class BuildCityscapesDataTest(tf.test.TestCase): 34 | 35 | def test_read_segments(self): 36 | cityscapes_root = os.path.join(_TEST_DATA_DIR) 37 | segments_dict = build_cityscapes_data._read_segments( 38 | cityscapes_root, dataset_split='dummy') 39 | self.assertIn(_TEST_FILE_PREFIX, segments_dict) 40 | _, segments = segments_dict[_TEST_FILE_PREFIX] 41 | self.assertLen(segments, 10) 42 | 43 | def test_generate_panoptic_label(self): 44 | FLAGS.treat_crowd_as_ignore = False # Test a more complicated setting 45 | cityscapes_root = os.path.join(_TEST_DATA_DIR) 46 | segments_dict = build_cityscapes_data._read_segments( 47 | cityscapes_root, dataset_split='dummy') 48 | annotation_file_name, segments = segments_dict[_TEST_FILE_PREFIX] 49 | panoptic_annotation_file = build_cityscapes_data._get_panoptic_annotation( 50 | cityscapes_root, dataset_split='dummy', 51 | annotation_file_name=annotation_file_name) 52 | panoptic_label = build_cityscapes_data._generate_panoptic_label( 53 | panoptic_annotation_file, segments) 54 | 55 | # Check panoptic label matches golden file. 56 | golden_file_path = os.path.join(_TEST_DATA_DIR, 57 | 'dummy_gt_for_vps.png') 58 | with tf.io.gfile.GFile(golden_file_path, 'rb') as f: 59 | golden_label = Image.open(f) 60 | # The PNG file is encoded by: 61 | # color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256] 62 | golden_label = np.dot(np.asarray(golden_label), [1, 256, 256 * 256]) 63 | 64 | np.testing.assert_array_equal(panoptic_label, golden_label) 65 | 66 | if __name__ == '__main__': 67 | tf.test.main() 68 | -------------------------------------------------------------------------------- /data/data_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for data_utils.""" 17 | 18 | import io 19 | import numpy as np 20 | from PIL import Image 21 | import tensorflow as tf 22 | 23 | from deeplab2.data import data_utils 24 | 25 | 26 | def _encode_png_image(image): 27 | """Helper method to encode input image in PNG format.""" 28 | buffer = io.BytesIO() 29 | Image.fromarray(image).save(buffer, format='png') 30 | return buffer.getvalue() 31 | 32 | 33 | class DataUtilsTest(tf.test.TestCase): 34 | 35 | def _create_test_image(self, height, width): 36 | rng = np.random.RandomState(319281498) 37 | return rng.randint(0, 255, size=(height, width, 3), dtype=np.uint8) 38 | 39 | def test_encode_and_decode(self): 40 | """Checks decode created tf.Example for semantic segmentation.""" 41 | test_image_height = 20 42 | test_image_width = 15 43 | filename = 'dummy' 44 | 45 | image = self._create_test_image(test_image_height, test_image_width) 46 | # Take the last channel as dummy label. 47 | label = image[..., 0] 48 | 49 | example = data_utils.create_tfexample( 50 | image_data=_encode_png_image(image), 51 | image_format='png', filename=filename, 52 | label_data=_encode_png_image(label), label_format='png') 53 | 54 | # Parse created example, expect getting identical results. 55 | parser = data_utils.SegmentationDecoder(is_panoptic_dataset=False) 56 | parsed_tensors = parser(example.SerializeToString()) 57 | 58 | self.assertIn('image', parsed_tensors) 59 | self.assertIn('image_name', parsed_tensors) 60 | self.assertIn('label', parsed_tensors) 61 | self.assertEqual(filename, parsed_tensors['image_name']) 62 | np.testing.assert_array_equal(image, parsed_tensors['image'].numpy()) 63 | # Decoded label is a 3-D array with last dimension of 1. 64 | decoded_label = parsed_tensors['label'].numpy() 65 | np.testing.assert_array_equal(label, decoded_label[..., 0]) 66 | 67 | def test_encode_and_decode_panoptic(self): 68 | test_image_height = 31 69 | test_image_width = 17 70 | filename = 'dummy' 71 | 72 | image = self._create_test_image(test_image_height, test_image_width) 73 | # Create dummy panoptic label in np.int32 dtype. 74 | label = np.dot(image.astype(np.int32), [1, 256, 256 * 256]).astype(np.int32) 75 | example = data_utils.create_tfexample( 76 | image_data=_encode_png_image(image), 77 | image_format='png', filename=filename, 78 | label_data=label.tostring(), label_format='raw') 79 | 80 | parser = data_utils.SegmentationDecoder(is_panoptic_dataset=True) 81 | parsed_tensors = parser(example.SerializeToString()) 82 | 83 | self.assertIn('image', parsed_tensors) 84 | self.assertIn('image_name', parsed_tensors) 85 | self.assertIn('label', parsed_tensors) 86 | self.assertEqual(filename, parsed_tensors['image_name']) 87 | np.testing.assert_array_equal(image, parsed_tensors['image'].numpy()) 88 | # Decoded label is a 3-D array with last dimension of 1. 89 | decoded_label = parsed_tensors['label'].numpy() 90 | np.testing.assert_array_equal(label, decoded_label[..., 0]) 91 | 92 | 93 | if __name__ == '__main__': 94 | tf.test.main() 95 | -------------------------------------------------------------------------------- /data/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /data/dataset_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains a utility function for handling the dataset.""" 17 | 18 | import tensorflow as tf 19 | 20 | 21 | def get_semantic_and_panoptic_label(dataset_info, label, ignore_label): 22 | """Helper function to get semantic and panoptic label from panoptic label. 23 | 24 | This function gets the semantic and panoptic label from the panoptic label 25 | for different datasets. The labels must be encoded with semantic_label * 26 | label_divisor + instance_id. For thing classes, the instance ID 0 is reserved 27 | for crowd regions. Note that in the returned panoptic label, the crowd 28 | regions have been replaced with ignore regions, whereas the semantic label 29 | still makes use of these regions. 30 | 31 | Args: 32 | dataset_info: A dictionary storing dataset information. 33 | label: A Tensor of panoptic label. 34 | ignore_label: An integer specifying the ignore_label. 35 | 36 | Returns: 37 | semantic_label: A Tensor of semantic segmentation label. 38 | panoptic_label: A Tensor of panoptic segmentation label, which follows the 39 | Cityscapes annotation where 40 | panoptic_label = semantic_label * panoptic_label_divisor + instance_id. 41 | thing_mask: A boolean Tensor specifying the thing regions. Zero if no thing. 42 | crowd_region: A boolean Tensor specifying crowd region. Zero if no crowd 43 | annotation. 44 | 45 | Raises: 46 | ValueError: An error occurs when the ignore_label is not in range 47 | [0, label_divisor]. 48 | """ 49 | panoptic_label_divisor = dataset_info['panoptic_label_divisor'] 50 | if ignore_label >= panoptic_label_divisor or ignore_label < 0: 51 | raise ValueError('The ignore_label must be in [0, label_divisor].') 52 | 53 | semantic_label = label // panoptic_label_divisor 54 | # Find iscrowd region if any and set to ignore for panoptic labels. 55 | # 1. Find thing mask. 56 | thing_mask = tf.zeros_like(semantic_label, tf.bool) 57 | for thing_id in dataset_info['class_has_instances_list']: 58 | thing_mask = tf.logical_or( 59 | thing_mask, 60 | tf.equal(semantic_label, thing_id)) 61 | # 2. Find crowd regions (thing labels that have instance_id == 0). 62 | crowd_region = tf.logical_and( 63 | thing_mask, 64 | tf.equal(label % panoptic_label_divisor, 0)) 65 | # 3. Set crowd region to ignore label.
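# Crowd pixels receive the value ignore_label * panoptic_label_divisor, i.e. semantic class ignore_label with instance id 0, as computed by the tf.where below.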
66 | panoptic_label = tf.where( 67 | crowd_region, 68 | tf.ones_like(label) * ignore_label * panoptic_label_divisor, 69 | label) 70 | 71 | return semantic_label, panoptic_label, thing_mask, crowd_region 72 | -------------------------------------------------------------------------------- /data/dataset_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for dataset_utils.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from deeplab2.data import dataset_utils 22 | 23 | 24 | class DatasetUtilsTest(tf.test.TestCase): 25 | 26 | def _get_test_labels(self, num_classes, shape, label_divisor): 27 | num_ids_per_class = 35 28 | semantic_labels = np.random.randint(num_classes, size=shape) 29 | panoptic_labels = np.random.randint( 30 | num_ids_per_class, size=shape) + semantic_labels * label_divisor 31 | 32 | semantic_labels = tf.convert_to_tensor(semantic_labels, dtype=tf.int32) 33 | panoptic_labels = tf.convert_to_tensor(panoptic_labels, dtype=tf.int32) 34 | 35 | return panoptic_labels, semantic_labels 36 | 37 | def setUp(self): 38 | super().setUp() 39 | self._first_thing_class = 9 40 | self._num_classes = 19 41 | self._dataset_info = { 42 | 'panoptic_label_divisor': 1000, 43 | 'class_has_instances_list': tf.range(self._first_thing_class, 44 | self._num_classes) 45 | } 46 | self._num_ids = 37 47 | self._labels, self._semantic_classes = self._get_test_labels( 48 | self._num_classes, [2, 33, 33], 49 | self._dataset_info['panoptic_label_divisor']) 50 | 51 | def test_get_panoptic_and_semantic_label(self): 52 | # Note: self._labels contains one crowd instance per class. 
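# Crowd pixels (thing pixels with instance id 0) should be flagged in crowd_region and remapped to the ignore label in the returned panoptic label.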
53 | (returned_sem_labels, returned_pan_labels, returned_thing_mask, 54 | returned_crowd_region) = ( 55 | dataset_utils.get_semantic_and_panoptic_label( 56 | self._dataset_info, self._labels, ignore_label=255)) 57 | 58 | expected_semantic_labels = self._semantic_classes 59 | condition = self._labels % self._dataset_info['panoptic_label_divisor'] == 0 60 | condition = tf.logical_and( 61 | condition, 62 | tf.math.greater_equal(expected_semantic_labels, 63 | self._first_thing_class)) 64 | expected_crowd_labels = tf.where(condition, 1.0, 0.0) 65 | expected_pan_labels = tf.where( 66 | condition, 255 * self._dataset_info['panoptic_label_divisor'], 67 | self._labels) 68 | expected_thing_mask = tf.where( 69 | tf.math.greater_equal(expected_semantic_labels, 70 | self._first_thing_class), 1.0, 0.0) 71 | 72 | self.assertListEqual(returned_sem_labels.shape.as_list(), 73 | expected_semantic_labels.shape.as_list()) 74 | self.assertListEqual(returned_pan_labels.shape.as_list(), 75 | expected_pan_labels.shape.as_list()) 76 | self.assertListEqual(returned_crowd_region.shape.as_list(), 77 | expected_crowd_labels.shape.as_list()) 78 | self.assertListEqual(returned_thing_mask.shape.as_list(), 79 | expected_thing_mask.shape.as_list()) 80 | np.testing.assert_equal(returned_sem_labels.numpy(), 81 | expected_semantic_labels.numpy()) 82 | np.testing.assert_equal(returned_pan_labels.numpy(), 83 | expected_pan_labels.numpy()) 84 | np.testing.assert_equal(returned_crowd_region.numpy(), 85 | expected_crowd_labels.numpy()) 86 | np.testing.assert_equal(returned_thing_mask.numpy(), 87 | expected_thing_mask.numpy()) 88 | 89 | if __name__ == '__main__': 90 | tf.test.main() 91 | -------------------------------------------------------------------------------- /data/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /data/preprocessing/autoaugment_policy.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """AutoAugment policy file. 17 | 18 | This file contains the AutoAugment policy found via policy search.
19 | 20 | Please cite or refer to the following papers for details: 21 | - Ekin D Cubuk, Barret Zoph, Dandelion Mane, Vijay Vasudevan, and Quoc V Le. 22 | "Autoaugment: Learning augmentation policies from data." In CVPR, 2019. 23 | 24 | - Ekin D Cubuk, Barret Zoph, Jonathon Shlens, and Quoc V Le. 25 | "Randaugment: Practical automated data augmentation with a reduced search 26 | space." In CVPR, 2020. 27 | """ 28 | 29 | # Reduced augmentation operation space. 30 | augmentation_reduced_operations = ( 31 | 'AutoContrast', 'Equalize', 'Invert', 'Posterize', 32 | 'Solarize', 'Color', 'Contrast', 'Brightness', 'Sharpness') 33 | 34 | augmentation_probabilities = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0] 35 | 36 | 37 | def convert_policy(policy, 38 | search_space=augmentation_reduced_operations, 39 | probability_scale=1.0, 40 | magnitude_scale=1): 41 | """Converts policy from a list of numbers.""" 42 | if len(policy) % 6: 43 | raise ValueError('Policy length must be a multiple of 6.') 44 | num_policies = len(policy) // 6 45 | policy_list = [[] for _ in range(num_policies)] 46 | for n in range(num_policies): 47 | for i in range(2): 48 | operation_id, prob_id, magnitude = ( 49 | policy[6 * n + i * 3 : 6 * n + (i + 1) * 3]) 50 | policy_name = search_space[operation_id] 51 | policy_prob = ( 52 | augmentation_probabilities[prob_id] * probability_scale) 53 | policy_list[n].append((policy_name, 54 | policy_prob, 55 | magnitude * magnitude_scale)) 56 | return policy_list 57 | 58 | 59 | simple_classification_policy = [8, 2, 7, 7, 1, 10, 60 | 1, 0, 9, 6, 1, 10, 61 | 8, 1, 9, 5, 1, 9, 62 | 4, 1, 7, 1, 3, 9, 63 | 8, 1, 1, 1, 1, 7] 64 | 65 | # All available policies. 66 | available_policies = { 67 | 'simple_classification_policy_magnitude_scale_0.2': convert_policy( 68 | simple_classification_policy, 69 | augmentation_reduced_operations, 70 | magnitude_scale=0.2), 71 | 'simple_classification_policy': convert_policy( 72 | simple_classification_policy, 73 | augmentation_reduced_operations, 74 | magnitude_scale=1), 75 | } 76 | -------------------------------------------------------------------------------- /data/preprocessing/autoaugment_policy_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for autoaugment_policy.py.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.data.preprocessing import autoaugment_policy 21 | 22 | 23 | class AutoaugmentPolicyTest(tf.test.TestCase): 24 | 25 | def testConvertPolicy(self): 26 | policy = [5, 1, 10, 5, 3, 4, 27 | 6, 3, 7, 3, 3, 9, 28 | 2, 2, 8, 8, 2, 8, 29 | 1, 4, 9, 4, 5, 7, 30 | 6, 4, 1, 1, 3, 4] 31 | expected = [ 32 | [('Color', 0.2, 10), ('Color', 0.6, 4)], 33 | [('Contrast', 0.6, 7), ('Posterize', 0.6, 9)], 34 | [('Invert', 0.4, 8), ('Sharpness', 0.4, 8)], 35 | [('Equalize', 0.8, 9), ('Solarize', 1.0, 7)], 36 | [('Contrast', 0.8, 1), ('Equalize', 0.6, 4)], 37 | ] 38 | policy_list = autoaugment_policy.convert_policy(policy) 39 | self.assertAllEqual(policy_list, expected) 40 | 41 | 42 | if __name__ == '__main__': 43 | tf.test.main() 44 | -------------------------------------------------------------------------------- /data/preprocessing/autoaugment_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for autoaugment_utils.py.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from deeplab2.data.preprocessing import autoaugment_utils 22 | 23 | 24 | class AutoaugmentUtilsTest(tf.test.TestCase): 25 | 26 | def testAugmentWithNamedPolicy(self): 27 | num_classes = 3 28 | np_image = np.random.randint(256, size=(13, 13, 3)) 29 | image = tf.constant(np_image, dtype=tf.uint8) 30 | np_label = np.random.randint(num_classes, size=(13, 13, 1)) 31 | label = tf.constant(np_label, dtype=tf.int32) 32 | image, label = autoaugment_utils.distort_image_with_autoaugment( 33 | image, label, ignore_label=255, 34 | augmentation_name='simple_classification_policy') 35 | self.assertTrue(image.numpy().any()) 36 | self.assertTrue(label.numpy().any()) 37 | 38 | 39 | if __name__ == '__main__': 40 | tf.test.main() 41 | -------------------------------------------------------------------------------- /data/testdata/dummy_gt_for_vps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/dummy_gt_for_vps.png -------------------------------------------------------------------------------- /data/testdata/dummy_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/dummy_prediction.png -------------------------------------------------------------------------------- /data/testdata/gtFine/cityscapes_panoptic_dummy_trainId.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": [ 3 | { 4 | "file_name": "dummy_000000_000000_gtFine_panoptic.png", 5 | "image_id": 
"dummy_000000_000000", 6 | "segments_info": [ 7 | { 8 | "area": 958, 9 | "category_id": 13, 10 | "id": 26000, 11 | "iscrowd": 0 12 | }, 13 | { 14 | "area": 6178, 15 | "category_id": 13, 16 | "id": 26, 17 | "iscrowd": 1 18 | }, 19 | { 20 | "area": 10496, 21 | "category_id": 13, 22 | "id": 26001, 23 | "iscrowd": 0 24 | }, 25 | { 26 | "area": 5534, 27 | "category_id": 13, 28 | "id": 26002, 29 | "iscrowd": 0 30 | }, 31 | { 32 | "area": 32768, 33 | "category_id": 13, 34 | "id": 26003, 35 | "iscrowd": 0 36 | }, 37 | { 38 | "area": 19906, 39 | "category_id": 13, 40 | "id": 26004, 41 | "iscrowd": 0 42 | }, 43 | { 44 | "area": 15940, 45 | "category_id": 8, 46 | "id": 21, 47 | "iscrowd": 0 48 | }, 49 | { 50 | "area": 278754, 51 | "category_id": 10, 52 | "id": 23, 53 | "iscrowd": 0 54 | }, 55 | { 56 | "area": 222420, 57 | "category_id": 2, 58 | "id": 11, 59 | "iscrowd": 0 60 | }, 61 | { 62 | "area": 46475, 63 | "category_id": 0, 64 | "id": 7, 65 | "iscrowd": 0 66 | } 67 | ] 68 | } 69 | ] 70 | } -------------------------------------------------------------------------------- /data/testdata/gtFine/cityscapes_panoptic_dummy_trainId/dummy_000000_000000_gtFine_panoptic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/gtFine/cityscapes_panoptic_dummy_trainId/dummy_000000_000000_gtFine_panoptic.png -------------------------------------------------------------------------------- /data/testdata/leftImg8bit/dummy_000000_000000_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/leftImg8bit/dummy_000000_000000_leftImg8bit.png -------------------------------------------------------------------------------- /data/testdata/targets/center_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/center_target.png -------------------------------------------------------------------------------- /data/testdata/targets/center_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/center_weights.png -------------------------------------------------------------------------------- /data/testdata/targets/eval_is_crowd.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_is_crowd.npy -------------------------------------------------------------------------------- /data/testdata/targets/eval_panoptic_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_panoptic_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/eval_semantic_target.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/eval_semantic_target.png -------------------------------------------------------------------------------- /data/testdata/targets/is_crowd.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/is_crowd.npy -------------------------------------------------------------------------------- /data/testdata/targets/offset_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/offset_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/offset_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/offset_weights.png -------------------------------------------------------------------------------- /data/testdata/targets/panoptic_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/panoptic_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/panoptic_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/panoptic_target.png -------------------------------------------------------------------------------- /data/testdata/targets/rgb_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/rgb_target.png -------------------------------------------------------------------------------- /data/testdata/targets/semantic_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/semantic_target.png -------------------------------------------------------------------------------- /data/testdata/targets/semantic_weights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/semantic_weights.png -------------------------------------------------------------------------------- /data/testdata/targets/thing_id_class_target.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/thing_id_class_target.npy -------------------------------------------------------------------------------- /data/testdata/targets/thing_id_mask_target.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/data/testdata/targets/thing_id_mask_target.npy -------------------------------------------------------------------------------- /data/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /data/waymo_constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Meta info of Waymo Open Dataset: Panoramic Video Panoptic Segmentation. 17 | 18 | Dataset website: https://waymo.com/open/ 19 | GitHub: https://github.com/waymo-research/waymo-open-dataset 20 | 21 | References: 22 | 23 | - Jieru Mei, Alex Zihao Zhu, Xinchen Yan, Hang Yan, Siyuan Qiao, Yukun Zhu, 24 | Liang-Chieh Chen, Henrik Kretzschmar, Dragomir Anguelov. "Waymo Open Dataset: 25 | Panoramic Video Panoptic Segmentation." In ECCV, 2022. 
26 | """ 27 | 28 | from typing import Any, Sequence 29 | 30 | import immutabledict 31 | 32 | COLORMAP = "waymo" 33 | PANOPTIC_LABEL_DIVISOR = 100000 34 | 35 | IGNORE_LABEL_NAME = "unknown" 36 | IGNORE_LABEL = 0 37 | 38 | _WAYMO_COLORS = immutabledict.immutabledict({ 39 | "bicycle": [119, 11, 32], 40 | "bird": [127, 96, 0], 41 | "building": [70, 70, 70], 42 | "bus": [0, 60, 100], 43 | "car": [0, 0, 142], 44 | "construction_cone_pole": [230, 145, 56], 45 | "cyclist": [255, 0, 0], 46 | "dynamic": [102, 102, 102], 47 | "ground": [102, 102, 102], 48 | "ground_animal": [91, 15, 0], 49 | "lane_marker": [234, 209, 220], 50 | "motorcycle": [0, 0, 230], 51 | "motorcyclist": [180, 0, 0], 52 | "other_large_vehicle": [61, 133, 198], 53 | "other_pedestrian_object": [234, 153, 153], 54 | "person": [220, 20, 60], 55 | "pole": [153, 153, 153], 56 | "road": [128, 64, 128], 57 | "road_marker": [217, 210, 233], 58 | "sdc": [102, 102, 102], 59 | "sidewalk": [244, 35, 232], 60 | "sign": [246, 178, 107], 61 | "sky": [70, 130, 180], 62 | "static": [102, 102, 102], 63 | "traffic_light": [250, 170, 30], 64 | "trailer": [111, 168, 220], 65 | "truck": [0, 0, 70], 66 | "unknown": [102, 102, 102], 67 | "vegetation": [107, 142, 35], 68 | }) 69 | 70 | _WAYMO_CLASS_NAMES = [ 71 | "unknown", 72 | "sdc", 73 | "car", 74 | "truck", 75 | "bus", 76 | "other_large_vehicle", 77 | "bicycle", 78 | "motorcycle", 79 | "trailer", 80 | "person", 81 | "cyclist", 82 | "motorcyclist", 83 | "bird", 84 | "ground_animal", 85 | "construction_cone_pole", 86 | "pole", 87 | "other_pedestrian_object", 88 | "sign", 89 | "traffic_light", 90 | "building", 91 | "road", 92 | "lane_marker", 93 | "road_marker", 94 | "sidewalk", 95 | "vegetation", 96 | "sky", 97 | "ground", 98 | "dynamic", 99 | "static", 100 | ] 101 | 102 | _IS_THINGS = [ 103 | "car", "truck", "bus", "other_large_vehicle", "trailer", "person", 104 | "cyclist", "motorcyclist" 105 | ] 106 | 107 | 108 | def get_waymo_meta() -> Sequence[Any]: 109 | """Gets the meta info for waymo dataset.""" 110 | meta = [] 111 | for name_id, name in enumerate(_WAYMO_CLASS_NAMES): 112 | item = { 113 | "color": _WAYMO_COLORS[name], 114 | "name": name, 115 | "id": name_id, 116 | "isthing": int(name in _IS_THINGS) 117 | } 118 | meta.append(item) 119 | return meta 120 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /evaluation/depth_metrics_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for depth metrics.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.evaluation import depth_metrics 21 | 22 | 23 | class DepthMetricsTest(tf.test.TestCase): 24 | 25 | def test_depth_metrics_on_single_image(self): 26 | gt = np.array([[5.44108091, 53.30197697, 61.06181767, 14.36723114], 27 | [0, 39.68081126, 58.77974067, 0], 28 | [40.57883826, 22.15134852, 31.46813478, 13.52603324]]) 29 | pred = np.array([[4.87694111, 50.09085582, 55.74533641, 10.13579195], 30 | [13.76178147, 41.62431592, 56.97362032, 81.48369608], 31 | [43.12005689, 15.54622258, 24.1993478, 12.14451783]]) 32 | depth_obj = depth_metrics.DepthMetrics() 33 | depth_obj.update_state(gt, pred) 34 | result = depth_obj.result().numpy() 35 | # The following numbers are manually computed. 36 | self.assertAlmostEqual(result[0], 14.154233, places=4) 37 | self.assertAlmostEqual(result[1], 0.0268667, places=4) 38 | self.assertAlmostEqual(result[2], 0.13191505, places=4) 39 | self.assertAlmostEqual(result[3], 0.7, places=4) 40 | 41 | def test_depth_metrics_on_multiple_images(self): 42 | depth_obj = depth_metrics.DepthMetrics() 43 | gt_1 = np.array([[5.44108091, 53.30197697, 61.06181767, 14.36723114], 44 | [0, 39.68081126, 58.77974067, 0], 45 | [40.57883826, 22.15134852, 31.46813478, 13.52603324]]) 46 | pred_1 = np.array([[4.87694111, 50.09085582, 55.74533641, 10.13579195], 47 | [13.76178147, 41.62431592, 56.97362032, 81.48369608], 48 | [43.12005689, 15.54622258, 24.1993478, 12.14451783]]) 49 | depth_obj.update_state(gt_1, pred_1) 50 | gt_2 = np.array( 51 | [[79.56192404, 25.68145225, 0, 39.88486608, 68.91602466], 52 | [79.53460057, 2.55741031, 36.05057241, 68.04747416, 3.7783227], 53 | [0, 0, 72.47336778, 59.02611644, 66.07499008], 54 | [25.88578395, 58.2202574, 27.39066477, 29.83094038, 37.99239669]]) 55 | pred_2 = np.array( 56 | [[83.80952145, 27.23367361, 72.52687468, 35.28400183, 72.41126444], 57 | [77.62373864, 0.87004049, 32.1619225, 66.91361903, 2.60688436], 58 | [15.30294603, 9.76419241, 68.61650198, 57.14559324, 66.88452603], 59 | [24.54818109, 61.60855251, 31.50312052, 26.02325866, 36.4019569]]) 60 | depth_obj.update_state(gt_2, pred_2) 61 | gt_3 = np.array([[50.80100791, 0.41130084, 58.85031668], 62 | [29.44932853, 23.48806627, 30.17890056]]) 63 | pred_3 = np.array([[49.66563966, 0.62070026, 58.84231026], 64 | [32.26735775, 28.07405648, 33.7131882]]) 65 | depth_obj.update_state(gt_3, pred_3) 66 | result = depth_obj.result().numpy() 67 | # The following numbers are manually computed. 68 | self.assertAlmostEqual(result[0], 18.442057, places=4) 69 | self.assertAlmostEqual(result[1], 0.0388692, places=4) 70 | self.assertAlmostEqual(result[2], 0.13392223, places=4) 71 | self.assertAlmostEqual(result[3], 0.8052287, places=4) 72 | 73 | 74 | if __name__ == '__main__': 75 | tf.test.main() 76 | -------------------------------------------------------------------------------- /evaluation/numpy/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /evaluation/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for test_utils.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.evaluation import test_utils 21 | 22 | 23 | class TestUtilsTest(tf.test.TestCase): 24 | 25 | def test_read_test_image(self): 26 | image_array = test_utils.read_test_image('team_pred_class.png') 27 | self.assertSequenceEqual(image_array.shape, (231, 345, 4)) 28 | 29 | def test_reads_segmentation_with_color_map(self): 30 | rgb_to_semantic_label = {(0, 0, 0): 0, (0, 0, 255): 1, (255, 0, 0): 23} 31 | labels = test_utils.read_segmentation_with_rgb_color_map( 32 | 'team_pred_class.png', rgb_to_semantic_label) 33 | 34 | input_image = test_utils.read_test_image('team_pred_class.png') 35 | np.testing.assert_array_equal( 36 | labels == 0, 37 | np.logical_and(input_image[:, :, 0] == 0, input_image[:, :, 2] == 0)) 38 | np.testing.assert_array_equal(labels == 1, input_image[:, :, 2] == 255) 39 | np.testing.assert_array_equal(labels == 23, input_image[:, :, 0] == 255) 40 | 41 | def test_reads_gt_segmentation(self): 42 | instance_label_to_semantic_label = { 43 | 0: 0, 44 | 47: 1, 45 | 97: 1, 46 | 133: 1, 47 | 150: 1, 48 | 174: 1, 49 | 198: 23, 50 | 215: 1, 51 | 244: 1, 52 | 255: 1, 53 | } 54 | instances, classes = test_utils.panoptic_segmentation_with_class_map( 55 | 'team_gt_instance.png', instance_label_to_semantic_label) 56 | 57 | expected_label_shape = (231, 345) 58 | self.assertSequenceEqual(instances.shape, expected_label_shape) 59 | self.assertSequenceEqual(classes.shape, expected_label_shape) 60 | np.testing.assert_array_equal(instances == 0, classes == 0) 61 | np.testing.assert_array_equal(instances == 198, classes == 23) 62 | np.testing.assert_array_equal( 63 | np.logical_and(instances != 0, instances != 198), classes == 1) 64 | 65 | 66 | if __name__ == '__main__': 67 | tf.test.main() 68 | -------------------------------------------------------------------------------- /evaluation/testdata/README.md: -------------------------------------------------------------------------------- 1 | # Segmentation Evaluation Test Data 2 | 3 | ## Source Images 4 | 5 | *
[team_input.png](team_input.png) \ 6 | Source: 7 | https://ai.googleblog.com/2018/03/semantic-image-segmentation-with.html 8 | * [cat_input.jpg](cat_input.jpg) \ 9 | Source: https://www.flickr.com/photos/magdalena_b/4995858743 10 | * [bird_input.jpg](bird_input.jpg) \ 11 | Source: https://www.flickr.com/photos/chivinskia/40619099560 12 | -------------------------------------------------------------------------------- /evaluation/testdata/bird_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_gt.png -------------------------------------------------------------------------------- /evaluation/testdata/bird_pred_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_pred_class.png -------------------------------------------------------------------------------- /evaluation/testdata/bird_pred_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/bird_pred_instance.png -------------------------------------------------------------------------------- /evaluation/testdata/cat_gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_gt.png -------------------------------------------------------------------------------- /evaluation/testdata/cat_pred_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_pred_class.png -------------------------------------------------------------------------------- /evaluation/testdata/cat_pred_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/cat_pred_instance.png -------------------------------------------------------------------------------- /evaluation/testdata/team_gt_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_gt_instance.png -------------------------------------------------------------------------------- /evaluation/testdata/team_pred_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_pred_class.png -------------------------------------------------------------------------------- /evaluation/testdata/team_pred_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/evaluation/testdata/team_pred_instance.png -------------------------------------------------------------------------------- /g3doc/img/axial_deeplab/axial_block.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/axial_block.png -------------------------------------------------------------------------------- /g3doc/img/axial_deeplab/nonlocal_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/nonlocal_block.png -------------------------------------------------------------------------------- /g3doc/img/axial_deeplab/position_sensitive_axial_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/axial_deeplab/position_sensitive_axial_block.png -------------------------------------------------------------------------------- /g3doc/img/kmax_deeplab/clustering_view_of_mask_transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/kmax_deeplab/clustering_view_of_mask_transformer.png -------------------------------------------------------------------------------- /g3doc/img/kmax_deeplab/kmax_decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/kmax_deeplab/kmax_decoder.png -------------------------------------------------------------------------------- /g3doc/img/max_deeplab/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/max_deeplab/overview.png -------------------------------------------------------------------------------- /g3doc/img/max_deeplab/overview_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/max_deeplab/overview_simple.png -------------------------------------------------------------------------------- /g3doc/img/moat/moat_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_block.png -------------------------------------------------------------------------------- /g3doc/img/moat/moat_imagenet1k_224.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_imagenet1k_224.png -------------------------------------------------------------------------------- /g3doc/img/moat/moat_imagenet22k_384.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/moat/moat_imagenet22k_384.png -------------------------------------------------------------------------------- /g3doc/img/panoptic_deeplab.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/panoptic_deeplab.png -------------------------------------------------------------------------------- /g3doc/img/step/kitti_step_annotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/step/kitti_step_annotation.png -------------------------------------------------------------------------------- /g3doc/img/vip_deeplab/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/deeplab2/64bc1c1a6505b6cd1779b65f9127180840b8a4fa/g3doc/img/vip_deeplab/demo.gif -------------------------------------------------------------------------------- /g3doc/projects/wod_pvps.md: -------------------------------------------------------------------------------- 1 | # Panoramic Video Panoptic Segmentation 2 | 3 | Waymo Open Dataset: Panoramic Video Panoptic Segmentation (WOD-PVPS) [1] is a 4 | large-scale dataset that offers high-quality multi-camera video panoptic 5 | segmentation labels for autonomous driving. The labels are consistent over time 6 | for video processing and consistent across multiple cameras mounted on the 7 | vehicles for full panoramic scene understanding. 8 | 9 | The new task of Panoramic Video Panoptic Segmentation requires generating dense 10 | panoptic segmentation predictions that are consistent across both time and 11 | cameras. To build a baseline for this challenging task, we extend ViP-DeepLab 12 | [2] to the multi-camera setting. 13 | 14 | ## Prerequisite 15 | 16 | 1. Make sure the software is properly [installed](../setup/installation.md). 17 | 18 | 2. Make sure the 19 | [target dataset](https://waymo.com/open/data/perception/#2d-video-panoptic-segmentation) 20 | is correctly prepared. 21 | 22 | ## Model Zoo 23 | 24 | ## Citing WOD-PVPS 25 | 26 | If you find this code helpful in your research or wish to refer to the baseline 27 | results, please use the following BibTeX entries. 28 | 29 | * Waymo Open Dataset: Panoramic Video Panoptic Segmentation: 30 | 31 | ``` 32 | @article{mei2022waymo, 33 | title={Waymo Open Dataset: Panoramic Video Panoptic Segmentation}, 34 | author={Mei, Jieru and Zhu, Alex Zihao and Yan, Xinchen and Yan, Hang and Qiao, Siyuan and Zhu, Yukun and Chen, Liang-Chieh and Kretzschmar, Henrik and Anguelov, Dragomir}, 35 | journal={arXiv preprint arXiv:2206.07704}, 36 | year={2022} 37 | } 38 | 39 | ``` 40 | 41 | * ViP-DeepLab: 42 | 43 | ``` 44 | @inproceedings{vip_deeplab_2021, 45 | author={Siyuan Qiao and Yukun Zhu and Hartwig Adam and Alan Yuille and Liang-Chieh Chen}, 46 | title={{ViP-DeepLab}: Learning Visual Perception with Depth-aware Video Panoptic Segmentation}, 47 | booktitle={CVPR}, 48 | year={2021} 49 | } 50 | 51 | ``` 52 | 53 | * Panoptic-DeepLab: 54 | 55 | ``` 56 | @inproceedings{panoptic_deeplab_2020, 57 | author={Bowen Cheng and Maxwell D Collins and Yukun Zhu and Ting Liu and Thomas S Huang and Hartwig Adam and Liang-Chieh Chen}, 58 | title={{Panoptic-DeepLab}: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation}, 59 | booktitle={CVPR}, 60 | year={2020} 61 | } 62 | 63 | ``` 64 | 65 | ### References 66 | 67 | 1.
Jieru Mei, Alex Zihao Zhu, Xinchen Yan, Hang Yan, Siyuan Qiao, Yukun Zhu, 68 | Liang-Chieh Chen, Henrik Kretzschmar, Dragomir Anguelov. "Waymo Open 69 | Dataset: Panoramic Video Panoptic Segmentation." In arXiv: 2206.07704, 2022. 70 | 71 | 2. Siyuan Qiao, Yukun Zhu, Hartwig Adam, Alan Yuille, and Liang-Chieh Chen. 72 | "ViP-DeepLab: Learning Visual Perception with Depth-aware Video Panoptic 73 | Segmentation." In CVPR, 2021. 74 | -------------------------------------------------------------------------------- /g3doc/setup/ade20k.md: -------------------------------------------------------------------------------- 1 | # Run DeepLab2 on ADE20K dataset 2 | 3 | This page walks through the steps required to generate 4 | [ADE20K](https://groups.csail.mit.edu/vision/datasets/ADE20K/) panoptic 5 | segmentation data for DeepLab2. 6 | 7 | ## Prework 8 | 9 | Before running any DeepLab2 scripts, users should (1) access the 10 | [ADE20K dataset website](https://groups.csail.mit.edu/vision/datasets/ADE20K/) 11 | to download the dataset, and (2) prepare the panoptic annotation using 12 | [Mask2Former's script](https://github.com/facebookresearch/Mask2Former/blob/main/datasets/prepare_ade20k_pan_seg.py). 13 | 14 | After finishing the above steps, the expected directory structure should be as 15 | follows: 16 | 17 | ``` 18 | .(ADE20K_ROOT) 19 | +-- images 20 | | 21 | |-- annotations 22 | | 23 | |-- objectInfo150.txt 24 | | 25 | |-- annotations_instance 26 | | 27 | |-- ade20k_panoptic_{train,val}.json 28 | | 29 | +-- ade20k_panoptic_{train,val} 30 | ``` 31 | 32 | ## Convert prepared dataset to TFRecord 33 | 34 | Use the following command line to generate ADE20K TFRecords: 35 | 36 | ```bash 37 | # For generating data for panoptic segmentation task 38 | python deeplab2/data/build_ade20k_data.py \ 39 | --ade20k_root=${ADE20K_ROOT} \ 40 | --output_dir=${OUTPUT_DIR} 41 | ``` 42 | 43 | The command above will output two sharded tfrecord files: 44 | `{train|val}@1000.tfrecord`. For both the `train` and `val` sets, the tfrecords 45 | contain the RGB image pixels as well as the corresponding annotations. These 46 | files will be used as the input for model training and evaluation. 47 | 48 | ### TFExample proto format for ADE20K 49 | 50 | The Example proto contains the following fields: 51 | 52 | * `image/encoded`: encoded image content. 53 | * `image/filename`: image filename. 54 | * `image/format`: image file format. 55 | * `image/height`: image height. 56 | * `image/width`: image width. 57 | * `image/channels`: image channels. 58 | * `image/segmentation/class/encoded`: encoded segmentation content. 59 | * `image/segmentation/class/format`: segmentation encoding format. 60 | 61 | For panoptic segmentation, the encoded segmentation map will be the raw bytes of 62 | an int32 panoptic map, where each pixel is assigned to a panoptic ID, which is 63 | computed by: 64 | 65 | ``` 66 | panoptic ID = semantic ID * label divisor + instance ID 67 | ``` 68 | 69 | where semantic ID will be: 70 | 71 | * ignore label (0) for pixels not belonging to any segment 72 | * for segments associated with `iscrowd` label: 73 | * (default): ignore label (0) 74 | * `category_id` for other segments 75 | 76 | The instance ID will be 0 for pixels belonging to 77 | 78 | * `stuff` class 79 | * `thing` class with `iscrowd` label 80 | * pixels with ignore label 81 | 82 | and `[1, label divisor)` otherwise.
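For clarity, here is a minimal NumPy sketch of how such a panoptic map can be decoded back into its semantic and instance components. The `label_divisor` value of 1000 below is an illustrative assumption; use the divisor the TFRecords were actually built with.

```python
import numpy as np

# Illustrative assumption: must match the label divisor used to build the data.
label_divisor = 1000

# A dummy 2x2 panoptic map, e.g. semantic ID 8 with instance ID 3 -> 8 * 1000 + 3.
panoptic_map = np.array([[8003, 8003], [21000, 0]], dtype=np.int32)

# Invert `panoptic ID = semantic ID * label divisor + instance ID`.
semantic_map = panoptic_map // label_divisor  # 0 is the ignore label.
instance_map = panoptic_map % label_divisor   # 0 for stuff/crowd/ignored pixels.

print(semantic_map)  # [[ 8  8] [21  0]]
print(instance_map)  # [[3 3] [0 0]]
```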
83 | -------------------------------------------------------------------------------- /g3doc/setup/cityscapes_test_server_evaluation.md: -------------------------------------------------------------------------------- 1 | # Test Server Evaluation on Cityscapes dataset 2 | 3 | This page walks through the steps required to convert DeepLab2 predictions for 4 | test server evaluation on [Cityscapes](https://www.cityscapes-dataset.com/). 5 | 6 | A high-level overview of the whole process: 7 | 8 | 1. Save raw panoptic predictions in the two-channel format. 9 | 10 | 2. Create the images json file. 11 | 12 | 3. Convert predictions in the two-channel format to the panoptic COCO format. 13 | 14 | 4. Run local validation set evaluation or prepare test set evaluation. 15 | 16 | We also define some environment variables for simplicity and convenience: 17 | 18 | `BASE_MODEL_DIRECTORY`: a variable set in the textproto file, which defines where 19 | all checkpoints and results are saved. 20 | 21 | `DATA_ROOT`: where the original Cityscapes dataset is located. 22 | 23 | `PATH_TO_SAVE`: where the converted results should be saved. 24 | 25 | `IMAGES_SPLIT`: *val* or *test* depending on the target split. 26 | 27 | ## Save Raw Panoptic Prediction 28 | 29 | Save the raw panoptic predictions in the 30 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) by ensuring 31 | the following fields are set properly in the textproto config file. 32 | 33 | ``` 34 | eval_dataset_options.decode_groundtruth_label = false 35 | evaluator_options.save_predictions = true 36 | evaluator_options.save_raw_predictions = true 37 | evaluator_options.convert_raw_to_eval_ids = true 38 | ``` 39 | 40 | Then run the model in evaluation mode (with `--mode=eval`); the results will be 41 | saved at 42 | 43 | *semantic segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_semantic/\*.png 44 | 45 | *instance segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_instance/\* 46 | 47 | *panoptic segmentation*: ${BASE_MODEL_DIRECTORY}/vis/raw_panoptic/\*.png 48 | 49 | ## Create Images JSON 50 | 51 | Create the images json file by running the following command. 52 | 53 | ```bash 54 | python deeplab2/utils/create_images_json_for_cityscapes.py \ 55 | --image_dir=${DATA_ROOT}/leftImg8bit/${IMAGES_SPLIT} \ 56 | --output_json_path=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \ 57 | --only_basename \ 58 | --include_image_type_suffix=false 59 | ``` 60 | 61 | ## Convert the Prediction Format 62 | 63 | Convert prediction results saved in the 64 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) to the 65 | panoptic COCO format. 66 | 67 | ```bash 68 | python panopticapi/converters/2channels2panoptic_coco_format.py \ 69 | --source_folder=${BASE_MODEL_DIRECTORY}/vis/raw_panoptic \ 70 | --images_json_file=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \ 71 | --categories_json_file=deeplab2/utils/panoptic_cityscapes_categories.json \ 72 | --segmentations_folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 73 | --predictions_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json 74 | ``` 75 | 76 | ## Run Local Evaluation Scripts (for *validation* set) 77 | 78 | Run the [official scripts](https://github.com/mcordts/cityscapesScripts) to 79 | evaluate validation set results.
80 | 81 | For *semantic segmentation*: 82 | 83 | ```bash 84 | CITYSCAPES_RESULTS=${BASE_MODEL_DIRECTORY}/vis/raw_semantic/ \ 85 | CITYSCAPES_DATASET=${DATA_ROOT} \ 86 | CITYSCAPES_EXPORT_DIR=${PATH_TO_SAVE} \ 87 | python cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py 88 | ``` 89 | 90 | For *instance segmentation*: 91 | 92 | ```bash 93 | CITYSCAPES_RESULTS=${BASE_MODEL_DIRECTORY}/vis/raw_instance/ \ 94 | CITYSCAPES_DATASET=${DATA_ROOT} \ 95 | python cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py 96 | ``` 97 | 98 | For *panoptic segmentation*: 99 | 100 | ```bash 101 | python cityscapesscripts/evaluation/evalPanopticSemanticLabeling.py \ 102 | --prediction-json-file=${PATH_TO_SAVE}/panoptic_cocoformat.json \ 103 | --prediction-folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 104 | --gt-json-file=${DATA_ROOT}/gtFine/cityscapes_panoptic_val.json \ 105 | --gt-folder=${DATA_ROOT}/gtFine/cityscapes_panoptic_val 106 | ``` 107 | 108 | Please note that our raw prediction format does not support the instance 109 | segmentation submission format yet. 110 | 111 | ## Prepare Submission Files (for *test* set) 112 | 113 | Run the following commands to prepare the submission files for test server 114 | evaluation. 115 | 116 | ```bash 117 | zip -r cityscapes_test_submission_semantic.zip ${BASE_MODEL_DIRECTORY}/vis/raw_semantic 118 | zip -r cityscapes_test_submission_instance.zip ${BASE_MODEL_DIRECTORY}/vis/raw_instance 119 | zip -r cityscapes_test_submission_panoptic.zip ${PATH_TO_SAVE}/panoptic_cocoformat ${PATH_TO_SAVE}/panoptic_cocoformat.json 120 | ``` 121 | -------------------------------------------------------------------------------- /g3doc/setup/coco.md: -------------------------------------------------------------------------------- 1 | # Run DeepLab2 on COCO dataset 2 | 3 | This page walks through the steps required to generate 4 | [COCO](https://cocodataset.org/) panoptic segmentation data for DeepLab2. 5 | DeepLab2 uses sharded TFRecords for efficient processing of the data. 6 | 7 | ## Prework 8 | 9 | Before running any DeepLab2 scripts, users should (1) access the 10 | [COCO dataset website](https://cocodataset.org/) to download the dataset, 11 | including [2017 Train images](http://images.cocodataset.org/zips/train2017.zip), 12 | [2017 Val images](http://images.cocodataset.org/zips/val2017.zip), 13 | [2017 Test images](http://images.cocodataset.org/zips/test2017.zip), and 14 | [2017 Panoptic Train/Val annotations](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip), 15 | and (2) unzip the downloaded files. 16 | 17 | After finishing the above steps, the expected directory structure should be as 18 | follows: 19 | 20 | ``` 21 | .(COCO_ROOT) 22 | +-- train2017 23 | | | 24 | | +-- *.jpg 25 | | 26 | |-- val2017 27 | | | 28 | | +-- *.jpg 29 | | 30 | |-- test2017 31 | | | 32 | | +-- *.jpg 33 | | 34 | +-- annotations 35 | | 36 | +-- panoptic_{train|val}2017.json 37 | +-- panoptic_{train|val}2017 38 | ``` 39 | 40 | ## Convert prepared dataset to TFRecord 41 | 42 | Use the following command line to generate COCO TFRecords: 43 | 44 | ```bash 45 | # For generating data for panoptic segmentation task 46 | python deeplab2/data/build_coco_data.py \ 47 | --coco_root=${COCO_ROOT} \ 48 | --output_dir=${OUTPUT_DIR} 49 | ``` 50 | 51 | The command above will output three sharded tfrecord files: 52 | `{train|val|test}@1000.tfrecord`. For the `train` and `val` sets, the tfrecords 53 | contain the RGB image pixels as well as the corresponding annotations. For
the 54 | `test` set, the tfrecords contain RGB images only. These files will be used as 55 | the input for model training and evaluation. 56 | 57 | Note that we map the class IDs to contiguous IDs. Specifically, we map the 58 | original label IDs, which range from 1 to 200, to contiguous ones ranging 59 | from 1 to 133. 60 | 61 | ### TFExample proto format for COCO 62 | 63 | The Example proto contains the following fields: 64 | 65 | * `image/encoded`: encoded image content. 66 | * `image/filename`: image filename. 67 | * `image/format`: image file format. 68 | * `image/height`: image height. 69 | * `image/width`: image width. 70 | * `image/channels`: image channels. 71 | * `image/segmentation/class/encoded`: encoded segmentation content. 72 | * `image/segmentation/class/format`: segmentation encoding format. 73 | 74 | For panoptic segmentation, the encoded segmentation map will be the raw bytes of 75 | an int32 panoptic map, where each pixel is assigned to a panoptic ID, which is 76 | computed by: 77 | 78 | ``` 79 | panoptic ID = semantic ID * label divisor + instance ID 80 | ``` 81 | 82 | where semantic ID will be: 83 | 84 | * ignore label (0) for pixels not belonging to any segment 85 | * for segments associated with `iscrowd` label: 86 | * (default): ignore label (0) 87 | * (if set `--treat_crowd_as_ignore=false` while running 88 | `build_coco_data.py`): `category_id` 89 | * `category_id` for other segments 90 | 91 | The instance ID will be 0 for pixels belonging to 92 | 93 | * `stuff` class 94 | * `thing` class with `iscrowd` label 95 | * pixels with ignore label 96 | 97 | and `[1, label divisor)` otherwise. 98 | -------------------------------------------------------------------------------- /g3doc/setup/coco_test_server_evaluation.md: -------------------------------------------------------------------------------- 1 | # Test Server Evaluation on COCO dataset 2 | 3 | This page walks through the steps required to convert DeepLab2 predictions for 4 | test server evaluation on [COCO](https://cocodataset.org/). 5 | 6 | A high-level overview of the whole process: 7 | 8 | 1. Save raw panoptic predictions in the two-channel format. 9 | 10 | 2. Convert predictions in the two-channel format to the panoptic COCO format. 11 | 12 | 3. Run local validation set evaluation or prepare test set evaluation. 13 | 14 | We also define some environment variables for simplicity and convenience: 15 | 16 | `BASE_MODEL_DIRECTORY`: a variable set in the textproto file, which defines where 17 | all checkpoints and results are saved. 18 | 19 | `DATA_ROOT`: where the original COCO dataset is located. 20 | 21 | `PATH_TO_SAVE`: where the converted results should be saved. 22 | 23 | ## Save Raw Panoptic Prediction 24 | 25 | Save the raw panoptic predictions in the 26 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) by ensuring 27 | the following fields are set properly in the textproto config file. 28 | 29 | ``` 30 | eval_dataset_options.decode_groundtruth_label = false 31 | evaluator_options.save_predictions = true 32 | evaluator_options.save_raw_predictions = true 33 | evaluator_options.convert_raw_to_eval_ids = true 34 | ``` 35 | 36 | Then run the model in evaluation mode (with `--mode=eval`), and the results 37 | will be saved at ${BASE_MODEL_DIRECTORY}/vis/raw_panoptic/\*.png. 38 | 39 | ## Convert the Prediction Format 40 | 41 | Convert prediction results saved in the 42 | [two-channel panoptic format](https://arxiv.org/pdf/1801.00868.pdf) to the 43 | panoptic COCO format.
44 | 45 | ```bash 46 | python panopticapi/converters/2channels2panoptic_coco_format.py \ 47 | --source_folder=${BASE_MODEL_DIRECTORY}/vis/raw_panoptic \ 48 | --images_json_file=${DATA_ROOT}/annotations/IMG_JSON \ 49 | --categories_json_file=panopticapi/panoptic_coco_categories.json \ 50 | --segmentations_folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 51 | --predictions_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json 52 | ``` 53 | 54 | The `IMG_JSON` refers to `panoptic_val2017.json` for the *val* set and 55 | `image_info_test-dev2017.json` for the *test-dev* set. 56 | 57 | ## Run Local Evaluation Scripts (for *validation* set) 58 | 59 | Run the [official scripts](https://github.com/cocodataset/panopticapi) to 60 | evaluate validation set results. 61 | 62 | ```bash 63 | python panopticapi/evaluation.py \ 64 | --pred_json_file=${PATH_TO_SAVE}/panoptic_cocoformat.json \ 65 | --pred_folder=${PATH_TO_SAVE}/panoptic_cocoformat \ 66 | --gt_json_file=${DATA_ROOT}/annotations/panoptic_val2017.json \ 67 | --gt_folder=${DATA_ROOT}/annotations/panoptic_val2017 68 | ``` 69 | 70 | ## Prepare Submission Files (for *test* set) 71 | 72 | Run the following command to prepare a submission file for test server 73 | evaluation. 74 | 75 | ```bash 76 | zip -r coco_test_submission_panoptic.zip ${PATH_TO_SAVE}/panoptic_cocoformat ${PATH_TO_SAVE}/panoptic_cocoformat.json 77 | ``` 78 | -------------------------------------------------------------------------------- /g3doc/setup/motchallenge_step.md: -------------------------------------------------------------------------------- 1 | # Run DeepLab2 on MOTChallenge-STEP dataset 2 | 3 | ## MOTChallenge-STEP dataset 4 | 5 | MOTChallenge-STEP extends the existing [MOTChallenge](https://motchallenge.net/) 6 | dataset with spatially and temporally dense annotations. 7 | 8 | ### Label Map 9 | 10 | The MOTChallenge-STEP dataset follows the same annotation and label policy as 11 | the [KITTI-STEP dataset](./kitti_step.md). From the 12 | [MOTChallenge](https://motchallenge.net/) dataset, 4 outdoor sequences are 13 | annotated for MOTChallenge-STEP. In particular, these sequences are split 14 | into 2 for training and 2 for testing. This dataset contains only 7 semantic 15 | classes, as not all of 16 | [Cityscapes](https://www.cityscapes-dataset.com/dataset-overview/#class-definitions)' 17 | 19 semantic classes are present. 18 | 19 | Label Name | Label ID 20 | -------------- | -------- 21 | sidewalk | 0 22 | building | 1 23 | vegetation | 2 24 | sky | 3 25 | person† | 4 26 | rider | 5 27 | bicycle | 6 28 | void | 255 29 | 30 | †: Single instance annotations are available. 31 | 32 | ### Prepare MOTChallenge-STEP for Training and Evaluation 33 | 34 | In the following, we provide a step-by-step walkthrough to prepare the data. 35 | 36 | 1. Create the MOTChallenge-STEP directory: 37 | 38 | ```bash 39 | mkdir ${MOTCHALLENGE_STEP_ROOT}/images 40 | cd ${MOTCHALLENGE_STEP_ROOT}/images 41 | ``` 42 | 43 | 2. Download MOTChallenge images from https://motchallenge.net/data/MOTS.zip and 44 | unzip. 45 | 46 | ```bash 47 | wget ${MOTCHALLENGE_LINK} 48 | unzip ${MOTCHALLENGE_IMAGES}.zip 49 | ``` 50 | 51 | 3. Move and rename the data: 52 | 53 | ```bash 54 | # Create directories. 55 | mkdir train 56 | mkdir train/0002 57 | mkdir train/0009 58 | mkdir test 59 | mkdir test/0001 60 | mkdir test/0007 61 | 62 | # Copy data.
63 | cp -r MOTS/train/MOTS20-02/img1/* train/0002/ 64 | cp -r MOTS/train/MOTS20-09/img1/* train/0009/ 65 | cp -r MOTS/test/MOTS20-01/img1/* test/0001/ 66 | cp -r MOTS/test/MOTS20-07/img1/* test/0007/ 67 | 68 | # Clean up. 69 | rm -r MOTS 70 | ``` 71 | 72 | 4. Download the groundtruth MOTChallenge-STEP panoptic maps from 73 | https://motchallenge.net/data/motchallenge-step.tar.gz 74 | 75 | ```bash 76 | cd ${MOTCHALLENGE_STEP_ROOT} 77 | wget ${MOTCHALLENGE_GT_LINK} 78 | tar -xvf ${MOTCHALLENGE_GT}.tar.gz 79 | ``` 80 | 81 | The groundtruth panoptic map is encoded in the same way as described for the 82 | [KITTI-STEP dataset](./kitti_step.md). 83 | 84 | DeepLab2 requires the dataset to be converted to TFRecords for efficient reading 85 | and prefetching. To create the dataset for training and evaluation, run the 86 | following command: 87 | 88 | ```bash 89 | python deeplab2/data/build_step_data.py \ 90 | --step_root=${MOTCHALLENGE_STEP_ROOT} \ 91 | --output_dir=${OUTPUT_DIR} 92 | ``` 93 | 94 | This script outputs two sharded tfrecord files: `{train|test}@10.tfrecord`. For 95 | the `train` set, the tfrecords contain the RGB image pixels as well as their 96 | panoptic maps. For the `test` set, they contain RGB images only. These files 97 | will be used as the input for model training and evaluation. 98 | 99 | Optionally, you can also specify `--use_two_frames` to encode two 100 | consecutive frames into the tfrecord files. 101 | 102 | ## Citing MOTChallenge-STEP 103 | 104 | If you find this dataset helpful in your research, please use the following 105 | BibTeX entry. 106 | 107 | ``` 108 | @article{step_2021, 109 | author = {Weber, Mark and Xie, Jun and Collins, Maxwell and Zhu, Yukun and Voigtlaender, Paul and Adam, Hartwig and Green, Bradley and Geiger, Andreas and Leibe, Bastian and Cremers, Daniel and O\v{s}ep, Aljo\v{s}a and Leal-Taix\'{e}, Laura and Chen, Liang-Chieh}, 110 | journal = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks}, 111 | title = {{STEP}: Segmenting and Tracking Every Pixel}, 112 | year = {2021} 113 | } 114 | ``` 115 | -------------------------------------------------------------------------------- /g3doc/setup/your_own_dataset.md: -------------------------------------------------------------------------------- 1 | # Convert your own dataset for DeepLab2 framework 2 | 3 | You may want to train DeepLab2 on your own dataset. Here, we provide some 4 | guidance that will hopefully facilitate the preparation process. 5 | 6 | 1. Prepare your own dataset. 7 | * **Images** should be stored either in `jpg` or `png` format. 8 | * **Annotations** should be stored either in `png` or `json` format. The 9 | DeepLab2 framework assumes the panoptic label format (i.e., 10 | `panoptic_label = semantic_label * label_divisor + instance_id`, where 11 | the `label_divisor` should be larger than the maximum number of 12 | instances per image). 13 | * The `png` format refers to the case where we split the semantic 14 | label and instance id into RGB channels. For example, the R-channel 15 | stores the semantic label, while the G- and B-channels store the 16 | instance id (G: instance_id // 256 and B: instance_id % 256). 17 | * The `json` format refers to the 18 | [COCO panoptic json format](https://cocodataset.org/#format-data). 19 | 2. Convert the dataset to TFRecord.
20 | 21 | * Update our provided example code (e.g., 22 | [build_step_data.py](../../data/build_step_data.py) for `png` format, 23 | and [build_coco_data.py](../../data/build_coco_data.py) for `json` 24 | format) to convert your dataset to TFRecord. 25 | * Alternatively, if you are using your own binary to create TFRecords, 26 | make sure to include the same fields in the proto as what our example 27 | code creates. 28 | 29 | 3. Modify `dataset.py` (path: `${DEEPLAB2}/data/dataset.py`) to provide 30 | your dataset information. 31 | 32 | * Set the `panoptic_label_divisor` (i.e., the `label_divisor` above) 33 | correctly. Its value should be larger than the maximum number of 34 | instances that could appear per image in your dataset. 35 | * Set the `ignore_label` properly. Pixels annotated with `ignore_label` 36 | are not used during either training or evaluation. If your dataset does 37 | not contain the `ignore_label` annotations, you could simply set it to 38 | a large value (e.g., 255 as for 39 | [Cityscapes](https://www.cityscapes-dataset.com/)). 40 | * Set the `class_has_instances_list` properly. The variable specifies 41 | which classes belong to the `thing` category (i.e., countable objects 42 | such as people, cars). 43 | * Set the colormap (for visualization) properly. You may also need to 44 | define your own colormap (see `${DEEPLAB2}/trainer/vis_utils.py`). 45 | 46 | 4. Prepare the experiment config. 47 | 48 | * Update our provided example configs (path: 49 | `${DEEPLAB2}/configs/${DATASET}/${MODEL}/${BACKBONE}`) for your use 50 | case. A few things that may be worth your attention: 51 | * Set the `crop_size` correctly for both training and evaluation. See 52 | Q2 in [FAQ](../faq.md) for more details. 53 | * Tune the config flags for your dataset (e.g., `base_learning_rate`, 54 | `training_number_of_step`, and so on). 55 | 56 | Finally, if your dataset only contains semantic segmentation annotations, 57 | you could still use the DeepLab2 framework with some minor changes: 58 | 59 | 1. Since the code only reads panoptic data at the moment, you need to set 60 | `panoptic_label_divisor = k`, where k is any positive integer, 61 | `instance_id = 0`, and `class_has_instances_list = []` (i.e., we treat the 62 | dataset as one that contains only `stuff` classes), when you are (1) 63 | converting the dataset to TFRecord (e.g., 64 | [build_step_data.py](../../data/build_step_data.py)), 65 | and (2) adding dataset information in dataset.py. 66 | 2. Have a config similar to 67 | `${DEEPLAB2}/configs/cityscapes/panoptic_deeplab/resnet50_os32_semseg.textproto`, 68 | where the instance branch is not 69 | instantiated. 70 | 71 | At this point, you are good to go! Enjoy training DeepLab2! 72 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /model/builder_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for model.builder.""" 17 | 18 | import os 19 | from absl.testing import parameterized 20 | 21 | import tensorflow as tf 22 | 23 | from google.protobuf import text_format 24 | from deeplab2 import config_pb2 25 | from deeplab2.model import builder 26 | from deeplab2.model.decoder import motion_deeplab_decoder 27 | from deeplab2.model.encoder import axial_resnet_instances 28 | from deeplab2.model.encoder import mobilenet 29 | # resources dependency 30 | 31 | 32 | _CONFIG_PATH = 'deeplab2/configs/example' 33 | 34 | 35 | def _read_proto_file(filename, proto): 36 | filename = filename # OSS: removed internal filename loading. 37 | with tf.io.gfile.GFile(filename, 'r') as proto_file: 38 | return text_format.ParseLines(proto_file, proto) 39 | 40 | 41 | class BuilderTest(tf.test.TestCase, parameterized.TestCase): 42 | 43 | def test_resnet50_encoder_creation(self): 44 | backbone_options = config_pb2.ModelOptions.BackboneOptions( 45 | name='resnet50', output_stride=32) 46 | encoder = builder.create_encoder( 47 | backbone_options, 48 | tf.keras.layers.experimental.SyncBatchNormalization) 49 | self.assertIsInstance(encoder, axial_resnet_instances.ResNet50) 50 | 51 | @parameterized.parameters('mobilenet_v3_large', 'mobilenet_v3_small') 52 | def test_mobilenet_encoder_creation(self, model_name): 53 | backbone_options = config_pb2.ModelOptions.BackboneOptions( 54 | name=model_name, use_squeeze_and_excite=True, output_stride=32) 55 | encoder = builder.create_encoder( 56 | backbone_options, 57 | tf.keras.layers.experimental.SyncBatchNormalization) 58 | self.assertIsInstance(encoder, mobilenet.MobileNet) 59 | 60 | def test_resnet_encoder_creation(self): 61 | backbone_options = config_pb2.ModelOptions.BackboneOptions( 62 | name='max_deeplab_s', output_stride=32) 63 | encoder = builder.create_resnet_encoder( 64 | backbone_options, 65 | bn_layer=tf.keras.layers.experimental.SyncBatchNormalization) 66 | self.assertIsInstance(encoder, axial_resnet_instances.MaXDeepLabS) 67 | 68 | def test_decoder_creation(self): 69 | proto_filename = os.path.join( 70 | _CONFIG_PATH, 'example_kitti-step_motion_deeplab.textproto') 71 | model_options = _read_proto_file(proto_filename, config_pb2.ModelOptions()) 72 | motion_decoder = builder.create_decoder( 73 | model_options, tf.keras.layers.experimental.SyncBatchNormalization, 74 | ignore_label=255) 75 | self.assertIsInstance(motion_decoder, 76 | motion_deeplab_decoder.MotionDeepLabDecoder) 77 | 78 | 79 | if __name__ == '__main__': 80 | tf.test.main() 81 | 
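The tests above double as a reference for the public builder API. As a standalone illustration, the following minimal sketch constructs an encoder the same way `test_resnet50_encoder_creation` does; using plain `tf.keras.layers.BatchNormalization` instead of the synchronized variant is an assumption for single-device use.

```python
import tensorflow as tf

from deeplab2 import config_pb2
from deeplab2.model import builder

# Backbone configuration mirroring the ResNet-50 test case above.
backbone_options = config_pb2.ModelOptions.BackboneOptions(
    name='resnet50', output_stride=32)

# Assumption: plain BatchNormalization for single-device experimentation;
# the tests pass tf.keras.layers.experimental.SyncBatchNormalization instead.
encoder = builder.create_encoder(
    backbone_options, tf.keras.layers.BatchNormalization)

print(type(encoder).__name__)  # Expected: ResNet50
```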
-------------------------------------------------------------------------------- /model/decoder/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /model/decoder/aspp_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for aspp.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2.model.decoder import aspp 20 | from deeplab2.utils import test_utils 21 | 22 | 23 | class AsppTest(tf.test.TestCase): 24 | 25 | def test_aspp_pool_error(self): 26 | pool = aspp.ASPPPool(output_channels=64, name='') 27 | 28 | # Should pass without an error. 29 | pool.set_pool_size((None, None)) 30 | 31 | with self.assertRaises(ValueError): 32 | # Should raise an error. 
33 | pool.set_pool_size((2, None)) 34 | 35 | def test_aspp_conv_atrous_rate_shape(self): 36 | atrous_rates = [2, 6, 12, 18] 37 | for rate in atrous_rates: 38 | conv = aspp.ASPPConv(output_channels=64, atrous_rate=rate, name='') 39 | input_tensor = tf.random.uniform(shape=(2, 12, 12, 3)) 40 | 41 | output = conv(input_tensor) 42 | expected_shape = [2, 12, 12, 64] 43 | self.assertListEqual(output.shape.as_list(), expected_shape) 44 | 45 | def test_aspp_conv_non_negative(self): 46 | conv = aspp.ASPPConv(output_channels=12, atrous_rate=2, name='') 47 | input_tensor = tf.random.uniform(shape=(2, 17, 17, 3)) 48 | 49 | output = conv(input_tensor) 50 | self.assertTrue((output.numpy() >= 0.0).all()) 51 | 52 | def test_aspp_pool_shape(self): 53 | pool = aspp.ASPPPool(output_channels=64, name='') 54 | input_tensor = tf.random.uniform(shape=(2, 12, 12, 3)) 55 | 56 | output = pool(input_tensor) 57 | expected_shape = [2, 12, 12, 64] 58 | self.assertListEqual(output.shape.as_list(), expected_shape) 59 | 60 | def test_aspp_pool_non_negative(self): 61 | pool = aspp.ASPPPool(output_channels=12, name='') 62 | input_tensor = tf.random.uniform(shape=(2, 17, 17, 3)) 63 | 64 | output = pool(input_tensor) 65 | self.assertTrue((output.numpy() >= 0.0).all()) 66 | 67 | def test_aspp_wrong_atrous_rate(self): 68 | with self.assertRaises(ValueError): 69 | _ = aspp.ASPP(output_channels=64, atrous_rates=[1, 2, 3, 4]) 70 | 71 | @test_utils.test_all_strategies 72 | def test_aspp_shape(self, strategy): 73 | with strategy.scope(): 74 | for bn_layer in test_utils.NORMALIZATION_LAYERS: 75 | aspp_layer = aspp.ASPP( 76 | output_channels=64, atrous_rates=[6, 12, 18], bn_layer=bn_layer) 77 | input_tensor = tf.random.uniform(shape=(2, 32, 32, 3)) 78 | 79 | output = aspp_layer(input_tensor) 80 | expected_shape = [2, 32, 32, 64] 81 | self.assertListEqual(output.shape.as_list(), expected_shape) 82 | 83 | def test_aspp_non_negative(self): 84 | aspp_layer = aspp.ASPP(output_channels=32, atrous_rates=[4, 8, 16]) 85 | input_tensor = tf.random.uniform(shape=(2, 32, 32, 3)) 86 | 87 | output = aspp_layer(input_tensor) 88 | self.assertTrue((output.numpy() >= 0.0).all()) 89 | 90 | if __name__ == '__main__': 91 | tf.test.main() 92 | -------------------------------------------------------------------------------- /model/decoder/deeplabv3.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains code to build a DeepLabV3. 17 | 18 | Reference: 19 | - [Rethinking Atrous Convolution for Semantic Image Segmentation]( 20 | https://arxiv.org/pdf/1706.05587.pdf) 21 | """ 22 | import tensorflow as tf 23 | 24 | from deeplab2 import common 25 | from deeplab2.model.decoder import aspp 26 | from deeplab2.model.layers import convolutions 27 | 28 | 29 | layers = tf.keras.layers 30 | 31 | 32 | class DeepLabV3(layers.Layer): 33 | """A DeepLabV3 model. 
34 | 35 | This model takes in features from an encoder and performs multi-scale context 36 | aggregation with the help of an ASPP layer. Finally, a classification head is 37 | used to predict a semantic segmentation. 38 | """ 39 | 40 | def __init__(self, 41 | decoder_options, 42 | deeplabv3_options, 43 | bn_layer=tf.keras.layers.BatchNormalization): 44 | """Creates a DeepLabV3 decoder of type layers.Layer. 45 | 46 | Args: 47 | decoder_options: Decoder options as defined in config_pb2.DecoderOptions. 48 | deeplabv3_options: Model options as defined in 49 | config_pb2.ModelOptions.DeeplabV3Options. 50 | bn_layer: An optional tf.keras.layers.Layer that computes the 51 | normalization (default: tf.keras.layers.BatchNormalization). 52 | """ 53 | super(DeepLabV3, self).__init__(name='DeepLabV3') 54 | 55 | self._feature_name = decoder_options.feature_key 56 | self._aspp = aspp.ASPP(decoder_options.aspp_channels, 57 | decoder_options.atrous_rates, 58 | bn_layer=bn_layer) 59 | 60 | self._classifier_conv_bn_act = convolutions.Conv2DSame( 61 | decoder_options.decoder_channels, 62 | kernel_size=3, 63 | name='classifier_conv_bn_act', 64 | use_bias=False, 65 | use_bn=True, 66 | bn_layer=bn_layer, 67 | activation='relu') 68 | 69 | self._final_conv = convolutions.Conv2DSame( 70 | deeplabv3_options.num_classes, kernel_size=1, name='final_conv') 71 | 72 | def set_pool_size(self, pool_size): 73 | """Sets the pooling size of the ASPP pooling layer. 74 | 75 | Args: 76 | pool_size: A tuple specifying the pooling size of the ASPP pooling layer. 77 | """ 78 | self._aspp.set_pool_size(pool_size) 79 | 80 | def get_pool_size(self): 81 | return self._aspp.get_pool_size() 82 | 83 | def reset_pooling_layer(self): 84 | """Resets the ASPP pooling layer to global average pooling.""" 85 | self._aspp.reset_pooling_layer() 86 | 87 | def call(self, features, training=False): 88 | """Performs a forward pass. 89 | 90 | Args: 91 | features: A single input tf.Tensor or an input dict of tf.Tensor with 92 | shape [batch, height, width, channels]. If passed a dict, different keys 93 | should point to different features extracted by the encoder, e.g. 94 | low-level or high-level features. 95 | training: A boolean flag indicating whether training behavior should be 96 | used (default: False). 97 | 98 | Returns: 99 | A dictionary containing the semantic prediction under key 100 | common.PRED_SEMANTIC_LOGITS_KEY. 101 | """ 102 | if isinstance(features, tf.Tensor): 103 | feature = features 104 | else: 105 | feature = features[self._feature_name] 106 | 107 | x = self._aspp(feature, training=training) 108 | 109 | x = self._classifier_conv_bn_act(x, training=training) 110 | 111 | return {common.PRED_SEMANTIC_LOGITS_KEY: self._final_conv(x)} 112 | 113 | @property 114 | def checkpoint_items(self): 115 | items = { 116 | common.CKPT_DEEPLABV3_ASPP: self._aspp, 117 | common.CKPT_DEEPLABV3_CLASSIFIER_CONV_BN_ACT: 118 | self._classifier_conv_bn_act, 119 | common.CKPT_SEMANTIC_LAST_LAYER: self._final_conv, 120 | } 121 | return items 122 | -------------------------------------------------------------------------------- /model/decoder/max_deeplab_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for max_deeplab.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2 import common 21 | from deeplab2 import config_pb2 22 | from deeplab2.model.decoder import max_deeplab 23 | 24 | 25 | def _create_max_deeplab_example_proto(num_non_void_classes=19): 26 | semantic_decoder = config_pb2.DecoderOptions( 27 | feature_key='feature_semantic', atrous_rates=[6, 12, 18]) 28 | auxiliary_semantic_head = config_pb2.HeadOptions( 29 | output_channels=num_non_void_classes, head_channels=256) 30 | pixel_space_head = config_pb2.HeadOptions( 31 | output_channels=128, head_channels=256) 32 | max_deeplab_options = config_pb2.ModelOptions.MaXDeepLabOptions( 33 | pixel_space_head=pixel_space_head, 34 | auxiliary_semantic_head=auxiliary_semantic_head) 35 | # Add features from lowest to highest. 36 | max_deeplab_options.auxiliary_low_level.add( 37 | feature_key='res3', channels_project=64) 38 | max_deeplab_options.auxiliary_low_level.add( 39 | feature_key='res2', channels_project=32) 40 | return config_pb2.ModelOptions( 41 | decoder=semantic_decoder, max_deeplab=max_deeplab_options) 42 | 43 | 44 | class MaXDeeplabTest(tf.test.TestCase): 45 | 46 | def test_max_deeplab_decoder_output_shape(self): 47 | num_non_void_classes = 19 48 | num_mask_slots = 127 49 | model_options = _create_max_deeplab_example_proto( 50 | num_non_void_classes=num_non_void_classes) 51 | decoder = max_deeplab.MaXDeepLab( 52 | max_deeplab_options=model_options.max_deeplab, 53 | ignore_label=255, 54 | decoder_options=model_options.decoder) 55 | 56 | input_dict = { 57 | 'res2': 58 | tf.random.uniform([2, 17, 17, 256]), 59 | 'res3': 60 | tf.random.uniform([2, 9, 9, 512]), 61 | 'transformer_class_feature': 62 | tf.random.uniform([2, num_mask_slots, 256]), 63 | 'transformer_mask_feature': 64 | tf.random.uniform([2, num_mask_slots, 256]), 65 | 'feature_panoptic': 66 | tf.random.uniform([2, 17, 17, 256]), 67 | 'feature_semantic': 68 | tf.random.uniform([2, 5, 5, 2048]) 69 | } 70 | resulting_dict = decoder(input_dict) 71 | self.assertListEqual( 72 | resulting_dict[common.PRED_SEMANTIC_LOGITS_KEY].shape.as_list(), 73 | [2, 17, 17, 19]) # Stride 4 74 | self.assertListEqual( 75 | resulting_dict[ 76 | common.PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY].shape.as_list(), 77 | [2, 17, 17, 128]) # Stride 4 78 | self.assertListEqual( 79 | resulting_dict[ 80 | common.PRED_TRANSFORMER_CLASS_LOGITS_KEY].shape.as_list(), 81 | # Non-void classes and a void class. 82 | [2, num_mask_slots, num_non_void_classes + 1]) 83 | self.assertListEqual( 84 | resulting_dict[common.PRED_PIXEL_SPACE_MASK_LOGITS_KEY].shape.as_list(), 85 | [2, 17, 17, num_mask_slots]) # Stride 4. 86 | 87 | 88 | if __name__ == '__main__': 89 | tf.test.main() 90 | -------------------------------------------------------------------------------- /model/encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /model/encoder/atrous_consistency_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests of atrous consistencies for axial_resnet_instances.""" 17 | 18 | from absl.testing import parameterized 19 | import tensorflow as tf 20 | 21 | from deeplab2.model import test_utils 22 | from deeplab2.model.encoder import axial_resnet_instances 23 | 24 | 25 | class AtrousConsistencyTest(tf.test.TestCase, parameterized.TestCase): 26 | 27 | @parameterized.product( 28 | (dict(model_name='resnet50', backbone_layer_multiplier=1), 29 | dict(model_name='resnet50_beta', backbone_layer_multiplier=1), 30 | dict(model_name='wide_resnet41', backbone_layer_multiplier=1), 31 | dict(model_name='swidernet', backbone_layer_multiplier=2)), 32 | output_stride=[8, 16, 32]) 33 | def test_model_atrous_consistency_with_output_stride_four( 34 | self, model_name, backbone_layer_multiplier, output_stride): 35 | tf.random.set_seed(0) 36 | 37 | # Create the input. 38 | pixel_inputs = test_utils.create_test_input(1, 225, 225, 3) 39 | 40 | # Create the model and the weights. 41 | model_1 = axial_resnet_instances.get_model( 42 | model_name, 43 | # Test with small models only. 44 | num_blocks=[2, 2, 2, 2], 45 | backbone_layer_multiplier=backbone_layer_multiplier, 46 | bn_layer=tf.keras.layers.BatchNormalization, 47 | conv_kernel_weight_decay=0.0001, 48 | output_stride=4) 49 | 50 | # Create the weights. 51 | model_1(pixel_inputs, training=False) 52 | 53 | # Set the batch norm gamma as non-zero so that the 3x3 convolution affects 54 | # the output. 55 | for weight in model_1.trainable_weights: 56 | if '/gamma:0' in weight.name: 57 | weight.assign(tf.ones_like(weight)) 58 | 59 | # Dense feature extraction followed by subsampling. 60 | pixel_outputs = model_1(pixel_inputs, training=False)['res5'] 61 | downsampling_stride = output_stride // 4 62 | expected = pixel_outputs[:, ::downsampling_stride, ::downsampling_stride, :] 63 | 64 | # Feature extraction at the nominal network rate. 65 | model_2 = axial_resnet_instances.get_model( 66 | model_name, 67 | # Test with small models only. 
68 |         num_blocks=[2, 2, 2, 2],
69 |         backbone_layer_multiplier=backbone_layer_multiplier,
70 |         bn_layer=tf.keras.layers.BatchNormalization,
71 |         conv_kernel_weight_decay=0.0001,
72 |         output_stride=output_stride)
73 |     # Create the weights.
74 |     model_2(pixel_inputs, training=False)
75 |     # Make the two networks use the same weights.
76 |     model_2.set_weights(model_1.get_weights())
77 |     output = model_2(pixel_inputs, training=False)['res5']
78 | 
79 |     # Normalize the outputs. Since we set batch_norm gamma to 1, the output
80 |     # activations can explode to a large standard deviation, which sometimes
81 |     # causes numerical errors beyond the tolerances.
82 |     normalizing_factor = tf.math.reduce_std(expected)
83 |     # Compare normalized outputs.
84 |     self.assertAllClose(output / normalizing_factor,
85 |                         expected / normalizing_factor,
86 |                         atol=1e-4, rtol=1e-4)
87 | 
88 | 
89 | if __name__ == '__main__':
90 |   tf.test.main()
91 | 
--------------------------------------------------------------------------------
/model/encoder/axial_resnet_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for axial_resnet."""
17 | 
18 | import numpy as np
19 | import tensorflow as tf
20 | 
21 | from deeplab2.model.encoder import axial_resnet
22 | 
23 | 
24 | class AxialResNetTest(tf.test.TestCase):
25 | 
26 |   def test_axial_resnet_correct_output_shape(self):
27 |     model = axial_resnet.AxialResNet('max_deeplab_s')
28 |     endpoints = model(tf.zeros([2, 65, 65, 3]), training=False)
29 |     self.assertListEqual(endpoints['backbone_output'].get_shape().as_list(),
30 |                          [2, 5, 5, 2048])
31 |     self.assertListEqual(
32 |         endpoints['transformer_class_feature'].get_shape().as_list(),
33 |         [2, 128, 256])
34 |     self.assertListEqual(
35 |         endpoints['transformer_mask_feature'].get_shape().as_list(),
36 |         [2, 128, 256])
37 |     self.assertListEqual(endpoints['feature_panoptic'].get_shape().as_list(),
38 |                          [2, 17, 17, 256])
39 |     self.assertListEqual(endpoints['feature_semantic'].get_shape().as_list(),
40 |                          [2, 5, 5, 2048])
41 |     num_params = np.sum(
42 |         [np.prod(v.get_shape().as_list()) for v in model.trainable_weights])
43 |     self.assertEqual(num_params, 61726624)
44 | 
45 | if __name__ == '__main__':
46 |   tf.test.main()
47 | 
--------------------------------------------------------------------------------
/model/encoder/model_export_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests of model exports for axial_resnet_instances.""" 17 | 18 | import os 19 | 20 | from absl import flags 21 | from absl.testing import parameterized 22 | import tensorflow as tf 23 | 24 | from deeplab2.model.encoder import axial_resnet_instances 25 | 26 | FLAGS = flags.FLAGS 27 | 28 | 29 | class ModelExportTest(tf.test.TestCase, parameterized.TestCase): 30 | 31 | @parameterized.parameters( 32 | ('resnet50',), 33 | ('resnet50_beta',), 34 | ('max_deeplab_s_backbone',), 35 | ('max_deeplab_l_backbone',), 36 | ('axial_resnet_s',), 37 | ('axial_resnet_l',), 38 | ('axial_deeplab_s',), 39 | ('axial_deeplab_l',), 40 | ('swidernet',), 41 | ('axial_swidernet',), 42 | ) 43 | def test_model_export(self, model_name): 44 | model = axial_resnet_instances.get_model( 45 | model_name, 46 | output_stride=16, 47 | backbone_layer_multiplier=1.0, 48 | bn_layer=tf.keras.layers.BatchNormalization, 49 | conv_kernel_weight_decay=0.0001, 50 | # Test with small models only. 51 | num_blocks=[2, 2, 2, 2], 52 | # Disable drop path as it is not compatible with model exporting. 53 | block_group_config={'drop_path_keep_prob': 1.0}) 54 | model(tf.keras.Input([257, 257, 3], batch_size=1), training=False) 55 | export_dir = os.path.join( 56 | FLAGS.test_tmpdir, 'test_model_export', model_name) 57 | model.save(export_dir) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /model/kmax_deeplab_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for kmax_deeplab.""" 17 | 18 | import os 19 | 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from google.protobuf import text_format 24 | from deeplab2 import common 25 | from deeplab2 import config_pb2 26 | from deeplab2.data import dataset 27 | from deeplab2.model import kmax_deeplab 28 | from deeplab2.model import utils 29 | # resources dependency 30 | 31 | _CONFIG_PATH = 'deeplab2/configs/example' 32 | 33 | 34 | def _read_proto_file(filename, proto): 35 | filename = filename # OSS: removed internal filename loading. 
36 | with tf.io.gfile.GFile(filename, 'r') as proto_file: 37 | return text_format.ParseLines(proto_file, proto) 38 | 39 | 40 | def _create_model_from_test_proto(file_name, 41 | dataset_name='cityscapes_panoptic'): 42 | proto_filename = os.path.join(_CONFIG_PATH, file_name) 43 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions()) 44 | return kmax_deeplab.KMaXDeepLab( 45 | config, 46 | dataset.MAP_NAME_TO_DATASET_INFO[dataset_name]), config 47 | 48 | 49 | class DeeplabTest(tf.test.TestCase): 50 | 51 | def test_deeplab_with_kmax_convnext_base(self): 52 | model, experiment_options = _create_model_from_test_proto( 53 | 'example_coco_kmax_meta_convnext.textproto', 54 | dataset_name='coco_panoptic') 55 | train_crop_size = tuple(experiment_options.train_dataset_options.crop_size) 56 | input_tensor = tf.random.uniform( 57 | shape=(2, train_crop_size[0], train_crop_size[1], 3)) 58 | stride_4_size = utils.scale_mutable_sequence(train_crop_size, 0.25) 59 | expected_semantic_shape = [ 60 | 2, stride_4_size[0], stride_4_size[1], experiment_options.model_options. 61 | max_deeplab.auxiliary_semantic_head.output_channels] 62 | expected_transformer_class_logits_shape = [ 63 | 2, 128, experiment_options.model_options. 64 | max_deeplab.auxiliary_semantic_head.output_channels] 65 | expected_pixel_space_normalized_feature_shape = [ 66 | 2, stride_4_size[0], stride_4_size[1], experiment_options.model_options. 67 | max_deeplab.pixel_space_head.output_channels] 68 | expected_pixel_space_mask_logits_shape = [ 69 | 2, stride_4_size[0], stride_4_size[1], 128] 70 | resulting_dict = model(input_tensor, training=True) 71 | self.assertListEqual( 72 | resulting_dict[common.PRED_SEMANTIC_LOGITS_KEY].shape.as_list(), 73 | expected_semantic_shape) 74 | self.assertListEqual( 75 | resulting_dict[ 76 | common.PRED_TRANSFORMER_CLASS_LOGITS_KEY].shape.as_list(), 77 | expected_transformer_class_logits_shape) 78 | self.assertListEqual( 79 | resulting_dict[ 80 | common.PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY].shape.as_list(), 81 | expected_pixel_space_normalized_feature_shape) 82 | self.assertListEqual( 83 | resulting_dict[common.PRED_PIXEL_SPACE_MASK_LOGITS_KEY].shape.as_list(), 84 | expected_pixel_space_mask_logits_shape) 85 | num_params = 0 86 | for v in model.trainable_weights: 87 | params = np.prod(v.get_shape().as_list()) 88 | # Exclude the auxiliary semantic head. 89 | if 'auxiliary_semantic' not in v.name: 90 | num_params += params 91 | self.assertEqual(num_params, 121513304) 92 | 93 | 94 | if __name__ == '__main__': 95 | tf.test.main() 96 | -------------------------------------------------------------------------------- /model/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /model/layers/activations.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Defines a set of useful activation functions.""" 17 | import functools 18 | import tensorflow as tf 19 | 20 | 21 | def gelu(input_tensor, approximate=False): 22 | """Gaussian Error Linear Unit. 23 | 24 | Reference: 25 | Gaussian Error Linear Units (GELUs), Dan Hendrycks, Kevin Gimpel, arXiv 2016. 26 | 27 | Args: 28 | input_tensor: A tensor with an arbitrary shape. 29 | approximate: A boolean, whether to enable approximation. 30 | 31 | Returns: 32 | The activated input tensor. 33 | """ 34 | return tf.keras.activations.gelu(input_tensor, approximate=approximate) 35 | 36 | 37 | def hard_sigmoid(input_tensor): 38 | """Hard sigmoid activation function. 39 | 40 | Args: 41 | input_tensor: A tensor with an arbitrary shape. 42 | 43 | Returns: 44 | The activated input tensor. 45 | """ 46 | input_tensor = tf.convert_to_tensor(input_tensor) 47 | return tf.nn.relu6(input_tensor + tf.constant(3.)) * 0.16667 48 | 49 | 50 | def relu6(input_tensor): 51 | """Relu6 activation function. 52 | 53 | Args: 54 | input_tensor: A tensor with an arbitrary shape. 55 | 56 | Returns: 57 | The activated input tensor. 58 | """ 59 | input_tensor = tf.convert_to_tensor(input_tensor) 60 | return tf.nn.relu6(input_tensor) 61 | 62 | 63 | def swish(input_tensor): 64 | """Swish or SiLU activation function. 65 | 66 | Args: 67 | input_tensor: A tensor with an arbitrary shape. 68 | 69 | Returns: 70 | The activated input tensor. 71 | """ 72 | input_tensor = tf.convert_to_tensor(input_tensor) 73 | return tf.nn.silu(input_tensor) 74 | 75 | 76 | def hard_swish(input_tensor): 77 | """Hard Swish function. 78 | 79 | Args: 80 | input_tensor: A tensor with an arbitrary shape. 81 | 82 | Returns: 83 | The activated input tensor. 84 | """ 85 | input_tensor = tf.convert_to_tensor(input_tensor) 86 | return input_tensor * tf.nn.relu6( 87 | input_tensor + tf.constant(3.)) * (1. / 6.) 88 | 89 | 90 | def identity(input_tensor): 91 | """Identity function. 92 | 93 | Useful for helping in quantization. 94 | 95 | Args: 96 | input_tensor: A tensor with an arbitrary shape. 97 | 98 | Returns: 99 | The activated input tensor. 100 | """ 101 | input_tensor = tf.convert_to_tensor(input_tensor) 102 | return tf.identity(input_tensor) 103 | 104 | 105 | def get_activation(identifier): 106 | """Gets activation function via input identifier. 107 | 108 | This function returns the specified customized activation function, if there 109 | is any. Otherwise, tf.keras.activations.get is called. 110 | 111 | Args: 112 | identifier: A string, name of the activation function. 113 | 114 | Returns: 115 | The specified activation function. 
116 | """ 117 | if isinstance(identifier, str): 118 | name_to_fn = { 119 | 'gelu': functools.partial(gelu, approximate=False), 120 | 'approximated_gelu': functools.partial(gelu, approximate=True), 121 | 'silu': swish, 122 | 'swish': swish, 123 | 'hard_swish': hard_swish, 124 | 'relu6': relu6, 125 | 'hard_sigmoid': hard_sigmoid, 126 | 'identity': identity, 127 | 'none': identity, 128 | } 129 | identifier = str(identifier).lower() 130 | if identifier in name_to_fn: 131 | return name_to_fn[identifier] 132 | return tf.keras.activations.get(identifier) 133 | -------------------------------------------------------------------------------- /model/layers/activations_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for activations.py.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2.model.layers import activations 20 | 21 | 22 | class ActivationsTest(tf.test.TestCase): 23 | 24 | def test_gelu(self): 25 | expected_data = [[0.14967535, 0., -0.10032465], 26 | [-0.15880796, -0.04540223, 2.9963627]] 27 | gelu_data = activations.gelu([[.25, 0, -.25], [-1, -2, 3]], 28 | approximate=True) 29 | self.assertAllClose(expected_data, gelu_data) 30 | gelu_data_via_get_activation = activations.get_activation( 31 | 'approximated_gelu')([[.25, 0, -.25], [-1, -2, 3]]) 32 | self.assertAllClose(expected_data, gelu_data_via_get_activation) 33 | 34 | 35 | if __name__ == '__main__': 36 | tf.test.main() 37 | -------------------------------------------------------------------------------- /model/layers/axial_blocks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for axial_blocks.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import axial_blocks 21 | 22 | 23 | class AxialBlocksTest(tf.test.TestCase): 24 | 25 | def test_conv_basic_block_correct_output_shape(self): 26 | layer = axial_blocks.AxialBlock( 27 | filters_list=[256, 256], 28 | strides=2) 29 | float_training_tensor = tf.constant(0.0, dtype=tf.float32) 30 | output = layer((tf.zeros([2, 65, 65, 32]), 31 | float_training_tensor)) 32 | self.assertListEqual(output.get_shape().as_list(), [2, 33, 33, 256]) 33 | 34 | def test_conv_bottleneck_block_correct_output_shape(self): 35 | layer = axial_blocks.AxialBlock( 36 | filters_list=[64, 64, 256], 37 | strides=1) 38 | float_training_tensor = tf.constant(0.0, dtype=tf.float32) 39 | output = layer((tf.zeros([2, 65, 65, 32]), 40 | float_training_tensor)) 41 | self.assertListEqual(output.get_shape().as_list(), [2, 65, 65, 256]) 42 | 43 | def test_axial_block_correct_output_shape(self): 44 | layer = axial_blocks.AxialBlock( 45 | filters_list=[128, 64, 256], 46 | strides=2, 47 | attention_type='axial') 48 | float_training_tensor = tf.constant(0.0, dtype=tf.float32) 49 | output = layer((tf.zeros([2, 65, 65, 32]), 50 | float_training_tensor)) 51 | self.assertListEqual(output.get_shape().as_list(), [2, 33, 33, 256]) 52 | 53 | if __name__ == '__main__': 54 | tf.test.main() 55 | -------------------------------------------------------------------------------- /model/layers/axial_layers_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for axial_layers.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import axial_layers 21 | 22 | 23 | class AxialLayersTest(tf.test.TestCase): 24 | 25 | def test_default_axial_attention_layer_output_shape(self): 26 | layer = axial_layers.AxialAttention() 27 | output = layer(tf.zeros([10, 5, 32])) 28 | self.assertListEqual(output.get_shape().as_list(), [10, 5, 1024]) 29 | 30 | def test_axial_attention_2d_layer_output_shape(self): 31 | layer = axial_layers.AxialAttention2D() 32 | output = layer(tf.zeros([2, 5, 5, 32])) 33 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 1024]) 34 | 35 | def test_change_filters_output_shape(self): 36 | layer = axial_layers.AxialAttention2D(filters=32) 37 | output = layer(tf.zeros([2, 5, 5, 32])) 38 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 64]) 39 | 40 | def test_value_expansion_output_shape(self): 41 | layer = axial_layers.AxialAttention2D(value_expansion=1) 42 | output = layer(tf.zeros([2, 5, 5, 32])) 43 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 512]) 44 | 45 | def test_global_attention_output_shape(self): 46 | layer = axial_layers.GlobalAttention2D() 47 | output = layer(tf.zeros([2, 5, 5, 32])) 48 | self.assertListEqual(output.get_shape().as_list(), [2, 5, 5, 1024]) 49 | 50 | def test_stride_two_output_shape(self): 51 | layer = axial_layers.AxialAttention2D(strides=2) 52 | output = layer(tf.zeros([2, 5, 5, 32])) 53 | self.assertListEqual(output.get_shape().as_list(), [2, 3, 3, 1024]) 54 | 55 | if __name__ == '__main__': 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /model/layers/blocks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for blocks.py.""" 17 | import tensorflow as tf 18 | 19 | from deeplab2.model.layers import blocks 20 | 21 | 22 | class BlocksTest(tf.test.TestCase): 23 | 24 | def test_inverted_bottleneck_block_output_shape(self): 25 | batch, height, width, input_channels = 2, 17, 17, 4 26 | output_channels = 6 27 | input_tensor = tf.random.uniform( 28 | shape=(batch, height, width, input_channels)) 29 | ivb_block = blocks.InvertedBottleneckBlock( 30 | in_filters=input_channels, 31 | out_filters=output_channels, 32 | expand_ratio=2, 33 | strides=1, 34 | name='inverted_bottleneck', 35 | ) 36 | output_tensor, _ = ivb_block(input_tensor) 37 | self.assertListEqual(output_tensor.get_shape().as_list(), 38 | [batch, height, width, output_channels]) 39 | 40 | def test_inverted_bottleneck_block_feature_map_alignment(self): 41 | batch, height, width, input_channels = 2, 17, 17, 128 42 | output_channels = 256 43 | input_tensor = tf.random.uniform( 44 | shape=(batch, height, width, input_channels)) 45 | ivb_block1 = blocks.InvertedBottleneckBlock( 46 | in_filters=input_channels, 47 | out_filters=output_channels, 48 | expand_ratio=2, 49 | strides=2, 50 | name='inverted_bottleneck1', 51 | ) 52 | ivb_block1(input_tensor, False) 53 | weights = ivb_block1.get_weights() 54 | output_tensor, _ = ivb_block1(input_tensor, False) 55 | 56 | ivb_block2 = blocks.InvertedBottleneckBlock( 57 | in_filters=input_channels, 58 | out_filters=output_channels, 59 | expand_ratio=2, 60 | strides=1, 61 | name='inverted_bottleneck2', 62 | ) 63 | ivb_block2(input_tensor, False) 64 | ivb_block2.set_weights(weights) 65 | expected = ivb_block2(input_tensor, False)[0][:, ::2, ::2, :] 66 | 67 | self.assertAllClose(ivb_block1.get_weights(), ivb_block2.get_weights(), 68 | atol=1e-4, rtol=1e-4) 69 | self.assertAllClose(output_tensor, expected, atol=1e-4, rtol=1e-4) 70 | 71 | if __name__ == '__main__': 72 | tf.test.main() 73 | -------------------------------------------------------------------------------- /model/layers/drop_path_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Test for drop_path.py.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import drop_path 21 | 22 | # Set a fixed random seed. 23 | tf.random.set_seed(1) 24 | 25 | 26 | class DropPathTest(tf.test.TestCase): 27 | 28 | def test_drop_path_keep_prob_one(self): 29 | # Test drop_path_keep_prob = 1, where output should be equal to input. 30 | drop_path_keep_prob = 1.0 31 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32)) 32 | layer_op = drop_path.DropPath(drop_path_keep_prob) 33 | output_tensor = layer_op(input_tensor, training=True) 34 | np.testing.assert_equal(input_tensor.numpy(), output_tensor.numpy()) 35 | 36 | def test_not_training_mode(self): 37 | # Test not training mode, where output should be equal to input. 
38 | drop_path_keep_prob = 0.8 39 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32)) 40 | layer_op = drop_path.DropPath(drop_path_keep_prob) 41 | output_tensor = layer_op(input_tensor, training=False) 42 | np.testing.assert_equal(input_tensor.numpy(), output_tensor.numpy()) 43 | 44 | def test_drop_path(self): 45 | drop_path_keep_prob = 0.8 46 | input_tensor = tf.random.uniform(shape=(3, 65, 65, 32)) 47 | layer_op = drop_path.DropPath(drop_path_keep_prob) 48 | output_tensor = layer_op(input_tensor, training=True) 49 | self.assertFalse(np.array_equal(input_tensor.numpy(), 50 | output_tensor.numpy())) 51 | 52 | def test_constant_drop_path_schedule(self): 53 | keep_prob_for_last_stage = 0.8 54 | current_stage_keep_prob = drop_path.get_drop_path_keep_prob( 55 | keep_prob_for_last_stage, 56 | schedule='constant', 57 | current_stage=2, 58 | num_stages=5) 59 | self.assertEqual(current_stage_keep_prob, keep_prob_for_last_stage) 60 | 61 | def test_linear_drop_path_schedule(self): 62 | keep_prob_for_last_stage = 0.8 63 | current_stage_keep_prob = drop_path.get_drop_path_keep_prob( 64 | keep_prob_for_last_stage, 65 | schedule='linear', 66 | current_stage=1, 67 | num_stages=4) 68 | self.assertEqual(current_stage_keep_prob, 0.95) 69 | 70 | def test_unknown_drop_path_schedule(self): 71 | with self.assertRaises(ValueError): 72 | _ = drop_path.get_drop_path_keep_prob(0.8, 'unknown', 1, 4) 73 | 74 | 75 | if __name__ == '__main__': 76 | tf.test.main() 77 | -------------------------------------------------------------------------------- /model/layers/moat_attention_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for moat_attention.""" 17 | 18 | from absl import logging 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | from deeplab2.model.layers import moat_attention 23 | 24 | 25 | class MOATAttentionTest(tf.test.TestCase, parameterized.TestCase): 26 | 27 | def _log_param_specs(self, layer): 28 | num_params = sum([ 29 | np.prod(var.get_shape().as_list()) for var in layer.trainable_weights 30 | ]) 31 | format_str = '{{:<{0}s}}\t{{:<{1}s}}'.format( 32 | max([len(v.name) for v in layer.trainable_weights]), 33 | max([len('{}'.format(v.get_shape())) for v in 34 | layer.trainable_weights])) 35 | format_str = ' >> ' + format_str + '\t{:>5.2f}%' 36 | 37 | for v in layer.trainable_weights: 38 | v_shape = v.get_shape().as_list() 39 | logging.info(format_str.format(v.name, '{}'.format(v_shape), 40 | np.prod(v_shape) / num_params * 100)) 41 | 42 | @parameterized.named_parameters( 43 | ('attention', None), 44 | ('attention_with_relative_position_embedding', '2d_multi_head'), 45 | ) 46 | def test_attention(self, relative_position_embedding_type): 47 | batch_size = 8 48 | height = 8 49 | width = 10 50 | hidden_size = 16 51 | head_size = 8 52 | query = tf.random.normal(shape=[batch_size, height, width, hidden_size], 53 | dtype=tf.float32) 54 | 55 | attention_layer = moat_attention.Attention( 56 | hidden_size=hidden_size, 57 | head_size=head_size, 58 | relative_position_embedding_type=relative_position_embedding_type) 59 | attention_output = attention_layer(query, training=True) 60 | self._log_param_specs(attention_layer) 61 | 62 | self.assertEqual(attention_output.shape.as_list(), 63 | [batch_size, height * width, hidden_size]) 64 | 65 | if __name__ == '__main__': 66 | tf.test.main() 67 | -------------------------------------------------------------------------------- /model/layers/moat_blocks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for moat_blocks.""" 17 | 18 | from absl import logging 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | from deeplab2.model.layers import moat_blocks 23 | 24 | 25 | class MOATBlocksTest(tf.test.TestCase, parameterized.TestCase): 26 | 27 | def _log_param_specs(self, layer): 28 | num_params = sum([ 29 | np.prod(var.get_shape().as_list()) for var in layer.trainable_weights 30 | ]) 31 | format_str = '{{:<{0}s}}\t{{:<{1}s}}'.format( 32 | max([len(v.name) for v in layer.trainable_weights]), 33 | max([len('{}'.format(v.get_shape())) for v in 34 | layer.trainable_weights])) 35 | format_str = ' >> ' + format_str + '\t{:>5.2f}%' 36 | 37 | for v in layer.trainable_weights: 38 | v_shape = v.get_shape().as_list() 39 | logging.info(format_str.format(v.name, '{}'.format(v_shape), 40 | np.prod(v_shape) / num_params * 100)) 41 | 42 | @parameterized.named_parameters( 43 | ('standard', 1), 44 | ('downsample', 2), 45 | ) 46 | def test_mbconv_block(self, stride): 47 | batch_size = 8 48 | height, width = 8, 8 49 | input_size = 16 50 | hidden_size = input_size * stride 51 | inputs = tf.random.normal(shape=[batch_size, height, width, input_size], 52 | dtype=tf.float32) 53 | block = moat_blocks.MBConvBlock(hidden_size=hidden_size, 54 | block_stride=stride,) 55 | output = block(inputs, training=True) 56 | self._log_param_specs(block) 57 | 58 | self.assertEqual(output.shape.as_list(), 59 | [batch_size, height // stride, width // stride, 60 | hidden_size]) 61 | 62 | @parameterized.named_parameters( 63 | ('standard', 1, False), 64 | ('downsample', 2, False), 65 | ('checkpointing', 1, True), 66 | ) 67 | def test_moat_block(self, stride, use_checkpointing): 68 | batch_size = 8 69 | height, width = 8, 8 70 | input_size = 16 71 | hidden_size = input_size * stride 72 | inputs = tf.random.normal(shape=[batch_size, height, width, input_size], 73 | dtype=tf.float32) 74 | block = moat_blocks.MOATBlock(hidden_size=hidden_size, 75 | block_stride=stride, 76 | window_size=[height//stride, width//stride], 77 | use_checkpointing=use_checkpointing) 78 | output = block(inputs, training=True) 79 | self._log_param_specs(block) 80 | 81 | self.assertEqual(output.shape.as_list(), 82 | [batch_size, height // stride, width // stride, 83 | hidden_size]) 84 | 85 | 86 | if __name__ == '__main__': 87 | tf.test.main() 88 | -------------------------------------------------------------------------------- /model/layers/positional_encodings_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for positional_encodings.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import positional_encodings 21 | 22 | 23 | class PositionalEncodingsTest(tf.test.TestCase): 24 | 25 | def test_compute_relative_distance_matrix_output_shape(self): 26 | output = positional_encodings._compute_relative_distance_matrix(33, 97) 27 | self.assertListEqual(output.get_shape().as_list(), [33, 97]) 28 | 29 | def test_relative_positional_encoding_output_shape(self): 30 | layer = positional_encodings.RelativePositionalEncoding( 31 | 33, 97, 32, 'rpe') 32 | output = layer(None) 33 | self.assertListEqual(output.get_shape().as_list(), [33, 97, 32]) 34 | 35 | def test_add_absolute_positional_encoding_1d_output_shape(self): 36 | layer = positional_encodings.AddAbsolutePositionalEncoding( 37 | 'ape1d', positional_encoding_type='1d') 38 | shape = [2, 5, 5, 3] 39 | output = layer(tf.zeros(shape)) 40 | self.assertEqual(len(layer.get_weights()), 10) 41 | self.assertListEqual(output.get_shape().as_list(), shape) 42 | 43 | def test_add_absolute_positional_encoding_2d_output_shape(self): 44 | layer = positional_encodings.AddAbsolutePositionalEncoding( 45 | 'ape2d', positional_encoding_type='2d') 46 | shape = [2, 5, 5, 3] 47 | output = layer(tf.zeros(shape)) 48 | self.assertEqual(len(layer.get_weights()), 5) 49 | self.assertListEqual(output.get_shape().as_list(), shape) 50 | 51 | def test_add_absolute_positional_encoding_none_output_shape(self): 52 | layer = positional_encodings.AddAbsolutePositionalEncoding( 53 | 'none', positional_encoding_type='none') 54 | shape = [2, 5, 5, 3] 55 | output = layer(tf.zeros(shape)) 56 | self.assertEqual(len(layer.get_weights()), 0) 57 | self.assertListEqual(output.get_shape().as_list(), shape) 58 | 59 | if __name__ == '__main__': 60 | tf.test.main() 61 | -------------------------------------------------------------------------------- /model/layers/resized_fuse_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for resized_fuse.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.layers import resized_fuse 21 | 22 | 23 | class ResizedFuseTest(tf.test.TestCase): 24 | 25 | def test_resize_and_fuse_features(self): 26 | batch, height, width, channels = 2, 11, 11, 6 27 | smaller_height, smaller_width, smaller_channels = 6, 6, 3 28 | larger_height1, larger_width1 = 21, 21 # Stride 2 conv. 29 | larger_height2, larger_width2 = 22, 22 # Stride 2 conv. 30 | larger_height3, larger_width3 = 23, 23 # Conv and resize. 
31 | 
32 |     feature_list = []
33 |     feature_list.append(tf.zeros([batch, smaller_height, smaller_width,
34 |                                   smaller_channels]))
35 |     feature_list.append(tf.zeros([batch, smaller_height, smaller_width,
36 |                                   channels]))
37 |     feature_list.append(tf.zeros([batch, height, width, smaller_channels]))
38 |     feature_list.append(tf.zeros([batch, height, width, channels]))
39 |     feature_list.append(tf.zeros([batch, larger_height1, larger_width1,
40 |                                   channels]))
41 |     feature_list.append(tf.zeros([batch, larger_height1, larger_width1,
42 |                                   smaller_channels]))
43 |     feature_list.append(tf.zeros([batch, larger_height2, larger_width2,
44 |                                   smaller_channels]))
45 |     feature_list.append(tf.zeros([batch, larger_height3, larger_width3,
46 |                                   smaller_channels]))
47 |     layer = resized_fuse.ResizedFuse(name='fuse',
48 |                                      height=height,
49 |                                      width=width,
50 |                                      num_channels=channels)
51 |     output = layer(feature_list)
52 |     self.assertEqual(output.get_shape().as_list(), [batch, height, width,
53 |                                                     channels])
54 | 
55 | if __name__ == '__main__':
56 |   tf.test.main()
57 | 
--------------------------------------------------------------------------------
/model/layers/squeeze_and_excite_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for squeeze_and_excite.py."""
17 | 
18 | import tensorflow as tf
19 | 
20 | from deeplab2.model.layers import squeeze_and_excite
21 | 
22 | 
23 | class SqueezeAndExciteTest(tf.test.TestCase):
24 | 
25 |   def test_simplified_squeeze_and_excite_input_output_shape(self):
26 |     # Test the shape of input and output of SimplifiedSqueezeAndExcite.
27 |     channels = 32
28 |     input_tensor = tf.random.uniform(shape=(3, 65, 65, channels))
29 |     layer_op = squeeze_and_excite.SimplifiedSqueezeAndExcite(
30 |         channels)
31 |     output_tensor = layer_op(input_tensor)
32 |     self.assertListEqual(input_tensor.get_shape().as_list(),
33 |                          output_tensor.get_shape().as_list())
34 | 
35 |   def test_squeeze_and_excite_input_output_shape(self):
36 |     # Test the shape of input and output of SqueezeAndExcite.
37 |     channels = 32
38 |     input_tensor = tf.random.uniform(shape=(3, 65, 65, channels))
39 |     layer_op = squeeze_and_excite.SqueezeAndExcite(
40 |         in_filters=channels,
41 |         out_filters=channels,
42 |         se_ratio=8,
43 |         name='se')
44 |     output_tensor = layer_op(input_tensor)
45 |     self.assertListEqual(input_tensor.get_shape().as_list(),
46 |                          output_tensor.get_shape().as_list())
47 | 
48 | 
49 | if __name__ == '__main__':
50 |   tf.test.main()
51 | 
--------------------------------------------------------------------------------
/model/layers/stems.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """This script contains STEMs for neural networks.
17 | 
18 | The `STEM` is defined as the first few convolutions that process the input
19 | image to a spatially smaller feature map (e.g., output stride = 2).
20 | 
21 | 
22 | Reference code:
23 | https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
24 | """
25 | import tensorflow as tf
26 | 
27 | from deeplab2.model.layers import convolutions
28 | 
29 | layers = tf.keras.layers
30 | 
31 | 
32 | class InceptionSTEM(tf.keras.layers.Layer):
33 |   """An InceptionSTEM layer.
34 | 
35 |   This class builds an InceptionSTEM layer which can be used as the first
36 |   few layers in a neural network. In particular, InceptionSTEM contains three
37 |   consecutive 3x3 convolutions.
38 | 
39 |   Reference:
40 |   - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, and Alexander Alemi.
41 |   "Inception-v4, inception-resnet and the impact of residual connections on
42 |   learning." In AAAI, 2017.
43 |   """
44 | 
45 |   def __init__(self,
46 |                bn_layer=tf.keras.layers.BatchNormalization,
47 |                width_multiplier=1.0,
48 |                conv_kernel_weight_decay=0.0,
49 |                activation='relu'):
50 |     """Creates the InceptionSTEM layer.
51 | 
52 |     Args:
53 |       bn_layer: An optional tf.keras.layers.Layer that computes the
54 |         normalization (default: tf.keras.layers.BatchNormalization).
55 |       width_multiplier: A float multiplier, controlling the value of
56 |         convolution output channels.
57 |       conv_kernel_weight_decay: A float, the weight decay for convolution
58 |         kernels.
59 |       activation: A string specifying an activation function to be used in this
60 |         stem.
61 |     """
62 |     super(InceptionSTEM, self).__init__(name='stem')
63 | 
64 |     self._conv1_bn_act = convolutions.Conv2DSame(
65 |         output_channels=int(64 * width_multiplier),
66 |         kernel_size=3,
67 |         name='conv1_bn_act',
68 |         strides=2,
69 |         use_bias=False,
70 |         use_bn=True,
71 |         bn_layer=bn_layer,
72 |         activation=activation,
73 |         conv_kernel_weight_decay=conv_kernel_weight_decay)
74 | 
75 |     self._conv2_bn_act = convolutions.Conv2DSame(
76 |         output_channels=int(64 * width_multiplier),
77 |         kernel_size=3,
78 |         name='conv2_bn_act',
79 |         strides=1,
80 |         use_bias=False,
81 |         use_bn=True,
82 |         bn_layer=bn_layer,
83 |         activation=activation,
84 |         conv_kernel_weight_decay=conv_kernel_weight_decay)
85 | 
86 |     self._conv3_bn = convolutions.Conv2DSame(
87 |         output_channels=int(128 * width_multiplier),
88 |         kernel_size=3,
89 |         strides=1,
90 |         use_bias=False,
91 |         use_bn=True,
92 |         bn_layer=bn_layer,
93 |         activation='none',
94 |         name='conv3_bn',
95 |         conv_kernel_weight_decay=conv_kernel_weight_decay)
96 | 
97 |   def call(self, input_tensor, training=False):
98 |     """Performs a forward pass.
99 | 
100 |     Args:
101 |       input_tensor: An input tensor of type tf.Tensor with shape [batch, height,
102 |         width, channels].
103 |       training: A boolean flag indicating whether training behavior should be
104 |         used (default: False).
105 | 
106 |     Returns:
107 |       A single output tensor, which is not activated: the final `conv3_bn`
108 |       applies batch normalization without an activation function.
109 |     """
110 |     x = self._conv1_bn_act(input_tensor, training=training)
111 |     x = self._conv2_bn_act(x, training=training)
112 |     x = self._conv3_bn(x, training=training)
113 |     return x
114 | 
--------------------------------------------------------------------------------
/model/layers/stems_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for stems.py."""
17 | import tensorflow as tf
18 | 
19 | from deeplab2.model.layers import stems
20 | from deeplab2.utils import test_utils
21 | 
22 | 
23 | class StemsTest(tf.test.TestCase):
24 | 
25 |   def test_inception_stem_output_shape(self):
26 |     batch = 2
27 |     height, width = 65, 65
28 |     input_tensor = test_utils.create_test_input(batch, height, width, 3)
29 |     model = stems.InceptionSTEM()
30 |     output_tensor = model(input_tensor)
31 |     expected_height = (height - 1) // 2 + 1
32 |     expected_width = (width - 1) // 2 + 1
33 |     expected_channels = 128
34 |     self.assertListEqual(
35 |         output_tensor.get_shape().as_list(),
36 |         [batch, expected_height, expected_width, expected_channels])
37 | 
38 | 
39 | if __name__ == '__main__':
40 |   tf.test.main()
41 | 
--------------------------------------------------------------------------------
/model/pixel_decoder/kmax_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
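
A note on the arithmetic these tests rely on: the expected stem output size above and the stage resolutions in the pixel-decoder test below both follow the 'SAME'-padding output-size rule ceil(size / stride). A minimal sketch (plain Python, illustrative only, not part of the original sources):

    def same_padding_output_size(size, stride=2):
      # Output size of a strided convolution with 'SAME' padding:
      # ceil(size / stride), written with integer arithmetic.
      return (size - 1) // stride + 1

    assert same_padding_output_size(65) == 33   # The InceptionSTEM test above.
    resolutions = [321, 161, 81, 41, 21]        # stage1 ... stage5 below.
    for finer, coarser in zip(resolutions, resolutions[1:]):
      assert same_padding_output_size(finer) == coarser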
15 | 16 | """Tests for kMaX pixel decoder.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.pixel_decoder import kmax 21 | 22 | 23 | class KMaXPixelDecoderTest(tf.test.TestCase): 24 | 25 | def test_model_output_shape(self): 26 | model = kmax.KMaXPixelDecoder(name='kmax_pixel_decoder') 27 | output = model({ 28 | 'stage1': tf.keras.Input(shape=(321, 321, 64)), 29 | 'stage2': tf.keras.Input(shape=(161, 161, 128)), 30 | 'stage3': tf.keras.Input(shape=(81, 81, 256)), 31 | 'stage4': tf.keras.Input(shape=(41, 41, 512)), 32 | 'stage5': tf.keras.Input(shape=(21, 21, 1024)), 33 | }) 34 | 35 | self.assertListEqual(output['decoder_stage1'].get_shape().as_list(), 36 | [None, 21, 21, 2048]) 37 | self.assertListEqual(output['decoder_stage2'].get_shape().as_list(), 38 | [None, 41, 41, 1024]) 39 | self.assertListEqual(output['decoder_stage3'].get_shape().as_list(), 40 | [None, 81, 81, 512]) 41 | self.assertListEqual(output['decoder_output'].get_shape().as_list(), 42 | [None, 161, 161, 256]) 43 | 44 | 45 | if __name__ == '__main__': 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /model/pixel_encoder/axial_resnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Implements ResNets[1] and Axial-ResNets [2, 3] as pixel encoders. 17 | 18 | [1] Deep residual learning for image recognition. 19 | CVPR 2016. 20 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. 21 | 22 | [2] Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation, 23 | ECCV 2020. 24 | Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille, 25 | Liang-Chieh Chen. 26 | 27 | [3] MaX-DeepLab: End-to-End Panoptic Segmentation with Mask Transformers, 28 | CVPR 2021. 29 | Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen. 30 | """ 31 | 32 | import functools 33 | 34 | import tensorflow as tf 35 | 36 | from deeplab2.model.encoder import axial_resnet 37 | 38 | resnet50 = functools.partial( 39 | axial_resnet.AxialResNet, 40 | output_stride=32, 41 | classification_mode=True, 42 | backbone_type="resnet", 43 | use_axial_beyond_stride=0, 44 | backbone_use_transformer_beyond_stride=0, 45 | activation="relu") 46 | 47 | # This is the same backbone as MaX-S, which uses Inception Stem and 48 | # incorporates Axial-Attention in the last two stages of ResNet-50. 
49 | axial_resnet50 = functools.partial( 50 | axial_resnet.AxialResNet, 51 | output_stride=32, 52 | classification_mode=True, 53 | backbone_type="resnet_beta", 54 | use_axial_beyond_stride=16, 55 | backbone_use_transformer_beyond_stride=0, 56 | activation="gelu") 57 | 58 | 59 | def get_model(model_name, input_shape, drop_path_keep_prob=1.0, **kwargs): 60 | """Gets an (Axial-)ResNet model.""" 61 | block_group_config = { 62 | "drop_path_schedule": "linear", 63 | "drop_path_keep_prob": drop_path_keep_prob 64 | } 65 | model_name = model_name.lower() 66 | if model_name == "resnet50": 67 | model = resnet50( 68 | name=model_name, block_group_config=block_group_config, **kwargs) 69 | elif model_name == "axial_resnet50": 70 | model = axial_resnet50( 71 | name=model_name, block_group_config=block_group_config, **kwargs) 72 | else: 73 | raise ValueError("Unsupported backbone %s!" % model_name) 74 | 75 | # Build the model. 76 | model(tf.keras.Input(shape=input_shape)) 77 | 78 | return model 79 | -------------------------------------------------------------------------------- /model/pixel_encoder/axial_resnet_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for (Axial-)ResNets.""" 17 | 18 | 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from deeplab2.model.pixel_encoder import axial_resnet 24 | 25 | 26 | class AxialResNetTest(tf.test.TestCase, parameterized.TestCase): 27 | 28 | # The parameter count does not include the classification head. 
29 | @parameterized.parameters( 30 | ('resnet50', 23508032), 31 | ('axial_resnet50', 41343424), 32 | ) 33 | def test_model_output_shape_and_num_params(self, model_name, 34 | expected_num_params): 35 | model = axial_resnet.get_model(model_name, 36 | input_shape=(224, 224, 3)) 37 | output = model(tf.keras.Input(shape=(224, 224, 3))) 38 | 39 | if model_name == 'resnet50': 40 | dims = [64, 256, 512, 1024, 2048] 41 | elif model_name == 'axial_resnet50': 42 | dims = [128, 256, 512, 1024, 2048] 43 | 44 | self.assertListEqual(output['stage1'].get_shape().as_list(), 45 | [None, 112, 112, dims[0]]) 46 | self.assertListEqual(output['stage2'].get_shape().as_list(), 47 | [None, 56, 56, dims[1]]) 48 | self.assertListEqual(output['stage3'].get_shape().as_list(), 49 | [None, 28, 28, dims[2]]) 50 | self.assertListEqual(output['stage4'].get_shape().as_list(), 51 | [None, 14, 14, dims[3]]) 52 | self.assertListEqual(output['stage5'].get_shape().as_list(), 53 | [None, 7, 7, dims[4]]) 54 | 55 | num_params = np.sum( 56 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights]) 57 | self.assertEqual(num_params, expected_num_params) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /model/pixel_encoder/convnext_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for ConvNeXt.""" 17 | 18 | 19 | from absl.testing import parameterized 20 | import numpy as np 21 | import tensorflow as tf 22 | 23 | from deeplab2.model.pixel_encoder import convnext 24 | 25 | 26 | class ConvNeXtTest(tf.test.TestCase, parameterized.TestCase): 27 | 28 | # The parameter count does not include the classification head. 
29 | @parameterized.parameters( 30 | ('convnext_tiny', 27818592), 31 | ('convnext_small', 49453152), 32 | ('convnext_base', 87564416), 33 | ('convnext_large', 196227264), 34 | ('convnext_xlarge', 348143872), 35 | ) 36 | def test_model_output_shape_and_num_params(self, model_name, 37 | expected_num_params): 38 | model = convnext.get_model(model_name, 39 | input_shape=(224, 224, 3)) 40 | output = model(tf.keras.Input(shape=(224, 224, 3))) 41 | 42 | if model_name.lower() in ['convnext_tiny', 'convnext_small']: 43 | dims = [96, 192, 384, 768] 44 | elif model_name.lower() in ['convnext_base',]: 45 | dims = [128, 256, 512, 1024] 46 | elif model_name.lower() in ['convnext_large',]: 47 | dims = [192, 384, 768, 1536] 48 | elif model_name.lower() in ['convnext_xlarge',]: 49 | dims = [256, 512, 1024, 2048] 50 | 51 | self.assertListEqual(output['stage1'].get_shape().as_list(), 52 | [None, 56, 56, dims[0]]) 53 | self.assertListEqual(output['stage2'].get_shape().as_list(), 54 | [None, 56, 56, dims[0]]) 55 | self.assertListEqual(output['stage3'].get_shape().as_list(), 56 | [None, 28, 28, dims[1]]) 57 | self.assertListEqual(output['stage4'].get_shape().as_list(), 58 | [None, 14, 14, dims[2]]) 59 | self.assertListEqual(output['stage5'].get_shape().as_list(), 60 | [None, 7, 7, dims[3]]) 61 | 62 | num_params = np.sum( 63 | [np.prod(v.get_shape().as_list()) for v in model.trainable_weights]) 64 | self.assertEqual(num_params, expected_num_params) 65 | 66 | @parameterized.parameters( 67 | ('convnext_tiny', 224, 4383527995), 68 | ('convnext_small', 224, 8563618819), 69 | ('convnext_base', 224, 15194596739), 70 | ('convnext_large', 224, 34121222275), 71 | ('convnext_xlarge', 224, 60600740739), 72 | ) 73 | def test_model_flops(self, 74 | model_name, 75 | input_resolution, 76 | expected_multiply_adds): 77 | input_shape = [1, input_resolution, input_resolution, 3] 78 | model = convnext.get_model(model_name, 79 | input_shape=input_shape[1:]) 80 | model(tf.keras.Input(shape=input_shape[1:])) 81 | 82 | forward_pass = tf.function( 83 | model.call, 84 | input_signature=[tf.TensorSpec(shape=input_shape)]) 85 | 86 | graph_info = tf.compat.v1.profiler.profile( 87 | forward_pass.get_concrete_function().graph, 88 | options=tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()) 89 | multiply_adds = graph_info.total_float_ops // 2 90 | self.assertEqual(multiply_adds, expected_multiply_adds) 91 | 92 | if __name__ == '__main__': 93 | tf.test.main() 94 | -------------------------------------------------------------------------------- /model/post_processor/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /model/post_processor/post_processor_builder.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains a post-processor builder used in the DeepLab model.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2 import common 21 | from deeplab2 import config_pb2 22 | from deeplab2.data import dataset 23 | from deeplab2.model import utils 24 | from deeplab2.model.post_processor import max_deeplab 25 | from deeplab2.model.post_processor import panoptic_deeplab 26 | 27 | 28 | def get_post_processor( 29 | config: config_pb2.ExperimentOptions, 30 | dataset_descriptor: dataset.DatasetDescriptor) -> tf.keras.layers.Layer: 31 | """Initializes a DeepLab post-processor. 32 | 33 | Args: 34 | config: A config_pb2.ExperimentOptions configuration. 35 | dataset_descriptor: A dataset.DatasetDescriptor. 36 | 37 | Returns: 38 | PostProcessor: A post-processor depending on the configuration. 39 | """ 40 | supported_tasks = utils.get_supported_tasks(config) 41 | if config.model_options.WhichOneof('meta_architecture') == 'max_deeplab': 42 | return max_deeplab.PostProcessor(config, dataset_descriptor) 43 | if common.TASK_PANOPTIC_SEGMENTATION in supported_tasks: 44 | return panoptic_deeplab.PostProcessor(config, dataset_descriptor) 45 | return panoptic_deeplab.SemanticOnlyPostProcessor() 46 | -------------------------------------------------------------------------------- /model/post_processor/post_processor_builder_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
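
The builder above dispatches on two signals: the meta_architecture oneof and the set of supported tasks. A compact, runnable restatement of that decision order (the task string 'panoptic_segmentation' is a stand-in for common.TASK_PANOPTIC_SEGMENTATION, whose exact value is not shown in this listing):

    def choose_post_processor(meta_architecture, supported_tasks):
      # Mirrors the order of the checks in get_post_processor above.
      if meta_architecture == 'max_deeplab':
        return 'max_deeplab.PostProcessor'
      if 'panoptic_segmentation' in supported_tasks:
        return 'panoptic_deeplab.PostProcessor'
      return 'panoptic_deeplab.SemanticOnlyPostProcessor'

    assert choose_post_processor('max_deeplab', set()) == 'max_deeplab.PostProcessor'
    assert (choose_post_processor('panoptic_deeplab', set())
            == 'panoptic_deeplab.SemanticOnlyPostProcessor')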
15 | 16 | """Tests for post_processor_builder.py.""" 17 | 18 | import tensorflow as tf 19 | 20 | from google.protobuf import text_format 21 | from deeplab2 import common 22 | from deeplab2 import config_pb2 23 | from deeplab2.data import dataset 24 | from deeplab2.model.post_processor import post_processor_builder 25 | 26 | 27 | class EvaluatorTest(tf.test.TestCase): 28 | 29 | def test_evaluates_panoptic_deeplab_model(self): 30 | experiment_options_textproto = """ 31 | experiment_name: "evaluation_test" 32 | eval_dataset_options { 33 | dataset: "cityscapes_panoptic" 34 | file_pattern: "EMPTY" 35 | batch_size: 1 36 | crop_size: 1025 37 | crop_size: 2049 38 | # Skip resizing. 39 | min_resize_value: 0 40 | max_resize_value: 0 41 | } 42 | evaluator_options { 43 | continuous_eval_timeout: -1 44 | stuff_area_limit: 2048 45 | center_score_threshold: 0.1 46 | nms_kernel: 13 47 | save_predictions: true 48 | save_raw_predictions: false 49 | } 50 | """ 51 | config = text_format.Parse(experiment_options_textproto, 52 | config_pb2.ExperimentOptions()) 53 | config.model_options.panoptic_deeplab.instance.enable = True 54 | post_processor = post_processor_builder.get_post_processor( 55 | config, dataset.CITYSCAPES_PANOPTIC_INFORMATION) 56 | 57 | result_dict = { 58 | common.PRED_SEMANTIC_PROBS_KEY: 59 | tf.zeros([1, 1025, 2049, 19], dtype=tf.float32), 60 | common.PRED_CENTER_HEATMAP_KEY: 61 | tf.zeros([1, 1025, 2049, 1], dtype=tf.float32), 62 | common.PRED_OFFSET_MAP_KEY: 63 | tf.zeros([1, 1025, 2049, 2], dtype=tf.float32) 64 | } 65 | processed_dict = post_processor(result_dict) 66 | expected_keys = { 67 | common.PRED_PANOPTIC_KEY, 68 | common.PRED_SEMANTIC_KEY, 69 | common.PRED_INSTANCE_KEY, 70 | common.PRED_INSTANCE_CENTER_KEY, 71 | common.PRED_INSTANCE_SCORES_KEY 72 | } 73 | self.assertCountEqual(processed_dict.keys(), expected_keys) 74 | 75 | 76 | if __name__ == '__main__': 77 | tf.test.main() 78 | -------------------------------------------------------------------------------- /model/post_processor/vip_deeplab_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Test for vip_deeplab.py.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from deeplab2.model.post_processor import vip_deeplab 21 | 22 | 23 | class PostProcessingTest(tf.test.TestCase): 24 | 25 | def test_stitch_video_panoptic_prediction(self): 26 | concat_semantic = np.array( 27 | [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 28 | dtype=np.int32) 29 | concat_instance = np.array( 30 | [[[1, 1, 2, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 31 | dtype=np.int32) 32 | next_semantic = np.array( 33 | [[[0, 1, 1, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 34 | dtype=np.int32) 35 | next_instance = np.array( 36 | [[[2, 0, 0, 1], [2, 0, 0, 1], [2, 4, 4, 1], [5, 5, 3, 3]]], 37 | dtype=np.int32) 38 | label_divisor = 1000 39 | concat_panoptic = concat_semantic * label_divisor + concat_instance 40 | next_panoptic = next_semantic * label_divisor + next_instance 41 | new_panoptic = vip_deeplab.stitch_video_panoptic_prediction( 42 | concat_panoptic, next_panoptic, label_divisor) 43 | # The expected instance is manually computed. It should receive the IDs 44 | # propagated from concat_instance by IoU matching between concat_panoptic 45 | # and next_panoptic. 46 | expected_semantic = next_semantic 47 | expected_instance = np.array( 48 | [[[1, 0, 0, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 49 | dtype=np.int32) 50 | expected_panoptic = expected_semantic * label_divisor + expected_instance 51 | np.testing.assert_array_equal(expected_panoptic, new_panoptic) 52 | 53 | def test_tf_video_panoptic_prediction_stitcher(self): 54 | concat_semantic = np.array( 55 | [[[0, 0, 0, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 56 | dtype=np.int32) 57 | concat_instance = np.array( 58 | [[[1, 1, 2, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 59 | dtype=np.int32) 60 | next_semantic = np.array( 61 | [[[0, 1, 1, 0], [0, 1, 1, 0], [0, 2, 2, 0], [2, 2, 3, 3]]], 62 | dtype=np.int32) 63 | next_instance = np.array( 64 | [[[2, 0, 0, 1], [2, 0, 0, 1], [2, 4, 4, 1], [5, 5, 3, 3]]], 65 | dtype=np.int32) 66 | label_divisor = 1000 67 | concat_panoptic = concat_semantic * label_divisor + concat_instance 68 | next_panoptic = next_semantic * label_divisor + next_instance 69 | stitcher = vip_deeplab.VideoPanopticPredictionStitcher(label_divisor) 70 | new_panoptic = stitcher( 71 | tf.convert_to_tensor(concat_panoptic), 72 | tf.convert_to_tensor(next_panoptic)).numpy() 73 | # The expected instance is manually computed. It should receive the IDs 74 | # propagated from concat_instance by IoU matching between concat_panoptic 75 | # and next_panoptic. 76 | expected_semantic = next_semantic 77 | expected_instance = np.array( 78 | [[[1, 0, 0, 2], [1, 0, 0, 2], [1, 1, 1, 2], [2, 2, 1, 1]]], 79 | dtype=np.int32) 80 | expected_panoptic = expected_semantic * label_divisor + expected_instance 81 | np.testing.assert_array_equal(expected_panoptic, new_panoptic) 82 | 83 | 84 | if __name__ == '__main__': 85 | tf.test.main() 86 | -------------------------------------------------------------------------------- /model/test_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """This file contains utility functions for the model tests.""" 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | 21 | def create_test_input(batch, height, width, channels): 22 | """Creates test input tensor.""" 23 | input_tensor = np.tile( 24 | np.reshape( 25 | np.reshape(np.arange(height), [height, 1]) + 26 | np.reshape(np.arange(width), [1, width]), 27 | [1, height, width, 1]), 28 | [batch, 1, 1, channels]) 29 | # Normalize the input tensor so that the outputs are not too large. 30 | input_tensor = (input_tensor * 2 / np.max(input_tensor)) - 1 31 | return tf.cast(input_tensor, tf.float32) 32 | -------------------------------------------------------------------------------- /model/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for test_utils.""" 17 | 18 | import tensorflow as tf 19 | 20 | from deeplab2.model import test_utils 21 | 22 | 23 | class TestUtilsTest(tf.test.TestCase): 24 | 25 | def test_create_test_input(self): 26 | input_shape = [1, 2, 3, 4] 27 | input_tensor = test_utils.create_test_input(*input_shape) 28 | self.assertListEqual(input_tensor.get_shape().as_list(), input_shape) 29 | 30 | 31 | if __name__ == '__main__': 32 | tf.test.main() 33 | -------------------------------------------------------------------------------- /model/transformer_decoder/kmax_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
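
A quick illustration of the deterministic ramp that model/test_utils.create_test_input above builds, which keeps test outputs reproducible (plain NumPy, shown for a 1x2x3x1 request; the final tf.cast is omitted):

    import numpy as np

    height, width = 2, 3
    ramp = np.reshape(np.arange(height), [height, 1]) + np.reshape(
        np.arange(width), [1, width])
    # ramp == [[0, 1, 2],
    #          [1, 2, 3]]
    normalized = ramp * 2 / np.max(ramp) - 1  # Rescaled into [-1, 1].
    # normalized == [[-1., -1/3, 1/3],
    #                [-1/3, 1/3, 1.]]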
15 | 16 | """Tests for kMaX transformer decoder.""" 17 | 18 | import functools 19 | import os 20 | 21 | import tensorflow as tf 22 | 23 | from google.protobuf import text_format 24 | from deeplab2 import config_pb2 25 | from deeplab2.data import dataset 26 | from deeplab2.model import builder 27 | from deeplab2.model.transformer_decoder import kmax 28 | # resources dependency 29 | 30 | 31 | class KMaXTransformerDecoderTest(tf.test.TestCase): 32 | 33 | def test_model_output_shape(self): 34 | config_path = 'deeplab2/configs/example' 35 | def _read_proto_file(filename, proto): 36 | filename = filename # OSS: removed internal filename loading. 37 | with tf.io.gfile.GFile(filename, 'r') as proto_file: 38 | return text_format.ParseLines(proto_file, proto) 39 | proto_filename = os.path.join(config_path, 40 | 'example_coco_max_deeplab.textproto') 41 | config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions()) 42 | dataset_descriptor = dataset.MAP_NAME_TO_DATASET_INFO['coco_panoptic'] 43 | auxiliary_predictor_func = functools.partial( 44 | builder.create_decoder, 45 | model_options=config.model_options, 46 | bn_layer=tf.keras.layers.BatchNormalization, 47 | ignore_label=dataset_descriptor.ignore_label, 48 | use_auxiliary_semantic_head=False) 49 | 50 | model = kmax.KMaXTransformerDecoder( 51 | name='kmax_pixel_decoder', 52 | auxiliary_predictor_func=auxiliary_predictor_func) 53 | output = model({ 54 | 'stage1': tf.keras.Input(shape=(321, 321, 64)), 55 | 'stage2': tf.keras.Input(shape=(161, 161, 128)), 56 | 'stage3': tf.keras.Input(shape=(81, 81, 256)), 57 | 'stage4': tf.keras.Input(shape=(41, 41, 512)), 58 | 'stage5': tf.keras.Input(shape=(21, 21, 1024)), 59 | 'decoder_stage1': tf.keras.Input(shape=(21, 21, 2048)), 60 | 'decoder_stage2': tf.keras.Input(shape=(41, 41, 1024)), 61 | 'decoder_stage3': tf.keras.Input(shape=(81, 81, 512)), 62 | 'decoder_output': tf.keras.Input(shape=(161, 161, 256)), 63 | }) 64 | 65 | self.assertListEqual( 66 | output['transformer_class_feature'].get_shape().as_list(), 67 | [None, 128, 256]) 68 | self.assertListEqual( 69 | output['transformer_mask_feature'].get_shape().as_list(), 70 | [None, 128, 256]) 71 | self.assertListEqual(output['feature_panoptic'].get_shape().as_list(), 72 | [None, 161, 161, 256]) 73 | self.assertListEqual(output['feature_semantic'].get_shape().as_list(), 74 | [None, 21, 21, 1024]) 75 | 76 | 77 | if __name__ == '__main__': 78 | tf.test.main() 79 | -------------------------------------------------------------------------------- /tensorflow_ops/kernels/merge_semantic_and_instance_maps_op.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The Deeplab2 Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 
15 | #include /*third_party*/"tensorflow/core/framework/op.h"
16 | #include /*third_party*/"tensorflow/core/framework/shape_inference.h"
17 | 
18 | namespace tensorflow_models {
19 | namespace deeplab {
20 | namespace deeplab2 {
21 | 
22 | using tensorflow::shape_inference::DimensionHandle;
23 | using tensorflow::shape_inference::InferenceContext;
24 | using tensorflow::shape_inference::ShapeHandle;
25 | 
26 | REGISTER_OP("MergeSemanticAndInstanceMaps")
27 |     .Input("semantic_maps: int32")
28 |     .Input("instance_maps: int32")
29 |     .Input("thing_ids: int32")
30 |     .Attr("label_divisor: int = 256")
31 |     .Attr("stuff_area_limit: int = 0")
32 |     .Attr("void_label: int = 0")
33 |     .Output("parsing_maps: int32")
34 |     .SetShapeFn([](InferenceContext* c) {
35 |       ShapeHandle semantic_maps;
36 |       ShapeHandle instance_maps;
37 |       ShapeHandle thing_ids;
38 |       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &semantic_maps));
39 |       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &instance_maps));
40 |       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &thing_ids));
41 |       DimensionHandle batch = c->Dim(semantic_maps, 0);
42 |       DimensionHandle height = c->Dim(semantic_maps, 1);
43 |       DimensionHandle width = c->Dim(semantic_maps, 2);
44 |       c->set_output(0, c->MakeShape({batch, height, width}));
45 |       return tensorflow::OkStatus();
46 |     })
47 |     .Doc(R"doc(
48 | Generates parsing maps from semantic maps and instance maps.
49 | 
50 | Parsing maps, or panoptic segmentation, are merged from the predicted semantic
51 | maps and class-agnostic instance maps. This function merges the maps in the
52 | following way:
53 | 
54 | 1) If a pixel belongs to a `stuff` class (e.g., sky), the function directly
55 | uses the semantic label from the semantic map and uses 0 as the instance label.
56 | 2) If a pixel belongs to a `thing` class (e.g., person), it uses the instance
57 | label from the instance map and uses the majority vote of the semantic labels
58 | of the same instance as the final semantic label.
59 | 3) The function relabels each instance, so that the instance label of each
60 | semantic class is in the range of [1, num_instances_of_the_semantic_class].
61 | 
62 | Note that this operation was first proposed in the DeeperLab paper and adopted
63 | by the Panoptic-DeepLab framework.
64 | - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
65 | - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
66 | 
67 | semantic_maps: An int32 Tensor with shape `[batch, height, width]` whose value
68 |   indicates the predicted semantic label of each pixel.
69 | instance_maps: An int32 Tensor with shape `[batch, height, width]` whose value
70 |   indicates the predicted instance label of each pixel.
71 | thing_ids: An int32 Tensor with shape `[num_thing_ids]` whose value refers to
72 |   the semantic ids of the thing classes.
73 | label_divisor: An integer. The value used to combine the semantic and instance
74 |   map to generate the parsing map. In particular, the value of a pixel in the
75 |   parsing map is equal to its corresponding semantic label times label_divisor
76 |   plus instance label (i.e., semantic_label * label_divisor + instance_label).
77 | stuff_area_limit: An integer. Predicted stuff segments whose areas are smaller
78 |   than this threshold are assigned to the VOID label.
79 | void_label: An integer, specifying the VOID label.
80 | parsing_maps: An int32 Tensor with shape `[batch, height, width]` whose value
81 |   indicates the merged semantic and instance label of each pixel.
82 | )doc");
83 | 
84 | }  // namespace deeplab2
85 | }  // namespace deeplab
86 | }  // namespace tensorflow_models
87 | 
--------------------------------------------------------------------------------
/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op_kernel.h:
--------------------------------------------------------------------------------
1 | // Copyright 2023 The Deeplab2 Authors.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | //     http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
16 | #define DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
17 | #include <cstdint>
18 | 
19 | #include <unordered_set>
20 | 
21 | #include /*third_party*/"tensorflow/core/framework/numeric_types.h"
22 | #include /*third_party*/"tensorflow/core/framework/op_kernel.h"
23 | #include /*third_party*/"tensorflow/core/framework/tensor.h"
24 | #include /*third_party*/"tensorflow/core/framework/tensor_types.h"
25 | 
26 | namespace tensorflow_models {
27 | namespace deeplab {
28 | namespace deeplab2 {
29 | namespace functor {
30 | 
31 | template <typename Device>
32 | struct MergeSemanticAndInstanceMaps {
33 |   // Functor that merges semantic and instance maps.
34 |   void operator()(
35 |       const Device& d,
36 |       typename tensorflow::TTypes<int32_t, 3>::ConstTensor semantic_maps,
37 |       typename tensorflow::TTypes<int32_t, 3>::ConstTensor instance_maps,
38 |       const std::unordered_set<int32_t>& thing_ids_set, int label_divisor,
39 |       int stuff_area_limit, int void_label,
40 |       typename tensorflow::TTypes<int32_t, 3>::Tensor parsing_maps);
41 | };
42 | 
43 | // Helper method to convert a list of thing IDs into a hashset.
44 | template <typename Device>
45 | std::unordered_set<int32_t> Convert1DInt32TensorToSet(
46 |     const Device& d, const tensorflow::Tensor& tensor);
47 | 
48 | }  // namespace functor
49 | }  // namespace deeplab2
50 | }  // namespace deeplab
51 | }  // namespace tensorflow_models
52 | 
53 | #endif  // DEEPLAB2_MERGE_SEMANTIC_AND_INSTANCE_MAPS_OP_KERNEL_H_
54 | 
--------------------------------------------------------------------------------
/tensorflow_ops/python/kernel_tests/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
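
The kernel tests in this directory exercise the op registered above. A minimal NumPy sketch of the panoptic encoding it emits, assuming the default label_divisor of 256 (class ids 24 = person and 7 = road are taken from the Cityscapes categories JSON later in this listing; illustrative only):

    import numpy as np

    label_divisor = 256
    semantic = np.array([[24, 7]], dtype=np.int32)  # person (thing), road (stuff).
    instance = np.array([[1, 0]], dtype=np.int32)   # Stuff pixels use instance 0.
    parsing = semantic * label_divisor + instance   # -> [[6145, 1792]]

    # The encoding is invertible, which downstream evaluation relies on.
    assert np.array_equal(parsing // label_divisor, semantic)
    assert np.array_equal(parsing % label_divisor, instance)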
15 | 
16 | 
--------------------------------------------------------------------------------
/tensorflow_ops/python/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
--------------------------------------------------------------------------------
/tensorflow_ops/python/ops/merge_semantic_and_instance_maps_op.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Utility function for the C++ TensorFlow MergeSemanticAndInstanceMaps op."""
17 | 
18 | import tensorflow as tf
19 | 
20 | # Make the TensorFlow MergeSemanticAndInstanceMaps op accessible by importing
21 | # merge_semantic_and_instance_maps_op.py.
22 | from tensorflow.python.framework import load_library
23 | from tensorflow.python.platform import resource_loader
24 | gen_merge_semantic_and_instance_maps_op = load_library.load_op_library(
25 |     resource_loader.get_path_to_datafile(
26 |         '../../kernels/merge_semantic_and_instance_maps_op.so'))
27 | 
28 | merge_semantic_and_instance_maps = (
29 |     gen_merge_semantic_and_instance_maps_op.merge_semantic_and_instance_maps)
30 | 
31 | tf.no_gradient('MergeSemanticAndInstanceMaps')
32 | 
--------------------------------------------------------------------------------
/tracker/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
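
A hedged usage sketch for the Python wrapper above: the input and attribute names follow the REGISTER_OP declaration earlier in this listing, the thing_ids values are the Cityscapes thing classes, and the attribute values are illustrative assumptions rather than recommended settings:

    import tensorflow as tf
    from deeplab2.tensorflow_ops.python.ops import merge_semantic_and_instance_maps_op

    semantic_maps = tf.zeros([1, 65, 65], dtype=tf.int32)
    instance_maps = tf.zeros([1, 65, 65], dtype=tf.int32)
    thing_ids = tf.constant([24, 25, 26, 27, 28, 31, 32, 33], dtype=tf.int32)
    parsing_maps = merge_semantic_and_instance_maps_op.merge_semantic_and_instance_maps(
        semantic_maps, instance_maps, thing_ids,
        label_divisor=256, stuff_area_limit=2048, void_label=0)
    # parsing_maps: int32 [1, 65, 65]; per pixel, semantic * label_divisor + instance.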
15 | 
16 | 
--------------------------------------------------------------------------------
/tracker/optical_flow_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Utility functions for optical flow."""
17 | import cv2
18 | import numpy as np
19 | 
20 | 
21 | def warp_flow(img: np.ndarray, flow_tensor: np.ndarray) -> np.ndarray:
22 |   """Warps an image along a dense optical flow field.
23 | 
24 |   The per-pixel (dx, dy) flow offsets are converted into an absolute
25 |   sampling grid, which cv2.remap uses to bilinearly sample the image.
26 |   """
27 |   flow = flow_tensor.copy()
28 |   h, w = flow.shape[:2]
29 |   # Turn relative offsets into absolute source coordinates.
30 |   flow[..., 0] += np.arange(w)
31 |   flow[..., 1] += np.arange(h)[:, np.newaxis]
32 |   res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
33 |   return res
34 | 
35 | 
36 | def remove_occlusions(warped_binary_img: np.ndarray,
37 |                       occlusion_map: np.ndarray) -> np.ndarray:
38 |   """Zeroes out pixels of a warped binary mask that are marked as occluded."""
39 |   return warped_binary_img.astype(bool) & (1 - occlusion_map).astype(bool)
40 | 
--------------------------------------------------------------------------------
/trainer/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
--------------------------------------------------------------------------------
/trainer/distribution_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """This file contains helper functions to run training in a distributed way."""
17 | 
18 | from typing import Text, Optional
19 | 
20 | import tensorflow as tf
21 | 
22 | 
23 | def tpu_initialize(tpu_address: Text):
24 |   """Initializes TPU for TF 2.x training.
25 | 
26 |   Args:
27 |     tpu_address: string, bns address of master TPU worker.
28 | 
29 |   Returns:
30 |     A TPUClusterResolver.
31 |   """
32 |   cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
33 |       tpu=tpu_address)
34 |   if tpu_address not in ('', 'local'):
35 |     tf.config.experimental_connect_to_cluster(cluster_resolver)
36 |   tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
37 |   return cluster_resolver
38 | 
39 | 
40 | def create_strategy(tpu_address: Optional[Text],
41 |                     num_gpus: int = 0) -> tf.distribute.Strategy:
42 |   """Creates a strategy based on the given parameters.
43 | 
44 |   The strategies are created based on the following criteria and order:
45 |   1. If `tpu_address` is not None, a TPUStrategy is used.
46 |   2. If num_gpus > 1, a MirroredStrategy is used, which replicates the model on
47 |      each GPU.
48 |   3. If num_gpus == 1, a OneDeviceStrategy is used on the GPU.
49 |   4. If num_gpus == 0, a OneDeviceStrategy is used on the CPU.
50 | 
51 |   Args:
52 |     tpu_address: The optional name or address of the TPU to connect to or None.
53 |     num_gpus: A non-negative integer specifying the number of GPUs.
54 | 
55 |   Returns:
56 |     A tf.distribute.Strategy.
57 | 
58 |   Raises:
59 |     ValueError: If `num_gpus` is negative and tpu_address is None.
60 |   """
61 |   if tpu_address is not None:
62 |     resolver = tpu_initialize(tpu_address)
63 |     return tf.distribute.TPUStrategy(resolver)
64 |   else:
65 |     if num_gpus < 0:
66 |       raise ValueError('`num_gpus` must not be negative.')
67 |     elif num_gpus == 0:
68 |       devices = ['device:CPU:0']
69 |     else:
70 |       devices = ['device:GPU:%d' % i for i in range(num_gpus)]
71 |     if len(devices) == 1:
72 |       return tf.distribute.OneDeviceStrategy(devices[0])
73 |     return tf.distribute.MirroredStrategy(devices)
74 | 
--------------------------------------------------------------------------------
/trainer/runner_utils_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for runner_utils.py."""
17 | 
18 | import os
19 | 
20 | import numpy as np
21 | import tensorflow as tf
22 | 
23 | from google.protobuf import text_format
24 | from deeplab2 import config_pb2
25 | from deeplab2.data import dataset
26 | from deeplab2.model import deeplab
27 | from deeplab2.trainer import runner_utils
28 | # resources dependency
29 | 
30 | _CONFIG_PATH = 'deeplab2/configs/example'
31 | 
32 | 
33 | def _read_proto_file(filename, proto):
34 |   filename = filename  # OSS: removed internal filename loading.
35 |   with tf.io.gfile.GFile(filename, 'r') as proto_file:
36 |     return text_format.ParseLines(proto_file, proto)
37 | 
38 | 
39 | def _create_model_from_test_proto(file_name,
40 |                                   dataset_name='coco_panoptic'):
41 |   proto_filename = os.path.join(_CONFIG_PATH, file_name)
42 |   config = _read_proto_file(proto_filename, config_pb2.ExperimentOptions())
43 |   return deeplab.DeepLab(config,
44 |                          dataset.MAP_NAME_TO_DATASET_INFO[dataset_name]
45 |                          ), config
46 | 
47 | 
48 | class RunnerUtilsTest(tf.test.TestCase):
49 | 
50 |   def test_check_if_variable_in_backbone_with_max_deeplab(self):
51 |     model, experiment_options = _create_model_from_test_proto(
52 |         'example_coco_max_deeplab.textproto', dataset_name='coco_panoptic')
53 |     train_crop_size = tuple(
54 |         experiment_options.train_dataset_options.crop_size)
55 |     input_tensor = tf.random.uniform(
56 |         shape=(2, train_crop_size[0], train_crop_size[1], 3))
57 |     _ = model(input_tensor, training=True)
58 | 
59 |     encoder = model.checkpoint_items['encoder']
60 |     encoder_variable_names = [x.name for x in encoder.trainable_variables]
61 |     encoder_name = experiment_options.model_options.backbone.name
62 | 
63 |     num_backbone_params = 0
64 |     backbone_optimizer_inputs = []
65 |     for variable in model.trainable_weights:
66 |       if runner_utils.check_if_variable_in_backbone(variable, encoder_name,
67 |                                                     encoder_variable_names):
68 |         backbone_optimizer_inputs.append(variable)
69 |         num_backbone_params += np.prod(variable.get_shape().as_list())
70 |     # The number of Tensors in the backbone. We use this number in addition to
71 |     # the number of parameters as a check of correctness.
72 |     self.assertLen(backbone_optimizer_inputs, 301)
73 |     # The same number of parameters as max_deeplab_s_backbone.
74 |     self.assertEqual(num_backbone_params, 41343424)
75 | 
76 | 
77 | if __name__ == '__main__':
78 |   tf.test.main()
79 | 
--------------------------------------------------------------------------------
/trainer/train.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """This file contains the code to run (train and evaluate) a DeepLab model."""
17 | 
18 | import os
19 | from absl import app
20 | from absl import flags
21 | from absl import logging
22 | import tensorflow as tf
23 | 
24 | from google.protobuf import text_format
25 | from deeplab2 import config_pb2
26 | from deeplab2.trainer import train_lib
27 | 
28 | flags.DEFINE_enum(
29 |     'mode',
30 |     default=None,
31 |     enum_values=['train', 'eval', 'train_and_eval', 'continuous_eval'],
32 |     help='Mode to run: `train`, `eval`, `train_and_eval`, `continuous_eval`.')
33 | 
34 | flags.DEFINE_string(
35 |     'model_dir',
36 |     default=None,
37 |     help='The base directory where the model and training/evaluation summaries '
38 |     'are stored. The path will be combined with the `experiment_name` defined '
39 |     'in the config file to create a folder under which all files are stored.')
40 | 
41 | flags.DEFINE_string(
42 |     'config_file',
43 |     default=None,
44 |     help='Proto file which specifies the experiment configuration. The proto '
45 |     'definition of ExperimentOptions is specified in config.proto.')
46 | 
47 | flags.DEFINE_string(
48 |     'master',
49 |     default=None,
50 |     help='The Cloud TPU to use for training. This should be either the name '
51 |     'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 '
52 |     'url.')
53 | 
54 | flags.DEFINE_integer(
55 |     'num_gpus',
56 |     default=0,
57 |     help='The number of GPUs to use. If the `master` flag is not set, this '
58 |     'parameter specifies whether GPUs should be used and how many of them '
59 |     '(default: 0).')
60 | 
61 | FLAGS = flags.FLAGS
62 | 
63 | 
64 | def main(_):
65 |   logging.info('Reading the config file.')
66 |   with tf.io.gfile.GFile(FLAGS.config_file, 'r') as proto_file:
67 |     config = text_format.ParseLines(proto_file, config_pb2.ExperimentOptions())
68 | 
69 |   logging.info('Starting the experiment.')
70 |   combined_model_dir = os.path.join(FLAGS.model_dir, config.experiment_name)
71 |   train_lib.run_experiment(FLAGS.mode, config, combined_model_dir,
72 |                            FLAGS.master, FLAGS.num_gpus)
73 | 
74 | 
75 | if __name__ == '__main__':
76 |   app.run(main)
77 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
--------------------------------------------------------------------------------
/utils/create_images_json_for_cityscapes.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # pylint: disable=line-too-long
17 | # pyformat: disable
18 | r"""Creates a JSON file with info for a split of Cityscapes images.
19 | 
20 | This single-purpose version has special handling for the directory structure of
21 | the Cityscapes dataset and the expected output ids.
22 | 23 | Sample commands: 24 | 25 | python create_images_json_for_cityscapes.py \ 26 | --image_dir=${DATA_ROOT}/leftImg8bit/${IMAGES_SPLIT} \ 27 | --output_json_path=${PATH_TO_SAVE}/${IMAGES_SPLIT}_images.json \ 28 | --only_basename \ 29 | --include_image_type_suffix=false 30 | """ 31 | # pyformat: enable 32 | # pylint: enable=line-too-long 33 | 34 | from __future__ import absolute_import 35 | from __future__ import division 36 | from __future__ import print_function 37 | 38 | import json 39 | import os 40 | import re 41 | 42 | from absl import app 43 | from absl import flags 44 | 45 | import tensorflow as tf 46 | 47 | FLAGS = flags.FLAGS 48 | 49 | flags.DEFINE_string( 50 | 'image_dir', None, 51 | 'The top-level directory of image files to be included in the set.') 52 | 53 | flags.DEFINE_list( 54 | 'keep_cities', None, 55 | 'Comma-separated list of strings specifying cities to be processed.') 56 | 57 | flags.DEFINE_string('output_json_path', None, 58 | 'Output path to which is written the image info JSON.') 59 | 60 | flags.DEFINE_boolean( 61 | 'only_basename', True, 62 | 'If set, the included "file_name" properties of the images in the JSON ' 63 | 'file will only include the base name and not the city directory. Used for ' 64 | 'tools that do not support nested directories.') 65 | 66 | flags.DEFINE_boolean( 67 | 'include_image_type_suffix', True, 68 | 'If set, will include the suffix of the image type (e.g. "_leftImg8bit") ' 69 | 'in the "file_name" properties of the image.') 70 | 71 | 72 | def _create_images_json(image_dir, output_json_path, only_basename=False, 73 | include_image_type_suffix=True, keep_cities=None): 74 | """Lists the images in image_dir and writes out the info JSON for them.""" 75 | images_info_array = [] 76 | for city_dir in tf.io.gfile.listdir(image_dir): 77 | if keep_cities and city_dir not in keep_cities: 78 | continue 79 | image_id_re = r'%s_[0-9]+_[0-9]+' % city_dir 80 | image_id_re = re.compile(image_id_re) 81 | for image_basename in tf.io.gfile.listdir( 82 | os.path.join(image_dir, city_dir)): 83 | match = image_id_re.match(image_basename) 84 | image_id = image_basename[match.start():match.end()] 85 | if include_image_type_suffix: 86 | file_name = image_basename 87 | else: 88 | file_name = image_id + os.path.splitext(image_basename)[1] 89 | if not only_basename: 90 | file_name = os.path.join(city_dir, file_name) 91 | image_info_dict = {'id': image_id, 'file_name': file_name} 92 | images_info_array.append(image_info_dict) 93 | 94 | info_dict = {'images': images_info_array} 95 | 96 | with tf.io.gfile.GFile(output_json_path, 'w+') as json_file: 97 | json.dump(info_dict, json_file) 98 | 99 | 100 | def main(argv): 101 | if len(argv) > 1: 102 | raise app.UsageError('Too many command-line arguments.') 103 | keep_cities = None 104 | if FLAGS.keep_cities: 105 | keep_cities = [str(x) for x in FLAGS.keep_cities] 106 | _create_images_json( 107 | FLAGS.image_dir, 108 | FLAGS.output_json_path, 109 | only_basename=FLAGS.only_basename, 110 | include_image_type_suffix=FLAGS.include_image_type_suffix, 111 | keep_cities=keep_cities) 112 | 113 | 114 | if __name__ == '__main__': 115 | flags.mark_flags_as_required(['image_dir', 'output_json_path']) 116 | app.run(main) 117 | -------------------------------------------------------------------------------- /utils/hparam_configs_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Tests for hparam_configs."""
17 | 
18 | import os
19 | import tempfile
20 | from absl import logging
21 | import tensorflow.compat.v1 as tf
22 | import yaml
23 | from deeplab2.utils import hparam_configs
24 | 
25 | 
26 | class HparamConfigsTest(tf.test.TestCase):
27 | 
28 |   def test_config_override(self):
29 |     c = hparam_configs.Config({'a': 1, 'b': 2})
30 |     self.assertEqual(c.as_dict(), {'a': 1, 'b': 2})
31 | 
32 |     c.update({'a': 10})
33 |     self.assertEqual(c.as_dict(), {'a': 10, 'b': 2})
34 | 
35 |     c.b = 20
36 |     self.assertEqual(c.as_dict(), {'a': 10, 'b': 20})
37 | 
38 |     c.override('a=true,b=ss')
39 |     self.assertEqual(c.as_dict(), {'a': True, 'b': 'ss'})
40 | 
41 |     c.override('a=100,,,b=2.3,')  # Extra ',' is fine.
42 |     self.assertEqual(c.as_dict(), {'a': 100, 'b': 2.3})
43 | 
44 |     c.override('a=2x3,b=50')  # a is a special format for image size.
45 |     self.assertEqual(c.as_dict(), {'a': '2x3', 'b': 50})
46 | 
47 |     # Overriding strings must be in the format of xx=yy.
48 |     with self.assertRaises(ValueError):
49 |       c.override('a=true,invalid_string')
50 | 
51 |   def test_config_yaml(self):
52 |     tmpdir = tempfile.gettempdir()
53 |     yaml_file_path = os.path.join(tmpdir, 'x.yaml')
54 |     with open(yaml_file_path, 'w') as f:
55 |       f.write("""
56 |         x: 2
57 |         y:
58 |           z: 'test'
59 |       """)
60 |     c = hparam_configs.Config(dict(x=234, y=2342))
61 |     c.override(yaml_file_path)
62 |     self.assertEqual(c.as_dict(), {'x': 2, 'y': {'z': 'test'}})
63 | 
64 |     yaml_file_path2 = os.path.join(tmpdir, 'y.yaml')
65 |     c.save_to_yaml(yaml_file_path2)
66 |     with open(yaml_file_path2, 'r') as f:
67 |       config_dict = yaml.load(f, Loader=yaml.FullLoader)
68 |     self.assertEqual(config_dict, {'x': 2, 'y': {'z': 'test'}})
69 | 
70 |   def test_config_override_recursive(self):
71 |     c = hparam_configs.Config({'x': 1})
72 |     self.assertEqual(c.as_dict(), {'x': 1})
73 |     c.override('y.y0=2,y.y1=3', allow_new_keys=True)
74 |     self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 2, 'y1': 3}})
75 |     c.update({'y': {'y0': 5, 'y1': {'y11': 100}}})
76 |     self.assertEqual(c.as_dict(), {'x': 1, 'y': {'y0': 5, 'y1': {'y11': 100}}})
77 |     self.assertEqual(c.y.y1.y11, 100)
78 | 
79 |   def test_config_override_list(self):
80 |     c = hparam_configs.Config({'x': [1.0, 2.0]})
81 |     self.assertEqual(c.as_dict(), {'x': [1.0, 2.0]})
82 |     c.override('x=3.0|4.0|5.0')
83 |     self.assertEqual(c.as_dict(), {'x': [3.0, 4.0, 5.0]})
84 | 
85 |   def test_registry_factory(self):
86 |     registry = hparam_configs.RegistryFactor(prefix='test:')
87 | 
88 |     @registry.register()  # Uses the class name as the key by default.
89 |     class A:
90 |       pass
91 | 
92 |     @registry.register(name='special_b')  # Uses `name` as the key if set.
93 |     class B:
94 |       pass
95 | 
96 |     self.assertEqual(registry.lookup('A'), A)
97 |     self.assertEqual(registry.lookup('special_b'), B)
98 |     with self.assertRaises(KeyError):
99 |       registry.lookup('B')
100 | 
101 | 
102 | if __name__ == '__main__':
103 |   logging.set_verbosity(logging.WARNING)
104 |   tf.test.main()
105 | 
--------------------------------------------------------------------------------
/utils/panoptic_cityscapes_categories.json:
--------------------------------------------------------------------------------
1 | [
2 |   {"name": "road", "id": 7, "isthing": 0, "color": [128, 64, 128]},
3 |   {"name": "sidewalk", "id": 8, "isthing": 0, "color": [244, 35, 232]},
4 |   {"name": "building", "id": 11, "isthing": 0, "color": [ 70, 70, 70]},
5 |   {"name": "wall", "id": 12, "isthing": 0, "color": [102, 102, 156]},
6 |   {"name": "fence", "id": 13, "isthing": 0, "color": [190, 153, 153]},
7 |   {"name": "pole", "id": 17, "isthing": 0, "color": [153, 153, 153]},
8 |   {"name": "traffic light", "id": 19, "isthing": 0, "color": [250, 170, 30]},
9 |   {"name": "traffic sign", "id": 20, "isthing": 0, "color": [220, 220, 0]},
10 |   {"name": "vegetation", "id": 21, "isthing": 0, "color": [107, 142, 35]},
11 |   {"name": "terrain", "id": 22, "isthing": 0, "color": [152, 251, 152]},
12 |   {"name": "sky", "id": 23, "isthing": 0, "color": [ 70, 130, 180]},
13 |   {"name": "person", "id": 24, "isthing": 1, "color": [220, 20, 60]},
14 |   {"name": "rider", "id": 25, "isthing": 1, "color": [255, 0, 0]},
15 |   {"name": "car", "id": 26, "isthing": 1, "color": [ 0, 0, 142]},
16 |   {"name": "truck", "id": 27, "isthing": 1, "color": [ 0, 0, 70]},
17 |   {"name": "bus", "id": 28, "isthing": 1, "color": [ 0, 60, 100]},
18 |   {"name": "train", "id": 31, "isthing": 1, "color": [ 0, 80, 100]},
19 |   {"name": "motorcycle", "id": 32, "isthing": 1, "color": [ 0, 0, 230]},
20 |   {"name": "bicycle", "id": 33, "isthing": 1, "color": [119, 11, 32]}
21 | ]
22 | 
--------------------------------------------------------------------------------
/utils/test_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The Deeplab2 Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Provide utility functions to write simple tests."""
17 | import functools
18 | 
19 | import numpy as np
20 | import tensorflow as tf
21 | 
22 | 
23 | NORMALIZATION_LAYERS = (
24 |     tf.keras.layers.experimental.SyncBatchNormalization,
25 |     tf.keras.layers.BatchNormalization
26 | )
27 | 
28 | 
29 | def create_strategy():
30 |   """Returns a strategy based on available devices.
31 | 
32 |   Does NOT work with local_multiworker_tpu_test tests!
33 | """ 34 | tpus = tf.config.list_logical_devices(device_type='TPU') 35 | gpus = tf.config.list_logical_devices(device_type='GPU') 36 | if tpus: 37 | resolver = tf.distribute.cluster_resolver.TPUClusterResolver('') 38 | tf.config.experimental_connect_to_cluster(resolver) 39 | tf.tpu.experimental.initialize_tpu_system(resolver) 40 | return tf.distribute.TPUStrategy(resolver) 41 | elif gpus: 42 | return tf.distribute.OneDeviceStrategy('/gpu:0') 43 | else: 44 | return tf.distribute.OneDeviceStrategy('/cpu:0') 45 | 46 | 47 | def test_all_strategies(func): 48 | """Decorator to test CPU, GPU and TPU strategies.""" 49 | @functools.wraps(func) 50 | def decorator(self): 51 | strategy = create_strategy() 52 | return func(self, strategy) 53 | return decorator 54 | 55 | 56 | def create_test_input(batch, height, width, channels): 57 | """Creates test input tensor.""" 58 | return tf.convert_to_tensor( 59 | np.tile( 60 | np.reshape( 61 | np.reshape(np.arange(height), [height, 1]) + 62 | np.reshape(np.arange(width), [1, width]), 63 | [1, height, width, 1]), 64 | [batch, 1, 1, channels]), dtype=tf.float32) 65 | -------------------------------------------------------------------------------- /video/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The Deeplab2 Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | --------------------------------------------------------------------------------