├── LICENSE
├── README.md
├── annotator
│   ├── canny
│   │   └── __init__.py
│   ├── content
│   │   └── __init__.py
│   ├── hed
│   │   └── __init__.py
│   ├── midas
│   │   ├── __init__.py
│   │   ├── api.py
│   │   ├── midas
│   │   │   ├── __init__.py
│   │   │   ├── base_model.py
│   │   │   ├── blocks.py
│   │   │   ├── dpt_depth.py
│   │   │   ├── midas_net.py
│   │   │   ├── midas_net_custom.py
│   │   │   ├── transforms.py
│   │   │   └── vit.py
│   │   └── utils.py
│   ├── mlsd
│   │   ├── __init__.py
│   │   ├── models
│   │   │   ├── mbv2_mlsd_large.py
│   │   │   └── mbv2_mlsd_tiny.py
│   │   └── utils.py
│   ├── openpose
│   │   ├── __init__.py
│   │   ├── body.py
│   │   ├── hand.py
│   │   ├── model.py
│   │   └── util.py
│   ├── sketch
│   │   ├── __init__.py
│   │   └── model
│   │       └── module.py
│   ├── uniformer
│   │   ├── __init__.py
│   │   ├── configs
│   │   │   └── _base_
│   │   │       ├── datasets
│   │   │       │   ├── ade20k.py
│   │   │       │   ├── chase_db1.py
│   │   │       │   ├── cityscapes.py
│   │   │       │   ├── cityscapes_769x769.py
│   │   │       │   ├── drive.py
│   │   │       │   ├── hrf.py
│   │   │       │   ├── pascal_context.py
│   │   │       │   ├── pascal_context_59.py
│   │   │       │   ├── pascal_voc12.py
│   │   │       │   ├── pascal_voc12_aug.py
│   │   │       │   └── stare.py
│   │   │       ├── default_runtime.py
│   │   │       ├── models
│   │   │       │   ├── ann_r50-d8.py
│   │   │       │   ├── apcnet_r50-d8.py
│   │   │       │   ├── ccnet_r50-d8.py
│   │   │       │   ├── cgnet.py
│   │   │       │   ├── danet_r50-d8.py
│   │   │       │   ├── deeplabv3_r50-d8.py
│   │   │       │   ├── deeplabv3_unet_s5-d16.py
│   │   │       │   ├── deeplabv3plus_r50-d8.py
│   │   │       │   ├── dmnet_r50-d8.py
│   │   │       │   ├── dnl_r50-d8.py
│   │   │       │   ├── emanet_r50-d8.py
│   │   │       │   ├── encnet_r50-d8.py
│   │   │       │   ├── fast_scnn.py
│   │   │       │   ├── fcn_hr18.py
│   │   │       │   ├── fcn_r50-d8.py
│   │   │       │   ├── fcn_unet_s5-d16.py
│   │   │       │   ├── fpn_r50.py
│   │   │       │   ├── fpn_uniformer.py
│   │   │       │   ├── gcnet_r50-d8.py
│   │   │       │   ├── lraspp_m-v3-d8.py
│   │   │       │   ├── nonlocal_r50-d8.py
│   │   │       │   ├── ocrnet_hr18.py
│   │   │       │   ├── ocrnet_r50-d8.py
│   │   │       │   ├── pointrend_r50.py
│   │   │       │   ├── psanet_r50-d8.py
│   │   │       │   ├── pspnet_r50-d8.py
│   │   │       │   ├── pspnet_unet_s5-d16.py
│   │   │       │   ├── upernet_r50.py
│   │   │       │   └── upernet_uniformer.py
│   │   │       └── schedules
│   │   │           ├── schedule_160k.py
│   │   │           ├── schedule_20k.py
│   │   │           ├── schedule_40k.py
│   │   │           └── schedule_80k.py
│   │   ├── exp
│   │   │   └── upernet_global_small
│   │   │       ├── config.py
│   │   │       ├── run.sh
│   │   │       ├── test.sh
│   │   │       ├── test_config_g.py
│   │   │       ├── test_config_h32.py
│   │   │       └── test_config_w32.py
│   │   ├── mmcv
│   │   │   ├── __init__.py
│   │   │   ├── arraymisc
│   │   │   │   ├── __init__.py
│   │   │   │   └── quantization.py
│   │   │   ├── cnn
│   │   │   │   ├── __init__.py
│   │   │   │   ├── alexnet.py
│   │   │   │   ├── bricks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── activation.py
│   │   │   │   │   ├── context_block.py
│   │   │   │   │   ├── conv.py
│   │   │   │   │   ├── conv2d_adaptive_padding.py
│   │   │   │   │   ├── conv_module.py
│   │   │   │   │   ├── conv_ws.py
│   │   │   │   │   ├── depthwise_separable_conv_module.py
│   │   │   │   │   ├── drop.py
│   │   │   │   │   ├── generalized_attention.py
│   │   │   │   │   ├── hsigmoid.py
│   │   │   │   │   ├── hswish.py
│   │   │   │   │   ├── non_local.py
│   │   │   │   │   ├── norm.py
│   │   │   │   │   ├── padding.py
│   │   │   │   │   ├── plugin.py
│   │   │   │   │   ├── registry.py
│   │   │   │   │   ├── scale.py
│   │   │   │   │   ├── swish.py
│   │   │   │   │   ├── transformer.py
│   │   │   │   │   ├── upsample.py
│   │   │   │   │   └── wrappers.py
│   │   │   │   ├── builder.py
│   │   │   │   ├── resnet.py
│   │   │   │   ├── utils
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── flops_counter.py
│   │   │   │   │   ├── fuse_conv_bn.py
│   │   │   │   │   ├── sync_bn.py
│   │   │   │   │   └── weight_init.py
│   │   │   │   └── vgg.py
│   │   │   ├── engine
│   │   │   │   ├── __init__.py
│   │   │   │   └── test.py
│   │   │   ├── fileio
│   │   │   │   ├── __init__.py
│   │   │   │   ├── file_client.py
│   │   │   │   ├── handlers
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── json_handler.py
│   │   │   │   │   ├── pickle_handler.py
│   │   │   │   │   └── yaml_handler.py
│   │   │   │   ├── io.py
│   │   │   │   └── parse.py
│   │   │   ├── image
│   │   │   │   ├── __init__.py
│   │   │   │   ├── colorspace.py
│   │   │   │   ├── geometric.py
│   │   │   │   ├── io.py
│   │   │   │   ├── misc.py
│   │   │   │   └── photometric.py
│   │   │   ├── model_zoo
│   │   │   │   ├── deprecated.json
│   │   │   │   ├── mmcls.json
│   │   │   │   └── open_mmlab.json
│   │   │   ├── ops
│   │   │   │   ├── __init__.py
│   │   │   │   ├── assign_score_withk.py
│   │   │   │   ├── ball_query.py
│   │   │   │   ├── bbox.py
│   │   │   │   ├── border_align.py
│   │   │   │   ├── box_iou_rotated.py
│   │   │   │   ├── carafe.py
│   │   │   │   ├── cc_attention.py
│   │   │   │   ├── contour_expand.py
│   │   │   │   ├── corner_pool.py
│   │   │   │   ├── correlation.py
│   │   │   │   ├── deform_conv.py
│   │   │   │   ├── deform_roi_pool.py
│   │   │   │   ├── deprecated_wrappers.py
│   │   │   │   ├── focal_loss.py
│   │   │   │   ├── furthest_point_sample.py
│   │   │   │   ├── fused_bias_leakyrelu.py
│   │   │   │   ├── gather_points.py
│   │   │   │   ├── group_points.py
│   │   │   │   ├── info.py
│   │   │   │   ├── iou3d.py
│   │   │   │   ├── knn.py
│   │   │   │   ├── masked_conv.py
│   │   │   │   ├── merge_cells.py
│   │   │   │   ├── modulated_deform_conv.py
│   │   │   │   ├── multi_scale_deform_attn.py
│   │   │   │   ├── nms.py
│   │   │   │   ├── pixel_group.py
│   │   │   │   ├── point_sample.py
│   │   │   │   ├── points_in_boxes.py
│   │   │   │   ├── points_sampler.py
│   │   │   │   ├── psa_mask.py
│   │   │   │   ├── roi_align.py
│   │   │   │   ├── roi_align_rotated.py
│   │   │   │   ├── roi_pool.py
│   │   │   │   ├── roiaware_pool3d.py
│   │   │   │   ├── roipoint_pool3d.py
│   │   │   │   ├── saconv.py
│   │   │   │   ├── scatter_points.py
│   │   │   │   ├── sync_bn.py
│   │   │   │   ├── three_interpolate.py
│   │   │   │   ├── three_nn.py
│   │   │   │   ├── tin_shift.py
│   │   │   │   ├── upfirdn2d.py
│   │   │   │   └── voxelize.py
│   │   │   ├── parallel
│   │   │   │   ├── __init__.py
│   │   │   │   ├── _functions.py
│   │   │   │   ├── collate.py
│   │   │   │   ├── data_container.py
│   │   │   │   ├── data_parallel.py
│   │   │   │   ├── distributed.py
│   │   │   │   ├── distributed_deprecated.py
│   │   │   │   ├── registry.py
│   │   │   │   ├── scatter_gather.py
│   │   │   │   └── utils.py
│   │   │   ├── runner
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_module.py
│   │   │   │   ├── base_runner.py
│   │   │   │   ├── builder.py
│   │   │   │   ├── checkpoint.py
│   │   │   │   ├── default_constructor.py
│   │   │   │   ├── dist_utils.py
│   │   │   │   ├── epoch_based_runner.py
│   │   │   │   ├── fp16_utils.py
│   │   │   │   ├── hooks
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── checkpoint.py
│   │   │   │   │   ├── closure.py
│   │   │   │   │   ├── ema.py
│   │   │   │   │   ├── evaluation.py
│   │   │   │   │   ├── hook.py
│   │   │   │   │   ├── iter_timer.py
│   │   │   │   │   ├── logger
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── base.py
│   │   │   │   │   │   ├── dvclive.py
│   │   │   │   │   │   ├── mlflow.py
│   │   │   │   │   │   ├── neptune.py
│   │   │   │   │   │   ├── pavi.py
│   │   │   │   │   │   ├── tensorboard.py
│   │   │   │   │   │   ├── text.py
│   │   │   │   │   │   └── wandb.py
│   │   │   │   │   ├── lr_updater.py
│   │   │   │   │   ├── memory.py
│   │   │   │   │   ├── momentum_updater.py
│   │   │   │   │   ├── optimizer.py
│   │   │   │   │   ├── profiler.py
│   │   │   │   │   ├── sampler_seed.py
│   │   │   │   │   └── sync_buffer.py
│   │   │   │   ├── iter_based_runner.py
│   │   │   │   ├── log_buffer.py
│   │   │   │   ├── optimizer
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── builder.py
│   │   │   │   │   └── default_constructor.py
│   │   │   │   ├── priority.py
│   │   │   │   └── utils.py
│   │   │   ├── utils
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.py
│   │   │   │   ├── env.py
│   │   │   │   ├── ext_loader.py
│   │   │   │   ├── logging.py
│   │   │   │   ├── misc.py
│   │   │   │   ├── parrots_jit.py
│   │   │   │   ├── parrots_wrapper.py
│   │   │   │   ├── path.py
│   │   │   │   ├── progressbar.py
│   │   │   │   ├── registry.py
│   │   │   │   ├── testing.py
│   │   │   │   ├── timer.py
│   │   │   │   ├── trace.py
│   │   │   │   └── version_utils.py
│   │   │   ├── version.py
│   │   │   ├── video
│   │   │   │   ├── __init__.py
│   │   │   │   ├── io.py
│   │   │   │   ├── optflow.py
│   │   │   │   └── processing.py
│   │   │   └── visualization
│   │   │       ├── __init__.py
│   │   │       ├── color.py
│   │   │       ├── image.py
│   │   │       └── optflow.py
│   │   ├── mmcv_custom
│   │   │   ├── __init__.py
│   │   │   └── checkpoint.py
│   │   └── mmseg
│   │       ├── apis
│   │       │   ├── __init__.py
│   │       │   ├── inference.py
│   │       │   ├── test.py
│   │       │   └── train.py
│   │       ├── core
│   │       │   ├── __init__.py
│   │       │   ├── evaluation
│   │       │   │   ├── __init__.py
│   │       │   │   ├── class_names.py
│   │       │   │   ├── eval_hooks.py
│   │       │   │   └── metrics.py
│   │       │   ├── seg
│   │       │   │   ├── __init__.py
│   │       │   │   ├── builder.py
│   │       │   │   └── sampler
│   │       │   │       ├── __init__.py
│   │       │   │       ├── base_pixel_sampler.py
│   │       │   │       └── ohem_pixel_sampler.py
│   │       │   └── utils
│   │       │       ├── __init__.py
│   │       │       └── misc.py
│   │       ├── datasets
│   │       │   ├── __init__.py
│   │       │   ├── ade.py
│   │       │   ├── builder.py
│   │       │   ├── chase_db1.py
│   │       │   ├── cityscapes.py
│   │       │   ├── custom.py
│   │       │   ├── dataset_wrappers.py
│   │       │   ├── drive.py
│   │       │   ├── hrf.py
│   │       │   ├── pascal_context.py
│   │       │   ├── pipelines
│   │       │   │   ├── __init__.py
│   │       │   │   ├── compose.py
│   │       │   │   ├── formating.py
│   │       │   │   ├── loading.py
│   │       │   │   ├── test_time_aug.py
│   │       │   │   └── transforms.py
│   │       │   ├── stare.py
│   │       │   └── voc.py
│   │       ├── models
│   │       │   ├── __init__.py
│   │       │   ├── backbones
│   │       │   │   ├── __init__.py
│   │       │   │   ├── cgnet.py
│   │       │   │   ├── fast_scnn.py
│   │       │   │   ├── hrnet.py
│   │       │   │   ├── mobilenet_v2.py
│   │       │   │   ├── mobilenet_v3.py
│   │       │   │   ├── resnest.py
│   │       │   │   ├── resnet.py
│   │       │   │   ├── resnext.py
│   │       │   │   ├── unet.py
│   │       │   │   ├── uniformer.py
│   │       │   │   └── vit.py
│   │       │   ├── builder.py
│   │       │   ├── decode_heads
│   │       │   │   ├── __init__.py
│   │       │   │   ├── ann_head.py
│   │       │   │   ├── apc_head.py
│   │       │   │   ├── aspp_head.py
│   │       │   │   ├── cascade_decode_head.py
│   │       │   │   ├── cc_head.py
│   │       │   │   ├── da_head.py
│   │       │   │   ├── decode_head.py
│   │       │   │   ├── dm_head.py
│   │       │   │   ├── dnl_head.py
│   │       │   │   ├── ema_head.py
│   │       │   │   ├── enc_head.py
│   │       │   │   ├── fcn_head.py
│   │       │   │   ├── fpn_head.py
│   │       │   │   ├── gc_head.py
│   │       │   │   ├── lraspp_head.py
│   │       │   │   ├── nl_head.py
│   │       │   │   ├── ocr_head.py
│   │       │   │   ├── point_head.py
│   │       │   │   ├── psa_head.py
│   │       │   │   ├── psp_head.py
│   │       │   │   ├── sep_aspp_head.py
│   │       │   │   ├── sep_fcn_head.py
│   │       │   │   └── uper_head.py
│   │       │   ├── losses
│   │       │   │   ├── __init__.py
│   │       │   │   ├── accuracy.py
│   │       │   │   ├── cross_entropy_loss.py
│   │       │   │   ├── dice_loss.py
│   │       │   │   ├── lovasz_loss.py
│   │       │   │   └── utils.py
│   │       │   ├── necks
│   │       │   │   ├── __init__.py
│   │       │   │   ├── fpn.py
│   │       │   │   └── multilevel_neck.py
│   │       │   ├── segmentors
│   │       │   │   ├── __init__.py
│   │       │   │   ├── base.py
│   │       │   │   ├── cascade_encoder_decoder.py
│   │       │   │   └── encoder_decoder.py
│   │       │   └── utils
│   │       │       ├── __init__.py
│   │       │       ├── drop.py
│   │       │       ├── inverted_residual.py
│   │       │       ├── make_divisible.py
│   │       │       ├── res_layer.py
│   │       │       ├── se_layer.py
│   │       │       ├── self_attention_block.py
│   │       │       ├── up_conv_block.py
│   │       │       └── weight_init.py
│   │       ├── ops
│   │       │   ├── __init__.py
│   │       │   ├── encoding.py
│   │       │   └── wrappers.py
│   │       └── utils
│   │           ├── __init__.py
│   │           ├── collect_env.py
│   │           └── logger.py
│   └── util.py
├── ckpt
│   └── ckpt.txt
├── configs
│   ├── global_v15.yaml
│   ├── local_v15.yaml
│   └── uni_v15.yaml
├── data
│   └── data.txt
├── environment.yaml
├── figs
│   ├── comparison.png
│   ├── demo_conditions.png
│   ├── demo_panel.png
│   ├── demo_results.png
│   ├── demo_results2.png
│   ├── demo_results3.png
│   ├── pipeline.png
│   └── results.png
├── ldm
│   ├── models
│   │   ├── autoencoder.py
│   │   └── diffusion
│   │       ├── __init__.py
│   │       ├── ddim.py
│   │       ├── ddpm.py
│   │       ├── dpm_solver
│   │       │   ├── __init__.py
│   │       │   ├── dpm_solver.py
│   │       │   └── sampler.py
│   │       ├── plms.py
│   │       └── sampling_util.py
│   ├── modules
│   │   ├── attention.py
│   │   ├── diffusionmodules
│   │   │   ├── __init__.py
│   │   │   ├── model.py
│   │   │   ├── openaimodel.py
│   │   │   ├── upscaling.py
│   │   │   └── util.py
│   │   ├── distributions
│   │   │   ├── __init__.py
│   │   │   └── distributions.py
│   │   ├── ema.py
│   │   ├── encoders
│   │   │   ├── __init__.py
│   │   │   └── modules.py
│   │   └── image_degradation
│   │       ├── __init__.py
│   │       ├── bsrgan.py
│   │       ├── bsrgan_light.py
│   │       ├── utils
│   │       │   └── test.png
│   │       └── utils_image.py
│   └── util.py
├── models
│   ├── ddim_hacked.py
│   ├── global_adapter.py
│   ├── hack.py
│   ├── local_adapter.py
│   ├── logger.py
│   ├── uni_controlnet.py
│   └── util.py
├── samples
│   ├── multi_conditions
│   │   ├── case1
│   │   │   ├── canny.jpg
│   │   │   ├── midas.jpg
│   │   │   └── prompt.txt
│   │   ├── case2
│   │   │   ├── content.jpg
│   │   │   └── hed.jpg
│   │   └── case3
│   │       ├── content.jpg
│   │       ├── hed.jpg
│   │       ├── midas.jpg
│   │       ├── prompt.txt
│   │       └── sketch.jpg
│   └── single_condition
│       ├── case1
│       │   ├── prompt.txt
│       │   └── sketch.jpg
│       ├── case2
│       │   ├── prompt.txt
│       │   └── seg.jpg
│       └── case3
│           └── content.jpg
├── src
│   ├── test
│   │   └── test.py
│   └── train
│       ├── dataset.py
│       ├── train.py
│       └── util.py
└── utils
    ├── config.py
    ├── prepare_weights.py
    └── share.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE
--------------------------------------------------------------------------------
/annotator/canny/__init__.py:
--------------------------------------------------------------------------------
import cv2


class CannyDetector:
    def __call__(self, img, low_threshold, high_threshold):
        return cv2.Canny(img, low_threshold, high_threshold)
--------------------------------------------------------------------------------
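Usage sketch (not a repository file): a minimal way to drive the CannyDetector above. The image path and threshold values are illustrative assumptions, not taken from the repository.

import cv2
from annotator.canny import CannyDetector

apply_canny = CannyDetector()
img = cv2.imread("test.png")  # hypothetical uint8 HWC image
edge_map = apply_canny(img, low_threshold=100, high_threshold=200)  # uint8 edge map
cv2.imwrite("canny.png", edge_map)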
/annotator/content/__init__.py:
--------------------------------------------------------------------------------
import cv2
from PIL import Image

import torch
from transformers import AutoProcessor, CLIPModel

from annotator.util import annotator_ckpts_path


class ContentDetector:
    def __init__(self):

        model_name = "openai/clip-vit-large-patch14"

        self.model = CLIPModel.from_pretrained(model_name, cache_dir=annotator_ckpts_path).cuda().eval()
        self.processor = AutoProcessor.from_pretrained(model_name, cache_dir=annotator_ckpts_path)

    def __call__(self, img):
        assert img.ndim == 3
        with torch.no_grad():
            img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            inputs = self.processor(images=img, return_tensors="pt").to('cuda')
            image_features = self.model.get_image_features(**inputs)
            image_feature = image_features[0].detach().cpu().numpy()
        return image_feature
--------------------------------------------------------------------------------
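Usage sketch (not a repository file): extracting a global "content" embedding with the class above. It expects a BGR uint8 image (the class converts BGR to RGB itself) and a CUDA device; the path is a hypothetical example.

import cv2
from annotator.content import ContentDetector

apply_content = ContentDetector()   # downloads openai/clip-vit-large-patch14 on first use
img = cv2.imread("reference.jpg")   # hypothetical image, BGR uint8
feature = apply_content(img)        # 1-D numpy vector of CLIP image features (768-dim for ViT-L/14)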
/annotator/midas/__init__.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import torch

from einops import rearrange
from .api import MiDaSInference


class MidasDetector:
    def __init__(self):
        self.model = MiDaSInference(model_type="dpt_hybrid").cuda()

    def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
        assert input_image.ndim == 3
        image_depth = input_image
        with torch.no_grad():
            image_depth = torch.from_numpy(image_depth).float().cuda()
            image_depth = image_depth / 127.5 - 1.0
            image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
            depth = self.model(image_depth)[0]

            depth_pt = depth.clone()
            depth_pt -= torch.min(depth_pt)
            depth_pt /= torch.max(depth_pt)
            depth_pt = depth_pt.cpu().numpy()
            depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)

            return depth_image
--------------------------------------------------------------------------------
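Usage sketch (not a repository file): estimating a depth map with the MidasDetector above. The image path is hypothetical, and resizing the input beforehand (so the network gets dimensions it can handle) is an assumption about how the repo's pipelines prepare inputs.

import cv2
from annotator.midas import MidasDetector

apply_midas = MidasDetector()  # loads the DPT-hybrid MiDaS weights onto the GPU
img = cv2.cvtColor(cv2.imread("scene.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical HWC uint8 image
depth_map = apply_midas(img)   # uint8 depth map, normalized to 0..255
cv2.imwrite("depth.png", depth_map)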
/annotator/midas/midas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/annotator/midas/midas/__init__.py
--------------------------------------------------------------------------------
/annotator/midas/midas/base_model.py:
--------------------------------------------------------------------------------
import torch


class BaseModel(torch.nn.Module):
    def load(self, path):
        """Load model from file.

        Args:
            path (str): file path
        """
        parameters = torch.load(path, map_location=torch.device('cpu'))

        if "optimizer" in parameters:
            parameters = parameters["model"]

        self.load_state_dict(parameters)
--------------------------------------------------------------------------------
/annotator/mlsd/__init__.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import torch
import os

from einops import rearrange
from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
from .utils import pred_lines

from annotator.util import annotator_ckpts_path


remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth"


class MLSDdetector:
    def __init__(self):
        model_path = os.path.join(annotator_ckpts_path, "mlsd_large_512_fp32.pth")
        if not os.path.exists(model_path):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
        model = MobileV2_MLSD_Large()
        model.load_state_dict(torch.load(model_path), strict=True)
        self.model = model.cuda().eval()

    def __call__(self, input_image, thr_v, thr_d):
        assert input_image.ndim == 3
        img = input_image
        img_output = np.zeros_like(img)
        try:
            with torch.no_grad():
                lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d)
                for line in lines:
                    x_start, y_start, x_end, y_end = [int(val) for val in line]
                    cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
        except Exception as e:
            pass
        return img_output[:, :, 0]
--------------------------------------------------------------------------------
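Usage sketch (not a repository file): detecting wireframe line segments with the MLSDdetector above. The image path is hypothetical and the two thresholds (line-score and distance) are illustrative values, not defaults taken from this repository.

import cv2
from annotator.mlsd import MLSDdetector

apply_mlsd = MLSDdetector()  # fetches mlsd_large_512_fp32.pth on first run
img = cv2.cvtColor(cv2.imread("room.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical image
line_map = apply_mlsd(img, thr_v=0.1, thr_d=0.1)  # single-channel uint8 map of drawn segments
cv2.imwrite("mlsd.png", line_map)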
/annotator/openpose/__init__.py:
--------------------------------------------------------------------------------
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import torch
import numpy as np
from . import util
from .body import Body
from .hand import Hand
from annotator.util import annotator_ckpts_path


body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"


class OpenposeDetector:
    def __init__(self):
        body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth")
        hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth")

        if not os.path.exists(hand_modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(body_model_path, model_dir=annotator_ckpts_path)
            load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path)

        self.body_estimation = Body(body_modelpath)
        self.hand_estimation = Hand(hand_modelpath)

    def __call__(self, oriImg, hand=False):
        oriImg = oriImg[:, :, ::-1].copy()
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            canvas = np.zeros_like(oriImg)
            canvas = util.draw_bodypose(canvas, candidate, subset)
            if hand:
                hands_list = util.handDetect(candidate, subset, oriImg)
                all_hand_peaks = []
                for x, y, w, is_left in hands_list:
                    peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
                    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                    all_hand_peaks.append(peaks)
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
--------------------------------------------------------------------------------
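Usage sketch (not a repository file): running the OpenposeDetector above. Passing an RGB array is an assumption (the class flips channel order itself before estimation); the image path is hypothetical.

import cv2
from annotator.openpose import OpenposeDetector

apply_openpose = OpenposeDetector()  # fetches body/hand pose weights on first run
img = cv2.cvtColor(cv2.imread("person.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical image
canvas, keypoints = apply_openpose(img, hand=True)  # rendered skeleton + candidate/subset lists
cv2.imwrite("pose.png", canvas)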
/annotator/sketch/__init__.py:
--------------------------------------------------------------------------------
import torch
from torchvision import transforms

import os
import cv2
import numpy as np
from PIL import Image

from .model import module
from annotator.hed import HEDdetector
from annotator.util import annotator_ckpts_path


remote_model_path = "https://github.com/aidreamwin/sketch_simplification_pytorch/releases/download/model/model_gan.pth"


class SketchDetector:
    def __init__(self):
        model_path = os.path.join(annotator_ckpts_path, "model_gan.pth")
        self.immean, self.imstd = 0.9664114577640158, 0.0858381272736797
        self.model = module.Net()
        if os.path.exists(model_path):
            checkpoint = torch.load(model_path)
        else:
            checkpoint = torch.hub.load_state_dict_from_url(remote_model_path, model_dir=os.path.dirname(model_path), progress=True)
        self.model.load_state_dict(checkpoint)
        self.model.eval()
        self.hed_func = HEDdetector()

    def __call__(self, pre_img):
        img = 255 - self.hed_func(pre_img)
        assert img.ndim == 2
        img = Image.fromarray(img).convert('L')
        w, h = img.size[0], img.size[1]
        pw = 8 - (w % 8) if w % 8 != 0 else 0
        ph = 8 - (h % 8) if h % 8 != 0 else 0
        data = ((transforms.ToTensor()(img) - self.immean) / self.imstd).unsqueeze(0)
        if pw != 0 or ph != 0:
            data = torch.nn.ReplicationPad2d((0, pw, 0, ph))(data).data
        data = data.float().cuda()
        with torch.no_grad():
            pred = self.model.cuda().forward(data).float()[0][0]
        pred = pred.detach().cpu().numpy()
        pred = cv2.resize(pred, (w, h)) * 255
        pred = pred.astype(np.uint8)
        return pred
--------------------------------------------------------------------------------
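Usage sketch (not a repository file): producing a simplified sketch with the class above, which chains the HED edge detector with the sketch-simplification network. The image path is a hypothetical example.

import cv2
from annotator.sketch import SketchDetector

apply_sketch = SketchDetector()  # builds HEDdetector + loads model_gan.pth
img = cv2.cvtColor(cv2.imread("photo.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical image
sketch = apply_sketch(img)       # uint8 simplified sketch at the input resolution
cv2.imwrite("sketch.png", sketch)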
/annotator/uniformer/__init__.py:
--------------------------------------------------------------------------------
import os

from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
from annotator.uniformer.mmseg.core.evaluation import get_palette
from annotator.util import annotator_ckpts_path


checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"


class UniformerDetector:
    def __init__(self):
        modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
        if not os.path.exists(modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
        config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
        self.model = init_segmentor(config_file, modelpath).cuda()

    def __call__(self, img):
        result = inference_segmentor(self.model, img)
        res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
        return res_img, result
--------------------------------------------------------------------------------
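Usage sketch (not a repository file): semantic segmentation with the UniformerDetector above; the image path is hypothetical.

import cv2
from annotator.uniformer import UniformerDetector

apply_seg = UniformerDetector()  # fetches upernet_global_small.pth and builds the segmentor
img = cv2.imread("street.jpg")   # hypothetical HWC uint8 image
seg_vis, raw_result = apply_seg(img)  # ADE20K-palette visualization + per-pixel label arrays
cv2.imwrite("seg.png", seg_vis)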
/annotator/uniformer/configs/_base_/datasets/ade20k.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
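These _base_ fragments are not used directly; mmcv-style configs compose them through _base_ inheritance (the repo's own composed example is annotator/uniformer/exp/upernet_global_small/config.py). A hypothetical leaf config as a sketch; the exact fragment choice and the override below are illustrative:

# hypothetical leaf config, composed from the fragments in this tree
_base_ = [
    '../_base_/models/upernet_uniformer.py',
    '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py'
]
# leaf configs may then override individual keys, e.g. the batch size:
data = dict(samples_per_gpu=2)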
/annotator/uniformer/configs/_base_/datasets/chase_db1.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'ChaseDB1Dataset'
data_root = 'data/CHASE_DB1'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (960, 999)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/cityscapes.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py:
--------------------------------------------------------------------------------
_base_ = './cityscapes.py'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (769, 769)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/drive.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'DRIVEDataset'
data_root = 'data/DRIVE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (584, 565)
crop_size = (64, 64)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/hrf.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'HRFDataset'
data_root = 'data/HRF'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (2336, 3504)
crop_size = (256, 256)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/pascal_context.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py:
--------------------------------------------------------------------------------
_base_ = './pascal_voc12.py'
# dataset settings
data = dict(
    train=dict(
        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
        split=[
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt'
        ]))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/datasets/stare.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/ann_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ANNHead',
        in_channels=[1024, 2048],
        in_index=[2, 3],
        channels=512,
        project_channels=256,
        query_scales=(1, ),
        key_pool_scales=(1, 3, 6, 8),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
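Usage sketch (not a repository file): each of these model fragments can be loaded and instantiated on its own with the vendored mmcv/mmseg APIs. The config path is relative to the repository root, and the pretrained/SyncBN overrides are assumptions for a quick single-GPU, non-distributed smoke test.

from annotator.uniformer.mmcv import Config
from annotator.uniformer.mmseg.models import build_segmentor

cfg = Config.fromfile('annotator/uniformer/configs/_base_/models/ann_r50-d8.py')
cfg.model.pretrained = None  # assumption: skip downloading the open-mmlab backbone weights
# assumption: replace SyncBN with plain BN when not running distributed training
cfg.model.backbone.norm_cfg = dict(type='BN', requires_grad=True)
cfg.model.decode_head.norm_cfg = cfg.model.backbone.norm_cfg
cfg.model.auxiliary_head.norm_cfg = cfg.model.backbone.norm_cfg
model = build_segmentor(cfg.model)  # train_cfg/test_cfg are read from the model dict itself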
strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='APCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pool_scales=(1, 2, 3, 6), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=dict(type='SyncBN', requires_grad=True), 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='CCHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | recurrence=2, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/cgnet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | backbone=dict( 6 | type='CGNet', 7 | norm_cfg=norm_cfg, 8 | in_channels=3, 9 | num_channels=(32, 64, 128), 10 | num_blocks=(3, 21), 11 | dilations=(2, 4), 12 | reductions=(8, 16)), 13 | decode_head=dict( 14 | type='FCNHead', 15 | in_channels=256, 16 | in_index=2, 17 | channels=256, 18 | num_convs=0, 19 | concat_input=False, 20 | dropout_ratio=0, 21 | num_classes=19, 22 | norm_cfg=norm_cfg, 23 | loss_decode=dict( 24 | type='CrossEntropyLoss', 25 | use_sigmoid=False, 26 | loss_weight=1.0, 27 | class_weight=[ 28 | 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, 29 | 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, 30 | 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, 31 | 10.396974, 10.055647 32 | ])), 33 | # model training and testing settings 34 | train_cfg=dict(sampler=None), 35 | test_cfg=dict(mode='whole')) 36 | 
-------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | pam_channels=64, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='ASPPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dilations=(1, 12, 24, 36), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='UNet', 8 | in_channels=3, 9 | base_channels=64, 10 | num_stages=5, 11 | strides=(1, 1, 1, 1, 1), 12 | enc_num_convs=(2, 2, 2, 2, 2), 13 | dec_num_convs=(2, 2, 2, 2), 14 | downsamples=(True, True, True, True), 15 | enc_dilations=(1, 1, 1, 1, 1), 16 | dec_dilations=(1, 1, 1, 1), 17 | with_cp=False, 18 | conv_cfg=None, 19 | norm_cfg=norm_cfg, 20 | 
act_cfg=dict(type='ReLU'), 21 | upsample_cfg=dict(type='InterpConv'), 22 | norm_eval=False), 23 | decode_head=dict( 24 | type='ASPPHead', 25 | in_channels=64, 26 | in_index=4, 27 | channels=16, 28 | dilations=(1, 12, 24, 36), 29 | dropout_ratio=0.1, 30 | num_classes=2, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | auxiliary_head=dict( 36 | type='FCNHead', 37 | in_channels=128, 38 | in_index=3, 39 | channels=64, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=2, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='slide', crop_size=256, stride=170)) 51 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DepthwiseSeparableASPPHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dilations=(1, 12, 24, 36), 23 | c1_in_channels=256, 24 | c1_channels=48, 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DMHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | filter_sizes=(1, 3, 5, 7), 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=dict(type='SyncBN', requires_grad=True), 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | 
norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/dnl_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DNLHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=512, 22 | dropout_ratio=0.1, 23 | reduction=2, 24 | use_scale=True, 25 | mode='embedded_gaussian', 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict( 30 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 31 | auxiliary_head=dict( 32 | type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict( 43 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/emanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='EMAHead', 19 | in_channels=2048, 20 | in_index=3, 21 | channels=256, 22 | ema_channels=512, 23 | num_bases=64, 24 | num_stages=3, 25 | momentum=0.1, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | auxiliary_head=dict( 33 | type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict( 44 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /annotator/uniformer/configs/_base_/models/encnet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | 
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='EncHead',
        in_channels=[512, 1024, 2048],
        in_index=(1, 2, 3),
        channels=512,
        num_codes=32,
        use_se_loss=True,
        add_lateral=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_se_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/fast_scnn.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=norm_cfg,
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=19,
        in_index=-1,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/fcn_hr18.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://msra/hrnetv2_w18',
    backbone=dict(
        type='HRNet',
        norm_cfg=norm_cfg,
        norm_eval=False,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(18, 36)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(18, 36, 72)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(18, 36, 72, 144)))),
    decode_head=dict(
        type='FCNHead',
        in_channels=[18, 36, 72, 144],
        in_index=(0, 1, 2, 3),
        channels=sum([18, 36, 72, 144]),
        input_transform='resize_concat',
        kernel_size=1,
        num_convs=1,
        concat_input=False,
        dropout_ratio=-1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='FCNHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=64,
        in_index=4,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/fpn_r50.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/fpn_uniformer.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        mlp_ratio=4.,
        qkv_bias=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.1),
    neck=dict(
        type='FPN',
        in_channels=[64, 128, 320, 512],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole')
)
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='GCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        ratio=1 / 4.,
        pooling_type='att',
        fusion_types=('channel_add', ),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='MobileNetV3',
        arch='large',
        out_indices=(1, 3, 16),
        norm_cfg=norm_cfg),
    decode_head=dict(
        type='LRASPPHead',
        in_channels=(16, 24, 960),
        in_index=(0, 1, 2),
        channels=128,
        input_transform='multiple_select',
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='NLHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dropout_ratio=0.1,
        reduction=2,
        use_scale=True,
        mode='embedded_gaussian',
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=[
        dict(
            type='FCNHead',
            in_channels=1024,
            in_index=2,
            channels=256,
            num_convs=1,
            concat_input=False,
            dropout_ratio=0.1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
        dict(
            type='OCRHead',
            in_channels=2048,
            in_index=3,
            channels=512,
            ocr_channels=256,
            dropout_ratio=0.1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/pointrend_r50.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=[
        dict(
            type='FPNHead',
            in_channels=[256, 256, 256, 256],
            in_index=[0, 1, 2, 3],
            feature_strides=[4, 8, 16, 32],
            channels=128,
            dropout_ratio=-1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='PointHead',
            in_channels=[256],
            in_index=[0],
            channels=256,
            num_fcs=3,
            coarse_pred_each_layer=True,
            dropout_ratio=-1,
            num_classes=19,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    # model training and testing settings
    train_cfg=dict(
        num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
    test_cfg=dict(
        mode='whole',
        subdivision_steps=2,
        subdivision_num_points=8196,
        scale_factor=2))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        mask_size=(97, 97),
        psa_type='bi-direction',
        compact=False,
        shrink_factor=2,
        normalization_factor=1.0,
        psa_softmax=True,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='PSPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/upernet_r50.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='UPerHead',
        in_channels=[256, 512, 1024, 2048],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/models/upernet_uniformer.py:
--------------------------------------------------------------------------------
# model settings
norm_cfg = dict(type='BN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        mlp_ratio=4.,
        qkv_bias=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.1),
    decode_head=dict(
        type='UPerHead',
        in_channels=[64, 128, 320, 512],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=320,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/schedules/schedule_160k.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=160000)
checkpoint_config = dict(by_epoch=False, interval=16000)
evaluation = dict(interval=16000, metric='mIoU')
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/schedules/schedule_20k.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=2000, metric='mIoU')
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/schedules/schedule_40k.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=40000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU')
--------------------------------------------------------------------------------
/annotator/uniformer/configs/_base_/schedules/schedule_80k.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=8000)
evaluation = dict(interval=8000, metric='mIoU')
--------------------------------------------------------------------------------
/annotator/uniformer/exp/upernet_global_small/config.py:
--------------------------------------------------------------------------------
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=False,
        hybrid=False
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)
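# Usage sketch (editor's illustration, not a repo file): mmcv.Config merges
# the _base_ files above recursively, so keys set in this config override
# the base values; the path assumes the repository root as working directory.
from annotator.uniformer.mmcv import Config

cfg = Config.fromfile('annotator/uniformer/exp/upernet_global_small/config.py')
assert cfg.model.decode_head.num_classes == 150  # overrides 19 from the base
assert cfg.runner.max_iters == 160000            # inherited from schedule_160k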
--------------------------------------------------------------------------------
/annotator/uniformer/exp/upernet_global_small/run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

work_path=$(dirname $0)
PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=8 \
    tools/train.py ${work_path}/config.py \
    --launcher pytorch \
    --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \
    --work-dir ${work_path}/ckpt \
    2>&1 | tee -a ${work_path}/log.txt
--------------------------------------------------------------------------------
/annotator/uniformer/exp/upernet_global_small/test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

work_path=$(dirname $0)
PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=8 \
    tools/test.py ${work_path}/test_config_h32.py \
    ${work_path}/ckpt/latest.pth \
    --launcher pytorch \
    --eval mIoU \
    2>&1 | tee -a ${work_path}/log.txt
--------------------------------------------------------------------------------
/annotator/uniformer/exp/upernet_global_small/test_config_g.py:
--------------------------------------------------------------------------------
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=False,
        hybrid=False,
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)
--------------------------------------------------------------------------------
/annotator/uniformer/exp/upernet_global_small/test_config_h32.py:
--------------------------------------------------------------------------------
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=False,
        hybrid=True,
        window_size=32
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)
--------------------------------------------------------------------------------
/annotator/uniformer/exp/upernet_global_small/test_config_w32.py:
--------------------------------------------------------------------------------
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=True,
        hybrid=False,
        window_size=32
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa
from .arraymisc import *
from .fileio import *
from .image import *
from .utils import *
from .version import *
from .video import *
from .visualization import *

# The following modules are not imported to this level, so mmcv may be used
# without PyTorch.
# - runner
# - parallel
# - op
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/arraymisc/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .quantization import dequantize, quantize

__all__ = ['quantize', 'dequantize']
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/arraymisc/quantization.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np


def quantize(arr, min_val, max_val, levels, dtype=np.int64):
    """Quantize an array of (-inf, inf) to [0, levels-1].

    Args:
        arr (ndarray): Input array.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Quantization levels.
        dtype (np.type): The type of the quantized array.

    Returns:
        ndarray: Quantized array.
    """
    if not (isinstance(levels, int) and levels > 1):
        raise ValueError(
            f'levels must be a positive integer, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    arr = np.clip(arr, min_val, max_val) - min_val
    quantized_arr = np.minimum(
        np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)

    return quantized_arr


def dequantize(arr, min_val, max_val, levels, dtype=np.float64):
    """Dequantize an array.

    Args:
        arr (ndarray): Input array.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Quantization levels.
        dtype (np.type): The type of the dequantized array.

    Returns:
        ndarray: Dequantized array.
    """
    if not (isinstance(levels, int) and levels > 1):
        raise ValueError(
            f'levels must be a positive integer, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
                                                   min_val) / levels + min_val

    return dequantized_arr
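# Usage sketch (editor's illustration, not a repo file): a round trip through
# quantize/dequantize; the array values are arbitrary demo numbers.
import numpy as np
from annotator.uniformer.mmcv.arraymisc import dequantize, quantize

arr = np.array([-1.0, 0.0, 0.25, 0.5, 1.0])
q = quantize(arr, min_val=0., max_val=1., levels=8)  # int64 bin indices in [0, 7]
r = dequantize(q, min_val=0., max_val=1., levels=8)  # bin-center floats
# After clipping to [min_val, max_val], the round-trip error is at most half
# a bin width: (max_val - min_val) / (2 * levels).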
17 | """ 18 | if not (isinstance(levels, int) and levels > 1): 19 | raise ValueError( 20 | f'levels must be a positive integer, but got {levels}') 21 | if min_val >= max_val: 22 | raise ValueError( 23 | f'min_val ({min_val}) must be smaller than max_val ({max_val})') 24 | 25 | arr = np.clip(arr, min_val, max_val) - min_val 26 | quantized_arr = np.minimum( 27 | np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1) 28 | 29 | return quantized_arr 30 | 31 | 32 | def dequantize(arr, min_val, max_val, levels, dtype=np.float64): 33 | """Dequantize an array. 34 | 35 | Args: 36 | arr (ndarray): Input array. 37 | min_val (scalar): Minimum value to be clipped. 38 | max_val (scalar): Maximum value to be clipped. 39 | levels (int): Quantization levels. 40 | dtype (np.type): The type of the dequantized array. 41 | 42 | Returns: 43 | tuple: Dequantized array. 44 | """ 45 | if not (isinstance(levels, int) and levels > 1): 46 | raise ValueError( 47 | f'levels must be a positive integer, but got {levels}') 48 | if min_val >= max_val: 49 | raise ValueError( 50 | f'min_val ({min_val}) must be smaller than max_val ({max_val})') 51 | 52 | dequantized_arr = (arr + 0.5).astype(dtype) * (max_val - 53 | min_val) / levels + min_val 54 | 55 | return dequantized_arr 56 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/cnn/alexnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | import torch.nn as nn 5 | 6 | 7 | class AlexNet(nn.Module): 8 | """AlexNet backbone. 9 | 10 | Args: 11 | num_classes (int): number of classes for classification. 12 | """ 13 | 14 | def __init__(self, num_classes=-1): 15 | super(AlexNet, self).__init__() 16 | self.num_classes = num_classes 17 | self.features = nn.Sequential( 18 | nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), 19 | nn.ReLU(inplace=True), 20 | nn.MaxPool2d(kernel_size=3, stride=2), 21 | nn.Conv2d(64, 192, kernel_size=5, padding=2), 22 | nn.ReLU(inplace=True), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.Conv2d(192, 384, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 29 | nn.ReLU(inplace=True), 30 | nn.MaxPool2d(kernel_size=3, stride=2), 31 | ) 32 | if self.num_classes > 0: 33 | self.classifier = nn.Sequential( 34 | nn.Dropout(), 35 | nn.Linear(256 * 6 * 6, 4096), 36 | nn.ReLU(inplace=True), 37 | nn.Dropout(), 38 | nn.Linear(4096, 4096), 39 | nn.ReLU(inplace=True), 40 | nn.Linear(4096, num_classes), 41 | ) 42 | 43 | def init_weights(self, pretrained=None): 44 | if isinstance(pretrained, str): 45 | logger = logging.getLogger() 46 | from ..runner import load_checkpoint 47 | load_checkpoint(self, pretrained, strict=False, logger=logger) 48 | elif pretrained is None: 49 | # use default initializer 50 | pass 51 | else: 52 | raise TypeError('pretrained must be a str or None') 53 | 54 | def forward(self, x): 55 | 56 | x = self.features(x) 57 | if self.num_classes > 0: 58 | x = x.view(x.size(0), 256 * 6 * 6) 59 | x = self.classifier(x) 60 | 61 | return x 62 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/cnn/bricks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/bricks/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .activation import build_activation_layer
from .context_block import ContextBlock
from .conv import build_conv_layer
from .conv2d_adaptive_padding import Conv2dAdaptivePadding
from .conv_module import ConvModule
from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d
from .depthwise_separable_conv_module import DepthwiseSeparableConvModule
from .drop import Dropout, DropPath
from .generalized_attention import GeneralizedAttention
from .hsigmoid import HSigmoid
from .hswish import HSwish
from .non_local import NonLocal1d, NonLocal2d, NonLocal3d
from .norm import build_norm_layer, is_norm
from .padding import build_padding_layer
from .plugin import build_plugin_layer
from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
                       PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS)
from .scale import Scale
from .swish import Swish
from .upsample import build_upsample_layer
from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
                       Linear, MaxPool2d, MaxPool3d)

__all__ = [
    'ConvModule', 'build_activation_layer', 'build_conv_layer',
    'build_norm_layer', 'build_padding_layer', 'build_upsample_layer',
    'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d',
    'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention',
    'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS',
    'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d',
    'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear',
    'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d',
    'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath'
]
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/bricks/conv.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from torch import nn

from .registry import CONV_LAYERS

CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d)
CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d)
CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d)
CONV_LAYERS.register_module('Conv', module=nn.Conv2d)


def build_conv_layer(cfg, *args, **kwargs):
    """Build convolution layer.

    Args:
        cfg (None or dict): The conv layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate a conv layer.
        args (argument list): Arguments passed to the `__init__`
            method of the corresponding conv layer.
        kwargs (keyword arguments): Keyword arguments passed to the `__init__`
            method of the corresponding conv layer.

    Returns:
        nn.Module: Created conv layer.
    """
    if cfg is None:
        cfg_ = dict(type='Conv2d')
    else:
        if not isinstance(cfg, dict):
            raise TypeError('cfg must be a dict')
        if 'type' not in cfg:
            raise KeyError('the cfg dict must contain the key "type"')
        cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')
    if layer_type not in CONV_LAYERS:
        raise KeyError(f'Unrecognized conv type {layer_type}')
    else:
        conv_layer = CONV_LAYERS.get(layer_type)

    layer = conv_layer(*args, **kwargs, **cfg_)

    return layer
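# Usage sketch (editor's illustration, not a repo file): the cfg dict selects
# the registered conv class and the remaining args go to that class's
# __init__; the import path assumes mmcv.cnn re-exports the builder as
# upstream does.
from annotator.uniformer.mmcv.cnn import build_conv_layer

conv = build_conv_layer(dict(type='Conv2d'), 16, 32, kernel_size=3, padding=1)
default = build_conv_layer(None, 3, 16, kernel_size=1)  # cfg=None -> plain nn.Conv2d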
26 | """ 27 | if cfg is None: 28 | cfg_ = dict(type='Conv2d') 29 | else: 30 | if not isinstance(cfg, dict): 31 | raise TypeError('cfg must be a dict') 32 | if 'type' not in cfg: 33 | raise KeyError('the cfg dict must contain the key "type"') 34 | cfg_ = cfg.copy() 35 | 36 | layer_type = cfg_.pop('type') 37 | if layer_type not in CONV_LAYERS: 38 | raise KeyError(f'Unrecognized norm type {layer_type}') 39 | else: 40 | conv_layer = CONV_LAYERS.get(layer_type) 41 | 42 | layer = conv_layer(*args, **kwargs, **cfg_) 43 | 44 | return layer 45 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | from .registry import ACTIVATION_LAYERS 5 | 6 | 7 | @ACTIVATION_LAYERS.register_module() 8 | class HSigmoid(nn.Module): 9 | """Hard Sigmoid Module. Apply the hard sigmoid function: 10 | Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) 11 | Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) 12 | 13 | Args: 14 | bias (float): Bias of the input feature map. Default: 1.0. 15 | divisor (float): Divisor of the input feature map. Default: 2.0. 16 | min_value (float): Lower bound value. Default: 0.0. 17 | max_value (float): Upper bound value. Default: 1.0. 18 | 19 | Returns: 20 | Tensor: The output tensor. 21 | """ 22 | 23 | def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): 24 | super(HSigmoid, self).__init__() 25 | self.bias = bias 26 | self.divisor = divisor 27 | assert self.divisor != 0 28 | self.min_value = min_value 29 | self.max_value = max_value 30 | 31 | def forward(self, x): 32 | x = (x + self.bias) / self.divisor 33 | 34 | return x.clamp_(self.min_value, self.max_value) 35 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/cnn/bricks/hswish.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | from .registry import ACTIVATION_LAYERS 5 | 6 | 7 | @ACTIVATION_LAYERS.register_module() 8 | class HSwish(nn.Module): 9 | """Hard Swish Module. 10 | 11 | This module applies the hard swish function: 12 | 13 | .. math:: 14 | Hswish(x) = x * ReLU6(x + 3) / 6 15 | 16 | Args: 17 | inplace (bool): can optionally do the operation in-place. 18 | Default: False. 19 | 20 | Returns: 21 | Tensor: The output tensor. 22 | """ 23 | 24 | def __init__(self, inplace=False): 25 | super(HSwish, self).__init__() 26 | self.act = nn.ReLU6(inplace) 27 | 28 | def forward(self, x): 29 | return x * self.act(x + 3) / 6 30 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/cnn/bricks/padding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | from .registry import PADDING_LAYERS 5 | 6 | PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) 7 | PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) 8 | PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) 9 | 10 | 11 | def build_padding_layer(cfg, *args, **kwargs): 12 | """Build padding layer. 13 | 14 | Args: 15 | cfg (None or dict): The padding layer config, which should contain: 16 | - type (str): Layer type. 
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/bricks/padding.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .registry import PADDING_LAYERS

PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d)
PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d)
PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d)


def build_padding_layer(cfg, *args, **kwargs):
    """Build padding layer.

    Args:
        cfg (dict): The padding layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate a padding layer.

    Returns:
        nn.Module: Created padding layer.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    cfg_ = cfg.copy()
    padding_type = cfg_.pop('type')
    if padding_type not in PADDING_LAYERS:
        raise KeyError(f'Unrecognized padding type {padding_type}.')
    else:
        padding_layer = PADDING_LAYERS.get(padding_type)

    layer = padding_layer(*args, **kwargs, **cfg_)

    return layer
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/bricks/registry.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from annotator.uniformer.mmcv.utils import Registry

CONV_LAYERS = Registry('conv layer')
NORM_LAYERS = Registry('norm layer')
ACTIVATION_LAYERS = Registry('activation layer')
PADDING_LAYERS = Registry('padding layer')
UPSAMPLE_LAYERS = Registry('upsample layer')
PLUGIN_LAYERS = Registry('plugin layer')

DROPOUT_LAYERS = Registry('drop out layers')
POSITIONAL_ENCODING = Registry('position encoding')
ATTENTION = Registry('attention')
FEEDFORWARD_NETWORK = Registry('feed-forward Network')
TRANSFORMER_LAYER = Registry('transformerLayer')
TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence')
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/bricks/scale.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn


class Scale(nn.Module):
    """A learnable scale parameter.

    This layer scales the input by a learnable factor. It multiplies a
    learnable scale parameter of shape (1,) with input of any shape.

    Args:
        scale (float): Initial value of scale factor. Default: 1.0
    """

    def __init__(self, scale=1.0):
        super(Scale, self).__init__()
        self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))

    def forward(self, x):
        return x * self.scale
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/bricks/swish.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class Swish(nn.Module):
    """Swish Module.

    This module applies the swish function:

    .. math::
        Swish(x) = x * Sigmoid(x)

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, x):
        return x * torch.sigmoid(x)
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from ..runner import Sequential
from ..utils import Registry, build_from_cfg


def build_model_from_cfg(cfg, registry, default_args=None):
    """Build a PyTorch model from config dict(s). Different from
    ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built.

    Args:
        cfg (dict, list[dict]): The config of modules, which is either a
            config dict or a list of config dicts. If cfg is a list, the
            built modules will be wrapped with ``nn.Sequential``.
        registry (:obj:`Registry`): A registry the module belongs to.
        default_args (dict, optional): Default arguments to build the module.
            Defaults to None.

    Returns:
        nn.Module: A built nn module.
    """
    if isinstance(cfg, list):
        modules = [
            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
        ]
        return Sequential(*modules)
    else:
        return build_from_cfg(cfg, registry, default_args)


MODELS = Registry('model', build_func=build_model_from_cfg)
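# Usage sketch (editor's illustration, not a repo file): TinyBlock is a
# hypothetical module used only to show registration against the MODELS
# registry defined above; a list of cfgs builds an nn.Sequential via
# build_model_from_cfg.
import torch.nn as nn


@MODELS.register_module()
class TinyBlock(nn.Module):

    def __init__(self, channels):
        super().__init__()
        self.proj = nn.Linear(channels, channels)

    def forward(self, x):
        return self.proj(x)


single = MODELS.build(dict(type='TinyBlock', channels=64))
stacked = MODELS.build([dict(type='TinyBlock', channels=64)] * 2)  # nn.Sequential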
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .flops_counter import get_model_complexity_info
from .fuse_conv_bn import fuse_conv_bn
from .sync_bn import revert_sync_batchnorm
from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit,
                          KaimingInit, NormalInit, PretrainedInit,
                          TruncNormalInit, UniformInit, XavierInit,
                          bias_init_with_prob, caffe2_xavier_init,
                          constant_init, initialize, kaiming_init, normal_init,
                          trunc_normal_init, uniform_init, xavier_init)

__all__ = [
    'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init',
    'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init',
    'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize',
    'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
    'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
    'Caffe2XavierInit', 'revert_sync_batchnorm'
]
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn


def _fuse_conv_bn(conv, bn):
    """Fuse conv and bn into one module.

    Args:
        conv (nn.Module): Conv to be fused.
        bn (nn.Module): BN to be fused.

    Returns:
        nn.Module: Fused module.
    """
    conv_w = conv.weight
    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
        bn.running_mean)

    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    conv.weight = nn.Parameter(conv_w *
                               factor.reshape([conv.out_channels, 1, 1, 1]))
    conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
    return conv


def fuse_conv_bn(module):
    """Recursively fuse conv and bn in a module.

    During inference, the functionality of batch norm layers is turned off
    and only the mean and variance along channels are used, which exposes
    the chance to fuse it with the preceding conv layers to save computations
    and simplify network structures.

    Args:
        module (nn.Module): Module to be fused.

    Returns:
        nn.Module: Fused module.
    """
    last_conv = None
    last_conv_name = None

    for name, child in module.named_children():
        if isinstance(child,
                      (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)):
            if last_conv is None:  # only fuse BN that is after Conv
                continue
            fused_conv = _fuse_conv_bn(last_conv, child)
            module._modules[last_conv_name] = fused_conv
            # To reduce changes, set BN as Identity instead of deleting it.
            module._modules[name] = nn.Identity()
            last_conv = None
        elif isinstance(child, nn.Conv2d):
            last_conv = child
            last_conv_name = name
        else:
            fuse_conv_bn(child)
    return module
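# Usage sketch (editor's illustration, not a repo file): fold frozen BN
# statistics into the preceding conv for inference; the tiny model is a
# stand-in, and the import path assumes mmcv.cnn re-exports fuse_conv_bn.
import torch
import torch.nn as nn
from annotator.uniformer.mmcv.cnn import fuse_conv_bn

model = nn.Sequential(
    nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU()).eval()
x = torch.randn(1, 3, 16, 16)
with torch.no_grad():
    ref = model(x)
fused = fuse_conv_bn(model)  # mutates in place and returns the module
with torch.no_grad():
    assert torch.allclose(ref, fused(x), atol=1e-5)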
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/engine/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test,
                   single_gpu_test)

__all__ = [
    'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test',
    'single_gpu_test'
]
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/fileio/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .file_client import BaseStorageBackend, FileClient
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
from .io import dump, load, register_handler
from .parse import dict_from_file, list_from_file

__all__ = [
    'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler',
    'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler',
    'list_from_file', 'dict_from_file'
]
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/fileio/handlers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseFileHandler
from .json_handler import JsonHandler
from .pickle_handler import PickleHandler
from .yaml_handler import YamlHandler

__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler']
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/fileio/handlers/base.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod


class BaseFileHandler(metaclass=ABCMeta):
    # `str_like` is a flag to indicate whether the type of file object is
    # str-like object or bytes-like object. Pickle only processes bytes-like
    # objects but json only processes str-like object. If it is str-like
    # object, `StringIO` will be used to process the buffer.
    str_like = True

    @abstractmethod
    def load_from_fileobj(self, file, **kwargs):
        pass

    @abstractmethod
    def dump_to_fileobj(self, obj, file, **kwargs):
        pass

    @abstractmethod
    def dump_to_str(self, obj, **kwargs):
        pass

    def load_from_path(self, filepath, mode='r', **kwargs):
        with open(filepath, mode) as f:
            return self.load_from_fileobj(f, **kwargs)

    def dump_to_path(self, obj, filepath, mode='w', **kwargs):
        with open(filepath, mode) as f:
            self.dump_to_fileobj(obj, f, **kwargs)
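# Usage sketch (editor's illustration, not a repo file): a minimal plain-text
# handler subclassing BaseFileHandler; registering it for the 'txt' extension
# via register_handler is assumed to mirror upstream mmcv's documented usage.
from annotator.uniformer.mmcv.fileio import register_handler
from annotator.uniformer.mmcv.fileio.handlers import BaseFileHandler


@register_handler('txt')
class TxtHandler(BaseFileHandler):

    def load_from_fileobj(self, file):
        return file.read()

    def dump_to_fileobj(self, obj, file):
        file.write(str(obj))

    def dump_to_str(self, obj):
        return str(obj)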
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/fileio/handlers/json_handler.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import json

import numpy as np

from .base import BaseFileHandler


def set_default(obj):
    """Set default json values for non-serializable values.

    It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list.
    It also converts ``np.generic`` (including ``np.int32``, ``np.float32``,
    etc.) into plain numbers of plain python built-in types.
    """
    if isinstance(obj, (set, range)):
        return list(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, np.generic):
        return obj.item()
    raise TypeError(f'{type(obj)} is unsupported for json dump')


class JsonHandler(BaseFileHandler):

    def load_from_fileobj(self, file):
        return json.load(file)

    def dump_to_fileobj(self, obj, file, **kwargs):
        kwargs.setdefault('default', set_default)
        json.dump(obj, file, **kwargs)

    def dump_to_str(self, obj, **kwargs):
        kwargs.setdefault('default', set_default)
        return json.dumps(obj, **kwargs)
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import pickle

from .base import BaseFileHandler


class PickleHandler(BaseFileHandler):

    str_like = False

    def load_from_fileobj(self, file, **kwargs):
        return pickle.load(file, **kwargs)

    def load_from_path(self, filepath, **kwargs):
        return super(PickleHandler, self).load_from_path(
            filepath, mode='rb', **kwargs)

    def dump_to_str(self, obj, **kwargs):
        kwargs.setdefault('protocol', 2)
        return pickle.dumps(obj, **kwargs)

    def dump_to_fileobj(self, obj, file, **kwargs):
        kwargs.setdefault('protocol', 2)
        pickle.dump(obj, file, **kwargs)

    def dump_to_path(self, obj, filepath, **kwargs):
        super(PickleHandler, self).dump_to_path(
            obj, filepath, mode='wb', **kwargs)
--------------------------------------------------------------------------------
/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import yaml

try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

from .base import BaseFileHandler  # isort:skip


class YamlHandler(BaseFileHandler):

    def load_from_fileobj(self, file, **kwargs):
        kwargs.setdefault('Loader', Loader)
        return yaml.load(file, **kwargs)

    def dump_to_fileobj(self, obj, file, **kwargs):
        kwargs.setdefault('Dumper', Dumper)
        yaml.dump(obj, file, **kwargs)

    def dump_to_str(self, obj, **kwargs):
        kwargs.setdefault('Dumper', Dumper)
        return yaml.dump(obj, **kwargs)
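# Usage sketch (editor's illustration, not a repo file): the handlers above
# back mmcv's load/dump, dispatched on file extension; the /tmp paths are
# demo assumptions.
import annotator.uniformer.mmcv as mmcv

cfg = {'lr': 0.01, 'steps': [8, 11]}
mmcv.dump(cfg, '/tmp/cfg.yaml')  # handled by YamlHandler
mmcv.dump(cfg, '/tmp/cfg.json')  # handled by JsonHandler
assert mmcv.load('/tmp/cfg.yaml') == mmcv.load('/tmp/cfg.json') == cfg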
2 | import yaml 3 | 4 | try: 5 | from yaml import CLoader as Loader, CDumper as Dumper 6 | except ImportError: 7 | from yaml import Loader, Dumper 8 | 9 | from .base import BaseFileHandler # isort:skip 10 | 11 | 12 | class YamlHandler(BaseFileHandler): 13 | 14 | def load_from_fileobj(self, file, **kwargs): 15 | kwargs.setdefault('Loader', Loader) 16 | return yaml.load(file, **kwargs) 17 | 18 | def dump_to_fileobj(self, obj, file, **kwargs): 19 | kwargs.setdefault('Dumper', Dumper) 20 | yaml.dump(obj, file, **kwargs) 21 | 22 | def dump_to_str(self, obj, **kwargs): 23 | kwargs.setdefault('Dumper', Dumper) 24 | return yaml.dump(obj, **kwargs) 25 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/image/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, 3 | gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, 4 | rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) 5 | from .geometric import (cutout, imcrop, imflip, imflip_, impad, 6 | impad_to_multiple, imrescale, imresize, imresize_like, 7 | imresize_to_multiple, imrotate, imshear, imtranslate, 8 | rescale_size) 9 | from .io import imfrombytes, imread, imwrite, supported_backends, use_backend 10 | from .misc import tensor2imgs 11 | from .photometric import (adjust_brightness, adjust_color, adjust_contrast, 12 | adjust_lighting, adjust_sharpness, auto_contrast, 13 | clahe, imdenormalize, imequalize, iminvert, 14 | imnormalize, imnormalize_, lut_transform, posterize, 15 | solarize) 16 | 17 | __all__ = [ 18 | 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', 19 | 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', 20 | 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size', 21 | 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate', 22 | 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend', 23 | 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize', 24 | 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', 25 | 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize', 26 | 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe', 27 | 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting' 28 | ] 29 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/image/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | 4 | import annotator.uniformer.mmcv as mmcv 5 | 6 | try: 7 | import torch 8 | except ImportError: 9 | torch = None 10 | 11 | 12 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 13 | """Convert tensor to 3-channel images. 14 | 15 | Args: 16 | tensor (torch.Tensor): Tensor that contains multiple images, shape ( 17 | N, C, H, W). 18 | mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). 19 | std (tuple[float], optional): Standard deviation of images. 20 | Defaults to (1, 1, 1). 21 | to_rgb (bool, optional): Whether the tensor was converted to RGB 22 | format in the first place. If so, convert it back to BGR. 23 | Defaults to True. 24 | 25 | Returns: 26 | list[np.ndarray]: A list that contains multiple images. 
27 | """ 28 | 29 | if torch is None: 30 | raise RuntimeError('pytorch is not installed') 31 | assert torch.is_tensor(tensor) and tensor.ndim == 4 32 | assert len(mean) == 3 33 | assert len(std) == 3 34 | 35 | num_imgs = tensor.size(0) 36 | mean = np.array(mean, dtype=np.float32) 37 | std = np.array(std, dtype=np.float32) 38 | imgs = [] 39 | for img_id in range(num_imgs): 40 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 41 | img = mmcv.imdenormalize( 42 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 43 | imgs.append(np.ascontiguousarray(img)) 44 | return imgs 45 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/model_zoo/deprecated.json: -------------------------------------------------------------------------------- 1 | { 2 | "resnet50_caffe": "detectron/resnet50_caffe", 3 | "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", 4 | "resnet101_caffe": "detectron/resnet101_caffe", 5 | "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" 6 | } 7 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/ball_query.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch.autograd import Function 4 | 5 | from ..utils import ext_loader 6 | 7 | ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) 8 | 9 | 10 | class BallQuery(Function): 11 | """Find nearby points in spherical space.""" 12 | 13 | @staticmethod 14 | def forward(ctx, min_radius: float, max_radius: float, sample_num: int, 15 | xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor: 16 | """ 17 | Args: 18 | min_radius (float): minimum radius of the balls. 19 | max_radius (float): maximum radius of the balls. 20 | sample_num (int): maximum number of features in the balls. 21 | xyz (Tensor): (B, N, 3) xyz coordinates of the features. 22 | center_xyz (Tensor): (B, npoint, 3) centers of the ball query. 23 | 24 | Returns: 25 | Tensor: (B, npoint, nsample) tensor with the indices of 26 | the features that form the query balls. 27 | """ 28 | assert center_xyz.is_contiguous() 29 | assert xyz.is_contiguous() 30 | assert min_radius < max_radius 31 | 32 | B, N, _ = xyz.size() 33 | npoint = center_xyz.size(1) 34 | idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) 35 | 36 | ext_module.ball_query_forward( 37 | center_xyz, 38 | xyz, 39 | idx, 40 | b=B, 41 | n=N, 42 | m=npoint, 43 | min_radius=min_radius, 44 | max_radius=max_radius, 45 | nsample=sample_num) 46 | if torch.__version__ != 'parrots': 47 | ctx.mark_non_differentiable(idx) 48 | return idx 49 | 50 | @staticmethod 51 | def backward(ctx, a=None): 52 | return None, None, None, None 53 | 54 | 55 | ball_query = BallQuery.apply 56 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/box_iou_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..utils import ext_loader 3 | 4 | ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) 5 | 6 | 7 | def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): 8 | """Return intersection-over-union (Jaccard index) of boxes. 9 | 10 | Both sets of boxes are expected to be in 11 | (x_center, y_center, width, height, angle) format. 
12 | 13 | If ``aligned`` is ``False``, the IoUs are computed between every bbox 14 | of bboxes1 and every bbox of bboxes2; otherwise, only between the 15 | aligned pairs of bboxes1 and bboxes2. 16 | 17 | Args: 18 | bboxes1 (Tensor): rotated bboxes 1. \ 19 | It has shape (N, 5), indicating (x, y, w, h, theta) for each row. 20 | Note that theta is in radian. 21 | bboxes2 (Tensor): rotated bboxes 2. \ 22 | It has shape (M, 5), indicating (x, y, w, h, theta) for each row. 23 | Note that theta is in radian. 24 | mode (str): "iou" (intersection over union) or "iof" (intersection 25 | over foreground). 26 | 27 | Returns: 28 | Tensor: the IoUs, with shape (N, M) if ``aligned`` is False, else (N,) 29 | """ 30 | assert mode in ['iou', 'iof'] 31 | mode_dict = {'iou': 0, 'iof': 1} 32 | mode_flag = mode_dict[mode] 33 | rows = bboxes1.size(0) 34 | cols = bboxes2.size(0) 35 | if aligned: 36 | ious = bboxes1.new_zeros(rows) 37 | else: 38 | ious = bboxes1.new_zeros((rows * cols)) 39 | bboxes1 = bboxes1.contiguous() 40 | bboxes2 = bboxes2.contiguous() 41 | ext_module.box_iou_rotated( 42 | bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) 43 | if not aligned: 44 | ious = ious.view(rows, cols) 45 | return ious 46 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/contour_expand.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | 5 | from ..utils import ext_loader 6 | 7 | ext_module = ext_loader.load_ext('_ext', ['contour_expand']) 8 | 9 | 10 | def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, 11 | kernel_num): 12 | """Expand kernel contours so that foreground pixels are assigned into 13 | instances. 14 | 15 | Args: 16 | kernel_mask (np.ndarray or Tensor): The instance kernel mask with 17 | size hxw. 18 | internal_kernel_label (np.ndarray or Tensor): The instance internal 19 | kernel label with size hxw. 20 | min_kernel_area (int): The minimum kernel area. 21 | kernel_num (int): The instance kernel number. 22 | 23 | Returns: 24 | list: The instance index map with size hxw. 25 | """ 26 | assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) 27 | assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) 28 | assert isinstance(min_kernel_area, int) 29 | assert isinstance(kernel_num, int) 30 | 31 | if isinstance(kernel_mask, np.ndarray): 32 | kernel_mask = torch.from_numpy(kernel_mask) 33 | if isinstance(internal_kernel_label, np.ndarray): 34 | internal_kernel_label = torch.from_numpy(internal_kernel_label) 35 | 36 | if torch.__version__ == 'parrots': 37 | if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: 38 | label = [] 39 | else: 40 | label = ext_module.contour_expand( 41 | kernel_mask, 42 | internal_kernel_label, 43 | min_kernel_area=min_kernel_area, 44 | kernel_num=kernel_num) 45 | label = label.tolist() 46 | else: 47 | label = ext_module.contour_expand(kernel_mask, internal_kernel_label, 48 | min_kernel_area, kernel_num) 49 | return label 50 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/deprecated_wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # This file is for backward compatibility. 3 | # Module wrappers for empty tensor have been moved to mmcv.cnn.bricks.
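# A sketch of the intended migration path (assuming the ops package
# re-exports these classes under their original names, as upstream mmcv
# does in ops/__init__.py):
#
#   from annotator.uniformer.mmcv.ops import Conv2d  # works, but warns
#   from annotator.uniformer.mmcv.cnn import Conv2d  # preferred import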
4 | import warnings 5 | 6 | from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d 7 | 8 | 9 | class Conv2d_deprecated(Conv2d): 10 | 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | warnings.warn( 14 | 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' 15 | ' the future. Please import them from "mmcv.cnn" instead') 16 | 17 | 18 | class ConvTranspose2d_deprecated(ConvTranspose2d): 19 | 20 | def __init__(self, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | warnings.warn( 23 | 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' 24 | 'deprecated in the future. Please import them from "mmcv.cnn" ' 25 | 'instead') 26 | 27 | 28 | class MaxPool2d_deprecated(MaxPool2d): 29 | 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | warnings.warn( 33 | 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' 34 | ' the future. Please import them from "mmcv.cnn" instead') 35 | 36 | 37 | class Linear_deprecated(Linear): 38 | 39 | def __init__(self, *args, **kwargs): 40 | super().__init__(*args, **kwargs) 41 | warnings.warn( 42 | 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' 43 | ' the future. Please import them from "mmcv.cnn" instead') 44 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/gather_points.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from ..utils import ext_loader 5 | 6 | ext_module = ext_loader.load_ext( 7 | '_ext', ['gather_points_forward', 'gather_points_backward']) 8 | 9 | 10 | class GatherPoints(Function): 11 | """Gather points with given index.""" 12 | 13 | @staticmethod 14 | def forward(ctx, features: torch.Tensor, 15 | indices: torch.Tensor) -> torch.Tensor: 16 | """ 17 | Args: 18 | features (Tensor): (B, C, N) features to gather. 19 | indices (Tensor): (B, M) where M is the number of points. 20 | 21 | Returns: 22 | Tensor: (B, C, M) where M is the number of points. 23 | """ 24 | assert features.is_contiguous() 25 | assert indices.is_contiguous() 26 | 27 | B, npoint = indices.size() 28 | _, C, N = features.size() 29 | output = torch.cuda.FloatTensor(B, C, npoint) 30 | 31 | ext_module.gather_points_forward( 32 | features, indices, output, b=B, c=C, n=N, npoints=npoint) 33 | 34 | ctx.for_backwards = (indices, C, N) 35 | if torch.__version__ != 'parrots': 36 | ctx.mark_non_differentiable(indices) 37 | return output 38 | 39 | @staticmethod 40 | def backward(ctx, grad_out): 41 | idx, C, N = ctx.for_backwards 42 | B, npoint = idx.size() 43 | 44 | grad_features = torch.cuda.FloatTensor(B, C, N).zero_() 45 | grad_out_data = grad_out.data.contiguous() 46 | ext_module.gather_points_backward( 47 | grad_out_data, 48 | idx, 49 | grad_features.data, 50 | b=B, 51 | c=C, 52 | n=N, 53 | npoints=npoint) 54 | return grad_features, None 55 | 56 | 57 | gather_points = GatherPoints.apply 58 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
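# A pure-PyTorch reference for the GatherPoints op above (a sketch for
# illustration only; the real op dispatches to the compiled _ext module
# and the tensor shapes here are made up):
#
#   import torch
#   features = torch.randn(2, 8, 100)           # (B, C, N)
#   indices = torch.randint(0, 100, (2, 16))    # (B, M)
#   out = features.gather(
#       2, indices.unsqueeze(1).expand(-1, features.size(1), -1))
#   # out[b, c, m] == features[b, c, indices[b, m]], shape (B, C, M)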
2 | import glob 3 | import os 4 | 5 | import torch 6 | 7 | if torch.__version__ == 'parrots': 8 | import parrots 9 | 10 | def get_compiler_version(): 11 | return 'GCC ' + parrots.version.compiler 12 | 13 | def get_compiling_cuda_version(): 14 | return parrots.version.cuda 15 | else: 16 | from ..utils import ext_loader 17 | ext_module = ext_loader.load_ext( 18 | '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) 19 | 20 | def get_compiler_version(): 21 | return ext_module.get_compiler_version() 22 | 23 | def get_compiling_cuda_version(): 24 | return ext_module.get_compiling_cuda_version() 25 | 26 | 27 | def get_onnxruntime_op_path(): 28 | wildcard = os.path.join( 29 | os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 30 | '_ext_ort.*.so') 31 | 32 | paths = glob.glob(wildcard) 33 | if len(paths) > 0: 34 | return paths[0] 35 | else: 36 | return '' 37 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/three_nn.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | from torch.autograd import Function 5 | 6 | from ..utils import ext_loader 7 | 8 | ext_module = ext_loader.load_ext('_ext', ['three_nn_forward']) 9 | 10 | 11 | class ThreeNN(Function): 12 | """Find the top-3 nearest neighbors of the target set from the source set. 13 | 14 | Please refer to the `paper of PointNet++ 15 | <https://arxiv.org/abs/1706.02413>`_ for more details. 16 | """ 17 | 18 | @staticmethod 19 | def forward(ctx, target: torch.Tensor, 20 | source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 21 | """ 22 | Args: 23 | target (Tensor): shape (B, N, 3), the point set for which to 24 | find the nearest neighbors. 25 | source (Tensor): shape (B, M, 3), the point set searched for 26 | the nearest neighbors of the target points. 27 | 28 | Returns: 29 | Tuple[Tensor, Tensor]: dist and idx, both of shape (B, N, 3): 30 | the L2 distances to the three nearest neighbors and their indices. 31 | """ 32 | target = target.contiguous() 33 | source = source.contiguous() 34 | 35 | B, N, _ = target.size() 36 | m = source.size(1) 37 | dist2 = torch.cuda.FloatTensor(B, N, 3) 38 | idx = torch.cuda.IntTensor(B, N, 3) 39 | 40 | ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) 41 | if torch.__version__ != 'parrots': 42 | ctx.mark_non_differentiable(idx) 43 | 44 | return torch.sqrt(dist2), idx 45 | 46 | @staticmethod 47 | def backward(ctx, a=None, b=None): 48 | return None, None 49 | 50 | 51 | three_nn = ThreeNN.apply 52 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/ops/tin_shift.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
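# A pure-PyTorch sketch of what ThreeNN (above) computes, for illustration
# only; the real op requires the compiled CUDA extension:
#
#   import torch
#   target = torch.rand(2, 32, 3)            # (B, N, 3)
#   source = torch.rand(2, 64, 3)            # (B, M, 3)
#   d2 = torch.cdist(target, source) ** 2    # squared L2, (B, N, M)
#   dist2, idx = d2.topk(3, dim=-1, largest=False)
#   # three_nn(target, source) ~ (dist2.sqrt(), idx), both (B, N, 3)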
2 | # Code reference from "Temporal Interlacing Network" 3 | # https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py 4 | # Hao Shao, Shengju Qian, Yu Liu 5 | # shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import Function 10 | 11 | from ..utils import ext_loader 12 | 13 | ext_module = ext_loader.load_ext('_ext', 14 | ['tin_shift_forward', 'tin_shift_backward']) 15 | 16 | 17 | class TINShiftFunction(Function): 18 | 19 | @staticmethod 20 | def forward(ctx, input, shift): 21 | C = input.size(2) 22 | num_segments = shift.size(1) 23 | if C // num_segments <= 0 or C % num_segments != 0: 24 | raise ValueError('C should be a multiple of num_segments, ' 25 | f'but got C={C} and num_segments={num_segments}.') 26 | 27 | ctx.save_for_backward(shift) 28 | 29 | out = torch.zeros_like(input) 30 | ext_module.tin_shift_forward(input, shift, out) 31 | 32 | return out 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | 37 | shift = ctx.saved_tensors[0] 38 | data_grad_input = grad_output.new(*grad_output.size()).zero_() 39 | shift_grad_input = shift.new(*shift.size()).zero_() 40 | ext_module.tin_shift_backward(grad_output, shift, data_grad_input) 41 | 42 | return data_grad_input, shift_grad_input 43 | 44 | 45 | tin_shift = TINShiftFunction.apply 46 | 47 | 48 | class TINShift(nn.Module): 49 | """Temporal Interlace Shift. 50 | 51 | Temporal Interlace shift is a differentiable temporal-wise frame shifting 52 | which is proposed in "Temporal Interlacing Network" 53 | 54 | Please refer to https://arxiv.org/abs/2001.06499 for more details. 55 | Code is modified from https://github.com/mit-han-lab/temporal-shift-module 56 | """ 57 | 58 | def forward(self, input, shift): 59 | """Perform temporal interlace shift. 60 | 61 | Args: 62 | input (Tensor): Feature map with shape [N, num_segments, C, H * W]. 63 | shift (Tensor): Shift tensor with shape [N, num_segments]. 64 | 65 | Returns: 66 | Feature map after temporal interlace shift. 67 | """ 68 | return tin_shift(input, shift) 69 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/parallel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .collate import collate 3 | from .data_container import DataContainer 4 | from .data_parallel import MMDataParallel 5 | from .distributed import MMDistributedDataParallel 6 | from .registry import MODULE_WRAPPERS 7 | from .scatter_gather import scatter, scatter_kwargs 8 | from .utils import is_module_wrapper 9 | 10 | __all__ = [ 11 | 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', 12 | 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' 13 | ] 14 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/parallel/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
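# Shape contract for the TINShift module above (sizes are hypothetical;
# running it needs a CUDA tensor and the compiled _ext extension):
#
#   import torch
#   x = torch.randn(4, 8, 16, 56 * 56)             # (N, num_segments, C, H*W)
#   shift = torch.randint(-2, 3, (4, 8)).float()   # (N, num_segments)
#   # out = TINShift()(x, shift) has the same shape as x; forward()
#   # raises ValueError unless C is a multiple of num_segments.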
2 | from torch.nn.parallel import DataParallel, DistributedDataParallel 3 | 4 | from annotator.uniformer.mmcv.utils import Registry 5 | 6 | MODULE_WRAPPERS = Registry('module wrapper') 7 | MODULE_WRAPPERS.register_module(module=DataParallel) 8 | MODULE_WRAPPERS.register_module(module=DistributedDataParallel) 9 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/parallel/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .registry import MODULE_WRAPPERS 3 | 4 | 5 | def is_module_wrapper(module): 6 | """Check if a module is a module wrapper. 7 | 8 | The following 3 modules in MMCV (and their subclasses) are regarded as 9 | module wrappers: DataParallel, DistributedDataParallel, 10 | MMDistributedDataParallel (the deprecated version). You may add your own 11 | module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS. 12 | 13 | Args: 14 | module (nn.Module): The module to be checked. 15 | 16 | Returns: 17 | bool: True if the input module is a module wrapper. 18 | """ 19 | module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values()) 20 | return isinstance(module, module_wrappers) 21 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import copy 3 | 4 | from ..utils import Registry 5 | 6 | RUNNERS = Registry('runner') 7 | RUNNER_BUILDERS = Registry('runner builder') 8 | 9 | 10 | def build_runner_constructor(cfg): 11 | return RUNNER_BUILDERS.build(cfg) 12 | 13 | 14 | def build_runner(cfg, default_args=None): 15 | runner_cfg = copy.deepcopy(cfg) 16 | constructor_type = runner_cfg.pop('constructor', 17 | 'DefaultRunnerConstructor') 18 | runner_constructor = build_runner_constructor( 19 | dict( 20 | type=constructor_type, 21 | runner_cfg=runner_cfg, 22 | default_args=default_args)) 23 | runner = runner_constructor() 24 | return runner 25 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/default_constructor.py: -------------------------------------------------------------------------------- 1 | from .builder import RUNNER_BUILDERS, RUNNERS 2 | 3 | 4 | @RUNNER_BUILDERS.register_module() 5 | class DefaultRunnerConstructor: 6 | """Default constructor for runners. 7 | 8 | Customize an existing `Runner` like `EpochBasedRunner` through a `RunnerConstructor`. 9 | For example, we can inject new properties and functions into a `Runner`. 10 | 11 | Example: 12 | >>> from annotator.uniformer.mmcv.runner import RUNNER_BUILDERS, build_runner 13 | >>> # Define a new RunnerConstructor 14 | >>> @RUNNER_BUILDERS.register_module() 15 | >>> class MyRunnerConstructor: 16 | ... def __init__(self, runner_cfg, default_args=None): 17 | ... if not isinstance(runner_cfg, dict): 18 | ... raise TypeError('runner_cfg should be a dict', 19 | ... f'but got {type(runner_cfg)}') 20 | ... self.runner_cfg = runner_cfg 21 | ... self.default_args = default_args 22 | ... 23 | ... def __call__(self): 24 | ... runner = RUNNERS.build(self.runner_cfg, 25 | ... default_args=self.default_args) 26 | ... # Add new properties for existing runner 27 | ... runner.my_name = 'my_runner' 28 | ... runner.my_function = lambda self: print(self.my_name) 29 | ... ...
30 | >>> # build your runner 31 | >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40, 32 | ... constructor='MyRunnerConstructor') 33 | >>> runner = build_runner(runner_cfg) 34 | """ 35 | 36 | def __init__(self, runner_cfg, default_args=None): 37 | if not isinstance(runner_cfg, dict): 38 | raise TypeError('runner_cfg should be a dict', 39 | f'but got {type(runner_cfg)}') 40 | self.runner_cfg = runner_cfg 41 | self.default_args = default_args 42 | 43 | def __call__(self): 44 | return RUNNERS.build(self.runner_cfg, default_args=self.default_args) 45 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .checkpoint import CheckpointHook 3 | from .closure import ClosureHook 4 | from .ema import EMAHook 5 | from .evaluation import DistEvalHook, EvalHook 6 | from .hook import HOOKS, Hook 7 | from .iter_timer import IterTimerHook 8 | from .logger import (DvcliveLoggerHook, LoggerHook, MlflowLoggerHook, 9 | NeptuneLoggerHook, PaviLoggerHook, TensorboardLoggerHook, 10 | TextLoggerHook, WandbLoggerHook) 11 | from .lr_updater import LrUpdaterHook 12 | from .memory import EmptyCacheHook 13 | from .momentum_updater import MomentumUpdaterHook 14 | from .optimizer import (Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, 15 | GradientCumulativeOptimizerHook, OptimizerHook) 16 | from .profiler import ProfilerHook 17 | from .sampler_seed import DistSamplerSeedHook 18 | from .sync_buffer import SyncBuffersHook 19 | 20 | __all__ = [ 21 | 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', 22 | 'OptimizerHook', 'Fp16OptimizerHook', 'IterTimerHook', 23 | 'DistSamplerSeedHook', 'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', 24 | 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', 25 | 'NeptuneLoggerHook', 'WandbLoggerHook', 'DvcliveLoggerHook', 26 | 'MomentumUpdaterHook', 'SyncBuffersHook', 'EMAHook', 'EvalHook', 27 | 'DistEvalHook', 'ProfilerHook', 'GradientCumulativeOptimizerHook', 28 | 'GradientCumulativeFp16OptimizerHook' 29 | ] 30 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/closure.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .hook import HOOKS, Hook 3 | 4 | 5 | @HOOKS.register_module() 6 | class ClosureHook(Hook): 7 | 8 | def __init__(self, fn_name, fn): 9 | assert hasattr(self, fn_name) 10 | assert callable(fn) 11 | setattr(self, fn_name, fn) 12 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/iter_timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
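# ClosureHook (above) promotes a plain callable to a runner hook; a small
# sketch (the function body and the registration call are illustrative):
#
#   from annotator.uniformer.mmcv.runner.hooks import ClosureHook
#
#   def print_epoch(runner):
#       print(f'finished epoch {runner.epoch}')
#
#   hook = ClosureHook('after_epoch', print_epoch)
#   # runner.register_hook(hook)  ->  print_epoch runs after every epoch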
2 | import time 3 | 4 | from .hook import HOOKS, Hook 5 | 6 | 7 | @HOOKS.register_module() 8 | class IterTimerHook(Hook): 9 | 10 | def before_epoch(self, runner): 11 | self.t = time.time() 12 | 13 | def before_iter(self, runner): 14 | runner.log_buffer.update({'data_time': time.time() - self.t}) 15 | 16 | def after_iter(self, runner): 17 | runner.log_buffer.update({'time': time.time() - self.t}) 18 | self.t = time.time() 19 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import LoggerHook 3 | from .dvclive import DvcliveLoggerHook 4 | from .mlflow import MlflowLoggerHook 5 | from .neptune import NeptuneLoggerHook 6 | from .pavi import PaviLoggerHook 7 | from .tensorboard import TensorboardLoggerHook 8 | from .text import TextLoggerHook 9 | from .wandb import WandbLoggerHook 10 | 11 | __all__ = [ 12 | 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', 13 | 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook', 14 | 'NeptuneLoggerHook', 'DvcliveLoggerHook' 15 | ] 16 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ...dist_utils import master_only 3 | from ..hook import HOOKS 4 | from .base import LoggerHook 5 | 6 | 7 | @HOOKS.register_module() 8 | class DvcliveLoggerHook(LoggerHook): 9 | """Class to log metrics with dvclive. 10 | 11 | It requires `dvclive`_ to be installed. 12 | 13 | Args: 14 | path (str): Directory where dvclive will write TSV log files. 15 | interval (int): Logging interval (every k iterations). 16 | Default 10. 17 | ignore_last (bool): Ignore the log of last iterations in each epoch 18 | if less than `interval`. 19 | Default: True. 20 | reset_flag (bool): Whether to clear the output buffer after logging. 21 | Default: True. 22 | by_epoch (bool): Whether EpochBasedRunner is used. 23 | Default: True. 24 | 25 | .. _dvclive: 26 | https://dvc.org/doc/dvclive 27 | """ 28 | 29 | def __init__(self, 30 | path, 31 | interval=10, 32 | ignore_last=True, 33 | reset_flag=True, 34 | by_epoch=True): 35 | 36 | super(DvcliveLoggerHook, self).__init__(interval, ignore_last, 37 | reset_flag, by_epoch) 38 | self.path = path 39 | self.import_dvclive() 40 | 41 | def import_dvclive(self): 42 | try: 43 | import dvclive 44 | except ImportError: 45 | raise ImportError( 46 | 'Please run "pip install dvclive" to install dvclive') 47 | self.dvclive = dvclive 48 | 49 | @master_only 50 | def before_run(self, runner): 51 | self.dvclive.init(self.path) 52 | 53 | @master_only 54 | def log(self, runner): 55 | tags = self.get_loggable_tags(runner) 56 | if tags: 57 | for k, v in tags.items(): 58 | self.dvclive.log(k, v, step=self.get_iter(runner)) 59 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
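# A minimal config sketch wiring up the DvcliveLoggerHook above via the
# standard mmcv ``log_config`` field (the path value is made up):
#
#   log_config = dict(
#       interval=10,
#       hooks=[dict(type='DvcliveLoggerHook', path='./dvclive')])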
2 | import os.path as osp 3 | 4 | from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version 5 | from ...dist_utils import master_only 6 | from ..hook import HOOKS 7 | from .base import LoggerHook 8 | 9 | 10 | @HOOKS.register_module() 11 | class TensorboardLoggerHook(LoggerHook): 12 | 13 | def __init__(self, 14 | log_dir=None, 15 | interval=10, 16 | ignore_last=True, 17 | reset_flag=False, 18 | by_epoch=True): 19 | super(TensorboardLoggerHook, self).__init__(interval, ignore_last, 20 | reset_flag, by_epoch) 21 | self.log_dir = log_dir 22 | 23 | @master_only 24 | def before_run(self, runner): 25 | super(TensorboardLoggerHook, self).before_run(runner) 26 | if (TORCH_VERSION == 'parrots' 27 | or digit_version(TORCH_VERSION) < digit_version('1.1')): 28 | try: 29 | from tensorboardX import SummaryWriter 30 | except ImportError: 31 | raise ImportError('Please install tensorboardX to use ' 32 | 'TensorboardLoggerHook.') 33 | else: 34 | try: 35 | from torch.utils.tensorboard import SummaryWriter 36 | except ImportError: 37 | raise ImportError( 38 | 'Please run "pip install future tensorboard" to install ' 39 | 'the dependencies to use torch.utils.tensorboard ' 40 | '(applicable to PyTorch 1.1 or higher)') 41 | 42 | if self.log_dir is None: 43 | self.log_dir = osp.join(runner.work_dir, 'tf_logs') 44 | self.writer = SummaryWriter(self.log_dir) 45 | 46 | @master_only 47 | def log(self, runner): 48 | tags = self.get_loggable_tags(runner, allow_text=True) 49 | for tag, val in tags.items(): 50 | if isinstance(val, str): 51 | self.writer.add_text(tag, val, self.get_iter(runner)) 52 | else: 53 | self.writer.add_scalar(tag, val, self.get_iter(runner)) 54 | 55 | @master_only 56 | def after_run(self, runner): 57 | self.writer.close() 58 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/logger/wandb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
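# TensorboardLoggerHook (above) can be enabled the same way; log_dir may
# be omitted, in which case before_run() falls back to <work_dir>/tf_logs:
#
#   log_config = dict(
#       interval=50,
#       hooks=[dict(type='TensorboardLoggerHook')])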
2 | from ...dist_utils import master_only 3 | from ..hook import HOOKS 4 | from .base import LoggerHook 5 | 6 | 7 | @HOOKS.register_module() 8 | class WandbLoggerHook(LoggerHook): 9 | 10 | def __init__(self, 11 | init_kwargs=None, 12 | interval=10, 13 | ignore_last=True, 14 | reset_flag=False, 15 | commit=True, 16 | by_epoch=True, 17 | with_step=True): 18 | super(WandbLoggerHook, self).__init__(interval, ignore_last, 19 | reset_flag, by_epoch) 20 | self.import_wandb() 21 | self.init_kwargs = init_kwargs 22 | self.commit = commit 23 | self.with_step = with_step 24 | 25 | def import_wandb(self): 26 | try: 27 | import wandb 28 | except ImportError: 29 | raise ImportError( 30 | 'Please run "pip install wandb" to install wandb') 31 | self.wandb = wandb 32 | 33 | @master_only 34 | def before_run(self, runner): 35 | super(WandbLoggerHook, self).before_run(runner) 36 | if self.wandb is None: 37 | self.import_wandb() 38 | if self.init_kwargs: 39 | self.wandb.init(**self.init_kwargs) 40 | else: 41 | self.wandb.init() 42 | 43 | @master_only 44 | def log(self, runner): 45 | tags = self.get_loggable_tags(runner) 46 | if tags: 47 | if self.with_step: 48 | self.wandb.log( 49 | tags, step=self.get_iter(runner), commit=self.commit) 50 | else: 51 | tags['global_step'] = self.get_iter(runner) 52 | self.wandb.log(tags, commit=self.commit) 53 | 54 | @master_only 55 | def after_run(self, runner): 56 | self.wandb.join() 57 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from .hook import HOOKS, Hook 5 | 6 | 7 | @HOOKS.register_module() 8 | class EmptyCacheHook(Hook): 9 | 10 | def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): 11 | self._before_epoch = before_epoch 12 | self._after_epoch = after_epoch 13 | self._after_iter = after_iter 14 | 15 | def after_iter(self, runner): 16 | if self._after_iter: 17 | torch.cuda.empty_cache() 18 | 19 | def before_epoch(self, runner): 20 | if self._before_epoch: 21 | torch.cuda.empty_cache() 22 | 23 | def after_epoch(self, runner): 24 | if self._after_epoch: 25 | torch.cuda.empty_cache() 26 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/sampler_seed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .hook import HOOKS, Hook 3 | 4 | 5 | @HOOKS.register_module() 6 | class DistSamplerSeedHook(Hook): 7 | """Hook that sets the epoch of the data-loading sampler in distributed training. 8 | 9 | In distributed training, it is only useful in conjunction with 10 | :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same 11 | purpose with :obj:`IterLoader`. 12 | """ 13 | 14 | def before_epoch(self, runner): 15 | if hasattr(runner.data_loader.sampler, 'set_epoch'): 16 | # in case the data loader uses `SequentialSampler` in PyTorch 17 | runner.data_loader.sampler.set_epoch(runner.epoch) 18 | elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'): 19 | # the batch sampler in PyTorch wraps the sampler as one of its attributes.
20 | runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch) 21 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/hooks/sync_buffer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..dist_utils import allreduce_params 3 | from .hook import HOOKS, Hook 4 | 5 | 6 | @HOOKS.register_module() 7 | class SyncBuffersHook(Hook): 8 | """Synchronize model buffers such as running_mean and running_var in BN at 9 | the end of each epoch. 10 | 11 | Args: 12 | distributed (bool): Whether distributed training is used. It is 13 | effective only for distributed training. Defaults to True. 14 | """ 15 | 16 | def __init__(self, distributed=True): 17 | self.distributed = distributed 18 | 19 | def after_epoch(self, runner): 20 | """All-reduce model buffers at the end of each epoch.""" 21 | if self.distributed: 22 | allreduce_params(runner.model.buffers()) 23 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/log_buffer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | 6 | 7 | class LogBuffer: 8 | 9 | def __init__(self): 10 | self.val_history = OrderedDict() 11 | self.n_history = OrderedDict() 12 | self.output = OrderedDict() 13 | self.ready = False 14 | 15 | def clear(self): 16 | self.val_history.clear() 17 | self.n_history.clear() 18 | self.clear_output() 19 | 20 | def clear_output(self): 21 | self.output.clear() 22 | self.ready = False 23 | 24 | def update(self, vars, count=1): 25 | assert isinstance(vars, dict) 26 | for key, var in vars.items(): 27 | if key not in self.val_history: 28 | self.val_history[key] = [] 29 | self.n_history[key] = [] 30 | self.val_history[key].append(var) 31 | self.n_history[key].append(count) 32 | 33 | def average(self, n=0): 34 | """Average latest n values or all values.""" 35 | assert n >= 0 36 | for key in self.val_history: 37 | values = np.array(self.val_history[key][-n:]) 38 | nums = np.array(self.n_history[key][-n:]) 39 | avg = np.sum(values * nums) / np.sum(nums) 40 | self.output[key] = avg 41 | self.ready = True 42 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer, 3 | build_optimizer_constructor) 4 | from .default_constructor import DefaultOptimizerConstructor 5 | 6 | __all__ = [ 7 | 'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor', 8 | 'build_optimizer', 'build_optimizer_constructor' 9 | ] 10 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/optimizer/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
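# LogBuffer (above) keeps per-key histories of (value, count) pairs, and
# its average() is count-weighted. A quick sketch with made-up numbers:
#
#   buf = LogBuffer()
#   buf.update({'loss': 1.0}, count=2)
#   buf.update({'loss': 2.0}, count=2)
#   buf.average()
#   assert buf.output['loss'] == 1.5   # (1.0*2 + 2.0*2) / 4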
2 | import copy 3 | import inspect 4 | 5 | import torch 6 | 7 | from ...utils import Registry, build_from_cfg 8 | 9 | OPTIMIZERS = Registry('optimizer') 10 | OPTIMIZER_BUILDERS = Registry('optimizer builder') 11 | 12 | 13 | def register_torch_optimizers(): 14 | torch_optimizers = [] 15 | for module_name in dir(torch.optim): 16 | if module_name.startswith('__'): 17 | continue 18 | _optim = getattr(torch.optim, module_name) 19 | if inspect.isclass(_optim) and issubclass(_optim, 20 | torch.optim.Optimizer): 21 | OPTIMIZERS.register_module()(_optim) 22 | torch_optimizers.append(module_name) 23 | return torch_optimizers 24 | 25 | 26 | TORCH_OPTIMIZERS = register_torch_optimizers() 27 | 28 | 29 | def build_optimizer_constructor(cfg): 30 | return build_from_cfg(cfg, OPTIMIZER_BUILDERS) 31 | 32 | 33 | def build_optimizer(model, cfg): 34 | optimizer_cfg = copy.deepcopy(cfg) 35 | constructor_type = optimizer_cfg.pop('constructor', 36 | 'DefaultOptimizerConstructor') 37 | paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) 38 | optim_constructor = build_optimizer_constructor( 39 | dict( 40 | type=constructor_type, 41 | optimizer_cfg=optimizer_cfg, 42 | paramwise_cfg=paramwise_cfg)) 43 | optimizer = optim_constructor(model) 44 | return optimizer 45 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/runner/priority.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from enum import Enum 3 | 4 | 5 | class Priority(Enum): 6 | """Hook priority levels. 7 | 8 | +--------------+------------+ 9 | | Level | Value | 10 | +==============+============+ 11 | | HIGHEST | 0 | 12 | +--------------+------------+ 13 | | VERY_HIGH | 10 | 14 | +--------------+------------+ 15 | | HIGH | 30 | 16 | +--------------+------------+ 17 | | ABOVE_NORMAL | 40 | 18 | +--------------+------------+ 19 | | NORMAL | 50 | 20 | +--------------+------------+ 21 | | BELOW_NORMAL | 60 | 22 | +--------------+------------+ 23 | | LOW | 70 | 24 | +--------------+------------+ 25 | | VERY_LOW | 90 | 26 | +--------------+------------+ 27 | | LOWEST | 100 | 28 | +--------------+------------+ 29 | """ 30 | 31 | HIGHEST = 0 32 | VERY_HIGH = 10 33 | HIGH = 30 34 | ABOVE_NORMAL = 40 35 | NORMAL = 50 36 | BELOW_NORMAL = 60 37 | LOW = 70 38 | VERY_LOW = 90 39 | LOWEST = 100 40 | 41 | 42 | def get_priority(priority): 43 | """Get priority value. 44 | 45 | Args: 46 | priority (int or str or :obj:`Priority`): Priority. 47 | 48 | Returns: 49 | int: The priority value. 50 | """ 51 | if isinstance(priority, int): 52 | if priority < 0 or priority > 100: 53 | raise ValueError('priority must be between 0 and 100') 54 | return priority 55 | elif isinstance(priority, Priority): 56 | return priority.value 57 | elif isinstance(priority, str): 58 | return Priority[priority.upper()].value 59 | else: 60 | raise TypeError('priority must be an integer or Priority enum value') 61 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/utils/ext_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import importlib 3 | import os 4 | import pkgutil 5 | import warnings 6 | from collections import namedtuple 7 | 8 | import torch 9 | 10 | if torch.__version__ != 'parrots': 11 | 12 | def load_ext(name, funcs): 13 | ext = importlib.import_module('mmcv.' 
+ name) 14 | for fun in funcs: 15 | assert hasattr(ext, fun), f'{fun} miss in module {name}' 16 | return ext 17 | else: 18 | from parrots import extension 19 | from parrots.base import ParrotsException 20 | 21 | has_return_value_ops = [ 22 | 'nms', 23 | 'softnms', 24 | 'nms_match', 25 | 'nms_rotated', 26 | 'top_pool_forward', 27 | 'top_pool_backward', 28 | 'bottom_pool_forward', 29 | 'bottom_pool_backward', 30 | 'left_pool_forward', 31 | 'left_pool_backward', 32 | 'right_pool_forward', 33 | 'right_pool_backward', 34 | 'fused_bias_leakyrelu', 35 | 'upfirdn2d', 36 | 'ms_deform_attn_forward', 37 | 'pixel_group', 38 | 'contour_expand', 39 | ] 40 | 41 | def get_fake_func(name, e): 42 | 43 | def fake_func(*args, **kwargs): 44 | warnings.warn(f'{name} is not supported in parrots now') 45 | raise e 46 | 47 | return fake_func 48 | 49 | def load_ext(name, funcs): 50 | ExtModule = namedtuple('ExtModule', funcs) 51 | ext_list = [] 52 | lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 53 | for fun in funcs: 54 | try: 55 | ext_fun = extension.load(fun, name, lib_dir=lib_root) 56 | except ParrotsException as e: 57 | if 'No element registered' not in e.message: 58 | warnings.warn(e.message) 59 | ext_fun = get_fake_func(fun, e) 60 | ext_list.append(ext_fun) 61 | else: 62 | if fun in has_return_value_ops: 63 | ext_list.append(ext_fun.op) 64 | else: 65 | ext_list.append(ext_fun.op_) 66 | return ExtModule(*ext_list) 67 | 68 | 69 | def check_ops_exist(): 70 | ext_loader = pkgutil.find_loader('mmcv._ext') 71 | return ext_loader is not None 72 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/utils/parrots_jit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | 4 | from .parrots_wrapper import TORCH_VERSION 5 | 6 | parrots_jit_option = os.getenv('PARROTS_JIT_OPTION') 7 | 8 | if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON': 9 | from parrots.jit import pat as jit 10 | else: 11 | 12 | def jit(func=None, 13 | check_input=None, 14 | full_shape=True, 15 | derivate=False, 16 | coderize=False, 17 | optimize=False): 18 | 19 | def wrapper(func): 20 | 21 | def wrapper_inner(*args, **kargs): 22 | return func(*args, **kargs) 23 | 24 | return wrapper_inner 25 | 26 | if func is None: 27 | return wrapper 28 | else: 29 | return func 30 | 31 | 32 | if TORCH_VERSION == 'parrots': 33 | from parrots.utils.tester import skip_no_elena 34 | else: 35 | 36 | def skip_no_elena(func): 37 | 38 | def wrapper(*args, **kargs): 39 | return func(*args, **kargs) 40 | 41 | return wrapper 42 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/utils/trace.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch 4 | 5 | from annotator.uniformer.mmcv.utils import digit_version 6 | 7 | 8 | def is_jit_tracing() -> bool: 9 | if (torch.__version__ != 'parrots' 10 | and digit_version(torch.__version__) >= digit_version('1.6.0')): 11 | on_trace = torch.jit.is_tracing() 12 | # In PyTorch 1.6, torch.jit.is_tracing has a bug. 13 | # Refers to https://github.com/pytorch/pytorch/issues/42448 14 | if isinstance(on_trace, bool): 15 | return on_trace 16 | else: 17 | return torch._C._is_tracing() 18 | else: 19 | warnings.warn( 20 | 'torch.jit.is_tracing is only supported after v1.6.0. ' 21 | 'Therefore is_tracing returns False automatically. 
Please ' 22 | 'set on_trace manually if you are using trace.', UserWarning) 23 | return False 24 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | __version__ = '1.3.17' 3 | 4 | 5 | def parse_version_info(version_str: str, length: int = 4) -> tuple: 6 | """Parse a version string into a tuple. 7 | 8 | Args: 9 | version_str (str): The version string. 10 | length (int): The maximum number of version levels. Default: 4. 11 | 12 | Returns: 13 | tuple[int | str]: The version info, e.g., "1.3.0" is parsed into 14 | (1, 3, 0, 0, 0, 0), and "2.0.0rc1" is parsed into 15 | (2, 0, 0, 0, 'rc', 1) (when length is set to 4). 16 | """ 17 | from packaging.version import parse 18 | version = parse(version_str) 19 | assert version.release, f'failed to parse version {version_str}' 20 | release = list(version.release) 21 | release = release[:length] 22 | if len(release) < length: 23 | release = release + [0] * (length - len(release)) 24 | if version.is_prerelease: 25 | release.extend(list(version.pre)) 26 | elif version.is_postrelease: 27 | release.extend(list(version.post)) 28 | else: 29 | release.extend([0, 0]) 30 | return tuple(release) 31 | 32 | 33 | version_info = tuple(int(x) for x in __version__.split('.')[:3]) 34 | 35 | __all__ = ['__version__', 'version_info', 'parse_version_info'] 36 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/video/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .io import Cache, VideoReader, frames2video 3 | from .optflow import (dequantize_flow, flow_from_bytes, flow_warp, flowread, 4 | flowwrite, quantize_flow, sparse_flow_from_bytes) 5 | from .processing import concat_video, convert_video, cut_video, resize_video 6 | 7 | __all__ = [ 8 | 'Cache', 'VideoReader', 'frames2video', 'convert_video', 'resize_video', 9 | 'cut_video', 'concat_video', 'flowread', 'flowwrite', 'quantize_flow', 10 | 'dequantize_flow', 'flow_warp', 'flow_from_bytes', 'sparse_flow_from_bytes' 11 | ] 12 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .color import Color, color_val 3 | from .image import imshow, imshow_bboxes, imshow_det_bboxes 4 | from .optflow import flow2rgb, flowshow, make_color_wheel 5 | 6 | __all__ = [ 7 | 'Color', 'color_val', 'imshow', 'imshow_bboxes', 'imshow_det_bboxes', 8 | 'flowshow', 'flow2rgb', 'make_color_wheel' 9 | ] 10 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv/visualization/color.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from enum import Enum 3 | 4 | import numpy as np 5 | 6 | from annotator.uniformer.mmcv.utils import is_str 7 | 8 | 9 | class Color(Enum): 10 | """An enum that defines common colors. 11 | 12 | Contains red, green, blue, cyan, yellow, magenta, white and black. 
13 | """ 14 | red = (0, 0, 255) 15 | green = (0, 255, 0) 16 | blue = (255, 0, 0) 17 | cyan = (255, 255, 0) 18 | yellow = (0, 255, 255) 19 | magenta = (255, 0, 255) 20 | white = (255, 255, 255) 21 | black = (0, 0, 0) 22 | 23 | 24 | def color_val(color): 25 | """Convert various input to color tuples. 26 | 27 | Args: 28 | color (:obj:`Color`/str/tuple/int/ndarray): Color inputs 29 | 30 | Returns: 31 | tuple[int]: A tuple of 3 integers indicating BGR channels. 32 | """ 33 | if is_str(color): 34 | return Color[color].value 35 | elif isinstance(color, Color): 36 | return color.value 37 | elif isinstance(color, tuple): 38 | assert len(color) == 3 39 | for channel in color: 40 | assert 0 <= channel <= 255 41 | return color 42 | elif isinstance(color, int): 43 | assert 0 <= color <= 255 44 | return color, color, color 45 | elif isinstance(color, np.ndarray): 46 | assert color.ndim == 1 and color.size == 3 47 | assert np.all((color >= 0) & (color <= 255)) 48 | color = color.astype(np.uint8) 49 | return tuple(color) 50 | else: 51 | raise TypeError(f'Invalid type for color: {type(color)}') 52 | -------------------------------------------------------------------------------- /annotator/uniformer/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint 4 | 5 | __all__ = ['load_checkpoint'] -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import inference_segmentor, init_segmentor, show_result_pyplot 2 | from .test import multi_gpu_test, single_gpu_test 3 | from .train import get_root_logger, set_random_seed, train_segmentor 4 | 5 | __all__ = [ 6 | 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', 7 | 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', 8 | 'show_result_pyplot' 9 | ] 10 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * # noqa: F401, F403 2 | from .seg import * # noqa: F401, F403 3 | from .utils import * # noqa: F401, F403 4 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import get_classes, get_palette 2 | from .eval_hooks import DistEvalHook, EvalHook 3 | from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou 4 | 5 | __all__ = [ 6 | 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', 7 | 'eval_metrics', 'get_classes', 'get_palette' 8 | ] 9 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_pixel_sampler 2 | from .sampler import BasePixelSampler, OHEMPixelSampler 3 | 4 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] 5 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/seg/builder.py: -------------------------------------------------------------------------------- 1 | from 
annotator.uniformer.mmcv.utils import Registry, build_from_cfg 2 | 3 | PIXEL_SAMPLERS = Registry('pixel sampler') 4 | 5 | 6 | def build_pixel_sampler(cfg, **default_args): 7 | """Build pixel sampler for segmentation map.""" 8 | return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) 9 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/seg/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_pixel_sampler import BasePixelSampler 2 | from .ohem_pixel_sampler import OHEMPixelSampler 3 | 4 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler'] 5 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BasePixelSampler(metaclass=ABCMeta): 5 | """Base class of pixel sampler.""" 6 | 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | @abstractmethod 11 | def sample(self, seg_logit, seg_label): 12 | """Placeholder for sample function.""" 13 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import add_prefix 2 | 3 | __all__ = ['add_prefix'] 4 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | def add_prefix(inputs, prefix): 2 | """Add prefix for dict. 3 | 4 | Args: 5 | inputs (dict): The input dict with str keys. 6 | prefix (str): The prefix to add. 7 | 8 | Returns: 9 | 10 | dict: The dict with keys updated with ``prefix``. 
11 | """ 12 | 13 | outputs = dict() 14 | for name, value in inputs.items(): 15 | outputs[f'{prefix}.{name}'] = value 16 | 17 | return outputs 18 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .ade import ADE20KDataset 2 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 3 | from .chase_db1 import ChaseDB1Dataset 4 | from .cityscapes import CityscapesDataset 5 | from .custom import CustomDataset 6 | from .dataset_wrappers import ConcatDataset, RepeatDataset 7 | from .drive import DRIVEDataset 8 | from .hrf import HRFDataset 9 | from .pascal_context import PascalContextDataset, PascalContextDataset59 10 | from .stare import STAREDataset 11 | from .voc import PascalVOCDataset 12 | 13 | __all__ = [ 14 | 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 15 | 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', 16 | 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', 17 | 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 18 | 'STAREDataset' 19 | ] 20 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class ChaseDB1Dataset(CustomDataset): 9 | """Chase_db1 dataset. 10 | 11 | In segmentation map annotation for Chase_db1, 0 stands for background, 12 | which is included in 2 categories. ``reduce_zero_label`` is fixed to False. 13 | The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '_1stHO.png'. 15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(ChaseDB1Dataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='_1stHO.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert osp.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 2 | 3 | from .builder import DATASETS 4 | 5 | 6 | @DATASETS.register_module() 7 | class ConcatDataset(_ConcatDataset): 8 | """A wrapper of concatenated dataset. 9 | 10 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 11 | concat the group flag for image aspect ratio. 12 | 13 | Args: 14 | datasets (list[:obj:`Dataset`]): A list of datasets. 15 | """ 16 | 17 | def __init__(self, datasets): 18 | super(ConcatDataset, self).__init__(datasets) 19 | self.CLASSES = datasets[0].CLASSES 20 | self.PALETTE = datasets[0].PALETTE 21 | 22 | 23 | @DATASETS.register_module() 24 | class RepeatDataset(object): 25 | """A wrapper of repeated dataset. 26 | 27 | The length of repeated dataset will be `times` larger than the original 28 | dataset. This is useful when the data loading time is long but the dataset 29 | is small. Using RepeatDataset can reduce the data loading time between 30 | epochs. 31 | 32 | Args: 33 | dataset (:obj:`Dataset`): The dataset to be repeated. 34 | times (int): Repeat times. 
35 | """ 36 | 37 | def __init__(self, dataset, times): 38 | self.dataset = dataset 39 | self.times = times 40 | self.CLASSES = dataset.CLASSES 41 | self.PALETTE = dataset.PALETTE 42 | self._ori_len = len(self.dataset) 43 | 44 | def __getitem__(self, idx): 45 | """Get item from original dataset.""" 46 | return self.dataset[idx % self._ori_len] 47 | 48 | def __len__(self): 49 | """The length is multiplied by ``times``""" 50 | return self.times * self._ori_len 51 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/drive.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class DRIVEDataset(CustomDataset): 9 | """DRIVE dataset. 10 | 11 | In segmentation map annotation for DRIVE, 0 stands for background, which is 12 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 13 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '_manual1.png'. 15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(DRIVEDataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='_manual1.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert osp.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/hrf.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class HRFDataset(CustomDataset): 9 | """HRF dataset. 10 | 11 | In segmentation map annotation for HRF, 0 stands for background, which is 12 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 13 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '.png'. 
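Example config entry (hypothetical; the paths are placeholders and
``train_pipeline`` is assumed to be a pipeline list defined elsewhere)::

    dict(
        type='HRFDataset',
        data_root='data/HRF',
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline)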
15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(HRFDataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert osp.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, 3 | Transpose, to_tensor) 4 | from .loading import LoadAnnotations, LoadImageFromFile 5 | from .test_time_aug import MultiScaleFlipAug 6 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, 7 | PhotoMetricDistortion, RandomCrop, RandomFlip, 8 | RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) 9 | 10 | __all__ = [ 11 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 12 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 13 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 14 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 15 | 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray' 16 | ] 17 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from annotator.uniformer.mmcv.utils import build_from_cfg 4 | 5 | from ..builder import PIPELINES 6 | 7 | 8 | @PIPELINES.register_module() 9 | class Compose(object): 10 | """Compose multiple transforms sequentially. 11 | 12 | Args: 13 | transforms (Sequence[dict | callable]): Sequence of transform object or 14 | config dict to be composed. 15 | """ 16 | 17 | def __init__(self, transforms): 18 | assert isinstance(transforms, collections.abc.Sequence) 19 | self.transforms = [] 20 | for transform in transforms: 21 | if isinstance(transform, dict): 22 | transform = build_from_cfg(transform, PIPELINES) 23 | self.transforms.append(transform) 24 | elif callable(transform): 25 | self.transforms.append(transform) 26 | else: 27 | raise TypeError('transform must be callable or a dict') 28 | 29 | def __call__(self, data): 30 | """Call function to apply transforms sequentially. 31 | 32 | Args: 33 | data (dict): A result dict contains the data to transform. 34 | 35 | Returns: 36 | dict: Transformed data. 37 | """ 38 | 39 | for t in self.transforms: 40 | data = t(data) 41 | if data is None: 42 | return None 43 | return data 44 | 45 | def __repr__(self): 46 | format_string = self.__class__.__name__ + '(' 47 | for t in self.transforms: 48 | format_string += '\n' 49 | format_string += f' {t}' 50 | format_string += '\n)' 51 | return format_string 52 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/stare.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class STAREDataset(CustomDataset): 9 | """STARE dataset. 10 | 11 | In segmentation map annotation for STARE, 0 stands for background, which is 12 | included in 2 categories. ``reduce_zero_label`` is fixed to False. 
The 13 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 14 | '.ah.png'. 15 | """ 16 | 17 | CLASSES = ('background', 'vessel') 18 | 19 | PALETTE = [[120, 120, 120], [6, 230, 230]] 20 | 21 | def __init__(self, **kwargs): 22 | super(STAREDataset, self).__init__( 23 | img_suffix='.png', 24 | seg_map_suffix='.ah.png', 25 | reduce_zero_label=False, 26 | **kwargs) 27 | assert osp.exists(self.img_dir) 28 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from .builder import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class PascalVOCDataset(CustomDataset): 9 | """Pascal VOC dataset. 10 | 11 | Args: 12 | split (str): Split txt file for Pascal VOC. 13 | """ 14 | 15 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 16 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 17 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 18 | 'train', 'tvmonitor') 19 | 20 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 21 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 22 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 23 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 24 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 25 | 26 | def __init__(self, split, **kwargs): 27 | super(PascalVOCDataset, self).__init__( 28 | img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) 29 | assert osp.exists(self.img_dir) and self.split is not None 30 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, 3 | build_head, build_loss, build_segmentor) 4 | from .decode_heads import * # noqa: F401,F403 5 | from .losses import * # noqa: F401,F403 6 | from .necks import * # noqa: F401,F403 7 | from .segmentors import * # noqa: F401,F403 8 | 9 | __all__ = [ 10 | 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', 11 | 'build_head', 'build_loss', 'build_segmentor' 12 | ] 13 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .cgnet import CGNet 2 | # from .fast_scnn import FastSCNN 3 | from .hrnet import HRNet 4 | from .mobilenet_v2 import MobileNetV2 5 | from .mobilenet_v3 import MobileNetV3 6 | from .resnest import ResNeSt 7 | from .resnet import ResNet, ResNetV1c, ResNetV1d 8 | from .resnext import ResNeXt 9 | from .unet import UNet 10 | from .vit import VisionTransformer 11 | from .uniformer import UniFormer 12 | 13 | __all__ = [ 14 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 15 | 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 16 | 'VisionTransformer', 'UniFormer' 17 | ] 18 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/builder.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from annotator.uniformer.mmcv.cnn import MODELS as 
MMCV_MODELS 4 | from annotator.uniformer.mmcv.utils import Registry 5 | 6 | MODELS = Registry('models', parent=MMCV_MODELS) 7 | 8 | BACKBONES = MODELS 9 | NECKS = MODELS 10 | HEADS = MODELS 11 | LOSSES = MODELS 12 | SEGMENTORS = MODELS 13 | 14 | 15 | def build_backbone(cfg): 16 | """Build backbone.""" 17 | return BACKBONES.build(cfg) 18 | 19 | 20 | def build_neck(cfg): 21 | """Build neck.""" 22 | return NECKS.build(cfg) 23 | 24 | 25 | def build_head(cfg): 26 | """Build head.""" 27 | return HEADS.build(cfg) 28 | 29 | 30 | def build_loss(cfg): 31 | """Build loss.""" 32 | return LOSSES.build(cfg) 33 | 34 | 35 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 36 | """Build segmentor.""" 37 | if train_cfg is not None or test_cfg is not None: 38 | warnings.warn( 39 | 'train_cfg and test_cfg are deprecated, ' 40 | 'please specify them in model', UserWarning) 41 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 42 | 'train_cfg specified in both outer field and model field ' 43 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 44 | 'test_cfg specified in both outer field and model field ' 45 | return SEGMENTORS.build( 46 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 47 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .ann_head import ANNHead 2 | from .apc_head import APCHead 3 | from .aspp_head import ASPPHead 4 | from .cc_head import CCHead 5 | from .da_head import DAHead 6 | from .dm_head import DMHead 7 | from .dnl_head import DNLHead 8 | from .ema_head import EMAHead 9 | from .enc_head import EncHead 10 | from .fcn_head import FCNHead 11 | from .fpn_head import FPNHead 12 | from .gc_head import GCHead 13 | from .lraspp_head import LRASPPHead 14 | from .nl_head import NLHead 15 | from .ocr_head import OCRHead 16 | # from .point_head import PointHead 17 | from .psa_head import PSAHead 18 | from .psp_head import PSPHead 19 | from .sep_aspp_head import DepthwiseSeparableASPPHead 20 | from .sep_fcn_head import DepthwiseSeparableFCNHead 21 | from .uper_head import UPerHead 22 | 23 | __all__ = [ 24 | 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', 25 | 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', 26 | 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', 27 | 'APCHead', 'DMHead', 'LRASPPHead' 28 | ] 29 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/decode_heads/cc_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import HEADS 4 | from .fcn_head import FCNHead 5 | 6 | try: 7 | from annotator.uniformer.mmcv.ops import CrissCrossAttention 8 | except ModuleNotFoundError: 9 | CrissCrossAttention = None 10 | 11 | 12 | @HEADS.register_module() 13 | class CCHead(FCNHead): 14 | """CCNet: Criss-Cross Attention for Semantic Segmentation. 15 | 16 | This head is the implementation of `CCNet 17 | <https://arxiv.org/abs/1811.11721>`_. 18 | 19 | Args: 20 | recurrence (int): Number of recurrence of Criss Cross Attention 21 | module. Default: 2.
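Example (a hypothetical decode-head config; the channel and class
numbers are placeholders, not values from a shipped config)::

    decode_head=dict(
        type='CCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        recurrence=2,
        num_classes=19)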
22 | """ 23 | 24 | def __init__(self, recurrence=2, **kwargs): 25 | if CrissCrossAttention is None: 26 | raise RuntimeError('Please install mmcv-full for ' 27 | 'CrissCrossAttention ops') 28 | super(CCHead, self).__init__(num_convs=2, **kwargs) 29 | self.recurrence = recurrence 30 | self.cca = CrissCrossAttention(self.channels) 31 | 32 | def forward(self, inputs): 33 | """Forward function.""" 34 | x = self._transform_inputs(inputs) 35 | output = self.convs[0](x) 36 | for _ in range(self.recurrence): 37 | output = self.cca(output) 38 | output = self.convs[1](output) 39 | if self.concat_input: 40 | output = self.conv_cat(torch.cat([x, output], dim=1)) 41 | output = self.cls_seg(output) 42 | return output 43 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/decode_heads/gc_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from annotator.uniformer.mmcv.cnn import ContextBlock 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class GCHead(FCNHead): 10 | """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. 11 | 12 | This head is the implementation of `GCNet 13 | <https://arxiv.org/abs/1904.11492>`_. 14 | 15 | Args: 16 | ratio (float): Multiplier of channels ratio. Default: 1/4. 17 | pooling_type (str): The pooling type of context aggregation. 18 | Options are 'att', 'avg'. Default: 'att'. 19 | fusion_types (tuple[str]): The fusion type for feature fusion. 20 | Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) 21 | """ 22 | 23 | def __init__(self, 24 | ratio=1 / 4., 25 | pooling_type='att', 26 | fusion_types=('channel_add', ), 27 | **kwargs): 28 | super(GCHead, self).__init__(num_convs=2, **kwargs) 29 | self.ratio = ratio 30 | self.pooling_type = pooling_type 31 | self.fusion_types = fusion_types 32 | self.gc_block = ContextBlock( 33 | in_channels=self.channels, 34 | ratio=self.ratio, 35 | pooling_type=self.pooling_type, 36 | fusion_types=self.fusion_types) 37 | 38 | def forward(self, inputs): 39 | """Forward function.""" 40 | x = self._transform_inputs(inputs) 41 | output = self.convs[0](x) 42 | output = self.gc_block(output) 43 | output = self.convs[1](output) 44 | if self.concat_input: 45 | output = self.conv_cat(torch.cat([x, output], dim=1)) 46 | output = self.cls_seg(output) 47 | return output 48 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/decode_heads/nl_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from annotator.uniformer.mmcv.cnn import NonLocal2d 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class NLHead(FCNHead): 10 | """Non-local Neural Networks. 11 | 12 | This head is the implementation of `NLNet 13 | <https://arxiv.org/abs/1711.07971>`_. 14 | 15 | Args: 16 | reduction (int): Reduction factor of projection transform. Default: 2. 17 | use_scale (bool): Whether to scale pairwise_weight by 18 | sqrt(1/inter_channels). Default: True. 19 | mode (str): The nonlocal mode. Options are 'embedded_gaussian', 20 | 'dot_product'. Default: 'embedded_gaussian'.
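Example (a minimal instantiation sketch; all numbers are illustrative)::

    >>> head = NLHead(in_channels=2048, in_index=3, channels=512,
    ...               reduction=2, use_scale=True, num_classes=19)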
21 | """ 22 | 23 | def __init__(self, 24 | reduction=2, 25 | use_scale=True, 26 | mode='embedded_gaussian', 27 | **kwargs): 28 | super(NLHead, self).__init__(num_convs=2, **kwargs) 29 | self.reduction = reduction 30 | self.use_scale = use_scale 31 | self.mode = mode 32 | self.nl_block = NonLocal2d( 33 | in_channels=self.channels, 34 | reduction=self.reduction, 35 | use_scale=self.use_scale, 36 | conv_cfg=self.conv_cfg, 37 | norm_cfg=self.norm_cfg, 38 | mode=self.mode) 39 | 40 | def forward(self, inputs): 41 | """Forward function.""" 42 | x = self._transform_inputs(inputs) 43 | output = self.convs[0](x) 44 | output = self.nl_block(output) 45 | output = self.convs[1](output) 46 | if self.concat_input: 47 | output = self.conv_cat(torch.cat([x, output], dim=1)) 48 | output = self.cls_seg(output) 49 | return output 50 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py: -------------------------------------------------------------------------------- 1 | from annotator.uniformer.mmcv.cnn import DepthwiseSeparableConvModule 2 | 3 | from ..builder import HEADS 4 | from .fcn_head import FCNHead 5 | 6 | 7 | @HEADS.register_module() 8 | class DepthwiseSeparableFCNHead(FCNHead): 9 | """Depthwise-Separable Fully Convolutional Network for Semantic 10 | Segmentation. 11 | 12 | This head is implemented according to the Fast-SCNN paper. 13 | Args: 14 | in_channels(int): Number of output channels of FFM. 15 | channels(int): Number of middle-stage channels in the decode head. 16 | concat_input(bool): Whether to concatenate original decode input into 17 | the result of several consecutive convolution layers. 18 | Default: True. 19 | num_classes(int): Used to determine the dimension of 20 | final prediction tensor. 21 | in_index(int): Corresponds to 'out_indices' in the FastSCNN backbone. 22 | norm_cfg (dict | None): Config of norm layers. 23 | align_corners (bool): align_corners argument of F.interpolate. 24 | Default: False. 25 | loss_decode(dict): Config of loss type and some 26 | relevant additional options.
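Example (a hypothetical Fast-SCNN style config fragment; every value is
a placeholder)::

    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=19,
        in_index=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False)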
27 | """ 28 | 29 | def __init__(self, **kwargs): 30 | super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) 31 | self.convs[0] = DepthwiseSeparableConvModule( 32 | self.in_channels, 33 | self.channels, 34 | kernel_size=self.kernel_size, 35 | padding=self.kernel_size // 2, 36 | norm_cfg=self.norm_cfg) 37 | for i in range(1, self.num_convs): 38 | self.convs[i] = DepthwiseSeparableConvModule( 39 | self.channels, 40 | self.channels, 41 | kernel_size=self.kernel_size, 42 | padding=self.kernel_size // 2, 43 | norm_cfg=self.norm_cfg) 44 | 45 | if self.concat_input: 46 | self.conv_cat = DepthwiseSeparableConvModule( 47 | self.in_channels + self.channels, 48 | self.channels, 49 | kernel_size=self.kernel_size, 50 | padding=self.kernel_size // 2, 51 | norm_cfg=self.norm_cfg) 52 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 3 | cross_entropy, mask_cross_entropy) 4 | from .dice_loss import DiceLoss 5 | from .lovasz_loss import LovaszLoss 6 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 7 | 8 | __all__ = [ 9 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 10 | 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 11 | 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss' 12 | ] 13 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .multilevel_neck import MultiLevelNeck 3 | 4 | __all__ = ['FPN', 'MultiLevelNeck'] 5 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/segmentors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseSegmentor 2 | from .cascade_encoder_decoder import CascadeEncoderDecoder 3 | from .encoder_decoder import EncoderDecoder 4 | 5 | __all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] 6 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .drop import DropPath 2 | from .inverted_residual import InvertedResidual, InvertedResidualV3 3 | from .make_divisible import make_divisible 4 | from .res_layer import ResLayer 5 | from .se_layer import SELayer 6 | from .self_attention_block import SelfAttentionBlock 7 | from .up_conv_block import UpConvBlock 8 | from .weight_init import trunc_normal_ 9 | 10 | __all__ = [ 11 | 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', 12 | 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'DropPath', 'trunc_normal_' 13 | ] 14 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/utils/drop.py: -------------------------------------------------------------------------------- 1 | """Modified from https://github.com/rwightman/pytorch-image- 2 | models/blob/master/timm/models/layers/drop.py.""" 3 | 4 | import torch 5 | from torch import nn 6 | 7 | 8 | class DropPath(nn.Module): 9 | """Drop paths (Stochastic Depth) per 
sample (when applied in main path of 10 | residual blocks). 11 | 12 | Args: 13 | drop_prob (float): Drop rate for paths of the model. Dropout rate has 14 | to be between 0 and 1. Default: 0. 15 | """ 16 | 17 | def __init__(self, drop_prob=0.): 18 | super(DropPath, self).__init__() 19 | self.drop_prob = drop_prob 20 | self.keep_prob = 1 - drop_prob 21 | 22 | def forward(self, x): 23 | if self.drop_prob == 0. or not self.training: 24 | return x 25 | shape = (x.shape[0], ) + (1, ) * ( 26 | x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 27 | random_tensor = self.keep_prob + torch.rand( 28 | shape, dtype=x.dtype, device=x.device) 29 | random_tensor.floor_() # binarize 30 | output = x.div(self.keep_prob) * random_tensor 31 | return output 32 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 2 | """Make divisible function. 3 | 4 | This function rounds the channel number to the nearest value that can be 5 | divisible by the divisor. It is taken from the original tf repo. It ensures 6 | that all layers have a channel number that is divisible by the divisor. It can 7 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 8 | 9 | Args: 10 | value (int): The original channel number. 11 | divisor (int): The divisor to fully divide the channel number. 12 | min_value (int): The minimum value of the output channel. 13 | Default: None, which means the minimum value equals the divisor. 14 | min_ratio (float): The minimum ratio of the rounded channel number to 15 | the original channel number. Default: 0.9. 16 | 17 | Returns: 18 | int: The modified output channel number. 19 | """ 20 | 21 | if min_value is None: 22 | min_value = divisor 23 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 24 | # Make sure that round down does not go down by more than (1-min_ratio). 25 | if new_value < min_ratio * value: 26 | new_value += divisor 27 | return new_value 28 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/models/utils/se_layer.py: -------------------------------------------------------------------------------- 1 | import annotator.uniformer.mmcv as mmcv 2 | import torch.nn as nn 3 | from annotator.uniformer.mmcv.cnn import ConvModule 4 | 5 | from .make_divisible import make_divisible 6 | 7 | 8 | class SELayer(nn.Module): 9 | """Squeeze-and-Excitation Module. 10 | 11 | Args: 12 | channels (int): The input (and output) channels of the SE layer. 13 | ratio (int): Squeeze ratio in SELayer, the intermediate channel will be 14 | ``make_divisible(channels // ratio, 8)``. Default: 16. 15 | conv_cfg (None or dict): Config dict for convolution layer. 16 | Default: None, which means using conv2d. 17 | act_cfg (dict or Sequence[dict]): Config dict for activation layer. 18 | If act_cfg is a dict, two activation layers will be configured 19 | by this dict. If act_cfg is a sequence of dicts, the first 20 | activation layer will be configured by the first dict and the 21 | second activation layer will be configured by the second dict. 22 | Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, 23 | divisor=6.0)).
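Example (a small shape sketch; tensor sizes are illustrative)::

    >>> import torch
    >>> se = SELayer(channels=64, ratio=16)
    >>> out = se(torch.rand(2, 64, 32, 32))  # output keeps the input shape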
24 | """ 25 | 26 | def __init__(self, 27 | channels, 28 | ratio=16, 29 | conv_cfg=None, 30 | act_cfg=(dict(type='ReLU'), 31 | dict(type='HSigmoid', bias=3.0, divisor=6.0))): 32 | super(SELayer, self).__init__() 33 | if isinstance(act_cfg, dict): 34 | act_cfg = (act_cfg, act_cfg) 35 | assert len(act_cfg) == 2 36 | assert mmcv.is_tuple_of(act_cfg, dict) 37 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 38 | self.conv1 = ConvModule( 39 | in_channels=channels, 40 | out_channels=make_divisible(channels // ratio, 8), 41 | kernel_size=1, 42 | stride=1, 43 | conv_cfg=conv_cfg, 44 | act_cfg=act_cfg[0]) 45 | self.conv2 = ConvModule( 46 | in_channels=make_divisible(channels // ratio, 8), 47 | out_channels=channels, 48 | kernel_size=1, 49 | stride=1, 50 | conv_cfg=conv_cfg, 51 | act_cfg=act_cfg[1]) 52 | 53 | def forward(self, x): 54 | out = self.global_avgpool(x) 55 | out = self.conv1(out) 56 | out = self.conv2(out) 57 | return x * out 58 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoding import Encoding 2 | from .wrappers import Upsample, resize 3 | 4 | __all__ = ['Upsample', 'resize', 'Encoding'] 5 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | def resize(input, 8 | size=None, 9 | scale_factor=None, 10 | mode='nearest', 11 | align_corners=None, 12 | warning=True): 13 | if warning: 14 | if size is not None and align_corners: 15 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 16 | output_h, output_w = tuple(int(x) for x in size) 17 | if output_h > input_h or output_w > input_w: 18 | if ((output_h > 1 and output_w > 1 and input_h > 1 19 | and input_w > 1) and (output_h - 1) % (input_h - 1) 20 | and (output_w - 1) % (input_w - 1)): 21 | warnings.warn( 22 | f'When align_corners={align_corners}, ' 23 | 'the output would be more aligned if ' 24 | f'input size {(input_h, input_w)} is `x+1` and ' 25 | f'out size {(output_h, output_w)} is `nx+1`') 26 | return F.interpolate(input, size, scale_factor, mode, align_corners) 27 | 28 | 29 | class Upsample(nn.Module): 30 | 31 | def __init__(self, 32 | size=None, 33 | scale_factor=None, 34 | mode='nearest', 35 | align_corners=None): 36 | super(Upsample, self).__init__() 37 | self.size = size 38 | if isinstance(scale_factor, tuple): 39 | self.scale_factor = tuple(float(factor) for factor in scale_factor) 40 | else: 41 | self.scale_factor = float(scale_factor) if scale_factor else None 42 | self.mode = mode 43 | self.align_corners = align_corners 44 | 45 | def forward(self, x): 46 | if not self.size: 47 | size = [int(t * self.scale_factor) for t in x.shape[-2:]] 48 | else: 49 | size = self.size 50 | return resize(x, size, None, self.mode, self.align_corners) 51 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | 4 | __all__ = ['get_root_logger', 'collect_env'] 5 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/utils/collect_env.py:
-------------------------------------------------------------------------------- 1 | from annotator.uniformer.mmcv.utils import collect_env as collect_base_env 2 | from annotator.uniformer.mmcv.utils import get_git_hash 3 | 4 | import annotator.uniformer.mmseg as mmseg 5 | 6 | 7 | def collect_env(): 8 | """Collect the information of the running environments.""" 9 | env_info = collect_base_env() 10 | env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' 11 | 12 | return env_info 13 | 14 | 15 | if __name__ == '__main__': 16 | for name, val in collect_env().items(): 17 | print('{}: {}'.format(name, val)) 18 | -------------------------------------------------------------------------------- /annotator/uniformer/mmseg/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from annotator.uniformer.mmcv.utils import get_logger 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get the root logger. 8 | 9 | The logger will be initialized if it has not been initialized. By default a 10 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 11 | also be added. The name of the root logger is the top-level package name, 12 | e.g., "mmseg". 13 | 14 | Args: 15 | log_file (str | None): The log filename. If specified, a FileHandler 16 | will be added to the root logger. 17 | log_level (int): The root logger level. Note that only the process of 18 | rank 0 is affected, while other processes will set the level to 19 | "Error" and be silent most of the time. 20 | 21 | Returns: 22 | logging.Logger: The root logger. 23 | """ 24 | 25 | logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) 26 | 27 | return logger 28 | -------------------------------------------------------------------------------- /annotator/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | 6 | annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts') 7 | 8 | 9 | def HWC3(x): 10 | assert x.dtype == np.uint8 11 | if x.ndim == 2: 12 | x = x[:, :, None] 13 | assert x.ndim == 3 14 | H, W, C = x.shape 15 | assert C == 1 or C == 3 or C == 4 16 | if C == 3: 17 | return x 18 | if C == 1: 19 | return np.concatenate([x, x, x], axis=2) 20 | if C == 4: 21 | color = x[:, :, 0:3].astype(np.float32) 22 | alpha = x[:, :, 3:4].astype(np.float32) / 255.0 23 | y = color * alpha + 255.0 * (1.0 - alpha) 24 | y = y.clip(0, 255).astype(np.uint8) 25 | return y 26 | 27 | 28 | def resize_image(input_image, resolution): 29 | H, W, C = input_image.shape 30 | H = float(H) 31 | W = float(W) 32 | k = float(resolution) / min(H, W) 33 | H *= k 34 | W *= k 35 | H = int(np.round(H / 64.0)) * 64 36 | W = int(np.round(W / 64.0)) * 64 37 | img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA) 38 | return img 39 | -------------------------------------------------------------------------------- /ckpt/ckpt.txt: -------------------------------------------------------------------------------- 1 | Weights here. 
2 | -------------------------------------------------------------------------------- /configs/global_v15.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: models.uni_controlnet.UniControlNet 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | num_timesteps_cond: 1 7 | log_every_t: 200 8 | timesteps: 1000 9 | first_stage_key: "jpg" 10 | cond_stage_key: "txt" 11 | image_size: 64 12 | channels: 4 13 | cond_stage_trainable: false 14 | conditioning_key: crossattn 15 | monitor: val/loss_simple_ema 16 | scale_factor: 0.18215 17 | use_ema: False 18 | mode: global 19 | 20 | global_control_config: 21 | target: models.global_adapter.GlobalAdapter 22 | params: 23 | in_dim: 768 24 | channel_mult: [2, 4] 25 | 26 | unet_config: 27 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 28 | params: 29 | image_size: 32 30 | in_channels: 4 31 | model_channels: 320 32 | out_channels: 4 33 | num_res_blocks: 2 34 | attention_resolutions: [4, 2, 1] 35 | channel_mult: [1, 2, 4, 4] 36 | use_checkpoint: True 37 | num_heads: 8 38 | use_spatial_transformer: True 39 | transformer_depth: 1 40 | context_dim: 768 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | dropout: 0.0 63 | lossconfig: 64 | target: torch.nn.Identity 65 | 66 | cond_stage_config: 67 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 68 | 69 | data: 70 | target: src.train.dataset.UniDataset 71 | params: 72 | anno_path: ./data/anno.txt 73 | image_dir: ./data/images 74 | condition_root: ./data/conditions 75 | local_type_list: [] 76 | global_type_list: [content] 77 | resolution: 512 78 | drop_txt_prob: 0.5 79 | keep_all_cond_prob: 0.1 80 | drop_all_cond_prob: 0.1 81 | drop_each_cond_prob: [0.5] -------------------------------------------------------------------------------- /data/data.txt: -------------------------------------------------------------------------------- 1 | Data here. 
2 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: unicontrol 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.11.0 10 | - torchvision=0.12.0 11 | - numpy=1.23.1 12 | - pip: 13 | - gradio==3.16.2 14 | - albumentations==1.3.0 15 | - opencv-contrib-python==4.3.0.36 16 | - imageio==2.9.0 17 | - imageio-ffmpeg==0.4.2 18 | - pytorch-lightning==1.6.0 19 | - omegaconf==2.1.1 20 | - test-tube>=0.7.5 21 | - streamlit==1.12.1 22 | - einops==0.3.0 23 | - transformers==4.19.2 24 | - webdataset==0.2.5 25 | - kornia==0.6 26 | - open_clip_torch==2.0.2 27 | - invisible-watermark>=0.1.5 28 | - streamlit-drawable-canvas==0.8.0 29 | - torchmetrics==0.7.0 30 | - timm==0.6.12 31 | - addict==2.4.0 32 | - yapf==0.32.0 33 | - prettytable==3.6.0 34 | - safetensors==0.2.7 35 | - basicsr==1.4.2 36 | - datasets==2.10.1 37 | - pathlib==1.0.1 -------------------------------------------------------------------------------- /figs/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/comparison.png -------------------------------------------------------------------------------- /figs/demo_conditions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/demo_conditions.png -------------------------------------------------------------------------------- /figs/demo_panel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/demo_panel.png -------------------------------------------------------------------------------- /figs/demo_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/demo_results.png -------------------------------------------------------------------------------- /figs/demo_results2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/demo_results2.png -------------------------------------------------------------------------------- /figs/demo_results3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/demo_results3.png -------------------------------------------------------------------------------- /figs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/pipeline.png -------------------------------------------------------------------------------- /figs/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/figs/results.png 
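A minimal sketch of how configs/global_v15.yaml is typically consumed, using create_model and load_state_dict from models/util.py below (the checkpoint filename here is a placeholder, not a shipped file):

from models.util import create_model, load_state_dict

model = create_model('./configs/global_v15.yaml')
model.load_state_dict(load_state_dict('./ckpt/uni_v15.ckpt', location='cpu'))  # hypothetical checkpoint path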
-------------------------------------------------------------------------------- /ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /models/global_adapter.py: 
-------------------------------------------------------------------------------- 1 | from torch import nn 2 | from einops import rearrange 3 | 4 | from ldm.modules.attention import FeedForward 5 | 6 | 7 | class GlobalAdapter(nn.Module): 8 | def __init__(self, in_dim, channel_mult=[2, 4]): 9 | super().__init__() 10 | dim_out1, mult1 = in_dim*channel_mult[0], channel_mult[0]*2 11 | dim_out2, mult2 = in_dim*channel_mult[1], channel_mult[1]*2//channel_mult[0] 12 | self.in_dim = in_dim 13 | self.channel_mult = channel_mult 14 | 15 | self.ff1 = FeedForward(in_dim, dim_out=dim_out1, mult=mult1, glu=True, dropout=0.1) 16 | self.ff2 = FeedForward(dim_out1, dim_out=dim_out2, mult=mult2, glu=True, dropout=0.3) 17 | self.norm1 = nn.LayerNorm(in_dim) 18 | self.norm2 = nn.LayerNorm(dim_out1) 19 | 20 | def forward(self, x): 21 | x = self.ff1(self.norm1(x)) 22 | x = self.ff2(self.norm2(x)) 23 | x = rearrange(x, 'b (n d) -> b n d', n=self.channel_mult[-1], d=self.in_dim).contiguous() 24 | return x -------------------------------------------------------------------------------- /models/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from omegaconf import OmegaConf 5 | from ldm.util import instantiate_from_config 6 | 7 | 8 | def get_state_dict(d): 9 | return d.get('state_dict', d) 10 | 11 | 12 | def load_state_dict(ckpt_path, location='cpu'): 13 | _, extension = os.path.splitext(ckpt_path) 14 | if extension.lower() == ".safetensors": 15 | import safetensors.torch 16 | state_dict = safetensors.torch.load_file(ckpt_path, device=location) 17 | else: 18 | state_dict = get_state_dict(torch.load(ckpt_path, map_location=torch.device(location))) 19 | state_dict = get_state_dict(state_dict) 20 | print(f'Loaded state_dict from [{ckpt_path}]') 21 | return state_dict 22 | 23 | 24 | def create_model(config_path): 25 | config = OmegaConf.load(config_path) 26 | model = instantiate_from_config(config.model).cpu() 27 | print(f'Loaded model config from [{config_path}]') 28 | return model 29 | -------------------------------------------------------------------------------- /samples/multi_conditions/case1/canny.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case1/canny.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case1/midas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case1/midas.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case1/prompt.txt: -------------------------------------------------------------------------------- 1 | Stormtrooper's lecture in the forest -------------------------------------------------------------------------------- /samples/multi_conditions/case2/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case2/content.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case2/hed.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case2/hed.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case3/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case3/content.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case3/hed.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case3/hed.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case3/midas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case3/midas.jpg -------------------------------------------------------------------------------- /samples/multi_conditions/case3/prompt.txt: -------------------------------------------------------------------------------- 1 | A sofa and a deer in the forest -------------------------------------------------------------------------------- /samples/multi_conditions/case3/sketch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/multi_conditions/case3/sketch.jpg -------------------------------------------------------------------------------- /samples/single_condition/case1/prompt.txt: -------------------------------------------------------------------------------- 1 | Robot spider, mars -------------------------------------------------------------------------------- /samples/single_condition/case1/sketch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/single_condition/case1/sketch.jpg -------------------------------------------------------------------------------- /samples/single_condition/case2/prompt.txt: -------------------------------------------------------------------------------- 1 | A warm room with sunshine in -------------------------------------------------------------------------------- /samples/single_condition/case2/seg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/single_condition/case2/seg.jpg -------------------------------------------------------------------------------- /samples/single_condition/case3/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShihaoZhaoZSH/Uni-ControlNet/4da003c334786c5c4edc1e369b724e0aa84f390a/samples/single_condition/case3/content.jpg -------------------------------------------------------------------------------- /src/train/util.py: 
-------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | 6 | def read_anno(anno_path): 7 | fi = open(anno_path) 8 | lines = fi.readlines() 9 | fi.close() 10 | file_ids, annos = [], [] 11 | for line in lines: 12 | id, txt = line.split('\t') 13 | file_ids.append(id) 14 | annos.append(txt) 15 | return file_ids, annos 16 | 17 | 18 | def keep_and_drop(conditions, keep_all_prob, drop_all_prob, drop_each_prob): 19 | results = [] 20 | seed = random.random() 21 | if seed < keep_all_prob: 22 | results = conditions 23 | elif seed < keep_all_prob + drop_all_prob: 24 | for condition in conditions: 25 | results.append(np.zeros(condition.shape)) 26 | else: 27 | for i in range(len(conditions)): 28 | if random.random() < drop_each_prob[i]: 29 | results.append(np.zeros(conditions[i].shape)) 30 | else: 31 | results.append(conditions[i]) 32 | return results -------------------------------------------------------------------------------- /utils/config.py: -------------------------------------------------------------------------------- 1 | save_memory = True 2 | -------------------------------------------------------------------------------- /utils/share.py: -------------------------------------------------------------------------------- 1 | import utils.config as config 2 | from models.hack import disable_verbosity, enable_sliced_attention 3 | 4 | 5 | disable_verbosity() 6 | 7 | if config.save_memory: 8 | enable_sliced_attention() 9 | --------------------------------------------------------------------------------
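A small illustration of keep_and_drop from src/train/util.py above; the arrays and probabilities are made up, mirroring the drop_* fields in configs/global_v15.yaml:

import numpy as np
from src.train.util import keep_and_drop

local_conditions = [np.ones((512, 512, 3)), np.ones((512, 512, 3))]
dropped = keep_and_drop(local_conditions, keep_all_prob=0.1,
                        drop_all_prob=0.1, drop_each_prob=[0.5, 0.5])
# each entry is either the original condition map or an all-zero array of the same shape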