├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── core
├── __init__.py
├── augmentation.py
├── augmentation_pos.py
├── config.py
├── data
│   ├── datasets
│   │   ├── __init__.py
│   │   └── images
│   │   │   ├── image_helper.py
│   │   │   ├── multi_posedataset.py
│   │   │   ├── parsing_dataset.py
│   │   │   ├── pedattr_dataset.py
│   │   │   ├── peddet_dataset.py
│   │   │   ├── pos_dataset_dev.py
│   │   │   ├── reid_dataset.py
│   │   │   ├── resources
│   │   │   ├── CHval.odgt
│   │   │   ├── COCO_val2017_detections_AP_H_56_person.json
│   │   │   └── mpii_gt_val.mat
│   │   │   ├── seg_data_tools
│   │   │   ├── __init__.py
│   │   │   ├── collate.py
│   │   │   ├── cv2_aug_transforms.py
│   │   │   └── transforms.py
│   │   │   └── seg_dataset_dev.py
│   ├── test_datasets
│   │   ├── __init__.py
│   │   └── images
│   │   │   └── reid_dataset.py
│   └── transforms
│   │   ├── face_transforms.py
│   │   ├── parsing_transforms.py
│   │   ├── pedattr_transforms.py
│   │   ├── peddet_transforms.py
│   │   ├── peddet_transforms_helpers
│   │   ├── __init__.py
│   │   └── transforms.py
│   │   ├── pose_transforms.py
│   │   ├── post_transforms.py
│   │   ├── reid_transforms.py
│   │   ├── seg_aug_dev.py
│   │   └── seg_transforms_dev.py
├── distributed_utils.py
├── exceptions.py
├── fp16
│   ├── __init__.py
│   ├── amp.py
│   ├── opt.py
│   ├── scaler.py
│   ├── utils.py
│   └── wrap.py
├── lr_scheduler
│   ├── __init__.py
│   └── base.py
├── make_param_group.py
├── memory.py
├── models
│   ├── __init__.py
│   ├── backbones
│   │   ├── __init__.py
│   │   └── vit.py
│   ├── ckpt.py
│   ├── decoders
│   │   ├── __init__.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── classification_losses.py
│   │   │   ├── criterion.py
│   │   │   ├── matcher.py
│   │   │   ├── pedattr_losses.py
│   │   │   ├── peddet_losses.py
│   │   │   ├── point_features.py
│   │   │   ├── pos_losses.py
│   │   │   ├── seg_losses.py
│   │   │   └── test_time.py
│   │   └── network
│   │   │   ├── __init__.py
│   │   │   ├── meta_arch
│   │   │   ├── __init__.py
│   │   │   └── aio_head.py
│   │   │   └── transformer_decoder
│   │   │   ├── __init__.py
│   │   │   ├── position_encoding.py
│   │   │   └── transformer_decoder.py
│   ├── model_entry.py
│   ├── necks
│   │   ├── DoNothing.py
│   │   ├── __init__.py
│   │   └── simple_neck.py
│   ├── ops
│   │   ├── __init__.py
│   │   ├── box_ops.py
│   │   ├── boxes.py
│   │   └── utils.py
│   └── tta.py
├── msg_server.py
├── optim.py
├── optimizers
│   ├── __init__.py
│   ├── adafactor.py
│   ├── adam_clip.py
│   └── lars.py
├── solvers
│   ├── __init__.py
│   ├── solver.py
│   ├── solver_deter.py
│   ├── solver_multitask_dev.py
│   └── utils
│   │   ├── __init__.py
│   │   ├── attr_tester_dev.py
│   │   ├── detools
│   │   └── box.py
│   │   ├── nms.py
│   │   ├── par_tester_dev.py
│   │   ├── peddet_tester_dev.py
│   │   ├── pos_tester_dev.py
│   │   └── seg_tester_dev.py
├── testers
│   ├── __init__.py
│   ├── reid_tester.py
│   ├── tester.py
│   ├── tester_deter.py
│   └── utils
│   │   ├── metrics.py
│   │   └── reranking.py
└── utils.py
├── experiments
└── unihcp
│   └── release
│   ├── ablation_baseline_coslr1e3_60k_b1000g40_h256_I2k_1_10_001_2I_m256.yaml
│   ├── batch_test.sh
│   ├── coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256.yaml
│   ├── test.sh
│   ├── train.sh
│   ├── vd_h3m6_pose_test.yaml
│   ├── vd_ochuman_pose_test.yaml
│   ├── vd_pa100k_lpe_test.yaml
│   ├── vd_par_atr_lpe_test.yaml
│   ├── vd_par_cihp_lpe_test.yaml
│   ├── vd_par_lip_lpe_test.yaml
│   ├── vd_par_lpe_test.yaml
│   ├── vd_peddet_caltech_test.yaml
│   ├── vd_peddet_inter_lpe_test.yaml
│   ├── vd_peta_lpe_test.yaml
│   ├── vd_pose_aic_lpe_test.yaml
│   ├── vd_pose_lpe_test.yaml
│   ├── vd_pose_mpii_lpe_test.yaml
│   ├── vd_rap2_lpe_test.yaml
│   ├── vd_reid_cuhk3_test.yaml
│   ├── vd_reid_msmt_test.yaml
│   ├── vd_reid_senseid_test.yaml
│   └── vd_reid_test.yaml
├── helper
├── align.py
├── flops_helper.py
├── multitask_schedule.py
├── param_count.py
└── vis_helper.py
├── multitask.py
├── requirements.txt
└── test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | *.bak
4 | *.tar
5 | scripts
6 | cfgs
7 | checkpoints
8 | __pycache__
9 | log*.txt
10 | .ignore
11 | mimic_experiments/
12 | backup/
13 | exp_branch/
14 | exp_modelscaling/
15 | result_info/
16 | result_info_context/
17 | exp_*
18 | failed_to_read_*
19 | .idea/
20 | parse_result*
21 | bak*
22 | 
23 | itchat.pkl
24 | server.txt
25 | 
26 | caffemodels/
27 | feats/
28 | nart_tools/
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/.gitmodules
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2023 OpenGVLab
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UniHCP: A Unified Model for Human-Centric Perceptions
2 | 
3 | # Usage
4 | 
5 | ## Preparation
6 | 
7 | 1. Install all required dependencies in requirements.txt.
8 | 2. Replace all `path...to...` placeholders in the .yaml configuration files with the absolute paths
9 | to the corresponding dataset locations.
10 | 3. Place the MAE pretrained weight mae_pretrain_vit_base.pth under the `core/models/backbones/pretrain_weights` folder.
11 | 
12 | *Only slurm-based distributed training & single-gpu testing are implemented in this repo.
13 | 
14 | ## Experiments
15 | 
16 | All experiment configuration files and launch scripts are located in the `experiments/unihcp/release` folder.
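Before launching anything, a quick sanity check that the Preparation steps took effect can save a failed job. The commands below are only a suggested sketch (they assume the placeholders literally read `path...to...` and that you run them from the repo root):

```bash
# the ViT backbone expects the MAE weight at this fixed location (see Preparation step 3)
ls core/models/backbones/pretrain_weights/mae_pretrain_vit_base.pth

# any remaining placeholder paths in the release configs will break dataset loading
grep -rnF 'path...to' experiments/unihcp/release/*.yaml && echo 'unresolved placeholders found'
```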
17 | 
18 | To perform full multi-task training for UniHCP, replace `` in the `train.sh` launch script and run:
19 | 
20 | ```bash
21 | sh train.sh 88 coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256
22 | ```
23 | 
24 | To perform evaluations, keep the test_info_list assignments corresponding to the tests you want to perform,
25 | replace ``, then run:
26 | 
27 | ```bash
28 | sh batch_test.sh 1 coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256
29 | ```
30 | 
31 | Note that in this case, the program will look for checkpoints at `experiments/unihcp/release/checkpoints/coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256`
32 | 
33 | 
34 | # Pretrained Models
35 | Please send the signed agreement to `mail@yuanzheng.ci` to get the download link.
--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/__init__.py
--------------------------------------------------------------------------------
/core/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .images.reid_dataset import ReIDDataset
2 | from .images.pedattr_dataset import AttrDataset
3 | from .images.pos_dataset_dev import COCOPosDatasetDev, MPIIPosDatasetDev
4 | from .images.parsing_dataset import Human3M6ParsingDataset, LIPParsingDataset, CIHPParsingDataset, ATRParsingDataset, DeepFashionParsingDataset, VIPParsingDataset, ModaNetParsingDataset
5 | from .images.multi_posedataset import MultiPoseDatasetDev
6 | from .images.peddet_dataset import PedestrainDetectionDataset
7 | from core.utils import printlog
8 | 
9 | def dataset_entry(config):
10 |     printlog('config[kwargs]', config['kwargs'])
11 |     return globals()[config['type']](**config['kwargs'])
--------------------------------------------------------------------------------
/core/data/datasets/images/pedattr_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import pickle
4 | import random
5 | from easydict import EasyDict as edict
6 | import numpy as np
7 | import torch.utils.data as data
8 | from PIL import Image
9 | from core.data.transforms.pedattr_transforms import PedAttrAugmentation, PedAttrTestAugmentation, PedAttrRandomAugmentation
10 | from core import distributed_utils as dist
11 | 
12 | 
13 | __all__ = ['AttrDataset']
14 | 
15 | class AttrDataset(data.Dataset):
16 | 
17 |     def __init__(self, ginfo, augmentation, task_spec, train=True, **kwargs):
18 | 
19 |         assert task_spec.dataset in ['peta', 'PA-100k', 'rap', 'rap2', 'uavhuman', 'HARDHC', 'ClothingAttribute', 'parse27k', 'duke', 'market'], \
20 |             f'dataset name {task_spec.dataset} does not exist'
21 | 
22 |         data_path = task_spec.data_path
23 | 
24 |         with open(data_path, "rb+") as f:
25 |             dataset_info = pickle.load(f)
26 |             dataset_info = edict(dataset_info)
27 | 
28 |         img_id = dataset_info.image_name
29 |         attr_label = dataset_info.label
30 | 
31 |         if train:
32 |             split = 'trainval'
33 |         else:
34 |             split = 'test'
35 | 
36 |         assert split in dataset_info.partition.keys(), f'split {split} does not exist'
37 | 
38 |         height = augmentation.height
39 |         width = augmentation.width
40 | 
41 |         self.dataset = task_spec.dataset
42 |         self.root_path = task_spec.root_path
43 | 
44 |         if train:
45 |             self.transform = PedAttrAugmentation(height, width)
46 |             if 
augmentation.get('use_random_aug', False): 47 | self.transform = PedAttrRandomAugmentation(height, width, \ 48 | augmentation.use_random_aug.m, augmentation.use_random_aug.n) 49 | else: 50 | self.transform = PedAttrTestAugmentation(height, width) 51 | 52 | self.attr_id = dataset_info.attr_name 53 | self.attr_num = len(self.attr_id) 54 | 55 | self.img_idx = dataset_info.partition[split] 56 | 57 | if isinstance(self.img_idx, list): 58 | self.img_idx = self.img_idx[0] # default partition 0 59 | 60 | self.img_num = len(self.img_idx) 61 | self.img_idx = np.array(self.img_idx) 62 | self.img_id = [img_id[i] for i in self.img_idx] 63 | self.label = attr_label[self.img_idx] 64 | self.task_name = ginfo.task_name 65 | self.rank = dist.get_rank() 66 | self.train = train 67 | 68 | def __getitem__(self, index): 69 | imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index] 70 | imgpath = os.path.join(self.root_path, imgname) 71 | 72 | img = Image.open(imgpath).convert("RGB") 73 | 74 | if self.transform is not None: 75 | img = self.transform(img) 76 | 77 | gt_label = gt_label.astype(np.float32) 78 | 79 | output = {'image': img, 'label': gt_label, 'filename': imgname} 80 | return output 81 | 82 | def __len__(self): 83 | return len(self.img_id) 84 | 85 | def __repr__(self): 86 | return self.__class__.__name__ + \ 87 | f'rank: {self.rank} task: {self.task_name} mode:{"training" if self.train else "inference"} ' \ 88 | f'dataset_len:{len(self.img_id)} id_num:{self.attr_num} augmentation: {self.transform}' 89 | 90 | -------------------------------------------------------------------------------- /core/data/datasets/images/resources/mpii_gt_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/data/datasets/images/resources/mpii_gt_val.mat -------------------------------------------------------------------------------- /core/data/datasets/images/seg_data_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/data/datasets/images/seg_data_tools/__init__.py -------------------------------------------------------------------------------- /core/data/datasets/images/seg_data_tools/collate.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch.utils.data.dataloader import default_collate 6 | 7 | from lib.extensions.parallel.data_container import DataContainer 8 | 9 | 10 | def stack(batch, data_key=None, return_dc=False): 11 | if isinstance(batch[0][data_key], DataContainer): 12 | if batch[0][data_key].stack: 13 | assert isinstance(batch[0][data_key].data, torch.Tensor) 14 | samples = [sample[data_key].data for sample in batch] 15 | return default_collate(samples) 16 | 17 | elif not return_dc: 18 | return [sample[data_key].data for sample in batch] 19 | 20 | else: 21 | return DataContainer([sample[data_key].data for sample in batch]) 22 | 23 | else: 24 | return default_collate([sample[data_key] for sample in batch]) 25 | 26 | 27 | def collate(batch, trans_dict): 28 | data_keys = batch[0].keys() 29 | 30 | target_width, target_height = trans_dict['input_size'] 31 | target_widths, target_heights = [target_width] * len(batch), [target_height] * len(batch) 32 | 33 | 34 | for i in range(len(batch)): 35 | 
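        # each sample is aligned to the shared (target_width, target_height): first
        # scaled according to trans_dict['align_method'] ('only_scale', 'scale_and_pad',
        # or 'only_pad'), then padded; the pad value differs per key (0 for images and
        # offset maps, -1 or 255 for label-type maps) so padded pixels are ignored by the losses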
target_width, target_height = target_widths[i], target_heights[i] 36 | 37 | if 'meta' in data_keys: 38 | batch[i]['meta'].data['input_size'] = [target_width, target_height] 39 | 40 | channels, height, width = batch[i]['img'].size() 41 | if height == target_height and width == target_width: 42 | continue 43 | 44 | scaled_size = [width, height] 45 | 46 | if trans_dict['align_method'] in ['only_scale', 'scale_and_pad']: 47 | w_scale_ratio = target_width / width 48 | h_scale_ratio = target_height / height 49 | if trans_dict['align_method'] == 'scale_and_pad': 50 | w_scale_ratio = min(w_scale_ratio, h_scale_ratio) 51 | h_scale_ratio = w_scale_ratio 52 | 53 | scaled_size = (int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio))) 54 | if 'meta' in data_keys and 'border_size' in batch[i]['meta'].data: 55 | batch[i]['meta'].data['border_size'] = scaled_size 56 | 57 | scaled_size_hw = (scaled_size[1], scaled_size[0]) 58 | batch[i]['img'] = DataContainer(F.interpolate(batch[i]['img'].data.unsqueeze(0), 59 | scaled_size_hw, mode='bilinear', align_corners=True).squeeze(0), stack=True) 60 | if 'labelmap' in data_keys: 61 | labelmap = batch[i]['labelmap'].data.unsqueeze(0).unsqueeze(0).float() 62 | labelmap = F.interpolate(labelmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0) 63 | batch[i]['labelmap'] = DataContainer(labelmap, stack=True) 64 | 65 | if 'maskmap' in data_keys: 66 | maskmap = batch[i]['maskmap'].data.unsqueeze(0).unsqueeze(0).float() 67 | maskmap = F.interpolate(maskmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0) 68 | batch[i]['maskmap'].data = DataContainer(maskmap, stack=True) 69 | 70 | pad_width = target_width - scaled_size[0] 71 | pad_height = target_height - scaled_size[1] 72 | assert pad_height >= 0 and pad_width >= 0 73 | if pad_width > 0 or pad_height > 0: 74 | assert trans_dict['align_method'] in ['only_pad', 'scale_and_pad'] 75 | left_pad = 0 76 | up_pad = 0 77 | if 'pad_mode' not in trans_dict or trans_dict['pad_mode'] == 'random': 78 | left_pad = random.randint(0, pad_width) # pad_left 79 | up_pad = random.randint(0, pad_height) # pad_up 80 | 81 | elif trans_dict['pad_mode'] == 'pad_left_up': 82 | left_pad = pad_width 83 | up_pad = pad_height 84 | 85 | elif trans_dict['pad_mode'] == 'pad_right_down': 86 | left_pad = 0 87 | up_pad = 0 88 | 89 | elif trans_dict['pad_mode'] == 'pad_center': 90 | left_pad = pad_width // 2 91 | up_pad = pad_height // 2 92 | 93 | elif trans_dict['pad_mode'] == 'pad_border': 94 | if random.randint(0, 1) == 0: 95 | left_pad = pad_width 96 | up_pad = pad_height 97 | else: 98 | left_pad = 0 99 | up_pad = 0 100 | else: 101 | raise ValueError("mode not define") 102 | exit(1) 103 | 104 | pad = (left_pad, pad_width-left_pad, up_pad, pad_height-up_pad) 105 | 106 | batch[i]['img'] = DataContainer(F.pad(batch[i]['img'].data, pad=pad, value=0), stack=batch[i]['img'].stack) 107 | 108 | if 'labelmap' in data_keys: 109 | batch[i]['labelmap'] = DataContainer(F.pad(batch[i]['labelmap'].data, pad=pad, value=-1), stack=batch[i]['labelmap'].stack) 110 | 111 | if 'maskmap' in data_keys: 112 | batch[i]['maskmap'] = DataContainer(F.pad(batch[i]['maskmap'].data, pad=pad, value=0), stack=batch[i]['maskmap'].stack) 113 | 114 | if 'distance_map' in data_keys: 115 | batch[i]['distance_map'] = DataContainer(F.pad(batch[i]['distance_map'].data, pad=pad, value=255), stack=batch[i]['distance_map'].stack) 116 | 117 | if 'angle_map' in data_keys: 118 | batch[i]['angle_map'] = DataContainer(F.pad(batch[i]['angle_map'].data, pad=pad, 
value=0), stack=batch[i]['angle_map'].stack) 119 | 120 | if 'mask_label_map' in data_keys: 121 | batch[i]['mask_label_map'] = DataContainer(F.pad(batch[i]['mask_label_map'].data, pad=pad, value=-1), stack=batch[i]['mask_label_map'].stack) 122 | 123 | if 'direction_label_map' in data_keys: 124 | batch[i]['direction_label_map'] = DataContainer(F.pad(batch[i]['direction_label_map'].data, pad=pad, value=-1), stack=batch[i]['direction_label_map'].stack) 125 | 126 | if 'multi_label_direction_map' in data_keys: 127 | batch[i]['multi_label_direction_map'] = DataContainer(F.pad(batch[i]['multi_label_direction_map'].data, pad=pad, value=-1), stack=batch[i]['multi_label_direction_map'].stack) 128 | 129 | if 'energy_label_map' in data_keys: 130 | batch[i]['energy_label_map'] = DataContainer(F.pad(batch[i]['energy_label_map'].data, pad=pad, value=-1), stack=batch[i]['energy_label_map'].stack) 131 | 132 | if 'offsetmap_h' in data_keys: 133 | batch[i]['offsetmap_h'] = DataContainer(F.pad(batch[i]['offsetmap_h'].data, pad=pad, value=0), stack=batch[i]['offsetmap_h'].stack) 134 | 135 | if 'offsetmap_w' in data_keys: 136 | batch[i]['offsetmap_w'] = DataContainer(F.pad(batch[i]['offsetmap_w'].data, pad=pad, value=0), stack=batch[i]['offsetmap_w'].stack) 137 | 138 | return dict({key: stack(batch, data_key=key) for key in data_keys}) 139 | 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /core/data/datasets/images/seg_data_tools/transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from PIL import Image 4 | 5 | 6 | class Normalize(object): 7 | """Normalize a ``torch.tensor`` 8 | 9 | Args: 10 | inputs (torch.tensor): tensor to be normalized. 11 | mean: (list): the mean of RGB 12 | std: (list): the std of RGB 13 | 14 | Returns: 15 | Tensor: Normalized tensor. 16 | """ 17 | def __init__(self, div_value, mean, std): 18 | self.div_value = div_value 19 | self.mean = mean 20 | self.std =std 21 | 22 | def __call__(self, inputs): 23 | inputs = inputs.div(self.div_value) 24 | for t, m, s in zip(inputs, self.mean, self.std): 25 | t.sub_(m).div_(s) 26 | 27 | return inputs 28 | 29 | 30 | class DeNormalize(object): 31 | """DeNormalize a ``torch.tensor`` 32 | 33 | Args: 34 | inputs (torch.tensor): tensor to be normalized. 35 | mean: (list): the mean of RGB 36 | std: (list): the std of RGB 37 | 38 | Returns: 39 | Tensor: Normalized tensor. 40 | """ 41 | def __init__(self, div_value, mean, std): 42 | self.div_value = div_value 43 | self.mean = mean 44 | self.std =std 45 | 46 | def __call__(self, inputs): 47 | result = inputs.clone() 48 | for i in range(result.size(0)): 49 | result[i, :, :] = result[i, :, :] * self.std[i] + self.mean[i] 50 | 51 | return result.mul_(self.div_value) 52 | 53 | 54 | class ToTensor(object): 55 | """Convert a ``numpy.ndarray or Image`` to tensor. 56 | 57 | See ``ToTensor`` for more details. 58 | 59 | Args: 60 | inputs (numpy.ndarray or Image): Image to be converted to tensor. 61 | 62 | Returns: 63 | Tensor: Converted image. 
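    Note: the array is transposed from HWC to CHW and cast to float, but it is
    not scaled to [0, 1] here; unlike torchvision's ToTensor, the division is
    left to the companion Normalize transform via its div_value argument.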
64 | """ 65 | def __call__(self, inputs): 66 | if isinstance(inputs, Image.Image): 67 | channels = len(inputs.mode) 68 | inputs = np.array(inputs) 69 | inputs = inputs.reshape(inputs.shape[0], inputs.shape[1], channels) 70 | inputs = torch.from_numpy(inputs.transpose(2, 0, 1)) 71 | else: 72 | inputs = torch.from_numpy(inputs.transpose(2, 0, 1)) 73 | 74 | return inputs.float() 75 | 76 | 77 | class ToLabel(object): 78 | def __call__(self, inputs): 79 | return torch.from_numpy(np.array(inputs)).long() 80 | 81 | 82 | class ReLabel(object): 83 | """ 84 | 255 indicate the background, relabel 255 to some value. 85 | """ 86 | def __init__(self, olabel, nlabel): 87 | self.olabel = olabel 88 | self.nlabel = nlabel 89 | 90 | def __call__(self, inputs): 91 | assert isinstance(inputs, torch.LongTensor), 'tensor needs to be LongTensor' 92 | 93 | inputs[inputs == self.olabel] = self.nlabel 94 | return inputs 95 | 96 | 97 | class Compose(object): 98 | 99 | def __init__(self, transforms): 100 | self.transforms = transforms 101 | 102 | def __call__(self, inputs): 103 | for t in self.transforms: 104 | inputs = t(inputs) 105 | 106 | return inputs -------------------------------------------------------------------------------- /core/data/test_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .images.reid_dataset import ReIDTestDataset, ReIDTestDatasetDev 2 | 3 | def dataset_entry(config): 4 | # print('config[kwargs]',config['kwargs']) 5 | return globals()[config['type']](**config['kwargs']) 6 | -------------------------------------------------------------------------------- /core/data/transforms/peddet_transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | COCO dataset which returns image_id for evaluation. 
4 | 5 | Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py 6 | """ 7 | from pathlib import Path 8 | import os.path as osp 9 | import json 10 | import torch 11 | import torch.utils.data 12 | import torchvision 13 | import core.data.transforms.peddet_transforms_helpers.transforms as T 14 | import cv2 15 | cv2.ocl.setUseOpenCL(False) 16 | 17 | class PedestrainDetectionAugmentation(object): 18 | def __init__(self, phase, vit=False, maxsize=1333): 19 | if vit: 20 | normalize = T.Compose([ 21 | T.PILToTensor(), 22 | T.Normalize([0., 0., 0.], [1., 1., 1.]) 23 | ]) 24 | else: 25 | normalize = T.Compose([ 26 | T.ToTensor(), 27 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 28 | ]) 29 | 30 | scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800] 31 | 32 | if phase == 'train': 33 | self.transformer = T.Compose([ 34 | T.RandomHorizontalFlip(), 35 | T.RandomSelect( 36 | T.RandomResize(scales, max_size=maxsize), 37 | T.Compose([ 38 | T.RandomResize([400, 500, 600]), 39 | T.RandomSizeCrop(384, 600), 40 | T.RandomResize(scales, max_size=maxsize), 41 | ]) 42 | ), 43 | normalize, 44 | ]) 45 | elif phase == 'val': 46 | self.transformer = T.Compose([ 47 | T.RandomResize([800], max_size=maxsize), 48 | normalize, 49 | ]) 50 | else: 51 | raise NotImplementedError 52 | 53 | def __call__(self, image, target): 54 | return self.transformer(image, target) 55 | 56 | class PedestrainDetectionAugmentationCal(object): 57 | def __init__(self, phase, vit=False, maxsize=640): 58 | if vit: 59 | normalize = T.Compose([ 60 | T.PILToTensor(), 61 | T.Normalize([0., 0., 0.], [1., 1., 1.]) 62 | ]) 63 | else: 64 | normalize = T.Compose([ 65 | T.ToTensor(), 66 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 67 | ]) 68 | 69 | # scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800] 70 | 71 | if phase == 'train': 72 | self.transformer = T.Compose([ 73 | T.RandomHorizontalFlip(), 74 | # T.RandomSelect( 75 | # # T.RandomResize(scales, max_size=maxsize), 76 | # T.Compose([ 77 | # # T.RandomResize([400, 500, 600]), 78 | # # T.RandomSizeCrop(384, 600), 79 | # # T.RandomResize(scales, max_size=maxsize), 80 | # ]) 81 | # ), 82 | normalize, 83 | ]) 84 | elif phase == 'val': 85 | self.transformer = T.Compose([ 86 | # T.RandomResize([800], max_size=maxsize), 87 | normalize, 88 | ]) 89 | else: 90 | raise NotImplementedError 91 | 92 | def __call__(self, image, target): 93 | return self.transformer(image, target) 94 | -------------------------------------------------------------------------------- /core/data/transforms/peddet_transforms_helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/data/transforms/peddet_transforms_helpers/__init__.py -------------------------------------------------------------------------------- /core/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | class MCReadFailException(Exception): 3 | pass 4 | 5 | class NoneImageException(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .opt import * 2 | from .scaler import * 3 | from .amp import * 4 | from . 
import utils 5 | -------------------------------------------------------------------------------- /core/fp16/amp.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import functools 3 | import itertools 4 | 5 | import torch 6 | 7 | from . import utils, wrap 8 | 9 | __all__ = ['half_function', 'float_function', 10 | 'register_half_function', 'register_float_function', 11 | 'register_float_module', 'init', 'reset'] 12 | 13 | _DECORATOR_HANDLE = None 14 | _USER_CAST_REGISTRY = set() 15 | _USER_FLOAT_MODULE = set() 16 | _ORIGINAL_MODULE_HALF = None 17 | 18 | def _decorator_helper(orig_fn, cast_fn, wrap_fn): 19 | def wrapper(*args, **kwargs): 20 | handle = _DECORATOR_HANDLE 21 | if handle is None or not handle.is_active(): 22 | return orig_fn(*args, **kwargs) 23 | inner_cast_fn = utils.verbosify(cast_fn, orig_fn.__name__, 24 | handle.verbose) 25 | return wrap_fn(orig_fn, inner_cast_fn, handle)(*args, **kwargs) 26 | return wrapper 27 | 28 | # Decorator form 29 | def half_function(fn): 30 | wrap_fn = functools.partial(wrap.make_cast_wrapper, try_caching=True) 31 | return _decorator_helper(fn, utils.maybe_half, wrap_fn) 32 | 33 | def float_function(fn): 34 | wrap_fn = functools.partial(wrap.make_cast_wrapper, try_caching=False) 35 | return _decorator_helper(fn, utils.maybe_float, wrap_fn) 36 | 37 | # Registry form 38 | def register_half_function(module, name): 39 | if not hasattr(module, name): 40 | raise ValueError('No function named {} in module {}.'.format( 41 | name, module)) 42 | _USER_CAST_REGISTRY.add((module, name, utils.maybe_half)) 43 | 44 | def register_float_function(module, name): 45 | if not hasattr(module, name): 46 | raise ValueError('No function named {} in module {}.'.format( 47 | name, module)) 48 | _USER_CAST_REGISTRY.add((module, name, utils.maybe_float)) 49 | 50 | def register_float_module(module, cast_args=True): 51 | if not issubclass(module, torch.nn.modules.module.Module): 52 | raise ValueError('{} is not a torch Module'.format(module)) 53 | 54 | if cast_args: 55 | register_float_function(module, 'forward') 56 | 57 | _USER_FLOAT_MODULE.add(module) 58 | 59 | class AmpHandle(object): 60 | def __init__(self, enable_caching=True, verbose=False): 61 | self._enable_caching = enable_caching 62 | self._verbose = verbose 63 | self._cache = dict() 64 | self._is_active = True 65 | self._all_wrappers = [] 66 | 67 | def is_active(self): 68 | return self._is_active 69 | 70 | @contextlib.contextmanager 71 | def _disable_casts(self): 72 | self._is_active = False 73 | yield 74 | self._is_active = True 75 | 76 | def _clear_cache(self): 77 | self._cache.clear() 78 | 79 | # Experimental support for saving / restoring uncasted versions of functions 80 | def _save_func(self, mod, fn, func): 81 | self._all_wrappers.append((mod, fn, func)) 82 | 83 | def _deactivate(self): 84 | for mod, fn, func in self._all_wrappers: 85 | utils.set_func(mod, fn, func) 86 | self._all_wrappers = [] 87 | 88 | @property 89 | def has_cache(self): 90 | return self._enable_caching 91 | 92 | @property 93 | def cache(self): 94 | return self._cache 95 | 96 | def remove_cache(self, param): 97 | if self.has_cache and param in self.cache: 98 | del self.cache[param] 99 | 100 | @property 101 | def verbose(self): 102 | return self._verbose 103 | 104 | def _half_helper(verbose=False): 105 | def _half_wrapper(self): 106 | for module in self.children(): 107 | module.half() 108 | 109 | if self.__class__ in _USER_FLOAT_MODULE: 110 | if verbose: 111 | print('Skip half convert 
for {}'.format(self.__class__)) 112 | return self 113 | 114 | fn = lambda t: t.half() if t.is_floating_point() else t 115 | for param in self._parameters.values(): 116 | if param is not None: 117 | # Tensors stored in modules are graph leaves, and we don't 118 | # want to create copy nodes, so we have to unpack the data. 119 | param.data = fn(param.data) 120 | if param._grad is not None: 121 | param._grad.data = fn(param._grad.data) 122 | 123 | for key, buf in self._buffers.items(): 124 | if buf is not None: 125 | self._buffers[key] = fn(buf) 126 | 127 | return self 128 | return _half_wrapper 129 | 130 | def init(enable_caching=True, verbose=False): 131 | global _DECORATOR_HANDLE 132 | global _ORIGINAL_MODULE_HALF 133 | 134 | handle = AmpHandle(enable_caching, verbose) 135 | 136 | if len(_USER_FLOAT_MODULE) > 0: 137 | _ORIGINAL_MODULE_HALF = torch.nn.modules.module.Module.half 138 | utils.set_func(torch.nn.modules.module.Module, 'half', 139 | _half_helper(verbose)) 140 | 141 | # Force-{fp16, fp32} for user-annotated functions 142 | for mod, fn, cast_fn in _USER_CAST_REGISTRY: 143 | try_caching = (cast_fn == utils.maybe_half) 144 | wrap.cached_cast(mod, fn, cast_fn, handle, 145 | try_caching, verbose) 146 | _USER_CAST_REGISTRY.clear() 147 | 148 | _DECORATOR_HANDLE = handle 149 | return handle 150 | 151 | def _clear_cache(): 152 | handle = _DECORATOR_HANDLE 153 | if handle is None or not handle.is_active(): 154 | return 155 | handle._clear_cache() 156 | 157 | def reset(): 158 | handle = _DECORATOR_HANDLE 159 | if handle is None or not handle.is_active(): 160 | return 161 | handle._deactivate() 162 | utils.set_func(torch.nn.modules.module.Module, 'half', _ORIGINAL_MODULE_HALF) -------------------------------------------------------------------------------- /core/fp16/scaler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch._six import inf 3 | 4 | from .utils import iter_params 5 | 6 | __all__ = ['scale_check_overflow', 'LossScaler'] 7 | 8 | # from apex_C import scale_check_overflow 9 | 10 | # Python stopgap, until we get a future-proof kernel into upstream 11 | def scale_check_overflow(d_grads, scale): 12 | any_infinite = ((d_grads != d_grads) | (d_grads.abs() == inf)).any() 13 | if any_infinite: 14 | return True 15 | d_grads.mul_(scale) 16 | return False 17 | 18 | class LossScaler(object): 19 | def __init__(self, scale=1.0, dynamic=False): 20 | self._dynamic = dynamic 21 | self._loss_scale = 2.**16 if self._dynamic else scale 22 | self._max_loss_scale = 2.**24 23 | self._scale_seq_len = 2000 24 | self._unskipped = 0 25 | self._has_overflow = False 26 | 27 | @property 28 | def loss_scale(self): 29 | return self._loss_scale 30 | 31 | @property 32 | def has_overflow(self): 33 | return self._has_overflow 34 | 35 | def unscale_and_update(self, param_groups, scale): 36 | if not self._dynamic: 37 | for p in iter_params(param_groups): 38 | if p.grad is not None: 39 | p.grad.data.mul_(1. / scale) 40 | return 41 | 42 | self._has_overflow = False 43 | for p in iter_params(param_groups): 44 | if p.grad is not None: 45 | self._has_overflow = scale_check_overflow(p.grad.data, 46 | 1. / scale) 47 | if self._has_overflow: 48 | break 49 | 50 | # if self._overflow_buf.any(): 51 | if self._has_overflow: 52 | should_skip = True 53 | self._loss_scale /= 2. 
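            # dynamic loss scaling: halve the scale on overflow and skip the step;
            # after _scale_seq_len consecutive overflow-free steps the scale is
            # doubled again, capped at _max_loss_scale (2.**24)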
54 | self._unskipped = 0 55 | else: 56 | should_skip = False 57 | self._unskipped += 1 58 | 59 | if self._unskipped == self._scale_seq_len: 60 | self._loss_scale = min(self._max_loss_scale, self._loss_scale * 2.) 61 | self._unskipped = 0 62 | 63 | return should_skip 64 | 65 | def backward(self, loss): 66 | scaled_loss = loss*self.loss_scale 67 | scaled_loss.backward() 68 | -------------------------------------------------------------------------------- /core/fp16/wrap.py: -------------------------------------------------------------------------------- 1 | from . import utils 2 | 3 | import functools 4 | 5 | import torch 6 | 7 | def make_cast_wrapper(orig_fn, cast_fn, handle, 8 | try_caching=False): 9 | @functools.wraps(orig_fn) 10 | def wrapper(*args, **kwargs): 11 | if not handle.is_active(): 12 | return orig_fn(*args, **kwargs) 13 | 14 | input_types = [ 15 | v.data.type() for v in list(args) + list(kwargs.values()) 16 | if utils.is_fp_tensor(v) 17 | ] 18 | #print('wrapper: orig_fn:{}, input_types:{}'.format(orig_fn, input_types)) 19 | input_type = input_types[0] 20 | 21 | if try_caching and handle.has_cache: 22 | args = list(args) 23 | for i in range(len(args)): 24 | if utils.should_cache(args[i]): 25 | args[i] = utils.cached_cast(cast_fn, args[i], handle.cache) 26 | for k in kwargs: 27 | if utils.should_cache(kwargs[k]): 28 | kwargs[k] = utils.cached_cast(cast_fn, kwargs[k], handle.cache) 29 | new_args = utils.casted_args(cast_fn, 30 | args, 31 | kwargs) 32 | output = orig_fn(*new_args, **kwargs) 33 | 34 | #if output.type() != input_type: 35 | # print('ori output type: {}, input type: {}'.format(output.type(), input_type)) 36 | # return output.type(input_type) 37 | #return output 38 | return cast_output(output, input_type, verbose=False) 39 | 40 | return wrapper 41 | 42 | def cast_output(output, input_type, verbose=False): 43 | if isinstance(output, dict): 44 | keys = output.keys() 45 | for k in keys: 46 | output[k] = cast_output(output[k], input_type) 47 | return output 48 | 49 | if utils.is_fp_tensor(output) and output.type() != input_type: 50 | if verbose: 51 | print('ori output type: {}, input type: {}'.format(output.type(), input_type)) 52 | return output.type(input_type) 53 | return output 54 | 55 | def cached_cast(mod, fn, cast_fn, handle, 56 | try_caching=False, verbose=False): 57 | if not utils.has_func(mod, fn): 58 | return 59 | 60 | orig_fn = utils.get_func(mod, fn) 61 | cast_fn = utils.verbosify(cast_fn, fn, verbose) 62 | wrapper = make_cast_wrapper(orig_fn, cast_fn, handle, try_caching) 63 | utils.set_func_save(handle, mod, fn, wrapper) 64 | 65 | -------------------------------------------------------------------------------- /core/lr_scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import (StepLRScheduler, CosineLRScheduler, WarmupCosineLRScheduler, WarmupPolyLRScheduler, 2 | HTLTCosineLRScheduler, HTLTDualCosineLRScheduler, LinearLRScheduler) 3 | 4 | def lr_scheduler_entry(config): 5 | return globals()[config['type']+'LRScheduler'](**config['kwargs']) 6 | -------------------------------------------------------------------------------- /core/make_param_group.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import shutil 3 | import torch 4 | import os 5 | import io 6 | import logging 7 | from collections import defaultdict 8 | 9 | 10 | from torch.nn import BatchNorm2d 11 | 12 | def param_group_no_wd(model): 13 | pgroup_no_wd = [] 14 | 
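    # collects parameters that should train without weight decay: conv/linear
    # biases plus BatchNorm weights and biases; everything else stays in the
    # normal (decayed) group returned alongside it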
names_no_wd = [] 15 | pgroup_normal = [] 16 | 17 | type2num = defaultdict(lambda : 0) 18 | for name,m in model.named_modules(): 19 | if isinstance(m, torch.nn.Conv2d): 20 | if m.bias is not None: 21 | pgroup_no_wd.append(m.bias) 22 | names_no_wd.append(name+'.bias') 23 | type2num[m.__class__.__name__+'.bias'] += 1 24 | elif isinstance(m, torch.nn.Linear): 25 | if m.bias is not None: 26 | pgroup_no_wd.append(m.bias) 27 | names_no_wd.append(name+'.bias') 28 | type2num[m.__class__.__name__+'.bias'] += 1 29 | elif isinstance(m, torch.nn.BatchNorm2d) or isinstance(m, torch.nn.BatchNorm1d): 30 | if m.weight is not None: 31 | pgroup_no_wd.append(m.weight) 32 | names_no_wd.append(name+'.weight') 33 | type2num[m.__class__.__name__+'.weight'] += 1 34 | if m.bias is not None: 35 | pgroup_no_wd.append(m.bias) 36 | names_no_wd.append(name+'.bias') 37 | type2num[m.__class__.__name__+'.bias'] += 1 38 | 39 | for name,p in model.named_parameters(): 40 | if not name in names_no_wd: 41 | pgroup_normal.append(p) 42 | 43 | return [{'params': pgroup_normal}, {'params': pgroup_no_wd, 'weight_decay': 0.0}], type2num 44 | 45 | def param_group_fc(model): 46 | logits_w_id = id(model.module.logits.weight) 47 | fc_group = [] 48 | normal_group = [] 49 | for p in model.parameters(): 50 | if id(p) == logits_w_id: 51 | fc_group.append(p) 52 | else: 53 | normal_group.append(p) 54 | param_group = [{'params': fc_group}, {'params': normal_group}] 55 | 56 | return param_group 57 | 58 | def param_group_multitask(model): 59 | backbone_group = [] 60 | neck_group = [] 61 | decoder_group = [] 62 | other_group = [] 63 | for name, p in model.named_parameters(): 64 | if 'module.backbone_module' in name: 65 | backbone_group.append(p) 66 | elif 'module.neck_module' in name: 67 | neck_group.append(p) 68 | elif 'module.decoder_module' in name: 69 | decoder_group.append(p) 70 | else: 71 | other_group.append(p) 72 | 73 | if len(other_group) > 0: 74 | param_group = [{'params': backbone_group}, {'params': neck_group}, \ 75 | {'params': decoder_group}, {'params', other_group}] 76 | else: 77 | param_group = [{'params': backbone_group}, {'params': neck_group}, \ 78 | {'params': decoder_group}] 79 | return param_group 80 | -------------------------------------------------------------------------------- /core/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | import torch 7 | 8 | __all__ = ["retry_if_cuda_oom"] 9 | 10 | 11 | @contextmanager 12 | def _ignore_torch_cuda_oom(): 13 | """ 14 | A context which ignores CUDA OOM exception from pytorch. 15 | """ 16 | try: 17 | yield 18 | except RuntimeError as e: 19 | # NOTE: the string may change? 20 | if "CUDA out of memory. " in str(e): 21 | pass 22 | else: 23 | raise 24 | 25 | 26 | def retry_if_cuda_oom(func): 27 | """ 28 | Makes a function retry itself after encountering 29 | pytorch's CUDA OOM error. 30 | It will first retry after calling `torch.cuda.empty_cache()`. 31 | 32 | If that still fails, it will then retry by trying to convert inputs to CPUs. 33 | In this case, it expects the function to dispatch to CPU implementation. 34 | The return values may become CPU tensors as well and it's user's 35 | responsibility to convert it back to CUDA tensor if needed. 
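    If the CPU retry itself raises, the exception propagates to the caller
    unchanged (only the first two attempts swallow the CUDA OOM error).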
36 | 37 | Args: 38 | func: a stateless callable that takes tensor-like objects as arguments 39 | 40 | Returns: 41 | a callable which retries `func` if OOM is encountered. 42 | 43 | Examples: 44 | :: 45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 46 | # output may be on CPU even if inputs are on GPU 47 | 48 | Note: 49 | 1. When converting inputs to CPU, it will only look at each argument and check 50 | if it has `.device` and `.to` for conversion. Nested structures of tensors 51 | are not supported. 52 | 53 | 2. Since the function might be called more than once, it has to be 54 | stateless. 55 | """ 56 | 57 | def maybe_to_cpu(x): 58 | try: 59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 60 | except AttributeError: 61 | like_gpu_tensor = False 62 | if like_gpu_tensor: 63 | return x.to(device="cpu") 64 | else: 65 | return x 66 | 67 | @wraps(func) 68 | def wrapped(*args, **kwargs): 69 | with _ignore_torch_cuda_oom(): 70 | return func(*args, **kwargs) 71 | 72 | # Clear cache and retry 73 | torch.cuda.empty_cache() 74 | with _ignore_torch_cuda_oom(): 75 | return func(*args, **kwargs) 76 | 77 | # Try on CPU. This slows down the code significantly, therefore print a notice. 78 | logger = logging.getLogger(__name__) 79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 80 | new_args = (maybe_to_cpu(x) for x in args) 81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 82 | return func(*new_args, **new_kwargs) 83 | 84 | return wrapped 85 | -------------------------------------------------------------------------------- /core/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/models/__init__.py -------------------------------------------------------------------------------- /core/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vit import vit_base_patch16 2 | 3 | def backbone_entry(config): 4 | return globals()[config['type']](**config['kwargs']) 5 | -------------------------------------------------------------------------------- /core/models/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .network import AIOHead 2 | 3 | def decoder_entry(config): 4 | return globals()[config['type']](**config['kwargs']) 5 | -------------------------------------------------------------------------------- /core/models/decoders/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .classification_losses import MarginCosineProductLoss 2 | from .classification_losses import ArcFaceLoss, Softmax_TripletLoss, Softmax_TripletLoss_wBN 3 | from .seg_losses import FSAuxCELoss, FocalDiceLoss, FocalDiceLoss_bce_cls_emb, FocalDiceLoss_bce_cls_emb_sample_weight 4 | from .pos_losses import BasePosLoss, POS_FocalDiceLoss_bce_cls_emb 5 | from .peddet_losses import DetFocalDiceLoss, DetFocalDiceLoss_hybrid 6 | from .pedattr_losses import CEL_Sigmoid 7 | 8 | def loss_entry(config): 9 | return globals()[config['type']](**config['kwargs']) 10 | -------------------------------------------------------------------------------- /core/models/decoders/losses/pedattr_losses.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ 
import division 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import numpy as np 8 | 9 | 10 | 11 | __all__ = ['CEL_Sigmoid'] 12 | 13 | def ratio2weight(targets, ratio): 14 | ratio = torch.from_numpy(ratio).type_as(targets) 15 | pos_weights = targets * (1 - ratio) 16 | neg_weights = (1 - targets) * ratio 17 | weights = torch.exp(neg_weights + pos_weights) 18 | 19 | # for RAP dataloader, targets element may be 2, with or without smooth, some element must great than 1 20 | weights[targets > 1] = 0.0 21 | 22 | return weights 23 | 24 | class CEL_Sigmoid(nn.Module): 25 | def __init__(self, sample_weight=None, size_average=True, cfg=None): 26 | super(CEL_Sigmoid, self).__init__() 27 | 28 | self.sample_weight = sample_weight 29 | 30 | if sample_weight is not None: 31 | self.sample_weight = np.array(self.sample_weight) 32 | 33 | self.size_average = size_average 34 | 35 | def forward(self, input_var): 36 | logits = input_var['logit'] 37 | targets = input_var['label'] 38 | batch_size = logits.shape[0] 39 | 40 | weight_mask = (targets != -1) # mask -1 labels from HARDHC dataset 41 | loss = F.binary_cross_entropy_with_logits(logits, targets, weight=weight_mask, reduction='none') 42 | 43 | targets_mask = torch.where(targets.detach().cpu() > 0.5, torch.ones(1), torch.zeros(1)) 44 | if self.sample_weight is not None: 45 | weight = ratio2weight(targets_mask, self.sample_weight) 46 | loss = (loss * weight.cuda()) 47 | 48 | loss = loss.sum() / batch_size if self.size_average else loss.sum() 49 | 50 | output = {'loss': loss, 'top1': torch.Tensor([0]).cuda()} 51 | 52 | return output 53 | 54 | def __repr__(self): 55 | return self.__class__.__name__ + '(' \ 56 | + 'sample_weight=' + str(self.sample_weight) \ 57 | + ', size_average=' + str(self.size_average) + ')' 58 | -------------------------------------------------------------------------------- /core/models/decoders/losses/peddet_losses.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .matcher import DetectionHungarianMatcher 6 | from .criterion import DetSetCriterion 7 | 8 | class DetFocalDiceLoss(nn.Module): 9 | def __init__(self, cfg): 10 | super(DetFocalDiceLoss, self).__init__() 11 | matcher = DetectionHungarianMatcher( 12 | cost_class=cfg.class_weight, 13 | cost_bbox=cfg.bbox_weight, 14 | cost_giou=cfg.giou_weight, 15 | ) 16 | 17 | weight_dict = {"loss_ce": cfg.class_weight, 18 | "loss_bbox": cfg.bbox_weight, 19 | "loss_giou": cfg.giou_weight} 20 | 21 | if cfg.deep_supervision: 22 | aux_weight_dict = {} 23 | for i in range(cfg.dec_layers-1): 24 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...} 25 | aux_weight_dict.update({k + f'_enc': v for k, v in weight_dict.items()}) 26 | weight_dict.update(aux_weight_dict) 27 | 28 | 29 | 30 | self.fd_loss = DetSetCriterion( 31 | cfg.num_classes, 32 | ginfo=cfg.ginfo, 33 | matcher=matcher, 34 | weight_dict=weight_dict, 35 | losses=["labels", "boxes"], 36 | focal_alpha=cfg.focal_alpha, 37 | ign_thr=cfg.ign_thr, 38 | ) 39 | 40 | self.cfg = cfg 41 | 42 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx} 43 | losses = self.fd_loss(outputs, targets) 44 | for k in list(losses.keys()): 45 | if k in self.fd_loss.weight_dict: 46 | losses[k] *= self.fd_loss.weight_dict[k] 47 | elif 'loss' in k: 48 | # remove this loss if not specified in `weight_dict` 49 | 
losses.pop(k) 50 | return losses 51 | 52 | 53 | class DetFocalDiceLoss_hybrid(DetFocalDiceLoss): 54 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx} 55 | multi_targets = copy.deepcopy(targets) 56 | losses = self.fd_loss(outputs, targets) 57 | 58 | for target in multi_targets: 59 | target["boxes"] = target["boxes"].repeat(self.cfg.k_one2many, 1) 60 | target["labels"] = target["labels"].repeat(self.cfg.k_one2many) 61 | assert len(target["iscrowd"].shape) == 1, f"len(target['iscrowd'].shape) == 1: {len(target['iscrowd'].shape) == 1}" 62 | target["iscrowd"] = target["iscrowd"].repeat(self.cfg.k_one2many) 63 | outputs_one2many = dict() 64 | outputs_one2many["pred_logits"] = outputs["pred_logits_one2many"] 65 | outputs_one2many["pred_boxes"] = outputs["pred_boxes_one2many"] 66 | outputs_one2many["aux_outputs"] = outputs["aux_outputs_one2many"] 67 | outputs_one2many["mask"] = outputs["mask"] 68 | losses_one2many = self.fd_loss(outputs_one2many, multi_targets) 69 | 70 | 71 | for k in list(losses.keys()): 72 | if k in self.fd_loss.weight_dict: 73 | losses[k] *= self.fd_loss.weight_dict[k] 74 | elif 'loss' in k: 75 | # remove this loss if not specified in `weight_dict` 76 | losses.pop(k) 77 | for k in list(losses_one2many.keys()): # repeat 78 | if k in self.fd_loss.weight_dict: 79 | losses_one2many[k] *= self.fd_loss.weight_dict[k] 80 | elif 'loss' in k: 81 | losses_one2many.pop(k) 82 | 83 | for key, value in losses_one2many.items(): 84 | if key + "_one2many" in losses.keys(): 85 | losses[key + "_one2many"] += value * self.cfg.get('lambda_one2many', 1) 86 | else: 87 | losses[key + "_one2many"] = value * self.cfg.get('lambda_one2many', 1) 88 | 89 | return losses -------------------------------------------------------------------------------- /core/models/decoders/losses/pos_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .matcher import HungarianMatcher, DirectMatcher, RedundantQMatcher, POSDirectMatcher 5 | from .criterion import SetCriterion, POSSetCriterion 6 | 7 | class BasePosLoss(nn.Module): 8 | def __init__(self, target_type, use_target_weight=True, cfg=None): 9 | super(BasePosLoss, self).__init__() 10 | self.criterion = nn.MSELoss() 11 | 12 | self.target_type = target_type 13 | self.use_target_weight = use_target_weight 14 | 15 | self.cfg = cfg 16 | 17 | def get_loss(self, num_joints, heatmaps_pred, heatmaps_gt, target_weight): 18 | loss = 0. 19 | for idx in range(num_joints): 20 | heatmap_pred = heatmaps_pred[idx].squeeze(1) 21 | heatmap_gt = heatmaps_gt[idx].squeeze(1) 22 | if self.use_target_weight: 23 | loss += self.criterion(heatmap_pred * target_weight[:, idx], 24 | heatmap_gt * target_weight[:, idx]) 25 | else: 26 | loss += self.criterion(heatmap_pred, heatmap_gt) 27 | return loss 28 | 29 | def forward(self, outputs, target, target_weight): # {"aux_outputs": xx, 'xx': xx} 30 | """Forward function.""" 31 | output = outputs['pred_masks'] # {'pred_logits':'pred_masks':} 32 | 33 | batch_size = output.size(0) 34 | num_joints = output.size(1) 35 | 36 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 37 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 38 | 39 | loss = self.get_loss(num_joints, heatmaps_pred, heatmaps_gt, target_weight) 40 | 41 | # In case of auxiliary losses, we repeat this process with the output of each intermediate layer. 
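        # (deep supervision: every intermediate decoder layer predicts its own
        # heatmaps, scored against the same targets, so gradients reach each
        # layer directly; set cfg.aux_loss=False to disable)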
42 | if "aux_outputs" in outputs and self.cfg.get('aux_loss', True): 43 | for aux_outputs in outputs["aux_outputs"]: 44 | heatmaps_pred = aux_outputs['pred_masks'].reshape((batch_size, num_joints, -1)).split(1, 1) 45 | 46 | loss = loss + self.get_loss(num_joints, heatmaps_pred, heatmaps_gt, target_weight) 47 | 48 | return loss / num_joints 49 | 50 | class POS_FocalDiceLoss_bce_cls_emb(nn.Module): 51 | def __init__(self, target_type, use_target_weight=True, cfg=None): 52 | super(POS_FocalDiceLoss_bce_cls_emb, self).__init__() 53 | self.target_type = target_type 54 | self.use_target_weight = use_target_weight 55 | 56 | matcher = POSDirectMatcher() 57 | 58 | weight_dict = {"loss_bce_pos": cfg.class_weight, 59 | "loss_mask_pos": cfg.mask_weight, 60 | } 61 | 62 | if cfg.get('deep_supervision', False): 63 | aux_weight_dict = {} 64 | for i in range(cfg.dec_layers): 65 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...} 66 | weight_dict.update(aux_weight_dict) 67 | 68 | self.fd_loss = POSSetCriterion( 69 | cfg.num_classes, 70 | ginfo=cfg.ginfo, 71 | matcher=matcher, 72 | weight_dict=weight_dict, 73 | losses=[ 74 | "pos_mask", 75 | "pos_bce_labels", 76 | ], 77 | eos_coef=cfg.get('eos_coef', 0.1), 78 | aux=cfg.get('deep_supervision', False), 79 | ignore_blank=cfg.get('ignore_blank', True), 80 | sample_weight=cfg.get('sample_weight', None) 81 | ) 82 | 83 | self.cfg = cfg 84 | 85 | def forward(self, outputs, targets, target_weight, **kwargs): # {"aux_outputs": xx, 'xx': xx} 86 | losses = self.fd_loss(outputs, targets) 87 | 88 | for k in list(losses.keys()): 89 | if k in self.fd_loss.weight_dict: 90 | losses[k] *= self.fd_loss.weight_dict[k] 91 | else: 92 | # remove this loss if not specified in `weight_dict` 93 | losses.pop(k) 94 | 95 | return losses 96 | -------------------------------------------------------------------------------- /core/models/decoders/losses/seg_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .matcher import HungarianMatcher, DirectMatcher, RedundantQMatcher 5 | from .criterion import SetCriterion 6 | 7 | 8 | class FSCELoss(nn.Module): 9 | def __init__(self, configer=None, **kwargs): 10 | super(FSCELoss, self).__init__() 11 | self.configer = configer 12 | weight = None 13 | if 'ce_weight' in self.configer: 14 | weight = self.configer['ce_weight'] 15 | weight = torch.FloatTensor(weight).cuda() 16 | 17 | reduction = 'elementwise_mean' 18 | if 'ce_reduction' in self.configer: 19 | reduction = self.configer['ce_reduction'] 20 | 21 | ignore_index = -1 22 | if 'ce_ignore_index' in self.configer: 23 | ignore_index = self.configer['ce_ignore_index'] 24 | 25 | self.ce_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction) 26 | 27 | def forward(self, inputs, *targets, weights=None, **kwargs): 28 | if isinstance(inputs, tuple) or isinstance(inputs, list): 29 | if weights is None: 30 | weights = [1.0] * len(inputs) 31 | 32 | for i in range(len(inputs)): 33 | if i == 0: 34 | if len(targets) > 1: 35 | target = self._scale_target(targets[i], (inputs[i].size(2), inputs[i].size(3))) 36 | loss = weights[i] * self.ce_loss(inputs[i], target) 37 | else: 38 | target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3))) 39 | loss = weights[i] * self.ce_loss(inputs[i], target) 40 | else: 41 | if len(targets) > 1: 42 | target = self._scale_target(targets[i], 
(inputs[i].size(2), inputs[i].size(3))) 43 | loss += weights[i] * self.ce_loss(inputs[i], target) 44 | else: 45 | target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3))) 46 | loss += weights[i] * self.ce_loss(inputs[i], target) 47 | 48 | else: 49 | target = self._scale_target(targets[0], (inputs.size(2), inputs.size(3))) 50 | loss = self.ce_loss(inputs, target) 51 | 52 | return loss 53 | 54 | @staticmethod 55 | def _scale_target(targets_, scaled_size): 56 | targets = targets_.clone().unsqueeze(1).float() 57 | targets = F.interpolate(targets, size=scaled_size, mode='nearest') 58 | return targets.squeeze(1).long() 59 | 60 | 61 | class FSAuxCELoss(nn.Module): 62 | def __init__(self, configer=None): 63 | super(FSAuxCELoss, self).__init__() 64 | self.configer = configer 65 | self.ce_loss = FSCELoss(self.configer) 66 | 67 | def forward(self, inputs, targets, **kwargs): 68 | aux_out, seg_out = inputs 69 | seg_loss = self.ce_loss(seg_out, targets) 70 | aux_loss = self.ce_loss(aux_out, targets) 71 | loss = self.configer['loss_weights']['seg_loss'] * seg_loss 72 | loss = loss + self.configer['loss_weights']['aux_loss'] * aux_loss 73 | return loss 74 | 75 | 76 | class FocalDiceLoss(nn.Module): 77 | def __init__(self, cfg): 78 | super(FocalDiceLoss, self).__init__() 79 | matcher = HungarianMatcher( 80 | cost_class=cfg.class_weight, 81 | cost_mask=cfg.mask_weight, 82 | cost_dice=cfg.dice_weight, 83 | num_points=cfg.num_points, 84 | ) 85 | 86 | weight_dict = {"loss_ce": cfg.class_weight, 87 | "loss_mask": cfg.mask_weight, 88 | "loss_dice": cfg.dice_weight} 89 | 90 | if cfg.deep_supervision: 91 | aux_weight_dict = {} 92 | for i in range(cfg.dec_layers): 93 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...} 94 | weight_dict.update(aux_weight_dict) 95 | 96 | self.fd_loss = SetCriterion( 97 | cfg.num_classes, 98 | ginfo=cfg.ginfo, 99 | matcher=matcher, 100 | weight_dict=weight_dict, 101 | eos_coef=cfg.no_object_weight, 102 | losses=["labels", "masks"], 103 | num_points=cfg.num_points, 104 | oversample_ratio=cfg.oversample_ratio, 105 | importance_sample_ratio=cfg.importance_sample_ratio, 106 | ) 107 | 108 | self.cfg = cfg 109 | 110 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx} 111 | losses = self.fd_loss(outputs, targets) 112 | 113 | for k in list(losses.keys()): 114 | if k in self.fd_loss.weight_dict: 115 | losses[k] *= self.fd_loss.weight_dict[k] 116 | else: 117 | # remove this loss if not specified in `weight_dict` 118 | losses.pop(k) 119 | 120 | return losses 121 | 122 | class FocalDiceLoss_bce_cls_emb(nn.Module): 123 | def __init__(self, cfg): 124 | super(FocalDiceLoss_bce_cls_emb, self).__init__() 125 | matcher = DirectMatcher(num_points=cfg.num_points,) 126 | 127 | weight_dict = { "loss_bce": cfg.class_weight, 128 | "loss_mask": cfg.mask_weight, 129 | "loss_dice": cfg.dice_weight} 130 | 131 | if cfg.deep_supervision: 132 | aux_weight_dict = {} 133 | for i in range(cfg.dec_layers): 134 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...} 135 | weight_dict.update(aux_weight_dict) 136 | 137 | self.fd_loss = SetCriterion( 138 | cfg.num_classes, 139 | ginfo=cfg.ginfo, 140 | matcher=matcher, 141 | weight_dict=weight_dict, 142 | eos_coef=cfg.no_object_weight, 143 | losses=[ 144 | "bce_labels", 145 | "masks", 146 | ], 147 | num_points=cfg.num_points, 148 | oversample_ratio=cfg.oversample_ratio, 149 | 
importance_sample_ratio=cfg.importance_sample_ratio, 150 | ) 151 | 152 | self.cfg = cfg 153 | 154 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx} 155 | losses = self.fd_loss(outputs, targets) 156 | 157 | for k in list(losses.keys()): 158 | if k in self.fd_loss.weight_dict: 159 | losses[k] *= self.fd_loss.weight_dict[k] 160 | else: 161 | # remove this loss if not specified in `weight_dict` 162 | losses.pop(k) 163 | 164 | return losses 165 | 166 | class FocalDiceLoss_bce_cls_emb_sample_weight(FocalDiceLoss): 167 | def __init__(self, cfg): 168 | super(FocalDiceLoss_bce_cls_emb_sample_weight, self).__init__(cfg) 169 | matcher = DirectMatcher(num_points=cfg.num_points,) 170 | 171 | weight_dict = { "loss_bce": cfg.class_weight, 172 | "loss_mask": cfg.mask_weight, 173 | "loss_dice": cfg.dice_weight} 174 | 175 | if cfg.deep_supervision: 176 | aux_weight_dict = {} 177 | for i in range(cfg.dec_layers): 178 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...} 179 | weight_dict.update(aux_weight_dict) 180 | 181 | self.fd_loss = SetCriterion( 182 | cfg.num_classes, 183 | ginfo=cfg.ginfo, 184 | matcher=matcher, 185 | weight_dict=weight_dict, 186 | eos_coef=cfg.no_object_weight, 187 | losses=[ 188 | "bce_labels", 189 | "masks", 190 | ], 191 | num_points=cfg.num_points, 192 | oversample_ratio=cfg.oversample_ratio, 193 | importance_sample_ratio=cfg.importance_sample_ratio, 194 | sample_weight = cfg.get('sample_weight', None) 195 | ) 196 | 197 | self.cfg = cfg 198 | -------------------------------------------------------------------------------- /core/models/decoders/losses/test_time.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from scipy.optimize import linear_sum_assignment 4 | from torch import nn 5 | import numpy as np 6 | def point_sample(input, point_coords, **kwargs): 7 | """ 8 | A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors. 9 | Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside 10 | [0, 1] x [0, 1] square. 11 | 12 | Args: 13 | input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid. 14 | point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains 15 | [0, 1] x [0, 1] normalized point coordinates. 16 | 17 | Returns: 18 | output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains 19 | features for points in `point_coords`. The features are obtained via bilinear 20 | interplation from `input` the same way as :function:`torch.nn.functional.grid_sample`. 21 | """ 22 | add_dim = False 23 | if point_coords.dim() == 3: 24 | add_dim = True 25 | point_coords = point_coords.unsqueeze(2) 26 | output = F.grid_sample(input, 2.0 * point_coords - 1.0, **kwargs) 27 | if add_dim: 28 | output = output.squeeze(3) 29 | return output 30 | 31 | 32 | def batch_dice_loss(inputs: torch.Tensor, targets: torch.Tensor): 33 | """ 34 | Compute the DICE loss, similar to generalized IOU for masks 35 | Args: 36 | inputs: A float tensor of arbitrary shape. 37 | The predictions for each example. 38 | targets: A float tensor with the same shape as inputs. Stores the binary 39 | classification label for each element in inputs 40 | (0 for the negative class and 1 for the positive class). 
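For reference, a minimal usage sketch of `point_sample` as defined above (shapes are illustrative, not from the repo): a 3-D coordinate tensor is temporarily expanded to the 4-D layout `grid_sample` expects, then squeezed back.

```python
import torch

feats = torch.rand(2, 16, 32, 32)   # (N, C, H, W) feature map
pts = torch.rand(2, 100, 2)         # (N, P, 2) coords in [0, 1] x [0, 1]

out = point_sample(feats, pts, align_corners=False)
print(out.shape)                    # torch.Size([2, 16, 100])
```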
41 | """ 42 | inputs = inputs.sigmoid() 43 | inputs = inputs.flatten(1) 44 | numerator = 2 * torch.einsum("nc,mc->nm", inputs, targets) 45 | denominator = inputs.sum(-1)[:, None] + targets.sum(-1)[None, :] 46 | loss = 1 - (numerator + 1) / (denominator + 1) 47 | return loss 48 | 49 | 50 | batch_dice_loss_jit = torch.jit.script( 51 | batch_dice_loss 52 | ) # type: torch.jit.ScriptModule 53 | 54 | 55 | def batch_sigmoid_ce_loss(inputs: torch.Tensor, targets: torch.Tensor): 56 | """ 57 | Args: 58 | inputs: A float tensor of arbitrary shape. 59 | The predictions for each example. 60 | targets: A float tensor with the same shape as inputs. Stores the binary 61 | classification label for each element in inputs 62 | (0 for the negative class and 1 for the positive class). 63 | Returns: 64 | Loss tensor 65 | """ 66 | hw = inputs.shape[1] 67 | 68 | pos = F.binary_cross_entropy_with_logits( 69 | inputs, torch.ones_like(inputs), reduction="none" 70 | ) 71 | neg = F.binary_cross_entropy_with_logits( 72 | inputs, torch.zeros_like(inputs), reduction="none" 73 | ) 74 | 75 | loss = torch.einsum("nc,mc->nm", pos, targets) + torch.einsum( 76 | "nc,mc->nm", neg, (1 - targets) 77 | ) 78 | 79 | return loss / hw 80 | 81 | num_queries = 80 82 | num_points=12304 83 | h,w=120,120 84 | redundant_queries=4 85 | 86 | tgt_ids = list(range(20)) 87 | 88 | out_masks = torch.rand((num_queries,h,w)).cuda() 89 | tgt_masks = torch.rand((20,h,w)).cuda() 90 | 91 | import time 92 | s = time.time() 93 | ind = [] 94 | for _ in range(10): 95 | out_prob = torch.full( 96 | (num_queries, num_queries//redundant_queries), 0, dtype=torch.float, 97 | device=out_masks.device 98 | ) 99 | 100 | for i in range(num_queries // redundant_queries): 101 | out_prob[4 * i:4 * (i + 1), i] = 1 102 | 103 | cost_class = -out_prob[:, tgt_ids] 104 | out_mask = out_masks[:, None] 105 | tgt_mask = tgt_masks[:, None] 106 | point_coords = torch.rand(1, num_points, 2, device=out_mask.device) 107 | # get gt labels 108 | tgt_mask = point_sample( 109 | tgt_mask, 110 | point_coords.repeat(tgt_mask.shape[0], 1, 1), 111 | align_corners=False, 112 | ).squeeze(1) # [valid_classes, self.num_points] 113 | 114 | out_mask = point_sample( 115 | out_mask, 116 | point_coords.repeat(out_mask.shape[0], 1, 1), 117 | align_corners=False, 118 | ).squeeze(1) # [num_queries, self.num_points] 119 | 120 | out_mask = out_mask.float() 121 | tgt_mask = tgt_mask.float() 122 | # Compute the focal loss between masks 123 | # cost_mask = batch_sigmoid_ce_loss_jit(out_mask, tgt_mask) 124 | cost_mask = batch_sigmoid_ce_loss(out_mask, tgt_mask) # [num_queries, valid_classes] 125 | 126 | # Compute the dice loss betwen masks 127 | # cost_dice = batch_dice_loss_jit(out_mask, tgt_mask) 128 | cost_dice = batch_dice_loss(out_mask, tgt_mask) 129 | 130 | c = torch.full(cost_dice.shape, float("inf"),) 131 | 132 | for i in tgt_ids: 133 | c[i*redundant_queries:(i+1)*redundant_queries, i]=0 134 | 135 | c += cost_dice.cpu()+cost_dice.cpu() 136 | 137 | ind.append(linear_sum_assignment(c)) 138 | print([i-4*j for (i, j) in ind]) 139 | time_cost = time.time() - s 140 | print(time_cost) 141 | print('----') 142 | 143 | tgt_ids = list(range(20)) 144 | 145 | out_masks = torch.rand((num_queries,h,w)).cuda() 146 | tgt_masks = torch.rand((20,h,w)).cuda() 147 | s = time.time() 148 | for _ in range(10): 149 | for idx, label in enumerate(tgt_ids): 150 | # import pdb;pdb.set_trace() 151 | out_mask = out_masks[idx*redundant_queries:(idx+1)*redundant_queries] 152 | tgt_mask = tgt_masks[idx] 153 | # out_mask = 
out_mask[None,:] 154 | tgt_mask = tgt_mask[None,:] 155 | 156 | out_mask = out_mask[:, None] 157 | tgt_mask = tgt_mask[:, None] 158 | 159 | point_coords = torch.rand(1, num_points, 2, device=out_mask.device) 160 | # get gt labels 161 | tgt_mask = point_sample( 162 | tgt_mask, 163 | point_coords.repeat(tgt_mask.shape[0], 1, 1), 164 | align_corners=False, 165 | ).squeeze(1) # [valid_classes, self.num_points] 166 | 167 | out_mask = point_sample( 168 | out_mask, 169 | point_coords.repeat(out_mask.shape[0], 1, 1), 170 | align_corners=False, 171 | ).squeeze(1) # [num_queries, self.num_points] 172 | 173 | out_mask = out_mask.float() 174 | tgt_mask = tgt_mask.float() 175 | # Compute the sigmoid CE loss between masks 176 | # cost_mask = batch_sigmoid_ce_loss_jit(out_mask, tgt_mask) 177 | cost_mask = batch_sigmoid_ce_loss(out_mask, tgt_mask) # [num_queries, valid_classes] 178 | 179 | # Compute the dice loss between masks 180 | # cost_dice = batch_dice_loss_jit(out_mask, tgt_mask) 181 | cost_dice = batch_dice_loss(out_mask, tgt_mask) 182 | 183 | indices = np.argmin(cost_mask.cpu() + cost_dice.cpu()) # lowest-cost query within the redundant group 184 | 185 | t = time.time() - s 186 | print(t) 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /core/models/decoders/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .meta_arch.aio_head import AIOHead 2 | -------------------------------------------------------------------------------- /core/models/decoders/network/meta_arch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/models/decoders/network/meta_arch/__init__.py -------------------------------------------------------------------------------- /core/models/decoders/network/transformer_decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer_decoder import TransformerDecoder 2 | -------------------------------------------------------------------------------- /core/models/decoders/network/transformer_decoder/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py 3 | """ 4 | Various positional encodings for the transformer. 5 | """ 6 | import math 7 | 8 | import torch 9 | from torch import nn 10 | 11 | 12 | class PositionEmbeddingSine(nn.Module): 13 | """ 14 | This is a more standard version of the position embedding, very similar to the one 15 | used by the Attention is all you need paper, generalized to work on images.
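A quick usage sketch of the class defined below (illustrative shapes): the output stacks sine/cosine pairs for y and x, so it carries `2 * num_pos_feats` channels.

```python
import torch

pos_enc = PositionEmbeddingSine(num_pos_feats=128, normalize=True)
feat = torch.rand(2, 256, 20, 30)   # (N, C, H, W); C matches 2 * 128
pos = pos_enc(feat)                 # mask=None assumes no padded pixels
print(pos.shape)                    # torch.Size([2, 256, 20, 30])
```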
16 | """ 17 | 18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 19 | super().__init__() 20 | self.num_pos_feats = num_pos_feats 21 | self.temperature = temperature 22 | self.normalize = normalize 23 | if scale is not None and normalize is False: 24 | raise ValueError("normalize should be True if scale is passed") 25 | if scale is None: 26 | scale = 2 * math.pi 27 | self.scale = scale 28 | 29 | def forward(self, x, mask=None): 30 | if mask is None: 31 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) 32 | not_mask = ~mask 33 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 34 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 35 | if self.normalize: 36 | eps = 1e-6 37 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 38 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 39 | 40 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 41 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 42 | 43 | pos_x = x_embed[:, :, :, None] / dim_t 44 | pos_y = y_embed[:, :, :, None] / dim_t 45 | pos_x = torch.stack( 46 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 47 | ).flatten(3) 48 | pos_y = torch.stack( 49 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 50 | ).flatten(3) 51 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 52 | return pos 53 | 54 | def __repr__(self, _repr_indent=4): 55 | head = "Positional encoding " + self.__class__.__name__ 56 | body = [ 57 | "num_pos_feats: {}".format(self.num_pos_feats), 58 | "temperature: {}".format(self.temperature), 59 | "normalize: {}".format(self.normalize), 60 | "scale: {}".format(self.scale), 61 | ] 62 | # _repr_indent = 4 63 | lines = [head] + [" " * _repr_indent + line for line in body] 64 | return "\n".join(lines) 65 | -------------------------------------------------------------------------------- /core/models/model_entry.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from core.utils import (add_task_specific, add_neck_specific, add_decoder_specific, add_backbone_specific, 7 | add_aio_decoder_specific, add_aio_backbone_specific, add_aio_neck_specific) 8 | 9 | class model_entry(nn.Module): 10 | def __init__(self, backbone_module, neck_module, decoder_module): 11 | super(model_entry, self).__init__() 12 | self.backbone_module = backbone_module 13 | self.neck_module = neck_module 14 | self.decoder_module = decoder_module 15 | add_task_specific(self, False) 16 | add_backbone_specific(self.backbone_module, True) 17 | add_neck_specific(self.neck_module, True) 18 | add_decoder_specific(self.decoder_module, True) 19 | if hasattr(self.decoder_module, 'loss'): 20 | if hasattr(self.decoder_module.loss, 'classifier'): 21 | add_task_specific(self.decoder_module.loss, True) 22 | 23 | def forward(self, input_var, current_step): 24 | x = self.backbone_module(input_var) # {'image': img_mask, 'label': target_mask, 'filename': img_name, 'backbone_output':xxx} 25 | x = self.neck_module(x) 26 | decoder_feature = self.decoder_module(x) 27 | return decoder_feature 28 | 29 | 30 | class aio_entry(nn.Module): 31 | def __init__(self, backbone_module, neck_module, decoder_module): 32 | super(aio_entry, self).__init__() 33 | self.backbone_module = backbone_module 34 | self.neck_module = neck_module 35 | self.decoder_module = 
decoder_module 36 | add_task_specific(self, False) 37 | add_aio_backbone_specific(self.backbone_module, True, self.backbone_module.task_sp_list) 38 | add_aio_neck_specific(self.neck_module, True, self.neck_module.task_sp_list) 39 | add_aio_decoder_specific(self.decoder_module, True, self.decoder_module.task_sp_list, 40 | self.decoder_module.neck_sp_list) 41 | 42 | def forward(self, input_var, current_step): 43 | if current_step < self.backbone_module.freeze_iters: 44 | with torch.no_grad(): 45 | x = self.backbone_module(input_var) # {'image': img_mask, 'label': target_mask, 'filename': img_name, 'backbone_output':xxx} 46 | else: 47 | x = self.backbone_module(input_var) # {'image': img_mask, 'label': target_mask, 'filename': img_name, 'backbone_output':xxx} 48 | x = self.neck_module(x) 49 | decoder_feature = self.decoder_module(x) 50 | return decoder_feature 51 | 52 | 53 | class aio_entry_v2(aio_entry): 54 | def __init__(self, backbone_module, neck_module, decoder_module): 55 | super(aio_entry, self).__init__() 56 | self.backbone_module = backbone_module 57 | self.neck_module = neck_module 58 | self.decoder_module = decoder_module 59 | add_task_specific(self, False) 60 | add_aio_backbone_specific(self.backbone_module, True, self.backbone_module.task_sp_list, 61 | self.backbone_module.neck_sp_list) 62 | add_aio_backbone_specific(self.neck_module, True, self.neck_module.task_sp_list) 63 | add_aio_decoder_specific(self.decoder_module, True, self.decoder_module.task_sp_list, 64 | self.decoder_module.neck_sp_list) 65 | 66 | 67 | -------------------------------------------------------------------------------- /core/models/necks/DoNothing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | __all__ = ['DoNothing'] 8 | 9 | class DoNothing(nn.Module): 10 | def __init__(self, **kwargs): 11 | super(DoNothing, self).__init__() 12 | 13 | def forward(self, x): 14 | x.update({'neck_output':x['backbone_output']}) 15 | return x 16 | -------------------------------------------------------------------------------- /core/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .DoNothing import * 2 | from .simple_neck import SimpleNeck 3 | 4 | def neck_entry(config): 5 | return globals()[config['type']](**config['kwargs']) 6 | -------------------------------------------------------------------------------- /core/models/necks/simple_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Norm2d(nn.Module): 7 | """ 8 | A LayerNorm variant, popularized by Transformers, that performs point-wise mean and 9 | variance normalization over the channel dimension for inputs that have shape 10 | (batch_size, channels, height, width). 
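A small equivalence check for `Norm2d` (a sketch, not repo code): point-wise normalization over the channel dimension of an NCHW tensor should match `F.layer_norm` with channels moved to the last axis.

```python
import torch
import torch.nn.functional as F

x = torch.rand(2, 64, 8, 8)                       # (N, C, H, W)
norm = Norm2d(64)
ref = F.layer_norm(x.permute(0, 2, 3, 1), (64,),
                   norm.weight, norm.bias, norm.eps)
assert torch.allclose(norm(x), ref.permute(0, 3, 1, 2), atol=1e-5)
```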
11 | https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa B950 12 | """ 13 | 14 | def __init__(self, embed_dim, eps=1e-6): 15 | super().__init__() 16 | self.weight = nn.Parameter(torch.ones(embed_dim)) 17 | self.bias = nn.Parameter(torch.zeros(embed_dim)) 18 | self.eps = eps 19 | self.normalized_shape = (embed_dim,) 20 | 21 | # >>> workaround for compatability 22 | self.ln = nn.LayerNorm(embed_dim, eps=1e-6) 23 | self.ln.weight = self.weight 24 | self.ln.bias = self.bias 25 | 26 | def forward(self, x): 27 | u = x.mean(1, keepdim=True) 28 | s = (x - u).pow(2).mean(1, keepdim=True) 29 | x = (x - u) / torch.sqrt(s + self.eps) 30 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 31 | return x 32 | 33 | 34 | class Conv2d(torch.nn.Conv2d): 35 | """ 36 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 37 | """ 38 | 39 | def __init__(self, *args, **kwargs): 40 | """ 41 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 42 | 43 | Args: 44 | norm (nn.Module, optional): a normalization layer 45 | activation (callable(Tensor) -> Tensor): a callable activation function 46 | 47 | It assumes that norm layer is used before activation. 48 | """ 49 | norm = kwargs.pop("norm", None) 50 | activation = kwargs.pop("activation", None) 51 | super().__init__(*args, **kwargs) 52 | 53 | self.norm = norm 54 | self.activation = activation 55 | 56 | def forward(self, x): 57 | x = F.conv2d( 58 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 59 | ) 60 | if self.norm is not None: 61 | x = self.norm(x) 62 | if self.activation is not None: 63 | x = self.activation(x) 64 | return x 65 | 66 | 67 | def _get_activation(activation): 68 | """Return an activation function given a string""" 69 | if activation == "relu": 70 | return nn.ReLU() 71 | elif activation == "gelu": 72 | return nn.GELU() 73 | else: 74 | raise RuntimeError(F"activation should be relu/gelu, not {activation}.") 75 | 76 | 77 | class SimpleNeck(nn.Module): 78 | def __init__(self, 79 | mask_dim, 80 | backbone, # placeholder 81 | bn_group, 82 | activation='gelu', 83 | task_sp_list=(), 84 | mask_forward=True 85 | ): 86 | super(SimpleNeck, self).__init__() 87 | self.task_sp_list = task_sp_list 88 | 89 | self.vis_token_dim = self.embed_dim = backbone.embed_dim 90 | self.mask_dim = mask_dim 91 | 92 | self.mask_map = nn.Sequential( 93 | nn.ConvTranspose2d(self.embed_dim, self.embed_dim, kernel_size=2, stride=2), 94 | Norm2d(self.embed_dim), 95 | _get_activation(activation), 96 | nn.ConvTranspose2d(self.embed_dim, self.mask_dim, kernel_size=2, stride=2), 97 | ) if mask_dim else False 98 | 99 | self.maskformer_num_feature_levels = 1 # always use 3 scales 100 | 101 | self.mask_forward = mask_forward 102 | 103 | def forward(self, features): 104 | if self.mask_map and self.mask_forward: 105 | features.update({'neck_output': {'mask_features': self.mask_map(features['backbone_output']), 106 | 'multi_scale_features': [features['backbone_output']]}}) 107 | else: 108 | features.update({'neck_output': {'mask_features': None, 109 | 'multi_scale_features': [features['backbone_output']]}}) 110 | return features 111 | 112 | 113 | -------------------------------------------------------------------------------- /core/models/ops/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/models/ops/__init__.py -------------------------------------------------------------------------------- /core/models/ops/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | import pdb 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 12 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 13 | return torch.stack(b, dim=-1) 14 | 15 | 16 | def box_xyxy_to_cxcywh(x): 17 | x0, y0, x1, y1 = x.unbind(-1) 18 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 19 | (x1 - x0), (y1 - y0)] 20 | return torch.stack(b, dim=-1) 21 | 22 | 23 | def box_ioa(boxes1, boxes2): 24 | area1 = box_area(boxes1) 25 | 26 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 27 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 28 | 29 | wh = (rb - lt).clamp(min=0) # [N,M,2] 30 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 31 | 32 | return inter / area1[:, None] 33 | 34 | 35 | # modified from torchvision to also return the union 36 | def box_iou(boxes1, boxes2): 37 | area1 = box_area(boxes1) 38 | area2 = box_area(boxes2) 39 | 40 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 41 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 42 | 43 | wh = (rb - lt).clamp(min=0) # [N,M,2] 44 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 45 | 46 | union = area1[:, None] + area2 - inter 47 | 48 | iou = inter / union 49 | return iou, union 50 | 51 | 52 | def generalized_box_iou(boxes1, boxes2): 53 | """ 54 | Generalized IoU from https://giou.stanford.edu/ 55 | 56 | The boxes should be in [x0, y0, x1, y1] format 57 | 58 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 59 | and M = len(boxes2) 60 | """ 61 | # degenerate boxes gives inf / nan results 62 | # so do an early check 63 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 64 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 65 | iou, union = box_iou(boxes1, boxes2) 66 | 67 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 68 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 69 | 70 | wh = (rb - lt).clamp(min=0) # [N,M,2] 71 | area = wh[:, :, 0] * wh[:, :, 1] 72 | 73 | return iou - (area - union) / area 74 | 75 | 76 | def giou_iou(boxes1, boxes2): 77 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all(), f"we have boxes1[:, 2:]: {boxes1[:, 2:]}, " \ 78 | f"boxes1[:, :2]: {boxes1[:, :2]}" 79 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all(), f"we have boxes2[:, 2:]: {boxes2[:, 2:]}, " \ 80 | f"boxes2[:, :2]: {boxes2[:, :2]}" 81 | iou, union = box_iou(boxes1, boxes2) 82 | 83 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 84 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 85 | 86 | wh = (rb - lt).clamp(min=0) # [N,M,2] 87 | area = wh[:, :, 0] * wh[:, :, 1] 88 | 89 | return iou - (area - union) / area, iou 90 | 91 | 92 | def masks_to_boxes(masks): 93 | """Compute the bounding boxes around the provided masks 94 | 95 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
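A worked example for the helpers above, with values small enough to verify by hand: two 2x2 boxes in xyxy format overlapping in a unit square.

```python
import torch

b1 = torch.tensor([[0., 0., 2., 2.]])
b2 = torch.tensor([[1., 1., 3., 3.]])

iou, union = box_iou(b1, b2)        # inter = 1, union = 7 -> iou = 1/7
giou = generalized_box_iou(b1, b2)  # hull = 9 -> 1/7 - 2/9 ~= -0.079
print(iou.item(), giou.item())
```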
96 | 97 | Returns a [N, 4] tensors, with the boxes in xyxy format 98 | """ 99 | if masks.numel() == 0: 100 | return torch.zeros((0, 4), device=masks.device) 101 | 102 | h, w = masks.shape[-2:] 103 | 104 | y = torch.arange(0, h, dtype=torch.float) 105 | x = torch.arange(0, w, dtype=torch.float) 106 | y, x = torch.meshgrid(y, x) 107 | 108 | x_mask = (masks * x.unsqueeze(0)) 109 | x_max = x_mask.flatten(1).max(-1)[0] 110 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 111 | 112 | y_mask = (masks * y.unsqueeze(0)) 113 | y_max = y_mask.flatten(1).max(-1)[0] 114 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 115 | 116 | return torch.stack([x_min, y_min, x_max, y_max], 1) 117 | -------------------------------------------------------------------------------- /core/models/ops/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from collections import namedtuple 6 | 7 | class Conv2d(torch.nn.Conv2d): 8 | """ 9 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 10 | """ 11 | 12 | def __init__(self, *args, **kwargs): 13 | """ 14 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 15 | 16 | Args: 17 | norm (nn.Module, optional): a normalization layer 18 | activation (callable(Tensor) -> Tensor): a callable activation function 19 | 20 | It assumes that norm layer is used before activation. 21 | """ 22 | norm = kwargs.pop("norm", None) 23 | activation = kwargs.pop("activation", None) 24 | super().__init__(*args, **kwargs) 25 | 26 | self.norm = norm 27 | self.activation = activation 28 | 29 | def forward(self, x): 30 | # torchscript does not support SyncBatchNorm yet 31 | # https://github.com/pytorch/pytorch/issues/40507 32 | # and we skip these codes in torchscript since: 33 | # 1. currently we only support torchscript in evaluation mode 34 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or 35 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. 36 | if not torch.jit.is_scripting(): 37 | if x.numel() == 0 and self.training: 38 | # https://github.com/pytorch/pytorch/issues/12013 39 | assert not isinstance( 40 | self.norm, torch.nn.SyncBatchNorm 41 | ), "SyncBatchNorm does not support empty inputs!" 42 | 43 | x = F.conv2d( 44 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups 45 | ) 46 | if self.norm is not None: 47 | x = self.norm(x) 48 | if self.activation is not None: 49 | x = self.activation(x) 50 | return x 51 | 52 | 53 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 54 | """ 55 | A simple structure that contains basic shape specification about a tensor. 56 | It is often used as the auxiliary inputs/outputs of models, 57 | to complement the lack of shape inference ability among pytorch modules. 58 | 59 | Attributes: 60 | channels: 61 | height: 62 | width: 63 | stride: 64 | """ 65 | 66 | def __new__(cls, channels=None, height=None, width=None, stride=None): 67 | return super().__new__(cls, channels, height, width, stride) 68 | 69 | 70 | def get_norm(norm, out_channels): # todo: replace with syncbn 71 | """ 72 | Args: 73 | norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; 74 | or a callable that takes a channel number and returns 75 | the normalization layer as a nn.Module. 
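As wired up below, only group norm currently resolves to a layer; a short sketch of the contract:

```python
import torch.nn as nn

gn = get_norm("GN", 64)            # -> nn.GroupNorm(32, 64)
assert isinstance(gn, nn.GroupNorm)
assert get_norm(None, 64) is None  # None and "" both mean "no norm"
assert get_norm("", 64) is None
```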
76 | 77 | Returns: 78 | nn.Module or None: the normalization layer 79 | """ 80 | if norm is None: 81 | return None 82 | if isinstance(norm, str): 83 | if len(norm) == 0: 84 | return None 85 | norm = { 86 | # "BN": BatchNorm2d, 87 | # # Fixed in https://github.com/pytorch/pytorch/pull/36382 88 | # "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, 89 | # "FrozenBN": FrozenBatchNorm2d, 90 | "GN": lambda channels: nn.GroupNorm(32, channels), 91 | # for debugging: 92 | # "nnSyncBN": nn.SyncBatchNorm, 93 | # "naiveSyncBN": NaiveSyncBatchNorm, 94 | # # expose stats_mode N as an option to caller, required for zero-len inputs 95 | # "naiveSyncBN_N": lambda channels: NaiveSyncBatchNorm(channels, stats_mode="N"), 96 | }[norm] 97 | return norm(out_channels) 98 | 99 | 100 | def c2_xavier_fill(module: nn.Module) -> None: 101 | """ 102 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2. 103 | Also initializes `module.bias` to 0. 104 | 105 | Args: 106 | module (torch.nn.Module): module to initialize. 107 | """ 108 | # Caffe2 implementation of XavierFill in fact 109 | # corresponds to kaiming_uniform_ in PyTorch 110 | nn.init.kaiming_uniform_(module.weight, a=1) 111 | if module.bias is not None: 112 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module, 113 | # torch.Tensor]`. 114 | nn.init.constant_(module.bias, 0) 115 | 116 | 117 | def c2_msra_fill(module: nn.Module) -> None: 118 | """ 119 | Initialize `module.weight` using the "MSRAFill" implemented in Caffe2. 120 | Also initializes `module.bias` to 0. 121 | 122 | Args: 123 | module (torch.nn.Module): module to initialize. 124 | """ 125 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 126 | if module.bias is not None: 127 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module, 128 | # torch.Tensor]`. 129 | nn.init.constant_(module.bias, 0) 130 | 131 | 132 | def cat(tensors: List[torch.Tensor], dim: int = 0): 133 | """ 134 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 135 | """ 136 | assert isinstance(tensors, (list, tuple)) 137 | if len(tensors) == 1: 138 | return tensors[0] 139 | return torch.cat(tensors, dim) -------------------------------------------------------------------------------- /core/models/tta.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | 4 | import numpy as np 5 | import torch 6 | # from fvcore.transforms import HFlipTransform 7 | from torch import nn 8 | from torch.nn.parallel import DistributedDataParallel 9 | 10 | from core.distributed_utils import DistModule 11 | from core.data.transforms.seg_aug_dev import ( 12 | RandomFlip, 13 | ResizeShortestEdge, 14 | ResizeTransform, 15 | HFlipTransform, 16 | NoOpTransform 17 | ) 18 | from core.data.transforms.seg_transforms_dev import apply_augmentations 19 | 20 | __all__ = [ 21 | "SemanticSegmentorWithTTA", 22 | ] 23 | 24 | 25 | class SemanticSegmentorWithTTA(nn.Module): 26 | """ 27 | A SemanticSegmentor with test-time augmentation enabled. 28 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. 29 | 30 | combined with customized augmentation for original image 31 | """ 32 | 33 | def __init__(self, cfg, model, batch_size=1): 34 | """ 35 | Args: 36 | cfg (CfgNode): 37 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. 
38 | tta_mapper (callable): takes a dataset dict and returns a list of 39 | augmented versions of the dataset dict. Defaults to 40 | `DatasetMapperTTA(cfg)`. 41 | batch_size (int): batch the augmented images into this batch size for inference. 42 | """ 43 | super().__init__() 44 | if isinstance(model, DistributedDataParallel) or isinstance(model, DistModule): 45 | model = model.module 46 | self.cfg = cfg 47 | 48 | self.min_sizes = cfg.min_sizes 49 | self.max_size = cfg.max_size 50 | self.flip = cfg.flip 51 | 52 | self.model = model 53 | 54 | # if tta_mapper is None: 55 | # tta_mapper = DatasetMapperTTA(cfg) 56 | # self.tta_mapper = tta_mapper 57 | assert batch_size == 1 58 | self.batch_size = batch_size 59 | 60 | def tta_mapper(self, dataset_dict): 61 | """ 62 | Args: 63 | dict: a dict in standard model input format. See tutorials for details. 64 | 65 | Returns: 66 | list[dict]: 67 | a list of dicts, which contain augmented version of the input image. 68 | The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. 69 | Each dict has field "transforms" which is a TransformList, 70 | containing the transforms that are used to generate this image. 71 | """ 72 | assert len(dataset_dict["image"].shape) == 4 73 | numpy_image = dataset_dict["image"].squeeze().permute(1, 2, 0).cpu().numpy() 74 | shape = numpy_image.shape 75 | orig_shape = (dataset_dict["height"], dataset_dict["width"]) 76 | if shape[:2] != orig_shape: 77 | # It transforms the "original" image in the dataset to the input image 78 | pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1]) 79 | else: 80 | pre_tfm = NoOpTransform() 81 | 82 | # Create all combinations of augmentations to use 83 | aug_candidates = [] # each element is a list[Augmentation] 84 | for min_size in self.min_sizes: 85 | resize = ResizeShortestEdge(min_size, self.max_size) 86 | aug_candidates.append([resize]) # resize only 87 | if self.flip: 88 | flip = RandomFlip(prob=1.0) 89 | aug_candidates.append([resize, flip]) # resize + flip 90 | 91 | # Apply all the augmentations 92 | ret = [] 93 | for aug in aug_candidates: 94 | new_image, tfms = apply_augmentations(aug, np.copy(numpy_image)) 95 | torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1))) 96 | 97 | torch_image = torch_image.unsqueeze(0) 98 | 99 | dic = copy.deepcopy(dataset_dict) 100 | dic["transforms"] = pre_tfm + tfms 101 | dic["image"] = torch_image.cuda() 102 | ret.append(dic) 103 | return ret 104 | 105 | def __call__(self, batched_inputs, current_step): 106 | """ 107 | Same input/output format as :meth:`SemanticSegmentor.forward` 108 | """ 109 | self.current_step = current_step # redundant param for api compliance 110 | def _maybe_read_image(dataset_dict): 111 | ret = copy.copy(dataset_dict) 112 | if "image" not in ret: 113 | raise 114 | if "height" not in ret and "width" not in ret: # TODO: BUG HERE 115 | raise 116 | # ret["height"] = ret["ori_image"].shape[1]#ret["image"].shape[1] 117 | # ret["width"] = ret["ori_image"].shape[2] 118 | return ret 119 | 120 | processed_results = [] 121 | # for x in batched_inputs: 122 | result = self._inference_one_image(_maybe_read_image(batched_inputs)) 123 | processed_results.append(result) 124 | return processed_results 125 | 126 | def _inference_one_image(self, input): 127 | """ 128 | Args: 129 | input (dict): one dataset dict with "image" field being a CHW tensor 130 | Returns: 131 | dict: one output dict 132 | """ 133 | augmented_inputs, tfms = self._get_augmented_inputs(input) 134 | 135 | 
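# Accumulate "sem_seg" logits over each augmented view: horizontally flipped
# views are flipped back along the width axis (dims=[2]) before summing, and
# the running sum is averaged over the number of views at the end.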
final_predictions = None 136 | count_predictions = 0 137 | for input, tfm in zip(augmented_inputs, tfms): 138 | count_predictions += 1 139 | with torch.no_grad(): 140 | if final_predictions is None: 141 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 142 | final_predictions = self.model(input, self.current_step)[0].pop("sem_seg").flip(dims=[2]) # should be [input] originally 143 | else: 144 | final_predictions = self.model(input, self.current_step)[0].pop("sem_seg") 145 | else: 146 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms): 147 | final_predictions += self.model(input, self.current_step)[0].pop("sem_seg").flip(dims=[2]) 148 | else: 149 | final_predictions += self.model(input, self.current_step)[0].pop("sem_seg") 150 | 151 | final_predictions = final_predictions / count_predictions 152 | return {"sem_seg": final_predictions} 153 | 154 | def _get_augmented_inputs(self, input): 155 | augmented_inputs = self.tta_mapper(input) 156 | tfms = [x.pop("transforms") for x in augmented_inputs] 157 | return augmented_inputs, tfms 158 | -------------------------------------------------------------------------------- /core/msg_server.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import socket 3 | import itchat 4 | import errno 5 | import time 6 | import threading 7 | 8 | import os 9 | import subprocess 10 | 11 | parser = argparse.ArgumentParser(description="simple server!") 12 | parser.add_argument('--mode', type=str) 13 | parser.add_argument('--ip', type=str) 14 | parser.add_argument('--port', type=int) 15 | parser.add_argument('--timeout', type=int) 16 | 17 | class MsgServer(object): 18 | def __init__(self, server_ip, server_port): 19 | self.init_chat() 20 | self.send('server chat logged in!') 21 | self.start_server(server_ip, server_port) 22 | 23 | def init_chat(self): 24 | itchat.auto_login(enableCmdQR=2) 25 | 26 | def send(self, msg, echo=True): 27 | if echo: 28 | print(msg) 29 | itchat.send(msg, toUserName='filehelper') 30 | 31 | def worker_thread(self, conn, addr): 32 | conn.settimeout(args.timeout) 33 | if conn is not None: 34 | self.send('job connected! 
[{}]'.format(addr)) 35 | else: 36 | self.send('none connection!') 37 | return -1 38 | 39 | while(True): 40 | try: 41 | recv_data = conn.recv(1024) 42 | except socket.timeout as e: 43 | print('no msg...') 44 | else: 45 | msg_len = len(recv_data) 46 | if msg_len == 0: 47 | self.send('connection break, waiting for other connections..') 48 | break 49 | self.send(str(recv_data, encoding = 'utf-8')) 50 | conn.close() 51 | 52 | def start_server(self, server_ip, server_port): 53 | ip_port = (server_ip, server_port) 54 | s = socket.socket() 55 | s.bind(ip_port) 56 | 57 | # dump ip/port info to file 58 | with open('server.txt', 'w') as f: 59 | f.write('{} {}\n'.format(server_ip, server_port)) 60 | 61 | s.listen() 62 | self.send('server listening on {}, waiting for job connection...'.format(server_ip)) 63 | while(True): 64 | conn, addr = s.accept() 65 | threading.Thread(target=self.worker_thread, args=(conn, addr)).start() 66 | 67 | class MsgClient(object): 68 | def __init__(self, server_ip, server_port): 69 | self._init_client(server_ip, server_port) 70 | self.send('I\'m client!\n') 71 | 72 | def send(self, msg, echo=True): 73 | self.s.send(bytes(msg, encoding = 'utf-8')) 74 | if echo: 75 | print(msg) 76 | 77 | def _init_client(self, server_ip, server_port): 78 | ip_port = (server_ip, server_port) 79 | self.s = socket.socket() 80 | self.s.connect(ip_port) 81 | 82 | def close(self): 83 | self.s.close() 84 | 85 | def itchat_manager(): 86 | def run_and_get(cmd, screen=False): 87 | process = subprocess.Popen( 88 | cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) 89 | output = '' 90 | 91 | for line in process.stdout: 92 | line = line.decode('utf-8') 93 | if screen: 94 | print(line, end='', flush=True) 95 | output += line.strip(' ') 96 | 97 | return output 98 | 99 | @itchat.msg_register(itchat.content.TEXT) 100 | def text_reply(msg): 101 | res_txt = None 102 | cmd_dict = { 103 | 'sq' : 'squeue -p VI_Face_V100', 104 | 'sq1': 'squeue -p VI_Face_1080TI' 105 | } 106 | if msg.text in cmd_dict: 107 | cmd = cmd_dict[msg.text] 108 | res_txt = run_and_get(cmd) 109 | elif msg.text.startswith('exec:'): 110 | cmd = msg.text.replace('exec:', '') 111 | if os.system(cmd) == 0: 112 | res_txt = 'exec successed!' 113 | else: 114 | res_txt = 'exec failed!' 115 | elif msg.text.startswith('getinfo:'): 116 | cmd = msg.text.replace('getinfo:', '') 117 | res_txt = run_and_get(cmd) 118 | 119 | if res_txt is not None: 120 | itchat.send(res_txt, toUserName='filehelper') 121 | 122 | itchat.auto_login(enableCmdQR=2, hotReload=True) 123 | itchat.run(True) 124 | 125 | if __name__ == '__main__': 126 | args = parser.parse_args() 127 | if args.mode == 'server': 128 | if args.ip is None or args.port is None: 129 | with open('server.txt', 'r') as f: 130 | line = f.read().strip().split() 131 | args.ip = line[0] 132 | args.port = int(line[1]) 133 | print('reading ip & port from server.txt, {}:{}'.format(args.ip, args.port)) 134 | s = MsgServer(args.ip, args.port) 135 | elif args.mode == 'manager': 136 | itchat_manager() 137 | else: 138 | s = MsgClient(args.ip, args.port) 139 | time.sleep(5) 140 | s.close() 141 | -------------------------------------------------------------------------------- /core/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.optim.optimizer import Optimizer, required 3 | import time 4 | 5 | 6 | class SGD(Optimizer): 7 | r"""Implements stochastic gradient descent (optionally with momentum). 
8 | 9 | Nesterov momentum is based on the formula from 10 | `On the importance of initialization and momentum in deep learning`__. 11 | 12 | Args: 13 | params (iterable): iterable of parameters to optimize or dicts defining 14 | parameter groups 15 | lr (float): learning rate 16 | momentum (float, optional): momentum factor (default: 0) 17 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 18 | dampening (float, optional): dampening for momentum (default: 0) 19 | nesterov (bool, optional): enables Nesterov momentum (default: False) 20 | 21 | Example: 22 | >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9) 23 | >>> optimizer.zero_grad() 24 | >>> loss_fn(model(input), target).backward() 25 | >>> optimizer.step() 26 | 27 | __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf 28 | 29 | .. note:: 30 | The implementation of SGD with Momentum/Nesterov subtly differs from 31 | Sutskever et. al. and implementations in some other frameworks. 32 | 33 | Considering the specific case of Momentum, the update can be written as 34 | 35 | .. math:: 36 | v = \rho * v + g \\ 37 | p = p - lr * v 38 | 39 | where p, g, v and :math:`\rho` denote the parameters, gradient, 40 | velocity, and momentum respectively. 41 | 42 | This is in contrast to Sutskever et. al. and 43 | other frameworks which employ an update of the form 44 | 45 | .. math:: 46 | v = \rho * v + lr * g \\ 47 | p = p - v 48 | 49 | The Nesterov version is analogously modified. 50 | """ 51 | 52 | def __init__(self, params, lr=required, momentum=0, dampening=0, 53 | weight_decay=0, nesterov=False): 54 | if lr is not required and lr < 0.0: 55 | raise ValueError("Invalid learning rate: {}".format(lr)) 56 | if momentum < 0.0: 57 | raise ValueError("Invalid momentum value: {}".format(momentum)) 58 | if weight_decay < 0.0: 59 | raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) 60 | 61 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening, 62 | weight_decay=weight_decay, nesterov=nesterov) 63 | if nesterov and (momentum <= 0 or dampening != 0): 64 | raise ValueError("Nesterov momentum requires a momentum and zero dampening") 65 | super(SGD, self).__init__(params, defaults) 66 | 67 | def __setstate__(self, state): 68 | super(SGD, self).__setstate__(state) 69 | for group in self.param_groups: 70 | group.setdefault('nesterov', False) 71 | 72 | def step(self, closure=None): 73 | """Performs a single optimization step. 74 | 75 | Arguments: 76 | closure (callable, optional): A closure that reevaluates the model 77 | and returns the loss. 
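A minimal closure sketch matching the signature documented here; `model`, `loss_fn`, `inputs`, and `targets` are illustrative names, not repo objects:

```python
def closure():
    optimizer.zero_grad()
    loss = loss_fn(model(inputs), targets)  # re-evaluate the model
    loss.backward()
    return loss

loss = optimizer.step(closure)
```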
78 | """ 79 | loss = None 80 | if closure is not None: 81 | loss = closure() 82 | 83 | for group in self.param_groups: 84 | weight_decay = group['weight_decay'] 85 | momentum = group['momentum'] 86 | dampening = group['dampening'] 87 | nesterov = group['nesterov'] 88 | 89 | for p in group['params']: 90 | if p.grad is None: 91 | continue 92 | d_p = p.grad.data 93 | if weight_decay != 0: 94 | d_p.add_(weight_decay, p.data) 95 | if momentum != 0: 96 | param_state = self.state[p] 97 | if 'momentum_buffer' not in param_state: 98 | buf = param_state['momentum_buffer'] = torch.zeros_like(p.data) 99 | buf.mul_(momentum).add_(d_p) 100 | else: 101 | buf = param_state['momentum_buffer'] 102 | buf.mul_(momentum).add_(1 - dampening, d_p) 103 | if nesterov: 104 | d_p = d_p.add(momentum, buf) 105 | else: 106 | d_p = buf 107 | 108 | p.data.add_(-group['lr'], d_p) 109 | 110 | return loss 111 | 112 | -------------------------------------------------------------------------------- /core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.optim import SGD, RMSprop, Adadelta, Adagrad, Adam, AdamW # noqa F401 2 | from .lars import LARS # noqa F401 3 | from .adam_clip import AdamWithClip, AdamWWithClip, AdamWWithClipDev # noqa F401 4 | from .adafactor import Adafactor_dev 5 | 6 | 7 | def optim_entry(config): 8 | return globals()[config['type']](**config['kwargs']) 9 | -------------------------------------------------------------------------------- /core/optimizers/adafactor.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union 6 | from torch import Tensor 7 | 8 | Params = Union[Iterable[Tensor], Iterable[Dict[str, Any]]] 9 | LossClosure = Callable[[], float] 10 | OptLossClosure = Optional[LossClosure] 11 | Betas2 = Tuple[float, float] 12 | State = Dict[str, Any] 13 | OptFloat = Optional[float] 14 | Nus2 = Tuple[float, float] 15 | Eps2 = Tuple[float, float] 16 | ParamGroup = Dict[str, Any] 17 | 18 | 19 | class Adafactor_dev(Optimizer): 20 | """Implements Adafactor algorithm. 21 | 22 | It has been proposed in: `Adafactor: Adaptive Learning Rates with 23 | Sublinear Memory Cost`__. 
24 | 25 | Arguments: 26 | params: iterable of parameters to optimize or dicts defining 27 | parameter groups 28 | lr: external learning rate (default: None) 29 | eps2: regularization constans for square gradient 30 | and parameter scale respectively (default: (1e-30, 1e-3)) 31 | clip_threshold: threshold of root mean square of 32 | final gradient update (default: 1.0) 33 | decay_rate: coefficient used to compute running averages of square 34 | gradient (default: -0.8) 35 | beta1: coefficient used for computing running averages of gradient 36 | (default: None) 37 | weight_decay: weight decay (L2 penalty) (default: 0) 38 | scale_parameter: if true, learning rate is scaled by root mean square 39 | of parameter (default: True) 40 | relative_step: if true, time-dependent learning rate is computed 41 | instead of external learning rate (default: True) 42 | warmup_init: time-dependent learning rate computation depends on 43 | whether warm-up initialization is being used (default: False) 44 | 45 | Example: 46 | >>> import torch_optimizer as optim 47 | >>> optimizer = optim.Adafactor(model.parameters()) 48 | >>> optimizer.zero_grad() 49 | >>> loss_fn(model(input), target).backward() 50 | >>> optimizer.step() 51 | 52 | __ https://arxiv.org/abs/1804.04235 53 | 54 | Note: 55 | Reference code: https://github.com/pytorch/fairseq/blob/master/fairseq/optim/adafactor.py # noqa 56 | """ 57 | 58 | def __init__( 59 | self, 60 | params: Params, 61 | lr: OptFloat = None, 62 | eps2: Eps2 = (1e-30, 1e-3), 63 | clip_threshold: float = 1.0, 64 | decay_rate: float = -0.8, 65 | beta1: OptFloat = None, 66 | weight_decay: float = 0.0, 67 | scale_parameter: bool = True, 68 | relative_step: bool = True, 69 | warmup_init: bool = False, 70 | clip_beta2: Any = False, 71 | ): 72 | if lr is not None and lr <= 0.0: 73 | raise ValueError('Invalid learning rate: {}'.format(lr)) 74 | if weight_decay < 0.0: 75 | raise ValueError( 76 | 'Invalid weight_decay value: {}'.format(weight_decay) 77 | ) 78 | 79 | defaults = dict( 80 | lr=lr, 81 | eps2=eps2, 82 | clip_threshold=clip_threshold, 83 | decay_rate=decay_rate, 84 | beta1=beta1, 85 | weight_decay=weight_decay, 86 | scale_parameter=scale_parameter, 87 | relative_step=relative_step, 88 | warmup_init=warmup_init, 89 | clip_beta2=clip_beta2, 90 | ) 91 | super(Adafactor_dev, self).__init__(params, defaults) 92 | 93 | def _get_lr(self, param_group: ParamGroup, param_state: State) -> float: 94 | rel_step_sz = param_group['lr'] 95 | if param_group['relative_step']: 96 | min_step = ( 97 | 1e-6 * param_state['step'] 98 | if param_group['warmup_init'] 99 | else 1e-2 100 | ) 101 | rel_step_sz = min(min_step, 1.0 / math.sqrt(param_state['step'])) 102 | param_scale = 1.0 103 | if param_group['scale_parameter']: 104 | param_scale = max(param_group['eps2'][1], param_state['RMS']) 105 | return param_scale * rel_step_sz 106 | 107 | def _get_options( 108 | self, param_group: ParamGroup, param_shape: Tuple[int, ...] 
109 | ) -> Tuple[bool, bool]: 110 | factored = len(param_shape) >= 2 111 | use_first_moment = param_group['beta1'] is not None 112 | return factored, use_first_moment 113 | 114 | def _rms(self, tensor: torch.Tensor) -> float: 115 | return tensor.norm(2) / (tensor.numel() ** 0.5) 116 | 117 | def _approx_sq_grad( 118 | self, 119 | exp_avg_sq_row: torch.Tensor, 120 | exp_avg_sq_col: torch.Tensor, 121 | output: torch.Tensor, 122 | ) -> None: 123 | r_factor = ( 124 | (exp_avg_sq_row / exp_avg_sq_row.mean(dim=-1, keepdim=True)) 125 | .rsqrt_() 126 | .unsqueeze(-1) 127 | ) 128 | c_factor = exp_avg_sq_col.unsqueeze(-2).rsqrt() 129 | torch.mul(r_factor, c_factor, out=output) 130 | 131 | def step(self, closure: OptLossClosure = None) -> OptFloat: 132 | r"""Performs a single optimization step. 133 | 134 | Arguments: 135 | closure: A closure that reevaluates the model and returns the loss. 136 | """ 137 | loss = None 138 | if closure is not None: 139 | loss = closure() 140 | 141 | for group in self.param_groups: 142 | for p in group['params']: 143 | if p.grad is None: 144 | continue 145 | grad = p.grad.data 146 | if grad.is_sparse: 147 | raise RuntimeError( 148 | 'Adafactor does not support sparse gradients.' 149 | ) 150 | 151 | state = self.state[p] 152 | grad_shape = grad.shape 153 | 154 | factored, use_first_moment = self._get_options( 155 | group, grad_shape 156 | ) 157 | # State Initialization 158 | if len(state) == 0: 159 | state['step'] = 0 160 | 161 | if use_first_moment: 162 | # Exponential moving average of gradient values 163 | state['exp_avg'] = torch.zeros_like( 164 | grad, memory_format=torch.preserve_format 165 | ) 166 | if factored: 167 | state['exp_avg_sq_row'] = torch.zeros( 168 | grad_shape[:-1] 169 | ).type_as(grad) 170 | state['exp_avg_sq_col'] = torch.zeros( 171 | grad_shape[:-2] + grad_shape[-1:] 172 | ).type_as(grad) 173 | else: 174 | state['exp_avg_sq'] = torch.zeros_like( 175 | grad, memory_format=torch.preserve_format 176 | ) 177 | 178 | state['RMS'] = 0 179 | 180 | state['step'] += 1 181 | state['RMS'] = self._rms(p.data) 182 | lr = self._get_lr(group, state) 183 | 184 | beta2t = 1.0 - math.pow(state['step'], group['decay_rate']) 185 | 186 | if group['clip_beta2'] != False: 187 | beta2t = min(beta2t, group['clip_beta2']) 188 | 189 | update = (grad ** 2) + group['eps2'][0] 190 | if factored: 191 | exp_avg_sq_row = state['exp_avg_sq_row'] 192 | exp_avg_sq_col = state['exp_avg_sq_col'] 193 | 194 | exp_avg_sq_row.mul_(beta2t).add_( 195 | update.mean(dim=-1), alpha=1.0 - beta2t 196 | ) 197 | exp_avg_sq_col.mul_(beta2t).add_( 198 | update.mean(dim=-2), alpha=1.0 - beta2t 199 | ) 200 | 201 | # Approximation of exponential moving average of square 202 | # of gradient 203 | self._approx_sq_grad( 204 | exp_avg_sq_row, exp_avg_sq_col, update 205 | ) 206 | update.mul_(grad) 207 | else: 208 | exp_avg_sq = state['exp_avg_sq'] 209 | 210 | exp_avg_sq.mul_(beta2t).add_(update, alpha=1.0 - beta2t) 211 | torch.rsqrt(exp_avg_sq, out=update).mul_(grad) 212 | 213 | update.div_( 214 | max(1.0, self._rms(update) / group['clip_threshold']) 215 | ) 216 | update.mul_(lr) 217 | 218 | if use_first_moment: 219 | exp_avg = state['exp_avg'] 220 | exp_avg.mul_(group['beta1']).add_( 221 | update, alpha=1 - group['beta1'] 222 | ) 223 | update = exp_avg 224 | 225 | if group['weight_decay'] != 0: 226 | p.data.add_(p.data, alpha=-group['weight_decay'] * lr) 227 | 228 | p.data.add_(-update) 229 | 230 | return loss 231 | -------------------------------------------------------------------------------- 
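To make the factored update above concrete: for a 2-D parameter, `_approx_sq_grad` reconstructs the per-element second moment from its row and column means as `outer(row, col) / mean(row)` and inverts it with `rsqrt`. A standalone sketch with illustrative tensors:

```python
import torch

v = torch.rand(4, 6) + 1e-3   # stand-in for the full squared-gradient EMA
row = v.mean(dim=-1)          # analogue of exp_avg_sq_row, shape (4,)
col = v.mean(dim=-2)          # analogue of exp_avg_sq_col, shape (6,)

# r_factor * c_factor in _approx_sq_grad equals the rsqrt of this
# rank-1 estimate of v:
approx = torch.outer(row, col) / row.mean()
r_factor = (row / row.mean()).rsqrt().unsqueeze(-1)
c_factor = col.unsqueeze(-2).rsqrt()
assert torch.allclose(r_factor * c_factor, approx.rsqrt(), atol=1e-5)
```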
/core/optimizers/adam_clip.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from torch.nn.utils import clip_grad_norm_ 3 | from torch.optim import Adam, AdamW 4 | 5 | 6 | class AdamWithClip(Adam): 7 | 8 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 9 | weight_decay=0, amsgrad=False, max_norm=None, norm_type=2): 10 | super(AdamWithClip, self).__init__(params, lr, betas, eps, weight_decay, amsgrad) 11 | self.max_norm = max_norm 12 | self.norm_type = norm_type 13 | 14 | def step(self, closure=None): 15 | if self.max_norm is not None: 16 | for group in self.param_groups: 17 | clip_grad_norm_(group['params'], self.max_norm, self.norm_type) 18 | super(AdamWithClip, self).step(closure) 19 | 20 | 21 | class AdamWWithClip(AdamW): 22 | 23 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 24 | weight_decay=0, amsgrad=False, max_norm=None, norm_type=2): 25 | super(AdamWWithClip, self).__init__(params, lr, betas, eps, weight_decay, amsgrad) 26 | self.max_norm = max_norm 27 | self.norm_type = norm_type 28 | 29 | def step(self, closure=None): 30 | 31 | if self.max_norm is not None: 32 | for group in self.param_groups: 33 | clip_grad_norm_(group['params'], self.max_norm, self.norm_type) 34 | super(AdamWWithClip, self).step(closure) 35 | 36 | 37 | class AdamWWithClipDev(AdamW): 38 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 39 | weight_decay=1e-2, amsgrad=False, clip_norm=None, norm_type=2): 40 | super(AdamWWithClipDev, self).__init__(params, lr, betas, eps, weight_decay, amsgrad) 41 | self.clip_norm = clip_norm 42 | self.norm_type = norm_type 43 | 44 | self._split_param_groups = None 45 | self.reset_split_param_groups() 46 | 47 | def reset_split_param_groups(self): 48 | if self.clip_norm is not None: 49 | backbone_param, neck_param, decoder_param, task_param = [], [], [], [] 50 | for x in self.param_groups: 51 | if x["params"][0].backbone_specific: 52 | backbone_param.append(x["params"]) 53 | elif x["params"][0].neck_specific: 54 | neck_param.append(x["params"]) 55 | elif x["params"][0].decoder_specific: 56 | decoder_param.append(x["params"]) 57 | elif x["params"][0].task_specific: 58 | task_param.append(x["params"]) 59 | self._split_param_groups = [_g for _g in [backbone_param, 60 | neck_param, 61 | decoder_param, 62 | task_param] if len(_g) > 0] 63 | print(f">>> reset_split_param_groups, backbone_param: {len(backbone_param)}" 64 | f", neck_param: {len(neck_param)}, decoder_param: {len(decoder_param)}" 65 | f", task_param: {len(task_param)}") 66 | 67 | def step(self, closure=None): 68 | if self.clip_norm is not None: 69 | for _g in self._split_param_groups: 70 | all_params = itertools.chain(*_g) 71 | clip_grad_norm_(all_params, self.clip_norm, self.norm_type) 72 | 73 | super(AdamWWithClipDev, self).step(closure) 74 | -------------------------------------------------------------------------------- /core/optimizers/lars.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.optim.optimizer import Optimizer, required 3 | 4 | 5 | class LARS(Optimizer): 6 | r"""Implements layer-wise adaptive rate scaling for SGD, based on 7 | `"Large Batch Training of Convolutional Networks" `_ 8 | 9 | Arguments: 10 | - params (:obj:`iterable`): iterable of parameters to optimize or dicts defining parameter groups 11 | - lr (:obj:`float`): learning rate 12 | - momentum (:obj:`float`, optional): momentum factor (default: 0) 13 | - weight_decay 
(:obj:`float`, optional): weight decay (L2 penalty) (default: 0) 14 | - dampening (:obj:`float`, optional): dampening for momentum (default: 0) 15 | - eta(:obj:`float`): LARS coefficient (default 0.001) 16 | - nesterov (:obj:`bool`, optional): enables Nesterov momentum (default: False) 17 | 18 | Example: 19 | >>> optimizer = LARS(model.parameters(), lr=0.1, momentum=0.9, eta=1e-3) 20 | >>> optimizer.zero_grad() 21 | >>> loss_fn(model(input), target).backward() 22 | >>> optimizer.step() 23 | """ 24 | 25 | def __init__(self, params, lr=required, momentum=0, dampening=0, 26 | weight_decay=0, eta=0.001, nesterov=False): 27 | if lr is not required and lr < 0.0: 28 | raise ValueError("Invalid learning rate: {}".format(lr)) 29 | if momentum < 0.0: 30 | raise ValueError("Invalid momentum value: {}".format(momentum)) 31 | if weight_decay < 0.0: 32 | raise ValueError( 33 | "Invalid weight_decay value: {}".format(weight_decay)) 34 | if eta < 0.0: 35 | raise ValueError("Invalid LARS coefficient value: {}".format(eta)) 36 | 37 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening, 38 | weight_decay=weight_decay, eta=eta, nesterov=nesterov) 39 | if nesterov and (momentum <= 0 or dampening != 0): 40 | raise ValueError( 41 | "Nesterov momentum requires a momentum and zero dampening") 42 | super(LARS, self).__init__(params, defaults) 43 | 44 | def __setstate__(self, state): 45 | super(LARS, self).__setstate__(state) 46 | for group in self.param_groups: 47 | group.setdefault('nesterov', False) 48 | 49 | @torch.no_grad() 50 | def step(self, closure=None): 51 | """Performs a single optimization step. 52 | 53 | Arguments: 54 | - closure (:obj:`callable`, optional): A closure that reevaluates the model and returns the loss. 55 | """ 56 | loss = None 57 | if closure is not None: 58 | loss = closure() 59 | 60 | for group in self.param_groups: 61 | weight_decay = group['weight_decay'] 62 | momentum = group['momentum'] 63 | dampening = group['dampening'] 64 | nesterov = group['nesterov'] 65 | eta = group['eta'] 66 | 67 | for p in group['params']: 68 | if p.grad is None: 69 | continue 70 | d_p = p.grad.data 71 | 72 | # compute local learning rate 73 | weight_norm = torch.norm(p.data) 74 | grad_norm = torch.norm(d_p) 75 | 76 | if weight_decay != 0: 77 | d_p.add_(weight_decay, p.data) 78 | grad_norm.add_(weight_decay, weight_norm) 79 | local_lr = eta * weight_norm / grad_norm 80 | 81 | if momentum != 0: 82 | param_state = self.state[p] 83 | if 'momentum_buffer' not in param_state: 84 | buf = param_state['momentum_buffer'] = torch.zeros_like( 85 | p.data) 86 | buf.mul_(momentum).add_(d_p) 87 | else: 88 | buf = param_state['momentum_buffer'] 89 | buf.mul_(momentum).add_(1 - dampening, d_p) 90 | if nesterov: 91 | d_p = d_p.add(momentum, buf) 92 | else: 93 | d_p = buf 94 | 95 | p.data.add_(-group['lr']*local_lr, d_p) 96 | 97 | return loss 98 | -------------------------------------------------------------------------------- /core/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | from .solver import Solver 2 | from .solver_deter import SolverDeter 3 | from .solver_multitask_dev import SolverMultiTaskDev 4 | 5 | def solver_entry(C): 6 | return globals()[C.config['common']['solver']['type']](C) 7 | -------------------------------------------------------------------------------- /core/solvers/solver_deter.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import random 4 | import time 5 | 6 | 
import core 7 | import core.models.decoders as decoders 8 | import core.models.backbones as backbones 9 | import core.models.necks as necks 10 | import core.data.datasets as datasets 11 | from core.models.model_entry import model_entry 12 | import numpy as np 13 | import torch 14 | import torch.backends.cudnn as cudnn 15 | 16 | from core import distributed_utils as dist 17 | 18 | from core.distributed_utils import DistributedGivenIterationSampler 19 | 20 | from torch.utils.data import DataLoader 21 | 22 | from .solver import Solver 23 | 24 | class WorkerInit(object): 25 | def __init__(self, rank, num_workers): 26 | self.rank = rank 27 | self.num_workers = num_workers 28 | def func(self, pid): 29 | print(f'[rank{self.rank}] setting worker seed {self.rank*self.num_workers+pid}', flush=True) 30 | np.random.seed(self.rank*self.num_workers+pid) 31 | 32 | class SolverDeter(Solver): 33 | 34 | def __init__(self, C): 35 | super().__init__(C) 36 | 37 | if self.config.get('deterministic', False): 38 | if self.config.get('cudnn_deterministic', True): 39 | cudnn.deterministic = True 40 | cudnn.benchmark = False 41 | else: 42 | cudnn.benchmark = True 43 | seed = self.config.get('random_seed', 0) 44 | worker_rank = self.config.get('worker_rank', False) 45 | if worker_rank: 46 | worker_init = WorkerInit(self.C.rank, self.config.workers) 47 | else: 48 | worker_init = WorkerInit(0, 0) 49 | self.worker_init_fn = worker_init.func 50 | random.seed(seed) 51 | np.random.seed(seed) 52 | torch.manual_seed(seed) 53 | torch.cuda.manual_seed(seed) 54 | dist.barrier() 55 | if self.C.rank == 0: 56 | self.logger.info(f'deterministic mode, seed: {seed}, worker_rank: {worker_rank},\ 57 | cudnn_deterministic: {self.config.get("cudnn_deterministic", True)}') 58 | dist.barrier() 59 | else: 60 | self.worker_init_fn = None 61 | 62 | def create_dataloader(self): 63 | config = self.config 64 | ginfo = self.ginfo 65 | 66 | self.sampler = DistributedGivenIterationSampler( 67 | self.dataset, config.max_iter, config.sampler.batch_size, 68 | world_size=ginfo.task_size, rank=ginfo.task_rank, 69 | last_iter=self.last_iter, 70 | shuffle_strategy=config.sampler.shuffle_strategy, 71 | random_seed=ginfo.task_random_seed, 72 | ret_save_path=config.sampler.get('ret_save_path', None)) 73 | 74 | 75 | self.loader = DataLoader(self.dataset, batch_size=config.sampler.batch_size, 76 | shuffle=False, num_workers=config.workers, 77 | pin_memory=False, sampler=self.sampler, worker_init_fn=self.worker_init_fn) 78 | -------------------------------------------------------------------------------- /core/solvers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/solvers/utils/__init__.py -------------------------------------------------------------------------------- /core/solvers/utils/attr_tester_dev.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os, sys 4 | import random 5 | import datetime 6 | import time 7 | from typing import List 8 | import json 9 | import logging 10 | import numpy as np 11 | from copy import deepcopy 12 | from .seg_tester_dev import DatasetEvaluator 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.parallel 17 | from torch.optim import lr_scheduler 18 | import torch.backends.cudnn as cudnn 19 | from core import distributed_utils as dist 20 | import torch.optim 21 | import 
torch.multiprocessing as mp 22 | import torch.utils.data 23 | 24 | class PedAttrEvaluator(DatasetEvaluator): 25 | 26 | def __init__( 27 | self, 28 | dataset_name, 29 | config, 30 | distributed=True, 31 | output_dir=None, 32 | ): 33 | 34 | self._logger = logging.getLogger(__name__) 35 | 36 | self._dataset_name = dataset_name 37 | self._distributed = distributed 38 | self._output_dir = output_dir 39 | 40 | self._cpu_device = torch.device("cpu") 41 | self.threshold = 0.5 42 | 43 | def reset(self): 44 | self.gt_label = [] 45 | self.preds_probs = [] 46 | 47 | def process(self, inputs, outputs): 48 | gt_label = inputs['label'] 49 | gt_label[gt_label == -1] = 0 50 | preds_probs = outputs['logit'].sigmoid() 51 | self.gt_label.append(gt_label) 52 | self.preds_probs.append(preds_probs) 53 | 54 | @staticmethod 55 | def all_gather(data, group=0): 56 | assert dist.get_world_size() == 1, f"distributed eval unsupported yet, uncertain if we can use torch.dist with link jointly" 57 | if dist.get_world_size() == 1: 58 | return [data] 59 | 60 | world_size = dist.get_world_size() 61 | tensors_gather = [torch.ones_like(data) for _ in range(world_size)] 62 | dist.allgather(tensors_gather, data, group=group) 63 | return tensors_gather 64 | 65 | def evaluate(self): 66 | gt_label = torch.cat(self.gt_label, dim=0) 67 | preds_probs = torch.cat(self.preds_probs, dim=0) 68 | 69 | if self._distributed: 70 | dist.barrier() 71 | 72 | gt_label = self.all_gather(gt_label) 73 | preds_probs = self.all_gather(preds_probs) 74 | 75 | if dist.get_rank() != 0: 76 | return 77 | 78 | gt_label = torch.cat(gt_label, dim=0) 79 | preds_probs = torch.cat(preds_probs, dim=0) 80 | preds_probs = preds_probs.cpu().numpy() 81 | gt_label = gt_label.cpu().numpy() 82 | 83 | pred_label = preds_probs > self.threshold 84 | 85 | eps = 1e-20 86 | result = {} 87 | 88 | ############################### 89 | # label metrics 90 | # TP + FN 91 | gt_pos = np.sum((gt_label == 1), axis=0).astype(float) 92 | # TN + FP 93 | gt_neg = np.sum((gt_label == 0), axis=0).astype(float) 94 | # TP 95 | true_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=0).astype(float) 96 | # TN 97 | true_neg = np.sum((gt_label == 0) * (pred_label == 0), axis=0).astype(float) 98 | # FP 99 | false_pos = np.sum(((gt_label == 0) * (pred_label == 1)), axis=0).astype(float) 100 | # FN 101 | false_neg = np.sum(((gt_label == 1) * (pred_label == 0)), axis=0).astype(float) 102 | 103 | label_pos_recall = 1.0 * true_pos / (gt_pos + eps) # true positive 104 | label_neg_recall = 1.0 * true_neg / (gt_neg + eps) # true negative 105 | # mean accuracy 106 | label_ma = (label_pos_recall + label_neg_recall) / 2 107 | 108 | result['label_pos_recall'] = label_pos_recall 109 | result['label_neg_recall'] = label_neg_recall 110 | result['label_prec'] = true_pos / (true_pos + false_pos + eps) 111 | result['label_acc'] = true_pos / (true_pos + false_pos + false_neg + eps) 112 | result['label_f1'] = 2 * result['label_prec'] * result['label_pos_recall'] / ( 113 | result['label_prec'] + result['label_pos_recall'] + eps) 114 | 115 | result['label_ma'] = label_ma 116 | result['ma'] = np.mean(label_ma) 117 | 118 | ################ 119 | # instance metrics 120 | gt_pos = np.sum((gt_label == 1), axis=1).astype(float) 121 | true_pos = np.sum((pred_label == 1), axis=1).astype(float) 122 | # true positive 123 | intersect_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=1).astype(float) 124 | # IOU 125 | union_pos = np.sum(((gt_label == 1) + (pred_label == 1)), axis=1).astype(float) 126 | 127 | instance_acc = 
intersect_pos / (union_pos + eps) 128 | instance_prec = intersect_pos / (true_pos + eps) 129 | instance_recall = intersect_pos / (gt_pos + eps) 130 | instance_f1 = 2 * instance_prec * instance_recall / (instance_prec + instance_recall + eps) 131 | 132 | instance_acc = np.mean(instance_acc) 133 | instance_prec = np.mean(instance_prec) 134 | instance_recall = np.mean(instance_recall) 135 | instance_f1 = np.mean(instance_f1) 136 | 137 | result['instance_acc'] = instance_acc 138 | result['instance_prec'] = instance_prec 139 | result['instance_recall'] = instance_recall 140 | result['instance_f1'] = instance_f1 141 | 142 | result['error_num'], result['fn_num'], result['fp_num'] = false_pos + false_neg, false_neg, false_pos 143 | 144 | result['pos_recall'] = np.mean(label_pos_recall) 145 | result['neg_recall'] = np.mean(label_neg_recall) 146 | return result 147 | -------------------------------------------------------------------------------- /core/solvers/utils/nms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch 3 | # Original licence: Copyright (c) Microsoft, under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | import numpy as np 7 | 8 | 9 | def nms(dets, thr): 10 | """Greedily select boxes with high confidence and overlap <= thr. 11 | 12 | Args: 13 | dets: [[x1, y1, x2, y2, score]]. 14 | thr: Retain overlap < thr. 15 | 16 | Returns: 17 | list: Indexes to keep. 18 | """ 19 | if len(dets) == 0: 20 | return [] 21 | 22 | x1 = dets[:, 0] 23 | y1 = dets[:, 1] 24 | x2 = dets[:, 2] 25 | y2 = dets[:, 3] 26 | scores = dets[:, 4] 27 | 28 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 29 | order = scores.argsort()[::-1] 30 | 31 | keep = [] 32 | while len(order) > 0: 33 | i = order[0] 34 | keep.append(i) 35 | xx1 = np.maximum(x1[i], x1[order[1:]]) 36 | yy1 = np.maximum(y1[i], y1[order[1:]]) 37 | xx2 = np.minimum(x2[i], x2[order[1:]]) 38 | yy2 = np.minimum(y2[i], y2[order[1:]]) 39 | 40 | w = np.maximum(0.0, xx2 - xx1 + 1) 41 | h = np.maximum(0.0, yy2 - yy1 + 1) 42 | inter = w * h 43 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 44 | 45 | inds = np.where(ovr <= thr)[0] 46 | order = order[inds + 1] 47 | 48 | return keep 49 | 50 | 51 | def oks_iou(g, d, a_g, a_d, sigmas=None, vis_thr=None): 52 | """Calculate oks ious. 53 | 54 | Args: 55 | g: Ground truth keypoints. 56 | d: Detected keypoints. 57 | a_g: Area of the ground truth object. 58 | a_d: Area of the detected object. 59 | sigmas: standard deviation of keypoint labelling. 60 | vis_thr: threshold of the keypoint visibility. 61 | 62 | Returns: 63 | list: The oks ious. 
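Note: each returned value is the standard COCO OKS score, i.e. the mean
over (visible) keypoints of exp(-d_k**2 / (2 * s**2 * kappa_k**2)), with
d_k the Euclidean distance between corresponding keypoints, s**2 the mean
of the ground-truth and detected object areas, and kappa_k = 2 * sigmas[k],
matching the computation in the loop below.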
64 | """ 65 | if sigmas is None: 66 | sigmas = np.array([ 67 | .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, 68 | .87, .87, .89, .89 69 | ]) / 10.0 70 | vars = (sigmas * 2)**2 71 | xg = g[0::3] 72 | yg = g[1::3] 73 | vg = g[2::3] 74 | ious = np.zeros(len(d), dtype=np.float32) 75 | for n_d in range(0, len(d)): 76 | xd = d[n_d, 0::3] 77 | yd = d[n_d, 1::3] 78 | vd = d[n_d, 2::3] 79 | dx = xd - xg 80 | dy = yd - yg 81 | e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2 82 | if vis_thr is not None: 83 | ind = list(vg > vis_thr) and list(vd > vis_thr) 84 | e = e[ind] 85 | ious[n_d] = np.sum(np.exp(-e)) / len(e) if len(e) != 0 else 0.0 86 | return ious 87 | 88 | 89 | def oks_nms(kpts_db, thr, sigmas=None, vis_thr=None): 90 | """OKS NMS implementations. 91 | 92 | Args: 93 | kpts_db: keypoints. 94 | thr: Retain overlap < thr. 95 | sigmas: standard deviation of keypoint labelling. 96 | vis_thr: threshold of the keypoint visibility. 97 | 98 | Returns: 99 | np.ndarray: indexes to keep. 100 | """ 101 | if len(kpts_db) == 0: 102 | return [] 103 | 104 | scores = np.array([k['score'] for k in kpts_db]) 105 | kpts = np.array([k['keypoints'].flatten() for k in kpts_db]) 106 | areas = np.array([k['area'] for k in kpts_db]) 107 | 108 | order = scores.argsort()[::-1] 109 | 110 | keep = [] 111 | while len(order) > 0: 112 | i = order[0] 113 | keep.append(i) 114 | 115 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], 116 | sigmas, vis_thr) 117 | 118 | inds = np.where(oks_ovr <= thr)[0] 119 | order = order[inds + 1] 120 | 121 | keep = np.array(keep) 122 | 123 | return keep 124 | 125 | 126 | def _rescore(overlap, scores, thr, type='gaussian'): 127 | """Rescoring mechanism gaussian or linear. 128 | 129 | Args: 130 | overlap: calculated ious 131 | scores: target scores. 132 | thr: retain oks overlap < thr. 133 | type: 'gaussian' or 'linear' 134 | 135 | Returns: 136 | np.ndarray: indexes to keep 137 | """ 138 | assert len(overlap) == len(scores) 139 | assert type in ['gaussian', 'linear'] 140 | 141 | if type == 'linear': 142 | inds = np.where(overlap >= thr)[0] 143 | scores[inds] = scores[inds] * (1 - overlap[inds]) 144 | else: 145 | scores = scores * np.exp(-overlap**2 / thr) 146 | 147 | return scores 148 | 149 | 150 | def soft_oks_nms(kpts_db, thr, max_dets=20, sigmas=None, vis_thr=None): 151 | """Soft OKS NMS implementations. 152 | 153 | Args: 154 | kpts_db 155 | thr: retain oks overlap < thr. 156 | max_dets: max number of detections to keep. 157 | sigmas: Keypoint labelling uncertainty. 158 | 159 | Returns: 160 | np.ndarray: indexes to keep. 
161 | """ 162 | if len(kpts_db) == 0: 163 | return [] 164 | 165 | scores = np.array([k['score'] for k in kpts_db]) 166 | kpts = np.array([k['keypoints'].flatten() for k in kpts_db]) 167 | areas = np.array([k['area'] for k in kpts_db]) 168 | 169 | order = scores.argsort()[::-1] 170 | scores = scores[order] 171 | 172 | keep = np.zeros(max_dets, dtype=np.intp) 173 | keep_cnt = 0 174 | while len(order) > 0 and keep_cnt < max_dets: 175 | i = order[0] 176 | 177 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], 178 | sigmas, vis_thr) 179 | 180 | order = order[1:] 181 | scores = _rescore(oks_ovr, scores[1:], thr) 182 | 183 | tmp = scores.argsort()[::-1] 184 | order = order[tmp] 185 | scores = scores[tmp] 186 | 187 | keep[keep_cnt] = i 188 | keep_cnt += 1 189 | 190 | keep = keep[:keep_cnt] 191 | 192 | return keep 193 | -------------------------------------------------------------------------------- /core/solvers/utils/seg_tester_dev.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import itertools 3 | import json 4 | import logging 5 | import os 6 | from collections import OrderedDict 7 | 8 | import numpy as np 9 | import pycocotools.mask as mask_util 10 | import torch 11 | 12 | from core import distributed_utils as dist 13 | 14 | 15 | class DatasetEvaluator: 16 | """ 17 | Base class for a dataset evaluator. 18 | 19 | The function :func:`inference_on_dataset` runs the model over 20 | all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. 21 | 22 | This class will accumulate information of the inputs/outputs (by :meth:`process`), 23 | and produce evaluation results in the end (by :meth:`evaluate`). 24 | """ 25 | 26 | def reset(self): 27 | """ 28 | Preparation for a new round of evaluation. 29 | Should be called before starting a round of evaluation. 30 | """ 31 | pass 32 | 33 | def process(self, inputs, outputs): 34 | """ 35 | Process the pair of inputs and outputs. 36 | If they contain batches, the pairs can be consumed one-by-one using `zip`: 37 | 38 | .. code-block:: python 39 | 40 | for input_, output in zip(inputs, outputs): 41 | # do evaluation on single input/output pair 42 | ... 43 | 44 | Args: 45 | inputs (list): the inputs that's used to call the model. 46 | outputs (list): the return value of `model(inputs)` 47 | """ 48 | pass 49 | 50 | def evaluate(self): 51 | """ 52 | Evaluate/summarize the performance, after processing all input/output pairs. 53 | 54 | Returns: 55 | dict: 56 | A new evaluator class can return a dict of arbitrary format 57 | as long as the user can process the results. 58 | In our train_net.py, we expect the following format: 59 | 60 | * key: the name of the task (e.g., bbox) 61 | * value: a dict of {metric name: score}, e.g.: {"AP50": 80} 62 | """ 63 | pass 64 | 65 | 66 | class SemSegEvaluator(DatasetEvaluator): 67 | """ 68 | Evaluate semantic segmentation metrics. 69 | """ 70 | 71 | def __init__( 72 | self, 73 | dataset_name, 74 | config, 75 | distributed=True, 76 | output_dir=None, 77 | ): 78 | """ 79 | Args: 80 | dataset_name (str): name of the dataset to be evaluated. 81 | distributed (bool): if True, will collect results from all ranks for evaluation. 82 | Otherwise, will evaluate the results in the current process. 83 | output_dir (str): an output directory to dump results. 
84 | num_classes, ignore_label: deprecated argument 85 | """ 86 | self._logger = logging.getLogger(__name__) 87 | 88 | self._dataset_name = dataset_name 89 | self._distributed = distributed 90 | self._output_dir = output_dir 91 | 92 | self._cpu_device = torch.device("cpu") 93 | 94 | self._class_names = config.dataset.kwargs.cfg.label_list[1:] 95 | self._num_classes = len(self._class_names) 96 | assert self._num_classes == config.dataset.kwargs.cfg.num_classes, f"{self._num_classes} != {config.dataset.kwargs.cfg.num_classes}" 97 | self._contiguous_id_to_dataset_id = {i: k for i, k in enumerate(self._class_names)} # Dict that maps contiguous training ids to COCO category ids 98 | self._ignore_label = config.dataset.kwargs.cfg.ignore_value 99 | 100 | def reset(self): 101 | self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64) 102 | self._predictions = [] 103 | 104 | def process(self, inputs, outputs): 105 | """ 106 | Args: 107 | inputs: the inputs to a model. 108 | It is a list of dicts. Each dict corresponds to an image and 109 | contains keys like "height", "width", "file_name". 110 | outputs: the outputs of a model. It is either list of semantic segmentation predictions 111 | (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic 112 | segmentation prediction in the same format. 113 | """ 114 | # for input, output in zip(inputs, outputs): 115 | input, output = inputs, outputs[0] 116 | 117 | output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) 118 | pred = np.array(output, dtype=np.int) 119 | 120 | gt = input["gt"] 121 | gt[gt == self._ignore_label] = self._num_classes 122 | 123 | self._conf_matrix += np.bincount( 124 | (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1), 125 | minlength=self._conf_matrix.size, 126 | ).reshape(self._conf_matrix.shape) 127 | 128 | self._predictions.extend(self.encode_json_sem_seg(pred, input["filename"])) 129 | 130 | @staticmethod 131 | def all_gather(data, group=None): 132 | assert dist.get_world_size() == 1, f"distributed eval unsupported yet, uncertain if we can use torch.dist with link jointly" 133 | if dist.get_world_size() == 1: 134 | return [data] 135 | 136 | 137 | def evaluate(self): 138 | """ 139 | Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): 140 | 141 | * Mean intersection-over-union averaged across classes (mIoU) 142 | * Frequency Weighted IoU (fwIoU) 143 | * Mean pixel accuracy averaged across classes (mACC) 144 | * Pixel Accuracy (pACC) 145 | """ 146 | 147 | if self._distributed: 148 | dist.barrier() 149 | 150 | conf_matrix_list = self.all_gather(self._conf_matrix) 151 | self._predictions = self.all_gather(self._predictions) 152 | self._predictions = list(itertools.chain(*self._predictions)) 153 | if dist.get_rank() != 0: 154 | return 155 | 156 | self._conf_matrix = np.zeros_like(self._conf_matrix) 157 | for conf_matrix in conf_matrix_list: 158 | self._conf_matrix += conf_matrix 159 | 160 | if self._output_dir: 161 | os.makedirs(self._output_dir, exist_ok=True) 162 | file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") 163 | with open(file_path, "w") as f: 164 | f.write(json.dumps(self._predictions)) 165 | 166 | acc = np.full(self._num_classes, np.nan, dtype=np.float) 167 | iou = np.full(self._num_classes, np.nan, dtype=np.float) 168 | tp = self._conf_matrix.diagonal()[:-1].astype(np.float) 169 | pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) 170 | class_weights = pos_gt / np.sum(pos_gt) 
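# Bookkeeping for the metrics below (rows of _conf_matrix index predictions,
# columns index ground truth; the extra last row/column collects ignored
# pixels and is excluded): tp is the per-class diagonal, pos_gt / pos_pred
# are the per-class ground-truth / prediction pixel counts, and
# class_weights are the ground-truth class frequencies, so that
# ACC = tp / pos_gt, IoU = tp / (pos_gt + pos_pred - tp),
# fwIoU = sum(class_weights * IoU) and pACC = sum(tp) / sum(pos_gt).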
171 | pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) 172 | acc_valid = pos_gt > 0 173 | acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] 174 | iou_valid = (pos_gt + pos_pred) > 0 175 | union = pos_gt + pos_pred - tp 176 | iou[acc_valid] = tp[acc_valid] / union[acc_valid] 177 | macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) 178 | miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) 179 | fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) 180 | pacc = np.sum(tp) / np.sum(pos_gt) 181 | 182 | res = {} 183 | res["mIoU"] = 100 * miou 184 | res["fwIoU"] = 100 * fiou 185 | for i, name in enumerate(self._class_names): 186 | res["IoU-{}".format(name)] = 100 * iou[i] 187 | res["mACC"] = 100 * macc 188 | res["pACC"] = 100 * pacc 189 | for i, name in enumerate(self._class_names): 190 | res["ACC-{}".format(name)] = 100 * acc[i] 191 | 192 | if self._output_dir: 193 | file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") 194 | with open(file_path, "wb") as f: 195 | torch.save(res, f) 196 | results = OrderedDict({"sem_seg": res}) 197 | self._logger.info(results) 198 | return results 199 | 200 | def encode_json_sem_seg(self, sem_seg, input_file_name): 201 | """ 202 | Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. 203 | See http://cocodataset.org/#format-results 204 | """ 205 | json_list = [] 206 | for label in np.unique(sem_seg): 207 | if self._contiguous_id_to_dataset_id is not None: 208 | assert ( 209 | label in self._contiguous_id_to_dataset_id 210 | ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) 211 | dataset_id = self._contiguous_id_to_dataset_id[label] 212 | else: 213 | dataset_id = int(label) 214 | mask = (sem_seg == label).astype(np.uint8) 215 | mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] 216 | mask_rle["counts"] = mask_rle["counts"].decode("utf-8") 217 | json_list.append( 218 | {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} 219 | ) 220 | return json_list 221 | -------------------------------------------------------------------------------- /core/testers/__init__.py: -------------------------------------------------------------------------------- 1 | from .reid_tester import ReIDTester 2 | from ..solvers.solver_multitask_dev import TesterMultiTaskDev 3 | 4 | def tester_entry(C_train, C_test): 5 | return globals()[C_test.config['common']['tester']['type']](C_train, C_test) 6 | -------------------------------------------------------------------------------- /core/testers/tester.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | from core import distributed_utils as dist 6 | from torch.utils.data import DataLoader 7 | from tensorboardX import SummaryWriter 8 | from easydict import EasyDict as edict 9 | 10 | import numpy as np 11 | import random 12 | import copy 13 | 14 | import core 15 | import core.models.decoders as decoders 16 | import core.models.backbones as backbones 17 | import core.models.necks as necks 18 | import core.data.test_datasets as datasets 19 | from core.models.model_entry import model_entry 20 | from core.utils import (AverageMeter, accuracy, load_state, load_last_iter, 21 | save_state, create_logger, IterLRScheduler, 22 | count_parameters_num, freeze_bn, 23 | change_tensor_cuda, sync_print) 24 | from core.distributed_utils import DistModule, DistributedSequentialSampler, 
simple_group_split, vreduce, vgather 25 | from dict_recursive_update import recursive_update 26 | 27 | class Tester(object): 28 | def __init__(self, C_train, C_test): 29 | train_config = edict(C_train.config['common']) 30 | ginfo = C_train.ginfo 31 | config = train_config 32 | 33 | if C_test.config.get('common') is not None: 34 | recursive_update(config, C_test.config.get('common')) 35 | config = edict(config) 36 | if 'out_dir' in config: 37 | self.out_dir = config['out_dir']+'test_results/' 38 | else: 39 | self.out_dir = "./test_results/" 40 | 41 | if 'expname' in config: 42 | self.tb_path = '{}events/{}'.format(self.out_dir, config['expname']) 43 | self.ckpt_path = '{}checkpoints/{}'.format(self.out_dir, config['expname']) 44 | self.logs_path = '{}logs/{}'.format(self.out_dir, config['expname']) 45 | else: 46 | save_path = config.get('save_path', os.path.dirname(os.path.abspath(C_train.config_file))) 47 | self.save_path = save_path 48 | self.tb_path = '{}/test_results/events'.format(save_path) 49 | self.ckpt_path = '{}/test_results/checkpoints'.format(save_path) 50 | self.logs_path = '{}/test_results/logs'.format(save_path) 51 | if C_train.rank == 0: 52 | os.makedirs(self.tb_path, exist_ok=True) 53 | os.makedirs(self.ckpt_path, exist_ok=True) 54 | os.makedirs(self.logs_path, exist_ok=True) 55 | self.tb_logger = SummaryWriter(self.tb_path) 56 | else: 57 | while not os.path.exists(self.logs_path): 58 | time.sleep(1) 59 | 60 | if ginfo.task_rank == 0: 61 | self.logger = create_logger('global_logger', '{}/log_task_{}.txt'.format(self.logs_path, ginfo.task_id)) 62 | 63 | self.sync = config.get('sync', True) 64 | self.C_train = C_train 65 | self.C_test = C_test 66 | self.config = config 67 | self.ginfo = ginfo 68 | 69 | # change tensor .cuda 70 | change_tensor_cuda() 71 | 72 | self.tmp = edict() 73 | 74 | ## random seed setting 75 | rng = np.random.RandomState(self.config.get('random_seed', 0)) 76 | self.randomseed_pool = rng.randint(999999, size=config.max_iter) 77 | 78 | def create_dataset(self): 79 | ginfo = self.ginfo 80 | config = self.config 81 | dataset_args = config.dataset['kwargs'] 82 | dataset_args['ginfo'] = ginfo 83 | self.dataset = datasets.dataset_entry(config.dataset) 84 | dist.barrier() 85 | 86 | def create_dataloader(self): 87 | raise NotImplementedError 88 | 89 | def create_model(self): 90 | config = self.config 91 | 92 | backbone_bn_group_size = config.backbone['kwargs'].get('bn_group_size', 1) 93 | assert backbone_bn_group_size == 1, 'other bn group size not support!' 94 | backbone_bn_group_comm = self.ginfo.backbone_share_group 95 | 96 | ## build backbone 97 | config.backbone['kwargs']['bn_group'] = backbone_bn_group_comm 98 | backbone_module = backbones.backbone_entry(config.backbone) 99 | count_parameters_num(backbone_module) 100 | 101 | ## build necks 102 | neck_bn_group_size = config.backbone['kwargs'].get('bn_group_size', 1) 103 | assert neck_bn_group_size == 1, 'other bn group size not support!' 104 | neck_bn_group_comm = self.ginfo.neck_share_group 105 | 106 | neck_args = config.neck['kwargs'] 107 | neck_args['backbone'] = backbone_module 108 | neck_args['bn_group'] = neck_bn_group_comm 109 | neck_module = necks.neck_entry(config.neck) 110 | 111 | ## add decoder 112 | decoder_bn_group_size = config.backbone['kwargs'].get('bn_group_size', 1) 113 | assert decoder_bn_group_size == 1, 'other bn group size not support!' 
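# The decoder is assembled like the backbone and neck above: only a BN group
# size of 1 is supported at test time, and the component takes its
# share-group communicator from ginfo; the same three share groups are
# handed to DistModule further down, presumably so that weight sharing at
# test time mirrors the multitask training layout.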
114 | decoder_bn_group_comm = self.ginfo.decoder_share_group 115 | 116 | decoder_args = config.decoder['kwargs'] 117 | decoder_args['backbone'] = backbone_module 118 | decoder_args['neck'] = neck_module 119 | decoder_args['bn_group'] = decoder_bn_group_comm 120 | decoder_module = decoders.decoder_entry(config.decoder) 121 | 122 | # build 123 | model = model_entry(backbone_module, neck_module, decoder_module) 124 | 125 | ## distributed 126 | model.cuda() 127 | 128 | if self.C_train.rank == 0: 129 | print(model) 130 | 131 | model = DistModule(model, sync=self.sync, task_grp=self.ginfo.group, \ 132 | share_backbone_group=self.ginfo.backbone_share_group, \ 133 | share_neck_group=self.ginfo.neck_share_group, \ 134 | share_decoder_group=self.ginfo.decoder_share_group) 135 | 136 | self.model = model 137 | 138 | def load(self, args): 139 | if args.load_path == '': 140 | return 141 | if args.recover: 142 | self.last_iter = load_state(args.load_path.replace('ckpt_task_', 'ckpt_task{}_'.format(\ 143 | self.ginfo.task_id)), self.model, recover=args.recover) 144 | self.last_iter -= 1 145 | else: 146 | if args.load_single: 147 | load_state(args.load_path, self.model, ignore=args.ignore) 148 | else: 149 | load_state(args.load_path.replace('ckpt_task_', 'ckpt_task{}_'.format(\ 150 | self.ginfo.task_id)), self.model, ignore=args.ignore) 151 | 152 | def initialize(self, args): 153 | 154 | # create dataset to get num_classes 155 | self.create_dataset() 156 | self.create_model() 157 | 158 | self.load_args = args 159 | 160 | self.load(args) 161 | self.create_dataloader() 162 | 163 | def pre_run(self): 164 | tmp = self.tmp 165 | tmp.vbatch_time = AverageMeter(10) 166 | tmp.vdata_time = AverageMeter(10) 167 | tmp.vtop1 = AverageMeter(10) 168 | tmp.top1_list = [torch.Tensor(1).cuda() for _ in range(self.C_train.world_size)] 169 | 170 | self.model.eval() 171 | 172 | def prepare_data(self): 173 | tmp = self.tmp 174 | tmp.input_var = dict() 175 | 176 | for k,v in tmp.input.items(): 177 | if not isinstance(v, list): 178 | tmp.input_var[k] = torch.autograd.Variable(v.cuda()) 179 | 180 | def _set_randomseed(self, seed): 181 | random.seed(seed) 182 | np.random.seed(seed) 183 | torch.manual_seed(seed) 184 | torch.cuda.manual_seed(seed) 185 | torch.cuda.manual_seed_all(seed) 186 | -------------------------------------------------------------------------------- /core/testers/tester_deter.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | import torch.backends.cudnn as cudnn 6 | from core import distributed_utils as dist 7 | 8 | from .tester import Tester 9 | 10 | class WorkerInit(object): 11 | def __init__(self, rank, num_workers): 12 | self.rank = rank 13 | self.num_workers = num_workers 14 | def func(self, pid): 15 | print(f'[rank{self.rank}] setting worker seed {self.rank*self.num_workers+pid}', flush=True) 16 | np.random.seed(self.rank*self.num_workers+pid) 17 | 18 | class TesterDeter(Tester): 19 | 20 | def __init__(self, C_train, C_test): 21 | super().__init__(C_train, C_test) 22 | 23 | if self.config.get('deterministic', False): 24 | if self.config.get('cudnn_deterministic', True): 25 | cudnn.deterministic = True 26 | cudnn.benchmark = False 27 | else: 28 | cudnn.benchmark = True 29 | seed = self.config.get('random_seed', 0) 30 | worker_rank = self.config.get('worker_rank', False) 31 | if worker_rank: 32 | worker_init = WorkerInit(self.C_train.rank, self.config.workers) 33 | else: 34 | worker_init = WorkerInit(0, 0) 35 
| self.worker_init_fn = worker_init.func 36 | random.seed(seed) 37 | np.random.seed(seed) 38 | torch.manual_seed(seed) 39 | torch.cuda.manual_seed(seed) 40 | if self.C_train.rank == 0: 41 | self.logger.info(f'deterministic mode, seed: {seed}, worker_rank: {worker_rank},\ 42 | cudnn_deterministic: {self.config.get("cudnn_deterministic", True)}') 43 | dist.barrier() 44 | else: 45 | self.worker_init_fn = None 46 | -------------------------------------------------------------------------------- /core/testers/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | from core.testers.utils.reranking import re_ranking 5 | 6 | 7 | def euclidean_distance(qf, gf): 8 | m = qf.shape[0] 9 | n = gf.shape[0] 10 | dist_mat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ 11 | torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() 12 | dist_mat.addmm_(1, -2, qf, gf.t()) 13 | return dist_mat.cpu().numpy() 14 | 15 | def cosine_similarity(qf, gf): 16 | epsilon = 0.00001 17 | dist_mat = qf.mm(gf.t()) 18 | qf_norm = torch.norm(qf, p=2, dim=1, keepdim=True) # mx1 19 | gf_norm = torch.norm(gf, p=2, dim=1, keepdim=True) # nx1 20 | qg_normdot = qf_norm.mm(gf_norm.t()) 21 | 22 | dist_mat = dist_mat.mul(1 / qg_normdot).cpu().numpy() 23 | dist_mat = np.clip(dist_mat, -1 + epsilon, 1 - epsilon) 24 | dist_mat = np.arccos(dist_mat) 25 | return dist_mat 26 | 27 | 28 | def eval_func(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50): 29 | """Evaluation with market1501 metric 30 | Key: for each query identity, its gallery images from the same camera view are discarded. 31 | """ 32 | num_q, num_g = distmat.shape 33 | # distmat g 34 | # q 1 3 2 4 35 | # 4 1 2 3 36 | if num_g < max_rank: 37 | max_rank = num_g 38 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 39 | indices = np.argsort(distmat, axis=1) 40 | # 0 2 1 3 41 | # 1 2 3 0 42 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) 43 | # compute cmc curve for each query 44 | all_cmc = [] 45 | all_AP = [] 46 | num_valid_q = 0. # number of valid query 47 | for q_idx in range(num_q): 48 | # get query pid and camid 49 | q_pid = q_pids[q_idx] 50 | q_camid = q_camids[q_idx] 51 | 52 | # remove gallery samples that have the same pid and camid with query 53 | order = indices[q_idx] # select one row 54 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) 55 | keep = np.invert(remove) 56 | 57 | # compute cmc curve 58 | # binary vector, positions with value 1 are correct matches 59 | orig_cmc = matches[q_idx][keep] 60 | if not np.any(orig_cmc): 61 | # this condition is true when query identity does not appear in gallery 62 | continue 63 | 64 | cmc = orig_cmc.cumsum() 65 | cmc[cmc > 1] = 1 66 | 67 | all_cmc.append(cmc[:max_rank]) 68 | num_valid_q += 1. 69 | 70 | # compute average precision 71 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision 72 | num_rel = orig_cmc.sum() 73 | tmp_cmc = orig_cmc.cumsum() 74 | #tmp_cmc = [x / (i + 1.) 
for i, x in enumerate(tmp_cmc)] 75 | y = np.arange(1, tmp_cmc.shape[0] + 1) * 1.0 76 | tmp_cmc = tmp_cmc / y 77 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc 78 | AP = tmp_cmc.sum() / num_rel 79 | all_AP.append(AP) 80 | 81 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery" 82 | 83 | all_cmc = np.asarray(all_cmc).astype(np.float32) 84 | all_cmc = all_cmc.sum(0) / num_valid_q 85 | mAP = np.mean(all_AP) 86 | 87 | return all_cmc, mAP 88 | 89 | 90 | class R1_mAP_eval(): 91 | def __init__(self, num_query, max_rank=50, feat_norm=True, reranking=False): 92 | super(R1_mAP_eval, self).__init__() 93 | self.num_query = num_query 94 | self.max_rank = max_rank 95 | self.feat_norm = feat_norm 96 | self.reranking = reranking 97 | 98 | def reset(self): 99 | self.feats = [] 100 | self.pids = [] 101 | self.camids = [] 102 | 103 | def update(self, output): # called once for each batch 104 | feat, pid, camid = output 105 | self.feats.append(feat.cpu()) 106 | self.pids.extend(np.asarray(pid)) 107 | self.camids.extend(np.asarray(camid)) 108 | 109 | def compute(self): # called after each epoch 110 | feats = torch.cat(self.feats, dim=0) 111 | if self.feat_norm: 112 | print("The test feature is normalized") 113 | feats = torch.nn.functional.normalize(feats, dim=1, p=2) # along channel 114 | # query 115 | qf = feats[:self.num_query] 116 | q_pids = np.asarray(self.pids[:self.num_query]) 117 | q_camids = np.asarray(self.camids[:self.num_query]) 118 | # gallery 119 | gf = feats[self.num_query:] 120 | g_pids = np.asarray(self.pids[self.num_query:]) 121 | 122 | g_camids = np.asarray(self.camids[self.num_query:]) 123 | if self.reranking: 124 | print('=> Enter reranking') 125 | # distmat = re_ranking(qf, gf, k1=20, k2=6, lambda_value=0.3) 126 | distmat = re_ranking(qf, gf, k1=50, k2=15, lambda_value=0.3) 127 | 128 | else: 129 | print('=> Computing DistMat with euclidean_distance') 130 | distmat = euclidean_distance(qf, gf) 131 | cmc, mAP = eval_func(distmat, q_pids, g_pids, q_camids, g_camids) 132 | 133 | return cmc, mAP, distmat, self.pids, self.camids, qf, gf 134 | -------------------------------------------------------------------------------- /core/testers/utils/reranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri, 25 May 2018 20:29:09 5 | 6 | 7 | """ 8 | 9 | """ 10 | CVPR2017 paper:Zhong Z, Zheng L, Cao D, et al. Re-ranking Person Re-identification with k-reciprocal Encoding[J]. 2017. 
11 | url:http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhong_Re-Ranking_Person_Re-Identification_CVPR_2017_paper.pdf 12 | Matlab version: https://github.com/zhunzhong07/person-re-ranking 13 | """ 14 | 15 | """ 16 | API 17 | 18 | probFea: all feature vectors of the query set (torch tensor) 19 | galFea: all feature vectors of the gallery set (torch tensor) 20 | k1, k2, lambda: parameters; the original paper uses (k1=20, k2=6, lambda=0.3) 21 | MemorySave: set to 'True' when using MemorySave mode 22 | Minibatch: available when 'MemorySave' is 'True' 23 | """ 24 | 25 | import numpy as np 26 | import torch 27 | 28 | 29 | def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat=None, only_local=False): 30 | # if the feature vectors are numpy arrays, convert them to tensors with torch.tensor first 31 | query_num = probFea.size(0) 32 | all_num = query_num + galFea.size(0) 33 | if only_local: 34 | original_dist = local_distmat 35 | else: 36 | feat = torch.cat([probFea, galFea]) 37 | # print('using GPU to compute original distance') 38 | distmat = torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num) + \ 39 | torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num).t() 40 | distmat.addmm_(1, -2, feat, feat.t()) 41 | original_dist = distmat.cpu().numpy() 42 | del feat 43 | if local_distmat is not None: 44 | original_dist = original_dist + local_distmat 45 | gallery_num = original_dist.shape[0] 46 | original_dist = np.transpose(original_dist / np.max(original_dist, axis=0)) 47 | V = np.zeros_like(original_dist).astype(np.float16) 48 | initial_rank = np.argsort(original_dist).astype(np.int32) 49 | 50 | # print('starting re_ranking') 51 | for i in range(all_num): 52 | # k-reciprocal neighbors 53 | forward_k_neigh_index = initial_rank[i, :k1 + 1] 54 | backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1] 55 | fi = np.where(backward_k_neigh_index == i)[0] 56 | k_reciprocal_index = forward_k_neigh_index[fi] 57 | k_reciprocal_expansion_index = k_reciprocal_index 58 | for j in range(len(k_reciprocal_index)): 59 | candidate = k_reciprocal_index[j] 60 | candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2)) + 1] 61 | candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, 62 | :int(np.around(k1 / 2)) + 1] 63 | fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] 64 | candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] 65 | if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2 / 3 * len( 66 | candidate_k_reciprocal_index): 67 | k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index) 68 | 69 | k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) 70 | weight = np.exp(-original_dist[i, k_reciprocal_expansion_index]) 71 | V[i, k_reciprocal_expansion_index] = weight / np.sum(weight) 72 | original_dist = original_dist[:query_num, ] 73 | if k2 != 1: 74 | V_qe = np.zeros_like(V, dtype=np.float16) 75 | for i in range(all_num): 76 | V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0) 77 | V = V_qe 78 | del V_qe 79 | del initial_rank 80 | invIndex = [] 81 | for i in range(gallery_num): 82 | invIndex.append(np.where(V[:, i] != 0)[0]) 83 | 84 | jaccard_dist = np.zeros_like(original_dist, dtype=np.float16) 85 | 86 | for i in range(query_num): 87 | temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16) 88 | indNonZero = np.where(V[i, :] != 0)[0] 89 | indImages = [invIndex[ind] for ind
in indNonZero] 90 | for j in range(len(indNonZero)): 91 | temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]], 92 | V[indImages[j], indNonZero[j]]) 93 | jaccard_dist[i] = 1 - temp_min / (2 - temp_min) 94 | 95 | final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value 96 | del original_dist 97 | del V 98 | del jaccard_dist 99 | final_dist = final_dist[:query_num, query_num:] 100 | return final_dist 101 | 102 | -------------------------------------------------------------------------------- /experiments/unihcp/release/batch_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #set -x 3 | 4 | # source /mnt/lustre/share/spring/r0.3.0 5 | ROOT=../../../ 6 | export PYTHONPATH=$ROOT:$PYTHONPATH 7 | 8 | if [[ ! -d "logs" ]]; then 9 | mkdir logs 10 | fi 11 | 12 | ################ 13 | gpus=${1-8} 14 | 15 | job_name=${2-debug} 16 | ################### ||| additional params usually not used 17 | ################### vvv 18 | iter=${3-newest} 19 | PRETRAIN_JOB_NAME=${4-${job_name}} 20 | CONFIG=${5-${job_name}.yaml} 21 | ################ 22 | g=$((${gpus}<8?${gpus}:8)) 23 | 24 | #### test list 25 | declare -A test_info_list 26 | test_info_list[pose_lpe]=0 27 | test_info_list[ochuman_pose]=0 28 | #test_info_list[pose_mpii_lpe]=x # requires mpii queries 29 | test_info_list[par_lpe]=1 30 | #test_info_list[par_atr_lpe]=x # requires ATR queries 31 | test_info_list[reid]=2 32 | test_info_list[reid_cuhk3]=2 33 | test_info_list[reid_duke]=2 34 | test_info_list[reid_msmt]=2 35 | test_info_list[reid_senseid]=2 36 | test_info_list[par_lip_lpe]=3 37 | test_info_list[par_cihp_lpe]=4 38 | test_info_list[pa100k_lpe]=5 39 | test_info_list[rap2_lpe]=6 40 | #test_info_list[peta_lpe]=x # requires PETA queries 41 | test_info_list[pose_aic_lpe]=7 42 | test_info_list[peddet_caltech]=8 43 | test_info_list[peddet_inter_lpe]=8 44 | 45 | for TASK in "${!test_info_list[@]}"; do 46 | full_job_name=${job_name}_test_${TASK} 47 | now=$(date +"%Y%m%d_%H%M%S") 48 | GINFO_INDEX=${test_info_list[${TASK}]} 49 | LOG_FILE=logs/${full_job_name}_test_${TASK}_${now}.log 50 | echo "=======>${TASK} log file: ${LOG_FILE}" 51 | TEST_CONFIG=vd_${TASK}_test.yaml 52 | TEST_MODEL=checkpoints/${PRETRAIN_JOB_NAME}/ckpt_task${GINFO_INDEX}_iter_${iter}.pth.tar 53 | echo 'start job:' ${full_job_name} ' config:' ${CONFIG} ' test_config:' ${TEST_CONFIG} 54 | 55 | while true # find unused tcp port 56 | do 57 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 58 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 59 | if [ "${status}" != "0" ]; then 60 | break; 61 | fi 62 | done 63 | 64 | GLOG_vmodule=MemcachedClient=-1 MKL_SERVICE_FORCE_INTEL=1 \ 65 | srun -n$1 -p --gres=gpu:${g} --ntasks-per-node=${g} --gpu \ 66 | --job-name=${full_job_name} --cpus-per-task=5 \ 67 | python -W ignore -u ${ROOT}/test.py \ 68 | --expname ${full_job_name} \ 69 | --config ${CONFIG} \ 70 | --test_config ${TEST_CONFIG} \ 71 | --spec_ginfo_index ${GINFO_INDEX} \ 72 | --load-path=${TEST_MODEL} \ 73 | --tcp_port $PORT \ 74 | 2>&1 | tee ${LOG_FILE} & 75 | 76 | sleep 10 77 | done 78 | 79 | 80 | -------------------------------------------------------------------------------- /experiments/unihcp/release/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | # source /mnt/lustre/share/spring/r0.3.0 5 | ROOT=../../../ 6 | export PYTHONPATH=$ROOT:$PYTHONPATH 7 | 8 | if [[ ! 
-d "logs" ]]; then 9 | mkdir logs 10 | fi 11 | 12 | while true # find unused tcp port 13 | do 14 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 15 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 16 | if [ "${status}" != "0" ]; then 17 | break; 18 | fi 19 | done 20 | ################ 21 | gpus=${1-8} 22 | TASK=${2-pose} 23 | GINFO_INDEX=${3-0} # task index config cherrypick (if necessary) 24 | job_name=${4-debug} 25 | ################### ||| additional params usually not used 26 | ################### vvv 27 | iter=${5-newest} 28 | PRETRAIN_JOB_NAME=${6-${job_name}} 29 | CONFIG=${7-${job_name}.yaml} 30 | TEST_CONFIG=${8-vd_${TASK}_test.yaml} 31 | TEST_MODEL=${9-checkpoints/${PRETRAIN_JOB_NAME}/ckpt_task${GINFO_INDEX}_iter_${iter}.pth.tar} 32 | ################ 33 | 34 | g=$((${gpus}<8?${gpus}:8)) 35 | echo 'start job:' ${job_name} ' config:' ${CONFIG} ' test_config:' ${TEST_CONFIG} 36 | 37 | 38 | now=$(date +"%Y%m%d_%H%M%S") 39 | LOG_FILE=logs/${job_name}_test_${now}.log 40 | echo 'log file: ' ${LOG_FILE} 41 | 42 | GLOG_vmodule=MemcachedClient=-1 MKL_SERVICE_FORCE_INTEL=1 \ 43 | srun -n$1 -p --debug --gres=gpu:${g} --ntasks-per-node=${g} --gpu \ 44 | --job-name=${job_name} --cpus-per-task=5 \ 45 | python -W ignore -u ${ROOT}/test.py \ 46 | --expname ${job_name} \ 47 | --config ${CONFIG} \ 48 | --test_config ${TEST_CONFIG} \ 49 | --spec_ginfo_index ${GINFO_INDEX} \ 50 | --load-path=${TEST_MODEL} \ 51 | --tcp_port $PORT \ 52 | 2>&1 | tee ${LOG_FILE} 53 | -------------------------------------------------------------------------------- /experiments/unihcp/release/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | # source /mnt/lustre/share/spring/r0.3.0 5 | ROOT=../../../ 6 | export PYTHONPATH=$ROOT:$PYTHONPATH 7 | 8 | if [[ ! 
-d "logs" ]]; then 9 | mkdir logs 10 | fi 11 | 12 | while true # find unused tcp port 13 | do 14 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 15 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 16 | if [ "${status}" != "0" ]; then 17 | break; 18 | fi 19 | done 20 | 21 | ################ 22 | gpus=${1-48} 23 | job_name=${2-debug} 24 | ################ 25 | CONFIG=${3-${job_name}.yaml} 26 | 27 | g=$((${gpus}<8?${gpus}:8)) 28 | echo 'start job:' ${job_name} ' config:' ${CONFIG} 29 | 30 | AutoResume=checkpoints/${job_name}/ckpt_task_iter_newest.pth.tar 31 | 32 | now=$(date +"%Y%m%d_%H%M%S") 33 | LOG_FILE=logs/${job_name}_${now}.log 34 | echo 'log file: ' ${LOG_FILE} 35 | 36 | GLOG_vmodule=MemcachedClient=-1 MKL_SERVICE_FORCE_INTEL=1 \ 37 | srun -n${gpus} -p --gres=gpu:${g} --ntasks-per-node=${g} --gpu \ 38 | --job-name=${job_name} --cpus-per-task=5 \ 39 | python -W ignore -u ${ROOT}/multitask.py \ 40 | --expname ${job_name} \ 41 | --config ${CONFIG} \ 42 | --auto-resume=checkpoints/${job_name}/ckpt_task_iter_newest.pth.tar \ 43 | --tcp_port $PORT \ 44 | 2>&1 | tee ${LOG_FILE} 45 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_h3m6_pose_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev_collate 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: COCOPoseTest # deprecated 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: COCOPosDatasetDev 19 | kwargs: 20 | # use_udp: True 21 | # data_use_ratio: 0.003 22 | ann_file: /mnt/path...to.../h36m/processed/annotation_body2d/h36m_coco_test.json 23 | img_prefix: /mnt/path...to.../h36m/processed/images/ 24 | test_mode: True 25 | data_cfg: { 26 | 'use_gt_bbox': True, 27 | 'image_size':[192, 256], 28 | } 29 | sampler: 30 | batch_size: 256 # per card 31 | evaluation: 32 | cfg: 33 | interval: 10 34 | metric: ['PCK', 'EPE'] 35 | key_indicator: AP 36 | 37 | soft_nms: False 38 | nms_thr: 1.0 39 | oks_thr: 0.9 40 | vis_thr: 0.2 41 | cls_logits_vis_thr: 0.05 42 | no_rescoring: True 43 | 44 | # extra: 45 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 46 | # max_size: 4480 47 | # flip: True 48 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_ochuman_pose_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev_collate 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer', 12 | # 'extra', 'evaluation', 'model_entry_type', 'load_ignore', 'ckpt_task_id'] 13 | tasks: 14 | 0: 15 | name: COCOPoseTest # deprecated 16 | loss_weight: 1.0 # *redundant* 17 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 18 | dataset: 19 | type: COCOPosDatasetDev 20 | kwargs: 21 | # use_udp: False 22 | ann_file: /mnt/path...to.../OCHuman/annotations/ochuman_coco_format_test_range_0.00_1.00.json 23 | img_prefix: /mnt/path...to.../OCHuman/images/ 24 | test_mode: True 25 | data_cfg: { 26 | 'use_gt_bbox': True, 27 | } 28 | 
sampler: 29 | batch_size: 256 # per card 30 | evaluation: 31 | cfg: 32 | interval: 10 33 | metric: mAP 34 | key_indicator: AP 35 | 36 | soft_nms: False 37 | nms_thr: 1.0 38 | oks_thr: 0.9 39 | vis_thr: 0.2 40 | cls_logits_vis_thr: 0.05 41 | 42 | # extra: 43 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 44 | # max_size: 4480 45 | # flip: True 46 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_pa100k_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | tasks: 12 | 0: 13 | name: pedattrTest 14 | loss_weight: 1.0 15 | gres_ratio: 1 # int, > 0| world/sum*ratio 16 | dataset: 17 | type: AttrDataset 18 | kwargs: 19 | task_spec: 20 | dataset: 'PA-100k' 21 | data_path: /mnt/path...to.../PA-100k/dataset.pkl 22 | root_path: /mnt/path...to.../PA-100k/data/ 23 | augmentation: 24 | height: 256 25 | width: 192 26 | train: False 27 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_par_atr_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: ParTest 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: ATRParsingDataset 19 | kwargs: 20 | data_path: /mnt/path...to.../ATR 21 | dataset: val 22 | is_train: False 23 | cfg: 24 | # mean: [ 0.485, 0.456, 0.406 ] 25 | # std: [ 0.229, 0.224, 0.225 ] 26 | eval_crop_size: [ 480, 480 ] 27 | is_flip: False 28 | is_multi_scale: False 29 | 30 | ignore_value: 255 # TODO: duplicated with decoder.kwargs.ignore_value 31 | num_classes: 18 32 | label_list: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ] 33 | 34 | sampler: 35 | batch_size: 16 # per card 36 | 37 | 38 | # extra: 39 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 40 | # max_size: 4480 41 | # flip: True 42 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_par_cihp_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: ParTest 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: CIHPParsingDataset 19 | kwargs: 20 | data_path: /mnt/path...to.../CIHP 21 | dataset: val 22 | is_train: False 23 | cfg: 24 | # mean: [ 0.485, 0.456, 0.406 ] 25 | # std: [ 0.229, 0.224, 0.225 ] 26 | eval_crop_size: [ 480, 480 ] 27 | is_flip: False 28 | is_multi_scale: False 29 | 30 | ignore_value: 255 # TODO: duplicated with decoder.kwargs.ignore_value 31 | num_classes: 20 32 | label_list: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ] 33 | 34 | sampler: 35 | batch_size: 16 # per 
card 36 | 37 | 38 | # extra: 39 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 40 | # max_size: 4480 41 | # flip: True 42 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_par_lip_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: ParTest 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: LIPParsingDataset 19 | kwargs: 20 | data_path: /mnt/path...to.../LIP 21 | dataset: val 22 | is_train: False 23 | cfg: 24 | # mean: [ 0.485, 0.456, 0.406 ] 25 | # std: [ 0.229, 0.224, 0.225 ] 26 | eval_crop_size: [ 480, 480 ] 27 | is_flip: False 28 | is_multi_scale: False 29 | 30 | ignore_value: 255 # TODO: duplicated with decoder.kwargs.ignore_value 31 | num_classes: 20 32 | label_list: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ] 33 | 34 | sampler: 35 | batch_size: 16 # per card 36 | 37 | # extra: 38 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 39 | # max_size: 4480 40 | # flip: True 41 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_par_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: ParTest 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: Human3M6ParsingDataset 19 | kwargs: 20 | data_path: /mnt/path...to.../human3.6 21 | dataset: val 22 | is_train: False 23 | cfg: 24 | # mean: [ 0.485, 0.456, 0.406 ] 25 | # std: [ 0.229, 0.224, 0.225 ] 26 | eval_crop_size: [ 480, 480 ] 27 | is_flip: False 28 | is_multi_scale: False 29 | 30 | ignore_value: 255 # duplicated with decoder.kwargs.ignore_value 31 | num_classes: 25 32 | label_list: [ 0, 1, 2, 3, 6, 7, 8, 17, 18, 19, 25, 26, 27, 32, 33, 34, 38, 39, 43, 44, 33 | 46, 49, 50, 56, 58 ] 34 | 35 | sampler: 36 | batch_size: 16 # per card 37 | 38 | 39 | # extra: 40 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 41 | # max_size: 4480 42 | # flip: True 43 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_peddet_caltech_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | kwargs: 8 | pos_thr: 0.0 9 | gt_path: '/mnt/path...to.../Caltech/test_caltech_heavy_1xnew.odgt' 10 | 11 | 12 | sync: True 13 | collate: det 14 | 15 | tasks: 16 | 0: 17 | name: CrowdHumanPeddetTest 18 | loss_weight: 1.0 # *redundant* 19 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 20 | dataset: 21 | type: PedestrainDetectionDataset 22 | kwargs: 23 | task_spec: 24 | img_folder: /mnt/path...to.../Caltech/Images 25 | ann_file: 
/mnt/path...to.../Caltech/test_caltech_1xnew.json 26 | return_masks: False 27 | augmentation: {'max_size': 1333} 28 | vit: True 29 | train: False 30 | sampler: 31 | batch_size: 1 32 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_peddet_inter_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | kwargs: 8 | pos_thr: 0.05 9 | gt_path: 'CHval.odgt' 10 | 11 | 12 | sync: True 13 | collate: det 14 | 15 | tasks: 16 | 0: 17 | name: CrowdHumanPeddetTest 18 | loss_weight: 1.0 # *redundant* 19 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 20 | dataset: 21 | type: PedestrainDetectionDataset 22 | kwargs: 23 | task_spec: 24 | img_folder: /mnt/path...to.../CrowdHuman/Images 25 | ann_file: /mnt/path...to.../CrowdHuman/annotations/val.json 26 | return_masks: False 27 | augmentation: {} 28 | vit: True 29 | train: False 30 | sampler: 31 | batch_size: 1 32 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_peta_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | tasks: 12 | 0: 13 | name: pedattrTest 14 | loss_weight: 1.0 15 | gres_ratio: 1 # int, > 0| world/sum*ratio 16 | dataset: 17 | type: AttrDataset 18 | kwargs: 19 | task_spec: 20 | dataset: 'peta' 21 | data_path: /mnt/path...to.../peta/dataset.pkl 22 | root_path: /mnt/path...to.../peta/images/ 23 | augmentation: 24 | height: 256 25 | width: 192 26 | train: False 27 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_pose_aic_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev_collate 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: AICPoseTest # deprecated 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: MultiPoseDatasetDev 19 | kwargs: 20 | dataset_name: 'aic' 21 | ann_file: '/mnt/path...to.../ai_challenge/annotations/aic_val.json' 22 | img_prefix: '/mnt/path...to.../ai_challenge/ai_challenger_keypoint_validation_20170911/keypoint_validation_images_20170911/' 23 | test_mode: True 24 | sampler: 25 | batch_size: 256 # per card 26 | evaluation: 27 | cfg: 28 | interval: 10 29 | metric: mAP 30 | key_indicator: AP 31 | 32 | soft_nms: False 33 | nms_thr: 1.0 34 | oks_thr: 0.9 35 | vis_thr: 0.2 36 | 37 | sigmas: [ 38 | 0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144, 39 | 0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081, 40 | 0.01291456, 0.01236173 41 | ] 42 | use_area: False 43 | cls_logits_vis_thr: 0.05 44 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_pose_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: 
learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev_collate 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: COCOPoseTest # deprecated 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: COCOPosDatasetDev 19 | kwargs: 20 | ann_file: '/mnt/path...to.../coco/annotations/person_keypoints_val2017.json' 21 | img_prefix: '/mnt/path...to.../coco/val2017/' 22 | test_mode: True 23 | sampler: 24 | batch_size: 256 # per card 25 | evaluation: 26 | cfg: 27 | interval: 10 28 | metric: mAP 29 | key_indicator: AP 30 | 31 | soft_nms: False 32 | nms_thr: 1.0 33 | oks_thr: 0.9 34 | vis_thr: 0.2 35 | cls_logits_vis_thr: 0.05 36 | 37 | # extra: 38 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA 39 | # max_size: 4480 40 | # flip: True 41 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_pose_mpii_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev_collate 10 | 11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer'] 12 | tasks: 13 | 0: 14 | name: MPIIPoseTest # deprecated 15 | loss_weight: 1.0 # *redundant* 16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant* 17 | dataset: 18 | type: MPIIPosDatasetDev 19 | kwargs: 20 | ann_file: '/mnt/path...to.../coco/annotations/mpii_val.json' 21 | img_prefix: '/mnt/path...to.../MPI/images/' 22 | test_mode: True 23 | sampler: 24 | batch_size: 256 # per card 25 | evaluation: 26 | cfg: 27 | interval: 10 28 | metric: PCKh 29 | 30 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_rap2_lpe_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: TesterMultiTaskDev 7 | 8 | sync: True 9 | collate: dev 10 | 11 | tasks: 12 | 0: 13 | name: pedattrTest 14 | loss_weight: 1.0 15 | gres_ratio: 1 # int, > 0| world/sum*ratio 16 | dataset: 17 | type: AttrDataset 18 | kwargs: 19 | task_spec: 20 | dataset: 'rap' 21 | data_path: /mnt/path...to.../rap2/dataset.pkl 22 | root_path: /mnt/path...to.../rap2/RAP_dataset/ 23 | augmentation: 24 | height: 256 25 | width: 192 26 | train: False 27 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_reid_cuhk3_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | tester: 3 | type: 'ReIDTester' 4 | test_feature_name: 'feature_nobn' 5 | 6 | sync: True 7 | 8 | tasks : 9 | 0 : 10 | name : cuhk03 11 | loss_weight : 1.0 12 | gres_ratio: 1 13 | dataset: 14 | type: ReIDTestDataset 15 | kwargs: 16 | root_path: /mnt/path...to.../ 17 | query_file_path: 18 | - /mnt/path...to.../cuhk03_1/data_list/probe.txt 19 | gallery_file_path: 20 | - /mnt/path...to.../cuhk03_1/data_list/gallery.txt 21 | loader: 'pil' 22 | vit: True 23 | sampler: 24 | batch_size: 128 # per card 25 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_reid_msmt_test.yaml: 
-------------------------------------------------------------------------------- 1 | common: 2 | tester: 3 | type: 'ReIDTester' 4 | test_feature_name: 'feature_nobn' 5 | 6 | sync: True 7 | 8 | tasks : 9 | 0 : 10 | name : msmt 11 | loss_weight : 1.0 12 | gres_ratio: 1 13 | dataset: 14 | type: ReIDTestDataset 15 | kwargs: 16 | root_path: /mnt/path...to.../ 17 | query_file_path: 18 | - /mnt/path...to.../MSMT17_V1/data_list/probe.txt 19 | gallery_file_path: 20 | - /mnt/path...to.../MSMT17_V1/data_list/gallery.txt 21 | loader: 'pil' 22 | vit: True 23 | sampler: 24 | batch_size: 128 # per card 25 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_reid_senseid_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | tester: 3 | type: 'ReIDTester' 4 | test_feature_name: 'feature_nobn' 5 | 6 | sync: True 7 | 8 | tasks : 9 | 0 : 10 | name : senseid 11 | loss_weight : 1.0 12 | gres_ratio: 1 13 | dataset: 14 | type: ReIDTestDataset 15 | kwargs: 16 | root_path: /mnt/path...to.../SenseReID/ 17 | query_file_path: 18 | - /mnt/path...to.../SenseReID/data_list/probe.txt 19 | gallery_file_path: 20 | - /mnt/path...to.../SenseReID/data_list/gallery.txt 21 | loader: 'pil' 22 | vit: True 23 | sampler: 24 | batch_size: 128 # per card 25 | -------------------------------------------------------------------------------- /experiments/unihcp/release/vd_reid_test.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | backbone: 3 | kwargs: 4 | test_pos_mode: learnable_interpolate 5 | tester: 6 | type: 'ReIDTester' 7 | test_feature_name: 'feature_nobn' 8 | 9 | sync: True 10 | 11 | tasks : 12 | 0 : 13 | name : market1501 14 | loss_weight : 1.0 15 | gres_ratio: 1 16 | dataset: 17 | type: ReIDTestDataset 18 | kwargs: 19 | root_path: /mnt/path...to.../ 20 | query_file_path: 21 | - /mnt/path...to.../market1501/data_list/probe.txt 22 | gallery_file_path: 23 | - /mnt/path...to.../market1501/data_list/gallery.txt 24 | loader: 'pil' 25 | vit: True 26 | sampler: 27 | batch_size: 32 # per card 28 | -------------------------------------------------------------------------------- /helper/align.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from skimage import transform as trans 4 | 5 | def affine_align(img, landmark=None, **kwargs): 6 | M = None 7 | src = np.array([ 8 | [38.2946, 51.6963], 9 | [73.5318, 51.5014], 10 | [56.0252, 71.7366], 11 | [41.5493, 92.3655], 12 | [70.7299, 92.2041] ], dtype=np.float32 ) 13 | # src=src * 224 / 112 14 | 15 | dst = landmark.astype(np.float32) 16 | tform = trans.SimilarityTransform() 17 | tform.estimate(dst, src) 18 | M = tform.params[0:2,:] 19 | warped = cv2.warpAffine(img, M, (112, 112), borderValue = 0.0) 20 | return warped 21 | 22 | 23 | def kestrel_get_similar_matrix(src_points, dst_points): 24 | if src_points.size != dst_points.size: 25 | print("error: the size of src_points and dst_points must be the same", 26 | "which is {0} vs.
{1}".format(src_points.size, dst_points.size)) 27 | exit(-1) 28 | 29 | dst_points = dst_points.T.reshape(-1) 30 | 31 | point_num = src_points.shape[0] 32 | new_src_points = np.zeros((point_num * 2, 4)) 33 | new_src_points[:point_num, :2] = src_points 34 | new_src_points[:point_num, 2] = 1 35 | new_src_points[:point_num, 3] = 0 36 | 37 | new_src_points[point_num:, 0] = src_points[:, 1] 38 | new_src_points[point_num:, 1] = -src_points[:, 0] 39 | new_src_points[point_num:, 2] = 0 40 | new_src_points[point_num:, 3] = 1 41 | 42 | min_square_solution = np.linalg.lstsq(new_src_points, dst_points, 43 | rcond=-1)[0] 44 | 45 | trans_matrix = np.array([ 46 | [ min_square_solution[0], -min_square_solution[1], 0 ], 47 | [ min_square_solution[1], min_square_solution[0], 0 ], 48 | [ min_square_solution[2], min_square_solution[3], 1 ], 49 | ]) 50 | 51 | return trans_matrix.T[:2] 52 | 53 | def transform(pts, M): 54 | dst = np.matmul(pts, M[:, :2].T) 55 | dst[:, 0] += M[0, 2] 56 | dst[:, 1] += M[1, 2] 57 | return dst 58 | 59 | 60 | def affine_alignSDK(img, landmark=None, borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_LINEAR): 61 | M = None 62 | dst_points = np.array([[70.745156, 111.9996875], [108.23625, 111.9996875], [89.700875, 153.514375]], dtype=np.float32) 63 | default_shape = (178,218) 64 | lmk = landmark.astype(np.float32) 65 | src_points = np.array([ 66 | lmk[0], lmk[1], 67 | (lmk[3] + lmk[4]) / 2 68 | ], dtype=np.float32) 69 | # src_points = get_trans_points(landmarks) 70 | trans_matrix = kestrel_get_similar_matrix(src_points, dst_points) 71 | trans_matrix = np.concatenate((trans_matrix, [[0, 0, 1]]), axis=0) 72 | # print(rotate_points_106) 73 | return cv2.warpPerspective(img, trans_matrix, default_shape, borderMode, flags=flags), trans_matrix 74 | -------------------------------------------------------------------------------- /helper/flops_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import logging 4 | from collections import Iterable 5 | 6 | # from .misc_helper import to_device 7 | 8 | 9 | logger = logging.getLogger('global') 10 | 11 | 12 | def clever_format(nums, format="%.2f"): 13 | if not isinstance(nums, Iterable): 14 | nums = [nums] 15 | clever_nums = [] 16 | 17 | for num in nums: 18 | num = int(num) 19 | if num > 1e12: 20 | clever_nums.append(format % (num / 1e12) + "T") 21 | elif num > 1e9: 22 | clever_nums.append(format % (num / 1e9) + "G") 23 | elif num > 1e6: 24 | clever_nums.append(format % (num / 1e6) + "M") 25 | elif num > 1e3: 26 | clever_nums.append(format % (num / 1e3) + "K") 27 | else: 28 | clever_nums.append(format % num + "B") 29 | 30 | clever_nums = clever_nums[0] if len(clever_nums) == 1 else (*clever_nums,) 31 | 32 | return clever_nums 33 | 34 | 35 | def flops_cal(model, input_shape): 36 | inputs = { 37 | 'image': torch.randn(1, input_shape[0], input_shape[1], input_shape[2]), 38 | 'image_info': [[input_shape[1], input_shape[2], 1, input_shape[1], input_shape[2], False]], 39 | 'filename': ['Test.jpg'], 40 | 'label': torch.LongTensor([[0]]), 41 | } 42 | # flops, params = profile(model, inputs=(to_device(inputs),)) 43 | flops, params = profile(model, inputs=(inputs,)) 44 | flops_str, params_str = clever_format([flops, params], "%.3f") 45 | flops = flops / 1e6 46 | params = flops / 1e6 47 | return flops, params, flops_str, params_str 48 | 49 | 50 | def profile(model, inputs, verbose=True): 51 | handler_collection = [] 52 | 53 | def add_hooks(m): 54 | if len(list(m.children())) > 0: 
55 | return 56 | 57 | m.register_buffer('total_ops', torch.zeros(1)) 58 | m.register_buffer('total_params', torch.zeros(1)) 59 | 60 | m_type = type(m) 61 | fn = None 62 | if m_type in register_hooks: 63 | fn = register_hooks[m_type] 64 | 65 | if fn is None: 66 | if verbose: 67 | print("No implemented counting method for {} in flops_helper".format(m)) 68 | else: 69 | handler = m.register_forward_hook(fn) 70 | handler_collection.append(handler) 71 | 72 | # original_device = model.parameters().__next__().device 73 | training = model.training 74 | 75 | model.eval() 76 | model.apply(add_hooks) 77 | 78 | # with torch.no_grad(): 79 | model(*inputs) 80 | 81 | total_ops = 0 82 | total_params = 0 83 | for m in model.modules(): 84 | if len(list(m.children())) > 0: # skip for non-leaf module 85 | continue 86 | total_ops += m.total_ops 87 | total_params += m.total_params 88 | 89 | # total_ops = total_ops.item() 90 | # total_params = total_params.item() 91 | total_ops = total_ops[0] 92 | total_params = total_params[0] 93 | 94 | # reset model to original status 95 | model.train(training) 96 | for handler in handler_collection: 97 | handler.remove() 98 | 99 | return total_ops, total_params 100 | 101 | 102 | multiply_adds = 1 103 | 104 | 105 | def count_zero(m, x, y): 106 | m.total_ops = torch.Tensor([0]) 107 | m.total_params = torch.Tensor([0]) 108 | 109 | 110 | def count_conv2d(m, x, y): 111 | cin = m.in_channels 112 | cout = m.out_channels 113 | kh, kw = m.kernel_size 114 | out_h = y.size(2) 115 | out_w = y.size(3) 116 | batch_size = x[0].size(0) 117 | 118 | kernel_ops = multiply_adds * kh * kw 119 | bias_ops = 1 if m.bias is not None else 0 120 | 121 | output_elements = batch_size * out_w * out_h * cout 122 | total_ops = output_elements * kernel_ops * cin // m.groups + bias_ops * output_elements 123 | m.total_ops = torch.Tensor([int(total_ops)]) 124 | 125 | total_params = kh * kw * cin * cout // m.groups + bias_ops * cout 126 | m.total_params = torch.Tensor([int(total_params)]) 127 | 128 | 129 | def count_bn(m, x, y): 130 | x = x[0] 131 | c_out = y.size(1) 132 | nelements = x.numel() 133 | # subtract, divide, gamma, beta 134 | total_ops = 4 * nelements 135 | 136 | m.total_ops = torch.Tensor([int(total_ops)]) 137 | m.total_params = torch.Tensor([int(c_out) * 2]) 138 | 139 | 140 | def count_relu(m, x, y): 141 | x = x[0] 142 | nelements = x.numel() 143 | total_ops = nelements 144 | 145 | m.total_ops = torch.Tensor([int(total_ops)]) 146 | 147 | 148 | def count_softmax(m, x, y): 149 | x = x[0] 150 | batch_size, nfeatures = x.size() 151 | total_exp = nfeatures 152 | total_add = nfeatures - 1 153 | total_div = nfeatures 154 | total_ops = batch_size * (total_exp + total_add + total_div) 155 | 156 | m.total_ops = torch.Tensor([int(total_ops)]) 157 | 158 | 159 | def count_avgpool(m, x, y): 160 | total_add = torch.prod(torch.Tensor([m.kernel_size])) 161 | total_div = 1 162 | kernel_ops = total_add + total_div 163 | num_elements = y.numel() 164 | total_ops = kernel_ops * num_elements 165 | 166 | m.total_ops = torch.Tensor([int(total_ops)]) 167 | 168 | 169 | def count_adap_avgpool(m, x, y): 170 | kernel = torch.Tensor([*(x[0].shape[2:])]) // torch.Tensor(list((m.output_size,))).squeeze() 171 | total_add = torch.prod(kernel) 172 | total_div = 1 173 | kernel_ops = total_add + total_div 174 | num_elements = y.numel() 175 | total_ops = kernel_ops * num_elements 176 | 177 | m.total_ops = torch.Tensor([int(total_ops)]) 178 | 179 | 180 | def count_linear(m, x, y): 181 | # per output element 182 | total_mul = m.in_features 
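# one multiply per input feature; the add count below reflects the (in_features - 1)-term summation (bias is not counted here)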
183 | total_add = m.in_features - 1 184 | num_elements = y.numel() 185 | total_ops = (total_mul + total_add) * num_elements 186 | 187 | m.total_ops = torch.Tensor([int(total_ops)]) 188 | m.total_params = torch.Tensor([m.in_features * m.out_features]) 189 | 190 | 191 | register_hooks = { 192 | nn.Conv2d: count_conv2d, 193 | nn.BatchNorm2d: count_zero, 194 | nn.InstanceNorm2d: count_zero, 195 | nn.ConvTranspose2d: count_conv2d, 196 | nn.ReLU: count_zero, 197 | nn.ReLU6: count_zero, 198 | nn.Tanh: count_zero, 199 | nn.LeakyReLU: count_zero, 200 | nn.AvgPool2d: count_zero, 201 | nn.AdaptiveAvgPool2d: count_zero, 202 | nn.Linear: count_linear, 203 | nn.Dropout: count_zero, 204 | nn.Sigmoid: count_zero, 205 | nn.Softmax: count_zero, 206 | # VarChannelConv2d: VarChannelConv2d.flops_count, 207 | # VarChannelBatchNorm2d: VarChannelBatchNorm2d.flops_count, 208 | # VarChannelSyncBatchNorm2d: VarChannelSyncBatchNorm2d.flops_count, 209 | # VarChannelSyncMultiBatchNorm2d: VarChannelSyncMultiBatchNorm2d.flops_count, 210 | # VarChannelLinear: VarChannelLinear.flops_count, 211 | # DeprecatedGroupSyncBatchNorm: count_zero, 212 | # Identity: count_zero, 213 | # VcIdentity: count_zero, 214 | nn.MaxPool2d: count_zero, 215 | nn.CrossEntropyLoss: count_zero, 216 | # SamePadConv2d: count_conv2d, 217 | # conv_bn_swish: count_zero, 218 | # Swish: count_zero 219 | } 220 | -------------------------------------------------------------------------------- /helper/param_count.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | mp.set_start_method('spawn', force=True) 3 | 4 | import torch 5 | import torch.nn as nn 6 | from functools import reduce 7 | import operator 8 | 9 | 10 | 11 | 12 | 13 | def count_parameters_num(model): 14 | count = 0 15 | count_fc = 0 16 | param_dict = {name:param for name,param in model.named_parameters()} 17 | param_keys = param_dict.keys() 18 | for m_name, m in model.named_modules(): 19 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d): 20 | weight_name = m_name + '.weight' 21 | bias_name = m_name + '.bias' 22 | if weight_name in param_keys: 23 | temp_params = param_dict[weight_name] 24 | count += temp_params.data.nelement() 25 | if bias_name in param_keys: 26 | temp_params = param_dict[bias_name] 27 | count += temp_params.data.nelement() 28 | elif isinstance(m, nn.Linear): 29 | weight_name = m_name + '.weight' 30 | bias_name = m_name + '.bias' 31 | if weight_name in param_keys: 32 | temp_params = param_dict[weight_name] 33 | count_fc += temp_params.data.nelement() 34 | if bias_name in param_keys: 35 | temp_params = param_dict[bias_name] 36 | count_fc += temp_params.data.nelement() 37 | print('Number of conv/bn params: %.2fM' % (count / 1e6)) 38 | print('Number of linear params: %.2fM' % (count_fc / 1e6)) 39 | print('Number of all params: %.2fM' % ( (count+count_fc) / 1e6)) 40 | 41 | # def count_flops(model, input_image_size): 42 | # counts = [] 43 | 44 | # # loop over all model parts 45 | # for m in model.modules(): 46 | # if isinstance(m, nn.Conv2d): 47 | # def hook(module, input): 48 | # factor = 2*module.in_channels*module.out_channels 49 | # factor *= module.kernel_size[0]*module.kernel_size[1] 50 | # factor //= module.stride[0]*module.stride[1] 51 | # counts.append( 52 | # factor*input[0].data.shape[2]*input[0].data.shape[3] 53 | # ) 54 | # m.register_forward_pre_hook(hook) 55 | # elif isinstance(m, nn.Linear): 56 | # counts += [ 57 | # 2*m.in_features*m.out_features 58 | # ] 59 | 60 | # noise_image = 
torch.rand( 61 | # 2, 3, input_image_size, input_image_size 62 | # ) 63 | # # one forward pass 64 | # with torch.no_grad(): 65 | # _ = model(torch.autograd.Variable(noise_image.cuda())) 66 | # return sum(counts) 67 | 68 | def get_layer_param(model): 69 | return sum([reduce(operator.mul, i.size(), 1) for i in model.parameters()]) 70 | 71 | def measure_model(model, input_image_size, forward_param=None): 72 | flop_counts = [] 73 | param_counts = [] 74 | multi_add = 2 75 | 76 | # loop over all model parts 77 | for m in model.modules(): 78 | if isinstance(m, nn.Conv2d): 79 | def hook(module, x): 80 | out_h = int((x[0].size()[2] + 2 * module.padding[0] - module.kernel_size[0]) / module.stride[0] + 1) 81 | out_w = int((x[0].size()[3] + 2 * module.padding[1] - module.kernel_size[1]) / module.stride[1] + 1) 82 | ops = module.in_channels * module.out_channels * module.kernel_size[0] * module.kernel_size[1] * out_h * out_w / module.groups * multi_add 83 | flop_counts.append(ops) 84 | param_counts.append(get_layer_param(module)) 85 | m.register_forward_pre_hook(hook) 86 | 87 | elif isinstance(m, nn.ReLU) or isinstance(m, nn.PReLU): 88 | def hook(module, x): 89 | ops = x[0].numel() 90 | flop_counts.append(ops) 91 | param_counts.append(get_layer_param(module)) 92 | m.register_forward_pre_hook(hook) 93 | 94 | elif isinstance(m, nn.AvgPool2d): 95 | def hook(module, x): 96 | in_h, in_w = x[0].size()[2], x[0].size()[3] 97 | kernel_ops = module.kernel_size * module.kernel_size 98 | out_w = int((in_w + 2 * module.padding - module.kernel_size) / module.stride + 1) 99 | out_h = int((in_h + 2 * module.padding - module.kernel_size) / module.stride + 1) 100 | flop_counts.append(x[0].size()[0] * x[0].size()[1] * out_w * out_h * kernel_ops) 101 | param_counts.append(get_layer_param(module)) 102 | m.register_forward_pre_hook(hook) 103 | 104 | elif isinstance(m, nn.AdaptiveAvgPool2d): 105 | def hook(module, x): 106 | ops = x[0].size()[0] * x[0].size()[1] * x[0].size()[2] * x[0].size()[3] 107 | flop_counts.append(ops) 108 | param_counts.append(get_layer_param(module)) 109 | m.register_forward_pre_hook(hook) 110 | 111 | elif isinstance(m, nn.Linear): 112 | def hook(module, x): 113 | weight_ops = module.weight.numel() * multi_add 114 | bias_ops = module.bias.numel() 115 | ops = x[0].size()[0] * (weight_ops + bias_ops) 116 | flop_counts.append(ops) 117 | param_counts.append(get_layer_param(module)) 118 | m.register_forward_pre_hook(hook) 119 | 120 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d) \ 121 | or isinstance(m, nn.Dropout2d) or isinstance(m, nn.Dropout): 122 | def hook(module, x): 123 | param_counts.append(get_layer_param(module)) 124 | m.register_forward_pre_hook(hook) 125 | 126 | else: 127 | # print('unknown layer type: %s' % type(m)) 128 | pass 129 | 130 | if isinstance(input_image_size, int): 131 | noise_image = torch.rand(1, 3, input_image_size, input_image_size) 132 | else: 133 | noise_image = torch.rand(1, 3, input_image_size[0], input_image_size[1]) 134 | # one forward pass 135 | with torch.no_grad(): 136 | if forward_param is not None: 137 | _ = model(noise_image.cuda(), forward_param) 138 | else: 139 | _ = model(noise_image.cuda()) 140 | # _ = model(torch.autograd.Variable(noise_image.cuda(), requires_grad=False)) 141 | 142 | return sum(param_counts), sum(flop_counts) 143 | -------------------------------------------------------------------------------- /helper/vis_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import cv2 3 | import os 4 |
import numpy as np 5 | import torch 6 | import pickle as pk 7 | 8 | 9 | def inv_normalize_batch(image, mean_arr, stddev_arr): 10 | # normalize image color channels 11 | inv_normed_image = image.clone() 12 | for c in range(3): 13 | if len(image.size()) == 4: 14 | inv_normed_image[:, c, :, :] = (image[:, c, :, :] * stddev_arr[c] + mean_arr[c]) 15 | else: 16 | inv_normed_image[c, :, :] = (image[c, :, :] * stddev_arr[c] + mean_arr[c]) 17 | return inv_normed_image 18 | 19 | 20 | def get_vis_data(input, range_low=-1, range_high=1, vis_height=-1, vis_width=-1, to_rgb=True): 21 | if input is None: 22 | return None 23 | 24 | data = ((input.permute(1, 2, 0) - range_low) / ( 25 | range_high - range_low) * 255.0).data.cpu().numpy() 26 | if vis_height > 0 and vis_width > 0: 27 | if data.shape[0] != vis_height or data.shape[1] != vis_width: 28 | data = cv2.resize(data, (vis_width, vis_height)) 29 | if len(data.shape) == 2: 30 | data = cv2.cvtColor(data, cv2.COLOR_GRAY2RGB) 31 | else: 32 | if to_rgb: 33 | data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB) 34 | return data 35 | 36 | 37 | def vis_one_from_batch(vis_list, range_low=0, range_high=1, 38 | vis_height=140, vis_width=140, vis_channel=3, to_rgb=True, return_CHW=True): 39 | 40 | vis_dict = dict() 41 | for item in vis_list: 42 | ''' 43 | vis_list = [{ 44 | 'name': 'lap_adv', 45 | 'image': laplace_adv 46 | }] 47 | ''' 48 | vis_image = get_vis_data(item['image'], range_low, range_high, vis_height, vis_width, to_rgb=to_rgb) 49 | vis_dict[item['name']] = vis_image 50 | 51 | cnt = 0 52 | for tag, item in vis_dict.items(): 53 | if item is not None: 54 | cnt += 1 55 | 56 | # adapt to visualize format 57 | rst = np.zeros((vis_height, vis_width * cnt, vis_channel)) 58 | pos = 0 59 | for tag, item in vis_dict.items(): 60 | if item is not None: 61 | left = vis_width * pos 62 | right = vis_width * (pos + 1) 63 | rst[:, left: right] = item 64 | cv2.putText(rst, tag, (left + 2, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1) 65 | pos += 1 66 | 67 | rst = rst.clip(0, 255).astype(np.uint8, copy=False) 68 | if return_CHW: 69 | # prepare for tensorboard [RGB, CHW] 70 | rst = rst.transpose((2, 0, 1)) # HWC -> CHW 71 | 72 | return rst 73 | 74 | 75 | def get_features_indices(feature_maps, topk=1): 76 | """ 77 | Select the indices of the top-k most activated feature channels. 78 | :param topk: number of channel indices to return 79 | :param feature_maps: floatTensor [N, C, H, W] 80 | :return: 81 | """ 82 | input_dim = 4 83 | if len(feature_maps.size()) == 2: 84 | input_dim = 2 85 | feature_maps = feature_maps.unsqueeze(0) 86 | if len(feature_maps.size()) == 3: 87 | input_dim = 3 88 | feature_maps = feature_maps.unsqueeze(0) 89 | N, C, H, W = feature_maps.size() 90 | feats = feature_maps.view(N, C, -1) 91 | feats_sum = torch.sum(feats, dim=2) 92 | y, ind = torch.sort(feats_sum, dim=1, descending=True) 93 | selected_ind = ind[:, :topk] 94 | if input_dim < 4: 95 | return selected_ind.squeeze(0) 96 | return selected_ind 97 | 98 | 99 | def show_feature_map(feature_map, reference=None, range_low=-1, range_high=1): 100 | """ 101 | Visualize feature maps. 102 | :param feature_map: floatTensor [C, H, W] 103 | :param reference: floatTensor [3, H, W] 104 | :return: 105 | """ 106 | if isinstance(feature_map, torch.Tensor): 107 | feature_map = feature_map.cpu().numpy() 108 | feature_map_num = feature_map.shape[0] 109 | row_num = np.ceil(np.sqrt(feature_map_num)) 110 | 111 | if reference is not None: 112 | if isinstance(reference, torch.Tensor): 113 | reference = ((reference.permute(1, 2, 0) - range_low) / ( 114 | range_high - range_low)).data.cpu().numpy() 115 | reference = np.uint8(255 * reference)
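# the heatmap blending below (cv2.applyColorMap / cv2.addWeighted) works in OpenCV's BGR layout, hence this RGB -> BGR conversion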
116 | reference = cv2.cvtColor(reference, cv2.COLOR_RGB2BGR) 117 | all_vis = reference 118 | else: 119 | all_vis = None 120 | 121 | for index in range(0, feature_map_num): 122 | feat = feature_map[index] 123 | heatmap = feat / np.max(feat) 124 | heatmap = np.uint8(255 * heatmap) 125 | heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 126 | if reference is not None: 127 | if heatmap.shape != reference.shape: 128 | heatmap = cv2.resize(heatmap, (reference.shape[1], reference.shape[0]), interpolation=cv2.INTER_CUBIC) 129 | vis = cv2.addWeighted(heatmap, 0.5, reference, 0.5, 0) 130 | else: 131 | vis = heatmap 132 | if index == 0: 133 | all_vis = vis 134 | else: 135 | all_vis = np.hstack([all_vis, vis]) 136 | 137 | return all_vis 138 | 139 | 140 | def dump_to_pickle(file_path, data): 141 | with open(file_path, "wb") as f: 142 | pk.dump(data, f) 143 | 144 | 145 | def load_from_pickle(file_path): 146 | assert os.path.exists(file_path), "file does not exist: {}".format(file_path) 147 | with open(file_path, "rb") as f: 148 | meta = pk.load(f) 149 | return meta 150 | -------------------------------------------------------------------------------- /multitask.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import multiprocessing as mp 4 | if mp.get_start_method(allow_none=True) != 'spawn': 5 | mp.set_start_method('spawn') 6 | import argparse 7 | from core.distributed_utils import dist_init 8 | from core.config import Config 9 | from core.solvers import solver_entry 10 | import torch 11 | 12 | parser = argparse.ArgumentParser(description='Multi-Task Training Framework') 13 | parser.add_argument('--load-path', default='', type=str) 14 | parser.add_argument('--ignore', nargs='+', default=[], type=str) 15 | parser.add_argument('--recover', action='store_true') 16 | parser.add_argument('--load-single', action='store_true') 17 | parser.add_argument('--port', default='23456', type=str) 18 | parser.add_argument('--config', default='', type=str) 19 | parser.add_argument('--expname', type=str, default=None, help='experiment name, output folder') 20 | parser.add_argument('--auto-resume', type=str, default=None, help='jobs auto resume from the pattern_path or the folder') 21 | parser.add_argument('--forwardbn', action='store_true', help='just forward for re-calculating bn values') 22 | parser.add_argument('--finetune', action='store_true') 23 | parser.add_argument("--tcp_port", type=str, default="5671") 24 | 25 | def main(): 26 | args = parser.parse_args() 27 | dist_init(port=str(args.tcp_port)) 28 | 29 | C = Config(args.config) 30 | if args.expname is not None: 31 | C.config['expname'] = args.expname 32 | 33 | S = solver_entry(C) 34 | config_save_to = os.path.join(S.ckpt_path, 'config.yaml') 35 | 36 | # auto resume strategy for srun 37 | if args.auto_resume is not None: 38 | args.auto_resume = os.path.join(S.out_dir, args.auto_resume) 39 | if os.path.isdir(args.auto_resume): 40 | max_iter = 0 41 | filename = os.listdir(args.auto_resume) 42 | for file in filename: 43 | if file.startswith('ckpt_task0') and file.endswith('.pth.tar'): 44 | cur_iter = int(file.split('_')[-1].split('.')[0]) 45 | max_iter = max(max_iter, cur_iter) 46 | if max_iter > 0: 47 | args.load_path = os.path.join(args.auto_resume, 48 | 'ckpt_task_iter_{}.pth.tar'.format(str(max_iter))) 49 | args.recover = True 50 | args.ignore = [] 51 | print('auto-resume from: {}'.format(args.load_path)) 52 | elif args.auto_resume.endswith('.pth.tar'): 53 | tmpl =
args.auto_resume.replace('ckpt_task_', 'ckpt_task*_') 54 | import glob 55 | ckpt = glob.glob(tmpl) 56 | if len(ckpt) > 0: 57 | args.load_path = args.auto_resume 58 | args.recover = True 59 | args.ignore = [] 60 | print('auto-resume from: {}'.format(args.load_path)) 61 | else: 62 | print('auto-resume did not work: {}'.format(args.auto_resume)) 63 | 64 | #tmp = torch.Tensor(1).cuda() 65 | if not os.path.exists(config_save_to): 66 | shutil.copy(args.config, config_save_to) 67 | 68 | S.initialize(args) 69 | 70 | S.run() 71 | 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dict_recursive_update==1.0.1 2 | easydict==1.9 3 | json_tricks==3.15.5 4 | numpy==1.19.5 5 | opencv_python==4.1.1.26 6 | pycocotools_fix==2.0.0.9 7 | scikit_image==0.16.2 8 | scipy==1.3.1 9 | Shapely==1.8.1.post1 10 | timm==0.6.7 11 | torch==1.8.1+cuda90.cudnn7.6.5 12 | torchvision==0.8.0a0+2f40a48 13 | tqdm==4.37.0 14 | xtcocotools==1.12 -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import multiprocessing as mp 4 | if mp.get_start_method(allow_none=True) != 'spawn': 5 | mp.set_start_method('spawn') 6 | import argparse 7 | from core.distributed_utils import dist_init 8 | from core.config import Config 9 | from core.testers import tester_entry 10 | import torch 11 | import yaml 12 | import re 13 | 14 | parser = argparse.ArgumentParser(description='Multi-Task Testing Framework') 15 | parser.add_argument('--spec_ginfo_index', type=int, required=True) 16 | parser.add_argument('--load-path', default='', type=str) 17 | parser.add_argument('--ignore', nargs='+', default=[], type=str) 18 | parser.add_argument('--recover', action='store_true') 19 | parser.add_argument('--load-single', action='store_true') 20 | parser.add_argument('--port', default='23456', type=str) 21 | parser.add_argument('--config', default='', type=str) 22 | parser.add_argument('--test_config', default='', type=str) 23 | parser.add_argument('--expname', type=str, default=None, help='experiment name, output folder') 24 | parser.add_argument('--auto-resume', type=str, default=None, help='jobs auto resume from the pattern_path or the folder') 25 | parser.add_argument("--tcp_port", type=str, default="5671") 26 | 27 | loader = yaml.SafeLoader 28 | loader.add_implicit_resolver( 29 | u'tag:yaml.org,2002:float', 30 | re.compile(u'''^(?: 31 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? 32 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) 33 | |\\.[0-9_]+(?:[eE][-+][0-9]+)?
34 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* 35 | |[-+]?\\.(?:inf|Inf|INF) 36 | |\\.(?:nan|NaN|NAN))$''', re.X), 37 | list(u'-+0123456789.')) 38 | 39 | def main(): 40 | args = parser.parse_args() 41 | dist_init(port=str(args.tcp_port)) 42 | 43 | # auto resume strategy for srun 44 | if args.auto_resume is not None: 45 | if os.path.isdir(args.auto_resume): 46 | max_iter = 0 47 | filename = os.listdir(args.auto_resume) 48 | for file in filename: 49 | if file.startswith('ckpt_task0') and file.endswith('.pth.tar'): 50 | cur_iter = int(file.split('_')[-1].split('.')[0]) 51 | max_iter = max(max_iter, cur_iter) 52 | if max_iter > 0: 53 | args.load_path = os.path.join(args.auto_resume, 54 | 'ckpt_task_iter_{}.pth.tar'.format(str(max_iter))) 55 | args.recover = True 56 | args.ignore = [] 57 | print('auto-resume from: {}'.format(args.load_path)) 58 | elif args.auto_resume.endswith('.pth.tar'): 59 | tmpl = args.auto_resume.replace('ckpt_task_', 'ckpt_task*_') 60 | import glob 61 | ckpt = glob.glob(tmpl) 62 | if len(ckpt) > 0: 63 | args.load_path = args.auto_resume 64 | args.recover = True 65 | args.ignore = [] 66 | print('auto-resume from: {}'.format(args.load_path)) 67 | else: 68 | print('auto-resume did not work: {}'.format(args.auto_resume)) 69 | 70 | #tmp = torch.Tensor(1).cuda() 71 | C_train = Config(args.config, spec_ginfo_index=args.spec_ginfo_index) 72 | 73 | with open(args.test_config) as f: 74 | test_config = yaml.load(f, Loader=loader) 75 | num_test_tasks = len(test_config['tasks']) 76 | 77 | for test_spec_ginfo_index in range(num_test_tasks): 78 | C_test = Config(args.test_config, spec_ginfo_index=test_spec_ginfo_index) 79 | if args.expname is not None: 80 | C_train.config['expname'] = args.expname 81 | 82 | S = tester_entry(C_train, C_test) 83 | config_save_to = os.path.join(S.ckpt_path, 'config.yaml') 84 | test_config_save_to = os.path.join(S.ckpt_path, 'test_config_task{}.yaml'.format(test_spec_ginfo_index)) 85 | if not os.path.exists(config_save_to): 86 | shutil.copy(args.config, config_save_to) 87 | shutil.copy(args.test_config, test_config_save_to) 88 | 89 | S.initialize(args) 90 | 91 | S.run() 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | --------------------------------------------------------------------------------
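For reference, a single-GPU evaluation launch wired to the flags defined in test.py above might look like the following minimal sketch; the training config and checkpoint paths are illustrative placeholders (written in the same path...to... style the configs use), and <iter> stands for the iteration suffix of a saved ckpt_task_iter_*.pth.tar checkpoint:

python test.py --spec_ginfo_index 0 \
    --config /mnt/path...to.../train_config.yaml \
    --test_config experiments/unihcp/release/vd_pose_lpe_test.yaml \
    --load-path /mnt/path...to.../checkpoints/ckpt_task_iter_<iter>.pth.tar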