├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── core
│   ├── __init__.py
│   ├── augmentation.py
│   ├── augmentation_pos.py
│   ├── config.py
│   ├── data
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   └── images
│   │   │       ├── image_helper.py
│   │   │       ├── multi_posedataset.py
│   │   │       ├── parsing_dataset.py
│   │   │       ├── pedattr_dataset.py
│   │   │       ├── peddet_dataset.py
│   │   │       ├── pos_dataset_dev.py
│   │   │       ├── reid_dataset.py
│   │   │       ├── resources
│   │   │       │   ├── CHval.odgt
│   │   │       │   ├── COCO_val2017_detections_AP_H_56_person.json
│   │   │       │   └── mpii_gt_val.mat
│   │   │       ├── seg_data_tools
│   │   │       │   ├── __init__.py
│   │   │       │   ├── collate.py
│   │   │       │   ├── cv2_aug_transforms.py
│   │   │       │   └── transforms.py
│   │   │       └── seg_dataset_dev.py
│   │   ├── test_datasets
│   │   │   ├── __init__.py
│   │   │   └── images
│   │   │       └── reid_dataset.py
│   │   └── transforms
│   │       ├── face_transforms.py
│   │       ├── parsing_transforms.py
│   │       ├── pedattr_transforms.py
│   │       ├── peddet_transforms.py
│   │       ├── peddet_transforms_helpers
│   │       │   ├── __init__.py
│   │       │   └── transforms.py
│   │       ├── pose_transforms.py
│   │       ├── post_transforms.py
│   │       ├── reid_transforms.py
│   │       ├── seg_aug_dev.py
│   │       └── seg_transforms_dev.py
│   ├── distributed_utils.py
│   ├── exceptions.py
│   ├── fp16
│   │   ├── __init__.py
│   │   ├── amp.py
│   │   ├── opt.py
│   │   ├── scaler.py
│   │   ├── utils.py
│   │   └── wrap.py
│   ├── lr_scheduler
│   │   ├── __init__.py
│   │   └── base.py
│   ├── make_param_group.py
│   ├── memory.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   └── vit.py
│   │   ├── ckpt.py
│   │   ├── decoders
│   │   │   ├── __init__.py
│   │   │   ├── losses
│   │   │   │   ├── __init__.py
│   │   │   │   ├── classification_losses.py
│   │   │   │   ├── criterion.py
│   │   │   │   ├── matcher.py
│   │   │   │   ├── pedattr_losses.py
│   │   │   │   ├── peddet_losses.py
│   │   │   │   ├── point_features.py
│   │   │   │   ├── pos_losses.py
│   │   │   │   ├── seg_losses.py
│   │   │   │   └── test_time.py
│   │   │   └── network
│   │   │       ├── __init__.py
│   │   │       ├── meta_arch
│   │   │       │   ├── __init__.py
│   │   │       │   └── aio_head.py
│   │   │       └── transformer_decoder
│   │   │           ├── __init__.py
│   │   │           ├── position_encoding.py
│   │   │           └── transformer_decoder.py
│   │   ├── model_entry.py
│   │   ├── necks
│   │   │   ├── DoNothing.py
│   │   │   ├── __init__.py
│   │   │   └── simple_neck.py
│   │   ├── ops
│   │   │   ├── __init__.py
│   │   │   ├── box_ops.py
│   │   │   ├── boxes.py
│   │   │   └── utils.py
│   │   └── tta.py
│   ├── msg_server.py
│   ├── optim.py
│   ├── optimizers
│   │   ├── __init__.py
│   │   ├── adafactor.py
│   │   ├── adam_clip.py
│   │   └── lars.py
│   ├── solvers
│   │   ├── __init__.py
│   │   ├── solver.py
│   │   ├── solver_deter.py
│   │   ├── solver_multitask_dev.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── attr_tester_dev.py
│   │       ├── detools
│   │       │   └── box.py
│   │       ├── nms.py
│   │       ├── par_tester_dev.py
│   │       ├── peddet_tester_dev.py
│   │       ├── pos_tester_dev.py
│   │       └── seg_tester_dev.py
│   ├── testers
│   │   ├── __init__.py
│   │   ├── reid_tester.py
│   │   ├── tester.py
│   │   ├── tester_deter.py
│   │   └── utils
│   │       ├── metrics.py
│   │       └── reranking.py
│   └── utils.py
├── experiments
│   └── unihcp
│       └── release
│           ├── ablation_baseline_coslr1e3_60k_b1000g40_h256_I2k_1_10_001_2I_m256.yaml
│           ├── batch_test.sh
│           ├── coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256.yaml
│           ├── test.sh
│           ├── train.sh
│           ├── vd_h3m6_pose_test.yaml
│           ├── vd_ochuman_pose_test.yaml
│           ├── vd_pa100k_lpe_test.yaml
│           ├── vd_par_atr_lpe_test.yaml
│           ├── vd_par_cihp_lpe_test.yaml
│           ├── vd_par_lip_lpe_test.yaml
│           ├── vd_par_lpe_test.yaml
│           ├── vd_peddet_caltech_test.yaml
│           ├── vd_peddet_inter_lpe_test.yaml
│           ├── vd_peta_lpe_test.yaml
│           ├── vd_pose_aic_lpe_test.yaml
│           ├── vd_pose_lpe_test.yaml
│           ├── vd_pose_mpii_lpe_test.yaml
│           ├── vd_rap2_lpe_test.yaml
│           ├── vd_reid_cuhk3_test.yaml
│           ├── vd_reid_msmt_test.yaml
│           ├── vd_reid_senseid_test.yaml
│           └── vd_reid_test.yaml
├── helper
│   ├── align.py
│   ├── flops_helper.py
│   ├── multitask_schedule.py
│   ├── param_count.py
│   └── vis_helper.py
├── multitask.py
├── requirements.txt
└── test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | *.bak
4 | *.tar
5 | scripts
6 | cfgs
7 | checkpoints
8 | __pycache__
9 | log*.txt
10 | .ignore
11 | mimic_experiments/
12 | backup/
13 | exp_branch/
14 | exp_modelscaling/
15 | result_info/
16 | result_info_context/
17 | exp_*
18 | failed_to_read_*
19 | .idea/
20 | parse_result*
21 | bak*
22 |
23 | itchat.pkl
24 | server.txt
25 |
26 | caffemodels/
27 | feats/
28 | nart_tools/
29 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/.gitmodules
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 OpenGVLab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UniHCP: A Unified Model for Human-Centric Perceptions
2 |
3 | # Usage
4 |
5 | ## Preparation
6 |
7 | 1. Install all required dependencies listed in `requirements.txt`.
8 | 2. Replace every `path...to...` placeholder in the .yaml configuration files with the absolute path
9 |    to the corresponding dataset location.
10 | 3. Place the MAE pretrained weight `mae_pretrain_vit_base.pth` under the `core/models/backbones/pretrain_weights` folder.
11 |
12 | *Only slurm-based distributed training & single-GPU testing are implemented in this repo.
13 |
14 | ## Experiments
15 |
16 | All experiment configuration files and launch scripts are located in the `experiments/unihcp/release` folder.
17 |
18 | To perform full multi-task training for UniHCP, replace `` in the `train.sh` launch script and run:
19 |
20 | ```bash
21 | sh train.sh 88 coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256
22 | ```
23 |
24 | To perform evaluations, keep the `test_info_list` assignments corresponding to the tests you want to perform,
25 | replace ``, then run:
26 |
27 | ```bash
28 | sh batch_test.sh 1 coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256
29 | ```
30 |
31 | Note that in this case, the program will look for checkpoints located at `experiments/unihcp/release/checkpoints/coslr1e3_104k_b4324g88_h256_I2k_1_10_001_2I_fairscale_m256`.
32 |
33 |
34 | # Pretrained Models
35 | Please send the signed agreement to `mail@yuanzheng.ci` to get the download link.
36 |
--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/__init__.py
--------------------------------------------------------------------------------
/core/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .images.reid_dataset import ReIDDataset
2 | from .images.pedattr_dataset import AttrDataset
3 | from .images.pos_dataset_dev import COCOPosDatasetDev, MPIIPosDatasetDev
4 | from .images.parsing_dataset import Human3M6ParsingDataset, LIPParsingDataset, CIHPParsingDataset, ATRParsingDataset, DeepFashionParsingDataset, VIPParsingDataset, ModaNetParsingDataset
5 | from .images.multi_posedataset import MultiPoseDatasetDev
6 | from .images.peddet_dataset import PedestrainDetectionDataset
7 | from core.utils import printlog
8 |
9 | def dataset_entry(config):
10 |     printlog('config[kwargs]', config['kwargs'])
11 | return globals()[config['type']](**config['kwargs'])
12 |
--------------------------------------------------------------------------------
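A note on the registry pattern above: `dataset_entry` resolves `config['type']` against this module's globals and forwards `config['kwargs']` to the matching constructor. A minimal sketch of the expected config shape; `ginfo`, `aug_cfg`, and `task_spec` are placeholders for objects built by the surrounding solver config, not real release settings:

```python
from core.data.datasets import dataset_entry

# Illustrative config; 'type' must name a class imported in datasets/__init__.py.
config = {
    'type': 'AttrDataset',
    'kwargs': {
        'ginfo': ginfo,            # placeholder: task group info from the solver
        'augmentation': aug_cfg,   # placeholder: e.g. height/width settings
        'task_spec': task_spec,    # placeholder: dataset name, data_path, root_path
        'train': True,
    },
}
dataset = dataset_entry(config)    # equivalent to AttrDataset(**config['kwargs'])
```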
/core/data/datasets/images/pedattr_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import pickle
4 | import random
5 | from easydict import EasyDict as edict
6 | import numpy as np
7 | import torch.utils.data as data
8 | from PIL import Image
9 | from core.data.transforms.pedattr_transforms import PedAttrAugmentation, PedAttrTestAugmentation, PedAttrRandomAugmentation
10 | from core import distributed_utils as dist
11 |
12 |
13 | __all__ = ['AttrDataset']
14 |
15 | class AttrDataset(data.Dataset):
16 |
17 | def __init__(self, ginfo, augmentation, task_spec, train=True, **kwargs):
18 |
19 |         assert task_spec.dataset in ['peta', 'PA-100k', 'rap', 'rap2', 'uavhuman', 'HARDHC', 'ClothingAttribute', 'parse27k', 'duke', 'market'], \
20 |             f'dataset name {task_spec.dataset} does not exist'
21 |
22 | data_path = task_spec.data_path
23 |
24 |         with open(data_path, "rb") as f:  # read-only; "rb+" is unnecessary
25 | dataset_info = pickle.load(f)
26 | dataset_info = edict(dataset_info)
27 |
28 | img_id = dataset_info.image_name
29 | attr_label = dataset_info.label
30 |
31 | if train:
32 | split = 'trainval'
33 | else:
34 | split = 'test'
35 |
36 |         assert split in dataset_info.partition.keys(), f'split {split} does not exist'
37 |
38 | height = augmentation.height
39 | width = augmentation.width
40 |
41 | self.dataset = task_spec.dataset
42 | self.root_path = task_spec.root_path
43 |
44 | if train:
45 | self.transform = PedAttrAugmentation(height, width)
46 | if augmentation.get('use_random_aug', False):
47 | self.transform = PedAttrRandomAugmentation(height, width, \
48 | augmentation.use_random_aug.m, augmentation.use_random_aug.n)
49 | else:
50 | self.transform = PedAttrTestAugmentation(height, width)
51 |
52 | self.attr_id = dataset_info.attr_name
53 | self.attr_num = len(self.attr_id)
54 |
55 | self.img_idx = dataset_info.partition[split]
56 |
57 | if isinstance(self.img_idx, list):
58 | self.img_idx = self.img_idx[0] # default partition 0
59 |
60 | self.img_num = len(self.img_idx)
61 | self.img_idx = np.array(self.img_idx)
62 | self.img_id = [img_id[i] for i in self.img_idx]
63 | self.label = attr_label[self.img_idx]
64 | self.task_name = ginfo.task_name
65 | self.rank = dist.get_rank()
66 | self.train = train
67 |
68 | def __getitem__(self, index):
69 | imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index]
70 | imgpath = os.path.join(self.root_path, imgname)
71 |
72 | img = Image.open(imgpath).convert("RGB")
73 |
74 | if self.transform is not None:
75 | img = self.transform(img)
76 |
77 | gt_label = gt_label.astype(np.float32)
78 |
79 | output = {'image': img, 'label': gt_label, 'filename': imgname}
80 | return output
81 |
82 | def __len__(self):
83 | return len(self.img_id)
84 |
85 |     def __repr__(self):
86 |         return self.__class__.__name__ + \
87 |             f' rank: {self.rank} task: {self.task_name} mode: {"training" if self.train else "inference"} ' \
88 |             f'dataset_len: {len(self.img_id)} id_num: {self.attr_num} augmentation: {self.transform}'
89 |
90 |
--------------------------------------------------------------------------------
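As a reader's note, `AttrDataset` expects the pickle at `task_spec.data_path` to carry the four fields accessed above (`image_name`, `label`, `attr_name`, `partition`). A minimal sketch of a compatible annotation file; the counts and names are illustrative only:

```python
import pickle
import numpy as np

# Hypothetical annotation layout matching what AttrDataset reads.
dataset_info = {
    'image_name': ['0001.png', '0002.png'],         # one entry per image
    'label': np.zeros((2, 35), dtype=np.int64),     # (num_images, num_attributes)
    'attr_name': [f'attr_{i}' for i in range(35)],  # attribute names
    'partition': {
        'trainval': [np.array([0, 1])],             # a list -> partition 0 is used
        'test': [np.array([1])],
    },
}
with open('anno.pkl', 'wb') as f:
    pickle.dump(dataset_info, f)
```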
/core/data/datasets/images/resources/mpii_gt_val.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/data/datasets/images/resources/mpii_gt_val.mat
--------------------------------------------------------------------------------
/core/data/datasets/images/seg_data_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/data/datasets/images/seg_data_tools/__init__.py
--------------------------------------------------------------------------------
/core/data/datasets/images/seg_data_tools/collate.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | from torch.utils.data.dataloader import default_collate
6 |
7 | from lib.extensions.parallel.data_container import DataContainer
8 |
9 |
10 | def stack(batch, data_key=None, return_dc=False):
11 | if isinstance(batch[0][data_key], DataContainer):
12 | if batch[0][data_key].stack:
13 | assert isinstance(batch[0][data_key].data, torch.Tensor)
14 | samples = [sample[data_key].data for sample in batch]
15 | return default_collate(samples)
16 |
17 | elif not return_dc:
18 | return [sample[data_key].data for sample in batch]
19 |
20 | else:
21 | return DataContainer([sample[data_key].data for sample in batch])
22 |
23 | else:
24 | return default_collate([sample[data_key] for sample in batch])
25 |
26 |
27 | def collate(batch, trans_dict):
28 | data_keys = batch[0].keys()
29 |
30 | target_width, target_height = trans_dict['input_size']
31 | target_widths, target_heights = [target_width] * len(batch), [target_height] * len(batch)
32 |
33 |
34 | for i in range(len(batch)):
35 | target_width, target_height = target_widths[i], target_heights[i]
36 |
37 | if 'meta' in data_keys:
38 | batch[i]['meta'].data['input_size'] = [target_width, target_height]
39 |
40 | channels, height, width = batch[i]['img'].size()
41 | if height == target_height and width == target_width:
42 | continue
43 |
44 | scaled_size = [width, height]
45 |
46 | if trans_dict['align_method'] in ['only_scale', 'scale_and_pad']:
47 | w_scale_ratio = target_width / width
48 | h_scale_ratio = target_height / height
49 | if trans_dict['align_method'] == 'scale_and_pad':
50 | w_scale_ratio = min(w_scale_ratio, h_scale_ratio)
51 | h_scale_ratio = w_scale_ratio
52 |
53 | scaled_size = (int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)))
54 | if 'meta' in data_keys and 'border_size' in batch[i]['meta'].data:
55 | batch[i]['meta'].data['border_size'] = scaled_size
56 |
57 | scaled_size_hw = (scaled_size[1], scaled_size[0])
58 | batch[i]['img'] = DataContainer(F.interpolate(batch[i]['img'].data.unsqueeze(0),
59 | scaled_size_hw, mode='bilinear', align_corners=True).squeeze(0), stack=True)
60 | if 'labelmap' in data_keys:
61 | labelmap = batch[i]['labelmap'].data.unsqueeze(0).unsqueeze(0).float()
62 | labelmap = F.interpolate(labelmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0)
63 | batch[i]['labelmap'] = DataContainer(labelmap, stack=True)
64 |
65 | if 'maskmap' in data_keys:
66 | maskmap = batch[i]['maskmap'].data.unsqueeze(0).unsqueeze(0).float()
67 | maskmap = F.interpolate(maskmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0)
68 |             batch[i]['maskmap'] = DataContainer(maskmap, stack=True)  # assign the container itself, not its .data
69 |
70 | pad_width = target_width - scaled_size[0]
71 | pad_height = target_height - scaled_size[1]
72 | assert pad_height >= 0 and pad_width >= 0
73 | if pad_width > 0 or pad_height > 0:
74 | assert trans_dict['align_method'] in ['only_pad', 'scale_and_pad']
75 | left_pad = 0
76 | up_pad = 0
77 | if 'pad_mode' not in trans_dict or trans_dict['pad_mode'] == 'random':
78 | left_pad = random.randint(0, pad_width) # pad_left
79 | up_pad = random.randint(0, pad_height) # pad_up
80 |
81 | elif trans_dict['pad_mode'] == 'pad_left_up':
82 | left_pad = pad_width
83 | up_pad = pad_height
84 |
85 | elif trans_dict['pad_mode'] == 'pad_right_down':
86 | left_pad = 0
87 | up_pad = 0
88 |
89 | elif trans_dict['pad_mode'] == 'pad_center':
90 | left_pad = pad_width // 2
91 | up_pad = pad_height // 2
92 |
93 | elif trans_dict['pad_mode'] == 'pad_border':
94 | if random.randint(0, 1) == 0:
95 | left_pad = pad_width
96 | up_pad = pad_height
97 | else:
98 | left_pad = 0
99 | up_pad = 0
100 |             else:
101 |                 raise ValueError('pad mode {} not defined'.format(trans_dict['pad_mode']))
102 |
103 |
104 | pad = (left_pad, pad_width-left_pad, up_pad, pad_height-up_pad)
105 |
106 | batch[i]['img'] = DataContainer(F.pad(batch[i]['img'].data, pad=pad, value=0), stack=batch[i]['img'].stack)
107 |
108 | if 'labelmap' in data_keys:
109 | batch[i]['labelmap'] = DataContainer(F.pad(batch[i]['labelmap'].data, pad=pad, value=-1), stack=batch[i]['labelmap'].stack)
110 |
111 | if 'maskmap' in data_keys:
112 | batch[i]['maskmap'] = DataContainer(F.pad(batch[i]['maskmap'].data, pad=pad, value=0), stack=batch[i]['maskmap'].stack)
113 |
114 | if 'distance_map' in data_keys:
115 | batch[i]['distance_map'] = DataContainer(F.pad(batch[i]['distance_map'].data, pad=pad, value=255), stack=batch[i]['distance_map'].stack)
116 |
117 | if 'angle_map' in data_keys:
118 | batch[i]['angle_map'] = DataContainer(F.pad(batch[i]['angle_map'].data, pad=pad, value=0), stack=batch[i]['angle_map'].stack)
119 |
120 | if 'mask_label_map' in data_keys:
121 | batch[i]['mask_label_map'] = DataContainer(F.pad(batch[i]['mask_label_map'].data, pad=pad, value=-1), stack=batch[i]['mask_label_map'].stack)
122 |
123 | if 'direction_label_map' in data_keys:
124 | batch[i]['direction_label_map'] = DataContainer(F.pad(batch[i]['direction_label_map'].data, pad=pad, value=-1), stack=batch[i]['direction_label_map'].stack)
125 |
126 | if 'multi_label_direction_map' in data_keys:
127 | batch[i]['multi_label_direction_map'] = DataContainer(F.pad(batch[i]['multi_label_direction_map'].data, pad=pad, value=-1), stack=batch[i]['multi_label_direction_map'].stack)
128 |
129 | if 'energy_label_map' in data_keys:
130 | batch[i]['energy_label_map'] = DataContainer(F.pad(batch[i]['energy_label_map'].data, pad=pad, value=-1), stack=batch[i]['energy_label_map'].stack)
131 |
132 | if 'offsetmap_h' in data_keys:
133 | batch[i]['offsetmap_h'] = DataContainer(F.pad(batch[i]['offsetmap_h'].data, pad=pad, value=0), stack=batch[i]['offsetmap_h'].stack)
134 |
135 | if 'offsetmap_w' in data_keys:
136 | batch[i]['offsetmap_w'] = DataContainer(F.pad(batch[i]['offsetmap_w'].data, pad=pad, value=0), stack=batch[i]['offsetmap_w'].stack)
137 |
138 | return dict({key: stack(batch, data_key=key) for key in data_keys})
139 |
140 |
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
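To make the `scale_and_pad` branch concrete, here is the resize-then-pad arithmetic from `collate()` worked through by hand, assuming a 640x480 (WxH) input, a 512x512 target, and `pad_mode='pad_center'`:

```python
# Worked example of the scale_and_pad arithmetic in collate().
width, height = 640, 480
target_width, target_height = 512, 512

w_scale_ratio = target_width / width        # 0.8
h_scale_ratio = target_height / height      # ~1.067
scale = min(w_scale_ratio, h_scale_ratio)   # 0.8, preserves aspect ratio
scaled_size = (int(round(width * scale)), int(round(height * scale)))  # (512, 384)

pad_width = target_width - scaled_size[0]    # 0
pad_height = target_height - scaled_size[1]  # 128
left_pad, up_pad = pad_width // 2, pad_height // 2   # pad_center -> (0, 64)
pad = (left_pad, pad_width - left_pad, up_pad, pad_height - up_pad)
print(scaled_size, pad)  # (512, 384) (0, 0, 64, 64)
```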
/core/data/datasets/images/seg_data_tools/transforms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from PIL import Image
4 |
5 |
6 | class Normalize(object):
7 | """Normalize a ``torch.tensor``
8 |
9 | Args:
10 | inputs (torch.tensor): tensor to be normalized.
11 | mean: (list): the mean of RGB
12 | std: (list): the std of RGB
13 |
14 | Returns:
15 | Tensor: Normalized tensor.
16 | """
17 | def __init__(self, div_value, mean, std):
18 | self.div_value = div_value
19 | self.mean = mean
20 |         self.std = std
21 |
22 | def __call__(self, inputs):
23 | inputs = inputs.div(self.div_value)
24 | for t, m, s in zip(inputs, self.mean, self.std):
25 | t.sub_(m).div_(s)
26 |
27 | return inputs
28 |
29 |
30 | class DeNormalize(object):
31 | """DeNormalize a ``torch.tensor``
32 |
33 | Args:
34 |         inputs (torch.tensor): tensor to be denormalized.
35 |         mean: (list): the mean of RGB
36 |         std: (list): the std of RGB
37 |
38 |     Returns:
39 |         Tensor: Denormalized tensor.
40 |     """
41 |     def __init__(self, div_value, mean, std):
42 |         self.div_value = div_value
43 |         self.mean = mean
44 |         self.std = std
45 |
46 | def __call__(self, inputs):
47 | result = inputs.clone()
48 | for i in range(result.size(0)):
49 | result[i, :, :] = result[i, :, :] * self.std[i] + self.mean[i]
50 |
51 | return result.mul_(self.div_value)
52 |
53 |
54 | class ToTensor(object):
55 | """Convert a ``numpy.ndarray or Image`` to tensor.
56 |
57 | See ``ToTensor`` for more details.
58 |
59 | Args:
60 | inputs (numpy.ndarray or Image): Image to be converted to tensor.
61 |
62 | Returns:
63 | Tensor: Converted image.
64 | """
65 | def __call__(self, inputs):
66 | if isinstance(inputs, Image.Image):
67 | channels = len(inputs.mode)
68 | inputs = np.array(inputs)
69 | inputs = inputs.reshape(inputs.shape[0], inputs.shape[1], channels)
70 | inputs = torch.from_numpy(inputs.transpose(2, 0, 1))
71 | else:
72 | inputs = torch.from_numpy(inputs.transpose(2, 0, 1))
73 |
74 | return inputs.float()
75 |
76 |
77 | class ToLabel(object):
78 | def __call__(self, inputs):
79 | return torch.from_numpy(np.array(inputs)).long()
80 |
81 |
82 | class ReLabel(object):
83 | """
84 |     255 indicates the background; relabel 255 to a given value.
85 | """
86 | def __init__(self, olabel, nlabel):
87 | self.olabel = olabel
88 | self.nlabel = nlabel
89 |
90 | def __call__(self, inputs):
91 | assert isinstance(inputs, torch.LongTensor), 'tensor needs to be LongTensor'
92 |
93 | inputs[inputs == self.olabel] = self.nlabel
94 | return inputs
95 |
96 |
97 | class Compose(object):
98 |
99 | def __init__(self, transforms):
100 | self.transforms = transforms
101 |
102 | def __call__(self, inputs):
103 | for t in self.transforms:
104 | inputs = t(inputs)
105 |
106 | return inputs
--------------------------------------------------------------------------------
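A quick sketch of how these transforms are typically chained for a segmentation input. The `div_value` and ImageNet-style statistics below are assumptions for illustration; the values actually used come from the experiment configs:

```python
from PIL import Image
from core.data.datasets.images.seg_data_tools.transforms import (
    Compose, Normalize, ToTensor)

transform = Compose([
    ToTensor(),                           # PIL HWC uint8 -> CHW float tensor
    Normalize(div_value=255.0,            # assumed: scale to [0, 1] first
              mean=[0.485, 0.456, 0.406], # assumed ImageNet statistics
              std=[0.229, 0.224, 0.225]),
])
img = Image.open('example.jpg').convert('RGB')
tensor = transform(img)                   # normalized CHW float tensor
```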
/core/data/test_datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .images.reid_dataset import ReIDTestDataset, ReIDTestDatasetDev
2 |
3 | def dataset_entry(config):
4 | # print('config[kwargs]',config['kwargs'])
5 | return globals()[config['type']](**config['kwargs'])
6 |
--------------------------------------------------------------------------------
/core/data/transforms/peddet_transforms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | COCO dataset which returns image_id for evaluation.
4 |
5 | Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
6 | """
7 | from pathlib import Path
8 | import os.path as osp
9 | import json
10 | import torch
11 | import torch.utils.data
12 | import torchvision
13 | import core.data.transforms.peddet_transforms_helpers.transforms as T
14 | import cv2
15 | cv2.ocl.setUseOpenCL(False)
16 |
17 | class PedestrainDetectionAugmentation(object):
18 | def __init__(self, phase, vit=False, maxsize=1333):
19 | if vit:
20 | normalize = T.Compose([
21 | T.PILToTensor(),
22 | T.Normalize([0., 0., 0.], [1., 1., 1.])
23 | ])
24 | else:
25 | normalize = T.Compose([
26 | T.ToTensor(),
27 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
28 | ])
29 |
30 | scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
31 |
32 | if phase == 'train':
33 | self.transformer = T.Compose([
34 | T.RandomHorizontalFlip(),
35 | T.RandomSelect(
36 | T.RandomResize(scales, max_size=maxsize),
37 | T.Compose([
38 | T.RandomResize([400, 500, 600]),
39 | T.RandomSizeCrop(384, 600),
40 | T.RandomResize(scales, max_size=maxsize),
41 | ])
42 | ),
43 | normalize,
44 | ])
45 | elif phase == 'val':
46 | self.transformer = T.Compose([
47 | T.RandomResize([800], max_size=maxsize),
48 | normalize,
49 | ])
50 | else:
51 | raise NotImplementedError
52 |
53 | def __call__(self, image, target):
54 | return self.transformer(image, target)
55 |
56 | class PedestrainDetectionAugmentationCal(object):
57 | def __init__(self, phase, vit=False, maxsize=640):
58 | if vit:
59 | normalize = T.Compose([
60 | T.PILToTensor(),
61 | T.Normalize([0., 0., 0.], [1., 1., 1.])
62 | ])
63 | else:
64 | normalize = T.Compose([
65 | T.ToTensor(),
66 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
67 | ])
68 |
69 | # scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
70 |
71 | if phase == 'train':
72 | self.transformer = T.Compose([
73 | T.RandomHorizontalFlip(),
74 | # T.RandomSelect(
75 | # # T.RandomResize(scales, max_size=maxsize),
76 | # T.Compose([
77 | # # T.RandomResize([400, 500, 600]),
78 | # # T.RandomSizeCrop(384, 600),
79 | # # T.RandomResize(scales, max_size=maxsize),
80 | # ])
81 | # ),
82 | normalize,
83 | ])
84 | elif phase == 'val':
85 | self.transformer = T.Compose([
86 | # T.RandomResize([800], max_size=maxsize),
87 | normalize,
88 | ])
89 | else:
90 | raise NotImplementedError
91 |
92 | def __call__(self, image, target):
93 | return self.transformer(image, target)
94 |
--------------------------------------------------------------------------------
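For reference, both augmentation classes follow the DETR-style convention of a callable over an `(image, target)` pair. A minimal sketch, assuming the target carries absolute `xyxy` boxes and integer labels as in the torchvision/DETR helpers this file adapts (the helper `Normalize` may additionally rescale boxes, as in DETR):

```python
import torch
from PIL import Image
from core.data.transforms.peddet_transforms import PedestrainDetectionAugmentation

aug = PedestrainDetectionAugmentation(phase='train', vit=True, maxsize=1333)

image = Image.open('pedestrian.jpg').convert('RGB')
target = {
    'boxes': torch.tensor([[10., 20., 110., 220.]]),  # assumed absolute xyxy
    'labels': torch.tensor([1]),
}
image, target = aug(image, target)  # random flip/resize/crop + normalization
```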
/core/data/transforms/peddet_transforms_helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/data/transforms/peddet_transforms_helpers/__init__.py
--------------------------------------------------------------------------------
/core/exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | class MCReadFailException(Exception):
3 | pass
4 |
5 | class NoneImageException(Exception):
6 | pass
7 |
--------------------------------------------------------------------------------
/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .opt import *
2 | from .scaler import *
3 | from .amp import *
4 | from . import utils
5 |
--------------------------------------------------------------------------------
/core/fp16/amp.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import functools
3 | import itertools
4 |
5 | import torch
6 |
7 | from . import utils, wrap
8 |
9 | __all__ = ['half_function', 'float_function',
10 | 'register_half_function', 'register_float_function',
11 | 'register_float_module', 'init', 'reset']
12 |
13 | _DECORATOR_HANDLE = None
14 | _USER_CAST_REGISTRY = set()
15 | _USER_FLOAT_MODULE = set()
16 | _ORIGINAL_MODULE_HALF = None
17 |
18 | def _decorator_helper(orig_fn, cast_fn, wrap_fn):
19 | def wrapper(*args, **kwargs):
20 | handle = _DECORATOR_HANDLE
21 | if handle is None or not handle.is_active():
22 | return orig_fn(*args, **kwargs)
23 | inner_cast_fn = utils.verbosify(cast_fn, orig_fn.__name__,
24 | handle.verbose)
25 | return wrap_fn(orig_fn, inner_cast_fn, handle)(*args, **kwargs)
26 | return wrapper
27 |
28 | # Decorator form
29 | def half_function(fn):
30 | wrap_fn = functools.partial(wrap.make_cast_wrapper, try_caching=True)
31 | return _decorator_helper(fn, utils.maybe_half, wrap_fn)
32 |
33 | def float_function(fn):
34 | wrap_fn = functools.partial(wrap.make_cast_wrapper, try_caching=False)
35 | return _decorator_helper(fn, utils.maybe_float, wrap_fn)
36 |
37 | # Registry form
38 | def register_half_function(module, name):
39 | if not hasattr(module, name):
40 | raise ValueError('No function named {} in module {}.'.format(
41 | name, module))
42 | _USER_CAST_REGISTRY.add((module, name, utils.maybe_half))
43 |
44 | def register_float_function(module, name):
45 | if not hasattr(module, name):
46 | raise ValueError('No function named {} in module {}.'.format(
47 | name, module))
48 | _USER_CAST_REGISTRY.add((module, name, utils.maybe_float))
49 |
50 | def register_float_module(module, cast_args=True):
51 | if not issubclass(module, torch.nn.modules.module.Module):
52 | raise ValueError('{} is not a torch Module'.format(module))
53 |
54 | if cast_args:
55 | register_float_function(module, 'forward')
56 |
57 | _USER_FLOAT_MODULE.add(module)
58 |
59 | class AmpHandle(object):
60 | def __init__(self, enable_caching=True, verbose=False):
61 | self._enable_caching = enable_caching
62 | self._verbose = verbose
63 | self._cache = dict()
64 | self._is_active = True
65 | self._all_wrappers = []
66 |
67 | def is_active(self):
68 | return self._is_active
69 |
70 | @contextlib.contextmanager
71 | def _disable_casts(self):
72 | self._is_active = False
73 | yield
74 | self._is_active = True
75 |
76 | def _clear_cache(self):
77 | self._cache.clear()
78 |
79 | # Experimental support for saving / restoring uncasted versions of functions
80 | def _save_func(self, mod, fn, func):
81 | self._all_wrappers.append((mod, fn, func))
82 |
83 | def _deactivate(self):
84 | for mod, fn, func in self._all_wrappers:
85 | utils.set_func(mod, fn, func)
86 | self._all_wrappers = []
87 |
88 | @property
89 | def has_cache(self):
90 | return self._enable_caching
91 |
92 | @property
93 | def cache(self):
94 | return self._cache
95 |
96 | def remove_cache(self, param):
97 | if self.has_cache and param in self.cache:
98 | del self.cache[param]
99 |
100 | @property
101 | def verbose(self):
102 | return self._verbose
103 |
104 | def _half_helper(verbose=False):
105 | def _half_wrapper(self):
106 | for module in self.children():
107 | module.half()
108 |
109 | if self.__class__ in _USER_FLOAT_MODULE:
110 | if verbose:
111 |                 print('Skipping half conversion for {}'.format(self.__class__))
112 | return self
113 |
114 | fn = lambda t: t.half() if t.is_floating_point() else t
115 | for param in self._parameters.values():
116 | if param is not None:
117 | # Tensors stored in modules are graph leaves, and we don't
118 | # want to create copy nodes, so we have to unpack the data.
119 | param.data = fn(param.data)
120 | if param._grad is not None:
121 | param._grad.data = fn(param._grad.data)
122 |
123 | for key, buf in self._buffers.items():
124 | if buf is not None:
125 | self._buffers[key] = fn(buf)
126 |
127 | return self
128 | return _half_wrapper
129 |
130 | def init(enable_caching=True, verbose=False):
131 | global _DECORATOR_HANDLE
132 | global _ORIGINAL_MODULE_HALF
133 |
134 | handle = AmpHandle(enable_caching, verbose)
135 |
136 | if len(_USER_FLOAT_MODULE) > 0:
137 | _ORIGINAL_MODULE_HALF = torch.nn.modules.module.Module.half
138 | utils.set_func(torch.nn.modules.module.Module, 'half',
139 | _half_helper(verbose))
140 |
141 | # Force-{fp16, fp32} for user-annotated functions
142 | for mod, fn, cast_fn in _USER_CAST_REGISTRY:
143 | try_caching = (cast_fn == utils.maybe_half)
144 | wrap.cached_cast(mod, fn, cast_fn, handle,
145 | try_caching, verbose)
146 | _USER_CAST_REGISTRY.clear()
147 |
148 | _DECORATOR_HANDLE = handle
149 | return handle
150 |
151 | def _clear_cache():
152 | handle = _DECORATOR_HANDLE
153 | if handle is None or not handle.is_active():
154 | return
155 | handle._clear_cache()
156 |
157 | def reset():
158 | handle = _DECORATOR_HANDLE
159 | if handle is None or not handle.is_active():
160 | return
161 |     handle._deactivate()
162 |     if _ORIGINAL_MODULE_HALF is not None:  # only restore if init() patched Module.half
163 |         utils.set_func(torch.nn.modules.module.Module, 'half', _ORIGINAL_MODULE_HALF)
--------------------------------------------------------------------------------
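A short sketch of the intended call pattern for this module: register any module that must stay in fp32, call `init()`, then convert the model with `.half()`. `SensitiveHead` is a made-up example of a numerically sensitive module, not part of the repo:

```python
import torch
from core.fp16 import amp

class SensitiveHead(torch.nn.Module):  # hypothetical fp32-sensitive module
    def forward(self, x):
        return torch.nn.functional.softmax(x.float(), dim=-1)

amp.register_float_module(SensitiveHead, cast_args=True)  # keep fp32, cast inputs
handle = amp.init(enable_caching=True)

model = torch.nn.Sequential(torch.nn.Linear(8, 8), SensitiveHead())
model.half()  # patched Module.half: Linear -> fp16, SensitiveHead stays fp32
```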
/core/fp16/scaler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from math import inf  # torch._six was removed in newer torch; math.inf is identical
3 |
4 | from .utils import iter_params
5 |
6 | __all__ = ['scale_check_overflow', 'LossScaler']
7 |
8 | # from apex_C import scale_check_overflow
9 |
10 | # Python stopgap, until we get a future-proof kernel into upstream
11 | def scale_check_overflow(d_grads, scale):
12 | any_infinite = ((d_grads != d_grads) | (d_grads.abs() == inf)).any()
13 | if any_infinite:
14 | return True
15 | d_grads.mul_(scale)
16 | return False
17 |
18 | class LossScaler(object):
19 | def __init__(self, scale=1.0, dynamic=False):
20 | self._dynamic = dynamic
21 | self._loss_scale = 2.**16 if self._dynamic else scale
22 | self._max_loss_scale = 2.**24
23 | self._scale_seq_len = 2000
24 | self._unskipped = 0
25 | self._has_overflow = False
26 |
27 | @property
28 | def loss_scale(self):
29 | return self._loss_scale
30 |
31 | @property
32 | def has_overflow(self):
33 | return self._has_overflow
34 |
35 | def unscale_and_update(self, param_groups, scale):
36 | if not self._dynamic:
37 | for p in iter_params(param_groups):
38 | if p.grad is not None:
39 | p.grad.data.mul_(1. / scale)
40 |             return False  # static scale: never skip the step
41 |
42 | self._has_overflow = False
43 | for p in iter_params(param_groups):
44 | if p.grad is not None:
45 | self._has_overflow = scale_check_overflow(p.grad.data,
46 | 1. / scale)
47 | if self._has_overflow:
48 | break
49 |
50 | # if self._overflow_buf.any():
51 | if self._has_overflow:
52 | should_skip = True
53 | self._loss_scale /= 2.
54 | self._unskipped = 0
55 | else:
56 | should_skip = False
57 | self._unskipped += 1
58 |
59 | if self._unskipped == self._scale_seq_len:
60 | self._loss_scale = min(self._max_loss_scale, self._loss_scale * 2.)
61 | self._unskipped = 0
62 |
63 | return should_skip
64 |
65 | def backward(self, loss):
66 | scaled_loss = loss*self.loss_scale
67 | scaled_loss.backward()
68 |
--------------------------------------------------------------------------------
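A condensed sketch of the training-step pattern `LossScaler` supports (apex-style dynamic loss scaling); `loss` and `optimizer` are assumed to come from the surrounding training loop:

```python
from core.fp16.scaler import LossScaler

scaler = LossScaler(dynamic=True)

optimizer.zero_grad()
scaler.backward(loss)  # backprop on loss * loss_scale

# Unscale grads in place; on inf/nan the scale is halved and the step skipped.
should_skip = scaler.unscale_and_update(optimizer.param_groups, scaler.loss_scale)
if not should_skip:
    optimizer.step()
```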
/core/fp16/wrap.py:
--------------------------------------------------------------------------------
1 | from . import utils
2 |
3 | import functools
4 |
5 | import torch
6 |
7 | def make_cast_wrapper(orig_fn, cast_fn, handle,
8 | try_caching=False):
9 | @functools.wraps(orig_fn)
10 | def wrapper(*args, **kwargs):
11 | if not handle.is_active():
12 | return orig_fn(*args, **kwargs)
13 |
14 | input_types = [
15 | v.data.type() for v in list(args) + list(kwargs.values())
16 | if utils.is_fp_tensor(v)
17 | ]
18 | #print('wrapper: orig_fn:{}, input_types:{}'.format(orig_fn, input_types))
19 | input_type = input_types[0]
20 |
21 | if try_caching and handle.has_cache:
22 | args = list(args)
23 | for i in range(len(args)):
24 | if utils.should_cache(args[i]):
25 | args[i] = utils.cached_cast(cast_fn, args[i], handle.cache)
26 | for k in kwargs:
27 | if utils.should_cache(kwargs[k]):
28 | kwargs[k] = utils.cached_cast(cast_fn, kwargs[k], handle.cache)
29 | new_args = utils.casted_args(cast_fn,
30 | args,
31 | kwargs)
32 | output = orig_fn(*new_args, **kwargs)
33 |
34 | #if output.type() != input_type:
35 | # print('ori output type: {}, input type: {}'.format(output.type(), input_type))
36 | # return output.type(input_type)
37 | #return output
38 | return cast_output(output, input_type, verbose=False)
39 |
40 | return wrapper
41 |
42 | def cast_output(output, input_type, verbose=False):
43 | if isinstance(output, dict):
44 | keys = output.keys()
45 | for k in keys:
46 | output[k] = cast_output(output[k], input_type)
47 | return output
48 |
49 | if utils.is_fp_tensor(output) and output.type() != input_type:
50 | if verbose:
51 | print('ori output type: {}, input type: {}'.format(output.type(), input_type))
52 | return output.type(input_type)
53 | return output
54 |
55 | def cached_cast(mod, fn, cast_fn, handle,
56 | try_caching=False, verbose=False):
57 | if not utils.has_func(mod, fn):
58 | return
59 |
60 | orig_fn = utils.get_func(mod, fn)
61 | cast_fn = utils.verbosify(cast_fn, fn, verbose)
62 | wrapper = make_cast_wrapper(orig_fn, cast_fn, handle, try_caching)
63 | utils.set_func_save(handle, mod, fn, wrapper)
64 |
65 |
--------------------------------------------------------------------------------
/core/lr_scheduler/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import (StepLRScheduler, CosineLRScheduler, WarmupCosineLRScheduler, WarmupPolyLRScheduler,
2 | HTLTCosineLRScheduler, HTLTDualCosineLRScheduler, LinearLRScheduler)
3 |
4 | def lr_scheduler_entry(config):
5 | return globals()[config['type']+'LRScheduler'](**config['kwargs'])
6 |
--------------------------------------------------------------------------------
/core/make_param_group.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import shutil
3 | import torch
4 | import os
5 | import io
6 | import logging
7 | from collections import defaultdict
8 |
9 |
10 | from torch.nn import BatchNorm2d
11 |
12 | def param_group_no_wd(model):
13 | pgroup_no_wd = []
14 | names_no_wd = []
15 | pgroup_normal = []
16 |
17 | type2num = defaultdict(lambda : 0)
18 | for name,m in model.named_modules():
19 | if isinstance(m, torch.nn.Conv2d):
20 | if m.bias is not None:
21 | pgroup_no_wd.append(m.bias)
22 | names_no_wd.append(name+'.bias')
23 | type2num[m.__class__.__name__+'.bias'] += 1
24 | elif isinstance(m, torch.nn.Linear):
25 | if m.bias is not None:
26 | pgroup_no_wd.append(m.bias)
27 | names_no_wd.append(name+'.bias')
28 | type2num[m.__class__.__name__+'.bias'] += 1
29 | elif isinstance(m, torch.nn.BatchNorm2d) or isinstance(m, torch.nn.BatchNorm1d):
30 | if m.weight is not None:
31 | pgroup_no_wd.append(m.weight)
32 | names_no_wd.append(name+'.weight')
33 | type2num[m.__class__.__name__+'.weight'] += 1
34 | if m.bias is not None:
35 | pgroup_no_wd.append(m.bias)
36 | names_no_wd.append(name+'.bias')
37 | type2num[m.__class__.__name__+'.bias'] += 1
38 |
39 | for name,p in model.named_parameters():
40 | if not name in names_no_wd:
41 | pgroup_normal.append(p)
42 |
43 | return [{'params': pgroup_normal}, {'params': pgroup_no_wd, 'weight_decay': 0.0}], type2num
44 |
45 | def param_group_fc(model):
46 | logits_w_id = id(model.module.logits.weight)
47 | fc_group = []
48 | normal_group = []
49 | for p in model.parameters():
50 | if id(p) == logits_w_id:
51 | fc_group.append(p)
52 | else:
53 | normal_group.append(p)
54 | param_group = [{'params': fc_group}, {'params': normal_group}]
55 |
56 | return param_group
57 |
58 | def param_group_multitask(model):
59 | backbone_group = []
60 | neck_group = []
61 | decoder_group = []
62 | other_group = []
63 | for name, p in model.named_parameters():
64 | if 'module.backbone_module' in name:
65 | backbone_group.append(p)
66 | elif 'module.neck_module' in name:
67 | neck_group.append(p)
68 | elif 'module.decoder_module' in name:
69 | decoder_group.append(p)
70 | else:
71 | other_group.append(p)
72 |
73 | if len(other_group) > 0:
74 |         param_group = [{'params': backbone_group}, {'params': neck_group}, \
75 |                        {'params': decoder_group}, {'params': other_group}]
76 | else:
77 | param_group = [{'params': backbone_group}, {'params': neck_group}, \
78 | {'params': decoder_group}]
79 | return param_group
80 |
--------------------------------------------------------------------------------
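For context, `param_group_no_wd` splits parameters so that biases and norm affine parameters skip weight decay, and the returned groups plug directly into a torch optimizer. A minimal sketch (the layer sizes are arbitrary):

```python
import torch
from core.make_param_group import param_group_no_wd

model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3),
    torch.nn.BatchNorm2d(16),
    torch.nn.Linear(16, 4),
)
param_groups, type2num = param_group_no_wd(model)
# Group 0 decays normally; group 1 (biases, BN weights/biases) overrides wd to 0.
optimizer = torch.optim.SGD(param_groups, lr=0.1, momentum=0.9, weight_decay=1e-4)
```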
/core/memory.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 |
3 | import logging
4 | from contextlib import contextmanager
5 | from functools import wraps
6 | import torch
7 |
8 | __all__ = ["retry_if_cuda_oom"]
9 |
10 |
11 | @contextmanager
12 | def _ignore_torch_cuda_oom():
13 | """
14 | A context which ignores CUDA OOM exception from pytorch.
15 | """
16 | try:
17 | yield
18 | except RuntimeError as e:
19 | # NOTE: the string may change?
20 | if "CUDA out of memory. " in str(e):
21 | pass
22 | else:
23 | raise
24 |
25 |
26 | def retry_if_cuda_oom(func):
27 | """
28 | Makes a function retry itself after encountering
29 | pytorch's CUDA OOM error.
30 | It will first retry after calling `torch.cuda.empty_cache()`.
31 |
32 | If that still fails, it will then retry by trying to convert inputs to CPUs.
33 | In this case, it expects the function to dispatch to CPU implementation.
34 |     The return values may become CPU tensors as well, and it is the user's
35 |     responsibility to convert them back to CUDA tensors if needed.
36 |
37 | Args:
38 | func: a stateless callable that takes tensor-like objects as arguments
39 |
40 | Returns:
41 | a callable which retries `func` if OOM is encountered.
42 |
43 | Examples:
44 | ::
45 | output = retry_if_cuda_oom(some_torch_function)(input1, input2)
46 | # output may be on CPU even if inputs are on GPU
47 |
48 | Note:
49 | 1. When converting inputs to CPU, it will only look at each argument and check
50 | if it has `.device` and `.to` for conversion. Nested structures of tensors
51 | are not supported.
52 |
53 | 2. Since the function might be called more than once, it has to be
54 | stateless.
55 | """
56 |
57 | def maybe_to_cpu(x):
58 | try:
59 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to")
60 | except AttributeError:
61 | like_gpu_tensor = False
62 | if like_gpu_tensor:
63 | return x.to(device="cpu")
64 | else:
65 | return x
66 |
67 | @wraps(func)
68 | def wrapped(*args, **kwargs):
69 | with _ignore_torch_cuda_oom():
70 | return func(*args, **kwargs)
71 |
72 | # Clear cache and retry
73 | torch.cuda.empty_cache()
74 | with _ignore_torch_cuda_oom():
75 | return func(*args, **kwargs)
76 |
77 | # Try on CPU. This slows down the code significantly, therefore print a notice.
78 | logger = logging.getLogger(__name__)
79 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func)))
80 | new_args = (maybe_to_cpu(x) for x in args)
81 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()}
82 | return func(*new_args, **new_kwargs)
83 |
84 | return wrapped
85 |
--------------------------------------------------------------------------------
/core/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/models/__init__.py
--------------------------------------------------------------------------------
/core/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .vit import vit_base_patch16
2 |
3 | def backbone_entry(config):
4 | return globals()[config['type']](**config['kwargs'])
5 |
--------------------------------------------------------------------------------
/core/models/decoders/__init__.py:
--------------------------------------------------------------------------------
1 | from .network import AIOHead
2 |
3 | def decoder_entry(config):
4 | return globals()[config['type']](**config['kwargs'])
5 |
--------------------------------------------------------------------------------
/core/models/decoders/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification_losses import MarginCosineProductLoss
2 | from .classification_losses import ArcFaceLoss, Softmax_TripletLoss, Softmax_TripletLoss_wBN
3 | from .seg_losses import FSAuxCELoss, FocalDiceLoss, FocalDiceLoss_bce_cls_emb, FocalDiceLoss_bce_cls_emb_sample_weight
4 | from .pos_losses import BasePosLoss, POS_FocalDiceLoss_bce_cls_emb
5 | from .peddet_losses import DetFocalDiceLoss, DetFocalDiceLoss_hybrid
6 | from .pedattr_losses import CEL_Sigmoid
7 |
8 | def loss_entry(config):
9 | return globals()[config['type']](**config['kwargs'])
10 |
--------------------------------------------------------------------------------
/core/models/decoders/losses/pedattr_losses.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import numpy as np
8 |
9 |
10 |
11 | __all__ = ['CEL_Sigmoid']
12 |
13 | def ratio2weight(targets, ratio):
14 | ratio = torch.from_numpy(ratio).type_as(targets)
15 | pos_weights = targets * (1 - ratio)
16 | neg_weights = (1 - targets) * ratio
17 | weights = torch.exp(neg_weights + pos_weights)
18 |
19 |     # for the RAP dataloader, target elements may be 2; with or without smoothing, some elements can be greater than 1
20 | weights[targets > 1] = 0.0
21 |
22 | return weights
23 |
24 | class CEL_Sigmoid(nn.Module):
25 | def __init__(self, sample_weight=None, size_average=True, cfg=None):
26 | super(CEL_Sigmoid, self).__init__()
27 |
28 | self.sample_weight = sample_weight
29 |
30 | if sample_weight is not None:
31 | self.sample_weight = np.array(self.sample_weight)
32 |
33 | self.size_average = size_average
34 |
35 | def forward(self, input_var):
36 | logits = input_var['logit']
37 | targets = input_var['label']
38 | batch_size = logits.shape[0]
39 |
40 | weight_mask = (targets != -1) # mask -1 labels from HARDHC dataset
41 | loss = F.binary_cross_entropy_with_logits(logits, targets, weight=weight_mask, reduction='none')
42 |
43 | targets_mask = torch.where(targets.detach().cpu() > 0.5, torch.ones(1), torch.zeros(1))
44 | if self.sample_weight is not None:
45 | weight = ratio2weight(targets_mask, self.sample_weight)
46 | loss = (loss * weight.cuda())
47 |
48 | loss = loss.sum() / batch_size if self.size_average else loss.sum()
49 |
50 | output = {'loss': loss, 'top1': torch.Tensor([0]).cuda()}
51 |
52 | return output
53 |
54 | def __repr__(self):
55 | return self.__class__.__name__ + '(' \
56 | + 'sample_weight=' + str(self.sample_weight) \
57 | + ', size_average=' + str(self.size_average) + ')'
58 |
--------------------------------------------------------------------------------
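To illustrate the reweighting above: `ratio` is the per-attribute positive-label frequency, and `ratio2weight` computes `exp(targets * (1 - ratio) + (1 - targets) * ratio)`, so the rarer outcome gets the larger weight. A tiny numeric check, assuming one attribute that is positive 10% of the time:

```python
import numpy as np
import torch
from core.models.decoders.losses.pedattr_losses import ratio2weight

targets = torch.tensor([[1.0], [0.0]])  # one positive, one negative sample
ratio = np.array([0.1])                 # assumed positive frequency

print(ratio2weight(targets, ratio))
# positive sample: exp(1 - 0.1) = exp(0.9) ~= 2.46  (rare label, larger weight)
# negative sample: exp(0.1)              ~= 1.11    (common label, smaller weight)
```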
/core/models/decoders/losses/peddet_losses.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from .matcher import DetectionHungarianMatcher
6 | from .criterion import DetSetCriterion
7 |
8 | class DetFocalDiceLoss(nn.Module):
9 | def __init__(self, cfg):
10 | super(DetFocalDiceLoss, self).__init__()
11 | matcher = DetectionHungarianMatcher(
12 | cost_class=cfg.class_weight,
13 | cost_bbox=cfg.bbox_weight,
14 | cost_giou=cfg.giou_weight,
15 | )
16 |
17 | weight_dict = {"loss_ce": cfg.class_weight,
18 | "loss_bbox": cfg.bbox_weight,
19 | "loss_giou": cfg.giou_weight}
20 |
21 | if cfg.deep_supervision:
22 | aux_weight_dict = {}
23 | for i in range(cfg.dec_layers-1):
24 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...}
25 | aux_weight_dict.update({k + f'_enc': v for k, v in weight_dict.items()})
26 | weight_dict.update(aux_weight_dict)
27 |
28 |
29 |
30 | self.fd_loss = DetSetCriterion(
31 | cfg.num_classes,
32 | ginfo=cfg.ginfo,
33 | matcher=matcher,
34 | weight_dict=weight_dict,
35 | losses=["labels", "boxes"],
36 | focal_alpha=cfg.focal_alpha,
37 | ign_thr=cfg.ign_thr,
38 | )
39 |
40 | self.cfg = cfg
41 |
42 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx}
43 | losses = self.fd_loss(outputs, targets)
44 | for k in list(losses.keys()):
45 | if k in self.fd_loss.weight_dict:
46 | losses[k] *= self.fd_loss.weight_dict[k]
47 | elif 'loss' in k:
48 | # remove this loss if not specified in `weight_dict`
49 | losses.pop(k)
50 | return losses
51 |
52 |
53 | class DetFocalDiceLoss_hybrid(DetFocalDiceLoss):
54 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx}
55 | multi_targets = copy.deepcopy(targets)
56 | losses = self.fd_loss(outputs, targets)
57 |
58 | for target in multi_targets:
59 | target["boxes"] = target["boxes"].repeat(self.cfg.k_one2many, 1)
60 | target["labels"] = target["labels"].repeat(self.cfg.k_one2many)
61 |             assert len(target["iscrowd"].shape) == 1, f'expected 1-D iscrowd, got shape {target["iscrowd"].shape}'
62 | target["iscrowd"] = target["iscrowd"].repeat(self.cfg.k_one2many)
63 | outputs_one2many = dict()
64 | outputs_one2many["pred_logits"] = outputs["pred_logits_one2many"]
65 | outputs_one2many["pred_boxes"] = outputs["pred_boxes_one2many"]
66 | outputs_one2many["aux_outputs"] = outputs["aux_outputs_one2many"]
67 | outputs_one2many["mask"] = outputs["mask"]
68 | losses_one2many = self.fd_loss(outputs_one2many, multi_targets)
69 |
70 |
71 | for k in list(losses.keys()):
72 | if k in self.fd_loss.weight_dict:
73 | losses[k] *= self.fd_loss.weight_dict[k]
74 | elif 'loss' in k:
75 | # remove this loss if not specified in `weight_dict`
76 | losses.pop(k)
77 | for k in list(losses_one2many.keys()): # repeat
78 | if k in self.fd_loss.weight_dict:
79 | losses_one2many[k] *= self.fd_loss.weight_dict[k]
80 | elif 'loss' in k:
81 | losses_one2many.pop(k)
82 |
83 | for key, value in losses_one2many.items():
84 | if key + "_one2many" in losses.keys():
85 | losses[key + "_one2many"] += value * self.cfg.get('lambda_one2many', 1)
86 | else:
87 | losses[key + "_one2many"] = value * self.cfg.get('lambda_one2many', 1)
88 |
89 | return losses
--------------------------------------------------------------------------------
/core/models/decoders/losses/pos_losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from .matcher import HungarianMatcher, DirectMatcher, RedundantQMatcher, POSDirectMatcher
5 | from .criterion import SetCriterion, POSSetCriterion
6 |
7 | class BasePosLoss(nn.Module):
8 | def __init__(self, target_type, use_target_weight=True, cfg=None):
9 | super(BasePosLoss, self).__init__()
10 | self.criterion = nn.MSELoss()
11 |
12 | self.target_type = target_type
13 | self.use_target_weight = use_target_weight
14 |
15 | self.cfg = cfg
16 |
17 | def get_loss(self, num_joints, heatmaps_pred, heatmaps_gt, target_weight):
18 | loss = 0.
19 | for idx in range(num_joints):
20 | heatmap_pred = heatmaps_pred[idx].squeeze(1)
21 | heatmap_gt = heatmaps_gt[idx].squeeze(1)
22 | if self.use_target_weight:
23 | loss += self.criterion(heatmap_pred * target_weight[:, idx],
24 | heatmap_gt * target_weight[:, idx])
25 | else:
26 | loss += self.criterion(heatmap_pred, heatmap_gt)
27 | return loss
28 |
29 | def forward(self, outputs, target, target_weight): # {"aux_outputs": xx, 'xx': xx}
30 | """Forward function."""
31 | output = outputs['pred_masks'] # {'pred_logits':'pred_masks':}
32 |
33 | batch_size = output.size(0)
34 | num_joints = output.size(1)
35 |
36 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
37 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
38 |
39 | loss = self.get_loss(num_joints, heatmaps_pred, heatmaps_gt, target_weight)
40 |
41 | # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
42 | if "aux_outputs" in outputs and self.cfg.get('aux_loss', True):
43 | for aux_outputs in outputs["aux_outputs"]:
44 | heatmaps_pred = aux_outputs['pred_masks'].reshape((batch_size, num_joints, -1)).split(1, 1)
45 |
46 | loss = loss + self.get_loss(num_joints, heatmaps_pred, heatmaps_gt, target_weight)
47 |
48 | return loss / num_joints
49 |
50 | class POS_FocalDiceLoss_bce_cls_emb(nn.Module):
51 | def __init__(self, target_type, use_target_weight=True, cfg=None):
52 | super(POS_FocalDiceLoss_bce_cls_emb, self).__init__()
53 | self.target_type = target_type
54 | self.use_target_weight = use_target_weight
55 |
56 | matcher = POSDirectMatcher()
57 |
58 | weight_dict = {"loss_bce_pos": cfg.class_weight,
59 | "loss_mask_pos": cfg.mask_weight,
60 | }
61 |
62 | if cfg.get('deep_supervision', False):
63 | aux_weight_dict = {}
64 | for i in range(cfg.dec_layers):
65 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...}
66 | weight_dict.update(aux_weight_dict)
67 |
68 | self.fd_loss = POSSetCriterion(
69 | cfg.num_classes,
70 | ginfo=cfg.ginfo,
71 | matcher=matcher,
72 | weight_dict=weight_dict,
73 | losses=[
74 | "pos_mask",
75 | "pos_bce_labels",
76 | ],
77 | eos_coef=cfg.get('eos_coef', 0.1),
78 | aux=cfg.get('deep_supervision', False),
79 | ignore_blank=cfg.get('ignore_blank', True),
80 | sample_weight=cfg.get('sample_weight', None)
81 | )
82 |
83 | self.cfg = cfg
84 |
85 | def forward(self, outputs, targets, target_weight, **kwargs): # {"aux_outputs": xx, 'xx': xx}
86 | losses = self.fd_loss(outputs, targets)
87 |
88 | for k in list(losses.keys()):
89 | if k in self.fd_loss.weight_dict:
90 | losses[k] *= self.fd_loss.weight_dict[k]
91 | else:
92 | # remove this loss if not specified in `weight_dict`
93 | losses.pop(k)
94 |
95 | return losses
96 |
--------------------------------------------------------------------------------
/core/models/decoders/losses/seg_losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from .matcher import HungarianMatcher, DirectMatcher, RedundantQMatcher
5 | from .criterion import SetCriterion
6 |
7 |
8 | class FSCELoss(nn.Module):
9 | def __init__(self, configer=None, **kwargs):
10 | super(FSCELoss, self).__init__()
11 | self.configer = configer
12 | weight = None
13 | if 'ce_weight' in self.configer:
14 | weight = self.configer['ce_weight']
15 | weight = torch.FloatTensor(weight).cuda()
16 |
17 |         reduction = 'mean'  # 'elementwise_mean' is deprecated in modern torch
18 | if 'ce_reduction' in self.configer:
19 | reduction = self.configer['ce_reduction']
20 |
21 | ignore_index = -1
22 | if 'ce_ignore_index' in self.configer:
23 | ignore_index = self.configer['ce_ignore_index']
24 |
25 | self.ce_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction)
26 |
27 | def forward(self, inputs, *targets, weights=None, **kwargs):
28 | if isinstance(inputs, tuple) or isinstance(inputs, list):
29 | if weights is None:
30 | weights = [1.0] * len(inputs)
31 |
32 | for i in range(len(inputs)):
33 | if i == 0:
34 | if len(targets) > 1:
35 | target = self._scale_target(targets[i], (inputs[i].size(2), inputs[i].size(3)))
36 | loss = weights[i] * self.ce_loss(inputs[i], target)
37 | else:
38 | target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3)))
39 | loss = weights[i] * self.ce_loss(inputs[i], target)
40 | else:
41 | if len(targets) > 1:
42 | target = self._scale_target(targets[i], (inputs[i].size(2), inputs[i].size(3)))
43 | loss += weights[i] * self.ce_loss(inputs[i], target)
44 | else:
45 | target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3)))
46 | loss += weights[i] * self.ce_loss(inputs[i], target)
47 |
48 | else:
49 | target = self._scale_target(targets[0], (inputs.size(2), inputs.size(3)))
50 | loss = self.ce_loss(inputs, target)
51 |
52 | return loss
53 |
54 | @staticmethod
55 | def _scale_target(targets_, scaled_size):
56 | targets = targets_.clone().unsqueeze(1).float()
57 | targets = F.interpolate(targets, size=scaled_size, mode='nearest')
58 | return targets.squeeze(1).long()
59 |
60 |
61 | class FSAuxCELoss(nn.Module):
62 | def __init__(self, configer=None):
63 | super(FSAuxCELoss, self).__init__()
64 | self.configer = configer
65 | self.ce_loss = FSCELoss(self.configer)
66 |
67 | def forward(self, inputs, targets, **kwargs):
68 | aux_out, seg_out = inputs
69 | seg_loss = self.ce_loss(seg_out, targets)
70 | aux_loss = self.ce_loss(aux_out, targets)
71 | loss = self.configer['loss_weights']['seg_loss'] * seg_loss
72 | loss = loss + self.configer['loss_weights']['aux_loss'] * aux_loss
73 | return loss
74 |
75 |
76 | class FocalDiceLoss(nn.Module):
77 | def __init__(self, cfg):
78 | super(FocalDiceLoss, self).__init__()
79 | matcher = HungarianMatcher(
80 | cost_class=cfg.class_weight,
81 | cost_mask=cfg.mask_weight,
82 | cost_dice=cfg.dice_weight,
83 | num_points=cfg.num_points,
84 | )
85 |
86 | weight_dict = {"loss_ce": cfg.class_weight,
87 | "loss_mask": cfg.mask_weight,
88 | "loss_dice": cfg.dice_weight}
89 |
90 | if cfg.deep_supervision:
91 | aux_weight_dict = {}
92 | for i in range(cfg.dec_layers):
93 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...}
94 | weight_dict.update(aux_weight_dict)
95 |
96 | self.fd_loss = SetCriterion(
97 | cfg.num_classes,
98 | ginfo=cfg.ginfo,
99 | matcher=matcher,
100 | weight_dict=weight_dict,
101 | eos_coef=cfg.no_object_weight,
102 | losses=["labels", "masks"],
103 | num_points=cfg.num_points,
104 | oversample_ratio=cfg.oversample_ratio,
105 | importance_sample_ratio=cfg.importance_sample_ratio,
106 | )
107 |
108 | self.cfg = cfg
109 |
110 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx}
111 | losses = self.fd_loss(outputs, targets)
112 |
113 | for k in list(losses.keys()):
114 | if k in self.fd_loss.weight_dict:
115 | losses[k] *= self.fd_loss.weight_dict[k]
116 | else:
117 | # remove this loss if not specified in `weight_dict`
118 | losses.pop(k)
119 |
120 | return losses
121 |
122 | class FocalDiceLoss_bce_cls_emb(nn.Module):
123 | def __init__(self, cfg):
124 | super(FocalDiceLoss_bce_cls_emb, self).__init__()
125 | matcher = DirectMatcher(num_points=cfg.num_points,)
126 |
127 | weight_dict = { "loss_bce": cfg.class_weight,
128 | "loss_mask": cfg.mask_weight,
129 | "loss_dice": cfg.dice_weight}
130 |
131 | if cfg.deep_supervision:
132 | aux_weight_dict = {}
133 | for i in range(cfg.dec_layers):
134 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # {loss_ce_i : cfg.class_weight ...}
135 | weight_dict.update(aux_weight_dict)
136 |
137 | self.fd_loss = SetCriterion(
138 | cfg.num_classes,
139 | ginfo=cfg.ginfo,
140 | matcher=matcher,
141 | weight_dict=weight_dict,
142 | eos_coef=cfg.no_object_weight,
143 | losses=[
144 | "bce_labels",
145 | "masks",
146 | ],
147 | num_points=cfg.num_points,
148 | oversample_ratio=cfg.oversample_ratio,
149 | importance_sample_ratio=cfg.importance_sample_ratio,
150 | )
151 |
152 | self.cfg = cfg
153 |
154 | def forward(self, outputs, targets, **kwargs): # {"aux_outputs": xx, 'xx': xx}
155 | losses = self.fd_loss(outputs, targets)
156 |
157 | for k in list(losses.keys()):
158 | if k in self.fd_loss.weight_dict:
159 | losses[k] *= self.fd_loss.weight_dict[k]
160 | else:
161 | # remove this loss if not specified in `weight_dict`
162 | losses.pop(k)
163 |
164 | return losses
165 |
166 | class FocalDiceLoss_bce_cls_emb_sample_weight(FocalDiceLoss):
167 | def __init__(self, cfg):
168 | super(FocalDiceLoss_bce_cls_emb_sample_weight, self).__init__(cfg)
169 | matcher = DirectMatcher(num_points=cfg.num_points,)
170 |
171 | weight_dict = { "loss_bce": cfg.class_weight,
172 | "loss_mask": cfg.mask_weight,
173 | "loss_dice": cfg.dice_weight}
174 |
175 | if cfg.deep_supervision:
176 | aux_weight_dict = {}
177 | for i in range(cfg.dec_layers):
178 | aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) # e.g. {"loss_bce_0": cfg.class_weight, ...}
179 | weight_dict.update(aux_weight_dict)
180 |
181 | self.fd_loss = SetCriterion(
182 | cfg.num_classes,
183 | ginfo=cfg.ginfo,
184 | matcher=matcher,
185 | weight_dict=weight_dict,
186 | eos_coef=cfg.no_object_weight,
187 | losses=[
188 | "bce_labels",
189 | "masks",
190 | ],
191 | num_points=cfg.num_points,
192 | oversample_ratio=cfg.oversample_ratio,
193 | importance_sample_ratio=cfg.importance_sample_ratio,
194 | sample_weight=cfg.get('sample_weight', None),
195 | )
196 |
197 | self.cfg = cfg
198 |
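Note on the deep-supervision branch shared by the three heads above: every base loss weight is duplicated once per decoder layer, so the criterion can apply the same coefficients to the auxiliary outputs. A minimal sketch of the expansion, with illustrative weights standing in for the cfg values:

    weight_dict = {"loss_bce": 2.0, "loss_mask": 5.0, "loss_dice": 5.0}
    dec_layers = 3  # stands in for cfg.dec_layers

    aux_weight_dict = {}
    for i in range(dec_layers):
        # produces "loss_bce_0": 2.0, "loss_mask_0": 5.0, ... per layer
        aux_weight_dict.update({f"{k}_{i}": v for k, v in weight_dict.items()})
    weight_dict.update(aux_weight_dict)

    assert len(weight_dict) == 3 * (dec_layers + 1)  # 3 base keys + 3 per layer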
--------------------------------------------------------------------------------
/core/models/decoders/losses/test_time.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from scipy.optimize import linear_sum_assignment
4 | from torch import nn
5 | import numpy as np
6 | def point_sample(input, point_coords, **kwargs):
7 | """
8 | A wrapper around :func:`torch.nn.functional.grid_sample` to support 3D point_coords tensors.
9 | Unlike :func:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside
10 | [0, 1] x [0, 1] square.
11 |
12 | Args:
13 | input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid.
14 | point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains
15 | [0, 1] x [0, 1] normalized point coordinates.
16 |
17 | Returns:
18 | output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains
19 | features for points in `point_coords`. The features are obtained via bilinear
20 | interpolation from `input` the same way as :func:`torch.nn.functional.grid_sample`.
21 | """
22 | add_dim = False
23 | if point_coords.dim() == 3:
24 | add_dim = True
25 | point_coords = point_coords.unsqueeze(2)
26 | output = F.grid_sample(input, 2.0 * point_coords - 1.0, **kwargs)
27 | if add_dim:
28 | output = output.squeeze(3)
29 | return output
30 |
31 |
32 | def batch_dice_loss(inputs: torch.Tensor, targets: torch.Tensor):
33 | """
34 | Compute the DICE loss, similar to generalized IOU for masks
35 | Args:
36 | inputs: A float tensor of arbitrary shape.
37 | The predictions for each example.
38 | targets: A float tensor with the same shape as inputs. Stores the binary
39 | classification label for each element in inputs
40 | (0 for the negative class and 1 for the positive class).
41 | """
42 | inputs = inputs.sigmoid()
43 | inputs = inputs.flatten(1)
44 | numerator = 2 * torch.einsum("nc,mc->nm", inputs, targets)
45 | denominator = inputs.sum(-1)[:, None] + targets.sum(-1)[None, :]
46 | loss = 1 - (numerator + 1) / (denominator + 1)
47 | return loss
48 |
49 |
50 | batch_dice_loss_jit = torch.jit.script(
51 | batch_dice_loss
52 | ) # type: torch.jit.ScriptModule
53 |
54 |
55 | def batch_sigmoid_ce_loss(inputs: torch.Tensor, targets: torch.Tensor):
56 | """
57 | Args:
58 | inputs: A float tensor of arbitrary shape.
59 | The predictions for each example.
60 | targets: A float tensor with the same shape as inputs. Stores the binary
61 | classification label for each element in inputs
62 | (0 for the negative class and 1 for the positive class).
63 | Returns:
64 | Loss tensor
65 | """
66 | hw = inputs.shape[1]
67 |
68 | pos = F.binary_cross_entropy_with_logits(
69 | inputs, torch.ones_like(inputs), reduction="none"
70 | )
71 | neg = F.binary_cross_entropy_with_logits(
72 | inputs, torch.zeros_like(inputs), reduction="none"
73 | )
74 |
75 | loss = torch.einsum("nc,mc->nm", pos, targets) + torch.einsum(
76 | "nc,mc->nm", neg, (1 - targets)
77 | )
78 |
79 | return loss / hw
80 |
81 | num_queries = 80
82 | num_points = 12304
83 | h, w = 120, 120
84 | redundant_queries = 4
85 |
86 | tgt_ids = list(range(20))
87 |
88 | out_masks = torch.rand((num_queries,h,w)).cuda()
89 | tgt_masks = torch.rand((20,h,w)).cuda()
90 |
91 | import time
92 | s = time.time()
93 | ind = []
94 | for _ in range(10):
95 | out_prob = torch.full(
96 | (num_queries, num_queries//redundant_queries), 0, dtype=torch.float,
97 | device=out_masks.device
98 | )
99 |
100 | for i in range(num_queries // redundant_queries):
101 | out_prob[4 * i:4 * (i + 1), i] = 1
102 |
103 | cost_class = -out_prob[:, tgt_ids]
104 | out_mask = out_masks[:, None]
105 | tgt_mask = tgt_masks[:, None]
106 | point_coords = torch.rand(1, num_points, 2, device=out_mask.device)
107 | # get gt labels
108 | tgt_mask = point_sample(
109 | tgt_mask,
110 | point_coords.repeat(tgt_mask.shape[0], 1, 1),
111 | align_corners=False,
112 | ).squeeze(1) # [valid_classes, self.num_points]
113 |
114 | out_mask = point_sample(
115 | out_mask,
116 | point_coords.repeat(out_mask.shape[0], 1, 1),
117 | align_corners=False,
118 | ).squeeze(1) # [num_queries, self.num_points]
119 |
120 | out_mask = out_mask.float()
121 | tgt_mask = tgt_mask.float()
122 | # Compute the focal loss between masks
123 | # cost_mask = batch_sigmoid_ce_loss_jit(out_mask, tgt_mask)
124 | cost_mask = batch_sigmoid_ce_loss(out_mask, tgt_mask) # [num_queries, valid_classes]
125 |
126 | # Compute the dice loss between masks
127 | # cost_dice = batch_dice_loss_jit(out_mask, tgt_mask)
128 | cost_dice = batch_dice_loss(out_mask, tgt_mask)
129 |
130 | c = torch.full(cost_dice.shape, float("inf"),)
131 |
132 | for i in tgt_ids:
133 | c[i*redundant_queries:(i+1)*redundant_queries, i]=0
134 |
135 | c += cost_mask.cpu() + cost_dice.cpu()  # combine mask and dice matching costs
136 |
137 | ind.append(linear_sum_assignment(c))
138 | print([i-4*j for (i, j) in ind])
139 | time_cost = time.time() - s
140 | print(time_cost)
141 | print('----')
142 |
143 | tgt_ids = list(range(20))
144 |
145 | out_masks = torch.rand((num_queries,h,w)).cuda()
146 | tgt_masks = torch.rand((20,h,w)).cuda()
147 | s = time.time()
148 | for _ in range(10):
149 | for idx, label in enumerate(tgt_ids):
150 | # import pdb;pdb.set_trace()
151 | out_mask = out_masks[idx*redundant_queries:(idx+1)*redundant_queries]
152 | tgt_mask = tgt_masks[idx]
153 | # out_mask = out_mask[None,:]
154 | tgt_mask = tgt_mask[None,:]
155 |
156 | out_mask = out_mask[:, None]
157 | tgt_mask = tgt_mask[:, None]
158 |
159 | point_coords = torch.rand(1, num_points, 2, device=out_mask.device)
160 | # get gt labels
161 | tgt_mask = point_sample(
162 | tgt_mask,
163 | point_coords.repeat(tgt_mask.shape[0], 1, 1),
164 | align_corners=False,
165 | ).squeeze(1) # [valid_classes, self.num_points]
166 |
167 | out_mask = point_sample(
168 | out_mask,
169 | point_coords.repeat(out_mask.shape[0], 1, 1),
170 | align_corners=False,
171 | ).squeeze(1) # [num_queries, self.num_points]
172 |
173 | out_mask = out_mask.float()
174 | tgt_mask = tgt_mask.float()
175 | # Compute the focal loss between masks
176 | # cost_mask = batch_sigmoid_ce_loss_jit(out_mask, tgt_mask)
177 | cost_mask = batch_sigmoid_ce_loss(out_mask, tgt_mask) # [num_queries, valid_classes]
178 |
179 | # Compute the dice loss between masks
180 | # cost_dice = batch_dice_loss_jit(out_mask, tgt_mask)
181 | cost_dice = batch_dice_loss(out_mask, tgt_mask)
182 |
183 | indices = np.argmax(cost_mask.cpu() + cost_dice.cpu())
184 |
185 | t = time.time() - s
186 | print(t)
187 |
188 |
189 |
190 |
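This file is a standalone timing script: it compares one global Hungarian solve over a block-constrained cost matrix (each target j may only match its redundant_queries dedicated queries) against a per-target loop. A minimal sketch of the constrained assignment, with illustrative sizes:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    R, num_targets = 4, 5
    cost = np.random.rand(R * num_targets, num_targets)
    c = np.full(cost.shape, np.inf)            # inf forbids a pairing
    for j in range(num_targets):
        c[R * j:R * (j + 1), j] = cost[R * j:R * (j + 1), j]
    rows, cols = linear_sum_assignment(c)      # picks the cheapest query per block
    assert all(c_ == r_ // R for r_, c_ in zip(rows, cols))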
--------------------------------------------------------------------------------
/core/models/decoders/network/__init__.py:
--------------------------------------------------------------------------------
1 | from .meta_arch.aio_head import AIOHead
2 |
--------------------------------------------------------------------------------
/core/models/decoders/network/meta_arch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/models/decoders/network/meta_arch/__init__.py
--------------------------------------------------------------------------------
/core/models/decoders/network/transformer_decoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .transformer_decoder import TransformerDecoder
2 |
--------------------------------------------------------------------------------
/core/models/decoders/network/transformer_decoder/position_encoding.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # # Modified by Bowen Cheng from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py
3 | """
4 | Various positional encodings for the transformer.
5 | """
6 | import math
7 |
8 | import torch
9 | from torch import nn
10 |
11 |
12 | class PositionEmbeddingSine(nn.Module):
13 | """
14 | This is a more standard version of the position embedding, very similar to the one
15 | used by the Attention is all you need paper, generalized to work on images.
16 | """
17 |
18 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
19 | super().__init__()
20 | self.num_pos_feats = num_pos_feats
21 | self.temperature = temperature
22 | self.normalize = normalize
23 | if scale is not None and normalize is False:
24 | raise ValueError("normalize should be True if scale is passed")
25 | if scale is None:
26 | scale = 2 * math.pi
27 | self.scale = scale
28 |
29 | def forward(self, x, mask=None):
30 | if mask is None:
31 | mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
32 | not_mask = ~mask
33 | y_embed = not_mask.cumsum(1, dtype=torch.float32)
34 | x_embed = not_mask.cumsum(2, dtype=torch.float32)
35 | if self.normalize:
36 | eps = 1e-6
37 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
38 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
39 |
40 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
41 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
42 |
43 | pos_x = x_embed[:, :, :, None] / dim_t
44 | pos_y = y_embed[:, :, :, None] / dim_t
45 | pos_x = torch.stack(
46 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
47 | ).flatten(3)
48 | pos_y = torch.stack(
49 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
50 | ).flatten(3)
51 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
52 | return pos
53 |
54 | def __repr__(self, _repr_indent=4):
55 | head = "Positional encoding " + self.__class__.__name__
56 | body = [
57 | "num_pos_feats: {}".format(self.num_pos_feats),
58 | "temperature: {}".format(self.temperature),
59 | "normalize: {}".format(self.normalize),
60 | "scale: {}".format(self.scale),
61 | ]
62 | # _repr_indent = 4
63 | lines = [head] + [" " * _repr_indent + line for line in body]
64 | return "\n".join(lines)
65 |
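A quick shape check of PositionEmbeddingSine as defined above (sizes illustrative): the channel axis carries 2 * num_pos_feats values, y-encodings first, then x-encodings.

    import torch

    pe = PositionEmbeddingSine(num_pos_feats=64, normalize=True)
    feat = torch.zeros(2, 256, 32, 48)    # (N, C, H, W); C itself is ignored
    pos = pe(feat)
    assert pos.shape == (2, 128, 32, 48)  # (N, 2 * num_pos_feats, H, W)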
--------------------------------------------------------------------------------
/core/models/model_entry.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from core.utils import (add_task_specific, add_neck_specific, add_decoder_specific, add_backbone_specific,
7 | add_aio_decoder_specific, add_aio_backbone_specific, add_aio_neck_specific)
8 |
9 | class model_entry(nn.Module):
10 | def __init__(self, backbone_module, neck_module, decoder_module):
11 | super(model_entry, self).__init__()
12 | self.backbone_module = backbone_module
13 | self.neck_module = neck_module
14 | self.decoder_module = decoder_module
15 | add_task_specific(self, False)
16 | add_backbone_specific(self.backbone_module, True)
17 | add_neck_specific(self.neck_module, True)
18 | add_decoder_specific(self.decoder_module, True)
19 | if hasattr(self.decoder_module, 'loss'):
20 | if hasattr(self.decoder_module.loss, 'classifier'):
21 | add_task_specific(self.decoder_module.loss, True)
22 |
23 | def forward(self, input_var, current_step):
24 | x = self.backbone_module(input_var) # {'image': img_mask, 'label': target_mask, 'filename': img_name, 'backbone_output':xxx}
25 | x = self.neck_module(x)
26 | decoder_feature = self.decoder_module(x)
27 | return decoder_feature
28 |
29 |
30 | class aio_entry(nn.Module):
31 | def __init__(self, backbone_module, neck_module, decoder_module):
32 | super(aio_entry, self).__init__()
33 | self.backbone_module = backbone_module
34 | self.neck_module = neck_module
35 | self.decoder_module = decoder_module
36 | add_task_specific(self, False)
37 | add_aio_backbone_specific(self.backbone_module, True, self.backbone_module.task_sp_list)
38 | add_aio_neck_specific(self.neck_module, True, self.neck_module.task_sp_list)
39 | add_aio_decoder_specific(self.decoder_module, True, self.decoder_module.task_sp_list,
40 | self.decoder_module.neck_sp_list)
41 |
42 | def forward(self, input_var, current_step):
43 | if current_step < self.backbone_module.freeze_iters:
44 | with torch.no_grad():
45 | x = self.backbone_module(input_var) # {'image': img_mask, 'label': target_mask, 'filename': img_name, 'backbone_output':xxx}
46 | else:
47 | x = self.backbone_module(input_var) # {'image': img_mask, 'label': target_mask, 'filename': img_name, 'backbone_output':xxx}
48 | x = self.neck_module(x)
49 | decoder_feature = self.decoder_module(x)
50 | return decoder_feature
51 |
52 |
53 | class aio_entry_v2(aio_entry):
54 | def __init__(self, backbone_module, neck_module, decoder_module):
55 | super(aio_entry, self).__init__()
56 | self.backbone_module = backbone_module
57 | self.neck_module = neck_module
58 | self.decoder_module = decoder_module
59 | add_task_specific(self, False)
60 | add_aio_backbone_specific(self.backbone_module, True, self.backbone_module.task_sp_list,
61 | self.backbone_module.neck_sp_list)
62 | add_aio_neck_specific(self.neck_module, True, self.neck_module.task_sp_list)
63 | add_aio_decoder_specific(self.decoder_module, True, self.decoder_module.task_sp_list,
64 | self.decoder_module.neck_sp_list)
65 |
66 |
67 |
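The only behavioral difference between model_entry's and aio_entry's forward is the gradient-free backbone warm-up. A minimal sketch of that pattern, with an illustrative stand-in module and threshold:

    import torch
    import torch.nn as nn

    backbone = nn.Linear(8, 8)
    backbone.freeze_iters = 1000   # illustrative; read from config in the real code
    x = torch.randn(2, 8)

    current_step = 500
    if current_step < backbone.freeze_iters:
        with torch.no_grad():      # no graph is built: the backbone stays frozen
            y = backbone(x)
    else:
        y = backbone(x)
    assert not y.requires_grad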
--------------------------------------------------------------------------------
/core/models/necks/DoNothing.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 | __all__ = ['DoNothing']
8 |
9 | class DoNothing(nn.Module):
10 | def __init__(self, **kwargs):
11 | super(DoNothing, self).__init__()
12 |
13 | def forward(self, x):
14 | x.update({'neck_output':x['backbone_output']})
15 | return x
16 |
--------------------------------------------------------------------------------
/core/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .DoNothing import *
2 | from .simple_neck import SimpleNeck
3 |
4 | def neck_entry(config):
5 | return globals()[config['type']](**config['kwargs'])
6 |
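neck_entry is a thin config-driven registry: it looks the class name up in this module's globals and forwards the kwargs. Illustrative usage:

    config = {'type': 'DoNothing', 'kwargs': {}}
    neck = neck_entry(config)                     # equivalent to DoNothing()
    out = neck({'backbone_output': 'features'})   # adds the 'neck_output' key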
--------------------------------------------------------------------------------
/core/models/necks/simple_neck.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class Norm2d(nn.Module):
7 | """
8 | A LayerNorm variant, popularized by Transformers, that performs point-wise mean and
9 | variance normalization over the channel dimension for inputs that have shape
10 | (batch_size, channels, height, width).
11 | https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa B950
12 | """
13 |
14 | def __init__(self, embed_dim, eps=1e-6):
15 | super().__init__()
16 | self.weight = nn.Parameter(torch.ones(embed_dim))
17 | self.bias = nn.Parameter(torch.zeros(embed_dim))
18 | self.eps = eps
19 | self.normalized_shape = (embed_dim,)
20 |
21 | # >>> workaround for compatibility
22 | self.ln = nn.LayerNorm(embed_dim, eps=1e-6)
23 | self.ln.weight = self.weight
24 | self.ln.bias = self.bias
25 |
26 | def forward(self, x):
27 | u = x.mean(1, keepdim=True)
28 | s = (x - u).pow(2).mean(1, keepdim=True)
29 | x = (x - u) / torch.sqrt(s + self.eps)
30 | x = self.weight[:, None, None] * x + self.bias[:, None, None]
31 | return x
32 |
33 |
34 | class Conv2d(torch.nn.Conv2d):
35 | """
36 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features.
37 | """
38 |
39 | def __init__(self, *args, **kwargs):
40 | """
41 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
42 |
43 | Args:
44 | norm (nn.Module, optional): a normalization layer
45 | activation (callable(Tensor) -> Tensor): a callable activation function
46 |
47 | It assumes that norm layer is used before activation.
48 | """
49 | norm = kwargs.pop("norm", None)
50 | activation = kwargs.pop("activation", None)
51 | super().__init__(*args, **kwargs)
52 |
53 | self.norm = norm
54 | self.activation = activation
55 |
56 | def forward(self, x):
57 | x = F.conv2d(
58 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups
59 | )
60 | if self.norm is not None:
61 | x = self.norm(x)
62 | if self.activation is not None:
63 | x = self.activation(x)
64 | return x
65 |
66 |
67 | def _get_activation(activation):
68 | """Return an activation function given a string"""
69 | if activation == "relu":
70 | return nn.ReLU()
71 | elif activation == "gelu":
72 | return nn.GELU()
73 | else:
74 | raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
75 |
76 |
77 | class SimpleNeck(nn.Module):
78 | def __init__(self,
79 | mask_dim,
80 | backbone, # placeholder
81 | bn_group,
82 | activation='gelu',
83 | task_sp_list=(),
84 | mask_forward=True
85 | ):
86 | super(SimpleNeck, self).__init__()
87 | self.task_sp_list = task_sp_list
88 |
89 | self.vis_token_dim = self.embed_dim = backbone.embed_dim
90 | self.mask_dim = mask_dim
91 |
92 | self.mask_map = nn.Sequential(
93 | nn.ConvTranspose2d(self.embed_dim, self.embed_dim, kernel_size=2, stride=2),
94 | Norm2d(self.embed_dim),
95 | _get_activation(activation),
96 | nn.ConvTranspose2d(self.embed_dim, self.mask_dim, kernel_size=2, stride=2),
97 | ) if mask_dim else False
98 |
99 | self.maskformer_num_feature_levels = 1 # single feature level (name kept for MaskFormer compatibility)
100 |
101 | self.mask_forward = mask_forward
102 |
103 | def forward(self, features):
104 | if self.mask_map and self.mask_forward:
105 | features.update({'neck_output': {'mask_features': self.mask_map(features['backbone_output']),
106 | 'multi_scale_features': [features['backbone_output']]}})
107 | else:
108 | features.update({'neck_output': {'mask_features': None,
109 | 'multi_scale_features': [features['backbone_output']]}})
110 | return features
111 |
112 |
113 |
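Norm2d above normalizes over the channel axis of an NCHW tensor, which is LayerNorm applied channels-last. A small equivalence check (illustrative sizes):

    import torch
    import torch.nn.functional as F

    norm = Norm2d(embed_dim=16)
    x = torch.randn(2, 16, 8, 8)
    ref = F.layer_norm(x.permute(0, 2, 3, 1), (16,), norm.weight, norm.bias, 1e-6)
    assert torch.allclose(norm(x), ref.permute(0, 3, 1, 2), atol=1e-5)

The two stride-2 ConvTranspose2d layers in mask_map then upsample the backbone feature map by 4x before projecting to mask_dim channels.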
--------------------------------------------------------------------------------
/core/models/ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/models/ops/__init__.py
--------------------------------------------------------------------------------
/core/models/ops/box_ops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | Utilities for bounding box manipulation and GIoU.
4 | """
5 | import torch
6 | from torchvision.ops.boxes import box_area
7 | import pdb
8 |
9 | def box_cxcywh_to_xyxy(x):
10 | x_c, y_c, w, h = x.unbind(-1)
11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
12 | (x_c + 0.5 * w), (y_c + 0.5 * h)]
13 | return torch.stack(b, dim=-1)
14 |
15 |
16 | def box_xyxy_to_cxcywh(x):
17 | x0, y0, x1, y1 = x.unbind(-1)
18 | b = [(x0 + x1) / 2, (y0 + y1) / 2,
19 | (x1 - x0), (y1 - y0)]
20 | return torch.stack(b, dim=-1)
21 |
22 |
23 | def box_ioa(boxes1, boxes2):
24 | area1 = box_area(boxes1)
25 |
26 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
27 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
28 |
29 | wh = (rb - lt).clamp(min=0) # [N,M,2]
30 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
31 |
32 | return inter / area1[:, None]
33 |
34 |
35 | # modified from torchvision to also return the union
36 | def box_iou(boxes1, boxes2):
37 | area1 = box_area(boxes1)
38 | area2 = box_area(boxes2)
39 |
40 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
41 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
42 |
43 | wh = (rb - lt).clamp(min=0) # [N,M,2]
44 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
45 |
46 | union = area1[:, None] + area2 - inter
47 |
48 | iou = inter / union
49 | return iou, union
50 |
51 |
52 | def generalized_box_iou(boxes1, boxes2):
53 | """
54 | Generalized IoU from https://giou.stanford.edu/
55 |
56 | The boxes should be in [x0, y0, x1, y1] format
57 |
58 | Returns a [N, M] pairwise matrix, where N = len(boxes1)
59 | and M = len(boxes2)
60 | """
61 | # degenerate boxes gives inf / nan results
62 | # so do an early check
63 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
64 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
65 | iou, union = box_iou(boxes1, boxes2)
66 |
67 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
68 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
69 |
70 | wh = (rb - lt).clamp(min=0) # [N,M,2]
71 | area = wh[:, :, 0] * wh[:, :, 1]
72 |
73 | return iou - (area - union) / area
74 |
75 |
76 | def giou_iou(boxes1, boxes2):
77 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all(), f"we have boxes1[:, 2:]: {boxes1[:, 2:]}, " \
78 | f"boxes1[:, :2]: {boxes1[:, :2]}"
79 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all(), f"we have boxes2[:, 2:]: {boxes2[:, 2:]}, " \
80 | f"boxes2[:, :2]: {boxes2[:, :2]}"
81 | iou, union = box_iou(boxes1, boxes2)
82 |
83 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
84 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
85 |
86 | wh = (rb - lt).clamp(min=0) # [N,M,2]
87 | area = wh[:, :, 0] * wh[:, :, 1]
88 |
89 | return iou - (area - union) / area, iou
90 |
91 |
92 | def masks_to_boxes(masks):
93 | """Compute the bounding boxes around the provided masks
94 |
95 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
96 |
97 | Returns a [N, 4] tensors, with the boxes in xyxy format
98 | """
99 | if masks.numel() == 0:
100 | return torch.zeros((0, 4), device=masks.device)
101 |
102 | h, w = masks.shape[-2:]
103 |
104 | y = torch.arange(0, h, dtype=torch.float)
105 | x = torch.arange(0, w, dtype=torch.float)
106 | y, x = torch.meshgrid(y, x)
107 |
108 | x_mask = (masks * x.unsqueeze(0))
109 | x_max = x_mask.flatten(1).max(-1)[0]
110 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
111 |
112 | y_mask = (masks * y.unsqueeze(0))
113 | y_max = y_mask.flatten(1).max(-1)[0]
114 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
115 |
116 | return torch.stack([x_min, y_min, x_max, y_max], 1)
117 |
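Quick usage check for the conversions and GIoU above (values illustrative); identical boxes have GIoU exactly 1:

    import torch

    boxes = torch.tensor([[0.5, 0.5, 0.2, 0.4]])    # (cx, cy, w, h)
    xyxy = box_cxcywh_to_xyxy(boxes)                # [[0.4, 0.3, 0.6, 0.7]]
    assert torch.allclose(box_xyxy_to_cxcywh(xyxy), boxes)
    assert torch.allclose(generalized_box_iou(xyxy, xyxy), torch.ones(1, 1))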
--------------------------------------------------------------------------------
/core/models/ops/utils.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | import torch
3 | import torch.nn as nn
4 | from torch.nn import functional as F
5 | from collections import namedtuple
6 |
7 | class Conv2d(torch.nn.Conv2d):
8 | """
9 | A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features.
10 | """
11 |
12 | def __init__(self, *args, **kwargs):
13 | """
14 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
15 |
16 | Args:
17 | norm (nn.Module, optional): a normalization layer
18 | activation (callable(Tensor) -> Tensor): a callable activation function
19 |
20 | It assumes that norm layer is used before activation.
21 | """
22 | norm = kwargs.pop("norm", None)
23 | activation = kwargs.pop("activation", None)
24 | super().__init__(*args, **kwargs)
25 |
26 | self.norm = norm
27 | self.activation = activation
28 |
29 | def forward(self, x):
30 | # torchscript does not support SyncBatchNorm yet
31 | # https://github.com/pytorch/pytorch/issues/40507
32 | # and we skip these codes in torchscript since:
33 | # 1. currently we only support torchscript in evaluation mode
34 | # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or
35 | # later version, `Conv2d` in these PyTorch versions has already supported empty inputs.
36 | if not torch.jit.is_scripting():
37 | if x.numel() == 0 and self.training:
38 | # https://github.com/pytorch/pytorch/issues/12013
39 | assert not isinstance(
40 | self.norm, torch.nn.SyncBatchNorm
41 | ), "SyncBatchNorm does not support empty inputs!"
42 |
43 | x = F.conv2d(
44 | x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups
45 | )
46 | if self.norm is not None:
47 | x = self.norm(x)
48 | if self.activation is not None:
49 | x = self.activation(x)
50 | return x
51 |
52 |
53 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
54 | """
55 | A simple structure that contains basic shape specification about a tensor.
56 | It is often used as the auxiliary inputs/outputs of models,
57 | to complement the lack of shape inference ability among pytorch modules.
58 |
59 | Attributes:
60 | channels:
61 | height:
62 | width:
63 | stride:
64 | """
65 |
66 | def __new__(cls, channels=None, height=None, width=None, stride=None):
67 | return super().__new__(cls, channels, height, width, stride)
68 |
69 |
70 | def get_norm(norm, out_channels): # todo: replace with syncbn
71 | """
72 | Args:
73 | norm (str or callable): either one of BN, SyncBN, FrozenBN, GN;
74 | or a callable that takes a channel number and returns
75 | the normalization layer as a nn.Module.
76 |
77 | Returns:
78 | nn.Module or None: the normalization layer
79 | """
80 | if norm is None:
81 | return None
82 | if isinstance(norm, str):
83 | if len(norm) == 0:
84 | return None
85 | norm = {
86 | # "BN": BatchNorm2d,
87 | # # Fixed in https://github.com/pytorch/pytorch/pull/36382
88 | # "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm,
89 | # "FrozenBN": FrozenBatchNorm2d,
90 | "GN": lambda channels: nn.GroupNorm(32, channels),
91 | # for debugging:
92 | # "nnSyncBN": nn.SyncBatchNorm,
93 | # "naiveSyncBN": NaiveSyncBatchNorm,
94 | # # expose stats_mode N as an option to caller, required for zero-len inputs
95 | # "naiveSyncBN_N": lambda channels: NaiveSyncBatchNorm(channels, stats_mode="N"),
96 | }[norm]
97 | return norm(out_channels)
98 |
99 |
100 | def c2_xavier_fill(module: nn.Module) -> None:
101 | """
102 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2.
103 | Also initializes `module.bias` to 0.
104 |
105 | Args:
106 | module (torch.nn.Module): module to initialize.
107 | """
108 | # Caffe2 implementation of XavierFill in fact
109 | # corresponds to kaiming_uniform_ in PyTorch
110 | nn.init.kaiming_uniform_(module.weight, a=1)
111 | if module.bias is not None:
112 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module,
113 | # torch.Tensor]`.
114 | nn.init.constant_(module.bias, 0)
115 |
116 |
117 | def c2_msra_fill(module: nn.Module) -> None:
118 | """
119 | Initialize `module.weight` using the "MSRAFill" implemented in Caffe2.
120 | Also initializes `module.bias` to 0.
121 |
122 | Args:
123 | module (torch.nn.Module): module to initialize.
124 | """
125 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
126 | if module.bias is not None:
127 | # pyre-fixme[6]: Expected `Tensor` for 1st param but got `Union[nn.Module,
128 | # torch.Tensor]`.
129 | nn.init.constant_(module.bias, 0)
130 |
131 |
132 | def cat(tensors: List[torch.Tensor], dim: int = 0):
133 | """
134 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list
135 | """
136 | assert isinstance(tensors, (list, tuple))
137 | if len(tensors) == 1:
138 | return tensors[0]
139 | return torch.cat(tensors, dim)
--------------------------------------------------------------------------------
/core/models/tta.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import copy
3 |
4 | import numpy as np
5 | import torch
6 | # from fvcore.transforms import HFlipTransform
7 | from torch import nn
8 | from torch.nn.parallel import DistributedDataParallel
9 |
10 | from core.distributed_utils import DistModule
11 | from core.data.transforms.seg_aug_dev import (
12 | RandomFlip,
13 | ResizeShortestEdge,
14 | ResizeTransform,
15 | HFlipTransform,
16 | NoOpTransform
17 | )
18 | from core.data.transforms.seg_transforms_dev import apply_augmentations
19 |
20 | __all__ = [
21 | "SemanticSegmentorWithTTA",
22 | ]
23 |
24 |
25 | class SemanticSegmentorWithTTA(nn.Module):
26 | """
27 | A SemanticSegmentor with test-time augmentation enabled.
28 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`.
29 |
30 | combined with customized augmentation for original image
31 | """
32 |
33 | def __init__(self, cfg, model, batch_size=1):
34 | """
35 | Args:
36 | cfg (CfgNode):
37 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on.
38 | tta_mapper (callable): takes a dataset dict and returns a list of
39 | augmented versions of the dataset dict. Defaults to
40 | `DatasetMapperTTA(cfg)`.
41 | batch_size (int): batch the augmented images into this batch size for inference.
42 | """
43 | super().__init__()
44 | if isinstance(model, DistributedDataParallel) or isinstance(model, DistModule):
45 | model = model.module
46 | self.cfg = cfg
47 |
48 | self.min_sizes = cfg.min_sizes
49 | self.max_size = cfg.max_size
50 | self.flip = cfg.flip
51 |
52 | self.model = model
53 |
54 | # if tta_mapper is None:
55 | # tta_mapper = DatasetMapperTTA(cfg)
56 | # self.tta_mapper = tta_mapper
57 | assert batch_size == 1
58 | self.batch_size = batch_size
59 |
60 | def tta_mapper(self, dataset_dict):
61 | """
62 | Args:
63 | dict: a dict in standard model input format. See tutorials for details.
64 |
65 | Returns:
66 | list[dict]:
67 | a list of dicts, which contain augmented version of the input image.
68 | The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``.
69 | Each dict has field "transforms" which is a TransformList,
70 | containing the transforms that are used to generate this image.
71 | """
72 | assert len(dataset_dict["image"].shape) == 4
73 | numpy_image = dataset_dict["image"].squeeze().permute(1, 2, 0).cpu().numpy()
74 | shape = numpy_image.shape
75 | orig_shape = (dataset_dict["height"], dataset_dict["width"])
76 | if shape[:2] != orig_shape:
77 | # It transforms the "original" image in the dataset to the input image
78 | pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1])
79 | else:
80 | pre_tfm = NoOpTransform()
81 |
82 | # Create all combinations of augmentations to use
83 | aug_candidates = [] # each element is a list[Augmentation]
84 | for min_size in self.min_sizes:
85 | resize = ResizeShortestEdge(min_size, self.max_size)
86 | aug_candidates.append([resize]) # resize only
87 | if self.flip:
88 | flip = RandomFlip(prob=1.0)
89 | aug_candidates.append([resize, flip]) # resize + flip
90 |
91 | # Apply all the augmentations
92 | ret = []
93 | for aug in aug_candidates:
94 | new_image, tfms = apply_augmentations(aug, np.copy(numpy_image))
95 | torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1)))
96 |
97 | torch_image = torch_image.unsqueeze(0)
98 |
99 | dic = copy.deepcopy(dataset_dict)
100 | dic["transforms"] = pre_tfm + tfms
101 | dic["image"] = torch_image.cuda()
102 | ret.append(dic)
103 | return ret
104 |
105 | def __call__(self, batched_inputs, current_step):
106 | """
107 | Same input/output format as :meth:`SemanticSegmentor.forward`
108 | """
109 | self.current_step = current_step # redundant param for api compliance
110 | def _maybe_read_image(dataset_dict):
111 | ret = copy.copy(dataset_dict)
112 | if "image" not in ret:
113 | raise
114 | if "height" not in ret and "width" not in ret: # TODO: BUG HERE
115 | raise
116 | # ret["height"] = ret["ori_image"].shape[1]#ret["image"].shape[1]
117 | # ret["width"] = ret["ori_image"].shape[2]
118 | return ret
119 |
120 | processed_results = []
121 | # for x in batched_inputs:
122 | result = self._inference_one_image(_maybe_read_image(batched_inputs))
123 | processed_results.append(result)
124 | return processed_results
125 |
126 | def _inference_one_image(self, input):
127 | """
128 | Args:
129 | input (dict): one dataset dict with "image" field being a CHW tensor
130 | Returns:
131 | dict: one output dict
132 | """
133 | augmented_inputs, tfms = self._get_augmented_inputs(input)
134 |
135 | final_predictions = None
136 | count_predictions = 0
137 | for input, tfm in zip(augmented_inputs, tfms):
138 | count_predictions += 1
139 | with torch.no_grad():
140 | if final_predictions is None:
141 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
142 | final_predictions = self.model(input, self.current_step)[0].pop("sem_seg").flip(dims=[2]) # should be [input] originally
143 | else:
144 | final_predictions = self.model(input, self.current_step)[0].pop("sem_seg")
145 | else:
146 | if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
147 | final_predictions += self.model(input, self.current_step)[0].pop("sem_seg").flip(dims=[2])
148 | else:
149 | final_predictions += self.model(input, self.current_step)[0].pop("sem_seg")
150 |
151 | final_predictions = final_predictions / count_predictions
152 | return {"sem_seg": final_predictions}
153 |
154 | def _get_augmented_inputs(self, input):
155 | augmented_inputs = self.tta_mapper(input)
156 | tfms = [x.pop("transforms") for x in augmented_inputs]
157 | return augmented_inputs, tfms
158 |
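_inference_one_image averages len(min_sizes) * (2 if flip else 1) prediction maps; flipped predictions are mapped back by flipping the width axis. A one-line sanity check of the undo (illustrative shape):

    import torch

    sem_seg = torch.randn(19, 4, 6)   # (C, H, W) logits
    assert torch.equal(sem_seg.flip(dims=[2]).flip(dims=[2]), sem_seg)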
--------------------------------------------------------------------------------
/core/msg_server.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import socket
3 | import itchat
4 | import errno
5 | import time
6 | import threading
7 |
8 | import os
9 | import subprocess
10 |
11 | parser = argparse.ArgumentParser(description="simple server!")
12 | parser.add_argument('--mode', type=str)
13 | parser.add_argument('--ip', type=str)
14 | parser.add_argument('--port', type=int)
15 | parser.add_argument('--timeout', type=int)
16 |
17 | class MsgServer(object):
18 | def __init__(self, server_ip, server_port):
19 | self.init_chat()
20 | self.send('server chat logged in!')
21 | self.start_server(server_ip, server_port)
22 |
23 | def init_chat(self):
24 | itchat.auto_login(enableCmdQR=2)
25 |
26 | def send(self, msg, echo=True):
27 | if echo:
28 | print(msg)
29 | itchat.send(msg, toUserName='filehelper')
30 |
31 | def worker_thread(self, conn, addr):
32 | conn.settimeout(args.timeout)
33 | if conn is not None:
34 | self.send('job connected! [{}]'.format(addr))
35 | else:
36 | self.send('none connection!')
37 | return -1
38 |
39 | while True:
40 | try:
41 | recv_data = conn.recv(1024)
42 | except socket.timeout as e:
43 | print('no msg...')
44 | else:
45 | msg_len = len(recv_data)
46 | if msg_len == 0:
47 | self.send('connection break, waiting for other connections..')
48 | break
49 | self.send(str(recv_data, encoding='utf-8'))
50 | conn.close()
51 |
52 | def start_server(self, server_ip, server_port):
53 | ip_port = (server_ip, server_port)
54 | s = socket.socket()
55 | s.bind(ip_port)
56 |
57 | # dump ip/port info to file
58 | with open('server.txt', 'w') as f:
59 | f.write('{} {}\n'.format(server_ip, server_port))
60 |
61 | s.listen()
62 | self.send('server listening on {}, waiting for job connection...'.format(server_ip))
63 | while True:
64 | conn, addr = s.accept()
65 | threading.Thread(target=self.worker_thread, args=(conn, addr)).start()
66 |
67 | class MsgClient(object):
68 | def __init__(self, server_ip, server_port):
69 | self._init_client(server_ip, server_port)
70 | self.send('I\'m client!\n')
71 |
72 | def send(self, msg, echo=True):
73 | self.s.send(bytes(msg, encoding='utf-8'))
74 | if echo:
75 | print(msg)
76 |
77 | def _init_client(self, server_ip, server_port):
78 | ip_port = (server_ip, server_port)
79 | self.s = socket.socket()
80 | self.s.connect(ip_port)
81 |
82 | def close(self):
83 | self.s.close()
84 |
85 | def itchat_manager():
86 | def run_and_get(cmd, screen=False):
87 | process = subprocess.Popen(
88 | cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
89 | output = ''
90 |
91 | for line in process.stdout:
92 | line = line.decode('utf-8')
93 | if screen:
94 | print(line, end='', flush=True)
95 | output += line.strip(' ')
96 |
97 | return output
98 |
99 | @itchat.msg_register(itchat.content.TEXT)
100 | def text_reply(msg):
101 | res_txt = None
102 | cmd_dict = {
103 | 'sq' : 'squeue -p VI_Face_V100',
104 | 'sq1': 'squeue -p VI_Face_1080TI'
105 | }
106 | if msg.text in cmd_dict:
107 | cmd = cmd_dict[msg.text]
108 | res_txt = run_and_get(cmd)
109 | elif msg.text.startswith('exec:'):
110 | cmd = msg.text.replace('exec:', '')
111 | if os.system(cmd) == 0:
112 | res_txt = 'exec succeeded!'
113 | else:
114 | res_txt = 'exec failed!'
115 | elif msg.text.startswith('getinfo:'):
116 | cmd = msg.text.replace('getinfo:', '')
117 | res_txt = run_and_get(cmd)
118 |
119 | if res_txt is not None:
120 | itchat.send(res_txt, toUserName='filehelper')
121 |
122 | itchat.auto_login(enableCmdQR=2, hotReload=True)
123 | itchat.run(True)
124 |
125 | if __name__ == '__main__':
126 | args = parser.parse_args()
127 | if args.mode == 'server':
128 | if args.ip is None or args.port is None:
129 | with open('server.txt', 'r') as f:
130 | line = f.read().strip().split()
131 | args.ip = line[0]
132 | args.port = int(line[1])
133 | print('reading ip & port from server.txt, {}:{}'.format(args.ip, args.port))
134 | s = MsgServer(args.ip, args.port)
135 | elif args.mode == 'manager':
136 | itchat_manager()
137 | else:
138 | s = MsgClient(args.ip, args.port)
139 | time.sleep(5)
140 | s.close()
141 |
--------------------------------------------------------------------------------
/core/optim.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.optim.optimizer import Optimizer, required
3 | import time
4 |
5 |
6 | class SGD(Optimizer):
7 | r"""Implements stochastic gradient descent (optionally with momentum).
8 |
9 | Nesterov momentum is based on the formula from
10 | `On the importance of initialization and momentum in deep learning`__.
11 |
12 | Args:
13 | params (iterable): iterable of parameters to optimize or dicts defining
14 | parameter groups
15 | lr (float): learning rate
16 | momentum (float, optional): momentum factor (default: 0)
17 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
18 | dampening (float, optional): dampening for momentum (default: 0)
19 | nesterov (bool, optional): enables Nesterov momentum (default: False)
20 |
21 | Example:
22 | >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
23 | >>> optimizer.zero_grad()
24 | >>> loss_fn(model(input), target).backward()
25 | >>> optimizer.step()
26 |
27 | __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
28 |
29 | .. note::
30 | The implementation of SGD with Momentum/Nesterov subtly differs from
31 | Sutskever et. al. and implementations in some other frameworks.
32 |
33 | Considering the specific case of Momentum, the update can be written as
34 |
35 | .. math::
36 | v = \rho * v + g \\
37 | p = p - lr * v
38 |
39 | where p, g, v and :math:`\rho` denote the parameters, gradient,
40 | velocity, and momentum respectively.
41 |
42 | This is in contrast to Sutskever et. al. and
43 | other frameworks which employ an update of the form
44 |
45 | .. math::
46 | v = \rho * v + lr * g \\
47 | p = p - v
48 |
49 | The Nesterov version is analogously modified.
50 | """
51 |
52 | def __init__(self, params, lr=required, momentum=0, dampening=0,
53 | weight_decay=0, nesterov=False):
54 | if lr is not required and lr < 0.0:
55 | raise ValueError("Invalid learning rate: {}".format(lr))
56 | if momentum < 0.0:
57 | raise ValueError("Invalid momentum value: {}".format(momentum))
58 | if weight_decay < 0.0:
59 | raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
60 |
61 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
62 | weight_decay=weight_decay, nesterov=nesterov)
63 | if nesterov and (momentum <= 0 or dampening != 0):
64 | raise ValueError("Nesterov momentum requires a momentum and zero dampening")
65 | super(SGD, self).__init__(params, defaults)
66 |
67 | def __setstate__(self, state):
68 | super(SGD, self).__setstate__(state)
69 | for group in self.param_groups:
70 | group.setdefault('nesterov', False)
71 |
72 | def step(self, closure=None):
73 | """Performs a single optimization step.
74 |
75 | Arguments:
76 | closure (callable, optional): A closure that reevaluates the model
77 | and returns the loss.
78 | """
79 | loss = None
80 | if closure is not None:
81 | loss = closure()
82 |
83 | for group in self.param_groups:
84 | weight_decay = group['weight_decay']
85 | momentum = group['momentum']
86 | dampening = group['dampening']
87 | nesterov = group['nesterov']
88 |
89 | for p in group['params']:
90 | if p.grad is None:
91 | continue
92 | d_p = p.grad.data
93 | if weight_decay != 0:
94 | d_p.add_(p.data, alpha=weight_decay)
95 | if momentum != 0:
96 | param_state = self.state[p]
97 | if 'momentum_buffer' not in param_state:
98 | buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
99 | buf.mul_(momentum).add_(d_p)
100 | else:
101 | buf = param_state['momentum_buffer']
102 | buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
103 | if nesterov:
104 | d_p = d_p.add(buf, alpha=momentum)
105 | else:
106 | d_p = buf
107 |
108 | p.data.add_(d_p, alpha=-group['lr'])
109 |
110 | return loss
111 |
112 |
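A numeric sketch of the note in the docstring: with a constant lr the two momentum conventions trace identical trajectories, since lr simply folds into the velocity (values illustrative):

    rho, lr = 0.9, 0.1
    v1 = v2 = p1 = p2 = 0.0
    for g in [1.0, 1.0, 1.0]:      # a fixed gradient stream
        v1 = rho * v1 + g          # this implementation
        p1 -= lr * v1
        v2 = rho * v2 + lr * g     # Sutskever et al.
        p2 -= v2
    assert abs(p1 - p2) < 1e-9     # they diverge only once lr varies over time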
--------------------------------------------------------------------------------
/core/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 | from torch.optim import SGD, RMSprop, Adadelta, Adagrad, Adam, AdamW # noqa F401
2 | from .lars import LARS # noqa F401
3 | from .adam_clip import AdamWithClip, AdamWWithClip, AdamWWithClipDev # noqa F401
4 | from .adafactor import Adafactor_dev
5 |
6 |
7 | def optim_entry(config):
8 | return globals()[config['type']](**config['kwargs'])
9 |
--------------------------------------------------------------------------------
/core/optimizers/adafactor.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | from torch.optim.optimizer import Optimizer
4 |
5 | from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union
6 | from torch import Tensor
7 |
8 | Params = Union[Iterable[Tensor], Iterable[Dict[str, Any]]]
9 | LossClosure = Callable[[], float]
10 | OptLossClosure = Optional[LossClosure]
11 | Betas2 = Tuple[float, float]
12 | State = Dict[str, Any]
13 | OptFloat = Optional[float]
14 | Nus2 = Tuple[float, float]
15 | Eps2 = Tuple[float, float]
16 | ParamGroup = Dict[str, Any]
17 |
18 |
19 | class Adafactor_dev(Optimizer):
20 | """Implements Adafactor algorithm.
21 |
22 | It has been proposed in: `Adafactor: Adaptive Learning Rates with
23 | Sublinear Memory Cost`__.
24 |
25 | Arguments:
26 | params: iterable of parameters to optimize or dicts defining
27 | parameter groups
28 | lr: external learning rate (default: None)
29 | eps2: regularization constants for square gradient
30 | and parameter scale respectively (default: (1e-30, 1e-3))
31 | clip_threshold: threshold of root mean square of
32 | final gradient update (default: 1.0)
33 | decay_rate: coefficient used to compute running averages of square
34 | gradient (default: -0.8)
35 | beta1: coefficient used for computing running averages of gradient
36 | (default: None)
37 | weight_decay: weight decay (L2 penalty) (default: 0)
38 | scale_parameter: if true, learning rate is scaled by root mean square
39 | of parameter (default: True)
40 | relative_step: if true, time-dependent learning rate is computed
41 | instead of external learning rate (default: True)
42 | warmup_init: time-dependent learning rate computation depends on
43 | whether warm-up initialization is being used (default: False)
44 |
45 | Example:
46 | >>> import torch_optimizer as optim
47 | >>> optimizer = optim.Adafactor(model.parameters())
48 | >>> optimizer.zero_grad()
49 | >>> loss_fn(model(input), target).backward()
50 | >>> optimizer.step()
51 |
52 | __ https://arxiv.org/abs/1804.04235
53 |
54 | Note:
55 | Reference code: https://github.com/pytorch/fairseq/blob/master/fairseq/optim/adafactor.py # noqa
56 | """
57 |
58 | def __init__(
59 | self,
60 | params: Params,
61 | lr: OptFloat = None,
62 | eps2: Eps2 = (1e-30, 1e-3),
63 | clip_threshold: float = 1.0,
64 | decay_rate: float = -0.8,
65 | beta1: OptFloat = None,
66 | weight_decay: float = 0.0,
67 | scale_parameter: bool = True,
68 | relative_step: bool = True,
69 | warmup_init: bool = False,
70 | clip_beta2: Any = False,
71 | ):
72 | if lr is not None and lr <= 0.0:
73 | raise ValueError('Invalid learning rate: {}'.format(lr))
74 | if weight_decay < 0.0:
75 | raise ValueError(
76 | 'Invalid weight_decay value: {}'.format(weight_decay)
77 | )
78 |
79 | defaults = dict(
80 | lr=lr,
81 | eps2=eps2,
82 | clip_threshold=clip_threshold,
83 | decay_rate=decay_rate,
84 | beta1=beta1,
85 | weight_decay=weight_decay,
86 | scale_parameter=scale_parameter,
87 | relative_step=relative_step,
88 | warmup_init=warmup_init,
89 | clip_beta2=clip_beta2,
90 | )
91 | super(Adafactor_dev, self).__init__(params, defaults)
92 |
93 | def _get_lr(self, param_group: ParamGroup, param_state: State) -> float:
94 | rel_step_sz = param_group['lr']
95 | if param_group['relative_step']:
96 | min_step = (
97 | 1e-6 * param_state['step']
98 | if param_group['warmup_init']
99 | else 1e-2
100 | )
101 | rel_step_sz = min(min_step, 1.0 / math.sqrt(param_state['step']))
102 | param_scale = 1.0
103 | if param_group['scale_parameter']:
104 | param_scale = max(param_group['eps2'][1], param_state['RMS'])
105 | return param_scale * rel_step_sz
106 |
107 | def _get_options(
108 | self, param_group: ParamGroup, param_shape: Tuple[int, ...]
109 | ) -> Tuple[bool, bool]:
110 | factored = len(param_shape) >= 2
111 | use_first_moment = param_group['beta1'] is not None
112 | return factored, use_first_moment
113 |
114 | def _rms(self, tensor: torch.Tensor) -> float:
115 | return tensor.norm(2) / (tensor.numel() ** 0.5)
116 |
117 | def _approx_sq_grad(
118 | self,
119 | exp_avg_sq_row: torch.Tensor,
120 | exp_avg_sq_col: torch.Tensor,
121 | output: torch.Tensor,
122 | ) -> None:
123 | r_factor = (
124 | (exp_avg_sq_row / exp_avg_sq_row.mean(dim=-1, keepdim=True))
125 | .rsqrt_()
126 | .unsqueeze(-1)
127 | )
128 | c_factor = exp_avg_sq_col.unsqueeze(-2).rsqrt()
129 | torch.mul(r_factor, c_factor, out=output)
130 |
131 | def step(self, closure: OptLossClosure = None) -> OptFloat:
132 | r"""Performs a single optimization step.
133 |
134 | Arguments:
135 | closure: A closure that reevaluates the model and returns the loss.
136 | """
137 | loss = None
138 | if closure is not None:
139 | loss = closure()
140 |
141 | for group in self.param_groups:
142 | for p in group['params']:
143 | if p.grad is None:
144 | continue
145 | grad = p.grad.data
146 | if grad.is_sparse:
147 | raise RuntimeError(
148 | 'Adafactor does not support sparse gradients.'
149 | )
150 |
151 | state = self.state[p]
152 | grad_shape = grad.shape
153 |
154 | factored, use_first_moment = self._get_options(
155 | group, grad_shape
156 | )
157 | # State Initialization
158 | if len(state) == 0:
159 | state['step'] = 0
160 |
161 | if use_first_moment:
162 | # Exponential moving average of gradient values
163 | state['exp_avg'] = torch.zeros_like(
164 | grad, memory_format=torch.preserve_format
165 | )
166 | if factored:
167 | state['exp_avg_sq_row'] = torch.zeros(
168 | grad_shape[:-1]
169 | ).type_as(grad)
170 | state['exp_avg_sq_col'] = torch.zeros(
171 | grad_shape[:-2] + grad_shape[-1:]
172 | ).type_as(grad)
173 | else:
174 | state['exp_avg_sq'] = torch.zeros_like(
175 | grad, memory_format=torch.preserve_format
176 | )
177 |
178 | state['RMS'] = 0
179 |
180 | state['step'] += 1
181 | state['RMS'] = self._rms(p.data)
182 | lr = self._get_lr(group, state)
183 |
184 | beta2t = 1.0 - math.pow(state['step'], group['decay_rate'])
185 |
186 | if group['clip_beta2'] is not False:
187 | beta2t = min(beta2t, group['clip_beta2'])
188 |
189 | update = (grad ** 2) + group['eps2'][0]
190 | if factored:
191 | exp_avg_sq_row = state['exp_avg_sq_row']
192 | exp_avg_sq_col = state['exp_avg_sq_col']
193 |
194 | exp_avg_sq_row.mul_(beta2t).add_(
195 | update.mean(dim=-1), alpha=1.0 - beta2t
196 | )
197 | exp_avg_sq_col.mul_(beta2t).add_(
198 | update.mean(dim=-2), alpha=1.0 - beta2t
199 | )
200 |
201 | # Approximation of exponential moving average of square
202 | # of gradient
203 | self._approx_sq_grad(
204 | exp_avg_sq_row, exp_avg_sq_col, update
205 | )
206 | update.mul_(grad)
207 | else:
208 | exp_avg_sq = state['exp_avg_sq']
209 |
210 | exp_avg_sq.mul_(beta2t).add_(update, alpha=1.0 - beta2t)
211 | torch.rsqrt(exp_avg_sq, out=update).mul_(grad)
212 |
213 | update.div_(
214 | max(1.0, self._rms(update) / group['clip_threshold'])
215 | )
216 | update.mul_(lr)
217 |
218 | if use_first_moment:
219 | exp_avg = state['exp_avg']
220 | exp_avg.mul_(group['beta1']).add_(
221 | update, alpha=1 - group['beta1']
222 | )
223 | update = exp_avg
224 |
225 | if group['weight_decay'] != 0:
226 | p.data.add_(p.data, alpha=-group['weight_decay'] * lr)
227 |
228 | p.data.add_(-update)
229 |
230 | return loss
231 |
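The memory saving comes from _approx_sq_grad: instead of the full running average of grad ** 2, Adafactor stores only its row and column means and reconstructs a rank-1 approximation. A sketch of the identity it relies on (illustrative tensor):

    import torch

    v = torch.rand(4, 6) + 0.1                   # stand-in for E[grad ** 2]
    row, col = v.mean(dim=-1), v.mean(dim=-2)    # all that gets stored
    approx = torch.outer(row, col) / row.mean()  # (H, W) shape from O(H + W) state
    assert approx.shape == v.shape

_approx_sq_grad then takes the inverse square root of exactly this approximation, in place.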
--------------------------------------------------------------------------------
/core/optimizers/adam_clip.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from torch.nn.utils import clip_grad_norm_
3 | from torch.optim import Adam, AdamW
4 |
5 |
6 | class AdamWithClip(Adam):
7 |
8 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
9 | weight_decay=0, amsgrad=False, max_norm=None, norm_type=2):
10 | super(AdamWithClip, self).__init__(params, lr, betas, eps, weight_decay, amsgrad)
11 | self.max_norm = max_norm
12 | self.norm_type = norm_type
13 |
14 | def step(self, closure=None):
15 | if self.max_norm is not None:
16 | for group in self.param_groups:
17 | clip_grad_norm_(group['params'], self.max_norm, self.norm_type)
18 | return super(AdamWithClip, self).step(closure)
19 |
20 |
21 | class AdamWWithClip(AdamW):
22 |
23 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
24 | weight_decay=0, amsgrad=False, max_norm=None, norm_type=2):
25 | super(AdamWWithClip, self).__init__(params, lr, betas, eps, weight_decay, amsgrad)
26 | self.max_norm = max_norm
27 | self.norm_type = norm_type
28 |
29 | def step(self, closure=None):
30 |
31 | if self.max_norm is not None:
32 | for group in self.param_groups:
33 | clip_grad_norm_(group['params'], self.max_norm, self.norm_type)
34 | return super(AdamWWithClip, self).step(closure)
35 |
36 |
37 | class AdamWWithClipDev(AdamW):
38 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
39 | weight_decay=1e-2, amsgrad=False, clip_norm=None, norm_type=2):
40 | super(AdamWWithClipDev, self).__init__(params, lr, betas, eps, weight_decay, amsgrad)
41 | self.clip_norm = clip_norm
42 | self.norm_type = norm_type
43 |
44 | self._split_param_groups = None
45 | self.reset_split_param_groups()
46 |
47 | def reset_split_param_groups(self):
48 | if self.clip_norm is not None:
49 | backbone_param, neck_param, decoder_param, task_param = [], [], [], []
50 | for x in self.param_groups:
51 | if x["params"][0].backbone_specific:
52 | backbone_param.append(x["params"])
53 | elif x["params"][0].neck_specific:
54 | neck_param.append(x["params"])
55 | elif x["params"][0].decoder_specific:
56 | decoder_param.append(x["params"])
57 | elif x["params"][0].task_specific:
58 | task_param.append(x["params"])
59 | self._split_param_groups = [_g for _g in [backbone_param,
60 | neck_param,
61 | decoder_param,
62 | task_param] if len(_g) > 0]
63 | print(f">>> reset_split_param_groups, backbone_param: {len(backbone_param)}"
64 | f", neck_param: {len(neck_param)}, decoder_param: {len(decoder_param)}"
65 | f", task_param: {len(task_param)}")
66 |
67 | def step(self, closure=None):
68 | if self.clip_norm is not None:
69 | for _g in self._split_param_groups:
70 | all_params = itertools.chain(*_g)
71 | clip_grad_norm_(all_params, self.clip_norm, self.norm_type)
72 |
73 | return super(AdamWWithClipDev, self).step(closure)
74 |
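AdamWWithClipDev clips backbone, neck, decoder, and task parameters as four separate groups rather than under one global norm. It relies on boolean flags attached to each parameter by the add_*_specific helpers; a minimal sketch with the flags set by hand:

    import torch
    import torch.nn as nn

    layer = nn.Linear(4, 4)
    for p in layer.parameters():
        p.backbone_specific = True      # normally set by the add_*_specific helpers
        p.neck_specific = p.decoder_specific = p.task_specific = False

    opt = AdamWWithClipDev([{'params': [p]} for p in layer.parameters()],
                           lr=1e-3, clip_norm=1.0)
    layer(torch.randn(2, 4)).sum().backward()
    opt.step()   # the backbone group's gradient norm is clipped to <= 1.0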
--------------------------------------------------------------------------------
/core/optimizers/lars.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.optim.optimizer import Optimizer, required
3 |
4 |
5 | class LARS(Optimizer):
6 | r"""Implements layer-wise adaptive rate scaling for SGD, based on
7 | `"Large Batch Training of Convolutional Networks" `_
8 |
9 | Arguments:
10 | - params (:obj:`iterable`): iterable of parameters to optimize or dicts defining parameter groups
11 | - lr (:obj:`float`): learning rate
12 | - momentum (:obj:`float`, optional): momentum factor (default: 0)
13 | - weight_decay (:obj:`float`, optional): weight decay (L2 penalty) (default: 0)
14 | - dampening (:obj:`float`, optional): dampening for momentum (default: 0)
15 | - eta(:obj:`float`): LARS coefficient (default 0.001)
16 | - nesterov (:obj:`bool`, optional): enables Nesterov momentum (default: False)
17 |
18 | Example:
19 | >>> optimizer = LARS(model.parameters(), lr=0.1, momentum=0.9, eta=1e-3)
20 | >>> optimizer.zero_grad()
21 | >>> loss_fn(model(input), target).backward()
22 | >>> optimizer.step()
23 | """
24 |
25 | def __init__(self, params, lr=required, momentum=0, dampening=0,
26 | weight_decay=0, eta=0.001, nesterov=False):
27 | if lr is not required and lr < 0.0:
28 | raise ValueError("Invalid learning rate: {}".format(lr))
29 | if momentum < 0.0:
30 | raise ValueError("Invalid momentum value: {}".format(momentum))
31 | if weight_decay < 0.0:
32 | raise ValueError(
33 | "Invalid weight_decay value: {}".format(weight_decay))
34 | if eta < 0.0:
35 | raise ValueError("Invalid LARS coefficient value: {}".format(eta))
36 |
37 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
38 | weight_decay=weight_decay, eta=eta, nesterov=nesterov)
39 | if nesterov and (momentum <= 0 or dampening != 0):
40 | raise ValueError(
41 | "Nesterov momentum requires a momentum and zero dampening")
42 | super(LARS, self).__init__(params, defaults)
43 |
44 | def __setstate__(self, state):
45 | super(LARS, self).__setstate__(state)
46 | for group in self.param_groups:
47 | group.setdefault('nesterov', False)
48 |
49 | @torch.no_grad()
50 | def step(self, closure=None):
51 | """Performs a single optimization step.
52 |
53 | Arguments:
54 | - closure (:obj:`callable`, optional): A closure that reevaluates the model and returns the loss.
55 | """
56 | loss = None
57 | if closure is not None:
58 | loss = closure()
59 |
60 | for group in self.param_groups:
61 | weight_decay = group['weight_decay']
62 | momentum = group['momentum']
63 | dampening = group['dampening']
64 | nesterov = group['nesterov']
65 | eta = group['eta']
66 |
67 | for p in group['params']:
68 | if p.grad is None:
69 | continue
70 | d_p = p.grad.data
71 |
72 | # compute local learning rate
73 | weight_norm = torch.norm(p.data)
74 | grad_norm = torch.norm(d_p)
75 |
76 | if weight_decay != 0:
77 |                     d_p.add_(p.data, alpha=weight_decay)
78 |                     grad_norm.add_(weight_norm, alpha=weight_decay)
79 | local_lr = eta * weight_norm / grad_norm
80 |
81 | if momentum != 0:
82 | param_state = self.state[p]
83 | if 'momentum_buffer' not in param_state:
84 | buf = param_state['momentum_buffer'] = torch.zeros_like(
85 | p.data)
86 | buf.mul_(momentum).add_(d_p)
87 | else:
88 | buf = param_state['momentum_buffer']
89 |                         buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
90 | if nesterov:
91 |                         d_p = d_p.add(buf, alpha=momentum)
92 | else:
93 | d_p = buf
94 |
95 |                 p.data.add_(d_p, alpha=-group['lr'] * local_lr)
96 |
97 | return loss
98 |
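# --- Illustrative sketch (not part of lars.py): the per-layer trust ratio ---
# With eta=1e-3, a weight norm of 10 and a gradient norm of 0.5, the local lr
# is 1e-3 * 10 / 0.5 = 0.02, which then scales the global lr in the update.
import torch

eta = 1e-3
p = torch.full((100,), 1.0)    # ||p|| = 10
g = torch.full((100,), 0.05)   # ||g|| = 0.5
local_lr = eta * torch.norm(p) / torch.norm(g)
print(local_lr)  # tensor(0.0200)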
--------------------------------------------------------------------------------
/core/solvers/__init__.py:
--------------------------------------------------------------------------------
1 | from .solver import Solver
2 | from .solver_deter import SolverDeter
3 | from .solver_multitask_dev import SolverMultiTaskDev
4 |
5 | def solver_entry(C):
6 | return globals()[C.config['common']['solver']['type']](C)
7 |
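# --- Hedged sketch (not from the repo) of the dispatch in solver_entry ---
# globals() maps the class names imported above to classes, so the YAML field
# common.solver.type selects one by name; the config object below is made up,
# and the sketch assumes the solver classes are in scope as in this module.
class _FakeC:
    config = {'common': {'solver': {'type': 'SolverDeter'}}}

solver_cls = globals()[_FakeC.config['common']['solver']['type']]
print(solver_cls.__name__)  # 'SolverDeter'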
--------------------------------------------------------------------------------
/core/solvers/solver_deter.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import os
3 | import random
4 | import time
5 |
6 | import core
7 | import core.models.decoders as decoders
8 | import core.models.backbones as backbones
9 | import core.models.necks as necks
10 | import core.data.datasets as datasets
11 | from core.models.model_entry import model_entry
12 | import numpy as np
13 | import torch
14 | import torch.backends.cudnn as cudnn
15 |
16 | from core import distributed_utils as dist
17 |
18 | from core.distributed_utils import DistributedGivenIterationSampler
19 |
20 | from torch.utils.data import DataLoader
21 |
22 | from .solver import Solver
23 |
24 | class WorkerInit(object):
25 | def __init__(self, rank, num_workers):
26 | self.rank = rank
27 | self.num_workers = num_workers
28 | def func(self, pid):
29 | print(f'[rank{self.rank}] setting worker seed {self.rank*self.num_workers+pid}', flush=True)
30 | np.random.seed(self.rank*self.num_workers+pid)
31 |
32 | class SolverDeter(Solver):
33 |
34 | def __init__(self, C):
35 | super().__init__(C)
36 |
37 | if self.config.get('deterministic', False):
38 | if self.config.get('cudnn_deterministic', True):
39 | cudnn.deterministic = True
40 | cudnn.benchmark = False
41 | else:
42 | cudnn.benchmark = True
43 | seed = self.config.get('random_seed', 0)
44 | worker_rank = self.config.get('worker_rank', False)
45 | if worker_rank:
46 | worker_init = WorkerInit(self.C.rank, self.config.workers)
47 | else:
48 | worker_init = WorkerInit(0, 0)
49 | self.worker_init_fn = worker_init.func
50 | random.seed(seed)
51 | np.random.seed(seed)
52 | torch.manual_seed(seed)
53 | torch.cuda.manual_seed(seed)
54 | dist.barrier()
55 | if self.C.rank == 0:
56 | self.logger.info(f'deterministic mode, seed: {seed}, worker_rank: {worker_rank},\
57 | cudnn_deterministic: {self.config.get("cudnn_deterministic", True)}')
58 | dist.barrier()
59 | else:
60 | self.worker_init_fn = None
61 |
62 | def create_dataloader(self):
63 | config = self.config
64 | ginfo = self.ginfo
65 |
66 | self.sampler = DistributedGivenIterationSampler(
67 | self.dataset, config.max_iter, config.sampler.batch_size,
68 | world_size=ginfo.task_size, rank=ginfo.task_rank,
69 | last_iter=self.last_iter,
70 | shuffle_strategy=config.sampler.shuffle_strategy,
71 | random_seed=ginfo.task_random_seed,
72 | ret_save_path=config.sampler.get('ret_save_path', None))
73 |
74 |
75 | self.loader = DataLoader(self.dataset, batch_size=config.sampler.batch_size,
76 | shuffle=False, num_workers=config.workers,
77 | pin_memory=False, sampler=self.sampler, worker_init_fn=self.worker_init_fn)
78 |
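# --- Minimal sketch (not from the repo) of the per-worker seeding above ---
# With worker_rank set, WorkerInit gives each DataLoader worker the numpy seed
# rank * num_workers + pid, so no two workers on any rank share a seed.
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

rank, num_workers = 1, 4
def worker_init(pid):
    np.random.seed(rank * num_workers + pid)  # workers on rank 1 get seeds 4..7

loader = DataLoader(TensorDataset(torch.arange(16).float()), batch_size=4,
                    num_workers=num_workers, worker_init_fn=worker_init)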
--------------------------------------------------------------------------------
/core/solvers/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenGVLab/UniHCP/37b93cd450aa423e580043012020a9af2b842e72/core/solvers/utils/__init__.py
--------------------------------------------------------------------------------
/core/solvers/utils/attr_tester_dev.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import math
3 | import os, sys
4 | import random
5 | import datetime
6 | import time
7 | from typing import List
8 | import json
9 | import logging
10 | import numpy as np
11 | from copy import deepcopy
12 | from .seg_tester_dev import DatasetEvaluator
13 |
14 | import torch
15 | import torch.nn as nn
16 | import torch.nn.parallel
17 | from torch.optim import lr_scheduler
18 | import torch.backends.cudnn as cudnn
19 | from core import distributed_utils as dist
20 | import torch.optim
21 | import torch.multiprocessing as mp
22 | import torch.utils.data
23 |
24 | class PedAttrEvaluator(DatasetEvaluator):
25 |
26 | def __init__(
27 | self,
28 | dataset_name,
29 | config,
30 | distributed=True,
31 | output_dir=None,
32 | ):
33 |
34 | self._logger = logging.getLogger(__name__)
35 |
36 | self._dataset_name = dataset_name
37 | self._distributed = distributed
38 | self._output_dir = output_dir
39 |
40 | self._cpu_device = torch.device("cpu")
41 | self.threshold = 0.5
42 |
43 | def reset(self):
44 | self.gt_label = []
45 | self.preds_probs = []
46 |
47 | def process(self, inputs, outputs):
48 | gt_label = inputs['label']
49 | gt_label[gt_label == -1] = 0
50 | preds_probs = outputs['logit'].sigmoid()
51 | self.gt_label.append(gt_label)
52 | self.preds_probs.append(preds_probs)
53 |
54 | @staticmethod
55 | def all_gather(data, group=0):
56 |         assert dist.get_world_size() == 1, "distributed eval not supported yet; unclear whether torch.dist can be used jointly with link"
57 | if dist.get_world_size() == 1:
58 | return [data]
59 |
60 | world_size = dist.get_world_size()
61 | tensors_gather = [torch.ones_like(data) for _ in range(world_size)]
62 | dist.allgather(tensors_gather, data, group=group)
63 | return tensors_gather
64 |
65 | def evaluate(self):
66 | gt_label = torch.cat(self.gt_label, dim=0)
67 | preds_probs = torch.cat(self.preds_probs, dim=0)
68 |
69 | if self._distributed:
70 | dist.barrier()
71 |
72 | gt_label = self.all_gather(gt_label)
73 | preds_probs = self.all_gather(preds_probs)
74 |
75 | if dist.get_rank() != 0:
76 | return
77 |
78 | gt_label = torch.cat(gt_label, dim=0)
79 | preds_probs = torch.cat(preds_probs, dim=0)
80 | preds_probs = preds_probs.cpu().numpy()
81 | gt_label = gt_label.cpu().numpy()
82 |
83 | pred_label = preds_probs > self.threshold
84 |
85 | eps = 1e-20
86 | result = {}
87 |
88 | ###############################
89 | # label metrics
90 | # TP + FN
91 | gt_pos = np.sum((gt_label == 1), axis=0).astype(float)
92 | # TN + FP
93 | gt_neg = np.sum((gt_label == 0), axis=0).astype(float)
94 | # TP
95 | true_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=0).astype(float)
96 | # TN
97 | true_neg = np.sum((gt_label == 0) * (pred_label == 0), axis=0).astype(float)
98 | # FP
99 | false_pos = np.sum(((gt_label == 0) * (pred_label == 1)), axis=0).astype(float)
100 | # FN
101 | false_neg = np.sum(((gt_label == 1) * (pred_label == 0)), axis=0).astype(float)
102 |
103 |         label_pos_recall = 1.0 * true_pos / (gt_pos + eps)  # true positive rate (recall)
104 |         label_neg_recall = 1.0 * true_neg / (gt_neg + eps)  # true negative rate
105 | # mean accuracy
106 | label_ma = (label_pos_recall + label_neg_recall) / 2
107 |
108 | result['label_pos_recall'] = label_pos_recall
109 | result['label_neg_recall'] = label_neg_recall
110 | result['label_prec'] = true_pos / (true_pos + false_pos + eps)
111 | result['label_acc'] = true_pos / (true_pos + false_pos + false_neg + eps)
112 | result['label_f1'] = 2 * result['label_prec'] * result['label_pos_recall'] / (
113 | result['label_prec'] + result['label_pos_recall'] + eps)
114 |
115 | result['label_ma'] = label_ma
116 | result['ma'] = np.mean(label_ma)
117 |
118 | ################
119 | # instance metrics
120 | gt_pos = np.sum((gt_label == 1), axis=1).astype(float)
121 | true_pos = np.sum((pred_label == 1), axis=1).astype(float)
122 | # true positive
123 | intersect_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=1).astype(float)
124 | # IOU
125 | union_pos = np.sum(((gt_label == 1) + (pred_label == 1)), axis=1).astype(float)
126 |
127 | instance_acc = intersect_pos / (union_pos + eps)
128 | instance_prec = intersect_pos / (true_pos + eps)
129 | instance_recall = intersect_pos / (gt_pos + eps)
130 | instance_f1 = 2 * instance_prec * instance_recall / (instance_prec + instance_recall + eps)
131 |
132 | instance_acc = np.mean(instance_acc)
133 | instance_prec = np.mean(instance_prec)
134 | instance_recall = np.mean(instance_recall)
135 | instance_f1 = np.mean(instance_f1)
136 |
137 | result['instance_acc'] = instance_acc
138 | result['instance_prec'] = instance_prec
139 | result['instance_recall'] = instance_recall
140 | result['instance_f1'] = instance_f1
141 |
142 | result['error_num'], result['fn_num'], result['fp_num'] = false_pos + false_neg, false_neg, false_pos
143 |
144 | result['pos_recall'] = np.mean(label_pos_recall)
145 | result['neg_recall'] = np.mean(label_neg_recall)
146 | return result
147 |
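# --- Toy check (not from the repo) of the label metrics above ---
# Two attributes over three samples; mA is the mean over attributes of
# (TPR + TNR) / 2, matching the label_ma computation in evaluate().
import numpy as np

gt   = np.array([[1, 0], [1, 1], [0, 0]])
pred = np.array([[1, 0], [0, 1], [0, 1]])
eps = 1e-20
gt_pos = (gt == 1).sum(axis=0).astype(float)              # [2., 1.]
gt_neg = (gt == 0).sum(axis=0).astype(float)              # [1., 2.]
tp = ((gt == 1) & (pred == 1)).sum(axis=0).astype(float)  # [1., 1.]
tn = ((gt == 0) & (pred == 0)).sum(axis=0).astype(float)  # [1., 1.]
ma = ((tp / (gt_pos + eps) + tn / (gt_neg + eps)) / 2).mean()
print(ma)  # 0.75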
--------------------------------------------------------------------------------
/core/solvers/utils/nms.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Adapted from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
3 | # Original licence: Copyright (c) Microsoft, under the MIT License.
4 | # ------------------------------------------------------------------------------
5 |
6 | import numpy as np
7 |
8 |
9 | def nms(dets, thr):
10 | """Greedily select boxes with high confidence and overlap <= thr.
11 |
12 | Args:
13 | dets: [[x1, y1, x2, y2, score]].
14 | thr: Retain overlap < thr.
15 |
16 | Returns:
17 | list: Indexes to keep.
18 | """
19 | if len(dets) == 0:
20 | return []
21 |
22 | x1 = dets[:, 0]
23 | y1 = dets[:, 1]
24 | x2 = dets[:, 2]
25 | y2 = dets[:, 3]
26 | scores = dets[:, 4]
27 |
28 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
29 | order = scores.argsort()[::-1]
30 |
31 | keep = []
32 | while len(order) > 0:
33 | i = order[0]
34 | keep.append(i)
35 | xx1 = np.maximum(x1[i], x1[order[1:]])
36 | yy1 = np.maximum(y1[i], y1[order[1:]])
37 | xx2 = np.minimum(x2[i], x2[order[1:]])
38 | yy2 = np.minimum(y2[i], y2[order[1:]])
39 |
40 | w = np.maximum(0.0, xx2 - xx1 + 1)
41 | h = np.maximum(0.0, yy2 - yy1 + 1)
42 | inter = w * h
43 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
44 |
45 | inds = np.where(ovr <= thr)[0]
46 | order = order[inds + 1]
47 |
48 | return keep
49 |
50 |
51 | def oks_iou(g, d, a_g, a_d, sigmas=None, vis_thr=None):
52 | """Calculate oks ious.
53 |
54 | Args:
55 | g: Ground truth keypoints.
56 | d: Detected keypoints.
57 | a_g: Area of the ground truth object.
58 | a_d: Area of the detected object.
59 | sigmas: standard deviation of keypoint labelling.
60 | vis_thr: threshold of the keypoint visibility.
61 |
62 | Returns:
63 | list: The oks ious.
64 | """
65 | if sigmas is None:
66 | sigmas = np.array([
67 | .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
68 | .87, .87, .89, .89
69 | ]) / 10.0
70 | vars = (sigmas * 2)**2
71 | xg = g[0::3]
72 | yg = g[1::3]
73 | vg = g[2::3]
74 | ious = np.zeros(len(d), dtype=np.float32)
75 | for n_d in range(0, len(d)):
76 | xd = d[n_d, 0::3]
77 | yd = d[n_d, 1::3]
78 | vd = d[n_d, 2::3]
79 | dx = xd - xg
80 | dy = yd - yg
81 | e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
82 | if vis_thr is not None:
83 |                 # keep only keypoints visible in both gt and detection;
84 |                 # note `list(a) and list(b)` would return only the second list
85 |                 ind = np.logical_and(vg > vis_thr, vd > vis_thr)
84 | e = e[ind]
85 | ious[n_d] = np.sum(np.exp(-e)) / len(e) if len(e) != 0 else 0.0
86 | return ious
87 |
88 |
89 | def oks_nms(kpts_db, thr, sigmas=None, vis_thr=None):
90 | """OKS NMS implementations.
91 |
92 | Args:
93 | kpts_db: keypoints.
94 | thr: Retain overlap < thr.
95 | sigmas: standard deviation of keypoint labelling.
96 | vis_thr: threshold of the keypoint visibility.
97 |
98 | Returns:
99 | np.ndarray: indexes to keep.
100 | """
101 | if len(kpts_db) == 0:
102 | return []
103 |
104 | scores = np.array([k['score'] for k in kpts_db])
105 | kpts = np.array([k['keypoints'].flatten() for k in kpts_db])
106 | areas = np.array([k['area'] for k in kpts_db])
107 |
108 | order = scores.argsort()[::-1]
109 |
110 | keep = []
111 | while len(order) > 0:
112 | i = order[0]
113 | keep.append(i)
114 |
115 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
116 | sigmas, vis_thr)
117 |
118 | inds = np.where(oks_ovr <= thr)[0]
119 | order = order[inds + 1]
120 |
121 | keep = np.array(keep)
122 |
123 | return keep
124 |
125 |
126 | def _rescore(overlap, scores, thr, type='gaussian'):
127 | """Rescoring mechanism gaussian or linear.
128 |
129 | Args:
130 | overlap: calculated ious
131 | scores: target scores.
132 | thr: retain oks overlap < thr.
133 | type: 'gaussian' or 'linear'
134 |
135 | Returns:
136 |         np.ndarray: rescored scores.
137 | """
138 | assert len(overlap) == len(scores)
139 | assert type in ['gaussian', 'linear']
140 |
141 | if type == 'linear':
142 | inds = np.where(overlap >= thr)[0]
143 | scores[inds] = scores[inds] * (1 - overlap[inds])
144 | else:
145 | scores = scores * np.exp(-overlap**2 / thr)
146 |
147 | return scores
148 |
149 |
150 | def soft_oks_nms(kpts_db, thr, max_dets=20, sigmas=None, vis_thr=None):
151 | """Soft OKS NMS implementations.
152 |
153 | Args:
154 |         kpts_db: keypoints database.
155 | thr: retain oks overlap < thr.
156 | max_dets: max number of detections to keep.
157 | sigmas: Keypoint labelling uncertainty.
158 |
159 | Returns:
160 | np.ndarray: indexes to keep.
161 | """
162 | if len(kpts_db) == 0:
163 | return []
164 |
165 | scores = np.array([k['score'] for k in kpts_db])
166 | kpts = np.array([k['keypoints'].flatten() for k in kpts_db])
167 | areas = np.array([k['area'] for k in kpts_db])
168 |
169 | order = scores.argsort()[::-1]
170 | scores = scores[order]
171 |
172 | keep = np.zeros(max_dets, dtype=np.intp)
173 | keep_cnt = 0
174 | while len(order) > 0 and keep_cnt < max_dets:
175 | i = order[0]
176 |
177 | oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
178 | sigmas, vis_thr)
179 |
180 | order = order[1:]
181 | scores = _rescore(oks_ovr, scores[1:], thr)
182 |
183 | tmp = scores.argsort()[::-1]
184 | order = order[tmp]
185 | scores = scores[tmp]
186 |
187 | keep[keep_cnt] = i
188 | keep_cnt += 1
189 |
190 | keep = keep[:keep_cnt]
191 |
192 | return keep
193 |
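# --- Quick usage sketch (not part of nms.py) ---
# Two heavily overlapping boxes plus one disjoint box; with thr=0.5 the
# lower-scoring overlap is suppressed (IoU ~= 0.70 under the +1 convention).
import numpy as np

dets = np.array([[0.,  0., 10., 10., 0.9],    # kept (highest score)
                 [1.,  1., 11., 11., 0.8],    # IoU with the first box > 0.5
                 [50., 50., 60., 60., 0.7]])  # disjoint, kept
print(nms(dets, thr=0.5))  # [0, 2], assuming nms from this file is in scope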
--------------------------------------------------------------------------------
/core/solvers/utils/seg_tester_dev.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | import itertools
3 | import json
4 | import logging
5 | import os
6 | from collections import OrderedDict
7 |
8 | import numpy as np
9 | import pycocotools.mask as mask_util
10 | import torch
11 |
12 | from core import distributed_utils as dist
13 |
14 |
15 | class DatasetEvaluator:
16 | """
17 | Base class for a dataset evaluator.
18 |
19 | The function :func:`inference_on_dataset` runs the model over
20 |     all samples in the dataset, and uses a DatasetEvaluator to process the inputs/outputs.
21 |
22 | This class will accumulate information of the inputs/outputs (by :meth:`process`),
23 | and produce evaluation results in the end (by :meth:`evaluate`).
24 | """
25 |
26 | def reset(self):
27 | """
28 | Preparation for a new round of evaluation.
29 | Should be called before starting a round of evaluation.
30 | """
31 | pass
32 |
33 | def process(self, inputs, outputs):
34 | """
35 | Process the pair of inputs and outputs.
36 | If they contain batches, the pairs can be consumed one-by-one using `zip`:
37 |
38 | .. code-block:: python
39 |
40 | for input_, output in zip(inputs, outputs):
41 | # do evaluation on single input/output pair
42 | ...
43 |
44 | Args:
45 |             inputs (list): the inputs that are used to call the model.
46 | outputs (list): the return value of `model(inputs)`
47 | """
48 | pass
49 |
50 | def evaluate(self):
51 | """
52 | Evaluate/summarize the performance, after processing all input/output pairs.
53 |
54 | Returns:
55 | dict:
56 | A new evaluator class can return a dict of arbitrary format
57 | as long as the user can process the results.
58 | In our train_net.py, we expect the following format:
59 |
60 | * key: the name of the task (e.g., bbox)
61 | * value: a dict of {metric name: score}, e.g.: {"AP50": 80}
62 | """
63 | pass
64 |
65 |
66 | class SemSegEvaluator(DatasetEvaluator):
67 | """
68 | Evaluate semantic segmentation metrics.
69 | """
70 |
71 | def __init__(
72 | self,
73 | dataset_name,
74 | config,
75 | distributed=True,
76 | output_dir=None,
77 | ):
78 | """
79 | Args:
80 | dataset_name (str): name of the dataset to be evaluated.
81 | distributed (bool): if True, will collect results from all ranks for evaluation.
82 | Otherwise, will evaluate the results in the current process.
83 | output_dir (str): an output directory to dump results.
84 |             num_classes, ignore_label: deprecated arguments
85 | """
86 | self._logger = logging.getLogger(__name__)
87 |
88 | self._dataset_name = dataset_name
89 | self._distributed = distributed
90 | self._output_dir = output_dir
91 |
92 | self._cpu_device = torch.device("cpu")
93 |
94 | self._class_names = config.dataset.kwargs.cfg.label_list[1:]
95 | self._num_classes = len(self._class_names)
96 | assert self._num_classes == config.dataset.kwargs.cfg.num_classes, f"{self._num_classes} != {config.dataset.kwargs.cfg.num_classes}"
97 | self._contiguous_id_to_dataset_id = {i: k for i, k in enumerate(self._class_names)} # Dict that maps contiguous training ids to COCO category ids
98 | self._ignore_label = config.dataset.kwargs.cfg.ignore_value
99 |
100 | def reset(self):
101 | self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64)
102 | self._predictions = []
103 |
104 | def process(self, inputs, outputs):
105 | """
106 | Args:
107 | inputs: the inputs to a model.
108 | It is a list of dicts. Each dict corresponds to an image and
109 | contains keys like "height", "width", "file_name".
110 | outputs: the outputs of a model. It is either list of semantic segmentation predictions
111 | (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic
112 | segmentation prediction in the same format.
113 | """
114 | # for input, output in zip(inputs, outputs):
115 | input, output = inputs, outputs[0]
116 |
117 | output = output["sem_seg"].argmax(dim=0).to(self._cpu_device)
118 |         pred = np.array(output, dtype=int)  # np.int was removed in NumPy 1.24
119 |
120 | gt = input["gt"]
121 | gt[gt == self._ignore_label] = self._num_classes
122 |
123 | self._conf_matrix += np.bincount(
124 | (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1),
125 | minlength=self._conf_matrix.size,
126 | ).reshape(self._conf_matrix.shape)
127 |
128 | self._predictions.extend(self.encode_json_sem_seg(pred, input["filename"]))
129 |
130 | @staticmethod
131 | def all_gather(data, group=None):
132 |         assert dist.get_world_size() == 1, "distributed eval not supported yet; unclear whether torch.dist can be used jointly with link"
133 | if dist.get_world_size() == 1:
134 | return [data]
135 |
136 |
137 | def evaluate(self):
138 | """
139 | Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):
140 |
141 | * Mean intersection-over-union averaged across classes (mIoU)
142 | * Frequency Weighted IoU (fwIoU)
143 | * Mean pixel accuracy averaged across classes (mACC)
144 | * Pixel Accuracy (pACC)
145 | """
146 |
147 | if self._distributed:
148 | dist.barrier()
149 |
150 | conf_matrix_list = self.all_gather(self._conf_matrix)
151 | self._predictions = self.all_gather(self._predictions)
152 | self._predictions = list(itertools.chain(*self._predictions))
153 | if dist.get_rank() != 0:
154 | return
155 |
156 | self._conf_matrix = np.zeros_like(self._conf_matrix)
157 | for conf_matrix in conf_matrix_list:
158 | self._conf_matrix += conf_matrix
159 |
160 | if self._output_dir:
161 | os.makedirs(self._output_dir, exist_ok=True)
162 | file_path = os.path.join(self._output_dir, "sem_seg_predictions.json")
163 | with open(file_path, "w") as f:
164 | f.write(json.dumps(self._predictions))
165 |
166 |         acc = np.full(self._num_classes, np.nan, dtype=float)
167 |         iou = np.full(self._num_classes, np.nan, dtype=float)
168 |         tp = self._conf_matrix.diagonal()[:-1].astype(float)
169 |         pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
170 |         class_weights = pos_gt / np.sum(pos_gt)
171 |         pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
172 |         acc_valid = pos_gt > 0
173 |         acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
174 |         iou_valid = (pos_gt + pos_pred) > 0
175 |         union = pos_gt + pos_pred - tp
176 |         iou[iou_valid] = tp[iou_valid] / union[iou_valid]
177 |         macc = np.sum(acc[acc_valid]) / np.sum(acc_valid)
178 |         miou = np.sum(iou[iou_valid]) / np.sum(iou_valid)
179 |         fiou = np.sum(iou[iou_valid] * class_weights[iou_valid])
180 | pacc = np.sum(tp) / np.sum(pos_gt)
181 |
182 | res = {}
183 | res["mIoU"] = 100 * miou
184 | res["fwIoU"] = 100 * fiou
185 | for i, name in enumerate(self._class_names):
186 | res["IoU-{}".format(name)] = 100 * iou[i]
187 | res["mACC"] = 100 * macc
188 | res["pACC"] = 100 * pacc
189 | for i, name in enumerate(self._class_names):
190 | res["ACC-{}".format(name)] = 100 * acc[i]
191 |
192 | if self._output_dir:
193 | file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth")
194 | with open(file_path, "wb") as f:
195 | torch.save(res, f)
196 | results = OrderedDict({"sem_seg": res})
197 | self._logger.info(results)
198 | return results
199 |
200 | def encode_json_sem_seg(self, sem_seg, input_file_name):
201 | """
202 | Convert semantic segmentation to COCO stuff format with segments encoded as RLEs.
203 | See http://cocodataset.org/#format-results
204 | """
205 | json_list = []
206 | for label in np.unique(sem_seg):
207 | if self._contiguous_id_to_dataset_id is not None:
208 | assert (
209 | label in self._contiguous_id_to_dataset_id
210 | ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name)
211 | dataset_id = self._contiguous_id_to_dataset_id[label]
212 | else:
213 | dataset_id = int(label)
214 | mask = (sem_seg == label).astype(np.uint8)
215 | mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0]
216 | mask_rle["counts"] = mask_rle["counts"].decode("utf-8")
217 | json_list.append(
218 | {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle}
219 | )
220 | return json_list
221 |
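# --- Worked toy example (not from the repo) of the bincount trick in process() ---
# 2 classes plus one ignore row/column, 4 pixels; rows index predictions,
# columns index ground truth, exactly as in the confusion-matrix update above.
import numpy as np

num_classes = 2
pred = np.array([0, 0, 1, 1])
gt   = np.array([0, 1, 1, 1])
conf = np.bincount((num_classes + 1) * pred + gt,
                   minlength=(num_classes + 1) ** 2).reshape(num_classes + 1, -1)
tp = conf.diagonal()[:-1].astype(float)            # [1., 2.]
pos_gt = conf[:-1, :-1].sum(axis=0).astype(float)  # [1., 3.]
pos_pred = conf[:-1, :-1].sum(axis=1).astype(float)
iou = tp / (pos_gt + pos_pred - tp)                # [0.5, 0.667]
print(iou.mean())  # mIoU ~= 0.583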
--------------------------------------------------------------------------------
/core/testers/__init__.py:
--------------------------------------------------------------------------------
1 | from .reid_tester import ReIDTester
2 | from ..solvers.solver_multitask_dev import TesterMultiTaskDev
3 |
4 | def tester_entry(C_train, C_test):
5 | return globals()[C_test.config['common']['tester']['type']](C_train, C_test)
6 |
--------------------------------------------------------------------------------
/core/testers/tester.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import torch
4 | import torch.backends.cudnn as cudnn
5 | from core import distributed_utils as dist
6 | from torch.utils.data import DataLoader
7 | from tensorboardX import SummaryWriter
8 | from easydict import EasyDict as edict
9 |
10 | import numpy as np
11 | import random
12 | import copy
13 |
14 | import core
15 | import core.models.decoders as decoders
16 | import core.models.backbones as backbones
17 | import core.models.necks as necks
18 | import core.data.test_datasets as datasets
19 | from core.models.model_entry import model_entry
20 | from core.utils import (AverageMeter, accuracy, load_state, load_last_iter,
21 | save_state, create_logger, IterLRScheduler,
22 | count_parameters_num, freeze_bn,
23 | change_tensor_cuda, sync_print)
24 | from core.distributed_utils import DistModule, DistributedSequentialSampler, simple_group_split, vreduce, vgather
25 | from dict_recursive_update import recursive_update
26 |
27 | class Tester(object):
28 | def __init__(self, C_train, C_test):
29 | train_config = edict(C_train.config['common'])
30 | ginfo = C_train.ginfo
31 | config = train_config
32 |
33 | if C_test.config.get('common') is not None:
34 | recursive_update(config, C_test.config.get('common'))
35 | config = edict(config)
36 | if 'out_dir' in config:
37 | self.out_dir = config['out_dir']+'test_results/'
38 | else:
39 | self.out_dir = "./test_results/"
40 |
41 | if 'expname' in config:
42 | self.tb_path = '{}events/{}'.format(self.out_dir, config['expname'])
43 | self.ckpt_path = '{}checkpoints/{}'.format(self.out_dir, config['expname'])
44 | self.logs_path = '{}logs/{}'.format(self.out_dir, config['expname'])
45 | else:
46 | save_path = config.get('save_path', os.path.dirname(os.path.abspath(C_train.config_file)))
47 | self.save_path = save_path
48 | self.tb_path = '{}/test_results/events'.format(save_path)
49 | self.ckpt_path = '{}/test_results/checkpoints'.format(save_path)
50 | self.logs_path = '{}/test_results/logs'.format(save_path)
51 | if C_train.rank == 0:
52 | os.makedirs(self.tb_path, exist_ok=True)
53 | os.makedirs(self.ckpt_path, exist_ok=True)
54 | os.makedirs(self.logs_path, exist_ok=True)
55 | self.tb_logger = SummaryWriter(self.tb_path)
56 | else:
57 | while not os.path.exists(self.logs_path):
58 | time.sleep(1)
59 |
60 | if ginfo.task_rank == 0:
61 | self.logger = create_logger('global_logger', '{}/log_task_{}.txt'.format(self.logs_path, ginfo.task_id))
62 |
63 | self.sync = config.get('sync', True)
64 | self.C_train = C_train
65 | self.C_test = C_test
66 | self.config = config
67 | self.ginfo = ginfo
68 |
69 | # change tensor .cuda
70 | change_tensor_cuda()
71 |
72 | self.tmp = edict()
73 |
74 | ## random seed setting
75 | rng = np.random.RandomState(self.config.get('random_seed', 0))
76 | self.randomseed_pool = rng.randint(999999, size=config.max_iter)
77 |
78 | def create_dataset(self):
79 | ginfo = self.ginfo
80 | config = self.config
81 | dataset_args = config.dataset['kwargs']
82 | dataset_args['ginfo'] = ginfo
83 | self.dataset = datasets.dataset_entry(config.dataset)
84 | dist.barrier()
85 |
86 | def create_dataloader(self):
87 | raise NotImplementedError
88 |
89 | def create_model(self):
90 | config = self.config
91 |
92 | backbone_bn_group_size = config.backbone['kwargs'].get('bn_group_size', 1)
93 | assert backbone_bn_group_size == 1, 'other bn group size not support!'
94 | backbone_bn_group_comm = self.ginfo.backbone_share_group
95 |
96 | ## build backbone
97 | config.backbone['kwargs']['bn_group'] = backbone_bn_group_comm
98 | backbone_module = backbones.backbone_entry(config.backbone)
99 | count_parameters_num(backbone_module)
100 |
101 | ## build necks
102 |         neck_bn_group_size = config.neck['kwargs'].get('bn_group_size', 1)
103 | assert neck_bn_group_size == 1, 'other bn group size not support!'
104 | neck_bn_group_comm = self.ginfo.neck_share_group
105 |
106 | neck_args = config.neck['kwargs']
107 | neck_args['backbone'] = backbone_module
108 | neck_args['bn_group'] = neck_bn_group_comm
109 | neck_module = necks.neck_entry(config.neck)
110 |
111 | ## add decoder
112 |         decoder_bn_group_size = config.decoder['kwargs'].get('bn_group_size', 1)
113 | assert decoder_bn_group_size == 1, 'other bn group size not support!'
114 | decoder_bn_group_comm = self.ginfo.decoder_share_group
115 |
116 | decoder_args = config.decoder['kwargs']
117 | decoder_args['backbone'] = backbone_module
118 | decoder_args['neck'] = neck_module
119 | decoder_args['bn_group'] = decoder_bn_group_comm
120 | decoder_module = decoders.decoder_entry(config.decoder)
121 |
122 | # build
123 | model = model_entry(backbone_module, neck_module, decoder_module)
124 |
125 | ## distributed
126 | model.cuda()
127 |
128 | if self.C_train.rank == 0:
129 | print(model)
130 |
131 | model = DistModule(model, sync=self.sync, task_grp=self.ginfo.group, \
132 | share_backbone_group=self.ginfo.backbone_share_group, \
133 | share_neck_group=self.ginfo.neck_share_group, \
134 | share_decoder_group=self.ginfo.decoder_share_group)
135 |
136 | self.model = model
137 |
138 | def load(self, args):
139 | if args.load_path == '':
140 | return
141 | if args.recover:
142 | self.last_iter = load_state(args.load_path.replace('ckpt_task_', 'ckpt_task{}_'.format(\
143 | self.ginfo.task_id)), self.model, recover=args.recover)
144 | self.last_iter -= 1
145 | else:
146 | if args.load_single:
147 | load_state(args.load_path, self.model, ignore=args.ignore)
148 | else:
149 | load_state(args.load_path.replace('ckpt_task_', 'ckpt_task{}_'.format(\
150 | self.ginfo.task_id)), self.model, ignore=args.ignore)
151 |
152 | def initialize(self, args):
153 |
154 | # create dataset to get num_classes
155 | self.create_dataset()
156 | self.create_model()
157 |
158 | self.load_args = args
159 |
160 | self.load(args)
161 | self.create_dataloader()
162 |
163 | def pre_run(self):
164 | tmp = self.tmp
165 | tmp.vbatch_time = AverageMeter(10)
166 | tmp.vdata_time = AverageMeter(10)
167 | tmp.vtop1 = AverageMeter(10)
168 | tmp.top1_list = [torch.Tensor(1).cuda() for _ in range(self.C_train.world_size)]
169 |
170 | self.model.eval()
171 |
172 | def prepare_data(self):
173 | tmp = self.tmp
174 | tmp.input_var = dict()
175 |
176 | for k,v in tmp.input.items():
177 | if not isinstance(v, list):
178 |                 tmp.input_var[k] = v.cuda()  # torch.autograd.Variable is a deprecated no-op wrapper
179 |
180 | def _set_randomseed(self, seed):
181 | random.seed(seed)
182 | np.random.seed(seed)
183 | torch.manual_seed(seed)
184 | torch.cuda.manual_seed(seed)
185 | torch.cuda.manual_seed_all(seed)
186 |
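# --- Minimal sketch of the config merge in Tester.__init__ ---
# recursive_update (from dict_recursive_update, a real dependency of this file)
# is used in place: test-config keys override the matching train-config keys;
# the config values below are made up.
from dict_recursive_update import recursive_update

train_cfg = {'sampler': {'batch_size': 256}, 'sync': True}
test_cfg  = {'sampler': {'batch_size': 16}}
recursive_update(train_cfg, test_cfg)
print(train_cfg)  # {'sampler': {'batch_size': 16}, 'sync': True}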
--------------------------------------------------------------------------------
/core/testers/tester_deter.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 | import torch
5 | import torch.backends.cudnn as cudnn
6 | from core import distributed_utils as dist
7 |
8 | from .tester import Tester
9 |
10 | class WorkerInit(object):
11 | def __init__(self, rank, num_workers):
12 | self.rank = rank
13 | self.num_workers = num_workers
14 | def func(self, pid):
15 | print(f'[rank{self.rank}] setting worker seed {self.rank*self.num_workers+pid}', flush=True)
16 | np.random.seed(self.rank*self.num_workers+pid)
17 |
18 | class TesterDeter(Tester):
19 |
20 | def __init__(self, C_train, C_test):
21 | super().__init__(C_train, C_test)
22 |
23 | if self.config.get('deterministic', False):
24 | if self.config.get('cudnn_deterministic', True):
25 | cudnn.deterministic = True
26 | cudnn.benchmark = False
27 | else:
28 | cudnn.benchmark = True
29 | seed = self.config.get('random_seed', 0)
30 | worker_rank = self.config.get('worker_rank', False)
31 | if worker_rank:
32 | worker_init = WorkerInit(self.C_train.rank, self.config.workers)
33 | else:
34 | worker_init = WorkerInit(0, 0)
35 | self.worker_init_fn = worker_init.func
36 | random.seed(seed)
37 | np.random.seed(seed)
38 | torch.manual_seed(seed)
39 | torch.cuda.manual_seed(seed)
40 | if self.C_train.rank == 0:
41 | self.logger.info(f'deterministic mode, seed: {seed}, worker_rank: {worker_rank},\
42 | cudnn_deterministic: {self.config.get("cudnn_deterministic", True)}')
43 | dist.barrier()
44 | else:
45 | self.worker_init_fn = None
46 |
--------------------------------------------------------------------------------
/core/testers/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import os
4 | from core.testers.utils.reranking import re_ranking
5 |
6 |
7 | def euclidean_distance(qf, gf):
8 | m = qf.shape[0]
9 | n = gf.shape[0]
10 | dist_mat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
11 | torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
12 |     dist_mat.addmm_(qf, gf.t(), beta=1, alpha=-2)
13 | return dist_mat.cpu().numpy()
14 |
15 | def cosine_similarity(qf, gf):
16 | epsilon = 0.00001
17 | dist_mat = qf.mm(gf.t())
18 | qf_norm = torch.norm(qf, p=2, dim=1, keepdim=True) # mx1
19 | gf_norm = torch.norm(gf, p=2, dim=1, keepdim=True) # nx1
20 | qg_normdot = qf_norm.mm(gf_norm.t())
21 |
22 | dist_mat = dist_mat.mul(1 / qg_normdot).cpu().numpy()
23 | dist_mat = np.clip(dist_mat, -1 + epsilon, 1 - epsilon)
24 | dist_mat = np.arccos(dist_mat)
25 | return dist_mat
26 |
27 |
28 | def eval_func(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50):
29 | """Evaluation with market1501 metric
30 | Key: for each query identity, its gallery images from the same camera view are discarded.
31 | """
32 | num_q, num_g = distmat.shape
33 | # distmat g
34 | # q 1 3 2 4
35 | # 4 1 2 3
36 | if num_g < max_rank:
37 | max_rank = num_g
38 | print("Note: number of gallery samples is quite small, got {}".format(num_g))
39 | indices = np.argsort(distmat, axis=1)
40 | # 0 2 1 3
41 | # 1 2 3 0
42 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
43 | # compute cmc curve for each query
44 | all_cmc = []
45 | all_AP = []
46 | num_valid_q = 0. # number of valid query
47 | for q_idx in range(num_q):
48 | # get query pid and camid
49 | q_pid = q_pids[q_idx]
50 | q_camid = q_camids[q_idx]
51 |
52 | # remove gallery samples that have the same pid and camid with query
53 | order = indices[q_idx] # select one row
54 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
55 | keep = np.invert(remove)
56 |
57 | # compute cmc curve
58 | # binary vector, positions with value 1 are correct matches
59 | orig_cmc = matches[q_idx][keep]
60 | if not np.any(orig_cmc):
61 | # this condition is true when query identity does not appear in gallery
62 | continue
63 |
64 | cmc = orig_cmc.cumsum()
65 | cmc[cmc > 1] = 1
66 |
67 | all_cmc.append(cmc[:max_rank])
68 | num_valid_q += 1.
69 |
70 | # compute average precision
71 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
72 | num_rel = orig_cmc.sum()
73 | tmp_cmc = orig_cmc.cumsum()
74 | #tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)]
75 | y = np.arange(1, tmp_cmc.shape[0] + 1) * 1.0
76 | tmp_cmc = tmp_cmc / y
77 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc
78 | AP = tmp_cmc.sum() / num_rel
79 | all_AP.append(AP)
80 |
81 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery"
82 |
83 | all_cmc = np.asarray(all_cmc).astype(np.float32)
84 | all_cmc = all_cmc.sum(0) / num_valid_q
85 | mAP = np.mean(all_AP)
86 |
87 | return all_cmc, mAP
88 |
89 |
90 | class R1_mAP_eval():
91 | def __init__(self, num_query, max_rank=50, feat_norm=True, reranking=False):
92 | super(R1_mAP_eval, self).__init__()
93 | self.num_query = num_query
94 | self.max_rank = max_rank
95 | self.feat_norm = feat_norm
96 | self.reranking = reranking
97 |
98 | def reset(self):
99 | self.feats = []
100 | self.pids = []
101 | self.camids = []
102 |
103 | def update(self, output): # called once for each batch
104 | feat, pid, camid = output
105 | self.feats.append(feat.cpu())
106 | self.pids.extend(np.asarray(pid))
107 | self.camids.extend(np.asarray(camid))
108 |
109 | def compute(self): # called after each epoch
110 | feats = torch.cat(self.feats, dim=0)
111 | if self.feat_norm:
112 | print("The test feature is normalized")
113 | feats = torch.nn.functional.normalize(feats, dim=1, p=2) # along channel
114 | # query
115 | qf = feats[:self.num_query]
116 | q_pids = np.asarray(self.pids[:self.num_query])
117 | q_camids = np.asarray(self.camids[:self.num_query])
118 | # gallery
119 | gf = feats[self.num_query:]
120 | g_pids = np.asarray(self.pids[self.num_query:])
121 |
122 | g_camids = np.asarray(self.camids[self.num_query:])
123 | if self.reranking:
124 | print('=> Enter reranking')
125 | # distmat = re_ranking(qf, gf, k1=20, k2=6, lambda_value=0.3)
126 | distmat = re_ranking(qf, gf, k1=50, k2=15, lambda_value=0.3)
127 |
128 | else:
129 | print('=> Computing DistMat with euclidean_distance')
130 | distmat = euclidean_distance(qf, gf)
131 | cmc, mAP = eval_func(distmat, q_pids, g_pids, q_camids, g_camids)
132 |
133 | return cmc, mAP, distmat, self.pids, self.camids, qf, gf
134 |
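# --- Toy sanity check (not from the repo) for eval_func ---
# One query, two gallery images from a different camera; the correct match is
# ranked first, so rank-1 CMC and mAP are both 1.0.
import numpy as np

distmat = np.array([[0.1, 0.9]])
q_pids, g_pids = np.array([0]), np.array([0, 1])
q_camids, g_camids = np.array([0]), np.array([1, 1])
cmc, mAP = eval_func(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=2)
print(cmc, mAP)  # [1. 1.] 1.0, assuming eval_func from this file is in scope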
--------------------------------------------------------------------------------
/core/testers/utils/reranking.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Fri, 25 May 2018 20:29:09
5 |
6 |
7 | """
8 |
9 | """
10 | CVPR2017 paper:Zhong Z, Zheng L, Cao D, et al. Re-ranking Person Re-identification with k-reciprocal Encoding[J]. 2017.
11 | url:http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhong_Re-Ranking_Person_Re-Identification_CVPR_2017_paper.pdf
12 | Matlab version: https://github.com/zhunzhong07/person-re-ranking
13 | """
14 |
15 | """
16 | API
17 |
18 | probFea: all feature vectors of the query set (torch tensor)
19 | galFea: all feature vectors of the gallery set (torch tensor)
20 | k1,k2,lambda: parameters, the original paper uses (k1=20,k2=6,lambda=0.3)
21 | MemorySave: set to 'True' when using MemorySave mode
22 | Minibatch: available when 'MemorySave' is 'True'
23 | """
24 |
25 | import numpy as np
26 | import torch
27 |
28 |
29 | def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat=None, only_local=False):
30 |     # if the feature vectors are numpy arrays, convert them to tensors with 'torch.tensor' first
31 | query_num = probFea.size(0)
32 | all_num = query_num + galFea.size(0)
33 | if only_local:
34 | original_dist = local_distmat
35 | else:
36 | feat = torch.cat([probFea, galFea])
37 | # print('using GPU to compute original distance')
38 | distmat = torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num) + \
39 | torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num).t()
40 |         distmat.addmm_(feat, feat.t(), beta=1, alpha=-2)
41 |         original_dist = distmat.cpu().numpy()
42 |         del feat
43 |     if local_distmat is not None:
44 | original_dist = original_dist + local_distmat
45 | gallery_num = original_dist.shape[0]
46 | original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
47 | V = np.zeros_like(original_dist).astype(np.float16)
48 | initial_rank = np.argsort(original_dist).astype(np.int32)
49 |
50 | # print('starting re_ranking')
51 | for i in range(all_num):
52 | # k-reciprocal neighbors
53 | forward_k_neigh_index = initial_rank[i, :k1 + 1]
54 | backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
55 | fi = np.where(backward_k_neigh_index == i)[0]
56 | k_reciprocal_index = forward_k_neigh_index[fi]
57 | k_reciprocal_expansion_index = k_reciprocal_index
58 | for j in range(len(k_reciprocal_index)):
59 | candidate = k_reciprocal_index[j]
60 | candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2)) + 1]
61 | candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,
62 | :int(np.around(k1 / 2)) + 1]
63 | fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0]
64 | candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
65 | if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2 / 3 * len(
66 | candidate_k_reciprocal_index):
67 | k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index)
68 |
69 | k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
70 | weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
71 | V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)
72 | original_dist = original_dist[:query_num, ]
73 | if k2 != 1:
74 | V_qe = np.zeros_like(V, dtype=np.float16)
75 | for i in range(all_num):
76 | V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
77 | V = V_qe
78 | del V_qe
79 | del initial_rank
80 | invIndex = []
81 | for i in range(gallery_num):
82 | invIndex.append(np.where(V[:, i] != 0)[0])
83 |
84 | jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)
85 |
86 | for i in range(query_num):
87 | temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
88 | indNonZero = np.where(V[i, :] != 0)[0]
89 | indImages = [invIndex[ind] for ind in indNonZero]
90 | for j in range(len(indNonZero)):
91 | temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]],
92 | V[indImages[j], indNonZero[j]])
93 | jaccard_dist[i] = 1 - temp_min / (2 - temp_min)
94 |
95 | final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
96 | del original_dist
97 | del V
98 | del jaccard_dist
99 | final_dist = final_dist[:query_num, query_num:]
100 | return final_dist
101 |
102 |
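# --- Usage sketch (not part of reranking.py) ---
# Random query/gallery features; re_ranking returns a (num_query, num_gallery)
# numpy distance matrix mixing the Jaccard and original distances by lambda_value.
import torch

qf = torch.randn(8, 16)   # 8 query features
gf = torch.randn(32, 16)  # 32 gallery features
dist = re_ranking(qf, gf, k1=20, k2=6, lambda_value=0.3)
print(dist.shape)  # (8, 32), assuming re_ranking from this file is in scope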
--------------------------------------------------------------------------------
/experiments/unihcp/release/batch_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #set -x
3 |
4 | # source /mnt/lustre/share/spring/r0.3.0
5 | ROOT=../../../
6 | export PYTHONPATH=$ROOT:$PYTHONPATH
7 |
8 | if [[ ! -d "logs" ]]; then
9 | mkdir logs
10 | fi
11 |
12 | ################
13 | gpus=${1-8}
14 |
15 | job_name=${2-debug}
16 | ################### ||| additional params usually not used
17 | ################### vvv
18 | iter=${3-newest}
19 | PRETRAIN_JOB_NAME=${4-${job_name}}
20 | CONFIG=${5-${job_name}.yaml}
21 | ################
22 | g=$((${gpus}<8?${gpus}:8))
23 |
24 | #### test list
25 | declare -A test_info_list
26 | test_info_list[pose_lpe]=0
27 | test_info_list[ochuman_pose]=0
28 | #test_info_list[pose_mpii_lpe]=x # requires mpii queries
29 | test_info_list[par_lpe]=1
30 | #test_info_list[par_atr_lpe]=x # requires ATR queries
31 | test_info_list[reid]=2
32 | test_info_list[reid_cuhk3]=2
33 | test_info_list[reid_duke]=2
34 | test_info_list[reid_msmt]=2
35 | test_info_list[reid_senseid]=2
36 | test_info_list[par_lip_lpe]=3
37 | test_info_list[par_cihp_lpe]=4
38 | test_info_list[pa100k_lpe]=5
39 | test_info_list[rap2_lpe]=6
40 | #test_info_list[peta_lpe]=x # requires PETA queries
41 | test_info_list[pose_aic_lpe]=7
42 | test_info_list[peddet_caltech]=8
43 | test_info_list[peddet_inter_lpe]=8
44 |
45 | for TASK in "${!test_info_list[@]}"; do
46 | full_job_name=${job_name}_test_${TASK}
47 | now=$(date +"%Y%m%d_%H%M%S")
48 | GINFO_INDEX=${test_info_list[${TASK}]}
49 | LOG_FILE=logs/${full_job_name}_${now}.log  # full_job_name already contains _test_${TASK}
50 | echo "=======>${TASK} log file: ${LOG_FILE}"
51 | TEST_CONFIG=vd_${TASK}_test.yaml
52 | TEST_MODEL=checkpoints/${PRETRAIN_JOB_NAME}/ckpt_task${GINFO_INDEX}_iter_${iter}.pth.tar
53 | echo 'start job:' ${full_job_name} ' config:' ${CONFIG} ' test_config:' ${TEST_CONFIG}
54 |
55 | while true # find unused tcp port
56 | do
57 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 ))
58 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)"
59 | if [ "${status}" != "0" ]; then
60 | break;
61 | fi
62 | done
63 |
64 | GLOG_vmodule=MemcachedClient=-1 MKL_SERVICE_FORCE_INTEL=1 \
65 | srun -n${gpus} -p --gres=gpu:${g} --ntasks-per-node=${g} --gpu \
66 | --job-name=${full_job_name} --cpus-per-task=5 \
67 | python -W ignore -u ${ROOT}/test.py \
68 | --expname ${full_job_name} \
69 | --config ${CONFIG} \
70 | --test_config ${TEST_CONFIG} \
71 | --spec_ginfo_index ${GINFO_INDEX} \
72 | --load-path=${TEST_MODEL} \
73 | --tcp_port $PORT \
74 | 2>&1 | tee ${LOG_FILE} &
75 |
76 | sleep 10
77 | done
78 |
79 |
80 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -x
3 |
4 | # source /mnt/lustre/share/spring/r0.3.0
5 | ROOT=../../../
6 | export PYTHONPATH=$ROOT:$PYTHONPATH
7 |
8 | if [[ ! -d "logs" ]]; then
9 | mkdir logs
10 | fi
11 |
12 | while true # find unused tcp port
13 | do
14 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 ))
15 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)"
16 | if [ "${status}" != "0" ]; then
17 | break;
18 | fi
19 | done
20 | ################
21 | gpus=${1-8}
22 | TASK=${2-pose}
23 | GINFO_INDEX=${3-0} # task index config cherrypick (if necessary)
24 | job_name=${4-debug}
25 | ################### ||| additional params usually not used
26 | ################### vvv
27 | iter=${5-newest}
28 | PRETRAIN_JOB_NAME=${6-${job_name}}
29 | CONFIG=${7-${job_name}.yaml}
30 | TEST_CONFIG=${8-vd_${TASK}_test.yaml}
31 | TEST_MODEL=${9-checkpoints/${PRETRAIN_JOB_NAME}/ckpt_task${GINFO_INDEX}_iter_${iter}.pth.tar}
32 | ################
33 |
34 | g=$((${gpus}<8?${gpus}:8))
35 | echo 'start job:' ${job_name} ' config:' ${CONFIG} ' test_config:' ${TEST_CONFIG}
36 |
37 |
38 | now=$(date +"%Y%m%d_%H%M%S")
39 | LOG_FILE=logs/${job_name}_test_${now}.log
40 | echo 'log file: ' ${LOG_FILE}
41 |
42 | GLOG_vmodule=MemcachedClient=-1 MKL_SERVICE_FORCE_INTEL=1 \
43 | srun -n${gpus} -p --debug --gres=gpu:${g} --ntasks-per-node=${g} --gpu \
44 | --job-name=${job_name} --cpus-per-task=5 \
45 | python -W ignore -u ${ROOT}/test.py \
46 | --expname ${job_name} \
47 | --config ${CONFIG} \
48 | --test_config ${TEST_CONFIG} \
49 | --spec_ginfo_index ${GINFO_INDEX} \
50 | --load-path=${TEST_MODEL} \
51 | --tcp_port $PORT \
52 | 2>&1 | tee ${LOG_FILE}
53 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -x
3 |
4 | # source /mnt/lustre/share/spring/r0.3.0
5 | ROOT=../../../
6 | export PYTHONPATH=$ROOT:$PYTHONPATH
7 |
8 | if [[ ! -d "logs" ]]; then
9 | mkdir logs
10 | fi
11 |
12 | while true # find unused tcp port
13 | do
14 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 ))
15 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)"
16 | if [ "${status}" != "0" ]; then
17 | break;
18 | fi
19 | done
20 |
21 | ################
22 | gpus=${1-48}
23 | job_name=${2-debug}
24 | ################
25 | CONFIG=${3-${job_name}.yaml}
26 |
27 | g=$((${gpus}<8?${gpus}:8))
28 | echo 'start job:' ${job_name} ' config:' ${CONFIG}
29 |
30 | AutoResume=checkpoints/${job_name}/ckpt_task_iter_newest.pth.tar
31 |
32 | now=$(date +"%Y%m%d_%H%M%S")
33 | LOG_FILE=logs/${job_name}_${now}.log
34 | echo 'log file: ' ${LOG_FILE}
35 |
36 | GLOG_vmodule=MemcachedClient=-1 MKL_SERVICE_FORCE_INTEL=1 \
37 | srun -n${gpus} -p --gres=gpu:${g} --ntasks-per-node=${g} --gpu \
38 | --job-name=${job_name} --cpus-per-task=5 \
39 | python -W ignore -u ${ROOT}/multitask.py \
40 | --expname ${job_name} \
41 | --config ${CONFIG} \
42 | --auto-resume=checkpoints/${job_name}/ckpt_task_iter_newest.pth.tar \
43 | --tcp_port $PORT \
44 | 2>&1 | tee ${LOG_FILE}
45 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_h3m6_pose_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev_collate
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: COCOPoseTest # deprecated
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: COCOPosDatasetDev
19 | kwargs:
20 | # use_udp: True
21 | # data_use_ratio: 0.003
22 | ann_file: /mnt/path...to.../h36m/processed/annotation_body2d/h36m_coco_test.json
23 | img_prefix: /mnt/path...to.../h36m/processed/images/
24 | test_mode: True
25 | data_cfg: {
26 | 'use_gt_bbox': True,
27 | 'image_size':[192, 256],
28 | }
29 | sampler:
30 | batch_size: 256 # per card
31 | evaluation:
32 | cfg:
33 | interval: 10
34 | metric: ['PCK', 'EPE']
35 | key_indicator: AP
36 |
37 | soft_nms: False
38 | nms_thr: 1.0
39 | oks_thr: 0.9
40 | vis_thr: 0.2
41 | cls_logits_vis_thr: 0.05
42 | no_rescoring: True
43 |
44 | # extra:
45 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
46 | # max_size: 4480
47 | # flip: True
48 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_ochuman_pose_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev_collate
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer',
12 | # 'extra', 'evaluation', 'model_entry_type', 'load_ignore', 'ckpt_task_id']
13 | tasks:
14 | 0:
15 | name: COCOPoseTest # deprecated
16 | loss_weight: 1.0 # *redundant*
17 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
18 | dataset:
19 | type: COCOPosDatasetDev
20 | kwargs:
21 | # use_udp: False
22 | ann_file: /mnt/path...to.../OCHuman/annotations/ochuman_coco_format_test_range_0.00_1.00.json
23 | img_prefix: /mnt/path...to.../OCHuman/images/
24 | test_mode: True
25 | data_cfg: {
26 | 'use_gt_bbox': True,
27 | }
28 | sampler:
29 | batch_size: 256 # per card
30 | evaluation:
31 | cfg:
32 | interval: 10
33 | metric: mAP
34 | key_indicator: AP
35 |
36 | soft_nms: False
37 | nms_thr: 1.0
38 | oks_thr: 0.9
39 | vis_thr: 0.2
40 | cls_logits_vis_thr: 0.05
41 |
42 | # extra:
43 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
44 | # max_size: 4480
45 | # flip: True
46 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_pa100k_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | tasks:
12 | 0:
13 | name: pedattrTest
14 | loss_weight: 1.0
15 | gres_ratio: 1 # int, > 0| world/sum*ratio
16 | dataset:
17 | type: AttrDataset
18 | kwargs:
19 | task_spec:
20 | dataset: 'PA-100k'
21 | data_path: /mnt/path...to.../PA-100k/dataset.pkl
22 | root_path: /mnt/path...to.../PA-100k/data/
23 | augmentation:
24 | height: 256
25 | width: 192
26 | train: False
27 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_par_atr_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: ParTest
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: ATRParsingDataset
19 | kwargs:
20 | data_path: /mnt/path...to.../ATR
21 | dataset: val
22 | is_train: False
23 | cfg:
24 | # mean: [ 0.485, 0.456, 0.406 ]
25 | # std: [ 0.229, 0.224, 0.225 ]
26 | eval_crop_size: [ 480, 480 ]
27 | is_flip: False
28 | is_multi_scale: False
29 |
30 | ignore_value: 255 # TODO: duplicated with decoder.kwargs.ignore_value
31 | num_classes: 18
32 | label_list: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ]
33 |
34 | sampler:
35 | batch_size: 16 # per card
36 |
37 |
38 | # extra:
39 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
40 | # max_size: 4480
41 | # flip: True
42 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_par_cihp_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: ParTest
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: CIHPParsingDataset
19 | kwargs:
20 | data_path: /mnt/path...to.../CIHP
21 | dataset: val
22 | is_train: False
23 | cfg:
24 | # mean: [ 0.485, 0.456, 0.406 ]
25 | # std: [ 0.229, 0.224, 0.225 ]
26 | eval_crop_size: [ 480, 480 ]
27 | is_flip: False
28 | is_multi_scale: False
29 |
30 | ignore_value: 255 # TODO: duplicated with decoder.kwargs.ignore_value
31 | num_classes: 20
32 | label_list: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ]
33 |
34 | sampler:
35 | batch_size: 16 # per card
36 |
37 |
38 | # extra:
39 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
40 | # max_size: 4480
41 | # flip: True
42 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_par_lip_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: ParTest
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: LIPParsingDataset
19 | kwargs:
20 | data_path: /mnt/path...to.../LIP
21 | dataset: val
22 | is_train: False
23 | cfg:
24 | # mean: [ 0.485, 0.456, 0.406 ]
25 | # std: [ 0.229, 0.224, 0.225 ]
26 | eval_crop_size: [ 480, 480 ]
27 | is_flip: False
28 | is_multi_scale: False
29 |
30 | ignore_value: 255 # TODO: duplicated with decoder.kwargs.ignore_value
31 | num_classes: 20
32 | label_list: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ]
33 |
34 | sampler:
35 | batch_size: 16 # per card
36 |
37 | # extra:
38 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
39 | # max_size: 4480
40 | # flip: True
41 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_par_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: ParTest
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: Human3M6ParsingDataset
19 | kwargs:
20 | data_path: /mnt/path...to.../human3.6
21 | dataset: val
22 | is_train: False
23 | cfg:
24 | # mean: [ 0.485, 0.456, 0.406 ]
25 | # std: [ 0.229, 0.224, 0.225 ]
26 | eval_crop_size: [ 480, 480 ]
27 | is_flip: False
28 | is_multi_scale: False
29 |
30 | ignore_value: 255 # duplicated with decoder.kwargs.ignore_value
31 | num_classes: 25
32 | label_list: [ 0, 1, 2, 3, 6, 7, 8, 17, 18, 19, 25, 26, 27, 32, 33, 34, 38, 39, 43, 44,
33 | 46, 49, 50, 56, 58 ]
34 |
35 | sampler:
36 | batch_size: 16 # per card
37 |
38 |
39 | # extra:
40 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
41 | # max_size: 4480
42 | # flip: True
43 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_peddet_caltech_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 | kwargs:
8 | pos_thr: 0.0
9 | gt_path: '/mnt/path...to.../Caltech/test_caltech_heavy_1xnew.odgt'
10 |
11 |
12 | sync: True
13 | collate: det
14 |
15 | tasks:
16 | 0:
17 | name: CrowdHumanPeddetTest
18 | loss_weight: 1.0 # *redundant*
19 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
20 | dataset:
21 | type: PedestrainDetectionDataset
22 | kwargs:
23 | task_spec:
24 | img_folder: /mnt/path...to.../Caltech/Images
25 | ann_file: /mnt/path...to.../Caltech/test_caltech_1xnew.json
26 | return_masks: False
27 | augmentation: {'max_size': 1333}
28 | vit: True
29 | train: False
30 | sampler:
31 | batch_size: 1
32 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_peddet_inter_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 | kwargs:
8 | pos_thr: 0.05
9 | gt_path: 'CHval.odgt'
10 |
11 |
12 | sync: True
13 | collate: det
14 |
15 | tasks:
16 | 0:
17 | name: CrowdHumanPeddetTest
18 | loss_weight: 1.0 # *redundant*
19 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
20 | dataset:
21 | type: PedestrainDetectionDataset
22 | kwargs:
23 | task_spec:
24 | img_folder: /mnt/path...to.../CrowdHuman/Images
25 | ann_file: /mnt/path...to.../CrowdHuman/annotations/val.json
26 | return_masks: False
27 | augmentation: {}
28 | vit: True
29 | train: False
30 | sampler:
31 | batch_size: 1
32 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_peta_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | tasks:
12 | 0:
13 | name: pedattrTest
14 | loss_weight: 1.0
15 | gres_ratio: 1 # int, > 0| world/sum*ratio
16 | dataset:
17 | type: AttrDataset
18 | kwargs:
19 | task_spec:
20 | dataset: 'peta'
21 | data_path: /mnt/path...to.../peta/dataset.pkl
22 | root_path: /mnt/path...to.../peta/images/
23 | augmentation:
24 | height: 256
25 | width: 192
26 | train: False
27 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_pose_aic_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev_collate
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: AICPoseTest # deprecated
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: MultiPoseDatasetDev
19 | kwargs:
20 | dataset_name: 'aic'
21 | ann_file: '/mnt/path...to.../ai_challenge/annotations/aic_val.json'
22 | img_prefix: '/mnt/path...to.../ai_challenge/ai_challenger_keypoint_validation_20170911/keypoint_validation_images_20170911/'
23 | test_mode: True
24 | sampler:
25 | batch_size: 256 # per card
26 | evaluation:
27 | cfg:
28 | interval: 10
29 | metric: mAP
30 | key_indicator: AP
31 |
32 | soft_nms: False
33 | nms_thr: 1.0
34 | oks_thr: 0.9
35 | vis_thr: 0.2
36 |
37 | sigmas: [
38 | 0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
39 | 0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
40 | 0.01291456, 0.01236173
41 | ]
42 | use_area: False
43 | cls_logits_vis_thr: 0.05
44 |
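The 14 sigmas above are per-keypoint OKS constants for the AIC skeleton, and use_area: False suggests the object scale is taken from the bounding box rather than a segmentation area. For reference, a minimal COCO-style OKS computation showing where such sigmas enter the mAP metric (a sketch; the actual scoring is presumably delegated to xtcocotools, see requirements.txt):

    import numpy as np

    def oks(pred, gt, vis, scale, sigmas):
        # pred, gt: (K, 2) keypoints; vis: (K,) visibility flags; scale: object area
        vars_ = (2.0 * np.asarray(sigmas)) ** 2
        d2 = np.sum((np.asarray(pred) - np.asarray(gt)) ** 2, axis=1)
        e = d2 / vars_ / (scale + np.spacing(1)) / 2.0
        vis = np.asarray(vis) > 0
        return float(np.mean(np.exp(-e[vis]))) if vis.any() else 0.0
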
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_pose_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev_collate
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: COCOPoseTest # deprecated
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: COCOPosDatasetDev
19 | kwargs:
20 | ann_file: '/mnt/path...to.../coco/annotations/person_keypoints_val2017.json'
21 | img_prefix: '/mnt/path...to.../coco/val2017/'
22 | test_mode: True
23 | sampler:
24 | batch_size: 256 # per card
25 | evaluation:
26 | cfg:
27 | interval: 10
28 | metric: mAP
29 | key_indicator: AP
30 |
31 | soft_nms: False
32 | nms_thr: 1.0
33 | oks_thr: 0.9
34 | vis_thr: 0.2
35 | cls_logits_vis_thr: 0.05
36 |
37 | # extra:
38 | # min_sizes: [320, 480, 640, 800, 960, 1120] # TTA
39 | # max_size: 4480
40 | # flip: True
41 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_pose_mpii_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev_collate
10 |
11 | # task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer']
12 | tasks:
13 | 0:
14 | name: MPIIPoseTest # deprecated
15 | loss_weight: 1.0 # *redundant*
16 | gres_ratio: 1 # int, > 0| world/sum*ratio, *redundant*
17 | dataset:
18 | type: MPIIPosDatasetDev
19 | kwargs:
20 | ann_file: '/mnt/path...to.../coco/annotations/mpii_val.json'
21 | img_prefix: '/mnt/path...to.../MPI/images/'
22 | test_mode: True
23 | sampler:
24 | batch_size: 256 # per card
25 | evaluation:
26 | cfg:
27 | interval: 10
28 | metric: PCKh
29 |
30 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_rap2_lpe_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: TesterMultiTaskDev
7 |
8 | sync: True
9 | collate: dev
10 |
11 | tasks:
12 | 0:
13 | name: pedattrTest
14 | loss_weight: 1.0
15 | gres_ratio: 1 # int, > 0| world/sum*ratio
16 | dataset:
17 | type: AttrDataset
18 | kwargs:
19 | task_spec:
20 | dataset: 'rap'
21 | data_path: /mnt/path...to.../rap2/dataset.pkl
22 | root_path: /mnt/path...to.../rap2/RAP_dataset/
23 | augmentation:
24 | height: 256
25 | width: 192
26 | train: False
27 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_reid_cuhk3_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | tester:
3 | type: 'ReIDTester'
4 | test_feature_name: 'feature_nobn'
5 |
6 | sync: True
7 |
 8 | tasks:
 9 |   0:
10 |     name: cuhk03
11 |     loss_weight: 1.0
12 | gres_ratio: 1
13 | dataset:
14 | type: ReIDTestDataset
15 | kwargs:
16 | root_path: /mnt/path...to.../
17 | query_file_path:
18 | - /mnt/path...to.../cuhk03_1/data_list/probe.txt
19 | gallery_file_path:
20 | - /mnt/path...to.../cuhk03_1/data_list/gallery.txt
21 | loader: 'pil'
22 | vit: True
23 | sampler:
24 | batch_size: 128 # per card
25 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_reid_msmt_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | tester:
3 | type: 'ReIDTester'
4 | test_feature_name: 'feature_nobn'
5 |
6 | sync: True
7 |
 8 | tasks:
 9 |   0:
10 |     name: msmt
11 |     loss_weight: 1.0
12 | gres_ratio: 1
13 | dataset:
14 | type: ReIDTestDataset
15 | kwargs:
16 | root_path: /mnt/path...to.../
17 | query_file_path:
18 | - /mnt/path...to.../MSMT17_V1/data_list/probe.txt
19 | gallery_file_path:
20 | - /mnt/path...to.../MSMT17_V1/data_list/gallery.txt
21 | loader: 'pil'
22 | vit: True
23 | sampler:
24 | batch_size: 128 # per card
25 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_reid_senseid_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | tester:
3 | type: 'ReIDTester'
4 | test_feature_name: 'feature_nobn'
5 |
6 | sync: True
7 |
 8 | tasks:
 9 |   0:
10 |     name: senseid
11 |     loss_weight: 1.0
12 | gres_ratio: 1
13 | dataset:
14 | type: ReIDTestDataset
15 | kwargs:
16 | root_path: /mnt/path...to.../SenseReID/
17 | query_file_path:
18 | - /mnt/path...to.../SenseReID/data_list/probe.txt
19 | gallery_file_path:
20 | - /mnt/path...to.../SenseReID/data_list/gallery.txt
21 | loader: 'pil'
22 | vit: True
23 | sampler:
24 | batch_size: 128 # per card
25 |
--------------------------------------------------------------------------------
/experiments/unihcp/release/vd_reid_test.yaml:
--------------------------------------------------------------------------------
1 | common:
2 | backbone:
3 | kwargs:
4 | test_pos_mode: learnable_interpolate
5 | tester:
6 | type: 'ReIDTester'
7 | test_feature_name: 'feature_nobn'
8 |
9 | sync: True
10 |
11 | tasks:
12 |   0:
13 |     name: market1501
14 |     loss_weight: 1.0
15 | gres_ratio: 1
16 | dataset:
17 | type: ReIDTestDataset
18 | kwargs:
19 | root_path: /mnt/path...to.../
20 | query_file_path:
21 | - /mnt/path...to.../market1501/data_list/probe.txt
22 | gallery_file_path:
23 | - /mnt/path...to.../market1501/data_list/gallery.txt
24 | loader: 'pil'
25 | vit: True
26 | sampler:
27 | batch_size: 32 # per card
28 |
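All of the release test configs above share one shape: a common block selecting the tester, plus a top-level tasks dict indexed from 0. test.py counts the tasks and builds one Config per index; a minimal sketch of that consumption (path hypothetical):

    import yaml

    with open('experiments/unihcp/release/vd_reid_test.yaml') as f:
        cfg = yaml.safe_load(f)

    print(cfg['common']['tester']['type'])            # ReIDTester
    for task_id, task in sorted(cfg['tasks'].items()):
        print(task_id, task['name'], task['dataset']['type'])
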
--------------------------------------------------------------------------------
/helper/align.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from skimage import transform as trans
4 |
5 | def affine_align(img, landmark=None, **kwargs):
6 | M = None
7 | src = np.array([
8 | [38.2946, 51.6963],
9 | [73.5318, 51.5014],
10 | [56.0252, 71.7366],
11 | [41.5493, 92.3655],
12 |         [70.7299, 92.2041]], dtype=np.float32)
13 | # src=src * 224 / 112
14 |
15 | dst = landmark.astype(np.float32)
16 | tform = trans.SimilarityTransform()
17 | tform.estimate(dst, src)
18 | M = tform.params[0:2,:]
19 | warped = cv2.warpAffine(img, M, (112, 112), borderValue = 0.0)
20 | return warped
21 |
22 |
23 | def kestrel_get_similar_matrix(src_points, dst_points):
24 |     if src_points.size != dst_points.size:
25 |         raise ValueError(
26 |             "the size of src_points and dst_points must be the same, "
27 |             "which is {0} vs. {1}".format(src_points.size, dst_points.size))
28 |
29 | dst_points = dst_points.T.reshape(-1)
30 |
31 | point_num = src_points.shape[0]
32 | new_src_points = np.zeros((point_num * 2, 4))
33 | new_src_points[:point_num, :2] = src_points
34 | new_src_points[:point_num, 2] = 1
35 | new_src_points[:point_num, 3] = 0
36 |
37 | new_src_points[point_num:, 0] = src_points[:, 1]
38 | new_src_points[point_num:, 1] = -src_points[:, 0]
39 | new_src_points[point_num:, 2] = 0
40 | new_src_points[point_num:, 3] = 1
41 |
42 | min_square_solution = np.linalg.lstsq(new_src_points, dst_points,
43 | rcond=-1)[0]
44 |
45 | trans_matrix = np.array([
46 | [ min_square_solution[0], -min_square_solution[1], 0 ],
47 | [ min_square_solution[1], min_square_solution[0], 0 ],
48 | [ min_square_solution[2], min_square_solution[3], 1 ],
49 | ])
50 |
51 | return trans_matrix.T[:2]
52 |
53 | def transform(pts, M):
54 | dst = np.matmul(pts, M[:, :2].T)
55 | dst[:, 0] += M[0, 2]
56 | dst[:, 1] += M[1, 2]
57 | return dst
58 |
59 |
60 | def affine_alignSDK(img, landmark=None, borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_LINEAR):
61 | M = None
62 | dst_points = np.array([[70.745156, 111.9996875], [108.23625, 111.9996875], [89.700875, 153.514375]], dtype=np.float32)
63 | default_shape = (178,218)
64 | lmk = landmark.astype(np.float32)
65 | src_points = np.array([
66 | lmk[0], lmk[1],
67 | (lmk[3] + lmk[4]) / 2
68 | ], dtype=np.float32)
69 | # src_points = get_trans_points(landmarks)
70 | trans_matrix = kestrel_get_similar_matrix(src_points, dst_points)
71 | trans_matrix = np.concatenate((trans_matrix, [[0, 0, 1]]), axis=0)
72 | # print(rotate_points_106)
73 |     return cv2.warpPerspective(img, trans_matrix, default_shape, flags=flags, borderMode=borderMode), trans_matrix
74 |
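For reference, a minimal usage sketch of affine_align above (landmark values are hypothetical; the five points follow the usual eyes/nose/mouth-corners template order, and the repo root is assumed importable):

    import numpy as np
    from helper.align import affine_align

    img = np.zeros((320, 320, 3), dtype=np.uint8)      # stand-in for a real photo
    landmark = np.array([[120.5, 150.2], [180.3, 148.9], [151.0, 185.4],
                         [128.7, 215.1], [175.2, 214.0]])

    aligned = affine_align(img, landmark)              # similarity-warped face crop
    print(aligned.shape)                               # (112, 112, 3)
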
--------------------------------------------------------------------------------
/helper/flops_helper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import logging
4 | from collections.abc import Iterable  # Iterable lives in collections.abc on Python 3
5 |
6 | # from .misc_helper import to_device
7 |
8 |
9 | logger = logging.getLogger('global')
10 |
11 |
12 | def clever_format(nums, format="%.2f"):
13 | if not isinstance(nums, Iterable):
14 | nums = [nums]
15 | clever_nums = []
16 |
17 | for num in nums:
18 | num = int(num)
19 | if num > 1e12:
20 | clever_nums.append(format % (num / 1e12) + "T")
21 | elif num > 1e9:
22 | clever_nums.append(format % (num / 1e9) + "G")
23 | elif num > 1e6:
24 | clever_nums.append(format % (num / 1e6) + "M")
25 | elif num > 1e3:
26 | clever_nums.append(format % (num / 1e3) + "K")
27 | else:
28 | clever_nums.append(format % num + "B")
29 |
30 | clever_nums = clever_nums[0] if len(clever_nums) == 1 else (*clever_nums,)
31 |
32 | return clever_nums
33 |
34 |
35 | def flops_cal(model, input_shape):
36 | inputs = {
37 | 'image': torch.randn(1, input_shape[0], input_shape[1], input_shape[2]),
38 | 'image_info': [[input_shape[1], input_shape[2], 1, input_shape[1], input_shape[2], False]],
39 | 'filename': ['Test.jpg'],
40 | 'label': torch.LongTensor([[0]]),
41 | }
42 | # flops, params = profile(model, inputs=(to_device(inputs),))
43 | flops, params = profile(model, inputs=(inputs,))
44 | flops_str, params_str = clever_format([flops, params], "%.3f")
45 | flops = flops / 1e6
46 |     params = params / 1e6
47 | return flops, params, flops_str, params_str
48 |
49 |
50 | def profile(model, inputs, verbose=True):
51 | handler_collection = []
52 |
53 | def add_hooks(m):
54 | if len(list(m.children())) > 0:
55 | return
56 |
57 | m.register_buffer('total_ops', torch.zeros(1))
58 | m.register_buffer('total_params', torch.zeros(1))
59 |
60 | m_type = type(m)
61 | fn = None
62 | if m_type in register_hooks:
63 | fn = register_hooks[m_type]
64 |
65 | if fn is None:
66 | if verbose:
67 |                 print("No counting method implemented for {} in flops_helper".format(m))
68 | else:
69 | handler = m.register_forward_hook(fn)
70 | handler_collection.append(handler)
71 |
72 | # original_device = model.parameters().__next__().device
73 | training = model.training
74 |
75 | model.eval()
76 | model.apply(add_hooks)
77 |
78 | # with torch.no_grad():
79 | model(*inputs)
80 |
81 | total_ops = 0
82 | total_params = 0
83 | for m in model.modules():
84 | if len(list(m.children())) > 0: # skip for non-leaf module
85 | continue
86 | total_ops += m.total_ops
87 | total_params += m.total_params
88 |
89 | # total_ops = total_ops.item()
90 | # total_params = total_params.item()
91 | total_ops = total_ops[0]
92 | total_params = total_params[0]
93 |
94 | # reset model to original status
95 | model.train(training)
96 | for handler in handler_collection:
97 | handler.remove()
98 |
99 | return total_ops, total_params
100 |
101 |
102 | multiply_adds = 1
103 |
104 |
105 | def count_zero(m, x, y):
106 | m.total_ops = torch.Tensor([0])
107 | m.total_params = torch.Tensor([0])
108 |
109 |
110 | def count_conv2d(m, x, y):
111 | cin = m.in_channels
112 | cout = m.out_channels
113 | kh, kw = m.kernel_size
114 | out_h = y.size(2)
115 | out_w = y.size(3)
116 | batch_size = x[0].size(0)
117 |
118 | kernel_ops = multiply_adds * kh * kw
119 | bias_ops = 1 if m.bias is not None else 0
120 |
121 | output_elements = batch_size * out_w * out_h * cout
122 | total_ops = output_elements * kernel_ops * cin // m.groups + bias_ops * output_elements
123 | m.total_ops = torch.Tensor([int(total_ops)])
124 |
125 | total_params = kh * kw * cin * cout // m.groups + bias_ops * cout
126 | m.total_params = torch.Tensor([int(total_params)])
127 |
128 |
129 | def count_bn(m, x, y):
130 | x = x[0]
131 | c_out = y.size(1)
132 | nelements = x.numel()
133 | # subtract, divide, gamma, beta
134 | total_ops = 4 * nelements
135 |
136 | m.total_ops = torch.Tensor([int(total_ops)])
137 | m.total_params = torch.Tensor([int(c_out) * 2])
138 |
139 |
140 | def count_relu(m, x, y):
141 | x = x[0]
142 | nelements = x.numel()
143 | total_ops = nelements
144 |
145 | m.total_ops = torch.Tensor([int(total_ops)])
146 |
147 |
148 | def count_softmax(m, x, y):
149 | x = x[0]
150 | batch_size, nfeatures = x.size()
151 | total_exp = nfeatures
152 | total_add = nfeatures - 1
153 | total_div = nfeatures
154 | total_ops = batch_size * (total_exp + total_add + total_div)
155 |
156 | m.total_ops = torch.Tensor([int(total_ops)])
157 |
158 |
159 | def count_avgpool(m, x, y):
160 | total_add = torch.prod(torch.Tensor([m.kernel_size]))
161 | total_div = 1
162 | kernel_ops = total_add + total_div
163 | num_elements = y.numel()
164 | total_ops = kernel_ops * num_elements
165 |
166 | m.total_ops = torch.Tensor([int(total_ops)])
167 |
168 |
169 | def count_adap_avgpool(m, x, y):
170 | kernel = torch.Tensor([*(x[0].shape[2:])]) // torch.Tensor(list((m.output_size,))).squeeze()
171 | total_add = torch.prod(kernel)
172 | total_div = 1
173 | kernel_ops = total_add + total_div
174 | num_elements = y.numel()
175 | total_ops = kernel_ops * num_elements
176 |
177 | m.total_ops = torch.Tensor([int(total_ops)])
178 |
179 |
180 | def count_linear(m, x, y):
181 | # per output element
182 | total_mul = m.in_features
183 | total_add = m.in_features - 1
184 | num_elements = y.numel()
185 | total_ops = (total_mul + total_add) * num_elements
186 |
187 | m.total_ops = torch.Tensor([int(total_ops)])
188 |     m.total_params = torch.Tensor([m.in_features * m.out_features + (0 if m.bias is None else m.out_features)])
189 |
190 |
191 | register_hooks = {
192 | nn.Conv2d: count_conv2d,
193 | nn.BatchNorm2d: count_zero,
194 | nn.InstanceNorm2d: count_zero,
195 | nn.ConvTranspose2d: count_conv2d,
196 | nn.ReLU: count_zero,
197 | nn.ReLU6: count_zero,
198 | nn.Tanh: count_zero,
199 | nn.LeakyReLU: count_zero,
200 | nn.AvgPool2d: count_zero,
201 | nn.AdaptiveAvgPool2d: count_zero,
202 | nn.Linear: count_linear,
203 | nn.Dropout: count_zero,
204 | nn.Sigmoid: count_zero,
205 | nn.Softmax: count_zero,
206 | # VarChannelConv2d: VarChannelConv2d.flops_count,
207 | # VarChannelBatchNorm2d: VarChannelBatchNorm2d.flops_count,
208 | # VarChannelSyncBatchNorm2d: VarChannelSyncBatchNorm2d.flops_count,
209 | # VarChannelSyncMultiBatchNorm2d: VarChannelSyncMultiBatchNorm2d.flops_count,
210 | # VarChannelLinear: VarChannelLinear.flops_count,
211 | # DeprecatedGroupSyncBatchNorm: count_zero,
212 | # Identity: count_zero,
213 | # VcIdentity: count_zero,
214 | nn.MaxPool2d: count_zero,
215 | nn.CrossEntropyLoss: count_zero,
216 | # SamePadConv2d: count_conv2d,
217 | # conv_bn_swish: count_zero,
218 | # Swish: count_zero
219 | }
220 |
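A minimal smoke test for profile/clever_format above, on a toy model rather than the UniHCP dict-input path used by flops_cal (a sketch; repo root assumed importable):

    import torch
    import torch.nn as nn
    from helper.flops_helper import profile, clever_format

    model = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                          nn.Flatten(), nn.Linear(16 * 32 * 32, 10))
    ops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32),), verbose=False)
    print(clever_format([ops, params], "%.3f"))        # e.g. ('...M', '...K')
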
--------------------------------------------------------------------------------
/helper/param_count.py:
--------------------------------------------------------------------------------
1 | import multiprocessing as mp
2 | mp.set_start_method('spawn', force=True)
3 |
4 | import torch
5 | import torch.nn as nn
6 | from functools import reduce
7 | import operator
8 |
9 |
10 |
11 |
12 |
13 | def count_parameters_num(model):
14 | count = 0
15 | count_fc = 0
16 | param_dict = {name:param for name,param in model.named_parameters()}
17 | param_keys = param_dict.keys()
18 | for m_name, m in model.named_modules():
19 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d):
20 | weight_name = m_name + '.weight'
21 | bias_name = m_name + '.bias'
22 | if weight_name in param_keys:
23 | temp_params = param_dict[weight_name]
24 | count += temp_params.data.nelement()
25 | if bias_name in param_keys:
26 | temp_params = param_dict[bias_name]
27 | count += temp_params.data.nelement()
28 | elif isinstance(m, nn.Linear):
29 | weight_name = m_name + '.weight'
30 | bias_name = m_name + '.bias'
31 | if weight_name in param_keys:
32 | temp_params = param_dict[weight_name]
33 | count_fc += temp_params.data.nelement()
34 | if bias_name in param_keys:
35 | temp_params = param_dict[bias_name]
36 | count_fc += temp_params.data.nelement()
37 | print('Number of conv/bn params: %.2fM' % (count / 1e6))
38 | print('Number of linear params: %.2fM' % (count_fc / 1e6))
39 | print('Number of all params: %.2fM' % ( (count+count_fc) / 1e6))
40 |
41 | # def count_flops(model, input_image_size):
42 | # counts = []
43 |
44 | # # loop over all model parts
45 | # for m in model.modules():
46 | # if isinstance(m, nn.Conv2d):
47 | # def hook(module, input):
48 | # factor = 2*module.in_channels*module.out_channels
49 | # factor *= module.kernel_size[0]*module.kernel_size[1]
50 | # factor //= module.stride[0]*module.stride[1]
51 | # counts.append(
52 | # factor*input[0].data.shape[2]*input[0].data.shape[3]
53 | # )
54 | # m.register_forward_pre_hook(hook)
55 | # elif isinstance(m, nn.Linear):
56 | # counts += [
57 | # 2*m.in_features*m.out_features
58 | # ]
59 |
60 | # noise_image = torch.rand(
61 | # 2, 3, input_image_size, input_image_size
62 | # )
63 | # # one forward pass
64 | # with torch.no_grad():
65 | # _ = model(torch.autograd.Variable(noise_image.cuda()))
66 | # return sum(counts)
67 |
68 | def get_layer_param(model):
69 | return sum([reduce(operator.mul, i.size(), 1) for i in model.parameters()])
70 |
71 | def measure_model(model, input_image_size, forward_param=None):
72 | flop_counts = []
73 | param_counts = []
74 | multi_add = 2
75 |
76 | # loop over all model parts
77 | for m in model.modules():
78 | if isinstance(m, nn.Conv2d):
79 | def hook(module, x):
80 | out_h = int((x[0].size()[2] + 2 * module.padding[0] - module.kernel_size[0]) / module.stride[0] + 1)
81 | out_w = int((x[0].size()[3] + 2 * module.padding[1] - module.kernel_size[1]) / module.stride[1] + 1)
82 | ops = module.in_channels * module.out_channels * module.kernel_size[0] * module.kernel_size[1] * out_h * out_w / module.groups * multi_add
83 | flop_counts.append(ops)
84 | param_counts.append(get_layer_param(module))
85 | m.register_forward_pre_hook(hook)
86 |
87 | elif isinstance(m, nn.ReLU) or isinstance(m, nn.PReLU):
88 | def hook(module, x):
89 | ops = x[0].numel()
90 | flop_counts.append(ops)
91 | param_counts.append(get_layer_param(module))
92 | m.register_forward_pre_hook(hook)
93 |
94 | elif isinstance(m, nn.AvgPool2d):
95 | def hook(module, x):
96 |                 in_h, in_w = x[0].size()[2], x[0].size()[3]
97 |                 kernel_ops = module.kernel_size * module.kernel_size
98 |                 out_w = int((in_w + 2 * module.padding - module.kernel_size) / module.stride + 1)
99 |                 out_h = int((in_h + 2 * module.padding - module.kernel_size) / module.stride + 1)
100 |                 ops = x[0].size()[0] * x[0].size()[1] * out_w * out_h * kernel_ops
101 |                 flop_counts.append(ops)
102 |                 param_counts.append(get_layer_param(module))
103 |             m.register_forward_pre_hook(hook)
104 | elif isinstance(m, nn.AdaptiveAvgPool2d):
105 | def hook(module, x):
106 | ops = x[0].size()[0] * x[0].size()[1] * x[0].size()[2] * x[0].size()[3]
107 | flop_counts.append(ops)
108 | param_counts.append(get_layer_param(module))
109 | m.register_forward_pre_hook(hook)
110 |
111 | elif isinstance(m, nn.Linear):
112 | def hook(module, x):
113 | weight_ops = module.weight.numel() * multi_add
114 | bias_ops = module.bias.numel()
115 | ops = x[0].size()[0] * (weight_ops + bias_ops)
116 | flop_counts.append(ops)
117 | param_counts.append(get_layer_param(module))
118 | m.register_forward_pre_hook(hook)
119 |
120 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d) \
121 | or isinstance(m, nn.Dropout2d) or isinstance(m, nn.Dropout):
122 | def hook(module, x):
123 | param_counts.append(get_layer_param(module))
124 | m.register_forward_pre_hook(hook)
125 |
126 | else:
127 | # print('unknown layer type: %s' % type(m))
128 | pass
129 |
130 | if isinstance(input_image_size, int):
131 | noise_image = torch.rand(1, 3, input_image_size, input_image_size)
132 | else:
133 | noise_image = torch.rand(1, 3, input_image_size[0], input_image_size[1])
134 | # one forward pass
135 | with torch.no_grad():
136 | if forward_param is not None:
137 | _ = model(noise_image.cuda(), forward_param)
138 | else:
139 | _ = model(noise_image.cuda())
140 | # _ = model(torch.autograd.Variable(noise_image.cuda(), requires_grad=False))
141 |
142 | return sum(param_counts), sum(flop_counts)
143 |
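A minimal usage sketch for the counters above (toy model; measure_model moves the probe image to GPU, so it assumes a CUDA build):

    import torch
    import torch.nn as nn
    from helper.param_count import count_parameters_num, measure_model

    model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
                          nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(8, 5))
    count_parameters_num(model)                        # prints conv/bn, linear, total

    if torch.cuda.is_available():
        params, flops = measure_model(model.cuda(), 64)
        print(params, flops)
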
--------------------------------------------------------------------------------
/helper/vis_helper.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import cv2
3 | import os
4 | import numpy as np
5 | import torch
6 | import pickle as pk
7 |
8 |
9 | def inv_normalize_batch(image, mean_arr, stddev_arr):
10 | # normalize image color channels
11 | inv_normed_image = image.clone()
12 | for c in range(3):
13 | if len(image.size()) == 4:
14 | inv_normed_image[:, c, :, :] = (image[:, c, :, :] * stddev_arr[c] + mean_arr[c])
15 | else:
16 | inv_normed_image[c, :, :] = (image[c, :, :] * stddev_arr[c] + mean_arr[c])
17 | return inv_normed_image
18 |
19 |
20 | def get_vis_data(input, range_low=-1, range_high=1, vis_height=-1, vis_width=-1, to_rgb=True):
21 | if input is None:
22 | return None
23 |
24 | data = ((input.permute(1, 2, 0) - range_low) / (
25 | range_high - range_low) * 255.0).data.cpu().numpy()
26 | if vis_height > 0 and vis_width > 0:
27 | if data.shape[0] != vis_height or data.shape[1] != vis_width:
28 | data = cv2.resize(data, (vis_width, vis_height))
29 | if len(data.shape) == 2:
30 | data = cv2.cvtColor(data, cv2.COLOR_GRAY2RGB)
31 | else:
32 | if to_rgb:
33 | data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
34 | return data
35 |
36 |
37 | def vis_one_from_batch(vis_list, range_low=0, range_high=1,
38 | vis_height=140, vis_width=140, vis_channel=3, to_rgb=True, return_CHW=True):
39 |
40 | vis_dict = dict()
41 | for item in vis_list:
42 | '''
43 | vis_list = [{
44 | 'name': 'lap_adv',
45 | 'image': laplace_adv
46 | }]
47 | '''
48 | vis_image = get_vis_data(item['image'], range_low, range_high, vis_height, vis_width, to_rgb=to_rgb)
49 | vis_dict[item['name']] = vis_image
50 |
51 | cnt = 0
52 | for tag, item in vis_dict.items():
53 | if item is not None:
54 | cnt += 1
55 |
56 | # adapt to visualize format
57 | rst = np.zeros((vis_height, vis_width * cnt, vis_channel))
58 | pos = 0
59 | for tag, item in vis_dict.items():
60 | if item is not None:
61 | left = vis_width * pos
62 | right = vis_width * (pos + 1)
63 | rst[:, left: right] = item
64 | cv2.putText(rst, tag, (left + 2, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
65 | pos += 1
66 |
67 | rst = rst.clip(0, 255).astype(np.uint8, copy=False)
68 | if return_CHW:
69 | # prepare for tensorboard [RGB, CHW]
70 | rst = rst.transpose((2, 0, 1)) # HWC -> CHW
71 |
72 | return rst
73 |
74 |
75 | def get_features_indices(feature_maps, topk=1):
76 |     """
77 |     Select the indices of the most strongly activated channels.
78 |     :param topk: number of top channels to return per sample
79 |     :param feature_maps: floatTensor [N, C, H, W]
80 |     :return: longTensor of channel indices, [N, topk] (dims squeezed for 2D/3D input)
81 |     """
82 | input_dim = 4
83 | if len(feature_maps.size()) == 2:
84 | input_dim = 2
85 | feature_maps = feature_maps.unsqueeze(0)
86 | if len(feature_maps.size()) == 3:
87 | input_dim = 3
88 | feature_maps = feature_maps.unsqueeze(0)
89 | N, C, H, W = feature_maps.size()
90 | feats = feature_maps.view(N, C, -1)
91 | feats_sum = torch.sum(feats, dim=2)
92 | y, ind = torch.sort(feats_sum, dim=1, descending=True)
93 | selected_ind = ind[:, :topk]
94 | if input_dim < 4:
95 | return selected_ind.squeeze(0)
96 | return selected_ind
97 |
98 |
99 | def show_feature_map(feature_map, reference=None, range_low=-1, range_high=1):
100 |     """
101 |     Visualize feature maps as JET heatmaps, optionally blended over a reference image.
102 |     :param feature_map: floatTensor [C, H, W]
103 |     :param reference: floatTensor [3, H, W]
104 |     :return: horizontally stacked visualization (numpy array, BGR)
105 |     """
106 | if isinstance(feature_map, torch.Tensor):
107 | feature_map = feature_map.cpu().numpy()
108 | feature_map_num = feature_map.shape[0]
109 | row_num = np.ceil(np.sqrt(feature_map_num))
110 |
111 | if reference is not None:
112 | if isinstance(reference, torch.Tensor):
113 | reference = ((reference.permute(1, 2, 0) - range_low) / (
114 | range_high - range_low)).data.cpu().numpy()
115 | reference = np.uint8(255 * reference)
116 | reference = cv2.cvtColor(reference, cv2.COLOR_RGB2BGR)
117 | all_vis = reference
118 | else:
119 | all_vis = None
120 |
121 | for index in range(0, feature_map_num):
122 | feat = feature_map[index]
123 | heatmap = feat / np.max(feat)
124 | heatmap = np.uint8(255 * heatmap)
125 | heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
126 | if reference is not None:
127 | if not heatmap.shape == reference.shape:
128 | heatmap = cv2.resize(heatmap, (reference.shape[1], reference.shape[0]), interpolation=cv2.INTER_CUBIC)
129 | vis = cv2.addWeighted(heatmap, 0.5, reference, 0.5, 0)
130 | else:
131 | vis = heatmap
132 | if index == 0:
133 | all_vis = vis
134 | else:
135 | all_vis = np.hstack([all_vis, vis])
136 |
137 | return all_vis
138 |
139 |
140 | def dump_to_pickle(file_path, data):
141 | with open(file_path, "wb") as f:
142 | pk.dump(data, f)
143 |
144 |
145 | def load_from_pickle(file_path):
146 | assert os.path.exists(file_path), "file not exist: {}".format(file_path)
147 | with open(file_path, "rb") as f:
148 | meta = pk.load(f)
149 | return meta
150 |
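A minimal usage sketch for the visualization helpers above (random features stand in for a real backbone output; repo root assumed importable):

    import torch
    from helper.vis_helper import show_feature_map, get_features_indices

    feats = torch.rand(1, 64, 14, 14)                  # hypothetical feature maps
    top = get_features_indices(feats, topk=4)          # strongest channels, [1, 4]
    canvas = show_feature_map(feats[0, top[0]])        # hstacked JET heatmaps (BGR)
    print(canvas.shape)                                # (14, 56, 3)
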
--------------------------------------------------------------------------------
/multitask.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import os
3 | import multiprocessing as mp
4 | if mp.get_start_method(allow_none=True) != 'spawn':
5 | mp.set_start_method('spawn')
6 | import argparse
7 | from core.distributed_utils import dist_init
8 | from core.config import Config
9 | from core.solvers import solver_entry
10 | import torch
11 |
12 | parser = argparse.ArgumentParser(description='Multi-Task Training Framework')
13 | parser.add_argument('--load-path', default='', type=str)
14 | parser.add_argument('--ignore', nargs='+', default=[], type=str)
15 | parser.add_argument('--recover', action='store_true')
16 | parser.add_argument('--load-single', action='store_true')
17 | parser.add_argument('--port', default='23456', type=str)
18 | parser.add_argument('--config', default='', type=str)
19 | parser.add_argument('--expname', type=str, default=None, help='experiment name, output folder')
20 | parser.add_argument('--auto-resume', type=str, default=None, help='jobs auto resume from the pattern_path or the folder')
21 | parser.add_argument('--forwardbn', action='store_true', help='just forward for re-calculating bn values')
22 | parser.add_argument('--finetune', action='store_true')
23 | parser.add_argument("--tcp_port", type=str, default="5671")
24 |
25 | def main():
26 | args = parser.parse_args()
27 | dist_init(port=str(args.tcp_port))
28 |
29 | C = Config(args.config)
30 | if args.expname is not None:
31 | C.config['expname'] = args.expname
32 |
33 | S = solver_entry(C)
34 | config_save_to = os.path.join(S.ckpt_path, 'config.yaml')
35 |
36 | # auto resume strategy for srun
37 | if args.auto_resume is not None:
38 | args.auto_resume = os.path.join(S.out_dir, args.auto_resume)
39 | if os.path.isdir(args.auto_resume):
40 | max_iter = 0
41 | filename = os.listdir(args.auto_resume)
42 | for file in filename:
43 | if file.startswith('ckpt_task0') and file.endswith('.pth.tar'):
44 | cur_iter = int(file.split('_')[-1].split('.')[0])
45 | max_iter = max(max_iter, cur_iter)
46 | if max_iter > 0:
47 | args.load_path = os.path.join(args.auto_resume,
48 | 'ckpt_task_iter_{}.pth.tar'.format(str(max_iter)))
49 | args.recover = True
50 | args.ignore = []
51 | print('auto-resume from: {}'.format(args.load_path))
52 | elif args.auto_resume.endswith('.pth.tar'):
53 | tmpl = args.auto_resume.replace('ckpt_task_', 'ckpt_task*_')
54 | import glob
55 | ckpt = glob.glob(tmpl)
56 | if len(ckpt) > 0:
57 | args.load_path = args.auto_resume
58 | args.recover = True
59 | args.ignore = []
60 | print('auto-resume from: {}'.format(args.load_path))
61 | else:
62 |             print('auto-resume did not work: {}'.format(args.auto_resume))
63 |
64 | #tmp = torch.Tensor(1).cuda()
65 | if not os.path.exists(config_save_to):
66 | shutil.copy(args.config, config_save_to)
67 |
68 | S.initialize(args)
69 |
70 | S.run()
71 |
72 |
73 |
74 | if __name__ == '__main__':
75 | main()
76 |
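Given the flags above, a typical launch (paths hypothetical) is: python multitask.py --config experiments/unihcp/release/<train>.yaml --expname my_run --auto-resume checkpoints --tcp_port 5671; train.sh in the release folder presumably wraps the equivalent srun invocation.
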
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dict_recursive_update==1.0.1
2 | easydict==1.9
3 | json_tricks==3.15.5
4 | numpy==1.19.5
5 | opencv_python==4.1.1.26
6 | pycocotools_fix==2.0.0.9
7 | scikit_image==0.16.2
8 | scipy==1.3.1
9 | Shapely==1.8.1.post1
10 | timm==0.6.7
11 | torch==1.8.1+cuda90.cudnn7.6.5
12 | torchvision==0.8.0a0+2f40a48
13 | tqdm==4.37.0
14 | xtcocotools==1.12
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import os
3 | import multiprocessing as mp
4 | if mp.get_start_method(allow_none=True) != 'spawn':
5 | mp.set_start_method('spawn')
6 | import argparse
7 | from core.distributed_utils import dist_init
8 | from core.config import Config
9 | from core.testers import tester_entry
10 | import torch
11 | import yaml
12 | import re
13 |
14 | parser = argparse.ArgumentParser(description='Multi-Task Testing Framework')
15 | parser.add_argument('--spec_ginfo_index', type=int, required=True)
16 | parser.add_argument('--load-path', default='', type=str)
17 | parser.add_argument('--ignore', nargs='+', default=[], type=str)
18 | parser.add_argument('--recover', action='store_true')
19 | parser.add_argument('--load-single', action='store_true')
20 | parser.add_argument('--port', default='23456', type=str)
21 | parser.add_argument('--config', default='', type=str)
22 | parser.add_argument('--test_config', default='', type=str)
23 | parser.add_argument('--expname', type=str, default=None, help='experiment name, output folder')
24 | parser.add_argument('--auto-resume', type=str, default=None, help='jobs auto resume from the pattern_path or the folder')
25 | parser.add_argument("--tcp_port", type=str, default="5671")
26 |
27 | loader = yaml.SafeLoader  # patched below so floats in scientific notation (e.g. 1e-3) parse as floats, not strings
28 | loader.add_implicit_resolver(
29 | u'tag:yaml.org,2002:float',
30 | re.compile(u'''^(?:
31 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
32 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
33 | |\\.[0-9_]+(?:[eE][-+][0-9]+)?
34 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
35 | |[-+]?\\.(?:inf|Inf|INF)
36 | |\\.(?:nan|NaN|NAN))$''', re.X),
37 | list(u'-+0123456789.'))
38 |
39 | def main():
40 | args = parser.parse_args()
41 | dist_init(port=str(args.tcp_port))
42 |
43 | # auto resume strategy for srun
44 | if args.auto_resume is not None:
45 | if os.path.isdir(args.auto_resume):
46 | max_iter = 0
47 | filename = os.listdir(args.auto_resume)
48 | for file in filename:
49 | if file.startswith('ckpt_task0') and file.endswith('.pth.tar'):
50 | cur_iter = int(file.split('_')[-1].split('.')[0])
51 | max_iter = max(max_iter, cur_iter)
52 | if max_iter > 0:
53 | args.load_path = os.path.join(args.auto_resume,
54 | 'ckpt_task_iter_{}.pth.tar'.format(str(max_iter)))
55 | args.recover = True
56 | args.ignore = []
57 | print('auto-resume from: {}'.format(args.load_path))
58 | elif args.auto_resume.endswith('.pth.tar'):
59 | tmpl = args.auto_resume.replace('ckpt_task_', 'ckpt_task*_')
60 | import glob
61 | ckpt = glob.glob(tmpl)
62 | if len(ckpt) > 0:
63 | args.load_path = args.auto_resume
64 | args.recover = True
65 | args.ignore = []
66 | print('auto-resume from: {}'.format(args.load_path))
67 | else:
68 |             print('auto-resume did not work: {}'.format(args.auto_resume))
69 |
70 | #tmp = torch.Tensor(1).cuda()
71 | C_train = Config(args.config, spec_ginfo_index=args.spec_ginfo_index)
72 |
73 | with open(args.test_config) as f:
74 | test_config = yaml.load(f, Loader=loader)
75 | num_test_tasks = len(test_config['tasks'])
76 |
77 | for test_spec_ginfo_index in range(num_test_tasks):
78 | C_test = Config(args.test_config, spec_ginfo_index=test_spec_ginfo_index)
79 | if args.expname is not None:
80 | C_train.config['expname'] = args.expname
81 |
82 | S = tester_entry(C_train, C_test)
83 | config_save_to = os.path.join(S.ckpt_path, 'config.yaml')
84 | test_config_save_to = os.path.join(S.ckpt_path, 'test_config_task{}.yaml'.format(test_spec_ginfo_index))
85 | if not os.path.exists(config_save_to):
86 | shutil.copy(args.config, config_save_to)
87 | shutil.copy(args.test_config, test_config_save_to)
88 |
89 | S.initialize(args)
90 |
91 | S.run()
92 |
93 |
94 | if __name__ == '__main__':
95 | main()
96 |
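Given the flags above, a typical single-task evaluation (paths hypothetical) is: python test.py --spec_ginfo_index 0 --config experiments/unihcp/release/<train>.yaml --test_config experiments/unihcp/release/vd_reid_test.yaml --load-path ckpt_task_iter_<N>.pth.tar; test.sh and batch_test.sh in the release folder presumably wrap this.
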
--------------------------------------------------------------------------------