├── .idea
│   ├── Rethinking_of_PAR.iml
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── other.xml
│   ├── vcs.xml
│   └── workspace.xml
├── README.md
├── batch_engine.py
├── configs
│   ├── __init__.py
│   ├── default.py
│   ├── multilabel_baseline
│   │   ├── __init__.py
│   │   └── coco.yaml
│   └── pedes_baseline
│       ├── __init__.py
│       ├── pa100k.yaml
│       ├── peta.yaml
│       ├── peta_zs.yaml
│       ├── rap_zs.yaml
│       ├── rapv1.yaml
│       └── rapv2.yaml
├── data
│   ├── COCO14
│   │   └── ml_anno
│   │       ├── category.json
│   │       ├── coco14_train_anno.pkl
│   │       └── coco14_val_anno.pkl
│   ├── PA100k
│   │   └── dataset_all.pkl
│   ├── PETA
│   │   ├── dataset_all.pkl
│   │   └── dataset_zs_run0.pkl
│   ├── RAP
│   │   └── dataset_all.pkl
│   └── RAP2
│       ├── dataset_all.pkl
│       └── dataset_zs_run0.pkl
├── dataset
│   ├── __init__.py
│   ├── augmentation.py
│   ├── autoaug.py
│   ├── multi_label
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   └── voc.py
│   └── pedes_attr
│       ├── __init__.py
│       ├── annotation.md
│       ├── pedes.py
│       └── preprocess
│           ├── __init__.py
│           ├── format_pa100k.py
│           ├── format_peta.py
│           ├── format_rap.py
│           └── format_rap2.py
├── docs
│   ├── __init__.py
│   └── illus_zs.png
├── infer.py
├── losses
│   ├── __init__.py
│   ├── bceloss.py
│   ├── label_smoothing.py
│   └── scaledbceloss.py
├── metrics
│   ├── __init__.py
│   ├── ml_metrics.py
│   └── pedestrian_metrics.py
├── models
│   ├── __init__.py
│   ├── backbone
│   │   ├── __init__.py
│   │   ├── bninception.py
│   │   ├── checkpoints
│   │   │   └── __init__.py
│   │   ├── resnet.py
│   │   ├── resnet_ibn.py
│   │   ├── swin_transformer.py
│   │   ├── tresnet
│   │   │   ├── __init__.py
│   │   │   ├── layers
│   │   │   │   ├── __init__.py
│   │   │   │   ├── anti_aliasing.py
│   │   │   │   ├── avg_pool.py
│   │   │   │   └── general_layers.py
│   │   │   └── tresnet.py
│   │   └── vit.py
│   ├── base_block.py
│   ├── model_ema.py
│   ├── model_factory.py
│   └── registry.py
├── optim
│   ├── __init__.py
│   └── adamw.py
├── requirements.txt
├── scheduler
│   ├── __init__.py
│   ├── cos_annealing_with_restart.py
│   ├── cosine_lr.py
│   └── scheduler.py
├── tools
│   ├── __init__.py
│   ├── distributed.py
│   ├── function.py
│   ├── utils.py
│   └── vis.py
├── train.py
└── train_gpu.sh

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Rethinking of Pedestrian Attribute Recognition: A Reliable Evaluation under Zero-Shot Pedestrian Identity Setting (official PyTorch implementation)

![zero-shot](docs/illus_zs.png)
_This [paper](https://arxiv.org/abs/2107.03576) submitted to TIP is an extension of our previous [arXiv paper](https://arxiv.org/abs/2005.11909)._

This project is adopted by [JDAI-CV/fast-reid](https://github.com/JDAI-CV/fast-reid/tree/master/projects/FastAttr) and the [PP-Human](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/pphuman/docs/attribute.md) pipeline of PaddleDetection.

#### This project aims to
1. provide a strong baseline for __Pedestrian Attribute Recognition__ and __Multi-Label Classification__.
2. provide two new datasets, RAPzs and PETAzs, which follow the zero-shot pedestrian identity setting.
3. provide a general training pipeline for pedestrian attribute recognition and multi-label classification tasks.

#### This project provides
1. DDP training, which is mainly used for multi-label classification.
2. Training on all attributes, but testing only on "selected" attributes: attributes whose proportion of positive samples falls below a threshold (e.g. 0.01) are excluded from evaluation (see the selection sketch after this list).
   1. For PETA and PETAzs, 35 of the 105 attributes are selected for performance evaluation.
   2. For RAPv1, 51 of the 92 attributes are selected for performance evaluation.
   3. For RAPv2 and RAPzs, 54 and 53 of the 152 attributes are selected for performance evaluation, respectively.
   4. For PA100k, all attributes are selected for performance evaluation.
   - However, training on all attributes _does not bring_ a consistent performance improvement across datasets.
3. An EMA model.
4. Transformer-based models, such as Swin Transformer (with a large performance improvement) and ViT.
5. Convenient dataset info files such as dataset_all.pkl.
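The attribute selection in point 2 is implemented by slicing the label matrix with the evaluation indices stored in the dataset pickles (see `dataset/pedes_attr/pedes.py`). A minimal sketch (editor's illustration, assuming a `dataset_info` pickle with the fields used in this repo):

```python
import pickle
import numpy as np

# load the dataset description pickle shipped with this repo
dataset_info = pickle.load(open('data/PETA/dataset_all.pkl', 'rb'))

label = np.asarray(dataset_info.label)       # (num_images, num_attributes)
eval_idx = dataset_info.label_idx.eval       # indices of the selected attributes
eval_label = label[:, eval_idx]              # labels actually used for evaluation

# equivalently, a threshold-based selection over positive ratios:
pos_ratio = label.mean(axis=0)
selected = np.where(pos_ratio > 0.01)[0]
```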
## Dataset Info
- PETA: Pedestrian Attribute Recognition At Far Distance [[Paper](http://mmlab.ie.cuhk.edu.hk/projects/PETA_files/Pedestrian%20Attribute%20Recognition%20At%20Far%20Distance.pdf)][[Project](http://mmlab.ie.cuhk.edu.hk/projects/PETA.html)]

- PA100K [[Paper](http://openaccess.thecvf.com/content_ICCV_2017/papers/Liu_HydraPlus-Net_Attentive_Deep_ICCV_2017_paper.pdf)][[Github](https://github.com/xh-liu/HydraPlus-Net)]

- RAP: A Richly Annotated Dataset for Pedestrian Attribute Recognition
  - v1 [[Paper](https://arxiv.org/pdf/1603.07054v3.pdf)][[Project](http://www.rapdataset.com/)]
  - v2 [[Paper](https://ieeexplore.ieee.org/abstract/document/8510891)][[Project](http://www.rapdataset.com/)]

- PETAzs & RAPzs: Rethinking of Pedestrian Attribute Recognition: A Reliable Evaluation under Zero-Shot Pedestrian Identity Setting [[Paper](https://arxiv.org/abs/2107.03576)][[Project](http://www.rapdataset.com/)]


## Performance

### Pedestrian Attribute Recognition

|Datasets|Models|ma|Acc|Prec|Rec|F1|
|:------:|:---:|---|---|---|---|---|
|PA100k|resnet50|80.21|79.15|87.79|87.01|87.40|
|--|resnet50*|79.85|79.13|89.45|85.40|87.38|
|--|resnet50 + EMA|81.97|80.20|88.06|88.17|88.11|
|--|bninception|79.13|78.19|87.42|86.21|86.81|
|--|TresnetM|74.46|68.72|79.82|80.71|80.26|
|--|swin_s|82.19|80.35|87.85|88.51|88.18|
|--|vit_s|79.40|77.61|86.41|86.22|86.32|
|--|vit_b|81.01|79.38|87.60|87.49|87.55|
|PETA|resnet50|83.96|78.65|87.08|85.62|86.35|
|PETAzs|resnet50|71.43|58.69|74.41|69.82|72.04|
|RAPv1|resnet50|79.27|67.98|80.19|79.71|79.95|
|RAPv2|resnet50|78.52|66.09|77.20|80.23|78.68|
|RAPzs|resnet50|71.76|64.83|78.75|76.60|77.66|

- The resnet50* model is trained with the weighted loss function proposed by Tan et al. in [AAAI 2020](https://ojs.aaai.org/index.php/AAAI/article/view/6883).
- The performance on PETAzs and RAPzs is based on the first version of these datasets, as described in the [paper](https://arxiv.org/abs/2107.03576).
- Experiments are conducted with an input size of (256, 192), so there may be minor differences from the results in the paper.
- The reported performance is reached at the first drop of the learning rate; we also take that checkpoint as the best model.
- Pretrained models are now provided at [Google Drive]().

### Multi-label Classification

|Datasets|Models|mAP|CP|CR|CF1|OP|OR|OF1|
|--------|---|---|---|---|---|---|---|---|
|COCO|resnet101|82.75|84.17|72.07|77.65|85.16|75.47|80.02|


## Pretrained Models


## Dependencies

- python 3.7
- pytorch 1.7.0
- torchvision 0.8.2
- cuda 10.1


## Get Started
1. Run `git clone https://github.com/valencebond/Rethinking_of_PAR.git`.
2. Create a directory in which to download the above datasets:
   ```
   cd Rethinking_of_PAR
   mkdir data
   ```
3. Prepare the datasets so that they have the following structure:
   ```
   ${project_dir}/data
       PETA
           images/
           PETA.mat
           dataset_all.pkl
           dataset_zs_run0.pkl
       PA100k
           data/
           dataset_all.pkl
       RAP
           RAP_dataset/
           RAP_annotation/
           dataset_all.pkl
       RAP2
           RAP_dataset/
           RAP_annotation/
           dataset_zs_run0.pkl
       COCO14
           train2014/
           val2014/
           ml_anno/
               category.json
               coco14_train_anno.pkl
               coco14_val_anno.pkl
   ```
4. Train the resnet50-based baseline:
   ```
   sh train_gpu.sh
   ```
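The YAML files under `configs/` are merged into the yacs defaults at startup. A minimal sketch of that mechanism (editor's illustration; `train.py` builds the real arguments with argparse, so check it for the actual flag names):

```python
from argparse import Namespace

from configs import cfg, update_config  # exported in configs/__init__.py

# hypothetical stand-in for the parsed command-line arguments
args = Namespace(cfg='configs/pedes_baseline/pa100k.yaml')

update_config(cfg, args)   # merge_from_file(args.cfg), then freeze
print(cfg.DATASET.NAME)    # -> 'PA100k'
```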
## Acknowledgements

Codes are based on the repositories from [Dangwei Li](https://github.com/dangweili/pedestrian-attribute-recognition-pytorch)
and [Houjing Huang](https://github.com/dangweili/pedestrian-attribute-recognition-pytorch). Thanks for their released code.


### Citation

If you use this method or this code in your research, please cite:

    @article{jia2021rethinking,
      title={Rethinking of Pedestrian Attribute Recognition: A Reliable Evaluation under Zero-Shot Pedestrian Identity Setting},
      author={Jia, Jian and Huang, Houjing and Chen, Xiaotang and Huang, Kaiqi},
      journal={arXiv preprint arXiv:2107.03576},
      year={2021}
    }

    @inproceedings{jia2021spatial,
      title={Spatial and semantic consistency regularizations for pedestrian attribute recognition},
      author={Jia, Jian and Chen, Xiaotang and Huang, Kaiqi},
      booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
      pages={962--971},
      year={2021}
    }

--------------------------------------------------------------------------------
/batch_engine.py:
--------------------------------------------------------------------------------
import math
import time

import numpy as np
import torch
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm

from tools.distributed import reduce_tensor
from tools.utils import AverageMeter, to_scalar, time_str


def logits4pred(criterion, logits_list):
    # map raw logits to probabilities according to the loss in use
    if criterion.__class__.__name__.lower() in ['bceloss']:
        logits = logits_list[0]
        probs = logits.sigmoid()
    else:
        assert False, f"{criterion.__class__.__name__.lower()} does not exist"

    return probs, logits
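# Usage sketch (editor's illustration, not part of the original file):
#     train_logits, feat = model(imgs, gt_label)
#     probs, _ = logits4pred(criterion, train_logits)   # train_logits is a list of heads
#     preds = probs > 0.5                               # binary attribute predictions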

def batch_trainer(cfg, args, epoch, model, model_ema, train_loader, criterion, optimizer, loss_w=[1, ], scheduler=None):
    model.train()
    epoch_time = time.time()

    loss_meter = AverageMeter()
    subloss_meters = [AverageMeter() for i in range(len(loss_w))]

    batch_num = len(train_loader)
    gt_list = []
    preds_probs = []
    preds_logits = []
    imgname_list = []
    loss_mtr_list = []

    lr = optimizer.param_groups[1]['lr']

    for step, (imgs, gt_label, imgname) in enumerate(train_loader):
        iter_num = epoch * len(train_loader) + step

        batch_time = time.time()
        imgs, gt_label = imgs.cuda(), gt_label.cuda()
        train_logits, feat = model(imgs, gt_label)

        loss_list, loss_mtr = criterion(train_logits, gt_label)

        train_loss = 0

        for i, l in enumerate(loss_w):
            train_loss += loss_list[i] * l

        optimizer.zero_grad()
        train_loss.backward()

        # debugging helper: report parameters that received no gradient
        # for name, param in model.named_parameters():
        #     if param.grad is None:
        #         print("NO " + name)
        #     else:
        #         print("YES " + name)

        if cfg.TRAIN.CLIP_GRAD:
            clip_grad_norm_(model.parameters(), max_norm=10.0)  # makes larger learning rates work

        optimizer.step()

        if cfg.TRAIN.LR_SCHEDULER.TYPE == 'annealing_cosine' and scheduler is not None:
            scheduler.step()

        if model_ema is not None:
            model_ema.update(model)

        torch.cuda.synchronize()

        if len(loss_list) > 1:
            for i, meter in enumerate(subloss_meters):
                meter.update(
                    to_scalar(reduce_tensor(loss_list[i], args.world_size)
                              if args.distributed else loss_list[i]))
        loss_meter.update(to_scalar(reduce_tensor(train_loss, args.world_size) if args.distributed else train_loss))

        train_probs, train_logits = logits4pred(criterion, train_logits)

        gt_list.append(gt_label.cpu().numpy())
        preds_probs.append(train_probs.detach().cpu().numpy())
        preds_logits.append(train_logits.detach().cpu().numpy())

        imgname_list.append(imgname)

        log_interval = 100

        if (step + 1) % log_interval == 0 or (step + 1) % len(train_loader) == 0:
            if args.local_rank == 0:
                print(f'{time_str()}, '
                      f'Step {step}/{batch_num} in Ep {epoch}, '
                      f'LR: [{optimizer.param_groups[0]["lr"]:.1e}, {optimizer.param_groups[1]["lr"]:.1e}] '
                      f'Time: {time.time() - batch_time:.2f}s , '
                      f'train_loss: {loss_meter.avg:.4f}, ')

                print([f'{meter.avg:.4f}' for meter in subloss_meters])

        # break

    train_loss = loss_meter.avg

    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)

    if args.local_rank == 0:
        print(f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, Loss: {loss_meter.avg:.4f}')

    return train_loss, gt_label, preds_probs, imgname_list, preds_logits, loss_mtr_list


def valid_trainer(cfg, args, epoch, model, valid_loader, criterion, loss_w=[1, ]):
    model.eval()
    loss_meter = AverageMeter()
    subloss_meters = [AverageMeter() for i in range(len(loss_w))]

    preds_probs = []
    preds_logits = []
    gt_list = []
    imgname_list = []
    loss_mtr_list = []

    with torch.no_grad():
        for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            valid_logits, feat = model(imgs, gt_label)

            loss_list, loss_mtr = criterion(valid_logits, gt_label)
            valid_loss = 0
            for i, l in enumerate(loss_list):
                valid_loss += loss_w[i] * l

            valid_probs, valid_logits = logits4pred(criterion, valid_logits)
            preds_probs.append(valid_probs.cpu().numpy())
            preds_logits.append(valid_logits.cpu().numpy())

            if len(loss_list) > 1:
                for i, meter in enumerate(subloss_meters):
                    meter.update(
                        to_scalar(reduce_tensor(loss_list[i], args.world_size) if args.distributed else loss_list[i]))
            loss_meter.update(to_scalar(reduce_tensor(valid_loss, args.world_size) if args.distributed else valid_loss))

            torch.cuda.synchronize()

            imgname_list.append(imgname)

    valid_loss = loss_meter.avg

    if args.local_rank == 0:
        print([f'{meter.avg:.4f}' for meter in subloss_meters])

    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    preds_logits = np.concatenate(preds_logits, axis=0)

    return valid_loss, gt_label, preds_probs, imgname_list, preds_logits, loss_mtr_list
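# Editor's note — a minimal sketch of how these two loops are typically driven
# (train.py wires up the real objects; the names here are placeholders):
#
#     for epoch in range(cfg.TRAIN.MAX_EPOCH):
#         train_loss, gt, probs, *_ = batch_trainer(
#             cfg, args, epoch, model, model_ema, train_loader, criterion, optimizer)
#         valid_loss, gt, probs, *_ = valid_trainer(
#             cfg, args, epoch, model, valid_loader, criterion)
#         lr_scheduler.step(metrics=valid_loss)  # for the 'plateau' scheduler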
--------------------------------------------------------------------------------
/configs/__init__.py:
--------------------------------------------------------------------------------
from .default import _C as cfg
from .default import update_config
--------------------------------------------------------------------------------
/configs/default.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from yacs.config import CfgNode as CN

_C = CN()


# ----- BASIC SETTINGS -----
_C.NAME = "default"
_C.REDIRECTOR = True

_C.RELOAD = CN()
_C.RELOAD.TYPE = False
_C.RELOAD.NAME = 'backbone'
_C.RELOAD.PTH = ''


_C.DISTRIBUTTED = False

# ----- DATASET BUILDER -----
_C.DATASET = CN()
_C.DATASET.TYPE = "pedes"
_C.DATASET.NAME = "PA100k"
_C.DATASET.TARGETTRANSFORM = []
_C.DATASET.ZERO_SHOT = False
_C.DATASET.LABEL = 'eval'  # train on all labels, test on part of the labels (35 for PETA, 51 for RAP)
_C.DATASET.TRAIN_SPLIT = 'trainval'
_C.DATASET.VAL_SPLIT = 'val'
_C.DATASET.TEST_SPLIT = 'test'
_C.DATASET.HEIGHT = 256
_C.DATASET.WIDTH = 192

# ----- BACKBONE BUILDER -----
_C.BACKBONE = CN()
_C.BACKBONE.TYPE = "resnet50"
_C.BACKBONE.MULTISCALE = False

# ----- MODULE BUILDER -----
# _C.MODULE = CN()
# _C.MODULE.TYPE = "GAP"

# ----- CLASSIFIER BUILDER -----
_C.CLASSIFIER = CN()
_C.CLASSIFIER.TYPE = "base"
_C.CLASSIFIER.NAME = ""
_C.CLASSIFIER.POOLING = "avg"
_C.CLASSIFIER.BN = False
_C.CLASSIFIER.SCALE = 1

# ----- TRAIN BUILDER -----
_C.TRAIN = CN()
_C.TRAIN.BATCH_SIZE = 64
_C.TRAIN.MAX_EPOCH = 30
_C.TRAIN.SHUFFLE = True
_C.TRAIN.NUM_WORKERS = 4
_C.TRAIN.CLIP_GRAD = False
_C.TRAIN.BN_WD = True

_C.TRAIN.DATAAUG = CN()
_C.TRAIN.DATAAUG.TYPE = 'base'
_C.TRAIN.DATAAUG.AUTOAUG_PROB = 0.5

_C.TRAIN.EMA = CN()
_C.TRAIN.EMA.ENABLE = False
_C.TRAIN.EMA.DECAY = 0.9998
_C.TRAIN.EMA.FORCE_CPU = False

_C.TRAIN.OPTIMIZER = CN()
_C.TRAIN.OPTIMIZER.TYPE = "SGD"
_C.TRAIN.OPTIMIZER.MOMENTUM = 0.9
_C.TRAIN.OPTIMIZER.WEIGHT_DECAY = 1e-4

_C.TRAIN.LR_SCHEDULER = CN()
_C.TRAIN.LR_SCHEDULER.TYPE = "plateau"
_C.TRAIN.LR_SCHEDULER.LR_STEP = [0, ]
_C.TRAIN.LR_SCHEDULER.LR_FT = 1e-2
_C.TRAIN.LR_SCHEDULER.LR_NEW = 1e-2
_C.TRAIN.LR_SCHEDULER.WMUP_COEF = 0.1


_C.TRAIN.AUX_LOSS_START = -1

# ----- INFER BUILDER -----

_C.INFER = CN()
_C.INFER.SAMPLING = False

# ----- LOSS BUILDER -----
_C.LOSS = CN()
_C.LOSS.TYPE = "bce"
_C.LOSS.SAMPLE_WEIGHT = ""  # None
_C.LOSS.LOSS_WEIGHT = [1, ]
_C.LOSS.SIZESUM = True  # for each sample, the BCE loss is summed over all labels instead of averaged
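# Editor's illustration (not part of the original file): SAMPLE_WEIGHT = 'weight'
# switches the BCE loss to the per-attribute weighting precomputed in the dataset
# pickles (weight_train / weight_trainval in format_pa100k.py), built from
#
#     pos_ratio = train_label.mean(axis=0)   # positive ratio per attribute
#
# and consumed by the weighted BCE in losses/bceloss.py; see that file for the exact form.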
96 | 97 | _C.METRIC = CN() 98 | _C.METRIC.TYPE = 'pedestrian' 99 | 100 | # ------ visualization --------- 101 | _C.VIS = CN() 102 | _C.VIS.CAM = 'valid' 103 | _C.VIS.TENSORBOARD = CN() 104 | _C.VIS.TENSORBOARD.ENABLE = True 105 | 106 | _C.VIS.VISDOM = False 107 | 108 | 109 | # ----------- Transformer ------------- 110 | _C.TRANS = CN() 111 | _C.TRANS.DIM_HIDDEN = 256 112 | _C.TRANS.DROPOUT = 0.1 113 | _C.TRANS.NHEADS = 8 114 | _C.TRANS.DIM_FFD = 2048 115 | _C.TRANS.ENC_LAYERS = 6 116 | _C.TRANS.DEC_LAYERS = 6 117 | _C.TRANS.PRE_NORM = False 118 | _C.TRANS.EOS_COEF = 0.1 119 | _C.TRANS.NUM_QUERIES = 100 120 | 121 | 122 | # testing 123 | # _C.TEST = CN() 124 | # _C.TEST.BATCH_SIZE = 32 125 | # _C.TEST.NUM_WORKERS = 8 126 | # _C.TEST.MODEL_FILE = "" 127 | # 128 | # _C.TRANSFORMS = CN() 129 | # _C.TRANSFORMS.TRAIN_TRANSFORMS = ("random_resized_crop", "random_horizontal_flip") 130 | # _C.TRANSFORMS.TEST_TRANSFORMS = ("shorter_resize_for_crop", "center_crop") 131 | # 132 | # _C.TRANSFORMS.PROCESS_DETAIL = CN() 133 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_CROP = CN() 134 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_CROP.PADDING = 4 135 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_RESIZED_CROP = CN() 136 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_RESIZED_CROP.SCALE = (0.08, 1.0) 137 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_RESIZED_CROP.RATIO = (0.75, 1.333333333) 138 | 139 | 140 | def update_config(cfg, args): 141 | cfg.defrost() 142 | 143 | cfg.merge_from_file(args.cfg) # update cfg 144 | # cfg.merge_from_list(args.opts) 145 | 146 | cfg.freeze() 147 | -------------------------------------------------------------------------------- /configs/multilabel_baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/configs/multilabel_baseline/__init__.py -------------------------------------------------------------------------------- /configs/multilabel_baseline/coco.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet101.sgd.bt32' 2 | 3 | DATASET: 4 | TYPE: 'multi_label' 5 | NAME: 'coco14' 6 | TRAIN_SPLIT: 'train' 7 | VAL_SPLIT: 'val' 8 | HEIGHT: 448 9 | WIDTH: 448 10 | 11 | METRIC: 12 | TYPE: 'multi_label' 13 | 14 | RELOAD: 15 | TYPE: False 16 | NAME: 'backbone' 17 | 18 | BACKBONE: 19 | TYPE: 'resnet101' 20 | MULTISCALE: False 21 | 22 | CLASSIFIER: 23 | NAME: 'linear' 24 | POOLING: 'max' 25 | SCALE: 1 26 | BN: False 27 | 28 | LOSS: 29 | TYPE: 'bceloss' 30 | LOSS_WEIGHT: [1] 31 | SIZESUM: True 32 | 33 | TRAIN: 34 | BN_WD: True 35 | BATCH_SIZE: 16 36 | CLIP_GRAD: True 37 | MAX_EPOCH: 30 38 | OPTIMIZER: 39 | TYPE: 'SGD' 40 | MOMENTUM: 0.9 41 | WEIGHT_DECAY: 1e-4 42 | LR_SCHEDULER: 43 | TYPE: 'multistep' 44 | LR_STEP: [ 14, ] 45 | LR_FT: 0.0005 46 | LR_NEW: 0.005 47 | EMA: 48 | ENABLE: False 49 | 50 | VIS: 51 | CAM: 'train' 52 | 53 | -------------------------------------------------------------------------------- /configs/pedes_baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/configs/pedes_baseline/__init__.py -------------------------------------------------------------------------------- /configs/pedes_baseline/pa100k.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet50.base.adam' 2 | 3 | DATASET: 4 | TYPE: 
'pedes' 5 | NAME: 'PA100k' 6 | TRAIN_SPLIT: 'trainval' 7 | VAL_SPLIT: 'test' 8 | ZERO_SHOT: False 9 | LABEL: 'eval' 10 | HEIGHT: 256 11 | WIDTH: 192 12 | 13 | RELOAD: 14 | TYPE: False 15 | NAME: 'backbone' 16 | 17 | BACKBONE: 18 | TYPE: 'resnet50' 19 | # TYPE: 'bninception' 20 | # TYPE: 'tresnetM' 21 | # TYPE: 'swin_s' 22 | # TYPE: 'vit_s' 23 | # TYPE: 'vit_b' 24 | 25 | CLASSIFIER: 26 | NAME: 'linear' 27 | POOLING: 'avg' 28 | SCALE: 1 29 | BN: False 30 | 31 | LOSS: 32 | TYPE: 'bceloss' 33 | LOSS_WEIGHT: [1] 34 | SAMPLE_WEIGHT: 'weight' 35 | 36 | 37 | TRAIN: 38 | CLIP_GRAD: True 39 | BATCH_SIZE: 64 40 | OPTIMIZER: 41 | TYPE: 'adam' 42 | WEIGHT_DECAY: 5e-4 43 | LR_SCHEDULER: 44 | TYPE: 'plateau' 45 | LR_FT: 0.0001 46 | LR_NEW: 0.0001 47 | EMA: 48 | ENABLE: True 49 | 50 | METRIC: 51 | TYPE: 'pedestrian' 52 | 53 | VIS: 54 | CAM: 'valid' 55 | -------------------------------------------------------------------------------- /configs/pedes_baseline/peta.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet50.base.adam' 2 | 3 | DATASET: 4 | TYPE: 'pedes' 5 | NAME: 'PETA' 6 | TRAIN_SPLIT: 'trainval' 7 | VAL_SPLIT: 'test' 8 | ZERO_SHOT: False 9 | LABEL: 'eval' 10 | HEIGHT: 256 11 | WIDTH: 192 12 | 13 | RELOAD: 14 | TYPE: False 15 | NAME: 'backbone' 16 | 17 | BACKBONE: 18 | TYPE: 'resnet50' 19 | 20 | CLASSIFIER: 21 | NAME: 'linear' 22 | POOLING: 'avg' 23 | SCALE: 1 24 | BN: False 25 | 26 | LOSS: 27 | TYPE: 'bceloss' 28 | LOSS_WEIGHT: [1] 29 | SAMPLE_WEIGHT: 'weight' 30 | 31 | 32 | TRAIN: 33 | CLIP_GRAD: True 34 | BATCH_SIZE: 64 35 | OPTIMIZER: 36 | TYPE: 'adam' 37 | WEIGHT_DECAY: 5e-4 38 | LR_SCHEDULER: 39 | TYPE: 'plateau' 40 | LR_FT: 0.0001 41 | LR_NEW: 0.0001 42 | EMA: 43 | ENABLE: False 44 | 45 | METRIC: 46 | TYPE: 'pedestrian' 47 | 48 | VIS: 49 | CAM: 'valid' 50 | -------------------------------------------------------------------------------- /configs/pedes_baseline/peta_zs.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet50.base.adam' 2 | 3 | DATASET: 4 | TYPE: 'pedes' 5 | NAME: 'PETA' 6 | TRAIN_SPLIT: 'trainval' 7 | VAL_SPLIT: 'test' 8 | ZERO_SHOT: True 9 | LABEL: 'eval' 10 | HEIGHT: 256 11 | WIDTH: 192 12 | 13 | RELOAD: 14 | TYPE: False 15 | NAME: 'backbone' 16 | 17 | BACKBONE: 18 | TYPE: 'resnet50' 19 | 20 | CLASSIFIER: 21 | NAME: 'linear' 22 | POOLING: 'avg' 23 | SCALE: 1 24 | BN: False 25 | 26 | LOSS: 27 | TYPE: 'bceloss' 28 | LOSS_WEIGHT: [1] 29 | SAMPLE_WEIGHT: 'weight' 30 | 31 | 32 | TRAIN: 33 | CLIP_GRAD: True 34 | BATCH_SIZE: 64 35 | OPTIMIZER: 36 | TYPE: 'adam' 37 | WEIGHT_DECAY: 5e-4 38 | LR_SCHEDULER: 39 | TYPE: 'plateau' 40 | LR_FT: 0.0001 41 | LR_NEW: 0.0001 42 | EMA: 43 | ENABLE: False 44 | 45 | METRIC: 46 | TYPE: 'pedestrian' 47 | 48 | VIS: 49 | CAM: 'valid' 50 | -------------------------------------------------------------------------------- /configs/pedes_baseline/rap_zs.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet50.base.adam' 2 | 3 | DATASET: 4 | TYPE: 'pedes' 5 | NAME: 'RAP2' 6 | TRAIN_SPLIT: 'trainval' 7 | VAL_SPLIT: 'test' 8 | ZERO_SHOT: True 9 | LABEL: 'eval' 10 | HEIGHT: 256 11 | WIDTH: 192 12 | 13 | RELOAD: 14 | TYPE: False 15 | NAME: 'backbone' 16 | 17 | BACKBONE: 18 | TYPE: 'resnet50' 19 | 20 | CLASSIFIER: 21 | NAME: 'linear' 22 | POOLING: 'avg' 23 | SCALE: 1 24 | BN: False 25 | 26 | LOSS: 27 | TYPE: 'bceloss' 28 | LOSS_WEIGHT: [1] 29 | SAMPLE_WEIGHT: 'weight' 30 | 31 | 32 | TRAIN: 33 | CLIP_GRAD: True 
34 | BATCH_SIZE: 64 35 | OPTIMIZER: 36 | TYPE: 'adam' 37 | WEIGHT_DECAY: 5e-4 38 | LR_SCHEDULER: 39 | TYPE: 'plateau' 40 | LR_FT: 0.0001 41 | LR_NEW: 0.0001 42 | EMA: 43 | ENABLE: False 44 | 45 | METRIC: 46 | TYPE: 'pedestrian' 47 | 48 | VIS: 49 | CAM: 'valid' 50 | -------------------------------------------------------------------------------- /configs/pedes_baseline/rapv1.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet50.base.adam' 2 | 3 | DATASET: 4 | TYPE: 'pedes' 5 | NAME: 'RAP' 6 | TRAIN_SPLIT: 'trainval' 7 | VAL_SPLIT: 'test' 8 | ZERO_SHOT: False 9 | LABEL: 'eval' 10 | HEIGHT: 256 11 | WIDTH: 192 12 | 13 | RELOAD: 14 | TYPE: False 15 | NAME: 'backbone' 16 | 17 | BACKBONE: 18 | TYPE: 'resnet50' 19 | 20 | CLASSIFIER: 21 | NAME: 'linear' 22 | POOLING: 'avg' 23 | SCALE: 1 24 | BN: False 25 | 26 | LOSS: 27 | TYPE: 'bceloss' 28 | LOSS_WEIGHT: [1] 29 | SAMPLE_WEIGHT: 'weight' 30 | 31 | 32 | TRAIN: 33 | CLIP_GRAD: True 34 | BATCH_SIZE: 64 35 | OPTIMIZER: 36 | TYPE: 'adam' 37 | WEIGHT_DECAY: 5e-4 38 | LR_SCHEDULER: 39 | TYPE: 'plateau' 40 | LR_FT: 0.0001 41 | LR_NEW: 0.0001 42 | EMA: 43 | ENABLE: False 44 | 45 | METRIC: 46 | TYPE: 'pedestrian' 47 | 48 | VIS: 49 | CAM: 'valid' 50 | -------------------------------------------------------------------------------- /configs/pedes_baseline/rapv2.yaml: -------------------------------------------------------------------------------- 1 | NAME: 'resnet50.base.adam' 2 | 3 | DATASET: 4 | TYPE: 'pedes' 5 | NAME: 'RAP2' 6 | TRAIN_SPLIT: 'trainval' 7 | VAL_SPLIT: 'test' 8 | ZERO_SHOT: False 9 | LABEL: 'eval' 10 | HEIGHT: 256 11 | WIDTH: 192 12 | 13 | RELOAD: 14 | TYPE: False 15 | NAME: 'backbone' 16 | 17 | BACKBONE: 18 | TYPE: 'resnet50' 19 | 20 | CLASSIFIER: 21 | NAME: 'linear' 22 | POOLING: 'avg' 23 | SCALE: 1 24 | BN: False 25 | 26 | LOSS: 27 | TYPE: 'bceloss' 28 | LOSS_WEIGHT: [1] 29 | SAMPLE_WEIGHT: 'weight' 30 | 31 | 32 | TRAIN: 33 | CLIP_GRAD: True 34 | BATCH_SIZE: 64 35 | OPTIMIZER: 36 | TYPE: 'adam' 37 | WEIGHT_DECAY: 5e-4 38 | LR_SCHEDULER: 39 | TYPE: 'plateau' 40 | LR_FT: 0.0001 41 | LR_NEW: 0.0001 42 | EMA: 43 | ENABLE: False 44 | 45 | METRIC: 46 | TYPE: 'pedestrian' 47 | 48 | VIS: 49 | CAM: 'valid' 50 | -------------------------------------------------------------------------------- /data/COCO14/ml_anno/category.json: -------------------------------------------------------------------------------- 1 | {"airplane": 0, "apple": 1, "backpack": 2, "banana": 3, "baseball bat": 4, "baseball glove": 5, "bear": 6, "bed": 7, "bench": 8, "bicycle": 9, "bird": 10, "boat": 11, "book": 12, "bottle": 13, "bowl": 14, "broccoli": 15, "bus": 16, "cake": 17, "car": 18, "carrot": 19, "cat": 20, "cell phone": 21, "chair": 22, "clock": 23, "couch": 24, "cow": 25, "cup": 26, "dining table": 27, "dog": 28, "donut": 29, "elephant": 30, "fire hydrant": 31, "fork": 32, "frisbee": 33, "giraffe": 34, "hair drier": 35, "handbag": 36, "horse": 37, "hot dog": 38, "keyboard": 39, "kite": 40, "knife": 41, "laptop": 42, "microwave": 43, "motorcycle": 44, "mouse": 45, "orange": 46, "oven": 47, "parking meter": 48, "person": 49, "pizza": 50, "potted plant": 51, "refrigerator": 52, "remote": 53, "sandwich": 54, "scissors": 55, "sheep": 56, "sink": 57, "skateboard": 58, "skis": 59, "snowboard": 60, "spoon": 61, "sports ball": 62, "stop sign": 63, "suitcase": 64, "surfboard": 65, "teddy bear": 66, "tennis racket": 67, "tie": 68, "toaster": 69, "toilet": 70, "toothbrush": 71, "traffic light": 72, "train": 73, "truck": 74, 
"tv": 75, "umbrella": 76, "vase": 77, "wine glass": 78, "zebra": 79} -------------------------------------------------------------------------------- /data/COCO14/ml_anno/coco14_train_anno.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/COCO14/ml_anno/coco14_train_anno.pkl -------------------------------------------------------------------------------- /data/COCO14/ml_anno/coco14_val_anno.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/COCO14/ml_anno/coco14_val_anno.pkl -------------------------------------------------------------------------------- /data/PA100k/dataset_all.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/PA100k/dataset_all.pkl -------------------------------------------------------------------------------- /data/PETA/dataset_all.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/PETA/dataset_all.pkl -------------------------------------------------------------------------------- /data/PETA/dataset_zs_run0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/PETA/dataset_zs_run0.pkl -------------------------------------------------------------------------------- /data/RAP/dataset_all.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/RAP/dataset_all.pkl -------------------------------------------------------------------------------- /data/RAP2/dataset_all.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/RAP2/dataset_all.pkl -------------------------------------------------------------------------------- /data/RAP2/dataset_zs_run0.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/RAP2/dataset_zs_run0.pkl -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/augmentation.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import numpy as np 4 | import torchvision.transforms as T 5 | from PIL import Image 6 | 7 | from dataset.autoaug import AutoAugment 8 | 9 | 10 | class MultiScaleCrop(object): 11 | 12 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=True, more_fix_crop=True): 13 | self.scales = 
scales if scales is not None else [1, .875, .75, .66]
        self.max_distort = max_distort
        self.fix_crop = fix_crop
        self.more_fix_crop = more_fix_crop
        self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size]
        self.interpolation = Image.BILINEAR

    def __call__(self, img):
        im_size = img.size
        crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size)
        crop_img_group = img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h))
        ret_img_group = crop_img_group.resize((self.input_size[0], self.input_size[1]), self.interpolation)
        return ret_img_group

    def _sample_crop_size(self, im_size):
        image_w, image_h = im_size[0], im_size[1]

        # find a crop size
        base_size = min(image_w, image_h)
        crop_sizes = [int(base_size * x) for x in self.scales]
        crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes]
        crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes]

        pairs = []
        for i, h in enumerate(crop_h):
            for j, w in enumerate(crop_w):
                if abs(i - j) <= self.max_distort:
                    pairs.append((w, h))

        crop_pair = random.choice(pairs)
        if not self.fix_crop:
            w_offset = random.randint(0, image_w - crop_pair[0])
            h_offset = random.randint(0, image_h - crop_pair[1])
        else:
            w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1])

        return crop_pair[0], crop_pair[1], w_offset, h_offset

    def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h):
        offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h)
        return random.choice(offsets)

    @staticmethod
    def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h):
        w_step = (image_w - crop_w) // 4
        h_step = (image_h - crop_h) // 4

        ret = list()
        ret.append((0, 0))  # upper left
        ret.append((4 * w_step, 0))  # upper right
        ret.append((0, 4 * h_step))  # lower left
        ret.append((4 * w_step, 4 * h_step))  # lower right
        ret.append((2 * w_step, 2 * h_step))  # center

        if more_fix_crop:
            ret.append((0, 2 * h_step))  # center left
            ret.append((4 * w_step, 2 * h_step))  # center right
            ret.append((2 * w_step, 4 * h_step))  # lower center
            ret.append((2 * w_step, 0 * h_step))  # upper center

            ret.append((1 * w_step, 1 * h_step))  # upper left quarter
            ret.append((3 * w_step, 1 * h_step))  # upper right quarter
            ret.append((1 * w_step, 3 * h_step))  # lower left quarter
            ret.append((3 * w_step, 3 * h_step))  # lower right quarter

        return ret

    def __str__(self):
        return self.__class__.__name__
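# Usage sketch (editor's illustration, not part of the original file):
# MultiScaleCrop picks a random (width, height) crop from the given scales and
# resizes it to input_size, e.g.
#
#     crop = MultiScaleCrop(448, scales=(1.0, 0.875, 0.75, 0.66, 0.5), max_distort=2)
#     img = crop(Image.open('person.jpg').convert('RGB'))   # hypothetical image path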


def get_transform(cfg):
    height = cfg.DATASET.HEIGHT
    width = cfg.DATASET.WIDTH
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if cfg.DATASET.TYPE == 'pedes':

        train_transform = T.Compose([
            T.Resize((height, width)),
            T.Pad(10),
            T.RandomCrop((height, width)),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            normalize,
        ])

        valid_transform = T.Compose([
            T.Resize((height, width)),
            T.ToTensor(),
            normalize
        ])

    elif cfg.DATASET.TYPE == 'multi_label':

        valid_transform = T.Compose([
            T.Resize([height, width]),
            T.ToTensor(),
            normalize,
        ])

        if cfg.TRAIN.DATAAUG.TYPE == 'autoaug':
            train_transform = T.Compose([
                T.RandomApply([AutoAugment()], p=cfg.TRAIN.DATAAUG.AUTOAUG_PROB),
                T.Resize((height, width), interpolation=3),
                T.RandomHorizontalFlip(),
                T.ToTensor(),
            ])

        else:
            train_transform = T.Compose([
                T.Resize((height + 64, width + 64)),
                MultiScaleCrop(height, scales=(1.0, 0.875, 0.75, 0.66, 0.5), max_distort=2),
                T.RandomHorizontalFlip(),
                T.ToTensor(),
                normalize
            ])
    else:

        assert False, f'unsupported dataset type {cfg.DATASET.TYPE}'

    return train_transform, valid_transform
--------------------------------------------------------------------------------
/dataset/multi_label/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/multi_label/__init__.py
--------------------------------------------------------------------------------
/dataset/multi_label/coco.py:
--------------------------------------------------------------------------------
import sys

import torch.utils.data as data
import json
import os
import subprocess
from PIL import Image
import numpy as np
import torch
import pickle
import logging

# from util import *


urls = {'train_img': 'http://images.cocodataset.org/zips/train2014.zip',
        'val_img': 'http://images.cocodataset.org/zips/val2014.zip',
        'annotations': 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip'}

# def download_coco2014(root, phase):
#     # if not os.path.exists(root):
#     #     os.makedirs(root)
#     # tmpdir = os.path.join(root, 'tmp/')
#     # data = os.path.join(root, 'data/')
#     # if not os.path.exists(data):
#     #     os.makedirs(data)
#     # if not os.path.exists(tmpdir):
#     #     os.makedirs(tmpdir)
#     # if phase == 'train':
#     #     filename = 'train2014.zip'
#     # elif phase == 'val':
#     #     filename = 'val2014.zip'
#     # cached_file = os.path.join(tmpdir, filename)
#     # if not os.path.exists(cached_file):
#     #     print('Downloading: "{}" to {}\n'.format(urls[phase + '_img'], cached_file))
#     #     os.chdir(tmpdir)
#     #     subprocess.call('wget ' + urls[phase + '_img'], shell=True)
#     #     os.chdir(root)
#     # # extract file
#     # img_data = os.path.join(data, filename.split('.')[0])
#     # if not os.path.exists(img_data):
#     #     print('[dataset] Extracting tar file {file} to {path}'.format(file=cached_file, path=data))
#     #     command = 'unzip {} -d {}'.format(cached_file, data)
#     #     os.system(command)
#     # print('[dataset] Done!')
#
#     # train/val images/annotations
#     # cached_file = os.path.join(tmpdir, 'annotations_trainval2014.zip')
#     # if not os.path.exists(cached_file):
#     #     print('Downloading: "{}" to {}\n'.format(urls['annotations'], cached_file))
#     #     os.chdir(tmpdir)
#     #     subprocess.Popen('wget ' + urls['annotations'], shell=True)
#     #     os.chdir(root)
#     # annotations_data = os.path.join(data, 'annotations')
#     # if not os.path.exists(annotations_data):
#     #     print('[dataset] Extracting tar file {file} to {path}'.format(file=cached_file, path=data))
#     #     command = 'unzip {} -d {}'.format(cached_file, data)
#     #     os.system(command)
#     #     print('[annotation] Done!')
#
#     anno = os.path.join(data, '{}_anno.json'.format(phase))
#     img_id = 
{} 63 | # annotations_id = {} 64 | # if not os.path.exists(anno): 65 | # annotations_file = json.load(open(os.path.join(annotations_data, 'instances_{}2014.json'.format(phase)))) 66 | # annotations = annotations_file['annotations'] 67 | # category = annotations_file['categories'] 68 | # category_id = {} 69 | # for cat in category: 70 | # category_id[cat['id']] = cat['name'] 71 | # cat2idx = categoty_to_idx(sorted(category_id.values())) 72 | # images = annotations_file['images'] 73 | # for annotation in annotations: 74 | # if annotation['image_id'] not in annotations_id: 75 | # annotations_id[annotation['image_id']] = set() 76 | # annotations_id[annotation['image_id']].add(cat2idx[category_id[annotation['category_id']]]) 77 | # for img in images: 78 | # if img['id'] not in annotations_id: 79 | # continue 80 | # if img['id'] not in img_id: 81 | # img_id[img['id']] = {} 82 | # img_id[img['id']]['file_name'] = img['file_name'] 83 | # img_id[img['id']]['labels'] = list(annotations_id[img['id']]) 84 | # anno_list = [] 85 | # for k, v in img_id.items(): 86 | # anno_list.append(v) 87 | # json.dump(anno_list, open(anno, 'w')) 88 | # if not os.path.exists(os.path.join(data, 'category.json')): 89 | # json.dump(cat2idx, open(os.path.join(data, 'category.json'), 'w')) 90 | # del img_id 91 | # del anno_list 92 | # del images 93 | # del annotations_id 94 | # del annotations 95 | # del category 96 | # del category_id 97 | # print('[json] Done!') 98 | 99 | 100 | def categoty_to_idx(category): 101 | cat2idx = {} 102 | for cat in category: 103 | cat2idx[cat] = len(cat2idx) 104 | return cat2idx 105 | 106 | 107 | class COCO14(data.Dataset): 108 | 109 | def __init__(self, cfg, split, transform=None, target_transform=None): 110 | 111 | root_path = './data/COCO14' 112 | self.img_dir = os.path.join(root_path, f'{split}2014') 113 | self.split = split 114 | self.transform = transform 115 | self.target_transform = target_transform 116 | 117 | list_path = os.path.join(root_path, 'ml_anno', f'coco14_{self.split}_anno.pkl') 118 | anno = pickle.load(open(list_path, 'rb+')) 119 | self.img_id = anno['image_name'] 120 | self.label = anno['labels'] 121 | self.img_idx = range(len(self.img_id)) 122 | 123 | self.cat2idx = json.load(open(os.path.join(root_path, 'ml_anno', 'category.json'), 'r')) 124 | 125 | self.attr_id = list(self.cat2idx.keys()) 126 | self.attr_num = len(self.cat2idx) 127 | 128 | # just for aligning with pedestrian attribute dataset 129 | self.eval_attr_num = len(self.cat2idx) 130 | 131 | def __len__(self): 132 | return len(self.img_id) 133 | 134 | def __getitem__(self, index): 135 | 136 | imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index] 137 | imgpath = os.path.join(self.img_dir, imgname) 138 | img = Image.open(imgpath).convert('RGB') 139 | 140 | if self.transform is not None: 141 | img = self.transform(img) 142 | 143 | gt_label = gt_label.astype(np.float32) 144 | 145 | if self.target_transform: 146 | gt_label = gt_label[self.target_transform] 147 | 148 | return img, gt_label, imgname 149 | 150 | 151 | -------------------------------------------------------------------------------- /dataset/multi_label/voc.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import os.path 4 | import tarfile 5 | from urllib.parse import urlparse 6 | 7 | import numpy as np 8 | import torch 9 | import torch.utils.data as data 10 | from PIL import Image 11 | import pickle 12 | 13 | object_categories = ['aeroplane', 'bicycle', 
'bird', 'boat', 14 | 'bottle', 'bus', 'car', 'cat', 'chair', 15 | 'cow', 'diningtable', 'dog', 'horse', 16 | 'motorbike', 'person', 'pottedplant', 17 | 'sheep', 'sofa', 'train', 'tvmonitor'] 18 | 19 | urls = { 20 | 'devkit': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar', 21 | 'trainval_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', 22 | 'test_images_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', 23 | 'test_anno_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtestnoimgs_06-Nov-2007.tar', 24 | } 25 | 26 | 27 | def read_image_label(file): 28 | print('[dataset] read ' + file) 29 | data = dict() 30 | with open(file, 'r') as f: 31 | for line in f: 32 | tmp = line.split(' ') 33 | name = tmp[0] 34 | label = int(tmp[-1]) 35 | data[name] = label 36 | # data.append([name, label]) 37 | # print('%s %d' % (name, label)) 38 | return data 39 | 40 | 41 | def read_object_labels(root, dataset, set): 42 | path_labels = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main') 43 | labeled_data = dict() 44 | num_classes = len(object_categories) 45 | 46 | for i in range(num_classes): 47 | file = os.path.join(path_labels, object_categories[i] + '_' + set + '.txt') 48 | data = read_image_label(file) 49 | 50 | if i == 0: 51 | for (name, label) in data.items(): 52 | labels = np.zeros(num_classes) 53 | labels[i] = label 54 | labeled_data[name] = labels 55 | else: 56 | for (name, label) in data.items(): 57 | labeled_data[name][i] = label 58 | 59 | return labeled_data 60 | 61 | 62 | def write_object_labels_csv(file, labeled_data): 63 | # write a csv file 64 | print('[dataset] write file %s' % file) 65 | with open(file, 'w') as csvfile: 66 | fieldnames = ['name'] 67 | fieldnames.extend(object_categories) 68 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 69 | 70 | writer.writeheader() 71 | for (name, labels) in labeled_data.items(): 72 | example = {'name': name} 73 | for i in range(20): 74 | example[fieldnames[i + 1]] = int(labels[i]) 75 | writer.writerow(example) 76 | 77 | csvfile.close() 78 | 79 | 80 | def read_object_labels_csv(file, header=True): 81 | images = [] 82 | num_categories = 0 83 | print('[dataset] read', file) 84 | with open(file, 'r') as f: 85 | reader = csv.reader(f) 86 | rownum = 0 87 | for row in reader: 88 | if header and rownum == 0: 89 | header = row 90 | else: 91 | if num_categories == 0: 92 | num_categories = len(row) - 1 93 | name = row[0] 94 | labels = (np.asarray(row[1:num_categories + 1])).astype(np.float32) 95 | labels = torch.from_numpy(labels) 96 | item = (name, labels) 97 | images.append(item) 98 | rownum += 1 99 | return images 100 | 101 | 102 | def read_csv(file): 103 | images = [] 104 | print('[dataset] read', file) 105 | with open(file, 'r') as f: 106 | reader = csv.reader(f) 107 | for i, row in enumerate(reader): 108 | if i == 0: 109 | continue 110 | name = row[0] 111 | labels = (np.asarray(row[1:])).astype(np.float32) 112 | labels = torch.from_numpy(labels) 113 | item = (name, labels) 114 | images.append(item) 115 | return images 116 | 117 | 118 | def find_images_classification(root, dataset, set): 119 | path_labels = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main') 120 | images = [] 121 | file = os.path.join(path_labels, set + '.txt') 122 | with open(file, 'r') as f: 123 | for line in f: 124 | images.append(line) 125 | return images 126 | 127 | 128 | class VOC(data.Dataset): 129 | def __init__(self, root='./data/voc', phase='train', 
transform=None, target_transform=None):
        self.root = root
        # self.path_devkit = os.path.join(root, 'VOCdevkit')
        self.path_images = os.path.join(root, 'VOCdevkit', 'VOC2007', 'JPEGImages')
        self.phase = phase
        self.transform = transform
        self.target_transform = target_transform

        # download dataset
        # download_voc2007(self.root)

        # define path of csv file
        path_csv = os.path.join(self.root, 'files', 'VOC2007')
        # define filename of csv file
        file_csv = os.path.join(path_csv, 'classification_' + phase + '.csv')

        # create the csv file if necessary
        if not os.path.exists(file_csv):
            if not os.path.exists(path_csv):  # create dir if necessary
                os.makedirs(path_csv)
            # generate csv file
            labeled_data = read_object_labels(self.root, 'VOC2007', self.phase)
            # write csv file
            write_object_labels_csv(file_csv, labeled_data)

        self.num_classes = len(object_categories)
        self.images = read_object_labels_csv(file_csv)
        self.imags, self.gt = zip(*self.images)

        # print('[dataset] VOC 2007 classification set=%s number of classes=%d number of images=%d' % (
        #     phase, self.num_classes, len(self.images)))

    def __getitem__(self, index):
        path, target = self.images[index]
        target[target == -1] = 0
        img = Image.open(os.path.join(self.path_images, path + '.jpg')).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target, 0, path

    def __len__(self):
        return len(self.images)
--------------------------------------------------------------------------------
/dataset/pedes_attr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/pedes_attr/__init__.py
--------------------------------------------------------------------------------
/dataset/pedes_attr/annotation.md:
--------------------------------------------------------------------------------
### dataset split
- PETA: 19000 images; train 9500, val 1900, test 7600
- RAP: 41585 images; train 33268, test 8317
- PA100K: 100000 images; train 80000, val 10000, test 10000
- RAPv2: 84928 images; train 50957, val 16986, test 16985


## Unified attribute order
1. head region
2. upper region
3. lower region
4. foot region
5. accessory/bag
6. age
7. gender
8. 
others 17 | 18 | 19 | 20 | ### PETA (35 in 105) 21 | num_ingroup = [5, 10, 6, 4, 5, 5] 22 | 23 | - 'accessoryHat','accessoryMuffler','accessoryNothing','accessorySunglasses','hairLong' [10, 18, 19, 30, 15] 5 24 | - 'upperBodyCasual', 'upperBodyFormal', 'upperBodyJacket', 'upperBodyLogo', 'upperBodyPlaid', 'upperBodyShortSleeve', 'upperBodyThinStripes', 'upperBodyTshirt','upperBodyOther','upperBodyVNeck' [7, 9, 11, 14, 21, 26, 29, 32, 33, 34] 10 25 | - 'lowerBodyCasual', 'lowerBodyFormal', 'lowerBodyJeans', 'lowerBodyShorts', 'lowerBodyShortSkirt','lowerBodyTrousers' [6, 8, 12, 25, 27, 31] 6 26 | - 'footwearLeatherShoes', 'footwearSandals', 'footwearShoes', 'footwearSneaker' [13, 23, 24, 28] 4 27 | - 'carryingBackpack', 'carryingOther', 'carryingMessengerBag', 'carryingNothing', 'carryingPlasticBags' [4, 5, 17, 20, 22] 5 28 | 29 | - 'personalLess30','personalLess45','personalLess60','personalLarger60', [0, 1, 2, 3] 4 30 | - 'personalMale', [16] 1 31 | 32 | permutation = [10, 18, 19, 30, 15, 7, 9, 11, 14, 21, 26, 29, 32, 33, 34, 6, 8, 12, 25, 27, 31, 13, 23, 24, 28, 4, 5, 17, 20, 22, 0, 1, 2, 3, 16] 33 | 34 | ##### not evaluated attributes 35 | - color: 36 | ['upperBodyBlack', 'upperBodyBlue', 'upperBodyBrown', 'upperBodyGreen', 'upperBodyGrey', 'upperBodyOrange', 'upperBodyPink', 'upperBodyPurple', 'upperBodyRed', 'upperBodyWhite', 'upperBodyYellow', 37 | 'lowerBodyBlack', 'lowerBodyBlue', 'lowerBodyBrown', 'lowerBodyGreen', 'lowerBodyGrey', 'lowerBodyOrange', 'lowerBodyPink', 'lowerBodyPurple', 'lowerBodyRed', 'lowerBodyWhite', 'lowerBodyYellow', 38 | 'hairBlack', 'hairBlue', 'hairBrown', 'hairGreen', 'hairGrey', 'hairOrange', 'hairPink', 'hairPurple', 'hairRed', 'hairWhite', 'hairYellow', 39 | 'footwearBlack', 'footwearBlue', 'footwearBrown', 'footwearGreen', 'footwearGrey', 'footwearOrange', 'footwearPink', 'footwearPurple', 'footwearRed', 'footwearWhite', 'footwearYellow'] 40 | - extra: 41 | ['accessoryHeadphone', 'personalLess15', 'carryingBabyBuggy', 'hairBald', 'footwearBoots', 'lowerBodyCapri', 'carryingShoppingTro', 'carryingUmbrella', 'personalFemale', 'carryingFolder', 'accessoryHairBand', 42 | 'lowerBodyHotPants', 'accessoryKerchief', 'lowerBodyLongSkirt', 'upperBodyLongSleeve', 'lowerBodyPlaid', 'lowerBodyThinStripes', 'carryingLuggageCase', 'upperBodyNoSleeve', 'hairShort', 'footwearStocking', 43 | 'upperBodySuit', 'carryingSuitcase', 'lowerBodySuits', 'upperBodySweater', 'upperBodyThickStripes'] 44 | 45 | 46 | ### PA100K (26) 47 | num_in_group = [2, 6, 6, 1, 4, 7] 48 | 49 | - 'Hat','Glasses', [7,8] 2 50 | - 'ShortSleeve','LongSleeve','UpperStride','UpperLogo','UpperPlaid','UpperSplice', [13,14,15,16,17,18] 6 51 | - 'LowerStripe','LowerPattern','LongCoat','Trousers','Shorts','Skirt&Dress', [19,20,21,22,23,24] 6 52 | - 'boots' [25] 1 53 | - 'HandBag','ShoulderBag','Backpack','HoldObjectsInFront', [9,10,11,12] 4 54 | 55 | - 'AgeOver60','Age18-60','AgeLess18', [1,2,3] 3 56 | - 'Female' [0] 1 57 | - 'Front','Side','Back', [4,5,6] 3 58 | 59 | permutation = [7,8,13,14,15,16,17,18,19,20,21,22,23,24,25,9,10,11,12,1,2,3,0,4,5,6] 60 | 61 | ### RAPv1 (51) 62 | 63 | num_ingroup = [6, 9, 6, 5, 8, 17] 64 | 65 | - head 6:'hs-BaldHead','hs-LongHair','hs-BlackHair','hs-Hat','hs-Glasses','hs-Muffler', [9, 10, 11, 12, 13, 14,] 66 | - upper body 9:'ub-Shirt','ub-Sweater','ub-Vest','ub-TShirt','ub-Cotton','ub-Jacket','ub-SuitUp','ub-Tight','ub-ShortSleeve',[15, 16, 17, 18, 19, 20, 21, 22, 23,] 67 | - lower body 
6:'lb-LongTrousers','lb-Skirt','lb-ShortSkirt','lb-Dress','lb-Jeans','lb-TightTrousers', [24, 25,26, 27, 28, 29,] 68 | - footwear 5:'shoes-Leather','shoes-Sport','shoes-Boots','shoes-Cloth','shoes-Casual', [30, 31, 32, 33, 34,] 69 | - accessory 8 [35, 36, 37, 38, 39, 40, 41, 42] : 70 | 'attach-Backpack','attach-SingleShoulderBag','attach-HandBag','attach-Box','attach-PlasticBag','attach-PaperBag','attach-HandTrunk','attach-Other', 71 | 72 | - age 3:'AgeLess16','Age17-30','Age31-45', 1:4 [1, 2, 3,] 73 | - gender 1:'Female', 0 [0,] 74 | - body shape 3:'BodyFat','BodyNormal','BodyThin',4:7 [4, 5, 6,] 75 | - role 2:'Customer','Clerk', 7:9 [ 7, 8,] 76 | - action 8:'action-Calling','action-Talking','action-Gathering','action-Holding','action-Pusing','action-Pulling','action-CarrybyArm','action-CarrybyHand' 77 | [43, 44, 45, 46, 47, 48, 49, 50] 78 | 79 | permutation = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 80 | 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 1, 2, 3, 0, 4, 5, 6, 7, 8, 43, 44, 45, 46, 81 | 47, 48, 49, 50] 82 | 83 | - color: 29 84 | ['up-Black', 'up-White', 'up-Gray', 'up-Red', 'up-Green', 'up-Blue', 'up-Yellow', 'up-Brown', 'up-Purple', 'up-Pink', 'up-Orange', 'up-Mixture', 85 | 'low-Black', 'low-White', 'low-Gray', 'low-Red', 'low-Green', 'low-Blue', 'low-Yellow', 'low-Mixture', 86 | 'shoes-Black', 'shoes-White', 'shoes-Gray', 'shoes-Red', 'shoes-Green', 'shoes-Blue', 'shoes-Yellow', 'shoes-Brown', 'shoes-Mixture'] 87 | 88 | - extra: 12 89 | ['faceFront', 'faceBack', 'faceLeft', 'faceRight', 90 | 'occlusionLeft', 'occlusionRight', 'occlusionUp', 'occlusionDown', 'occlusion-Environment', 'occlusion-Attachment', 'occlusion-Person', 'occlusion-Other'] 91 | 92 | ### RAPv2 (54) 93 | num_ingroup = [5, 10, 6, 6, 8, 19] 94 | 95 | - head 5:'hs-BaldHead', 'hs-LongHair', 'hs-BlackHair', 'hs-Hat', 'hs-Glasses', [10,11,12,13,14] 96 | - upper body 10:'ub-Shirt','ub-Sweater','ub-Vest','ub-TShirt','ub-Cotton','ub-Jacket','ub-SuitUp','ub-Tight','ub-ShortSleeve','ub-Others' 97 | [15, 16, 17, 18, 19, 20, 21, 22, 23, 24] 98 | - lower body 6:'lb-LongTrousers','lb-Skirt','lb-ShortSkirt','lb-Dress','lb-Jeans','lb-TightTrousers', [25 ,26, 27, 28, 29, 30] 99 | - footwear 6:'shoes-Leather', 'shoes-Sports', 'shoes-Boots', 'shoes-Cloth', 'shoes-Casual', 'shoes-Other', [31, 32, 33, 34, 35, 36] 100 | - accessory 8 [37, 38, 39, 40, 41, 42, 43, 44] : 101 | 'attachment-Backpack','attachment-ShoulderBag','attachment-HandBag','attachment-Box','attachment-PlasticBag','attachment-PaperBag','attachment-HandTrunk','attachment-Other' 102 | 103 | - age 4:'AgeLess16', 'Age17-30', 'Age31-45', 'Age46-60', [1, 2, 3, 4] 104 | - gender 1:'Female', [0,] 105 | - body shape 3:'BodyFat','BodyNormal','BodyThin',4:7 [5, 6, 7] 106 | - role 2:'Customer','Employee', [ 8, 9,] 107 | - action 9:'action-Calling','action-Talking','action-Gathering','action-Holding','action-Pushing','action-Pulling','action-CarryingByArm','action-CarryingByHand','action-Other' 108 | [45, 46, 47, 48, 49, 50, 51, 52, 53] 109 | 110 | permutation = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 111 | 36, 37, 38, 39, 40, 41, 42, 43, 44, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 45, 46, 47, 48, 49, 50, 51, 52, 53] 112 | -------------------------------------------------------------------------------- /dataset/pedes_attr/pedes.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import pickle 4 | 5 | import 
-------------------------------------------------------------------------------- /dataset/pedes_attr/pedes.py: --------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import pickle
4 | 
5 | import numpy as np
6 | import torch.utils.data as data
7 | from PIL import Image
8 | 
9 | from tools.function import get_pkl_rootpath
10 | 
11 | 
12 | class PedesAttr(data.Dataset):
13 | 
14 |     def __init__(self, cfg, split, transform=None, target_transform=None, idx=None):
15 | 
16 |         assert cfg.DATASET.NAME in ['PETA', 'PA100k', 'RAP', 'RAP2'], \
17 |             f'dataset name {cfg.DATASET.NAME} does not exist'
18 | 
19 |         data_path = get_pkl_rootpath(cfg.DATASET.NAME, cfg.DATASET.ZERO_SHOT)
20 | 
21 |         print("which pickle", data_path)
22 | 
23 |         dataset_info = pickle.load(open(data_path, 'rb'))
24 | 
25 |         img_id = dataset_info.image_name
26 | 
27 |         attr_label = dataset_info.label
28 |         attr_label[attr_label == 2] = 0
29 |         self.attr_id = dataset_info.attr_name
30 |         self.attr_num = len(self.attr_id)
31 | 
32 |         if 'label_idx' not in dataset_info.keys():
33 |             print('this is the zero-shot split')
34 |             assert cfg.DATASET.ZERO_SHOT
35 |             self.eval_attr_num = self.attr_num
36 |         else:
37 |             self.eval_attr_idx = dataset_info.label_idx.eval
38 |             self.eval_attr_num = len(self.eval_attr_idx)
39 | 
40 |             assert cfg.DATASET.LABEL in ['all', 'eval', 'color'], f'keyword {cfg.DATASET.LABEL} is not supported'
41 |             if cfg.DATASET.LABEL == 'eval':
42 |                 attr_label = attr_label[:, self.eval_attr_idx]
43 |                 self.attr_id = [self.attr_id[i] for i in self.eval_attr_idx]
44 |                 self.attr_num = len(self.attr_id)
45 |             elif cfg.DATASET.LABEL == 'color':
46 |                 attr_label = attr_label[:, self.eval_attr_idx + dataset_info.label_idx.color]
47 |                 self.attr_id = [self.attr_id[i] for i in self.eval_attr_idx + dataset_info.label_idx.color]
48 |                 self.attr_num = len(self.attr_id)
49 | 
50 |         assert split in dataset_info.partition.keys(), f'split {split} does not exist'
51 | 
52 |         self.dataset = cfg.DATASET.NAME
53 |         self.transform = transform
54 |         self.target_transform = target_transform
55 | 
56 |         self.root_path = dataset_info.root
57 | 
58 |         if self.target_transform:
59 |             self.attr_num = len(self.target_transform)
60 |             print(f'{split} target_label: {self.target_transform}')
61 |         else:
62 |             self.attr_num = len(self.attr_id)
63 |             print(f'{split} target_label: all')
64 | 
65 |         self.img_idx = dataset_info.partition[split]
66 | 
67 |         if isinstance(self.img_idx, list):
68 |             self.img_idx = self.img_idx[0]  # default partition 0
69 | 
70 |         if idx is not None:
71 |             self.img_idx = idx
72 | 
73 |         self.img_num = self.img_idx.shape[0]
74 |         self.img_id = [img_id[i] for i in self.img_idx]
75 |         self.label = attr_label[self.img_idx]  # [:, [0, 12]]
76 | 
77 |     def __getitem__(self, index):
78 | 
79 |         imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index]
80 | 
81 |         imgpath = os.path.join(self.root_path, imgname)
82 |         img = Image.open(imgpath)
83 | 
84 |         if self.transform is not None:
85 |             img = self.transform(img)
86 | 
87 |         gt_label = gt_label.astype(np.float32)
88 | 
89 |         if self.target_transform:
90 |             gt_label = gt_label[self.target_transform]
91 | 
92 |         return img, gt_label, imgname,  # noisy_weight
93 | 
94 |     def __len__(self):
95 |         return len(self.img_id)
96 | 
97 | 
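98 | # --- usage sketch (not part of the original file) ---
99 | # A minimal illustration of how PedesAttr is typically consumed; `cfg` is assumed to be a
100 | # config node loaded via configs.update_config, and the transforms come from
101 | # dataset.augmentation.get_transform (see train.py / infer.py):
102 | #
103 | #     from torch.utils.data import DataLoader
104 | #     from dataset.augmentation import get_transform
105 | #
106 | #     train_tsfm, valid_tsfm = get_transform(cfg)
107 | #     train_set = PedesAttr(cfg=cfg, split='trainval', transform=train_tsfm)
108 | #     loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=4)
109 | #     imgs, gt_label, img_names = next(iter(loader))  # imgs: (64, 3, H, W), gt_label: (64, attr_num)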
-------------------------------------------------------------------------------- /dataset/pedes_attr/preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/pedes_attr/preprocess/__init__.py -------------------------------------------------------------------------------- /dataset/pedes_attr/preprocess/format_pa100k.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 | 
6 | from easydict import EasyDict
7 | from scipy.io import loadmat
8 | 
9 | np.random.seed(0)
10 | random.seed(0)
11 | 
12 | group_order = [7, 8, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 9, 10, 11, 12, 1, 2, 3, 0, 4, 5, 6]
13 | 
14 | 
15 | def make_dir(path):
16 |     if os.path.exists(path):
17 |         pass
18 |     else:
19 |         os.mkdir(path)
20 | 
21 | 
22 | def generate_data_description(save_dir, reorder):
23 |     """
24 |     create a dataset description file, which consists of images and labels
25 |     """
26 |     # pa100k_data = loadmat('/mnt/data1/jiajian/dataset/attribute/PA100k/annotation.mat')
27 |     pa100k_data = loadmat(os.path.join(save_dir, 'annotation.mat'))
28 | 
29 |     dataset = EasyDict()
30 |     dataset.description = 'pa100k'
31 |     dataset.reorder = 'group_order'
32 |     dataset.root = os.path.join(save_dir, 'data')
33 | 
34 |     train_image_name = [pa100k_data['train_images_name'][i][0][0] for i in range(80000)]
35 |     val_image_name = [pa100k_data['val_images_name'][i][0][0] for i in range(10000)]
36 |     test_image_name = [pa100k_data['test_images_name'][i][0][0] for i in range(10000)]
37 |     dataset.image_name = train_image_name + val_image_name + test_image_name
38 | 
39 |     dataset.label = np.concatenate((pa100k_data['train_label'], pa100k_data['val_label'], pa100k_data['test_label']), axis=0)
40 |     dataset.attr_name = [pa100k_data['attributes'][i][0][0] for i in range(26)]
41 | 
42 |     dataset.label_idx = EasyDict()
43 |     dataset.label_idx.eval = list(range(26))
44 | 
45 |     if reorder:
46 |         dataset.label_idx.eval = group_order
47 | 
48 |     dataset.partition = EasyDict()
49 |     dataset.partition.train = np.arange(0, 80000)  # np.array(range(80000))
50 |     dataset.partition.val = np.arange(80000, 90000)  # np.array(range(80000, 90000))
51 |     dataset.partition.test = np.arange(90000, 100000)  # np.array(range(90000, 100000))
52 |     dataset.partition.trainval = np.arange(0, 90000)  # np.array(range(90000))
53 | 
54 |     dataset.weight_train = np.mean(dataset.label[dataset.partition.train], axis=0).astype(np.float32)
55 |     dataset.weight_trainval = np.mean(dataset.label[dataset.partition.trainval], axis=0).astype(np.float32)
56 | 
57 |     with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
58 |         pickle.dump(dataset, f)
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     save_dir = '/mnt/data1/jiajian/datasets/attribute/PA100k/'
63 |     # save_dir = './data/PA100k/'
64 |     reorder = True
65 |     generate_data_description(save_dir, reorder=reorder)
66 | 
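67 | # --- sanity-check sketch (not part of the original file) ---
68 | # After running this script, dataset_all.pkl can be inspected as below; attribute i of the
69 | # reordered evaluation view is dataset.attr_name[dataset.label_idx.eval[i]]. The path assumes
70 | # the commented-out save_dir './data/PA100k/' was used:
71 | #
72 | #     import pickle
73 | #     with open('./data/PA100k/dataset_all.pkl', 'rb') as f:   # requires easydict to unpickle
74 | #         d = pickle.load(f)
75 | #     print(d.label.shape)                                     # (100000, 26)
76 | #     print([d.attr_name[i] for i in d.label_idx.eval][:5])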
-------------------------------------------------------------------------------- /dataset/pedes_attr/preprocess/format_peta.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 | 
6 | from easydict import EasyDict
7 | from scipy.io import loadmat
8 | 
9 | np.random.seed(0)
10 | random.seed(0)
11 | 
12 | # note: see annotation.md for the attribute grouping
13 | 
14 | group_order = [10, 18, 19, 30, 15, 7, 9, 11, 14, 21, 26, 29, 32, 33, 34, 6, 8, 12, 25, 27, 31, 13, 23, 24, 28, 4, 5,
15 |                17, 20, 22, 0, 1, 2, 3, 16]
16 | 
17 | 
18 | def make_dir(path):
19 |     if os.path.exists(path):
20 |         pass
21 |     else:
22 |         os.mkdir(path)
23 | 
24 | 
25 | def generate_data_description(save_dir, reorder, new_split_path):
26 |     """
27 |     create a dataset description file, which consists of images and labels
28 |     """
29 |     peta_data = loadmat(os.path.join(save_dir, 'PETA.mat'))
30 |     dataset = EasyDict()
31 |     dataset.description = 'peta'
32 |     dataset.reorder = 'group_order'
33 |     dataset.root = os.path.join(save_dir, 'images')
34 |     dataset.image_name = [f'{i + 1:05}.png' for i in range(19000)]
35 | 
36 |     raw_attr_name = [i[0][0] for i in peta_data['peta'][0][0][1]]
37 |     # (19000, 105)
38 |     raw_label = peta_data['peta'][0][0][0][:, 4:]
39 | 
40 | 
41 | 
42 |     dataset.label = raw_label
43 |     dataset.attr_name = raw_attr_name
44 | 
45 |     dataset.label_idx = EasyDict()
46 |     dataset.label_idx.eval = list(range(35))
47 |     dataset.label_idx.color = list(range(35, 79))
48 |     dataset.label_idx.extra = list(range(79, raw_label.shape[1]))  # (79, 105)
49 | 
50 |     if reorder:
51 |         dataset.label_idx.eval = group_order
52 | 
53 |     dataset.partition = EasyDict()
54 |     dataset.partition.train = []
55 |     dataset.partition.val = []
56 |     dataset.partition.trainval = []
57 |     dataset.partition.test = []
58 | 
59 |     dataset.weight_train = []
60 |     dataset.weight_trainval = []
61 | 
62 |     if new_split_path:
63 | 
64 |         with open(new_split_path, 'rb+') as f:
65 |             new_split = pickle.load(f)
66 | 
67 |         train = np.array(new_split.train_idx)
68 |         val = np.array(new_split.val_idx)
69 |         test = np.array(new_split.test_idx)
70 |         trainval = np.concatenate((train, val), axis=0)
71 | 
72 |         dataset.partition.train = train
73 |         dataset.partition.val = val
74 |         dataset.partition.trainval = trainval
75 |         dataset.partition.test = test
76 | 
77 |         weight_train = np.mean(dataset.label[train], axis=0).astype(np.float32)
78 |         weight_trainval = np.mean(dataset.label[trainval], axis=0).astype(np.float32)
79 | 
80 |         dataset.weight_train.append(weight_train)
81 |         dataset.weight_trainval.append(weight_trainval)
82 |         with open(os.path.join(save_dir, 'dataset_zs_run4.pkl'), 'wb+') as f:
83 |             pickle.dump(dataset, f)
84 | 
85 |     else:
86 | 
87 |         for idx in range(5):
88 |             train = peta_data['peta'][0][0][3][idx][0][0][0][0][:, 0] - 1
89 |             val = peta_data['peta'][0][0][3][idx][0][0][0][1][:, 0] - 1
90 |             test = peta_data['peta'][0][0][3][idx][0][0][0][2][:, 0] - 1
91 |             trainval = np.concatenate((train, val), axis=0)
92 | 
93 |             dataset.partition.train.append(train)
94 |             dataset.partition.val.append(val)
95 |             dataset.partition.trainval.append(trainval)
96 |             dataset.partition.test.append(test)
97 | 
98 |             weight_train = np.mean(dataset.label[train], axis=0)
99 |             weight_trainval = np.mean(dataset.label[trainval], axis=0)
100 | 
101 |             dataset.weight_train.append(weight_train)
102 |             dataset.weight_trainval.append(weight_trainval)
103 | 
104 |         """
105 |         dataset.pkl: contains only the 35 evaluation attributes
106 |         dataset_all.pkl: contains all 105 attributes
107 |         """
108 |         with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
109 |             pickle.dump(dataset, f)
110 | 
111 | 
112 | if __name__ == "__main__":
113 |     save_dir = '/mnt/data1/jiajian/datasets/attribute/PETA/'
114 |     new_split_path = '/mnt/data1/jiajian/code/Rethinking_of_PAR/datasets/jian_split/index_peta_split_id50_img300_ratio0.03_4.pkl'
115 |     generate_data_description(save_dir, True, new_split_path)
116 | 
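117 | # --- note (not part of the original file) ---
118 | # The zero-shot split pickle referenced by new_split_path is expected to be an EasyDict-like
119 | # object exposing integer index arrays, as used above:
120 | #
121 | #     new_split.train_idx   # image indices for train
122 | #     new_split.val_idx     # image indices for val
123 | #     new_split.test_idx    # image indices for test (pedestrian identities disjoint from train/val)
124 | #
125 | # so each dataset_zs_run*.pkl stores a single fixed partition, unlike the five random splits
126 | # stored as lists in dataset_all.pkl.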
-------------------------------------------------------------------------------- /dataset/pedes_attr/preprocess/format_rap.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 | 
6 | from easydict import EasyDict
7 | from scipy.io import loadmat
8 | 
9 | np.random.seed(0)
10 | random.seed(0)
11 | 
12 | 
13 | group_order = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
14 |                26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 1, 2, 3, 0, 4, 5, 6, 7, 8, 43, 44,
15 |                45, 46, 47, 48, 49, 50]
16 | 
17 | 
18 | def make_dir(path):
19 |     if os.path.exists(path):
20 |         pass
21 |     else:
22 |         os.mkdir(path)
23 | 
24 | 
25 | def generate_data_description(save_dir, reorder):
26 |     """
27 |     create a dataset description file, which consists of images and labels
28 |     """
29 | 
30 |     data = loadmat(os.path.join(save_dir, 'RAP_annotation/RAP_annotation.mat'))
31 | 
32 |     dataset = EasyDict()
33 |     dataset.description = 'rap'
34 |     dataset.reorder = 'group_order'
35 |     dataset.root = os.path.join(save_dir, 'RAP_dataset')
36 |     dataset.image_name = [data['RAP_annotation'][0][0][5][i][0][0] for i in range(41585)]
37 |     raw_attr_name = [data['RAP_annotation'][0][0][3][i][0][0] for i in range(92)]
38 |     # (41585, 92)
39 |     raw_label = data['RAP_annotation'][0][0][1]
40 |     # keep all 92 attributes; the evaluated subset is selected later via label_idx.eval
41 | 
42 |     dataset.label = raw_label
43 |     dataset.attr_name = raw_attr_name
44 | 
45 |     dataset.label_idx = EasyDict()
46 |     dataset.label_idx.eval = list(range(51))
47 |     dataset.label_idx.color = list(range(63, raw_label.shape[1]))  # (63, 92)
48 |     dataset.label_idx.extra = list(range(51, 63))
49 | 
50 |     if reorder:
51 |         dataset.label_idx.eval = group_order
52 | 
53 |     dataset.partition = EasyDict()
54 |     dataset.partition.trainval = []
55 |     dataset.partition.test = []
56 | 
57 |     dataset.weight_trainval = []
58 | 
59 |     for idx in range(5):
60 |         trainval = data['RAP_annotation'][0][0][0][idx][0][0][0][0][0, :] - 1
61 |         test = data['RAP_annotation'][0][0][0][idx][0][0][0][1][0, :] - 1
62 | 
63 |         dataset.partition.trainval.append(trainval)
64 |         dataset.partition.test.append(test)
65 | 
66 |         weight_trainval = np.mean(dataset.label[trainval], axis=0).astype(np.float32)
67 |         dataset.weight_trainval.append(weight_trainval)
68 | 
69 |     with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
70 |         pickle.dump(dataset, f)
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     save_dir = '/mnt/data1/jiajian/datasets/attribute/RAP/'
75 |     reorder = True
76 |     generate_data_description(save_dir, reorder)
77 | 
-------------------------------------------------------------------------------- /dataset/pedes_attr/preprocess/format_rap2.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 | from scipy.io import loadmat
6 | from easydict import EasyDict
7 | 
8 | np.random.seed(0)
9 | random.seed(0)
10 | 
11 | group_order = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
12 |                36, 37, 38, 39, 40, 41, 42, 43, 44, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 45, 46, 47, 48, 49, 50, 51, 52, 53]
13 | 
14 | 
15 | def make_dir(path):
16 |     if os.path.exists(path):
17 |         pass
18 |     else:
19 |         os.mkdir(path)
20 | 
21 | 
22 | def generate_data_description(save_dir, reorder, new_split_path, version):
23 |     data = loadmat(os.path.join(save_dir, 'RAP_annotation/RAP_annotation.mat'))
24 |     data = data['RAP_annotation']
25 |     dataset = EasyDict()
26 |     dataset.description = 'rap2'
27 |     dataset.reorder = 'group_order'
28 |     dataset.root = os.path.join(save_dir, 'RAP_dataset')
29 |     dataset.image_name = [data['name'][0][0][i][0][0] for i in range(84928)]
30 |     raw_attr_name = [data['attribute'][0][0][i][0][0] for i in range(152)]
31 |     raw_label = data['data'][0][0]
32 |     selected_attr_idx = (data['selected_attribute'][0][0][0] - 1)[group_order].tolist()  # 54
33 | 
34 |     color_attr_idx = list(range(31, 45)) + list(range(53, 67)) + list(range(74, 88))  # 
42 35 | extra_attr_idx = np.setdiff1d(range(152), color_attr_idx + selected_attr_idx).tolist()[:24] 36 | extra_attr_idx = extra_attr_idx[:15] + extra_attr_idx[16:] 37 | 38 | dataset.label = raw_label[:, selected_attr_idx + color_attr_idx + extra_attr_idx] # (n, 119) 39 | dataset.attr_name = [raw_attr_name[i] for i in selected_attr_idx + color_attr_idx + extra_attr_idx] 40 | 41 | dataset.label_idx = EasyDict() 42 | dataset.label_idx.eval = list(range(54)) # 54 43 | dataset.label_idx.color = list(range(54, 96)) # not aligned with color label index in label 44 | dataset.label_idx.extra = list(range(96, 119)) # not aligned with extra label index in label 45 | 46 | if reorder: 47 | dataset.label_idx.eval = list(range(54)) 48 | 49 | dataset.partition = EasyDict() 50 | dataset.partition.train = [] 51 | dataset.partition.val = [] 52 | dataset.partition.test = [] 53 | dataset.partition.trainval = [] 54 | 55 | dataset.weight_train = [] 56 | dataset.weight_trainval = [] 57 | 58 | if new_split_path: 59 | 60 | # remove Age46-60 61 | dataset.label_idx.eval.remove(38) # 54 62 | 63 | with open(new_split_path, 'rb+') as f: 64 | new_split = pickle.load(f) 65 | 66 | train = np.array(new_split.train_idx) 67 | val = np.array(new_split.val_idx) 68 | test = np.array(new_split.test_idx) 69 | trainval = np.concatenate((train, val), axis=0) 70 | 71 | print(np.concatenate([trainval, test]).shape) 72 | 73 | dataset.partition.train = train 74 | dataset.partition.val = val 75 | dataset.partition.trainval = trainval 76 | dataset.partition.test = test 77 | 78 | weight_train = np.mean(dataset.label[train], axis=0).astype(np.float32) 79 | weight_trainval = np.mean(dataset.label[trainval], axis=0).astype(np.float32) 80 | 81 | print(weight_trainval[38]) 82 | 83 | dataset.weight_train.append(weight_train) 84 | dataset.weight_trainval.append(weight_trainval) 85 | with open(os.path.join(save_dir, f'dataset_zs_run{version}.pkl'), 'wb+') as f: 86 | pickle.dump(dataset, f) 87 | 88 | else: 89 | for idx in range(5): 90 | train = data['partition_attribute'][0][0][0][idx]['train_index'][0][0][0] - 1 91 | val = data['partition_attribute'][0][0][0][idx]['val_index'][0][0][0] - 1 92 | test = data['partition_attribute'][0][0][0][idx]['test_index'][0][0][0] - 1 93 | trainval = np.concatenate([train, val]) 94 | dataset.partition.train.append(train) 95 | dataset.partition.val.append(val) 96 | dataset.partition.test.append(test) 97 | dataset.partition.trainval.append(trainval) 98 | # cls_weight 99 | weight_train = np.mean(dataset.label[train], axis=0) 100 | weight_trainval = np.mean(dataset.label[trainval], axis=0) 101 | dataset.weight_train.append(weight_train) 102 | dataset.weight_trainval.append(weight_trainval) 103 | with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f: 104 | pickle.dump(dataset, f) 105 | 106 | 107 | if __name__ == "__main__": 108 | save_dir = '/mnt/data1/jiajian/datasets/attribute/RAP2/' 109 | reorder = True 110 | 111 | for i in range(5): 112 | new_split_path = f'/mnt/data1/jiajian/code/Rethinking_of_PAR/datasets/jian_split/index_rap2_split_id50_img300_ratio0.03_{i}.pkl' 113 | generate_data_description(save_dir, reorder, new_split_path, i) 114 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/docs/__init__.py 
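A note on the label layout produced by `format_rap2.py` above: in dataset_all.pkl the reordered (n, 119) label matrix is laid out as eval (columns 0-53), color (54-95), extra (96-118), matching `label_idx`. A minimal slicing sketch (assuming the pickle was written under ./data/RAP2/, as in the repository's data tree):

```python
import pickle

with open('./data/RAP2/dataset_all.pkl', 'rb') as f:  # requires easydict to unpickle
    dataset = pickle.load(f)

print(dataset.label.shape)                                 # (84928, 119)
eval_labels = dataset.label[:, dataset.label_idx.eval]    # (n, 54) evaluated attributes
color_labels = dataset.label[:, dataset.label_idx.color]  # (n, 42) color attributes
```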
-------------------------------------------------------------------------------- /docs/illus_zs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/docs/illus_zs.png -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 5 | import pickle 6 | 7 | from dataset.augmentation import get_transform 8 | from dataset.multi_label.coco import COCO14 9 | from metrics.pedestrian_metrics import get_pedestrian_metrics 10 | from models.model_factory import build_backbone, build_classifier 11 | 12 | import numpy as np 13 | import torch 14 | from torch.utils.data import DataLoader 15 | from tqdm import tqdm 16 | 17 | from configs import cfg, update_config 18 | from dataset.pedes_attr.pedes import PedesAttr 19 | from metrics.ml_metrics import get_map_metrics, get_multilabel_metrics 20 | from models.base_block import FeatClassifier 21 | # from models.model_factory import model_dict, classifier_dict 22 | 23 | from tools.function import get_model_log_path, get_reload_weight 24 | from tools.utils import set_seed, str2bool, time_str 25 | from models.backbone import swin_transformer, resnet, bninception 26 | from models.backbone.tresnet import tresnet 27 | from losses import bceloss, scaledbceloss 28 | 29 | set_seed(605) 30 | 31 | 32 | def main(cfg, args): 33 | exp_dir = os.path.join('exp_result', cfg.DATASET.NAME) 34 | model_dir, log_dir = get_model_log_path(exp_dir, cfg.NAME) 35 | 36 | train_tsfm, valid_tsfm = get_transform(cfg) 37 | print(valid_tsfm) 38 | 39 | if cfg.DATASET.TYPE == 'multi_label': 40 | train_set = COCO14(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm, 41 | target_transform=cfg.DATASET.TARGETTRANSFORM) 42 | 43 | valid_set = COCO14(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm, 44 | target_transform=cfg.DATASET.TARGETTRANSFORM) 45 | else: 46 | train_set = PedesAttr(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=valid_tsfm, 47 | target_transform=cfg.DATASET.TARGETTRANSFORM) 48 | valid_set = PedesAttr(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm, 49 | target_transform=cfg.DATASET.TARGETTRANSFORM) 50 | 51 | 52 | train_loader = DataLoader( 53 | dataset=train_set, 54 | batch_size=cfg.TRAIN.BATCH_SIZE, 55 | shuffle=False, 56 | num_workers=4, 57 | pin_memory=True, 58 | ) 59 | 60 | valid_loader = DataLoader( 61 | dataset=valid_set, 62 | batch_size=cfg.TRAIN.BATCH_SIZE, 63 | shuffle=False, 64 | num_workers=4, 65 | pin_memory=True, 66 | ) 67 | 68 | print(f'{cfg.DATASET.TRAIN_SPLIT} set: {len(train_loader.dataset)}, ' 69 | f'{cfg.DATASET.TEST_SPLIT} set: {len(valid_loader.dataset)}, ' 70 | f'attr_num : {train_set.attr_num}') 71 | 72 | backbone, c_output = build_backbone(cfg.BACKBONE.TYPE, cfg.BACKBONE.MULTISCALE) 73 | 74 | 75 | classifier = build_classifier(cfg.CLASSIFIER.NAME)( 76 | nattr=train_set.attr_num, 77 | c_in=c_output, 78 | bn=cfg.CLASSIFIER.BN, 79 | pool=cfg.CLASSIFIER.POOLING, 80 | scale =cfg.CLASSIFIER.SCALE 81 | ) 82 | 83 | model = FeatClassifier(backbone, classifier) 84 | 85 | if torch.cuda.is_available(): 86 | model = torch.nn.DataParallel(model).cuda() 87 | 88 | model = get_reload_weight(model_dir, model, pth='xxxxxxxxxxxxxxx') 89 | 90 | model.eval() 91 | preds_probs = [] 92 | gt_list = [] 93 | 
path_list = []
94 | 
95 |     attn_list = []
96 |     with torch.no_grad():
97 |         for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
98 |             imgs = imgs.cuda()
99 |             gt_label = gt_label.cuda()
100 |             valid_logits, attns = model(imgs, gt_label)
101 | 
102 |             valid_probs = torch.sigmoid(valid_logits[0])
103 | 
104 |             path_list.extend(imgname)
105 |             gt_list.append(gt_label.cpu().numpy())
106 |             preds_probs.append(valid_probs.cpu().numpy())
107 | 
108 | 
109 |     gt_label = np.concatenate(gt_list, axis=0)
110 |     preds_probs = np.concatenate(preds_probs, axis=0)
111 | 
112 | 
113 | 
114 |     if cfg.METRIC.TYPE == 'pedestrian':
115 |         valid_result = get_pedestrian_metrics(gt_label, preds_probs)
116 |         valid_map, _ = get_map_metrics(gt_label, preds_probs)
117 | 
118 |         print(f'Evaluation on test set, \n',
119 |               'ma: {:.4f}, map: {:.4f}, label_f1: {:.4f}, pos_recall: {:.4f}, neg_recall: {:.4f} \n'.format(
120 |                   valid_result.ma, valid_map, np.mean(valid_result.label_f1), np.mean(valid_result.label_pos_recall),
121 |                   np.mean(valid_result.label_neg_recall)),
122 |               'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
123 |                   valid_result.instance_acc, valid_result.instance_prec, valid_result.instance_recall,
124 |                   valid_result.instance_f1)
125 |               )
126 | 
127 |         with open(os.path.join(model_dir, 'results_test_feat_best.pkl'), 'wb+') as f:
128 |             pickle.dump([valid_result, gt_label, preds_probs, attn_list, path_list], f, protocol=4)
129 | 
130 |     elif cfg.METRIC.TYPE == 'multi_label':
131 |         if not cfg.INFER.SAMPLING:
132 |             valid_metric = get_multilabel_metrics(gt_label, preds_probs)
133 | 
134 |             print(
135 |                 'Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f}, CP: {:.4f}, CR: {:.4f}, '
136 |                 'CF1: {:.4f}'.format(valid_metric.map, valid_metric.OP, valid_metric.OR, valid_metric.OF1,
137 |                                      valid_metric.CP, valid_metric.CR, valid_metric.CF1))
138 | 
139 |     print(f'{time_str()}')
140 |     print('-' * 60)
141 | 
142 | def argument_parser():
143 |     parser = argparse.ArgumentParser(description="attribute recognition",
144 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
145 | 
146 |     parser.add_argument(
147 |         "--cfg", help="decide which cfg to use", type=str,
148 |     )
149 |     parser.add_argument("--debug", type=str2bool, default="true")
150 | 
151 |     args = parser.parse_args()
152 | 
153 |     return args
154 | 
155 | 
156 | if __name__ == '__main__':
157 |     args = argument_parser()
158 |     update_config(cfg, args)
159 | 
160 |     main(cfg, args)
161 | 
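162 | # --- output sketch (not part of the original file) ---
163 | # get_pedestrian_metrics returns an EasyDict; a toy call illustrating its shape
164 | # (the threshold defaults to 0.5):
165 | #
166 | #     import numpy as np
167 | #     from metrics.pedestrian_metrics import get_pedestrian_metrics
168 | #
169 | #     gt = np.array([[1, 0], [1, 1]])
170 | #     probs = np.array([[0.9, 0.2], [0.4, 0.8]])
171 | #     res = get_pedestrian_metrics(gt, probs)
172 | #     print(res.ma, res.instance_f1)   # label-based mean accuracy, instance-based F1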
-------------------------------------------------------------------------------- /losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/losses/__init__.py -------------------------------------------------------------------------------- /losses/bceloss.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | from models.registry import LOSSES
6 | from tools.function import ratio2weight
7 | 
8 | 
9 | @LOSSES.register("bceloss")
10 | class BCELoss(nn.Module):
11 | 
12 |     def __init__(self, sample_weight=None, size_sum=True, scale=None, tb_writer=None):
13 |         super(BCELoss, self).__init__()
14 | 
15 |         self.sample_weight = sample_weight
16 |         self.size_sum = size_sum
17 |         self.hyper = 0.8
18 |         self.smoothing = None
19 | 
20 |     def forward(self, logits, targets):
21 |         logits = logits[0]
22 | 
23 |         if self.smoothing is not None:
24 |             targets = (1 - self.smoothing) * targets + self.smoothing * (1 - targets)
25 | 
26 |         loss_m = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
27 | 
28 |         targets_mask = torch.where(targets.detach().cpu() > 0.5, torch.ones(1), torch.zeros(1))
29 |         if self.sample_weight is not None:
30 |             sample_weight = ratio2weight(targets_mask, self.sample_weight)
31 | 
32 |             loss_m = (loss_m * sample_weight.cuda())
33 | 
34 |         # losses = loss_m.sum(1).mean() if self.size_sum else loss_m.mean()
35 |         loss = loss_m.sum(1).mean() if self.size_sum else loss_m.sum()
36 | 
37 |         return [loss], [loss_m]
-------------------------------------------------------------------------------- /losses/label_smoothing.py: --------------------------------------------------------------------------------
1 | import torch  # needed by LabelSmoothingLoss.forward below
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | # https://github.com/pytorch/pytorch/issues/7455
5 | class LabelSmoothLoss(nn.Module):
6 | 
7 |     def __init__(self, smoothing=0.0):
8 |         super(LabelSmoothLoss, self).__init__()
9 |         self.smoothing = smoothing
10 | 
11 |     def forward(self, input, target):
12 |         log_prob = F.log_softmax(input, dim=-1)
13 |         weight = input.new_ones(input.size()) * self.smoothing / (input.size(-1) - 1.)
14 |         weight.scatter_(-1, target.unsqueeze(-1), (1. - self.smoothing))
15 |         loss = (-weight * log_prob).sum(dim=-1).mean()
16 |         return loss
17 | 
18 | 
19 | class LabelSmoothingLoss(nn.Module):
20 |     def __init__(self, classes, smoothing=0.0, dim=-1):
21 |         super(LabelSmoothingLoss, self).__init__()
22 |         self.confidence = 1.0 - smoothing
23 |         self.smoothing = smoothing
24 |         self.cls = classes
25 |         self.dim = dim
26 | 
27 |     def forward(self, pred, target):
28 |         pred = pred.log_softmax(dim=self.dim)
29 |         with torch.no_grad():
30 |             # true_dist = pred.data.clone()
31 |             true_dist = torch.zeros_like(pred)
32 |             true_dist.fill_(self.smoothing / (self.cls - 1))
33 |             true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
34 |         return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))
35 | 
-------------------------------------------------------------------------------- /losses/scaledbceloss.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | from models.registry import LOSSES
6 | from tools.function import ratio2weight
7 | 
8 | 
9 | @LOSSES.register("scaledbceloss")
10 | class ScaledBCELoss(nn.Module):
11 | 
12 |     def __init__(self, sample_weight=None, size_sum=True, scale=30, tb_writer=None):
13 |         super(ScaledBCELoss, self).__init__()
14 | 
15 |         self.sample_weight = sample_weight
16 |         self.size_sum = size_sum
17 |         self.hyper = 0.8
18 |         self.smoothing = None
19 |         self.pos_scale = scale
20 |         self.neg_scale = scale
21 |         self.tb_writer = tb_writer
22 | 
23 |     def forward(self, logits, targets):
24 |         batch_size = logits.shape[0]
25 | 
26 |         logits = logits * targets * self.pos_scale + logits * (1 - targets) * self.neg_scale
27 | 
28 |         if self.smoothing is not None:
29 |             targets = (1 - self.smoothing) * targets + self.smoothing * (1 - targets)
30 | 
31 |         loss_m = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
32 | 
33 |         targets_mask = torch.where(targets.detach().cpu() > 0.5, torch.ones(1), torch.zeros(1))
34 | 
35 |         if self.sample_weight is not None:
36 |             sample_weight = ratio2weight(targets_mask, self.sample_weight)
37 | 
38 |             loss_m = (loss_m * sample_weight.cuda())
39 | 
40 |         loss = loss_m.sum(1).mean() if self.size_sum else loss_m.mean()
41 | 
42 |         return [loss], [loss_m]
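43 | # --- note (not part of the original file) ---
44 | # ratio2weight (tools/function.py, not shown in this excerpt) turns the per-attribute
45 | # positive ratio `sample_weight` into per-element loss weights. A plausible sketch,
46 | # consistent with how it is called above (exponential up-weighting of rare positives;
47 | # the exact formula in tools/function.py may differ):
48 | #
49 | #     def ratio2weight(targets, ratio):
50 | #         # targets: (B, nattr) 0/1 mask; ratio: (nattr,) positive-sample frequency, as a tensor
51 | #         pos_weights = targets * (1 - ratio)
52 | #         neg_weights = (1 - targets) * ratio
53 | #         return torch.exp(pos_weights + neg_weights)  # rare positives get weight close to e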
-------------------------------------------------------------------------------- /metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/metrics/__init__.py -------------------------------------------------------------------------------- /metrics/ml_metrics.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from easydict import EasyDict
3 | from sklearn.metrics import average_precision_score
4 | 
5 | 
6 | def calc_average_precision(gt_label, probs):
7 |     ndata, nattr = gt_label.shape
8 | 
9 |     ap_list = []
10 |     for i in range(nattr):
11 |         y_true = gt_label[:, i]
12 |         y_score = probs[:, i]
13 | 
14 |         ap_list.append(average_precision_score(y_true, y_score))
15 |     ap = np.array(ap_list)
16 |     mAp = ap.mean()
17 |     return mAp, ap
18 | 
19 | def get_map_metrics(gt_label, probs):
20 |     mAP, ap = calc_average_precision(gt_label, probs)
21 | 
22 |     return mAP, ap
23 | 
24 | # same as calc_average_precision
25 | def get_mAp(gt_label: np.ndarray, probs: np.ndarray):
26 |     ndata, nattr = gt_label.shape
27 |     rg = np.arange(1, ndata + 1).astype(float)
28 |     ap_list = []
29 |     for k in range(nattr):
30 |         # sort scores
31 |         scores = probs[:, k]
32 |         targets = gt_label[:, k]
33 |         sorted_idx = np.argsort(scores)[::-1]  # Descending
34 |         truth = targets[sorted_idx]
35 | 
36 |         tp = np.cumsum(truth).astype(float)
37 |         # compute precision curve
38 |         precision = tp / rg
39 | 
40 |         # compute average precision
41 |         ap_list.append(precision[truth == 1].sum() / max(truth.sum(), 1))
42 | 
43 |     ap = np.array(ap_list)
44 |     mAp = ap.mean()
45 |     return mAp, ap
46 | 
47 | 
48 | 
49 | 
50 | 
51 | def prob2metric(gt_label: np.ndarray, probs: np.ndarray, th):
52 |     eps = 1e-6
53 |     ndata, nattr = gt_label.shape
54 | 
55 |     # ------------------ macro, micro ---------------
56 |     # gt_label[gt_label == -1] = 0
57 |     pred_label = probs > th
58 |     gt_pos = gt_label.sum(0)
59 |     pred_pos = pred_label.sum(0)
60 |     tp = (gt_label * pred_label).sum(0)
61 | 
62 |     OP = tp.sum() / pred_pos.sum()
63 |     OR = tp.sum() / gt_pos.sum()
64 |     OF1 = (2 * OP * OR) / (OP + OR)
65 | 
66 |     pred_pos[pred_pos == 0] = 1
67 | 
68 |     CP_all = tp / pred_pos
69 |     CR_all = tp / gt_pos
70 | 
71 |     CP_all_t = tp / pred_pos
72 |     CP_all_t[CP_all_t == 0] = 1
73 |     CR_all_t = tp / gt_pos
74 |     CR_all_t[CR_all_t == 0] = 1
75 |     CF1_all = (2 * CP_all * CR_all) / (CP_all_t + CR_all_t)
76 | 
77 |     CF1_mean = CF1_all.mean()
78 | 
79 |     CP = np.mean(tp / pred_pos)
80 |     CR = np.mean(tp / gt_pos)
81 |     CF1 = (2 * CP * CR) / (CP + CR)
82 | 
83 |     gt_neg = ndata - gt_pos
84 |     tn = ((1 - gt_label) * (1 - pred_label)).sum(0)
85 | 
86 |     label_pos_recall = 1.0 * tp / (gt_pos + eps)  # true positive
87 |     label_neg_recall = 1.0 * tn / (gt_neg + eps)  # true negative
88 |     # mean accuracy
89 |     label_ma = (label_pos_recall + label_neg_recall) / 2
90 | 
91 |     ma = label_ma.mean()
92 | 
93 |     return OP, OR, OF1, CP, CR, CF1, ma, CP_all, CR_all, CF1_all, CF1_mean
94 | 
95 | 
96 | def get_multilabel_metrics(gt_label, prob_pred, th=0.5):
97 | 
98 |     result = EasyDict()
99 | 
100 | 
101 |     mAP, ap = calc_average_precision(gt_label, prob_pred)
102 |     op, orecall, of1, cp, cr, cf1, ma, cp_all, cr_all, cf1_all, CF1_mean = prob2metric(gt_label, prob_pred, th)
103 |     result.map = mAP * 100. 
104 | 105 | # to json serializable 106 | result.CP_all = list(cp_all.astype(np.float64)) 107 | result.CR_all = list(cr_all.astype(np.float64)) 108 | result.CF1_all = list(cf1_all.astype(np.float64)) 109 | result.CF1_mean = CF1_mean 110 | 111 | # simplified way 112 | # mAP, ap = calc_average_precision(gt_label, probs) 113 | # pred_label = probs > 0.5 114 | # CP, CR, _, _ = precision_recall_fscore_support(gt_label, pred_label, average='macro') 115 | # CF1 = 2 * CP * CR / (CP + CR) 116 | # OP, OR, OF1, _ = precision_recall_fscore_support(gt_label, pred_label, average='micro') 117 | 118 | result.OP = op * 100. 119 | result.OR = orecall * 100. 120 | result.OF1 = of1 * 100. 121 | result.CP = cp * 100. 122 | result.CR = cr * 100. 123 | result.CF1 = cf1 * 100. 124 | 125 | return result 126 | 127 | -------------------------------------------------------------------------------- /metrics/pedestrian_metrics.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from easydict import EasyDict 5 | import torch 6 | 7 | 8 | def get_pedestrian_metrics(gt_label, preds_probs, threshold=0.5, index=None, cfg=None): 9 | """ 10 | index: evaluated label index 11 | """ 12 | pred_label = preds_probs > threshold 13 | 14 | eps = 1e-20 15 | result = EasyDict() 16 | 17 | if index is not None: 18 | pred_label = pred_label[:, index] 19 | gt_label = gt_label[:, index] 20 | 21 | ############################### 22 | # label metrics 23 | # TP + FN 24 | gt_pos = np.sum((gt_label == 1), axis=0).astype(float) 25 | # TN + FP 26 | gt_neg = np.sum((gt_label == 0), axis=0).astype(float) 27 | # TP 28 | true_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=0).astype(float) 29 | # TN 30 | true_neg = np.sum((gt_label == 0) * (pred_label == 0), axis=0).astype(float) 31 | # FP 32 | false_pos = np.sum(((gt_label == 0) * (pred_label == 1)), axis=0).astype(float) 33 | # FN 34 | false_neg = np.sum(((gt_label == 1) * (pred_label == 0)), axis=0).astype(float) 35 | 36 | label_pos_recall = 1.0 * true_pos / (gt_pos + eps) # true positive 37 | label_neg_recall = 1.0 * true_neg / (gt_neg + eps) # true negative 38 | # mean accuracy 39 | label_ma = (label_pos_recall + label_neg_recall) / 2 40 | 41 | result.label_pos_recall = label_pos_recall 42 | result.label_neg_recall = label_neg_recall 43 | result.label_prec = true_pos / (true_pos + false_pos + eps) 44 | result.label_acc = true_pos / (true_pos + false_pos + false_neg + eps) 45 | result.label_f1 = 2 * result.label_prec * result.label_pos_recall / ( 46 | result.label_prec + result.label_pos_recall + eps) 47 | 48 | result.label_ma = label_ma 49 | result.ma = np.mean(label_ma) 50 | 51 | ################ 52 | # instance metrics 53 | gt_pos = np.sum((gt_label == 1), axis=1).astype(float) 54 | true_pos = np.sum((pred_label == 1), axis=1).astype(float) 55 | # true positive 56 | intersect_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=1).astype(float) 57 | # IOU 58 | union_pos = np.sum(((gt_label == 1) + (pred_label == 1)), axis=1).astype(float) 59 | 60 | instance_acc = intersect_pos / (union_pos + eps) 61 | instance_prec = intersect_pos / (true_pos + eps) 62 | instance_recall = intersect_pos / (gt_pos + eps) 63 | instance_f1 = 2 * instance_prec * instance_recall / (instance_prec + instance_recall + eps) 64 | 65 | instance_acc = np.mean(instance_acc) 66 | instance_prec = np.mean(instance_prec) 67 | instance_recall = np.mean(instance_recall) 68 | # instance_f1 = np.mean(instance_f1) 69 | instance_f1 = 2 * 
instance_prec * instance_recall / (instance_prec + instance_recall + eps) 70 | 71 | result.instance_acc = instance_acc 72 | result.instance_prec = instance_prec 73 | result.instance_recall = instance_recall 74 | result.instance_f1 = instance_f1 75 | 76 | result.error_num, result.fn_num, result.fp_num = false_pos + false_neg, false_neg, false_pos 77 | 78 | return result 79 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/models/backbone/__init__.py -------------------------------------------------------------------------------- /models/backbone/checkpoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/models/backbone/checkpoints/__init__.py -------------------------------------------------------------------------------- /models/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.hub import load_state_dict_from_url 3 | 4 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 5 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d'] 6 | 7 | from models.registry import BACKBONE 8 | 9 | model_urls = { 10 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 11 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 12 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 13 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 14 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 15 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', 16 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', 17 | } 18 | 19 | 20 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 21 | """3x3 convolution with padding""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 23 | padding=dilation, groups=groups, bias=False, dilation=dilation) 24 | 25 | 26 | def conv1x1(in_planes, out_planes, stride=1): 27 | """1x1 convolution""" 28 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 35 | base_width=64, dilation=1, norm_layer=None): 36 | super(BasicBlock, self).__init__() 37 | if norm_layer is None: 38 | norm_layer = nn.BatchNorm2d 39 | if groups != 1 or base_width != 64: 40 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 41 | if dilation > 1: 42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 43 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 44 | self.conv1 = conv3x3(inplanes, planes, stride) 45 | self.bn1 = norm_layer(planes) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.conv2 = conv3x3(planes, planes) 48 | self.bn2 = norm_layer(planes) 49 | 
self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | identity = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | 62 | if self.downsample is not None: 63 | identity = self.downsample(x) 64 | 65 | out += identity 66 | out = self.relu(out) 67 | 68 | return out 69 | 70 | 71 | class Bottleneck(nn.Module): 72 | expansion = 4 73 | 74 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 75 | base_width=64, dilation=1, norm_layer=None): 76 | super(Bottleneck, self).__init__() 77 | if norm_layer is None: 78 | norm_layer = nn.BatchNorm2d 79 | width = int(planes * (base_width / 64.)) * groups 80 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 81 | self.conv1 = conv1x1(inplanes, width) 82 | self.bn1 = norm_layer(width) 83 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 84 | self.bn2 = norm_layer(width) 85 | self.conv3 = conv1x1(width, planes * self.expansion) 86 | self.bn3 = norm_layer(planes * self.expansion) 87 | self.relu = nn.ReLU(inplace=True) 88 | self.downsample = downsample 89 | self.stride = stride 90 | 91 | def forward(self, x): 92 | identity = x 93 | 94 | out = self.conv1(x) 95 | out = self.bn1(out) 96 | out = self.relu(out) 97 | 98 | out = self.conv2(out) 99 | out = self.bn2(out) 100 | out = self.relu(out) 101 | 102 | out = self.conv3(out) 103 | out = self.bn3(out) 104 | 105 | if self.downsample is not None: 106 | identity = self.downsample(x) 107 | 108 | out += identity 109 | out = self.relu(out) 110 | 111 | return out 112 | 113 | 114 | class ResNet(nn.Module): 115 | 116 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, 117 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 118 | norm_layer=None, multi_scale=False): 119 | super(ResNet, self).__init__() 120 | 121 | self.multi_scale = multi_scale 122 | if norm_layer is None: 123 | norm_layer = nn.BatchNorm2d 124 | self._norm_layer = norm_layer 125 | 126 | self.inplanes = 64 127 | self.dilation = 1 128 | if replace_stride_with_dilation is None: 129 | # each element in the tuple indicates if we should replace 130 | # the 2x2 stride with a dilated convolution instead 131 | 132 | # ----------------------------- 133 | # modified 134 | replace_stride_with_dilation = [False, False, False] 135 | # ----------------------------- 136 | 137 | if len(replace_stride_with_dilation) != 3: 138 | raise ValueError("replace_stride_with_dilation should be None " 139 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 140 | self.groups = groups 141 | self.base_width = width_per_group 142 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 143 | bias=False) 144 | self.bn1 = norm_layer(self.inplanes) 145 | self.relu = nn.ReLU(inplace=True) 146 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 147 | self.layer1 = self._make_layer(block, 64, layers[0]) 148 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 149 | dilate=replace_stride_with_dilation[0]) 150 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 151 | dilate=replace_stride_with_dilation[1]) 152 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 153 | dilate=replace_stride_with_dilation[2]) 154 | 155 | # self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 156 | # self.fc = nn.Linear(512 * block.expansion, num_classes) 157 | 158 | for m in self.modules(): 159 | 
if isinstance(m, nn.Conv2d): 160 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 161 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 162 | nn.init.constant_(m.weight, 1) 163 | nn.init.constant_(m.bias, 0) 164 | 165 | # Zero-initialize the last BN in each residual branch, 166 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 167 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 168 | if zero_init_residual: 169 | for m in self.modules(): 170 | if isinstance(m, Bottleneck): 171 | nn.init.constant_(m.bn3.weight, 0) 172 | elif isinstance(m, BasicBlock): 173 | nn.init.constant_(m.bn2.weight, 0) 174 | 175 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 176 | norm_layer = self._norm_layer 177 | downsample = None 178 | previous_dilation = self.dilation 179 | if dilate: 180 | self.dilation *= stride 181 | stride = 1 182 | if stride != 1 or self.inplanes != planes * block.expansion: 183 | downsample = nn.Sequential( 184 | conv1x1(self.inplanes, planes * block.expansion, stride), 185 | norm_layer(planes * block.expansion), 186 | ) 187 | 188 | layers = [] 189 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 190 | self.base_width, previous_dilation, norm_layer)) 191 | self.inplanes = planes * block.expansion 192 | for _ in range(1, blocks): 193 | layers.append(block(self.inplanes, planes, groups=self.groups, 194 | base_width=self.base_width, dilation=self.dilation, 195 | norm_layer=norm_layer)) 196 | 197 | return nn.Sequential(*layers) 198 | 199 | def forward(self, x): 200 | x = self.conv1(x) 201 | x = self.bn1(x) 202 | x = self.relu(x) 203 | x = self.maxpool(x) 204 | 205 | x1 = self.layer1(x) 206 | x2 = self.layer2(x1) 207 | x3 = self.layer3(x2) 208 | x4 = self.layer4(x3) 209 | 210 | if self.multi_scale: 211 | return [x2, x3, x4] 212 | else: 213 | return x4 214 | 215 | 216 | def remove_fc(state_dict): 217 | """ Remove the fc layer parameter from state_dict. """ 218 | return {key: value for key, value in state_dict.items() if not key.startswith('fc.')} 219 | 220 | 221 | 222 | def _resnet(arch, block, layers, pretrained, progress, **kwargs): 223 | model = ResNet(block, layers, **kwargs) 224 | if pretrained: 225 | state_dict = load_state_dict_from_url(model_urls[arch], 226 | progress=progress) 227 | model.load_state_dict(remove_fc(state_dict), strict=True) 228 | return model 229 | 230 | @BACKBONE.register("resnet18") 231 | def resnet18(pretrained=True, progress=True, **kwargs): 232 | """Constructs a ResNet-18 model. 233 | 234 | Args: 235 | pretrained (bool): If True, returns a model pre-trained on ImageNet 236 | progress (bool): If True, displays a progress bar of the download to stderr 237 | """ 238 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, 239 | **kwargs) 240 | 241 | @BACKBONE.register("resnet34") 242 | def resnet34(pretrained=True, progress=True, **kwargs): 243 | """Constructs a ResNet-34 model. 244 | 245 | Args: 246 | pretrained (bool): If True, returns a model pre-trained on ImageNet 247 | progress (bool): If True, displays a progress bar of the download to stderr 248 | """ 249 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, 250 | **kwargs) 251 | 252 | @BACKBONE.register("resnet50") 253 | def resnet50(pretrained=True, progress=True, **kwargs): 254 | """Constructs a ResNet-50 model. 
255 | 256 | Args: 257 | pretrained (bool): If True, returns a model pre-trained on ImageNet 258 | progress (bool): If True, displays a progress bar of the download to stderr 259 | """ 260 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, 261 | **kwargs) 262 | 263 | @BACKBONE.register("resnet101") 264 | def resnet101(pretrained=True, progress=True, **kwargs): 265 | """Constructs a ResNet-101 model. 266 | 267 | Args: 268 | pretrained (bool): If True, returns a model pre-trained on ImageNet 269 | progress (bool): If True, displays a progress bar of the download to stderr 270 | """ 271 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, 272 | **kwargs) 273 | 274 | @BACKBONE.register("resnet152") 275 | def resnet152(pretrained=True, progress=True, **kwargs): 276 | """Constructs a ResNet-152 model. 277 | 278 | Args: 279 | pretrained (bool): If True, returns a model pre-trained on ImageNet 280 | progress (bool): If True, displays a progress bar of the download to stderr 281 | """ 282 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, 283 | **kwargs) 284 | 285 | @BACKBONE.register("resnext50_32x4d") 286 | def resnext50_32x4d(pretrained=True, progress=True, **kwargs): 287 | """Constructs a ResNeXt-50 32x4d model. 288 | 289 | Args: 290 | pretrained (bool): If True, returns a model pre-trained on ImageNet 291 | progress (bool): If True, displays a progress bar of the download to stderr 292 | """ 293 | kwargs['groups'] = 32 294 | kwargs['width_per_group'] = 4 295 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], 296 | pretrained, progress, **kwargs) 297 | 298 | @BACKBONE.register("resnext101_32x8d") 299 | def resnext101_32x8d(pretrained=True, progress=True, **kwargs): 300 | """Constructs a ResNeXt-101 32x8d model. 
301 | 302 | Args: 303 | pretrained (bool): If True, returns a model pre-trained on ImageNet 304 | progress (bool): If True, displays a progress bar of the download to stderr 305 | """ 306 | kwargs['groups'] = 32 307 | kwargs['width_per_group'] = 8 308 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], 309 | pretrained, progress, **kwargs) 310 | 311 | -------------------------------------------------------------------------------- /models/backbone/resnet_ibn.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | class IBN(nn.Module): 9 | r"""Instance-Batch Normalization layer from 10 | `"Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net" 11 | ` 12 | Args: 13 | planes (int): Number of channels for the input tensor 14 | ratio (float): Ratio of instance normalization in the IBN layer 15 | """ 16 | 17 | def __init__(self, planes, ratio=0.5): 18 | super(IBN, self).__init__() 19 | self.half = int(planes * ratio) 20 | self.IN = nn.InstanceNorm2d(self.half, affine=True) 21 | self.BN = nn.BatchNorm2d(planes - self.half) 22 | 23 | def forward(self, x): 24 | split = torch.split(x, self.half, 1) 25 | out1 = self.IN(split[0].contiguous()) 26 | out2 = self.BN(split[1].contiguous()) 27 | out = torch.cat((out1, out2), 1) 28 | return out 29 | 30 | 31 | __all__ = ['ResNet_IBN', 'resnet18_ibn_a', 'resnet34_ibn_a', 'resnet50_ibn_a', 'resnet101_ibn_a', 'resnet152_ibn_a', 32 | 'resnet18_ibn_b', 'resnet34_ibn_b', 'resnet50_ibn_b', 'resnet101_ibn_b', 'resnet152_ibn_b'] 33 | 34 | model_urls = { 35 | 'resnet18_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_a-2f571257.pth', 36 | 'resnet34_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_a-94bc1577.pth', 37 | 'resnet50_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_a-d9d0bb7b.pth', 38 | 'resnet101_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_a-59ea0ac6.pth', 39 | 'resnet18_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_b-bc2f3c11.pth', 40 | 'resnet34_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_b-04134c37.pth', 41 | 'resnet50_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_b-9ca61e85.pth', 42 | 'resnet101_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_b-c55f6dba.pth', 43 | } 44 | 45 | 46 | class BasicBlock_IBN(nn.Module): 47 | expansion = 1 48 | 49 | def __init__(self, inplanes, planes, ibn=None, stride=1, downsample=None): 50 | super(BasicBlock_IBN, self).__init__() 51 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, 52 | padding=1, bias=False) 53 | if ibn == 'a': 54 | self.bn1 = IBN(planes) 55 | else: 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.IN = nn.InstanceNorm2d(planes, affine=True) if ibn == 'b' else None 61 | self.downsample = downsample 62 | self.stride = stride 63 | 64 | def forward(self, x): 65 | residual = x 66 | 67 | out = self.conv1(x) 68 | out = self.bn1(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv2(out) 72 | out = self.bn2(out) 73 | 74 | if self.downsample is not None: 75 | residual = self.downsample(x) 76 | 
77 | out += residual 78 | if self.IN is not None: 79 | out = self.IN(out) 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | 85 | class Bottleneck_IBN(nn.Module): 86 | expansion = 4 87 | 88 | def __init__(self, inplanes, planes, ibn=None, stride=1, downsample=None): 89 | super(Bottleneck_IBN, self).__init__() 90 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 91 | if ibn == 'a': 92 | self.bn1 = IBN(planes) 93 | else: 94 | self.bn1 = nn.BatchNorm2d(planes) 95 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 96 | padding=1, bias=False) 97 | self.bn2 = nn.BatchNorm2d(planes) 98 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 99 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 100 | self.IN = nn.InstanceNorm2d(planes * 4, affine=True) if ibn == 'b' else None 101 | self.relu = nn.ReLU(inplace=True) 102 | self.downsample = downsample 103 | self.stride = stride 104 | 105 | def forward(self, x): 106 | residual = x 107 | 108 | out = self.conv1(x) 109 | out = self.bn1(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv2(out) 113 | out = self.bn2(out) 114 | out = self.relu(out) 115 | 116 | out = self.conv3(out) 117 | out = self.bn3(out) 118 | 119 | if self.downsample is not None: 120 | residual = self.downsample(x) 121 | 122 | out += residual 123 | if self.IN is not None: 124 | out = self.IN(out) 125 | out = self.relu(out) 126 | 127 | return out 128 | 129 | 130 | class ResNet_IBN(nn.Module): 131 | 132 | def __init__(self, 133 | block, 134 | layers, 135 | ibn_cfg=('a', 'a', 'a', None), 136 | num_classes=1000): 137 | self.inplanes = 64 138 | super(ResNet_IBN, self).__init__() 139 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 140 | bias=False) 141 | if ibn_cfg[0] == 'b': 142 | self.bn1 = nn.InstanceNorm2d(64, affine=True) 143 | else: 144 | self.bn1 = nn.BatchNorm2d(64) 145 | self.relu = nn.ReLU(inplace=True) 146 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 147 | self.layer1 = self._make_layer(block, 64, layers[0], ibn=ibn_cfg[0]) 148 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, ibn=ibn_cfg[1]) 149 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, ibn=ibn_cfg[2]) 150 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, ibn=ibn_cfg[3]) 151 | self.avgpool = nn.AvgPool2d(7) 152 | self.fc = nn.Linear(512 * block.expansion, num_classes) 153 | 154 | for m in self.modules(): 155 | if isinstance(m, nn.Conv2d): 156 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 157 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 158 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d): 159 | m.weight.data.fill_(1) 160 | m.bias.data.zero_() 161 | 162 | def _make_layer(self, block, planes, blocks, stride=1, ibn=None): 163 | downsample = None 164 | if stride != 1 or self.inplanes != planes * block.expansion: 165 | downsample = nn.Sequential( 166 | nn.Conv2d(self.inplanes, planes * block.expansion, 167 | kernel_size=1, stride=stride, bias=False), 168 | nn.BatchNorm2d(planes * block.expansion), 169 | ) 170 | 171 | layers = [] 172 | layers.append(block(self.inplanes, planes, 173 | None if ibn == 'b' else ibn, 174 | stride, downsample)) 175 | self.inplanes = planes * block.expansion 176 | for i in range(1, blocks): 177 | layers.append(block(self.inplanes, planes, 178 | None if (ibn == 'b' and i < blocks - 1) else ibn)) 179 | 180 | return nn.Sequential(*layers) 181 | 182 | def forward(self, x): 183 | x = self.conv1(x) 184 | x = self.bn1(x) 185 | x = self.relu(x) 186 | x = self.maxpool(x) 187 | 188 | x = self.layer1(x) 189 | x = self.layer2(x) 190 | x = self.layer3(x) 191 | x = self.layer4(x) 192 | 193 | # x = self.avgpool(x) 194 | # x = x.view(x.size(0), -1) 195 | # x = self.fc(x) 196 | 197 | return x 198 | 199 | 200 | def resnet18_ibn_a(pretrained=False, **kwargs): 201 | """Constructs a ResNet-18-IBN-a model. 202 | 203 | Args: 204 | pretrained (bool): If True, returns a model pre-trained on ImageNet 205 | """ 206 | model = ResNet_IBN(block=BasicBlock_IBN, 207 | layers=[2, 2, 2, 2], 208 | ibn_cfg=('a', 'a', 'a', None), 209 | **kwargs) 210 | if pretrained: 211 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet18_ibn_a'])) 212 | return model 213 | 214 | 215 | def resnet34_ibn_a(pretrained=False, **kwargs): 216 | """Constructs a ResNet-34-IBN-a model. 217 | 218 | Args: 219 | pretrained (bool): If True, returns a model pre-trained on ImageNet 220 | """ 221 | model = ResNet_IBN(block=BasicBlock_IBN, 222 | layers=[3, 4, 6, 3], 223 | ibn_cfg=('a', 'a', 'a', None), 224 | **kwargs) 225 | if pretrained: 226 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet34_ibn_a'])) 227 | return model 228 | 229 | 230 | def resnet50_ibn_a(pretrained=True, **kwargs): 231 | """Constructs a ResNet-50-IBN-a model. 232 | 233 | Args: 234 | pretrained (bool): If True, returns a model pre-trained on ImageNet 235 | """ 236 | model = ResNet_IBN(block=Bottleneck_IBN, 237 | layers=[3, 4, 6, 3], 238 | ibn_cfg=('a', 'a', 'a', None), 239 | **kwargs) 240 | if pretrained: 241 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet50_ibn_a'])) 242 | return model 243 | 244 | 245 | def resnet101_ibn_a(pretrained=False, **kwargs): 246 | """Constructs a ResNet-101-IBN-a model. 247 | 248 | Args: 249 | pretrained (bool): If True, returns a model pre-trained on ImageNet 250 | """ 251 | model = ResNet_IBN(block=Bottleneck_IBN, 252 | layers=[3, 4, 23, 3], 253 | ibn_cfg=('a', 'a', 'a', None), 254 | **kwargs) 255 | if pretrained: 256 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet101_ibn_a'])) 257 | return model 258 | 259 | 260 | def resnet152_ibn_a(pretrained=False, **kwargs): 261 | """Constructs a ResNet-152-IBN-a model. 
262 | 263 | Args: 264 | pretrained (bool): If True, returns a model pre-trained on ImageNet 265 | """ 266 | model = ResNet_IBN(block=Bottleneck_IBN, 267 | layers=[3, 8, 36, 3], 268 | ibn_cfg=('a', 'a', 'a', None), 269 | **kwargs) 270 | if pretrained: 271 | warnings.warn("Pretrained model not available for ResNet-152-IBN-a!") 272 | return model 273 | 274 | 275 | def resnet18_ibn_b(pretrained=False, **kwargs): 276 | """Constructs a ResNet-18-IBN-b model. 277 | 278 | Args: 279 | pretrained (bool): If True, returns a model pre-trained on ImageNet 280 | """ 281 | model = ResNet_IBN(block=BasicBlock_IBN, 282 | layers=[2, 2, 2, 2], 283 | ibn_cfg=('b', 'b', None, None), 284 | **kwargs) 285 | if pretrained: 286 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet18_ibn_b'])) 287 | return model 288 | 289 | 290 | def resnet34_ibn_b(pretrained=False, **kwargs): 291 | """Constructs a ResNet-34-IBN-b model. 292 | 293 | Args: 294 | pretrained (bool): If True, returns a model pre-trained on ImageNet 295 | """ 296 | model = ResNet_IBN(block=BasicBlock_IBN, 297 | layers=[3, 4, 6, 3], 298 | ibn_cfg=('b', 'b', None, None), 299 | **kwargs) 300 | if pretrained: 301 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet34_ibn_b'])) 302 | return model 303 | 304 | 305 | def resnet50_ibn_b(pretrained=True, **kwargs): 306 | """Constructs a ResNet-50-IBN-b model. 307 | 308 | Args: 309 | pretrained (bool): If True, returns a model pre-trained on ImageNet 310 | """ 311 | model = ResNet_IBN(block=Bottleneck_IBN, 312 | layers=[3, 4, 6, 3], 313 | ibn_cfg=('b', 'b', None, None), 314 | **kwargs) 315 | if pretrained: 316 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet50_ibn_b'])) 317 | return model 318 | 319 | 320 | def resnet101_ibn_b(pretrained=False, **kwargs): 321 | """Constructs a ResNet-101-IBN-b model. 322 | 323 | Args: 324 | pretrained (bool): If True, returns a model pre-trained on ImageNet 325 | """ 326 | model = ResNet_IBN(block=Bottleneck_IBN, 327 | layers=[3, 4, 23, 3], 328 | ibn_cfg=('b', 'b', None, None), 329 | **kwargs) 330 | if pretrained: 331 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet101_ibn_b'])) 332 | return model 333 | 334 | 335 | def resnet152_ibn_b(pretrained=False, **kwargs): 336 | """Constructs a ResNet-152-IBN-b model. 
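(As with the IBN-a variant, no pretrained checkpoint is available at this depth; pretrained=True only emits a warning.)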
337 | 338 | Args: 339 | pretrained (bool): If True, returns a model pre-trained on ImageNet 340 | """ 341 | model = ResNet_IBN(block=Bottleneck_IBN, 342 | layers=[3, 8, 36, 3], 343 | ibn_cfg=('b', 'b', None, None), 344 | **kwargs) 345 | if pretrained: 346 | warnings.warn("Pretrained model not available for ResNet-152-IBN-b!") 347 | return model 348 | -------------------------------------------------------------------------------- /models/backbone/tresnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .tresnet import TResnetM, TResnetL, TResnetXL 2 | -------------------------------------------------------------------------------- /models/backbone/tresnet/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/models/backbone/tresnet/layers/__init__.py -------------------------------------------------------------------------------- /models/backbone/tresnet/layers/anti_aliasing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.parallel 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class AntiAliasDownsampleLayer(nn.Module): 9 | def __init__(self, remove_model_jit: bool = False, filt_size: int = 3, stride: int = 2, 10 | channels: int = 0): 11 | super(AntiAliasDownsampleLayer, self).__init__() 12 | if not remove_model_jit: 13 | self.op = DownsampleJIT(filt_size, stride, channels) 14 | else: 15 | self.op = Downsample(filt_size, stride, channels) 16 | 17 | def forward(self, x): 18 | return self.op(x) 19 | 20 | 21 | @torch.jit.script 22 | class DownsampleJIT(object): 23 | def __init__(self, filt_size: int = 3, stride: int = 2, channels: int = 0): 24 | self.stride = stride 25 | self.filt_size = filt_size 26 | self.channels = channels 27 | 28 | assert self.filt_size == 3 29 | assert stride == 2 30 | a = torch.tensor([1., 2., 1.]) 31 | 32 | filt = (a[:, None] * a[None, :]).clone().detach() 33 | filt = filt / torch.sum(filt) 34 | self.filt = filt[None, None, :, :].repeat((self.channels, 1, 1, 1)).cuda().half() 35 | 36 | def __call__(self, input: torch.Tensor): 37 | if input.dtype != self.filt.dtype: 38 | self.filt = self.filt.float() 39 | input_pad = F.pad(input, (1, 1, 1, 1), 'reflect') 40 | return F.conv2d(input_pad, self.filt, stride=2, padding=0, groups=input.shape[1]) 41 | 42 | 43 | class Downsample(nn.Module): 44 | def __init__(self, filt_size=3, stride=2, channels=None): 45 | super(Downsample, self).__init__() 46 | self.filt_size = filt_size 47 | self.stride = stride 48 | self.channels = channels 49 | 50 | 51 | assert self.filt_size == 3 52 | a = torch.tensor([1., 2., 1.]) 53 | 54 | filt = (a[:, None] * a[None, :]).clone().detach() 55 | filt = filt / torch.sum(filt) 56 | self.filt = filt[None, None, :, :].repeat((self.channels, 1, 1, 1)) 57 | 58 | def forward(self, input): 59 | input_pad = F.pad(input, (1, 1, 1, 1), 'reflect') 60 | return F.conv2d(input_pad, self.filt, stride=self.stride, padding=0, groups=input.shape[1]) 61 | -------------------------------------------------------------------------------- /models/backbone/tresnet/layers/avg_pool.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FastAvgPool2d(nn.Module): 5 | def __init__(self, flatten=False): 6 | super(FastAvgPool2d, 
self).__init__() 7 | self.flatten = flatten 8 | 9 | def forward(self, x): 10 | if self.flatten: 11 | in_size = x.size() 12 | return x.view((in_size[0], in_size[1], -1)).mean(dim=2) 13 | else: 14 | return x.view(x.size(0), x.size(1), -1).mean(-1).view(x.size(0), x.size(1), 1, 1) 15 | -------------------------------------------------------------------------------- /models/backbone/tresnet/layers/general_layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.backbone.tresnet.layers.avg_pool import FastAvgPool2d 6 | 7 | 8 | class Flatten(nn.Module): 9 | def forward(self, x): 10 | return x.view(x.size(0), -1) 11 | 12 | 13 | class DepthToSpace(nn.Module): 14 | 15 | def __init__(self, block_size): 16 | super().__init__() 17 | self.bs = block_size 18 | 19 | def forward(self, x): 20 | N, C, H, W = x.size() 21 | x = x.view(N, self.bs, self.bs, C // (self.bs ** 2), H, W) # (N, bs, bs, C//bs^2, H, W) 22 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # (N, C//bs^2, H, bs, W, bs) 23 | x = x.view(N, C // (self.bs ** 2), H * self.bs, W * self.bs) # (N, C//bs^2, H * bs, W * bs) 24 | return x 25 | 26 | 27 | class SpaceToDepthModule(nn.Module): 28 | def __init__(self, remove_model_jit=False): 29 | super().__init__() 30 | if not remove_model_jit: 31 | self.op = SpaceToDepthJit() 32 | else: 33 | self.op = SpaceToDepth() 34 | 35 | def forward(self, x): 36 | return self.op(x) 37 | 38 | 39 | class SpaceToDepth(nn.Module): 40 | def __init__(self, block_size=4): 41 | super().__init__() 42 | assert block_size == 4 43 | self.bs = block_size 44 | 45 | def forward(self, x): 46 | N, C, H, W = x.size() 47 | x = x.view(N, C, H // self.bs, self.bs, W // self.bs, self.bs) # (N, C, H//bs, bs, W//bs, bs) 48 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 49 | x = x.view(N, C * (self.bs ** 2), H // self.bs, W // self.bs) # (N, C*bs^2, H//bs, W//bs) 50 | return x 51 | 52 | 53 | @torch.jit.script 54 | class SpaceToDepthJit(object): 55 | def __call__(self, x: torch.Tensor): 56 | # assuming hard-coded that block_size==4 for acceleration 57 | N, C, H, W = x.size() 58 | x = x.view(N, C, H // 4, 4, W // 4, 4) # (N, C, H//bs, bs, W//bs, bs) 59 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 60 | x = x.view(N, C * 16, H // 4, W // 4) # (N, C*bs^2, H//bs, W//bs) 61 | return x 62 | 63 | 64 | class hard_sigmoid(nn.Module): 65 | def __init__(self, inplace=True): 66 | super(hard_sigmoid, self).__init__() 67 | self.inplace = inplace 68 | 69 | def forward(self, x): 70 | if self.inplace: 71 | return x.add_(3.).clamp_(0., 6.).div_(6.) 72 | else: 73 | return F.relu6(x + 3.) / 6. 
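# Shape sketch for the SpaceToDepth modules above: a fixed 4x4 spatial block is
# folded into channels, mapping (N, C, H, W) -> (N, 16*C, H/4, W/4). For example:
# >>> x = torch.randn(2, 3, 224, 224)
# >>> SpaceToDepth()(x).shape
# torch.Size([2, 48, 56, 56])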
74 | 75 | 76 | class SEModule(nn.Module): 77 | 78 | def __init__(self, channels, reduction_channels, inplace=True): 79 | super(SEModule, self).__init__() 80 | self.avg_pool = FastAvgPool2d() 81 | self.fc1 = nn.Conv2d(channels, reduction_channels, kernel_size=1, padding=0, bias=True) 82 | self.relu = nn.ReLU(inplace=inplace) 83 | self.fc2 = nn.Conv2d(reduction_channels, channels, kernel_size=1, padding=0, bias=True) 84 | # self.activation = hard_sigmoid(inplace=inplace) 85 | self.activation = nn.Sigmoid() 86 | 87 | def forward(self, x): 88 | x_se = self.avg_pool(x) 89 | x_se2 = self.fc1(x_se) 90 | x_se2 = self.relu(x_se2) 91 | x_se = self.fc2(x_se2) 92 | x_se = self.activation(x_se) 93 | return x * x_se 94 | -------------------------------------------------------------------------------- /models/backbone/tresnet/tresnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Module as Module 4 | from collections import OrderedDict 5 | # from src.models.tresnet.layers.anti_aliasing import AntiAliasDownsampleLayer 6 | # from .layers.avg_pool import FastAvgPool2d 7 | # from .layers.general_layers import SEModule, SpaceToDepthModule 8 | from inplace_abn import InPlaceABN 9 | 10 | from models.backbone.tresnet.layers.anti_aliasing import AntiAliasDownsampleLayer 11 | from models.backbone.tresnet.layers.avg_pool import FastAvgPool2d 12 | from models.backbone.tresnet.layers.general_layers import SEModule, SpaceToDepthModule 13 | from models.registry import BACKBONE 14 | 15 | 16 | class bottleneck_head(nn.Module): 17 | def __init__(self, num_features, num_classes, bottleneck_features=200): 18 | super(bottleneck_head, self).__init__() 19 | self.embedding_generator = nn.ModuleList() 20 | self.embedding_generator.append(nn.Linear(num_features, bottleneck_features)) 21 | self.embedding_generator = nn.Sequential(*self.embedding_generator) 22 | self.FC = nn.Linear(bottleneck_features, num_classes) 23 | 24 | def forward(self, x): 25 | self.embedding = self.embedding_generator(x) 26 | logits = self.FC(self.embedding) 27 | return logits 28 | 29 | 30 | def conv2d(ni, nf, stride): 31 | return nn.Sequential( 32 | nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False), 33 | nn.BatchNorm2d(nf), 34 | nn.ReLU(inplace=True) 35 | ) 36 | 37 | 38 | def conv2d_ABN(ni, nf, stride, activation="leaky_relu", kernel_size=3, activation_param=1e-2, groups=1): 39 | return nn.Sequential( 40 | nn.Conv2d(ni, nf, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, groups=groups, 41 | bias=False), 42 | InPlaceABN(num_features=nf, activation=activation, activation_param=activation_param) 43 | ) 44 | 45 | 46 | class BasicBlock(Module): 47 | expansion = 1 48 | 49 | def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, anti_alias_layer=None): 50 | super(BasicBlock, self).__init__() 51 | if stride == 1: 52 | self.conv1 = conv2d_ABN(inplanes, planes, stride=1, activation_param=1e-3) 53 | else: 54 | if anti_alias_layer is None: 55 | self.conv1 = conv2d_ABN(inplanes, planes, stride=2, activation_param=1e-3) 56 | else: 57 | self.conv1 = nn.Sequential(conv2d_ABN(inplanes, planes, stride=1, activation_param=1e-3), 58 | anti_alias_layer(channels=planes, filt_size=3, stride=2)) 59 | 60 | self.conv2 = conv2d_ABN(planes, planes, stride=1, activation="identity") 61 | self.relu = nn.ReLU(inplace=True) 62 | self.downsample = downsample 63 | self.stride = stride 64 | reduce_layer_planes = max(planes * 
self.expansion // 4, 64) 65 | self.se = SEModule(planes * self.expansion, reduce_layer_planes) if use_se else None 66 | 67 | def forward(self, x): 68 | if self.downsample is not None: 69 | residual = self.downsample(x) 70 | else: 71 | residual = x 72 | 73 | out = self.conv1(x) 74 | out = self.conv2(out) 75 | 76 | if self.se is not None: out = self.se(out) 77 | 78 | out += residual 79 | 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | 85 | class Bottleneck(Module): 86 | expansion = 4 87 | 88 | def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, anti_alias_layer=None): 89 | super(Bottleneck, self).__init__() 90 | self.conv1 = conv2d_ABN(inplanes, planes, kernel_size=1, stride=1, activation="leaky_relu", 91 | activation_param=1e-3) 92 | if stride == 1: 93 | self.conv2 = conv2d_ABN(planes, planes, kernel_size=3, stride=1, activation="leaky_relu", 94 | activation_param=1e-3) 95 | else: 96 | if anti_alias_layer is None: 97 | self.conv2 = conv2d_ABN(planes, planes, kernel_size=3, stride=2, activation="leaky_relu", 98 | activation_param=1e-3) 99 | else: 100 | self.conv2 = nn.Sequential(conv2d_ABN(planes, planes, kernel_size=3, stride=1, 101 | activation="leaky_relu", activation_param=1e-3), 102 | anti_alias_layer(channels=planes, filt_size=3, stride=2)) 103 | 104 | self.conv3 = conv2d_ABN(planes, planes * self.expansion, kernel_size=1, stride=1, 105 | activation="identity") 106 | 107 | self.relu = nn.ReLU(inplace=True) 108 | self.downsample = downsample 109 | self.stride = stride 110 | 111 | reduce_layer_planes = max(planes * self.expansion // 8, 64) 112 | self.se = SEModule(planes, reduce_layer_planes) if use_se else None 113 | 114 | def forward(self, x): 115 | if self.downsample is not None: 116 | residual = self.downsample(x) 117 | else: 118 | residual = x 119 | 120 | out = self.conv1(x) 121 | out = self.conv2(out) 122 | if self.se is not None: out = self.se(out) 123 | 124 | out = self.conv3(out) 125 | out = out + residual # no inplace 126 | out = self.relu(out) 127 | 128 | return out 129 | 130 | 131 | class TResNet(Module): 132 | 133 | def __init__(self, layers, in_chans=3, width_factor=1.0, 134 | do_bottleneck_head=False, bottleneck_features=512): 135 | super(TResNet, self).__init__() 136 | 137 | # JIT layers 138 | space_to_depth = SpaceToDepthModule() 139 | anti_alias_layer = AntiAliasDownsampleLayer 140 | # global_pool_layer = FastAvgPool2d(flatten=True) 141 | 142 | # TResnet stages 143 | self.inplanes = int(64 * width_factor) 144 | self.planes = int(64 * width_factor) 145 | conv1 = conv2d_ABN(in_chans * 16, self.planes, stride=1, kernel_size=3) 146 | layer1 = self._make_layer(BasicBlock, self.planes, layers[0], stride=1, use_se=True, 147 | anti_alias_layer=anti_alias_layer) # 56x56 148 | layer2 = self._make_layer(BasicBlock, self.planes * 2, layers[1], stride=2, use_se=True, 149 | anti_alias_layer=anti_alias_layer) # 28x28 150 | layer3 = self._make_layer(Bottleneck, self.planes * 4, layers[2], stride=2, use_se=True, 151 | anti_alias_layer=anti_alias_layer) # 14x14 152 | layer4 = self._make_layer(Bottleneck, self.planes * 8, layers[3], stride=2, use_se=False, 153 | anti_alias_layer=anti_alias_layer) # 7x7 154 | 155 | # body 156 | self.body = nn.Sequential(OrderedDict([ 157 | ('SpaceToDepth', space_to_depth), 158 | ('conv1', conv1), 159 | ('layer1', layer1), 160 | ('layer2', layer2), 161 | ('layer3', layer3), 162 | ('layer4', layer4)])) 163 | 164 | # head 165 | self.embeddings = [] 166 | # self.global_pool = nn.Sequential(OrderedDict([('global_pool_layer', 
global_pool_layer)])) 167 | # self.num_features = (self.planes * 8) * Bottleneck.expansion 168 | # if do_bottleneck_head: 169 | # fc = bottleneck_head(self.num_features, num_classes, 170 | # bottleneck_features=bottleneck_features) 171 | # else: 172 | # fc = nn.Linear(self.num_features, num_classes) 173 | 174 | # self.head = nn.Sequential(OrderedDict([('fc', fc)])) 175 | 176 | # model initialization 177 | for m in self.modules(): 178 | if isinstance(m, nn.Conv2d): 179 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu') 180 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, InPlaceABN): 181 | nn.init.constant_(m.weight, 1) 182 | nn.init.constant_(m.bias, 0) 183 | 184 | # residual connections special initialization 185 | for m in self.modules(): 186 | if isinstance(m, BasicBlock): 187 | m.conv2[1].weight = nn.Parameter(torch.zeros_like(m.conv2[1].weight)) # BN to zero 188 | if isinstance(m, Bottleneck): 189 | m.conv3[1].weight = nn.Parameter(torch.zeros_like(m.conv3[1].weight)) # BN to zero 190 | if isinstance(m, nn.Linear): m.weight.data.normal_(0, 0.01) 191 | 192 | def _make_layer(self, block, planes, blocks, stride=1, use_se=True, anti_alias_layer=None): 193 | downsample = None 194 | if stride != 1 or self.inplanes != planes * block.expansion: 195 | layers = [] 196 | if stride == 2: 197 | # avg pooling before 1x1 conv 198 | layers.append(nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True, count_include_pad=False)) 199 | layers += [conv2d_ABN(self.inplanes, planes * block.expansion, kernel_size=1, stride=1, 200 | activation="identity")] 201 | downsample = nn.Sequential(*layers) 202 | 203 | layers = [] 204 | layers.append(block(self.inplanes, planes, stride, downsample, use_se=use_se, 205 | anti_alias_layer=anti_alias_layer)) 206 | self.inplanes = planes * block.expansion 207 | for i in range(1, blocks): layers.append( 208 | block(self.inplanes, planes, use_se=use_se, anti_alias_layer=anti_alias_layer)) 209 | return nn.Sequential(*layers) 210 | 211 | def forward(self, x): 212 | x = self.body(x) 213 | # self.embeddings = self.global_pool(x) 214 | # logits = self.head(self.embeddings) 215 | return x 216 | 217 | @BACKBONE.register("tresnetM") 218 | def TResnetM(): 219 | """Constructs a medium TResnet model. 220 | """ 221 | in_chans = 3 222 | model = TResNet(layers=[3, 4, 11, 3], in_chans=in_chans) 223 | return model 224 | 225 | 226 | @BACKBONE.register("tresnetL") 227 | def TResnetL(): 228 | """Constructs a large TResnet model. 229 | """ 230 | model = TResNet(layers=[4, 5, 18, 3], width_factor=1.2, do_bottleneck_head=False) 231 | 232 | state = torch.load('/mnt/data1/jiajian/code/checkpoints/tresnet_l.pth', map_location='cpu') 233 | filtered_dict = {k: v for k, v in state['model'].items() if 'head.fc' not in k} 234 | model.load_state_dict(filtered_dict, strict=True) 235 | return model 236 | 237 | 238 | def TResnetXL(model_params): 239 | """Constructs an xlarge TResnet model.
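(Note: model_params is only consulted for 'num_classes' here; unlike TResnetL above, no pretrained weights are loaded.)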
240 | """ 241 | in_chans = 3 242 | num_classes = model_params['num_classes'] 243 | model = TResNet(layers=[4, 5, 24, 3], in_chans=in_chans, width_factor=1.3) 244 | 245 | return model 246 | 247 | 248 | if __name__ == '__main__': 249 | a = TResnetL() 250 | b = torch.randn(1, 3, 448, 448) 251 | c = a(b) 252 | print(c.shape) -------------------------------------------------------------------------------- /models/base_block.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.init as init 6 | 7 | import torch.nn.functional as F 8 | from torch.nn.modules.batchnorm import _BatchNorm 9 | 10 | from models.registry import CLASSIFIER 11 | 12 | 13 | class BaseClassifier(nn.Module): 14 | 15 | def fresh_params(self, bn_wd): 16 | if bn_wd: 17 | return self.parameters() 18 | else: 19 | return self.named_parameters() 20 | 21 | @CLASSIFIER.register("linear") 22 | class LinearClassifier(BaseClassifier): 23 | def __init__(self, nattr, c_in, bn=False, pool='avg', scale=1): 24 | super().__init__() 25 | 26 | self.pool = pool 27 | if pool == 'avg': 28 | self.pool = nn.AdaptiveAvgPool2d(1) 29 | elif pool == 'max': 30 | self.pool = nn.AdaptiveMaxPool2d(1) 31 | 32 | self.logits = nn.Sequential( 33 | nn.Linear(c_in, nattr), 34 | nn.BatchNorm1d(nattr) if bn else nn.Identity() 35 | ) 36 | 37 | 38 | def forward(self, feature, label=None): 39 | 40 | if len(feature.shape) == 3: # for vit: token features with shape (bt, hw, c) 41 | 42 | bt, hw, c = feature.shape 43 | # NOTE ONLY USED FOR INPUT SIZE (256, 192) 44 | h = 16 45 | w = 12 46 | feature = feature.reshape(bt, h, w, c).permute(0, 3, 1, 2) 47 | 48 | feat = self.pool(feature).view(feature.size(0), -1) 49 | x = self.logits(feat) 50 | 51 | return [x], feature 52 | 53 | 54 | 55 | @CLASSIFIER.register("cosine") 56 | class NormClassifier(BaseClassifier): 57 | def __init__(self, nattr, c_in, bn=False, pool='avg', scale=30): 58 | super().__init__() 59 | 60 | self.logits = nn.Parameter(torch.FloatTensor(nattr, c_in)) 61 | 62 | stdv = 1. / math.sqrt(self.logits.data.size(1)) 63 | self.logits.data.uniform_(-stdv, stdv) 64 | 65 | self.pool = pool 66 | if pool == 'avg': 67 | self.pool = nn.AdaptiveAvgPool2d(1) 68 | elif pool == 'max': 69 | self.pool = nn.AdaptiveMaxPool2d(1) 70 | 71 | def forward(self, feature, label=None): 72 | feat = self.pool(feature).view(feature.size(0), -1) 73 | feat_n = F.normalize(feat, dim=1) 74 | weight_n = F.normalize(self.logits, dim=1) 75 | x = torch.matmul(feat_n, weight_n.t()) 76 | return [x], feat_n 77 | 78 | 79 | def initialize_weights(module): 80 | for m in module.children(): 81 | if isinstance(m, nn.Conv2d): 82 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 83 | m.weight.data.normal_(0, math.sqrt(2. / n)) 84 | elif isinstance(m, _BatchNorm): 85 | m.weight.data.fill_(1) 86 | if m.bias is not None: 87 | m.bias.data.zero_() 88 | elif isinstance(m, nn.Linear): 89 | stdv = 1.
/ math.sqrt(m.weight.size(1)) 90 | m.weight.data.uniform_(-stdv, stdv) 91 | 92 | 93 | class FeatClassifier(nn.Module): 94 | 95 | def __init__(self, backbone, classifier, bn_wd=True): 96 | super(FeatClassifier, self).__init__() 97 | 98 | self.backbone = backbone 99 | self.classifier = classifier 100 | self.bn_wd = bn_wd 101 | 102 | def fresh_params(self): 103 | return self.classifier.fresh_params(self.bn_wd) 104 | 105 | def finetune_params(self): 106 | 107 | if self.bn_wd: 108 | return self.backbone.parameters() 109 | else: 110 | return self.backbone.named_parameters() 111 | 112 | def forward(self, x, label=None): 113 | feat_map = self.backbone(x) 114 | logits, feat = self.classifier(feat_map, label) 115 | return logits, feat 116 | 117 | -------------------------------------------------------------------------------- /models/model_ema.py: -------------------------------------------------------------------------------- 1 | """ Exponential Moving Average (EMA) of model updates 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import logging 6 | from collections import OrderedDict 7 | from copy import deepcopy 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | _logger = logging.getLogger(__name__) 13 | 14 | 15 | class ModelEma: 16 | """ Model Exponential Moving Average (DEPRECATED) 17 | 18 | Keep a moving average of everything in the model state_dict (parameters and buffers). 19 | This version is deprecated, it does not work with scripted models. Will be removed eventually. 20 | 21 | This is intended to allow functionality like 22 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 23 | 24 | A smoothed version of the weights is necessary for some training schemes to perform well. 25 | E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use 26 | RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA 27 | smoothing of weights to match results. Pay attention to the decay constant you are using 28 | relative to your update count per epoch. 29 | 30 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but 31 | disable validation of the EMA weights. Validation will have to be done manually in a separate 32 | process, or after the training stops converging. 33 | 34 | This class is sensitive where it is initialized in the sequence of model init, 35 | GPU assignment and distributed training wrappers. 36 | """ 37 | def __init__(self, model, decay=0.9999, device='', resume=''): 38 | # make a copy of the model for accumulating moving average of weights 39 | self.ema = deepcopy(model) 40 | self.ema.eval() 41 | self.decay = decay 42 | self.device = device # perform ema on different device from model if set 43 | if device: 44 | self.ema.to(device=device) 45 | self.ema_has_module = hasattr(self.ema, 'module') 46 | if resume: 47 | self._load_checkpoint(resume) 48 | for p in self.ema.parameters(): 49 | p.requires_grad_(False) 50 | 51 | def _load_checkpoint(self, checkpoint_path): 52 | checkpoint = torch.load(checkpoint_path, map_location='cpu') 53 | assert isinstance(checkpoint, dict) 54 | if 'state_dict_ema' in checkpoint: 55 | new_state_dict = OrderedDict() 56 | for k, v in checkpoint['state_dict_ema'].items(): 57 | # ema model may have been wrapped by DataParallel, and need module prefix 58 | if self.ema_has_module: 59 | name = 'module.' 
+ k if not k.startswith('module') else k 60 | else: 61 | name = k 62 | new_state_dict[name] = v 63 | self.ema.load_state_dict(new_state_dict) 64 | _logger.info("Loaded state_dict_ema") 65 | else: 66 | _logger.warning("Failed to find state_dict_ema, starting from loaded model weights") 67 | 68 | def update(self, model): 69 | # correct a mismatch in state dict keys 70 | needs_module = hasattr(model, 'module') and not self.ema_has_module 71 | with torch.no_grad(): 72 | msd = model.state_dict() 73 | for k, ema_v in self.ema.state_dict().items(): 74 | if needs_module: 75 | k = 'module.' + k 76 | model_v = msd[k].detach() 77 | if self.device: 78 | model_v = model_v.to(device=self.device) 79 | ema_v.copy_(ema_v * self.decay + (1. - self.decay) * model_v) 80 | 81 | 82 | class ModelEmaV2(nn.Module): 83 | """ Model Exponential Moving Average V2 84 | 85 | Keep a moving average of everything in the model state_dict (parameters and buffers). 86 | V2 of this module is simpler, it does not match params/buffers based on name but simply 87 | iterates in order. It works with torchscript (JIT of full model). 88 | 89 | This is intended to allow functionality like 90 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 91 | 92 | A smoothed version of the weights is necessary for some training schemes to perform well. 93 | E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use 94 | RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA 95 | smoothing of weights to match results. Pay attention to the decay constant you are using 96 | relative to your update count per epoch. 97 | 98 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but 99 | disable validation of the EMA weights. Validation will have to be done manually in a separate 100 | process, or after the training stops converging. 101 | 102 | This class is sensitive where it is initialized in the sequence of model init, 103 | GPU assignment and distributed training wrappers. 104 | """ 105 | def __init__(self, model, decay=0.9999, device=None): 106 | super(ModelEmaV2, self).__init__() 107 | # make a copy of the model for accumulating moving average of weights 108 | self.module = deepcopy(model) 109 | self.module.eval() 110 | self.decay = decay 111 | self.device = device # perform ema on different device from model if set 112 | if self.device is not None: 113 | self.module.to(device=device) 114 | 115 | def _update(self, model, update_fn): 116 | with torch.no_grad(): 117 | for ema_v, model_v in zip(self.module.state_dict().values(), model.state_dict().values()): 118 | if self.device is not None: 119 | model_v = model_v.to(device=self.device) 120 | ema_v.copy_(update_fn(ema_v, model_v)) 121 | 122 | def update(self, model): 123 | self._update(model, update_fn=lambda e, m: self.decay * e + (1. 
- self.decay) * m) 124 | 125 | def set(self, model): 126 | self._update(model, update_fn=lambda e, m: m) 127 | -------------------------------------------------------------------------------- /models/model_factory.py: -------------------------------------------------------------------------------- 1 | from models.registry import BACKBONE 2 | from models.registry import CLASSIFIER 3 | from models.registry import LOSSES 4 | 5 | 6 | def build_backbone(key, multi_scale=False): 7 | 8 | model_dict = { 9 | 'resnet34': 512, 10 | 'resnet18': 512, 11 | 'resnet50': 2048, 12 | 'resnet101': 2048, 13 | 'tresnet': 2432, 14 | 'swin_s': 768, 15 | 'swin_b': 1024, 16 | 'vit_s': 768, 17 | 'vit_b': 768, 18 | 'bninception': 1024, 19 | 'tresnetM': 2048, 20 | 'tresnetL': 2048, 21 | 22 | } 23 | 24 | model = BACKBONE[key]() 25 | output_d = model_dict[key] 26 | 27 | return model, output_d 28 | 29 | 30 | def build_classifier(key): 31 | 32 | return CLASSIFIER[key] 33 | 34 | 35 | def build_loss(key): 36 | 37 | return LOSSES[key] 38 | 39 | -------------------------------------------------------------------------------- /models/registry.py: -------------------------------------------------------------------------------- 1 | def _register_generic(module_dict, module_name, module): 2 | assert module_name not in module_dict 3 | module_dict[module_name] = module 4 | 5 | 6 | class Registry(dict): 7 | """ 8 | A helper class for managing module registration; it extends a dictionary 9 | and provides register functions. 10 | 11 | E.g. creating a registry: 12 | some_registry = Registry({"default": default_module}) 13 | 14 | There are two ways of registering new modules: 15 | 1): the normal way is just calling the register function: 16 | def foo(): 17 | ... 18 | some_registry.register("foo_module", foo) 19 | 2): used as a decorator when declaring the module: 20 | @some_registry.register("foo_module") 21 | @some_registry.register("foo_module_nickname") 22 | def foo(): 23 | ... 24 | 25 | Accessing a module is just like using a dictionary, e.g.: 26 | f = some_registry["foo_module"] 27 | """ 28 | def __init__(self, *args, **kwargs): 29 | super(Registry, self).__init__(*args, **kwargs) 30 | 31 | def register(self, module_name, module=None): 32 | # used as function call 33 | if module is not None: 34 | _register_generic(self, module_name, module) 35 | return 36 | 37 | # used as decorator 38 | def register_fn(fn): 39 | _register_generic(self, module_name, fn) 40 | return fn 41 | 42 | return register_fn 43 | 44 | 45 | BACKBONE = Registry() 46 | CLASSIFIER = Registry() 47 | LOSSES = Registry() -------------------------------------------------------------------------------- /optim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/optim/__init__.py -------------------------------------------------------------------------------- /optim/adamw.py: -------------------------------------------------------------------------------- 1 | """ AdamW Optimizer 2 | Impl copied from PyTorch master 3 | NOTE: Builtin optim.AdamW is used by the factory, this impl only serves as a Python based reference, will be removed 4 | someday 5 | """ 6 | import math 7 | import torch 8 | from torch.optim.optimizer import Optimizer 9 | 10 | 11 | class AdamW(Optimizer): 12 | r"""Implements AdamW algorithm. 13 | The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_.
14 | The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. 15 | Arguments: 16 | params (iterable): iterable of parameters to optimize or dicts defining 17 | parameter groups 18 | lr (float, optional): learning rate (default: 1e-3) 19 | betas (Tuple[float, float], optional): coefficients used for computing 20 | running averages of gradient and its square (default: (0.9, 0.999)) 21 | eps (float, optional): term added to the denominator to improve 22 | numerical stability (default: 1e-8) 23 | weight_decay (float, optional): weight decay coefficient (default: 1e-2) 24 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 25 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 26 | (default: False) 27 | .. _Adam\: A Method for Stochastic Optimization: 28 | https://arxiv.org/abs/1412.6980 29 | .. _Decoupled Weight Decay Regularization: 30 | https://arxiv.org/abs/1711.05101 31 | .. _On the Convergence of Adam and Beyond: 32 | https://openreview.net/forum?id=ryQu7f-RZ 33 | """ 34 | 35 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 36 | weight_decay=1e-2, amsgrad=False): 37 | if not 0.0 <= lr: 38 | raise ValueError("Invalid learning rate: {}".format(lr)) 39 | if not 0.0 <= eps: 40 | raise ValueError("Invalid epsilon value: {}".format(eps)) 41 | if not 0.0 <= betas[0] < 1.0: 42 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 43 | if not 0.0 <= betas[1] < 1.0: 44 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 45 | defaults = dict(lr=lr, betas=betas, eps=eps, 46 | weight_decay=weight_decay, amsgrad=amsgrad) 47 | super(AdamW, self).__init__(params, defaults) 48 | 49 | def __setstate__(self, state): 50 | super(AdamW, self).__setstate__(state) 51 | for group in self.param_groups: 52 | group.setdefault('amsgrad', False) 53 | 54 | @torch.no_grad() 55 | def step(self, closure=None): 56 | """Performs a single optimization step. 57 | Arguments: 58 | closure (callable, optional): A closure that reevaluates the model 59 | and returns the losses. 60 | """ 61 | loss = None 62 | if closure is not None: 63 | with torch.enable_grad(): 64 | loss = closure() 65 | 66 | for group in self.param_groups: 67 | for p in group['params']: 68 | if p.grad is None: 69 | continue 70 | 71 | # Perform stepweight decay 72 | p.data.mul_(1 - group['lr'] * group['weight_decay']) 73 | 74 | # Perform optimization step 75 | grad = p.grad 76 | if grad.is_sparse: 77 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 78 | amsgrad = group['amsgrad'] 79 | 80 | state = self.state[p] 81 | 82 | # State initialization 83 | if len(state) == 0: 84 | state['step'] = 0 85 | # Exponential moving average of gradient values 86 | state['exp_avg'] = torch.zeros_like(p) 87 | # Exponential moving average of squared gradient values 88 | state['exp_avg_sq'] = torch.zeros_like(p) 89 | if amsgrad: 90 | # Maintains max of all exp. moving avg. of sq. grad. 
values 91 | state['max_exp_avg_sq'] = torch.zeros_like(p) 92 | 93 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 94 | if amsgrad: 95 | max_exp_avg_sq = state['max_exp_avg_sq'] 96 | beta1, beta2 = group['betas'] 97 | 98 | state['step'] += 1 99 | bias_correction1 = 1 - beta1 ** state['step'] 100 | bias_correction2 = 1 - beta2 ** state['step'] 101 | 102 | # Decay the first and second moment running average coefficient 103 | exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) 104 | exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) 105 | if amsgrad: 106 | # Maintains the maximum of all 2nd moment running avg. till now 107 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 108 | # Use the max. for normalizing running avg. of gradient 109 | denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) 110 | else: 111 | denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) 112 | 113 | step_size = group['lr'] / bias_correction1 114 | 115 | p.addcdiv_(exp_avg, denom, value=-step_size) 116 | 117 | return loss -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy==1.4.1 2 | torch==1.4.0 3 | torchvision==0.5.0 4 | tqdm==4.43.0 5 | easydict==1.9 6 | numpy==1.18.1 7 | Pillow==7.1.2 8 | -------------------------------------------------------------------------------- /scheduler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/scheduler/__init__.py -------------------------------------------------------------------------------- /scheduler/cos_annealing_with_restart.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | from torch.optim.lr_scheduler import _LRScheduler 4 | import matplotlib.pyplot as plt 5 | 6 | class CosineAnnealingLR_with_Restart(_LRScheduler): 7 | """Set the learning rate of each parameter group using a cosine annealing 8 | schedule, where :math:`\eta_{max}` is set to the initial lr and 9 | :math:`T_{cur}` is the number of epochs since the last restart in SGDR: 10 | 11 | .. math:: 12 | 13 | \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 + 14 | \cos(\frac{T_{cur}}{T_{max}}\pi)) 15 | 16 | When last_epoch=-1, sets initial lr as lr. 17 | 18 | It has been proposed in 19 | `SGDR: Stochastic Gradient Descent with Warm Restarts`_. The original pytorch 20 | implementation only implements the cosine annealing part of SGDR, 21 | I added my own implementation of the restarts part. 22 | 23 | Args: 24 | optimizer (Optimizer): Wrapped optimizer. 25 | T_max (int): Maximum number of iterations. 26 | T_mult (float): Increase T_max by a factor of T_mult 27 | eta_min (float): Minimum learning rate. Default: 0. 28 | last_epoch (int): The index of last epoch. Default: -1. 29 | 30 | .. 
_SGDR\: Stochastic Gradient Descent with Warm Restarts: 31 | https://arxiv.org/abs/1608.03983 32 | """ 33 | 34 | def __init__(self, optimizer, T_max, T_mult, eta_min=0, last_epoch=-1): 35 | self.T_max = T_max 36 | self.T_mult = T_mult 37 | self.Te = self.T_max 38 | self.eta_min = eta_min 39 | self.current_epoch = last_epoch 40 | 41 | self.lr_history = [] 42 | 43 | super(CosineAnnealingLR_with_Restart, self).__init__(optimizer, last_epoch) 44 | 45 | def get_lr(self): 46 | new_lrs = [self.eta_min + (base_lr - self.eta_min) * 47 | (1 + math.cos(math.pi * self.current_epoch / self.Te)) / 2 48 | for base_lr in self.base_lrs] 49 | 50 | self.lr_history.append(new_lrs) 51 | return new_lrs 52 | 53 | def step(self, epoch=None): 54 | if epoch is None: 55 | epoch = self.last_epoch + 1 56 | self.last_epoch = epoch 57 | self.current_epoch += 1 58 | 59 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): 60 | param_group['lr'] = lr 61 | 62 | ## restart 63 | if self.current_epoch == self.Te: 64 | 65 | ## reset epochs since the last reset 66 | self.current_epoch = 0 67 | 68 | ## reset the next goal 69 | self.Te = int(self.Te * self.T_mult) 70 | self.T_max = self.T_max + self.Te 71 | 72 | 73 | if __name__ == '__main__': 74 | 75 | params = torch.zeros(10) 76 | lr = 2e-4 77 | 78 | optim = torch.optim.SGD([params], lr, momentum=0.9, weight_decay=5e-4) 79 | 80 | num_steps = 1406 81 | 82 | 83 | scheduler = CosineAnnealingLR_with_Restart(optim, T_max=num_steps * 16, T_mult=1.0, eta_min=1e-7) 84 | 85 | lr = [] 86 | lr_s = [] 87 | for i in range(num_steps * 8): 88 | lr.append(optim.param_groups[0]['lr']) 89 | optim.step() 90 | scheduler.step() 91 | 92 | plt.plot(range(num_steps * 8), lr) 93 | plt.show() 94 | 95 | -------------------------------------------------------------------------------- /scheduler/cosine_lr.py: -------------------------------------------------------------------------------- 1 | """ Cosine Scheduler 2 | Cosine LR schedule with warmup, cycle/restarts, noise. 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import logging 6 | import math 7 | import numpy as np 8 | import torch 9 | from matplotlib import pyplot as plt 10 | 11 | from timm.scheduler.scheduler import Scheduler 12 | from torch.optim import lr_scheduler 13 | from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts 14 | 15 | _logger = logging.getLogger(__name__) 16 | 17 | 18 | class CosineLRScheduler(Scheduler): 19 | """ 20 | Cosine decay with restarts. 21 | This is described in the paper https://arxiv.org/abs/1608.03983. 
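    As a sketch of the schedule shape: with warmup_t=0, t_mul=1 and decay_rate=1, every cycle reduces to
        lr(t) = lr_min + 0.5 * (lr_max - lr_min) * (1 + cos(pi * (t % t_initial) / t_initial)),
    which is exactly the cosine branch computed in _get_lr below.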
22 | Inspiration from 23 | https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py 24 | """ 25 | 26 | def __init__(self, 27 | optimizer: torch.optim.Optimizer, 28 | t_initial: int, 29 | t_mul: float = 1., 30 | lr_min: float = 0., 31 | decay_rate: float = 1., 32 | warmup_t=0, 33 | warmup_lr_init=0, 34 | warmup_prefix=False, 35 | cycle_limit=0, 36 | t_in_epochs=True, 37 | noise_range_t=None, 38 | noise_pct=0.67, 39 | noise_std=1.0, 40 | noise_seed=42, 41 | initialize=True) -> None: 42 | """ 43 | 44 | @param optimizer: wrapped optimizer 45 | @param t_initial: number of epochs in the first cosine decay cycle 46 | @param t_mul: cycle-length multiplier between successive cosine decay cycles 47 | @param lr_min: final learning rate 48 | @param decay_rate: decay rate between the peak values of successive cosine decay cycles 49 | @param warmup_t: the epoch number of the warmup stage 50 | @param warmup_lr_init: the initial learning rate of the warmup stage 51 | @param warmup_prefix: if True, the warmup period is excluded from the cosine time axis 52 | @param cycle_limit: maximum number of cosine decay cycles (0 means unlimited) 53 | @param t_in_epochs: if True, t counts epochs (step per epoch); otherwise optimizer updates 54 | @param noise_range_t: epoch/update range (or start point) in which lr noise is applied 55 | @param noise_pct: maximum relative magnitude of the lr noise 56 | @param noise_std: std of the normal lr noise 57 | @param noise_seed: seed of the lr-noise generator 58 | @param initialize: if True, record the initial lr of each param group 59 | """ 60 | super().__init__( 61 | optimizer, param_group_field="lr", 62 | noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed, 63 | initialize=initialize) 64 | 65 | assert t_initial > 0 66 | assert lr_min >= 0 67 | if t_initial == 1 and t_mul == 1 and decay_rate == 1: 68 | _logger.warning("Cosine annealing scheduler will have no effect on the learning " 69 | "rate since t_initial = t_mul = eta_mul = 1.") 70 | self.t_initial = t_initial 71 | self.t_mul = t_mul 72 | self.lr_min = lr_min 73 | self.decay_rate = decay_rate 74 | self.cycle_limit = cycle_limit 75 | self.warmup_t = warmup_t 76 | self.warmup_lr_init = warmup_lr_init 77 | self.warmup_prefix = warmup_prefix 78 | self.t_in_epochs = t_in_epochs 79 | if self.warmup_t: 80 | self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values] 81 | super().update_groups(self.warmup_lr_init) 82 | else: 83 | self.warmup_steps = [1 for _ in self.base_values] 84 | 85 | def _get_lr(self, t): 86 | if t < self.warmup_t: 87 | lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps] 88 | else: 89 | if self.warmup_prefix: 90 | t = t - self.warmup_t 91 | 92 | if self.t_mul != 1: 93 | # math.log(1 - t / self.t_initial * (1 - self.t_mul), self.t_mul) < 1 always holds 94 | i = math.floor(math.log(1 - t / self.t_initial * (1 - self.t_mul), self.t_mul)) 95 | t_i = self.t_mul ** i * self.t_initial 96 | t_curr = t - (1 - self.t_mul ** i) / (1 - self.t_mul) * self.t_initial 97 | else: 98 | i = t // self.t_initial 99 | t_i = self.t_initial 100 | t_curr = t - (self.t_initial * i) 101 | 102 | gamma = self.decay_rate ** i 103 | lr_min = self.lr_min * gamma 104 | lr_max_values = [v * gamma for v in self.base_values] 105 | 106 | if self.cycle_limit == 0 or (self.cycle_limit > 0 and i < self.cycle_limit): 107 | # 0.5 * (1 + math.cos(math.pi * t_curr / t_i)), the proportion; (lr_max - lr_min), lr scale 108 | lrs = [ 109 | lr_min + 0.5 * (lr_max - lr_min) * (1 + math.cos(math.pi * t_curr / t_i)) for lr_max in 110 | lr_max_values 111 | ] 112 | else: 113 | lrs = [self.lr_min for _ in self.base_values] 114 | 115 | return lrs 116 | 117 | def get_epoch_values(self, epoch: int): 118 | if self.t_in_epochs: 119 | return self._get_lr(epoch) 120 | else: 121 | return None 122 | 123 | def get_update_values(self, num_updates: int): 124 | if
not self.t_in_epochs: 125 | return self._get_lr(num_updates) 126 | else: 127 | return None 128 | 129 | def get_cycle_length(self, cycles=0): 130 | if not cycles: 131 | cycles = self.cycle_limit 132 | cycles = max(1, cycles) 133 | if self.t_mul == 1.0: 134 | return self.t_initial * cycles 135 | else: 136 | return int(math.floor(-self.t_initial * (self.t_mul ** cycles - 1) / (1 - self.t_mul))) 137 | 138 | 139 | if __name__ == '__main__': 140 | 141 | params = torch.zeros(10) 142 | lr = 2e-4 143 | 144 | optim = torch.optim.SGD([params], lr, momentum=0.9, weight_decay=5e-4) 145 | 146 | # num_epochs = 160 147 | 148 | # scheduler = CosineLRScheduler( 149 | # optim, 150 | # t_initial=30, 151 | # t_mul=1, # cosine decay epoch multiplier 152 | # # lr_min=1e-5, # final value the cosine lr decays to 153 | # decay_rate=0.5, 154 | # # warmup_lr_init=1e-5, 155 | # # warmup_t=3, 156 | # cycle_limit=3, # maximum number of cycles 157 | # # t_in_epochs=True, 158 | # # noise_range_t=None, 159 | # # noise_pct=0.67, 160 | # # noise_std=1, 161 | # # noise_seed=42 162 | # ) 163 | 164 | # should be called with .step() after every batch 165 | scheduler = lr_scheduler.OneCycleLR(optim, max_lr=lr, steps_per_epoch=641, epochs=40, 166 | pct_start=0.0) 167 | 168 | # plt.figure(figsize=(8, 8)) 169 | lr = [] 170 | lr_s = [] 171 | for i in range(40): 172 | for j in range(641): 173 | lr.append(optim.param_groups[0]['lr']) 174 | lr_s.append(scheduler.get_last_lr()[0]) 175 | optim.step() 176 | scheduler.step() 177 | plt.plot(range(40 * 641), lr) 178 | plt.show() 179 | 180 | plt.plot(range(40 * 641), lr_s) 181 | plt.show() 182 | -------------------------------------------------------------------------------- /scheduler/scheduler.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | 3 | import torch 4 | 5 | 6 | class Scheduler: 7 | """ Parameter Scheduler Base Class 8 | A scheduler base class that can be used to schedule any optimizer parameter groups. 9 | Unlike the builtin PyTorch schedulers, this is intended to be consistently called 10 | * At the END of each epoch, before incrementing the epoch count, to calculate next epoch's value 11 | * At the END of each optimizer update, after incrementing the update count, to calculate next update's value 12 | The schedulers built on this should try to remain as stateless as possible (for simplicity). 13 | This family of schedulers is attempting to avoid the confusion of the meaning of 'last_epoch' 14 | and -1 values for special behaviour. All epoch and update counts must be tracked in the training 15 | code and explicitly passed in to the schedulers on the corresponding step or step_update call.
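    A minimal calling sketch (variable names assumed): at the end of epoch e call scheduler.step(e + 1)
    so the next epoch starts at its scheduled value, and after each optimizer update call
    scheduler.step_update(num_updates); both paths write the new values into the optimizer's
    param groups via update_groups().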
16 | Based on ideas from: 17 | * https://github.com/pytorch/fairseq/tree/master/fairseq/optim/lr_scheduler 18 | * https://github.com/allenai/allennlp/tree/master/allennlp/training/learning_rate_schedulers 19 | """ 20 | 21 | def __init__(self, 22 | optimizer: torch.optim.Optimizer, 23 | param_group_field: str, 24 | noise_range_t=None, 25 | noise_type='normal', 26 | noise_pct=0.67, 27 | noise_std=1.0, 28 | noise_seed=None, 29 | initialize: bool = True) -> None: 30 | self.optimizer = optimizer 31 | self.param_group_field = param_group_field 32 | self._initial_param_group_field = f"initial_{param_group_field}" 33 | if initialize: 34 | for i, group in enumerate(self.optimizer.param_groups): 35 | if param_group_field not in group: 36 | raise KeyError(f"{param_group_field} missing from param_groups[{i}]") 37 | group.setdefault(self._initial_param_group_field, group[param_group_field]) 38 | else: 39 | for i, group in enumerate(self.optimizer.param_groups): 40 | if self._initial_param_group_field not in group: 41 | raise KeyError(f"{self._initial_param_group_field} missing from param_groups[{i}]") 42 | self.base_values = [group[self._initial_param_group_field] for group in self.optimizer.param_groups] 43 | self.metric = None # any point to having this for all? 44 | self.noise_range_t = noise_range_t 45 | self.noise_pct = noise_pct 46 | self.noise_type = noise_type 47 | self.noise_std = noise_std 48 | self.noise_seed = noise_seed if noise_seed is not None else 42 49 | self.update_groups(self.base_values) 50 | 51 | def state_dict(self) -> Dict[str, Any]: 52 | return {key: value for key, value in self.__dict__.items() if key != 'optimizer'} 53 | 54 | def load_state_dict(self, state_dict: Dict[str, Any]) -> None: 55 | self.__dict__.update(state_dict) 56 | 57 | def get_epoch_values(self, epoch: int): 58 | return None 59 | 60 | def get_update_values(self, num_updates: int): 61 | return None 62 | 63 | def step(self, epoch: int, metric: float = None) -> None: 64 | self.metric = metric 65 | values = self.get_epoch_values(epoch) 66 | if values is not None: 67 | values = self._add_noise(values, epoch) 68 | self.update_groups(values) 69 | 70 | def step_update(self, num_updates: int, metric: float = None): 71 | self.metric = metric 72 | values = self.get_update_values(num_updates) 73 | if values is not None: 74 | values = self._add_noise(values, num_updates) 75 | self.update_groups(values) 76 | 77 | def update_groups(self, values): 78 | if not isinstance(values, (list, tuple)): 79 | values = [values] * len(self.optimizer.param_groups) 80 | for param_group, value in zip(self.optimizer.param_groups, values): 81 | param_group[self.param_group_field] = value 82 | 83 | def _add_noise(self, lrs, t): 84 | if self.noise_range_t is not None: 85 | if isinstance(self.noise_range_t, (list, tuple)): 86 | apply_noise = self.noise_range_t[0] <= t < self.noise_range_t[1] 87 | else: 88 | apply_noise = t >= self.noise_range_t 89 | if apply_noise: 90 | g = torch.Generator() 91 | g.manual_seed(self.noise_seed + t) 92 | if self.noise_type == 'normal': 93 | while True: 94 | # resample if noise out of percent limit, brute force but shouldn't spin much 95 | noise = torch.randn(1, generator=g).item() 96 | if abs(noise) < self.noise_pct: 97 | break 98 | else: 99 | noise = 2 * (torch.rand(1, generator=g).item() - 0.5) * self.noise_pct 100 | lrs = [v + v * noise for v in lrs] 101 | return lrs 102 | -------------------------------------------------------------------------------- /tools/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/tools/__init__.py -------------------------------------------------------------------------------- /tools/distributed.py: -------------------------------------------------------------------------------- 1 | """ Distributed training/validation utils 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | from torch import distributed as dist 7 | 8 | from models.model_ema import ModelEma 9 | 10 | 11 | def unwrap_model(model): 12 | if isinstance(model, ModelEma): 13 | return unwrap_model(model.ema) 14 | else: 15 | return model.module if hasattr(model, 'module') else model 16 | 17 | 18 | def reduce_tensor(tensor, n): 19 | rt = tensor.clone() 20 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 21 | rt /= n 22 | return rt 23 | 24 | 25 | def distribute_bn(model, world_size, reduce=False): 26 | # ensure every node has the same running bn stats 27 | for bn_name, bn_buf in unwrap_model(model).named_buffers(recurse=True): 28 | if ('running_mean' in bn_name) or ('running_var' in bn_name): 29 | if reduce: 30 | # average bn stats across whole group 31 | torch.distributed.all_reduce(bn_buf, op=dist.ReduceOp.SUM) 32 | bn_buf /= float(world_size) 33 | else: 34 | # broadcast bn stats from rank 0 to whole group 35 | torch.distributed.broadcast(bn_buf, 0) 36 | -------------------------------------------------------------------------------- /tools/function.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from tools.utils import may_mkdirs 8 | 9 | 10 | def seperate_weight_decay(named_params, lr, weight_decay=1e-5, skip_list=()): 11 | decay = [] 12 | no_decay = [] 13 | for name, param in named_params: 14 | if not param.requires_grad: 15 | continue 16 | if len(param.shape) == 1 or name in skip_list: 17 | no_decay.append(param) 18 | # if 'bias' in name: 19 | # no_decay.append(param) 20 | else: 21 | decay.append(param) 22 | return [{'params': no_decay, 'lr': lr, 'weight_decay': 0.}, 23 | {'params': decay, 'lr': lr, 'weight_decay': weight_decay}] 24 | 25 | 26 | def ratio2weight(targets, ratio): 27 | ratio = torch.from_numpy(ratio).type_as(targets) 28 | 29 | # --------------------- dangwei li TIP20 --------------------- 30 | pos_weights = targets * (1 - ratio) 31 | neg_weights = (1 - targets) * ratio 32 | weights = torch.exp(neg_weights + pos_weights) 33 | 34 | 35 | # --------------------- AAAI --------------------- 36 | # pos_weights = torch.sqrt(1 / (2 * ratio.sqrt())) * targets 37 | # neg_weights = torch.sqrt(1 / (2 * (1 - ratio.sqrt()))) * (1 - targets) 38 | # weights = pos_weights + neg_weights 39 | 40 | # for the RAP dataloader, target elements may be 2 (unknown); with or without label smoothing such entries stay greater than 1, so zero out their weights 41 | weights[targets > 1] = 0.0 42 | 43 | return weights 44 | 45 | 46 | def get_model_log_path(root_path, model_name): 47 | multi_attr_model_dir = os.path.join(root_path, model_name, 'img_model') 48 | may_mkdirs(multi_attr_model_dir) 49 | 50 | multi_attr_log_dir = os.path.join(root_path, model_name, 'log') 51 | may_mkdirs(multi_attr_log_dir) 52 | 53 | return multi_attr_model_dir, multi_attr_log_dir 54 | 55 | 56 | class LogVisual: 57 | 58 | def __init__(self, args): 59 | self.args = vars(args) 60 | self.train_loss = [] 61 | self.val_loss = [] 62 | 63 | self.ap = []
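# per-epoch metric histories, filled by append() below: 'ap' keeps the per-attribute accuracies; the following lists hold their mean and the instance-level accuracy/precision/recall/F1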
64 | self.map = [] 65 | self.acc = [] 66 | self.prec = [] 67 | self.recall = [] 68 | self.f1 = [] 69 | 70 | self.error_num = [] 71 | self.fn_num = [] 72 | self.fp_num = [] 73 | 74 | self.save = False 75 | 76 | def append(self, **kwargs): 77 | self.save = False 78 | 79 | if 'result' in kwargs: 80 | self.ap.append(kwargs['result']['label_acc']) 81 | self.map.append(np.mean(kwargs['result']['label_acc'])) 82 | self.acc.append(np.mean(kwargs['result']['instance_acc'])) 83 | self.prec.append(np.mean(kwargs['result']['instance_precision'])) 84 | self.recall.append(np.mean(kwargs['result']['instance_recall'])) 85 | self.f1.append(np.mean(kwargs['result']['instance_F1'])) 86 | 87 | self.error_num.append(kwargs['result']['error_num']) 88 | self.fn_num.append(kwargs['result']['fn_num']) 89 | self.fp_num.append(kwargs['result']['fp_num']) 90 | 91 | if 'train_loss' in kwargs: 92 | self.train_loss.append(kwargs['train_loss']) 93 | if 'val_loss' in kwargs: 94 | self.val_loss.append(kwargs['val_loss']) 95 | 96 | 97 | def get_pkl_rootpath(dataset, zero_shot): 98 | root = os.path.join("./data", f"{dataset}") 99 | if zero_shot: 100 | data_path = os.path.join(root, 'dataset_zs_run0.pkl') 101 | else: 102 | data_path = os.path.join(root, 'dataset_all.pkl') 103 | 104 | return data_path 105 | 106 | 107 | def get_reload_weight(model_path, model, pth='ckpt_max.pth'): 108 | model_path = os.path.join(model_path, pth) 109 | load_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 110 | 111 | if isinstance(load_dict, OrderedDict): 112 | pretrain_dict = load_dict 113 | else: 114 | pretrain_dict = load_dict['state_dicts'] 115 | print(f"best performance {load_dict['metric']} in epoch : {load_dict['epoch']}") 116 | 117 | model.load_state_dict(pretrain_dict, strict=True) 118 | 119 | return model 120 | -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import datetime 4 | import time 5 | # from contextlib import contextmanager 6 | import torch 7 | from torch.autograd import Variable 8 | import random 9 | import numpy as np 10 | from torch import distributed as dist 11 | 12 | from tools.distributed import unwrap_model 13 | 14 | 15 | def time_str(fmt=None): 16 | if fmt is None: 17 | fmt = '%Y-%m-%d_%H:%M:%S' 18 | 19 | # time.strftime(format[, t]) 20 | return datetime.datetime.today().strftime(fmt) 21 | 22 | 23 | def str2bool(v): 24 | return v.lower() in ("yes", "true", "1") 25 | 26 | 27 | def is_iterable(obj): 28 | return hasattr(obj, '__len__') 29 | 30 | 31 | def to_scalar(vt): 32 | """ 33 | convert a zero-dim pytorch tensor to a Python scalar; larger tensors and ndarrays are returned as numpy arrays 34 | """ 35 | # if isinstance(vt, Variable): 36 | # return vt.data.cpu().numpy().flatten()[0] 37 | if torch.is_tensor(vt): 38 | if vt.dim() == 0: 39 | return vt.detach().cpu().numpy().flatten().item() 40 | else: 41 | return vt.detach().cpu().numpy() 42 | elif isinstance(vt, np.ndarray): 43 | return vt 44 | else: 45 | raise TypeError('Input should be an ndarray or tensor') 46 | 47 | 48 | # makes the random numbers predictable 49 | def set_seed(rand_seed): 50 | np.random.seed(rand_seed) 51 | random.seed(rand_seed) 52 | torch.backends.cudnn.enabled = True 53 | torch.manual_seed(rand_seed) 54 | torch.cuda.manual_seed(rand_seed) 55 | 56 | 57 | def may_mkdirs(dir_name): 58 | # if not os.path.exists(os.path.dirname(os.path.abspath(fname))): 59 | #
--------------------------------------------------------------------------------
/tools/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import datetime
4 | import time
5 | # from contextlib import contextmanager
6 | import torch
7 | from torch.autograd import Variable
8 | import random
9 | import numpy as np
10 | from torch import distributed as dist
11 | 
12 | from tools.distributed import unwrap_model
13 | 
14 | 
15 | def time_str(fmt=None):
16 |     if fmt is None:
17 |         fmt = '%Y-%m-%d_%H:%M:%S'
18 | 
19 |     # time.strftime(format[, t])
20 |     return datetime.datetime.today().strftime(fmt)
21 | 
22 | 
23 | def str2bool(v):
24 |     return v.lower() in ("yes", "true", "1")
25 | 
26 | 
27 | def is_iterable(obj):
28 |     return hasattr(obj, '__len__')
29 | 
30 | 
31 | def to_scalar(vt):
32 |     """
33 |     convert a length-1 pytorch Variable or Tensor to a scalar
34 |     """
35 |     # if isinstance(vt, Variable):
36 |     #     return vt.data.cpu().numpy().flatten()[0]
37 |     if torch.is_tensor(vt):
38 |         if vt.dim() == 0:
39 |             return vt.detach().cpu().numpy().flatten().item()
40 |         else:
41 |             return vt.detach().cpu().numpy()
42 |     elif isinstance(vt, np.ndarray):
43 |         return vt
44 |     else:
45 |         raise TypeError('Input should be a ndarray or tensor')
46 | 
47 | 
48 | # makes the random numbers predictable
49 | def set_seed(rand_seed):
50 |     np.random.seed(rand_seed)
51 |     random.seed(rand_seed)
52 |     torch.backends.cudnn.enabled = True
53 |     torch.manual_seed(rand_seed)
54 |     torch.cuda.manual_seed(rand_seed)
55 | 
56 | 
57 | def may_mkdirs(dir_name):
58 |     # create the directory (and any missing parents) if it does not exist yet;
59 |     # a no-op when the path is already there
60 |     if not os.path.exists(os.path.abspath(dir_name)):
61 |         os.makedirs(os.path.abspath(dir_name))
62 | 
63 | 
64 | class AverageMeter(object):
65 |     """
66 |     Computes and stores the average and current value
67 | 
68 |     """
69 | 
70 |     def __init__(self):
71 |         self.val = 0
72 |         self.avg = 0
73 |         self.sum = 0
74 |         self.count = 0
75 | 
76 |     def reset(self):
77 |         self.val = 0
78 |         self.avg = 0
79 |         self.sum = 0
80 |         self.count = 0
81 | 
82 |     def update(self, val, n=1):
83 |         self.val = val
84 |         self.sum += val * n
85 |         self.count += n
86 |         self.avg = self.sum / (self.count + 1e-20)
87 | 
88 | 
89 | class RunningAverageMeter(object):
90 |     """
91 |     Computes and stores the running average and current value
92 |     """
93 | 
94 |     def __init__(self, hist=0.99):
95 |         self.val = None
96 |         self.avg = None
97 |         self.hist = hist
98 | 
99 |     def reset(self):
100 |         self.val = None
101 |         self.avg = None
102 | 
103 |     def update(self, val):
104 |         if self.avg is None:
105 |             self.avg = val
106 |         else:
107 |             self.avg = self.avg * self.hist + val * (1 - self.hist)
108 |         self.val = val
109 | 
110 | 
111 | class RecentAverageMeter(object):
112 |     """
113 |     Stores and computes the average of recent values
114 |     """
115 | 
116 |     def __init__(self, hist_size=100):
117 |         self.hist_size = hist_size
118 |         self.fifo = []
119 |         self.val = 0
120 | 
121 |     def reset(self):
122 |         self.fifo = []
123 |         self.val = 0
124 | 
125 |     def update(self, value):
126 |         self.val = value
127 |         self.fifo.append(value)
128 |         if len(self.fifo) > self.hist_size:
129 |             del self.fifo[0]
130 | 
131 |     @property
132 |     def avg(self):
133 |         assert len(self.fifo) > 0
134 |         return float(sum(self.fifo)) / len(self.fifo)
135 | 
136 | 
137 | class ReDirectSTD(object):
138 |     """
139 |     overwrites sys.stdout or sys.stderr and tees output into a file
140 |     Args:
141 |         fpath: file path of the log file
142 |         console: one of ['stdout', 'stderr']
143 |         immediately_visible: if True, open and close the file on every write so the log is immediately visible
144 |     Usage example:
145 |         ReDirectSTD('stdout.txt', 'stdout', False)
146 |         ReDirectSTD('stderr.txt', 'stderr', False)
147 |     """
148 | 
149 |     def __init__(self, fpath=None, console='stdout', immediately_visible=False):
150 |         import sys
151 |         import os
152 |         assert console in ['stdout', 'stderr']
153 |         self.console = sys.stdout if console == "stdout" else sys.stderr
154 |         self.file = fpath
155 |         self.f = None
156 |         self.immediately_visible = immediately_visible
157 | 
158 |         if fpath is not None:
159 |             # Remove existing log file
160 |             if os.path.exists(fpath):
161 |                 os.remove(fpath)
162 |             if console == 'stdout':
163 |                 sys.stdout = self
164 |             else:
165 |                 sys.stderr = self
166 | 
167 |     def __del__(self):
168 |         self.close()
169 | 
170 |     def __enter__(self):
171 |         return self
172 | 
173 |     def __exit__(self, exc_type, exc_value, traceback):
174 |         self.close()
175 | 
176 |     def write(self, msg):
177 |         self.console.write(msg)
178 |         if self.file is not None:
179 |             if not os.path.exists(os.path.dirname(os.path.abspath(self.file))):
180 |                 os.makedirs(os.path.dirname(os.path.abspath(self.file)))
181 | 
182 |             if self.immediately_visible:
183 |                 # open for writing, appending to the end of the file if it exists
184 |                 with open(self.file, 'a') as f:
185 |                     f.write(msg)
186 |             else:
187 |                 if self.f is None:
188 |                     self.f = open(self.file, 'w')
189 | 
190 |                 # the file is opened lazily by the first write;
191 |                 # subsequent writes reuse the same handle
192 |                 self.f.write(msg)
193 | 
194 |     def flush(self):
195 |         self.console.flush()
196 |         if self.f is not None:
197 |             self.f.flush()
198 |             import os
199 |             os.fsync(self.f.fileno())
200 | 
201 |     def close(self):
202 |         self.console.flush()
203 |         if self.f is not None:
204 |             self.f.close()
205 | 
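# A minimal ReDirectSTD sketch (log path is illustrative); after the call, every
# print() reaches both the terminal and the log file:
#
#     >>> from tools.utils import ReDirectSTD
#     >>> ReDirectSTD('exp_result/PA100k/log/stdout.txt', 'stdout', False)
#     >>> print('hello')   # shows on the console and lands in stdout.txt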
206 | 
207 | def find_index(seq, item):
208 |     for i, x in enumerate(seq):
209 |         if item == x:
210 |             return i
211 |     return -1
212 | 
213 | 
214 | def set_devices(sys_device_ids):
215 |     """
216 |     Args:
217 |         sys_device_ids: a tuple; which GPUs to use
218 |             e.g. sys_device_ids = (), only use cpu
219 |                  sys_device_ids = (3,), use the 4-th gpu
220 |                  sys_device_ids = (0, 1, 2, 3,), use the first 4 gpus
221 |                  sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus
222 |     """
223 |     import os
224 |     visible_devices = ''
225 |     for i in sys_device_ids:
226 |         visible_devices += '{}, '.format(i)
227 |     os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices
228 |     # Return wrappers
229 |     # Models and user defined Variables/Tensors would be transferred to
230 |     # the first device
231 |     device_id = 0 if len(sys_device_ids) > 0 else -1
232 | 
233 | 
234 | def transfer_optims(optims, device_id=-1):
235 |     for optim in optims:
236 |         if isinstance(optim, torch.optim.Optimizer):
237 |             transfer_optim_state(optim.state, device_id=device_id)
238 | 
239 | 
240 | def transfer_optim_state(state, device_id=-1):
241 |     """
242 |     Transfer an optimizer.state to cpu or a specified gpu, which means
243 |     transferring tensors of the optimizer.state to the specified device.
244 |     The modification is in place for the state.
245 |     Args:
246 |         state: a torch.optim.Optimizer.state
247 |         device_id: gpu id, or -1 which means transferring to cpu
248 |     """
249 |     for key, val in state.items():
250 |         if isinstance(val, dict):
251 |             transfer_optim_state(val, device_id=device_id)
252 |         elif isinstance(val, Variable):
253 |             raise RuntimeError("Oops, state[{}] is a Variable!".format(key))
254 |         elif isinstance(val, torch.nn.Parameter):
255 |             raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
256 |         else:
257 |             try:
258 |                 if device_id == -1:
259 |                     state[key] = val.cpu()
260 |                 else:
261 |                     state[key] = val.cuda(device=device_id)
262 |             except Exception:
263 |                 pass
264 | 
265 | 
266 | def load_state_dict(model, src_state_dict):
267 |     """
268 |     copy parameters from src_state_dict to model
269 |     Arguments:
270 |         model: A torch.nn.Module object
271 |         src_state_dict: a dict containing parameters and persistent buffers
272 |     """
273 |     from torch.nn import Parameter
274 |     dest_state_dict = model.state_dict()
275 |     for name, param in src_state_dict.items():
276 |         if name not in dest_state_dict:
277 |             continue
278 |         if isinstance(param, Parameter):
279 |             param = param.data
280 |         try:
281 |             dest_state_dict[name].copy_(param)
282 |         except Exception as msg:
283 |             print("Warning: Error occurs when copying '{}': {}".format(name, str(msg)))
284 | 
285 |     src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())
286 |     if len(src_missing) > 0:
287 |         print("Keys not found in source state_dict: ")
288 |         for n in src_missing:
289 |             print('\t', n)
290 | 
291 |     dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())
292 |     if len(dest_missing):
293 |         print("Keys not found in destination state_dict: ")
294 |         for n in dest_missing:
295 |             print('\t', n)
296 | 
297 | 
298 | def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
299 |     """
300 |     load the state_dicts of modules & optimizers from file
301 |     Args:
302 |         modules_optims: a list containing modules and optimizers
303 |         ckpt_file: the checkpoint file
304 |         load_to_cpu: Boolean, whether to transfer tensors in the modules & optimizers to cpu
305 |     """
306 |     map_location = (lambda storage, loc: storage) if load_to_cpu else None
307 |     ckpt = torch.load(ckpt_file, map_location=map_location)
308 |     for m, sd in zip(modules_optims, ckpt['state_dicts']):
309 |         m.load_state_dict(sd)
310 |     if verbose:
311 |         print("Resume from ckpt {}, \nepoch: {}, scores: {}".format(
312 |             ckpt_file, ckpt['ep'], ckpt['scores']))
313 |     return ckpt['ep'], ckpt['scores']
314 | 
315 | 
316 | # def get_state_dict(model, unwrap_fn=unwrap_model):
317 | #     return unwrap_fn(model).state_dict()
318 | 
319 | 
320 | def save_ckpt(model, ckpt_files, epoch, metric):
321 |     """
322 |     Note:
323 |         torch.save() preserves the device type and id of the tensors it saves.
324 |         So when loading a ckpt, you have to tell torch.load() to load these tensors
325 |         to cpu or your desired gpu, if you changed devices.
326 |     """
327 | 
328 |     if not os.path.exists(os.path.dirname(os.path.abspath(ckpt_files))):
329 |         os.makedirs(os.path.dirname(os.path.abspath(ckpt_files)))
330 | 
331 |     save_dict = {'state_dicts': model.state_dict(),
332 |                  'state_dict_ema': unwrap_model(model).state_dict(),
333 |                  'epoch': f'{time_str()} in epoch {epoch}',
334 |                  'metric': metric,}
335 | 
336 |     torch.save(save_dict, ckpt_files)
337 | 
338 | 
339 | def adjust_lr_staircase(param_groups, base_lrs, ep, decay_at_epochs, factor):
340 |     """ Multiply the learning rate by a factor at the beginning of specified epochs. Different
341 |     param groups specify their own base learning rates.
342 |     Args:
343 |         param_groups: a list of param groups
344 |         base_lrs: starting learning rates, len(base_lrs) = len(param_groups)
345 |         ep: current epoch, ep >= 1
346 |         decay_at_epochs: a list or tuple; learning rates are multiplied by a factor
347 |             at the beginning of these epochs
348 |         factor: a number in range (0, 1)
349 |     Example:
350 |         base_lrs = [0.1, 0.01]
351 |         decay_at_epochs = [51, 101]
352 |         factor = 0.1
353 |     Note:
354 |         It is meant to be called at the beginning of an epoch
355 |     """
356 |     assert len(base_lrs) == len(param_groups), \
357 |         'You should specify a base lr for each param group.'
358 |     assert ep >= 1, "Current epoch number should be >= 1"
359 | 
360 |     if ep not in decay_at_epochs:
361 |         return
362 | 
363 |     ind = find_index(decay_at_epochs, ep)
364 |     for i, (g, base_lr) in enumerate(zip(param_groups, base_lrs)):
365 |         g['lr'] = base_lr * factor ** (ind + 1)
366 |         print('=====> Param group {}: lr adjusted to {:.10f}'.format(i, g['lr']).rstrip('0'))
367 | 
368 | 
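# A worked check of the staircase schedule with the docstring's example settings:
#
#     base_lrs = [0.1, 0.01], decay_at_epochs = [51, 101], factor = 0.1
#     epochs 1-50 : lr = [0.1, 0.01]                        (no decay epoch hit)
#     epoch 51    : ind = 0, lr = base_lr * 0.1 ** 1 = [0.01, 0.001]
#     epoch 101   : ind = 1, lr = base_lr * 0.1 ** 2 = [0.001, 0.0001]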
369 | def may_set_mode(maybe_modules, mode):
370 |     """
371 |     maybe_modules: an object or a list of objects.
372 |     """
373 |     assert mode in ['train', 'eval']
374 |     if not is_iterable(maybe_modules):
375 |         maybe_modules = [maybe_modules]
376 |     for m in maybe_modules:
377 |         if isinstance(m, torch.nn.Module):
378 |             if mode == 'train':
379 |                 m.train()
380 |             else:
381 |                 m.eval()
382 | 
383 | 
384 | def get_topk(matrix, k):
385 |     """
386 |     retain the topk elements of a matrix and set the others to 0
387 |     Args:
388 |         matrix (object): np.array 2d
389 |     """
390 |     vector = np.sort(matrix.reshape(-1))              # ascending flat copy
391 |     threshold = vector[-k]                            # k-th largest value
392 |     matrix = np.where(matrix >= threshold, matrix, 0)
393 | 
394 |     return matrix
395 | 
396 | 
397 | class Timer:
398 | 
399 |     def __init__(self):
400 |         self.o = time.time()
401 | 
402 |     def measure(self, p=1):
403 |         x = (time.time() - self.o) / p
404 |         x = int(x)
405 |         if x >= 3600:
406 |             return '{:.1f}h'.format(x / 3600)
407 |         if x >= 60:
408 |             return '{}m'.format(round(x / 60))
409 |         return '{}s'.format(x)
410 | 
411 | 
412 | class data_prefetcher():
413 |     def __init__(self, loader):
414 |         self.loader = iter(loader)
415 |         self.stream = torch.cuda.Stream()
416 |         # self.mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1, 3, 1, 1)
417 |         # self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1, 3, 1, 1)
418 |         # With Amp, it isn't necessary to manually convert data to half.
419 |         # if args.fp16:
420 |         #     self.mean = self.mean.half()
421 |         #     self.std = self.std.half()
422 |         self.preload()
423 | 
424 |     def preload(self):
425 |         try:
426 |             self.next_input, self.next_target = next(self.loader)
427 |         except StopIteration:
428 |             self.next_input = None
429 |             self.next_target = None
430 |             return
431 |         with torch.cuda.stream(self.stream):
432 |             self.next_input = self.next_input.cuda(non_blocking=True)
433 |             self.next_target = self.next_target.cuda(non_blocking=True)
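            # note: preload() issues these host-to-device copies on a side CUDA
            # stream, so the next batch is transferred while the current batch
            # is still computing; next() waits on this stream before handing the
            # tensors over.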
434 |             # With Amp, it isn't necessary to manually convert data to half.
435 |             # if args.fp16:
436 |             #     self.next_input = self.next_input.half()
437 |             # else:
438 |             self.next_input = self.next_input.float()
439 |             # self.next_input = self.next_input.sub_(self.mean).div_(self.std)
440 | 
441 |     def next(self):
442 |         torch.cuda.current_stream().wait_stream(self.stream)
443 |         input = self.next_input
444 |         target = self.next_target
445 |         self.preload()
446 |         return input, target
447 | 
448 | 
--------------------------------------------------------------------------------
/tools/vis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def tb_visualizer_pedes(tb_writer, lr, epoch, train_loss, valid_loss, train_result, valid_result,
5 |                         train_gt, valid_gt, train_loss_mtr, valid_loss_mtr, model, attr_name):
6 |     tb_writer.add_scalars('train/lr', {'lr': lr}, epoch)
7 |     tb_writer.add_scalars('train/losses', {'train': train_loss,
8 |                                            'test': valid_loss}, epoch)
9 | 
10 |     tb_writer.add_scalars('train/perf', {'ma': train_result.ma,
11 |                                          'pos_recall': np.mean(train_result.label_pos_recall),
12 |                                          'neg_recall': np.mean(train_result.label_neg_recall),
13 |                                          'Acc': train_result.instance_acc,
14 |                                          'Prec': train_result.instance_prec,
15 |                                          'Rec': train_result.instance_recall,
16 |                                          'F1': train_result.instance_f1}, epoch)
17 | 
18 |     tb_writer.add_scalars('test/perf', {'ma': valid_result.ma,
19 |                                         'pos_recall': np.mean(valid_result.label_pos_recall),
20 |                                         'neg_recall': np.mean(valid_result.label_neg_recall),
21 |                                         'Acc': valid_result.instance_acc,
22 |                                         'Prec': valid_result.instance_prec,
23 |                                         'Rec': valid_result.instance_recall,
24 |                                         'F1': valid_result.instance_f1}, epoch)
25 | 
26 | 
27 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | # os.environ['CUDA_VISIBLE_DEVICES'] = '3'
3 | 
4 | import argparse
5 | import pickle
6 | from collections import defaultdict
7 | from datetime import datetime
8 | 
9 | import numpy as np
10 | from mmcv.cnn import get_model_complexity_info
11 | from torch.utils.tensorboard import SummaryWriter
12 | from visdom import Visdom
13 | 
14 | from configs import cfg, update_config
15 | from dataset.multi_label.coco import COCO14
16 | from dataset.augmentation import get_transform
17 | from metrics.ml_metrics import get_map_metrics, get_multilabel_metrics
18 | from metrics.pedestrian_metrics import get_pedestrian_metrics
19 | from models.model_ema import ModelEmaV2
20 | from optim.adamw import AdamW
21 | from scheduler.cos_annealing_with_restart import CosineAnnealingLR_with_Restart
22 | from scheduler.cosine_lr import CosineLRScheduler
23 | from tools.distributed import distribute_bn
24 | from tools.vis import tb_visualizer_pedes
25 | import torch
26 | from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
27 | from torch.utils.data import DataLoader
28 | 
29 | from batch_engine import valid_trainer, batch_trainer
30 | from dataset.pedes_attr.pedes import PedesAttr
31 | from models.base_block import FeatClassifier
32 | from models.model_factory import build_loss, build_classifier, build_backbone
33 | 
34 | from tools.function import get_model_log_path, get_reload_weight, seperate_weight_decay
35 | from tools.utils import time_str, save_ckpt, ReDirectSTD, set_seed, str2bool
36 | from models.backbone import swin_transformer, resnet, bninception, vit
37 | from models.backbone.tresnet import tresnet
38 | from losses import bceloss, scaledbceloss
39 | from models import base_block
40 | 
41 | 
42 | 
43 | 
44 | # torch.backends.cudnn.benchmark = True
45 | # torch.autograd.set_detect_anomaly(True)
46 | torch.autograd.set_detect_anomaly(True)  # debugging aid; disable for faster training
47 | 
48 | 
49 | def main(cfg, args):
50 |     set_seed(605)
51 |     exp_dir = os.path.join('exp_result', cfg.DATASET.NAME)
52 | 
53 |     model_dir, log_dir = get_model_log_path(exp_dir, cfg.NAME)
54 |     stdout_file = os.path.join(log_dir, f'stdout_{time_str()}.txt')
55 |     save_model_path = os.path.join(model_dir, f'ckpt_max_{time_str()}.pth')
56 | 
57 |     visdom = None
58 |     if cfg.VIS.VISDOM:
59 |         visdom = Visdom(env=f'{cfg.DATASET.NAME}_' + cfg.NAME, port=8401)
60 |         assert visdom.check_connection()
61 | 
62 |     writer = None
63 |     if cfg.VIS.TENSORBOARD.ENABLE:
64 |         current_time = datetime.now().strftime('%b%d_%H-%M-%S')
65 |         writer_dir = os.path.join(exp_dir, cfg.NAME, 'runs', current_time)
66 |         writer = SummaryWriter(log_dir=writer_dir)
67 | 
68 |     if cfg.REDIRECTOR:
69 |         print('redirector stdout')
70 |         ReDirectSTD(stdout_file, 'stdout', False)
71 | 
72 |     """
73 |     args is mutated here because CfgNode is immutable
74 |     """
75 |     if 'WORLD_SIZE' in os.environ:
76 |         args.distributed = int(os.environ['WORLD_SIZE']) > 1
77 |     else:
78 |         args.distributed = None
79 | 
80 |     args.world_size = 1
81 |     args.rank = 0  # global rank
82 | 
83 |     if args.distributed:
84 |         args.device = 'cuda:%d' % args.local_rank
85 |         torch.cuda.set_device(args.local_rank)
86 |         torch.distributed.init_process_group(backend='nccl', init_method='env://')
87 |         args.world_size = torch.distributed.get_world_size()
88 |         args.rank = torch.distributed.get_rank()
89 |         print(f'use GPU {args.device} for training')
90 |         print(args.world_size, args.rank)
91 | 
92 |     if args.local_rank == 0:
93 |         print(cfg)
94 | 
95 |     train_tsfm, valid_tsfm = get_transform(cfg)
96 |     if args.local_rank == 0:
97 |         print(train_tsfm)
98 | 
99 |     if cfg.DATASET.TYPE == 'pedes':
100 |         train_set = PedesAttr(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm,
101 |                               target_transform=cfg.DATASET.TARGETTRANSFORM)
102 | 
103 |         valid_set = PedesAttr(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm,
104 |                               target_transform=cfg.DATASET.TARGETTRANSFORM)
105 |     elif cfg.DATASET.TYPE == 'multi_label':
106 |         train_set = COCO14(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm,
107 |                            target_transform=cfg.DATASET.TARGETTRANSFORM)
108 | 
109 |         valid_set = COCO14(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm,
110 |                            target_transform=cfg.DATASET.TARGETTRANSFORM)
111 |     if args.distributed:
112 |         train_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
113 |     else:
114 |         train_sampler = None
115 | 
116 |     train_loader = DataLoader(
117 |         dataset=train_set,
118 |         batch_size=cfg.TRAIN.BATCH_SIZE,
119 |         sampler=train_sampler,
120 |         shuffle=train_sampler is None,
121 |         num_workers=4,
122 |         pin_memory=True,
123 |         drop_last=True,
124 |     )
125 | 
126 |     valid_loader = DataLoader(
127 |         dataset=valid_set,
128 |         batch_size=cfg.TRAIN.BATCH_SIZE,
129 |         shuffle=False,
130 |         num_workers=4,
131 |         pin_memory=True,
132 |     )
133 | 
134 |     if args.local_rank == 0:
135 |         print('-' * 60)
136 |         print(f'{cfg.DATASET.NAME} attr_num : {train_set.attr_num}, eval_attr_num : {train_set.eval_attr_num} '
137 |               f'{cfg.DATASET.TRAIN_SPLIT} set: {len(train_loader.dataset)}, '
138 |               f'{cfg.DATASET.TEST_SPLIT} set: {len(valid_loader.dataset)}, '
139 |               )
140 | 
141 |     labels = train_set.label
142 |     label_ratio = labels.mean(0) if cfg.LOSS.SAMPLE_WEIGHT else None
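    # note: label_ratio is the per-attribute positive-sample frequency; it is
    # passed to the loss as sample_weight and consumed by ratio2weight()
    # (tools/function.py) to up-weight rare positive attributes.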
143 | 
144 |     backbone, c_output = build_backbone(cfg.BACKBONE.TYPE, cfg.BACKBONE.MULTISCALE)
145 | 
146 | 
147 |     classifier = build_classifier(cfg.CLASSIFIER.NAME)(
148 |         nattr=train_set.attr_num,
149 |         c_in=c_output,
150 |         bn=cfg.CLASSIFIER.BN,
151 |         pool=cfg.CLASSIFIER.POOLING,
152 |         scale=cfg.CLASSIFIER.SCALE
153 |     )
154 | 
155 |     model = FeatClassifier(backbone, classifier, bn_wd=cfg.TRAIN.BN_WD)
156 |     if args.local_rank == 0:
157 |         print(f"backbone: {cfg.BACKBONE.TYPE}, classifier: {cfg.CLASSIFIER.NAME}")
158 |         print(f"model_name: {cfg.NAME}")
159 | 
160 |     # flops, params = get_model_complexity_info(model, (3, 256, 128), print_per_layer_stat=True)
161 |     # print('{:<30}  {:<8}'.format('Computational complexity: ', flops))
162 |     # print('{:<30}  {:<8}'.format('Number of parameters: ', params))
163 | 
164 |     model = model.cuda()
165 |     if args.distributed:
166 |         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
167 |         model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank])
168 |     else:
169 |         model = torch.nn.DataParallel(model)
170 | 
171 |     model_ema = None
172 |     if cfg.TRAIN.EMA.ENABLE:
173 |         # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
174 |         model_ema = ModelEmaV2(
175 |             model, decay=cfg.TRAIN.EMA.DECAY, device='cpu' if cfg.TRAIN.EMA.FORCE_CPU else None)
176 | 
177 |     if cfg.RELOAD.TYPE:
178 |         model = get_reload_weight(model_dir, model, pth=cfg.RELOAD.PTH)
179 | 
180 |     loss_weight = cfg.LOSS.LOSS_WEIGHT
181 | 
182 | 
183 |     criterion = build_loss(cfg.LOSS.TYPE)(
184 |         sample_weight=label_ratio, scale=cfg.CLASSIFIER.SCALE, size_sum=cfg.LOSS.SIZESUM, tb_writer=writer)
185 |     criterion = criterion.cuda()
186 | 
187 |     if cfg.TRAIN.BN_WD:
188 |         param_groups = [{'params': model.module.finetune_params(),
189 |                          'lr': cfg.TRAIN.LR_SCHEDULER.LR_FT,
190 |                          'weight_decay': cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY},
191 |                         {'params': model.module.fresh_params(),
192 |                          'lr': cfg.TRAIN.LR_SCHEDULER.LR_NEW,
193 |                          'weight_decay': cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY}]
194 |     else:
195 |         # bn parameters are not given weight decay
196 |         ft_params = seperate_weight_decay(
197 |             model.module.finetune_params(),
198 |             lr=cfg.TRAIN.LR_SCHEDULER.LR_FT,
199 |             weight_decay=cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY)
200 | 
201 |         fresh_params = seperate_weight_decay(
202 |             model.module.fresh_params(),
203 |             lr=cfg.TRAIN.LR_SCHEDULER.LR_NEW,
204 |             weight_decay=cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY)
205 | 
206 |         param_groups = ft_params + fresh_params
207 | 
208 |     if cfg.TRAIN.OPTIMIZER.TYPE.lower() == 'sgd':
209 |         optimizer = torch.optim.SGD(param_groups, momentum=cfg.TRAIN.OPTIMIZER.MOMENTUM)
210 |     elif cfg.TRAIN.OPTIMIZER.TYPE.lower() == 'adam':
211 |         optimizer = torch.optim.Adam(param_groups)
212 |     elif cfg.TRAIN.OPTIMIZER.TYPE.lower() == 'adamw':
213 |         optimizer = AdamW(param_groups)
214 |     else:
215 |         assert False, f'{cfg.TRAIN.OPTIMIZER.TYPE} is not implemented'
216 | 
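    # For reference, in the BN_WD=False branch the optimizer receives four param
    # groups from the two seperate_weight_decay() calls above (a sketch of the
    # structure, not executable output):
    #
    #     param_groups == [
    #         {'params': <finetune, no decay>, 'lr': LR_FT,  'weight_decay': 0.0},
    #         {'params': <finetune, decay>,    'lr': LR_FT,  'weight_decay': WD},
    #         {'params': <fresh, no decay>,    'lr': LR_NEW, 'weight_decay': 0.0},
    #         {'params': <fresh, decay>,       'lr': LR_NEW, 'weight_decay': WD},
    #     ]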
217 |     if cfg.TRAIN.LR_SCHEDULER.TYPE == 'plateau':
218 |         lr_scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=4)
219 |         if cfg.CLASSIFIER.BN:
220 |             assert False, 'BN is not compatible with ReduceLROnPlateau'
221 |     elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'multistep':
222 |         lr_scheduler = MultiStepLR(optimizer, milestones=cfg.TRAIN.LR_SCHEDULER.LR_STEP, gamma=0.1)
223 |     elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'annealing_cosine':
224 |         lr_scheduler = CosineAnnealingLR_with_Restart(
225 |             optimizer,
226 |             T_max=(cfg.TRAIN.MAX_EPOCH + 5) * len(train_loader),
227 |             T_mult=1,
228 |             eta_min=cfg.TRAIN.LR_SCHEDULER.LR_NEW * 0.001
229 |         )
230 |     elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'warmup_cosine':
231 | 
232 | 
233 |         lr_scheduler = CosineLRScheduler(
234 |             optimizer,
235 |             t_initial=cfg.TRAIN.MAX_EPOCH,
236 |             lr_min=1e-5,  # the final value the cosine lr decays to
237 |             warmup_lr_init=1e-4,
238 |             warmup_t=cfg.TRAIN.MAX_EPOCH * cfg.TRAIN.LR_SCHEDULER.WMUP_COEF,
239 |         )
240 | 
241 |     else:
242 |         assert False, f'{cfg.TRAIN.LR_SCHEDULER.TYPE} has not been implemented yet'
243 | 
244 |     best_metric, epoch = trainer(cfg, args, epoch=cfg.TRAIN.MAX_EPOCH,
245 |                                  model=model, model_ema=model_ema,
246 |                                  train_loader=train_loader,
247 |                                  valid_loader=valid_loader,
248 |                                  criterion=criterion,
249 |                                  optimizer=optimizer,
250 |                                  lr_scheduler=lr_scheduler,
251 |                                  path=save_model_path,
252 |                                  loss_w=loss_weight,
253 |                                  viz=visdom,
254 |                                  tb_writer=writer)
255 |     if args.local_rank == 0:
256 |         print(f'{cfg.NAME}, best_metric : {best_metric} in epoch {epoch}')
257 | 
258 | 
259 | def trainer(cfg, args, epoch, model, model_ema, train_loader, valid_loader, criterion, optimizer, lr_scheduler,
260 |             path, loss_w, viz, tb_writer):
261 |     maximum = float(-np.inf)
262 |     best_epoch = 0
263 | 
264 |     result_list = defaultdict()
265 | 
266 |     result_path = path
267 |     result_path = result_path.replace('ckpt_max', 'metric')
268 |     result_path = result_path.replace('pth', 'pkl')
269 | 
270 |     for e in range(epoch):
271 | 
272 |         if args.distributed:
273 |             train_loader.sampler.set_epoch(e)  # use the current epoch so each epoch gets a different shuffle
274 | 
275 |         lr = optimizer.param_groups[1]['lr']
276 | 
277 |         train_loss, train_gt, train_probs, train_imgs, train_logits, train_loss_mtr = batch_trainer(
278 |             cfg,
279 |             args=args,
280 |             epoch=e,
281 |             model=model,
282 |             model_ema=model_ema,
283 |             train_loader=train_loader,
284 |             criterion=criterion,
285 |             optimizer=optimizer,
286 |             loss_w=loss_w,
287 |             scheduler=lr_scheduler if cfg.TRAIN.LR_SCHEDULER.TYPE == 'annealing_cosine' else None,
288 |         )
289 | 
290 |         if args.distributed:
291 |             if args.local_rank == 0:
292 |                 print("Distributing BatchNorm running means and vars")
293 |             distribute_bn(model, args.world_size, args.dist_bn == 'reduce')
294 | 
295 |         if model_ema is not None and not cfg.TRAIN.EMA.FORCE_CPU:
296 | 
297 |             if args.local_rank == 0:
298 |                 print('using model_ema to validate')
299 | 
300 |             if args.distributed:
301 |                 distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
302 |             valid_loss, valid_gt, valid_probs, valid_imgs, valid_logits, valid_loss_mtr = valid_trainer(
303 |                 cfg,
304 |                 args=args,
305 |                 epoch=e,
306 |                 model=model_ema.module,
307 |                 valid_loader=valid_loader,
308 |                 criterion=criterion,
309 |                 loss_w=loss_w
310 |             )
311 |         else:
312 |             valid_loss, valid_gt, valid_probs, valid_imgs, valid_logits, valid_loss_mtr = valid_trainer(
313 |                 cfg,
314 |                 args=args,
315 |                 epoch=e,
316 |                 model=model,
317 |                 valid_loader=valid_loader,
318 |                 criterion=criterion,
319 |                 loss_w=loss_w
320 |             )
321 | 
322 |         if cfg.TRAIN.LR_SCHEDULER.TYPE == 'plateau':
323 |             lr_scheduler.step(metrics=valid_loss)
324 |         elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'warmup_cosine':
325 |             lr_scheduler.step(epoch=e + 1)
326 |         elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'multistep':
327 |             lr_scheduler.step()
328 | 
329 |         if cfg.METRIC.TYPE == 'pedestrian':
330 | 
331 |             train_result = get_pedestrian_metrics(train_gt, train_probs, index=None, cfg=cfg)
332 |             valid_result = get_pedestrian_metrics(valid_gt, valid_probs, index=None, cfg=cfg)
333 | 
334 |             if args.local_rank == 0:
335 |                 print(f'Evaluation on train set, train losses {train_loss}\n',
336 |                       'ma: {:.4f}, label_f1: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
337 |                           train_result.ma, np.mean(train_result.label_f1),
338 |                           np.mean(train_result.label_pos_recall),
339 |                           np.mean(train_result.label_neg_recall)),
340 |                       'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
341 |                           train_result.instance_acc, train_result.instance_prec, train_result.instance_recall,
342 |                           train_result.instance_f1))
343 | 
344 |                 print(f'Evaluation on test set, valid losses {valid_loss}\n',
345 |                       'ma: {:.4f}, label_f1: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
346 |                           valid_result.ma, np.mean(valid_result.label_f1),
347 |                           np.mean(valid_result.label_pos_recall),
348 |                           np.mean(valid_result.label_neg_recall)),
349 |                       'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
350 |                           valid_result.instance_acc, valid_result.instance_prec, valid_result.instance_recall,
351 |                           valid_result.instance_f1))
352 | 
353 |                 print(f'{time_str()}')
354 |                 print('-' * 60)
355 | 
356 |             if args.local_rank == 0:
357 |                 tb_visualizer_pedes(tb_writer, lr, e, train_loss, valid_loss, train_result, valid_result,
358 |                                     train_gt, valid_gt, train_loss_mtr, valid_loss_mtr, model, train_loader.dataset.attr_id)
359 | 
360 |             cur_metric = valid_result.ma
361 |             if cur_metric > maximum:
362 |                 maximum = cur_metric
363 |                 best_epoch = e
364 |                 save_ckpt(model, path, e, maximum)
365 | 
366 |             result_list[e] = {
367 |                 'train_result': train_result,  # 'train_map': train_map,
368 |                 'valid_result': valid_result,  # 'valid_map': valid_map,
369 |                 'train_gt': train_gt, 'train_probs': train_probs,
370 |                 'valid_gt': valid_gt, 'valid_probs': valid_probs,
371 |                 'train_imgs': train_imgs, 'valid_imgs': valid_imgs
372 |             }
373 | 
374 |         elif cfg.METRIC.TYPE == 'multi_label':
375 | 
376 |             train_metric = get_multilabel_metrics(train_gt, train_probs)
377 |             valid_metric = get_multilabel_metrics(valid_gt, valid_probs)
378 | 
379 |             if args.local_rank == 0:
380 |                 print(
381 |                     'Train Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
382 |                     'CF1: {:.4f}'.format(train_metric.map, train_metric.OP, train_metric.OR, train_metric.OF1,
383 |                                          train_metric.CP, train_metric.CR, train_metric.CF1))
384 | 
385 |                 print(
386 |                     'Test Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
387 |                     'CF1: {:.4f}'.format(valid_metric.map, valid_metric.OP, valid_metric.OR, valid_metric.OF1,
388 |                                          valid_metric.CP, valid_metric.CR, valid_metric.CF1))
389 |                 print(f'{time_str()}')
390 |                 print('-' * 60)
391 | 
392 |                 tb_writer.add_scalars('train/lr', {'lr': lr}, e)
393 | 
394 |                 tb_writer.add_scalars('train/losses', {'train': train_loss,
395 |                                                        'test': valid_loss}, e)
396 | 
397 |                 tb_writer.add_scalars('train/perf', {'mAP': train_metric.map,
398 |                                                      'OP': train_metric.OP,
399 |                                                      'OR': train_metric.OR,
400 |                                                      'OF1': train_metric.OF1,
401 |                                                      'CP': train_metric.CP,
402 |                                                      'CR': train_metric.CR,
403 |                                                      'CF1': train_metric.CF1}, e)
404 | 
405 |                 tb_writer.add_scalars('test/perf', {'mAP': valid_metric.map,
406 |                                                     'OP': valid_metric.OP,
407 |                                                     'OR': valid_metric.OR,
408 |                                                     'OF1': valid_metric.OF1,
409 |                                                     'CP': valid_metric.CP,
410 |                                                     'CR': valid_metric.CR,
411 |                                                     'CF1': valid_metric.CF1}, e)
412 | 
413 |             cur_metric = valid_metric.map
414 |             if cur_metric > maximum:
415 |                 maximum = cur_metric
416 |                 best_epoch = e
417 |                 save_ckpt(model, path, e, maximum)
418 | 
419 |             result_list[e] = {
420 |                 'train_result': train_metric, 'valid_result': valid_metric,
421 |                 'train_gt': train_gt, 'train_probs': train_probs,
422 |                 'valid_gt': valid_gt, 'valid_probs': valid_probs
423 |             }
424 |         else:
425 |             assert False, f'{cfg.METRIC.TYPE} is unavailable'
426 | 
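        # note: result_list is re-pickled after every epoch, so the metric
        # history survives an interrupted run; it is saved next to the
        # checkpoint as metric_<timestamp>.pkl (see the replace() calls above).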
427 |         with open(result_path, 'wb') as f:
428 |             pickle.dump(result_list, f)
429 | 
430 |     return maximum, best_epoch
431 | 
432 | 
433 | def argument_parser():
434 |     parser = argparse.ArgumentParser(description="attribute recognition",
435 |                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
436 | 
437 |     parser.add_argument(
438 |         "--cfg", help="decide which cfg to use", type=str,
439 |         default="./configs/pedes_baseline/pa100k.yaml",
440 | 
441 |     )
442 | 
443 |     parser.add_argument("--debug", type=str2bool, default="true")
444 |     parser.add_argument('--local_rank', help='node rank for distributed training', default=0,
445 |                         type=int)
446 |     parser.add_argument('--dist_bn', type=str, default='',
447 |                         help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")')
448 | 
449 |     args = parser.parse_args()
450 | 
451 |     return args
452 | 
453 | 
454 | if __name__ == '__main__':
455 |     args = argument_parser()
456 | 
457 |     update_config(cfg, args)
458 |     main(cfg, args)
459 | 
--------------------------------------------------------------------------------
/train_gpu.sh:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | # for pedestrian attribute recognition
5 | 
6 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/peta.yaml
7 | 
8 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/peta_zs.yaml
9 | 
10 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/rapv1.yaml
11 | 
12 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/rapv2.yaml
13 | 
14 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/rap_zs.yaml
15 | 
16 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/pa100k.yaml
17 | 
18 | # for swin transformer, change cfg.TRAIN.BATCH_SIZE to 32
19 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 --master_port=1233 train.py --cfg ./configs/pedes_baseline/pa100k.yaml
20 | 
21 | 
22 | 
23 | 
24 | # for multi-label classification
25 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 --master_port=1233 train.py --cfg ./configs/multilabel_baseline/coco.yaml
26 | 
27 | 
--------------------------------------------------------------------------------