├── .idea
│ ├── Rethinking_of_PAR.iml
│ ├── deployment.xml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── other.xml
│ ├── vcs.xml
│ └── workspace.xml
├── README.md
├── batch_engine.py
├── configs
│ ├── __init__.py
│ ├── default.py
│ ├── multilabel_baseline
│ │ ├── __init__.py
│ │ └── coco.yaml
│ └── pedes_baseline
│   ├── __init__.py
│   ├── pa100k.yaml
│   ├── peta.yaml
│   ├── peta_zs.yaml
│   ├── rap_zs.yaml
│   ├── rapv1.yaml
│   └── rapv2.yaml
├── data
│ ├── COCO14
│ │ └── ml_anno
│ │   ├── category.json
│ │   ├── coco14_train_anno.pkl
│ │   └── coco14_val_anno.pkl
│ ├── PA100k
│ │ └── dataset_all.pkl
│ ├── PETA
│ │ ├── dataset_all.pkl
│ │ └── dataset_zs_run0.pkl
│ ├── RAP
│ │ └── dataset_all.pkl
│ └── RAP2
│   ├── dataset_all.pkl
│   └── dataset_zs_run0.pkl
├── dataset
│ ├── __init__.py
│ ├── augmentation.py
│ ├── autoaug.py
│ ├── multi_label
│ │ ├── __init__.py
│ │ ├── coco.py
│ │ └── voc.py
│ └── pedes_attr
│   ├── __init__.py
│   ├── annotation.md
│   ├── pedes.py
│   └── preprocess
│     ├── __init__.py
│     ├── format_pa100k.py
│     ├── format_peta.py
│     ├── format_rap.py
│     └── format_rap2.py
├── docs
│ ├── __init__.py
│ └── illus_zs.png
├── infer.py
├── losses
│ ├── __init__.py
│ ├── bceloss.py
│ ├── label_smoothing.py
│ └── scaledbceloss.py
├── metrics
│ ├── __init__.py
│ ├── ml_metrics.py
│ └── pedestrian_metrics.py
├── models
│ ├── __init__.py
│ ├── backbone
│ │ ├── __init__.py
│ │ ├── bninception.py
│ │ ├── checkpoints
│ │ │ └── __init__.py
│ │ ├── resnet.py
│ │ ├── resnet_ibn.py
│ │ ├── swin_transformer.py
│ │ ├── tresnet
│ │ │ ├── __init__.py
│ │ │ ├── layers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── anti_aliasing.py
│ │ │ │ ├── avg_pool.py
│ │ │ │ └── general_layers.py
│ │ │ └── tresnet.py
│ │ └── vit.py
│ ├── base_block.py
│ ├── model_ema.py
│ ├── model_factory.py
│ └── registry.py
├── optim
│ ├── __init__.py
│ └── adamw.py
├── requirements.txt
├── scheduler
│ ├── __init__.py
│ ├── cos_annealing_with_restart.py
│ ├── cosine_lr.py
│ └── scheduler.py
├── tools
│ ├── __init__.py
│ ├── distributed.py
│ ├── function.py
│ ├── utils.py
│ └── vis.py
├── train.py
└── train_gpu.sh
/README.md:
--------------------------------------------------------------------------------
1 | ## Rethinking of Pedestrian Attribute Recognition: A Reliable Evaluation under Zero-Shot Pedestrian Identity Setting (official PyTorch implementation)
2 |
3 | 
4 | _This [paper](https://arxiv.org/abs/2107.03576), submitted to TIP, is an extension of our earlier [arXiv paper](https://arxiv.org/abs/2005.11909)._
5 |
6 | This project has been adopted by [JDAI-CV/fast-reid](https://github.com/JDAI-CV/fast-reid/tree/master/projects/FastAttr) and the [PP-Human](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/pphuman/docs/attribute.md) module of PaddleDetection.
7 |
8 | #### This project aims to
9 | 1. provide a strong baseline for __Pedestrian Attribute Recognition__ and __Multi-Label Classification__.
10 | 2. provide two new datasets, RAPzs and PETAzs, following the zero-shot pedestrian identity setting.
11 | 3. provide a general training pipeline for pedestrian attribute recognition and multi-label classification tasks.
12 |
13 | #### This project provides
14 | 1. DDP training, which is mainly used for multi-label classification.
15 | 2. Training on all attributes but testing on "selected" attributes only, since attributes whose proportion of positive samples falls below a threshold (e.g., 0.01) are excluded from evaluation.
16 |     1. For PETA and PETAzs, 35 of the 105 attributes are selected for performance evaluation.
17 |     2. For RAPv1, 51 of the 92 attributes are selected for performance evaluation.
18 |     3. For RAPv2 and RAPzs, 54 and 53 of the 152 attributes are selected for performance evaluation, respectively.
19 |     4. For PA100k, all attributes are selected for performance evaluation.
20 |     - However, training on all attributes _does not bring_ consistent performance improvement across datasets.
21 | 3. An EMA model.
22 | 4. Transformer-based models, such as Swin Transformer (with a large performance improvement) and ViT.
23 | 5. Convenient dataset info files such as `dataset_all.pkl`; see the sketch below the list.
24 |
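As an illustration of what `dataset_all.pkl` holds, here is a hedged sketch inferred from how `dataset/pedes_attr/pedes.py` reads it (the field names come from that file; the container supports attribute-style access):

```python
import pickle

info = pickle.load(open('./data/PETA/dataset_all.pkl', 'rb'))
print(info.root)               # image root directory
print(info.image_name[:3])     # image file names
print(info.label.shape)        # (num_images, num_attributes) binary label matrix
print(info.attr_name[:5])      # attribute names
print(info.partition.keys())   # splits, e.g. 'trainval' / 'test'
print(info.label_idx.eval)     # indices of the evaluated subset (absent in zero-shot pkls)
```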
25 |
26 | ## Dataset Info
27 | - PETA: Pedestrian Attribute Recognition At Far Distance [[Paper](http://mmlab.ie.cuhk.edu.hk/projects/PETA_files/Pedestrian%20Attribute%20Recognition%20At%20Far%20Distance.pdf)][[Project](http://mmlab.ie.cuhk.edu.hk/projects/PETA.html)]
28 |
29 | - PA100K[[Paper](http://openaccess.thecvf.com/content_ICCV_2017/papers/Liu_HydraPlus-Net_Attentive_Deep_ICCV_2017_paper.pdf)][[Github](https://github.com/xh-liu/HydraPlus-Net)]
30 |
31 | - RAP: A Richly Annotated Dataset for Pedestrian Attribute Recognition
32 | - v1 [[Paper](https://arxiv.org/pdf/1603.07054v3.pdf)][[Project](http://www.rapdataset.com/)]
33 | - v2 [[Paper](https://ieeexplore.ieee.org/abstract/document/8510891)][[Project](http://www.rapdataset.com/)]
34 |
35 | - PETAzs & RAPzs : Rethinking of Pedestrian Attribute Recognition: A Reliable Evaluation under Zero-Shot Pedestrian Identity Setting [Paper](https://arxiv.org/abs/2107.03576) [[Project](http://www.rapdataset.com/)]
36 |
37 |
38 | ## Performance
39 |
40 | ### Pedestrian Attribute Recognition
41 |
42 | |Datasets|Models|mA|Acc|Prec|Rec|F1|
43 | |:------:|:---:|---|---|---|---|---|
44 | |PA100k|resnet50|80.21|79.15|87.79|87.01|87.40|
45 | |--|resnet50*|79.85|79.13|89.45|85.40|87.38|
46 | |--|resnet50 + EMA|81.97|80.20|88.06|88.17|88.11|
47 | |--|bninception|79.13|78.19|87.42|86.21|86.81|
48 | |--|TresnetM|74.46|68.72|79.82|80.71|80.26|
49 | |--|swin_s|82.19|80.35|87.85|88.51|88.18|
50 | |--|vit_s|79.40|77.61|86.41|86.22|86.32|
51 | |--|vit_b|81.01|79.38|87.60|87.49|87.55|
52 | |PETA|resnet50|83.96|78.65|87.08|85.62|86.35|
53 | |PETAzs|resnet50|71.43|58.69|74.41|69.82|72.04|
54 | |RAPv1|resnet50|79.27|67.98|80.19|79.71|79.95|
55 | |RAPv2|resnet50|78.52|66.09|77.20|80.23|78.68|
56 | |RAPzs|resnet50|71.76|64.83|78.75|76.60|77.66|
57 |
58 | - The resnet50* model is trained with the weighted loss function proposed by Tan et al. in [AAAI 2020](https://ojs.aaai.org/index.php/AAAI/article/view/6883).
59 | - Performance on PETAzs and RAPzs is based on the first version of the PETAzs and RAPzs splits, as described in the [paper](https://arxiv.org/abs/2107.03576).
60 | - Experiments are conducted with an input size of (256, 192), so there may be minor differences from the results in the paper.
61 | - The reported performance is reached at the first learning-rate drop, and we take that checkpoint as the best model.
62 | - Pretrained models are now provided at [Google Drive]().
63 |
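For reference, mA is the label-based mean accuracy: the average over attributes of the mean of positive and negative recall. A minimal NumPy sketch of the standard definition (not the repository's `metrics/pedestrian_metrics.py` implementation):

```python
import numpy as np

def mean_accuracy(gt, pred, eps=1e-20):
    """gt, pred: (N, L) binary arrays of ground truth and thresholded predictions."""
    tp = ((gt == 1) & (pred == 1)).sum(0)   # per-attribute true positives
    tn = ((gt == 0) & (pred == 0)).sum(0)   # per-attribute true negatives
    pos = (gt == 1).sum(0)
    neg = (gt == 0).sum(0)
    return float(((tp / (pos + eps) + tn / (neg + eps)) / 2).mean())
```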
64 | ### Multi-label Classification
65 |
66 | |Datasets|Models|mAP|CP|CR|CF1|OP|OR|OF1|
67 | |:------:|:---:|---|---|---|---|---|---|---|
68 | |COCO|resnet101|82.75|84.17|72.07|77.65|85.16|75.47|80.02|
69 |
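CP/CR/CF1 are per-class (macro) precision, recall, and F1, while OP/OR/OF1 are their overall (micro) counterparts. A hedged NumPy sketch of the standard definitions:

```python
import numpy as np

def multilabel_metrics(gt, pred, eps=1e-20):
    """gt, pred: (N, L) binary arrays; returns (CP, CR, CF1, OP, OR, OF1)."""
    tp = ((gt == 1) & (pred == 1)).sum(0)
    cp = (tp / (pred.sum(0) + eps)).mean()   # class-averaged precision
    cr = (tp / (gt.sum(0) + eps)).mean()     # class-averaged recall
    cf1 = 2 * cp * cr / (cp + cr + eps)
    op = tp.sum() / (pred.sum() + eps)       # overall precision
    orec = tp.sum() / (gt.sum() + eps)       # overall recall
    of1 = 2 * op * orec / (op + orec + eps)
    return cp, cr, cf1, op, orec, of1
```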
70 |
71 | ## Pretrained Models
72 |
73 |
74 |
75 |
76 | ## Dependencies
77 |
78 | - Python 3.7
79 | - PyTorch 1.7.0
80 | - torchvision 0.8.2
81 | - CUDA 10.1
82 |
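Assuming these packages are pinned in `requirements.txt`, the environment can be set up with `pip install -r requirements.txt`.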
83 |
84 | ## Get Started
85 | 1. Run `git clone https://github.com/valencebond/Rethinking_of_PAR.git`
86 | 2. Create a directory in which to download the above datasets:
87 | ```
88 | cd Rethinking_of_PAR
89 | mkdir data
90 | ```
91 | 3. Prepare the datasets to have the following structure:
92 | ```
93 | ${project_dir}/data
94 |     PETA
95 |         images/
96 |         PETA.mat
97 |         dataset_all.pkl
98 |         dataset_zs_run0.pkl
99 |     PA100k
100 |         data/
101 |         dataset_all.pkl
102 |     RAP
103 |         RAP_dataset/
104 |         RAP_annotation/
105 |         dataset_all.pkl
106 |     RAP2
107 |         RAP_dataset/
108 |         RAP_annotation/
109 |         dataset_all.pkl, dataset_zs_run0.pkl
110 |     COCO14
111 |         train2014/
112 |         val2014/
113 |         ml_anno/
114 |             category.json
115 |             coco14_train_anno.pkl
116 |             coco14_val_anno.pkl
117 | ```
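The `dataset_all.pkl` / `dataset_zs_run0.pkl` files ship with this repository under `data/`; they can presumably also be regenerated from the raw annotations with the scripts in `dataset/pedes_attr/preprocess/` (`format_peta.py`, `format_pa100k.py`, `format_rap.py`, `format_rap2.py`), after adjusting the paths inside each script.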
118 | 4. Train the baseline based on resnet50:
119 | ```
120 | sh train_gpu.sh
121 | ```
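To train on a particular dataset, pass the corresponding config file; a hedged single-GPU sketch, assuming `train.py` takes the config path via a `--cfg` argument (as `update_config` in `configs/default.py` suggests):

```
python train.py --cfg ./configs/pedes_baseline/pa100k.yaml
```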
122 |
123 | ## Acknowledgements
124 |
125 | The code is based on the repositories of [Dangwei Li](https://github.com/dangweili/pedestrian-attribute-recognition-pytorch)
126 | and [Houjing Huang](https://github.com/dangweili/pedestrian-attribute-recognition-pytorch). Thanks for releasing their code.
127 |
128 |
129 | ### Citation
130 |
131 | If you use this method or this code in your research, please cite as:
132 |
133 |     @article{jia2021rethinking,
134 |       title={Rethinking of Pedestrian Attribute Recognition: A Reliable Evaluation under Zero-Shot Pedestrian Identity Setting},
135 |       author={Jia, Jian and Huang, Houjing and Chen, Xiaotang and Huang, Kaiqi},
136 |       journal={arXiv preprint arXiv:2107.03576},
137 |       year={2021}
138 |     }
139 |
140 |     @inproceedings{jia2021spatial,
141 |       title={Spatial and Semantic Consistency Regularizations for Pedestrian Attribute Recognition},
142 |       author={Jia, Jian and Chen, Xiaotang and Huang, Kaiqi},
143 |       booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
144 |       pages={962--971},
145 |       year={2021}
146 |     }
147 |
148 |
149 |
150 |
--------------------------------------------------------------------------------
/batch_engine.py:
--------------------------------------------------------------------------------
1 | import math
2 | import time
3 |
4 | import numpy as np
5 | import torch
6 | from torch.nn.utils import clip_grad_norm_
7 | from tqdm import tqdm
8 |
9 | from tools.distributed import reduce_tensor
10 | from tools.utils import AverageMeter, to_scalar, time_str
11 |
12 |
13 | def logits4pred(criterion, logits_list):
14 | if criterion.__class__.__name__.lower() in ['bceloss']:
15 | logits = logits_list[0]
16 | probs = logits.sigmoid()
17 | else:
18 |         assert False, f"{criterion.__class__.__name__.lower()} does not exist"
19 |
20 | return probs, logits
21 |
22 |
23 | def batch_trainer(cfg, args, epoch, model, model_ema, train_loader, criterion, optimizer, loss_w=[1, ], scheduler=None):
24 | model.train()
25 | epoch_time = time.time()
26 |
27 | loss_meter = AverageMeter()
28 | subloss_meters = [AverageMeter() for i in range(len(loss_w))]
29 |
30 | batch_num = len(train_loader)
31 | gt_list = []
32 | preds_probs = []
33 | preds_logits = []
34 | imgname_list = []
35 | loss_mtr_list = []
36 |
37 | lr = optimizer.param_groups[1]['lr']
38 |
39 | for step, (imgs, gt_label, imgname) in enumerate(train_loader):
40 | iter_num = epoch * len(train_loader) + step
41 |
42 | batch_time = time.time()
43 | imgs, gt_label = imgs.cuda(), gt_label.cuda()
44 | train_logits, feat = model(imgs, gt_label)
45 |
46 |
47 | loss_list, loss_mtr = criterion(train_logits, gt_label)
48 |
49 | train_loss = 0
50 |
51 | for i, l in enumerate(loss_w):
52 | train_loss += loss_list[i] * l
53 |
54 | optimizer.zero_grad()
55 | train_loss.backward()
56 |
57 | # for name, param in model.named_parameters():
58 | # if param.grad is None:
59 | # print("NO " + name)
60 | # else:
61 | # print("YES " + name)
62 |
63 | if cfg.TRAIN.CLIP_GRAD:
64 |             clip_grad_norm_(model.parameters(), max_norm=10.0)  # makes larger learning rates work
65 |
66 | optimizer.step()
67 |
68 | if cfg.TRAIN.LR_SCHEDULER.TYPE == 'annealing_cosine' and scheduler is not None:
69 | scheduler.step()
70 |
71 | if model_ema is not None:
72 | model_ema.update(model)
73 |
74 | torch.cuda.synchronize()
75 |
76 | if len(loss_list) > 1:
77 | for i, meter in enumerate(subloss_meters):
78 | meter.update(
79 | to_scalar(reduce_tensor(loss_list[i], args.world_size)
80 | if args.distributed else loss_list[i]))
81 | loss_meter.update(to_scalar(reduce_tensor(train_loss, args.world_size) if args.distributed else train_loss))
82 |
83 | train_probs, train_logits = logits4pred(criterion, train_logits)
84 |
85 | gt_list.append(gt_label.cpu().numpy())
86 | preds_probs.append(train_probs.detach().cpu().numpy())
87 | preds_logits.append(train_logits.detach().cpu().numpy())
88 |
89 | imgname_list.append(imgname)
90 |
91 | log_interval = 100
92 |
93 | if (step + 1) % log_interval == 0 or (step + 1) % len(train_loader) == 0:
94 | if args.local_rank == 0:
95 | print(f'{time_str()}, '
96 | f'Step {step}/{batch_num} in Ep {epoch}, '
97 | f'LR: [{optimizer.param_groups[0]["lr"]:.1e}, {optimizer.param_groups[1]["lr"]:.1e}] '
98 | f'Time: {time.time() - batch_time:.2f}s , '
99 | f'train_loss: {loss_meter.avg:.4f}, ')
100 |
101 | print([f'{meter.avg:.4f}' for meter in subloss_meters])
102 |
103 | # break
104 |
105 | train_loss = loss_meter.avg
106 |
107 | gt_label = np.concatenate(gt_list, axis=0)
108 | preds_probs = np.concatenate(preds_probs, axis=0)
109 |
110 | if args.local_rank == 0:
111 | print(f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, Loss: {loss_meter.avg:.4f}')
112 |
113 | return train_loss, gt_label, preds_probs, imgname_list, preds_logits, loss_mtr_list
114 |
115 |
116 | def valid_trainer(cfg, args, epoch, model, valid_loader, criterion, loss_w=[1, ]):
117 | model.eval()
118 | loss_meter = AverageMeter()
119 | subloss_meters = [AverageMeter() for i in range(len(loss_w))]
120 |
121 | preds_probs = []
122 | preds_logits = []
123 | gt_list = []
124 | imgname_list = []
125 | loss_mtr_list = []
126 |
127 | with torch.no_grad():
128 | for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
129 | imgs = imgs.cuda()
130 | gt_label = gt_label.cuda()
131 | gt_list.append(gt_label.cpu().numpy())
132 | gt_label[gt_label == -1] = 0
133 | valid_logits, feat = model(imgs, gt_label)
134 |
135 |
136 | loss_list, loss_mtr = criterion(valid_logits, gt_label)
137 | valid_loss = 0
138 | for i, l in enumerate(loss_list):
139 | valid_loss += loss_w[i] * l
140 |
141 | valid_probs, valid_logits = logits4pred(criterion, valid_logits)
142 | preds_probs.append(valid_probs.cpu().numpy())
143 | preds_logits.append(valid_logits.cpu().numpy())
144 |
145 | if len(loss_list) > 1:
146 | for i, meter in enumerate(subloss_meters):
147 | meter.update(
148 | to_scalar(reduce_tensor(loss_list[i], args.world_size) if args.distributed else loss_list[i]))
149 | loss_meter.update(to_scalar(reduce_tensor(valid_loss, args.world_size) if args.distributed else valid_loss))
150 |
151 | torch.cuda.synchronize()
152 |
153 | imgname_list.append(imgname)
154 |
155 | valid_loss = loss_meter.avg
156 |
157 | if args.local_rank == 0:
158 | print([f'{meter.avg:.4f}' for meter in subloss_meters])
159 |
160 | gt_label = np.concatenate(gt_list, axis=0)
161 | preds_probs = np.concatenate(preds_probs, axis=0)
162 | preds_logits = np.concatenate(preds_logits, axis=0)
163 |
164 | return valid_loss, gt_label, preds_probs, imgname_list, preds_logits, loss_mtr_list
165 |
--------------------------------------------------------------------------------
/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .default import _C as cfg
2 | from .default import update_config
--------------------------------------------------------------------------------
/configs/default.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from yacs.config import CfgNode as CN
6 |
7 | _C = CN()
8 |
9 |
10 | # ----- BASIC SETTINGS -----
11 | _C.NAME = "default"
12 | _C.REDIRECTOR = True
13 |
14 | _C.RELOAD = CN()
15 | _C.RELOAD.TYPE = False
16 | _C.RELOAD.NAME = 'backbone'
17 | _C.RELOAD.PTH = ''
18 |
19 |
20 | _C.DISTRIBUTTED = False
21 |
22 | # ----- DATASET BUILDER -----
23 | _C.DATASET = CN()
24 | _C.DATASET.TYPE = "pedes"
25 | _C.DATASET.NAME = "PA100k"
26 | _C.DATASET.TARGETTRANSFORM = []
27 | _C.DATASET.ZERO_SHOT = False
28 | _C.DATASET.LABEL = 'eval'  # train on all labels, evaluate on the selected subset (35 for PETA, 51 for RAP)
29 | _C.DATASET.TRAIN_SPLIT = 'trainval'
30 | _C.DATASET.VAL_SPLIT = 'val'
31 | _C.DATASET.TEST_SPLIT = 'test'
32 | _C.DATASET.HEIGHT = 256
33 | _C.DATASET.WIDTH = 192
34 |
35 | # ----- BACKBONE BUILDER -----
36 | _C.BACKBONE = CN()
37 | _C.BACKBONE.TYPE = "resnet50"
38 | _C.BACKBONE.MULTISCALE = False
39 |
40 | # ----- MODULE BUILDER -----
41 | # _C.MODULE = CN()
42 | # _C.MODULE.TYPE = "GAP"
43 |
44 | # ----- CLASSIFIER BUILDER -----
45 | _C.CLASSIFIER = CN()
46 | _C.CLASSIFIER.TYPE = "base"
47 | _C.CLASSIFIER.NAME = ""
48 | _C.CLASSIFIER.POOLING = "avg"
49 | _C.CLASSIFIER.BN = False
50 | _C.CLASSIFIER.SCALE = 1
51 |
52 | # ----- TRAIN BUILDER -----
53 | _C.TRAIN = CN()
54 | _C.TRAIN.BATCH_SIZE = 64
55 | _C.TRAIN.MAX_EPOCH = 30
56 | _C.TRAIN.SHUFFLE = True
57 | _C.TRAIN.NUM_WORKERS = 4
58 | _C.TRAIN.CLIP_GRAD = False
59 | _C.TRAIN.BN_WD = True
60 |
61 | _C.TRAIN.DATAAUG = CN()
62 | _C.TRAIN.DATAAUG.TYPE = 'base'
63 | _C.TRAIN.DATAAUG.AUTOAUG_PROB = 0.5
64 |
65 | _C.TRAIN.EMA = CN()
66 | _C.TRAIN.EMA.ENABLE = False
67 | _C.TRAIN.EMA.DECAY = 0.9998
68 | _C.TRAIN.EMA.FORCE_CPU = False
69 |
70 | _C.TRAIN.OPTIMIZER = CN()
71 | _C.TRAIN.OPTIMIZER.TYPE = "SGD"
72 | _C.TRAIN.OPTIMIZER.MOMENTUM = 0.9
73 | _C.TRAIN.OPTIMIZER.WEIGHT_DECAY = 1e-4
74 |
75 | _C.TRAIN.LR_SCHEDULER = CN()
76 | _C.TRAIN.LR_SCHEDULER.TYPE = "plateau"
77 | _C.TRAIN.LR_SCHEDULER.LR_STEP = [0,]
78 | _C.TRAIN.LR_SCHEDULER.LR_FT = 1e-2
79 | _C.TRAIN.LR_SCHEDULER.LR_NEW = 1e-2
80 | _C.TRAIN.LR_SCHEDULER.WMUP_COEF = 0.1
81 |
82 |
83 | _C.TRAIN.AUX_LOSS_START = -1
84 |
85 | # ----- INFER BUILDER -----
86 |
87 | _C.INFER = CN()
88 | _C.INFER.SAMPLING = False
89 |
90 | # ----- LOSS BUILDER -----
91 | _C.LOSS = CN()
92 | _C.LOSS.TYPE = "bce"
93 | _C.LOSS.SAMPLE_WEIGHT = "" # None
94 | _C.LOSS.LOSS_WEIGHT = [1, ]
95 | _C.LOSS.SIZESUM = True  # for each sample, the BCE loss is summed over all labels instead of averaged.
96 |
97 | _C.METRIC = CN()
98 | _C.METRIC.TYPE = 'pedestrian'
99 |
100 | # ------ visualization ---------
101 | _C.VIS = CN()
102 | _C.VIS.CAM = 'valid'
103 | _C.VIS.TENSORBOARD = CN()
104 | _C.VIS.TENSORBOARD.ENABLE = True
105 |
106 | _C.VIS.VISDOM = False
107 |
108 |
109 | # ----------- Transformer -------------
110 | _C.TRANS = CN()
111 | _C.TRANS.DIM_HIDDEN = 256
112 | _C.TRANS.DROPOUT = 0.1
113 | _C.TRANS.NHEADS = 8
114 | _C.TRANS.DIM_FFD = 2048
115 | _C.TRANS.ENC_LAYERS = 6
116 | _C.TRANS.DEC_LAYERS = 6
117 | _C.TRANS.PRE_NORM = False
118 | _C.TRANS.EOS_COEF = 0.1
119 | _C.TRANS.NUM_QUERIES = 100
120 |
121 |
122 | # testing
123 | # _C.TEST = CN()
124 | # _C.TEST.BATCH_SIZE = 32
125 | # _C.TEST.NUM_WORKERS = 8
126 | # _C.TEST.MODEL_FILE = ""
127 | #
128 | # _C.TRANSFORMS = CN()
129 | # _C.TRANSFORMS.TRAIN_TRANSFORMS = ("random_resized_crop", "random_horizontal_flip")
130 | # _C.TRANSFORMS.TEST_TRANSFORMS = ("shorter_resize_for_crop", "center_crop")
131 | #
132 | # _C.TRANSFORMS.PROCESS_DETAIL = CN()
133 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_CROP = CN()
134 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_CROP.PADDING = 4
135 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_RESIZED_CROP = CN()
136 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_RESIZED_CROP.SCALE = (0.08, 1.0)
137 | # _C.TRANSFORMS.PROCESS_DETAIL.RANDOM_RESIZED_CROP.RATIO = (0.75, 1.333333333)
138 |
139 |
140 | def update_config(cfg, args):
141 | cfg.defrost()
142 |
143 | cfg.merge_from_file(args.cfg) # update cfg
144 | # cfg.merge_from_list(args.opts)
145 |
146 | cfg.freeze()
147 |
--------------------------------------------------------------------------------
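As a usage sketch of the config machinery above (hypothetical argparse wiring; the actual entry point is `train.py`):

```python
import argparse

from configs import cfg, update_config  # re-exported in configs/__init__.py

parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, required=True, help='path to a YAML config file')
args = parser.parse_args()

update_config(cfg, args)  # merges the YAML over the defaults, then freezes cfg
print(cfg.DATASET.NAME, cfg.BACKBONE.TYPE, cfg.TRAIN.BATCH_SIZE)
```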
/configs/multilabel_baseline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/configs/multilabel_baseline/__init__.py
--------------------------------------------------------------------------------
/configs/multilabel_baseline/coco.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet101.sgd.bt32'
2 |
3 | DATASET:
4 | TYPE: 'multi_label'
5 | NAME: 'coco14'
6 | TRAIN_SPLIT: 'train'
7 | VAL_SPLIT: 'val'
8 | HEIGHT: 448
9 | WIDTH: 448
10 |
11 | METRIC:
12 | TYPE: 'multi_label'
13 |
14 | RELOAD:
15 | TYPE: False
16 | NAME: 'backbone'
17 |
18 | BACKBONE:
19 | TYPE: 'resnet101'
20 | MULTISCALE: False
21 |
22 | CLASSIFIER:
23 | NAME: 'linear'
24 | POOLING: 'max'
25 | SCALE: 1
26 | BN: False
27 |
28 | LOSS:
29 | TYPE: 'bceloss'
30 | LOSS_WEIGHT: [1]
31 | SIZESUM: True
32 |
33 | TRAIN:
34 | BN_WD: True
35 | BATCH_SIZE: 16
36 | CLIP_GRAD: True
37 | MAX_EPOCH: 30
38 | OPTIMIZER:
39 | TYPE: 'SGD'
40 | MOMENTUM: 0.9
41 | WEIGHT_DECAY: 1e-4
42 | LR_SCHEDULER:
43 | TYPE: 'multistep'
44 | LR_STEP: [ 14, ]
45 | LR_FT: 0.0005
46 | LR_NEW: 0.005
47 | EMA:
48 | ENABLE: False
49 |
50 | VIS:
51 | CAM: 'train'
52 |
53 |
--------------------------------------------------------------------------------
/configs/pedes_baseline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/configs/pedes_baseline/__init__.py
--------------------------------------------------------------------------------
/configs/pedes_baseline/pa100k.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet50.base.adam'
2 |
3 | DATASET:
4 | TYPE: 'pedes'
5 | NAME: 'PA100k'
6 | TRAIN_SPLIT: 'trainval'
7 | VAL_SPLIT: 'test'
8 | ZERO_SHOT: False
9 | LABEL: 'eval'
10 | HEIGHT: 256
11 | WIDTH: 192
12 |
13 | RELOAD:
14 | TYPE: False
15 | NAME: 'backbone'
16 |
17 | BACKBONE:
18 | TYPE: 'resnet50'
19 | # TYPE: 'bninception'
20 | # TYPE: 'tresnetM'
21 | # TYPE: 'swin_s'
22 | # TYPE: 'vit_s'
23 | # TYPE: 'vit_b'
24 |
25 | CLASSIFIER:
26 | NAME: 'linear'
27 | POOLING: 'avg'
28 | SCALE: 1
29 | BN: False
30 |
31 | LOSS:
32 | TYPE: 'bceloss'
33 | LOSS_WEIGHT: [1]
34 | SAMPLE_WEIGHT: 'weight'
35 |
36 |
37 | TRAIN:
38 | CLIP_GRAD: True
39 | BATCH_SIZE: 64
40 | OPTIMIZER:
41 | TYPE: 'adam'
42 | WEIGHT_DECAY: 5e-4
43 | LR_SCHEDULER:
44 | TYPE: 'plateau'
45 | LR_FT: 0.0001
46 | LR_NEW: 0.0001
47 | EMA:
48 | ENABLE: True
49 |
50 | METRIC:
51 | TYPE: 'pedestrian'
52 |
53 | VIS:
54 | CAM: 'valid'
55 |
--------------------------------------------------------------------------------
/configs/pedes_baseline/peta.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet50.base.adam'
2 |
3 | DATASET:
4 | TYPE: 'pedes'
5 | NAME: 'PETA'
6 | TRAIN_SPLIT: 'trainval'
7 | VAL_SPLIT: 'test'
8 | ZERO_SHOT: False
9 | LABEL: 'eval'
10 | HEIGHT: 256
11 | WIDTH: 192
12 |
13 | RELOAD:
14 | TYPE: False
15 | NAME: 'backbone'
16 |
17 | BACKBONE:
18 | TYPE: 'resnet50'
19 |
20 | CLASSIFIER:
21 | NAME: 'linear'
22 | POOLING: 'avg'
23 | SCALE: 1
24 | BN: False
25 |
26 | LOSS:
27 | TYPE: 'bceloss'
28 | LOSS_WEIGHT: [1]
29 | SAMPLE_WEIGHT: 'weight'
30 |
31 |
32 | TRAIN:
33 | CLIP_GRAD: True
34 | BATCH_SIZE: 64
35 | OPTIMIZER:
36 | TYPE: 'adam'
37 | WEIGHT_DECAY: 5e-4
38 | LR_SCHEDULER:
39 | TYPE: 'plateau'
40 | LR_FT: 0.0001
41 | LR_NEW: 0.0001
42 | EMA:
43 | ENABLE: False
44 |
45 | METRIC:
46 | TYPE: 'pedestrian'
47 |
48 | VIS:
49 | CAM: 'valid'
50 |
--------------------------------------------------------------------------------
/configs/pedes_baseline/peta_zs.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet50.base.adam'
2 |
3 | DATASET:
4 | TYPE: 'pedes'
5 | NAME: 'PETA'
6 | TRAIN_SPLIT: 'trainval'
7 | VAL_SPLIT: 'test'
8 | ZERO_SHOT: True
9 | LABEL: 'eval'
10 | HEIGHT: 256
11 | WIDTH: 192
12 |
13 | RELOAD:
14 | TYPE: False
15 | NAME: 'backbone'
16 |
17 | BACKBONE:
18 | TYPE: 'resnet50'
19 |
20 | CLASSIFIER:
21 | NAME: 'linear'
22 | POOLING: 'avg'
23 | SCALE: 1
24 | BN: False
25 |
26 | LOSS:
27 | TYPE: 'bceloss'
28 | LOSS_WEIGHT: [1]
29 | SAMPLE_WEIGHT: 'weight'
30 |
31 |
32 | TRAIN:
33 | CLIP_GRAD: True
34 | BATCH_SIZE: 64
35 | OPTIMIZER:
36 | TYPE: 'adam'
37 | WEIGHT_DECAY: 5e-4
38 | LR_SCHEDULER:
39 | TYPE: 'plateau'
40 | LR_FT: 0.0001
41 | LR_NEW: 0.0001
42 | EMA:
43 | ENABLE: False
44 |
45 | METRIC:
46 | TYPE: 'pedestrian'
47 |
48 | VIS:
49 | CAM: 'valid'
50 |
--------------------------------------------------------------------------------
/configs/pedes_baseline/rap_zs.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet50.base.adam'
2 |
3 | DATASET:
4 | TYPE: 'pedes'
5 | NAME: 'RAP2'
6 | TRAIN_SPLIT: 'trainval'
7 | VAL_SPLIT: 'test'
8 | ZERO_SHOT: True
9 | LABEL: 'eval'
10 | HEIGHT: 256
11 | WIDTH: 192
12 |
13 | RELOAD:
14 | TYPE: False
15 | NAME: 'backbone'
16 |
17 | BACKBONE:
18 | TYPE: 'resnet50'
19 |
20 | CLASSIFIER:
21 | NAME: 'linear'
22 | POOLING: 'avg'
23 | SCALE: 1
24 | BN: False
25 |
26 | LOSS:
27 | TYPE: 'bceloss'
28 | LOSS_WEIGHT: [1]
29 | SAMPLE_WEIGHT: 'weight'
30 |
31 |
32 | TRAIN:
33 | CLIP_GRAD: True
34 | BATCH_SIZE: 64
35 | OPTIMIZER:
36 | TYPE: 'adam'
37 | WEIGHT_DECAY: 5e-4
38 | LR_SCHEDULER:
39 | TYPE: 'plateau'
40 | LR_FT: 0.0001
41 | LR_NEW: 0.0001
42 | EMA:
43 | ENABLE: False
44 |
45 | METRIC:
46 | TYPE: 'pedestrian'
47 |
48 | VIS:
49 | CAM: 'valid'
50 |
--------------------------------------------------------------------------------
/configs/pedes_baseline/rapv1.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet50.base.adam'
2 |
3 | DATASET:
4 | TYPE: 'pedes'
5 | NAME: 'RAP'
6 | TRAIN_SPLIT: 'trainval'
7 | VAL_SPLIT: 'test'
8 | ZERO_SHOT: False
9 | LABEL: 'eval'
10 | HEIGHT: 256
11 | WIDTH: 192
12 |
13 | RELOAD:
14 | TYPE: False
15 | NAME: 'backbone'
16 |
17 | BACKBONE:
18 | TYPE: 'resnet50'
19 |
20 | CLASSIFIER:
21 | NAME: 'linear'
22 | POOLING: 'avg'
23 | SCALE: 1
24 | BN: False
25 |
26 | LOSS:
27 | TYPE: 'bceloss'
28 | LOSS_WEIGHT: [1]
29 | SAMPLE_WEIGHT: 'weight'
30 |
31 |
32 | TRAIN:
33 | CLIP_GRAD: True
34 | BATCH_SIZE: 64
35 | OPTIMIZER:
36 | TYPE: 'adam'
37 | WEIGHT_DECAY: 5e-4
38 | LR_SCHEDULER:
39 | TYPE: 'plateau'
40 | LR_FT: 0.0001
41 | LR_NEW: 0.0001
42 | EMA:
43 | ENABLE: False
44 |
45 | METRIC:
46 | TYPE: 'pedestrian'
47 |
48 | VIS:
49 | CAM: 'valid'
50 |
--------------------------------------------------------------------------------
/configs/pedes_baseline/rapv2.yaml:
--------------------------------------------------------------------------------
1 | NAME: 'resnet50.base.adam'
2 |
3 | DATASET:
4 | TYPE: 'pedes'
5 | NAME: 'RAP2'
6 | TRAIN_SPLIT: 'trainval'
7 | VAL_SPLIT: 'test'
8 | ZERO_SHOT: False
9 | LABEL: 'eval'
10 | HEIGHT: 256
11 | WIDTH: 192
12 |
13 | RELOAD:
14 | TYPE: False
15 | NAME: 'backbone'
16 |
17 | BACKBONE:
18 | TYPE: 'resnet50'
19 |
20 | CLASSIFIER:
21 | NAME: 'linear'
22 | POOLING: 'avg'
23 | SCALE: 1
24 | BN: False
25 |
26 | LOSS:
27 | TYPE: 'bceloss'
28 | LOSS_WEIGHT: [1]
29 | SAMPLE_WEIGHT: 'weight'
30 |
31 |
32 | TRAIN:
33 | CLIP_GRAD: True
34 | BATCH_SIZE: 64
35 | OPTIMIZER:
36 | TYPE: 'adam'
37 | WEIGHT_DECAY: 5e-4
38 | LR_SCHEDULER:
39 | TYPE: 'plateau'
40 | LR_FT: 0.0001
41 | LR_NEW: 0.0001
42 | EMA:
43 | ENABLE: False
44 |
45 | METRIC:
46 | TYPE: 'pedestrian'
47 |
48 | VIS:
49 | CAM: 'valid'
50 |
--------------------------------------------------------------------------------
/data/COCO14/ml_anno/category.json:
--------------------------------------------------------------------------------
1 | {"airplane": 0, "apple": 1, "backpack": 2, "banana": 3, "baseball bat": 4, "baseball glove": 5, "bear": 6, "bed": 7, "bench": 8, "bicycle": 9, "bird": 10, "boat": 11, "book": 12, "bottle": 13, "bowl": 14, "broccoli": 15, "bus": 16, "cake": 17, "car": 18, "carrot": 19, "cat": 20, "cell phone": 21, "chair": 22, "clock": 23, "couch": 24, "cow": 25, "cup": 26, "dining table": 27, "dog": 28, "donut": 29, "elephant": 30, "fire hydrant": 31, "fork": 32, "frisbee": 33, "giraffe": 34, "hair drier": 35, "handbag": 36, "horse": 37, "hot dog": 38, "keyboard": 39, "kite": 40, "knife": 41, "laptop": 42, "microwave": 43, "motorcycle": 44, "mouse": 45, "orange": 46, "oven": 47, "parking meter": 48, "person": 49, "pizza": 50, "potted plant": 51, "refrigerator": 52, "remote": 53, "sandwich": 54, "scissors": 55, "sheep": 56, "sink": 57, "skateboard": 58, "skis": 59, "snowboard": 60, "spoon": 61, "sports ball": 62, "stop sign": 63, "suitcase": 64, "surfboard": 65, "teddy bear": 66, "tennis racket": 67, "tie": 68, "toaster": 69, "toilet": 70, "toothbrush": 71, "traffic light": 72, "train": 73, "truck": 74, "tv": 75, "umbrella": 76, "vase": 77, "wine glass": 78, "zebra": 79}
--------------------------------------------------------------------------------
/data/COCO14/ml_anno/coco14_train_anno.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/COCO14/ml_anno/coco14_train_anno.pkl
--------------------------------------------------------------------------------
/data/COCO14/ml_anno/coco14_val_anno.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/COCO14/ml_anno/coco14_val_anno.pkl
--------------------------------------------------------------------------------
/data/PA100k/dataset_all.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/PA100k/dataset_all.pkl
--------------------------------------------------------------------------------
/data/PETA/dataset_all.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/PETA/dataset_all.pkl
--------------------------------------------------------------------------------
/data/PETA/dataset_zs_run0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/PETA/dataset_zs_run0.pkl
--------------------------------------------------------------------------------
/data/RAP/dataset_all.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/RAP/dataset_all.pkl
--------------------------------------------------------------------------------
/data/RAP2/dataset_all.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/RAP2/dataset_all.pkl
--------------------------------------------------------------------------------
/data/RAP2/dataset_zs_run0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/data/RAP2/dataset_zs_run0.pkl
--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/__init__.py
--------------------------------------------------------------------------------
/dataset/augmentation.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import numpy as np
4 | import torchvision.transforms as T
5 | from PIL import Image
6 |
7 | from dataset.autoaug import AutoAugment
8 |
9 |
10 | class MultiScaleCrop(object):
11 |
12 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=True, more_fix_crop=True):
13 |         self.scales = scales if scales is not None else [1, .875, .75, .66]  # crop scales relative to the short side
14 | self.max_distort = max_distort
15 | self.fix_crop = fix_crop
16 | self.more_fix_crop = more_fix_crop
17 | self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size]
18 | self.interpolation = Image.BILINEAR
19 |
20 | def __call__(self, img):
21 | im_size = img.size
22 | crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size)
23 | crop_img_group = img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h))
24 | ret_img_group = crop_img_group.resize((self.input_size[0], self.input_size[1]), self.interpolation)
25 | return ret_img_group
26 |
27 | def _sample_crop_size(self, im_size):
28 | image_w, image_h = im_size[0], im_size[1]
29 |
30 | # find a crop size
31 | base_size = min(image_w, image_h)
32 | crop_sizes = [int(base_size * x) for x in self.scales]
33 | crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes]
34 | crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes]
35 |
36 | pairs = []
37 | for i, h in enumerate(crop_h):
38 | for j, w in enumerate(crop_w):
39 | if abs(i - j) <= self.max_distort:
40 | pairs.append((w, h))
41 |
42 | crop_pair = random.choice(pairs)
43 | if not self.fix_crop:
44 | w_offset = random.randint(0, image_w - crop_pair[0])
45 | h_offset = random.randint(0, image_h - crop_pair[1])
46 | else:
47 | w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1])
48 |
49 | return crop_pair[0], crop_pair[1], w_offset, h_offset
50 |
51 | def _sample_fix_offset(self, image_w, image_h, crop_w, crop_h):
52 | offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h)
53 | return random.choice(offsets)
54 |
55 | @staticmethod
56 | def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h):
57 | w_step = (image_w - crop_w) // 4
58 | h_step = (image_h - crop_h) // 4
59 |
60 | ret = list()
61 | ret.append((0, 0)) # upper left
62 | ret.append((4 * w_step, 0)) # upper right
63 | ret.append((0, 4 * h_step)) # lower left
64 | ret.append((4 * w_step, 4 * h_step)) # lower right
65 | ret.append((2 * w_step, 2 * h_step)) # center
66 |
67 | if more_fix_crop:
68 | ret.append((0, 2 * h_step)) # center left
69 | ret.append((4 * w_step, 2 * h_step)) # center right
70 | ret.append((2 * w_step, 4 * h_step)) # lower center
71 | ret.append((2 * w_step, 0 * h_step)) # upper center
72 |
73 | ret.append((1 * w_step, 1 * h_step)) # upper left quarter
74 | ret.append((3 * w_step, 1 * h_step)) # upper right quarter
75 | ret.append((1 * w_step, 3 * h_step)) # lower left quarter
76 |             ret.append((3 * w_step, 3 * h_step))  # lower right quarter
77 |
78 | return ret
79 |
80 | def __str__(self):
81 | return self.__class__.__name__
82 |
83 |
84 |
85 | def get_transform(cfg):
86 | height = cfg.DATASET.HEIGHT
87 | width = cfg.DATASET.WIDTH
88 | normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
89 |
90 | if cfg.DATASET.TYPE == 'pedes':
91 |
92 | train_transform = T.Compose([
93 | T.Resize((height, width)),
94 | T.Pad(10),
95 | T.RandomCrop((height, width)),
96 | T.RandomHorizontalFlip(),
97 | T.ToTensor(),
98 | normalize,
99 | ])
100 |
101 | valid_transform = T.Compose([
102 | T.Resize((height, width)),
103 | T.ToTensor(),
104 | normalize
105 | ])
106 |
107 | elif cfg.DATASET.TYPE == 'multi_label':
108 |
109 | valid_transform = T.Compose([
110 | T.Resize([height, width]),
111 | T.ToTensor(),
112 | normalize,
113 | ])
114 |
115 | if cfg.TRAIN.DATAAUG.TYPE == 'autoaug':
116 | train_transform = T.Compose([
117 | T.RandomApply([AutoAugment()], p=cfg.TRAIN.DATAAUG.AUTOAUG_PROB),
118 | T.Resize((height, width), interpolation=3),
119 | T.RandomHorizontalFlip(),
120 | T.ToTensor(),
121 | ])
122 |
123 | else:
124 | train_transform = T.Compose([
125 | T.Resize((height + 64, width + 64)),
126 | MultiScaleCrop(height, scales=(1.0, 0.875, 0.75, 0.66, 0.5), max_distort=2),
127 | T.RandomHorizontalFlip(),
128 | T.ToTensor(),
129 | normalize
130 | ])
131 | else:
132 |
133 |         assert False, f'dataset type {cfg.DATASET.TYPE} is not supported'
134 |
135 | return train_transform, valid_transform
136 |
--------------------------------------------------------------------------------
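A minimal sketch of `MultiScaleCrop` in isolation (dummy image; an integer `input_size` expands to a square output):

```python
from PIL import Image

from dataset.augmentation import MultiScaleCrop

img = Image.new('RGB', (192, 256))                     # dummy 192x256 pedestrian crop
crop = MultiScaleCrop(224, scales=(1.0, 0.875, 0.75))  # fixed-offset crop, resized to 224x224
out = crop(img)
print(out.size)                                        # (224, 224)
```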
/dataset/multi_label/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/multi_label/__init__.py
--------------------------------------------------------------------------------
/dataset/multi_label/coco.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import torch.utils.data as data
4 | import json
5 | import os
6 | import subprocess
7 | from PIL import Image
8 | import numpy as np
9 | import torch
10 | import pickle
11 | import logging
12 |
13 | # from util import *
14 |
15 |
16 | urls = {'train_img': 'http://images.cocodataset.org/zips/train2014.zip',
17 | 'val_img': 'http://images.cocodataset.org/zips/val2014.zip',
18 | 'annotations': 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip'}
19 |
20 | # def download_coco2014(root, phase):
21 | # # if not os.path.exists(root):
22 | # # os.makedirs(root)
23 | # # tmpdir = os.path.join(root, 'tmp/')
24 | # # data = os.path.join(root, 'data/')
25 | # # if not os.path.exists(data):
26 | # # os.makedirs(data)
27 | # # if not os.path.exists(tmpdir):
28 | # # os.makedirs(tmpdir)
29 | # # if phase == 'train':
30 | # # filename = 'train2014.zip'
31 | # # elif phase == 'val':
32 | # # filename = 'val2014.zip'
33 | # # cached_file = os.path.join(tmpdir, filename)
34 | # # if not os.path.exists(cached_file):
35 | # # print('Downloading: "{}" to {}\n'.format(urls[phase + '_img'], cached_file))
36 | # # os.chdir(tmpdir)
37 | # # subprocess.call('wget ' + urls[phase + '_img'], shell=True)
38 | # # os.chdir(root)
39 | # # # extract file
40 | # # img_data = os.path.join(data, filename.split('.')[0])
41 | # # if not os.path.exists(img_data):
42 | # # print('[dataset] Extracting tar file {file} to {path}'.format(file=cached_file, path=data))
43 | # # command = 'unzip {} -d {}'.format(cached_file,data)
44 | # # os.system(command)
45 | # # print('[dataset] Done!')
46 | #
47 | # # train/val images/annotations
48 | # # cached_file = os.path.join(tmpdir, 'annotations_trainval2014.zip')
49 | # # if not os.path.exists(cached_file):
50 | # # print('Downloading: "{}" to {}\n'.format(urls['annotations'], cached_file))
51 | # # os.chdir(tmpdir)
52 | # # subprocess.Popen('wget ' + urls['annotations'], shell=True)
53 | # # os.chdir(root)
54 | # # annotations_data = os.path.join(data, 'annotations')
55 | # # if not os.path.exists(annotations_data):
56 | # # print('[dataset] Extracting tar file {file} to {path}'.format(file=cached_file, path=data))
57 | # # command = 'unzip {} -d {}'.format(cached_file, data)
58 | # # os.system(command)
59 | # # print('[annotation] Done!')
60 | #
61 | # anno = os.path.join(data, '{}_anno.json'.format(phase))
62 | # img_id = {}
63 | # annotations_id = {}
64 | # if not os.path.exists(anno):
65 | # annotations_file = json.load(open(os.path.join(annotations_data, 'instances_{}2014.json'.format(phase))))
66 | # annotations = annotations_file['annotations']
67 | # category = annotations_file['categories']
68 | # category_id = {}
69 | # for cat in category:
70 | # category_id[cat['id']] = cat['name']
71 | #     cat2idx = category_to_idx(sorted(category_id.values()))
72 | # images = annotations_file['images']
73 | # for annotation in annotations:
74 | # if annotation['image_id'] not in annotations_id:
75 | # annotations_id[annotation['image_id']] = set()
76 | # annotations_id[annotation['image_id']].add(cat2idx[category_id[annotation['category_id']]])
77 | # for img in images:
78 | # if img['id'] not in annotations_id:
79 | # continue
80 | # if img['id'] not in img_id:
81 | # img_id[img['id']] = {}
82 | # img_id[img['id']]['file_name'] = img['file_name']
83 | # img_id[img['id']]['labels'] = list(annotations_id[img['id']])
84 | # anno_list = []
85 | # for k, v in img_id.items():
86 | # anno_list.append(v)
87 | # json.dump(anno_list, open(anno, 'w'))
88 | # if not os.path.exists(os.path.join(data, 'category.json')):
89 | # json.dump(cat2idx, open(os.path.join(data, 'category.json'), 'w'))
90 | # del img_id
91 | # del anno_list
92 | # del images
93 | # del annotations_id
94 | # del annotations
95 | # del category
96 | # del category_id
97 | # print('[json] Done!')
98 |
99 |
100 | def category_to_idx(category):
101 | cat2idx = {}
102 | for cat in category:
103 | cat2idx[cat] = len(cat2idx)
104 | return cat2idx
105 |
106 |
107 | class COCO14(data.Dataset):
108 |
109 | def __init__(self, cfg, split, transform=None, target_transform=None):
110 |
111 | root_path = './data/COCO14'
112 | self.img_dir = os.path.join(root_path, f'{split}2014')
113 | self.split = split
114 | self.transform = transform
115 | self.target_transform = target_transform
116 |
117 | list_path = os.path.join(root_path, 'ml_anno', f'coco14_{self.split}_anno.pkl')
118 | anno = pickle.load(open(list_path, 'rb+'))
119 | self.img_id = anno['image_name']
120 | self.label = anno['labels']
121 | self.img_idx = range(len(self.img_id))
122 |
123 | self.cat2idx = json.load(open(os.path.join(root_path, 'ml_anno', 'category.json'), 'r'))
124 |
125 | self.attr_id = list(self.cat2idx.keys())
126 | self.attr_num = len(self.cat2idx)
127 |
128 | # just for aligning with pedestrian attribute dataset
129 | self.eval_attr_num = len(self.cat2idx)
130 |
131 | def __len__(self):
132 | return len(self.img_id)
133 |
134 | def __getitem__(self, index):
135 |
136 | imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index]
137 | imgpath = os.path.join(self.img_dir, imgname)
138 | img = Image.open(imgpath).convert('RGB')
139 |
140 | if self.transform is not None:
141 | img = self.transform(img)
142 |
143 | gt_label = gt_label.astype(np.float32)
144 |
145 | if self.target_transform:
146 | gt_label = gt_label[self.target_transform]
147 |
148 | return img, gt_label, imgname
149 |
150 |
151 |
--------------------------------------------------------------------------------
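A sketch of wiring `COCO14` into a `DataLoader` (assumes the annotation pickles above sit under `./data/COCO14`, and that `coco.yaml` has been merged into `cfg` so that `DATASET.TYPE` is `'multi_label'`):

```python
from torch.utils.data import DataLoader

from configs import cfg
from dataset.augmentation import get_transform
from dataset.multi_label.coco import COCO14

train_tsfm, valid_tsfm = get_transform(cfg)  # transforms chosen by cfg.DATASET.TYPE
train_set = COCO14(cfg, split='train', transform=train_tsfm)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=4)
imgs, gt_label, imgname = next(iter(train_loader))
```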
/dataset/multi_label/voc.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | import os.path
4 | import tarfile
5 | from urllib.parse import urlparse
6 |
7 | import numpy as np
8 | import torch
9 | import torch.utils.data as data
10 | from PIL import Image
11 | import pickle
12 |
13 | object_categories = ['aeroplane', 'bicycle', 'bird', 'boat',
14 | 'bottle', 'bus', 'car', 'cat', 'chair',
15 | 'cow', 'diningtable', 'dog', 'horse',
16 | 'motorbike', 'person', 'pottedplant',
17 | 'sheep', 'sofa', 'train', 'tvmonitor']
18 |
19 | urls = {
20 | 'devkit': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar',
21 | 'trainval_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
22 | 'test_images_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
23 | 'test_anno_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtestnoimgs_06-Nov-2007.tar',
24 | }
25 |
26 |
27 | def read_image_label(file):
28 | print('[dataset] read ' + file)
29 | data = dict()
30 | with open(file, 'r') as f:
31 | for line in f:
32 | tmp = line.split(' ')
33 | name = tmp[0]
34 | label = int(tmp[-1])
35 | data[name] = label
36 | # data.append([name, label])
37 | # print('%s %d' % (name, label))
38 | return data
39 |
40 |
41 | def read_object_labels(root, dataset, set):
42 | path_labels = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main')
43 | labeled_data = dict()
44 | num_classes = len(object_categories)
45 |
46 | for i in range(num_classes):
47 | file = os.path.join(path_labels, object_categories[i] + '_' + set + '.txt')
48 | data = read_image_label(file)
49 |
50 | if i == 0:
51 | for (name, label) in data.items():
52 | labels = np.zeros(num_classes)
53 | labels[i] = label
54 | labeled_data[name] = labels
55 | else:
56 | for (name, label) in data.items():
57 | labeled_data[name][i] = label
58 |
59 | return labeled_data
60 |
61 |
62 | def write_object_labels_csv(file, labeled_data):
63 | # write a csv file
64 | print('[dataset] write file %s' % file)
65 | with open(file, 'w') as csvfile:
66 | fieldnames = ['name']
67 | fieldnames.extend(object_categories)
68 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
69 |
70 | writer.writeheader()
71 | for (name, labels) in labeled_data.items():
72 | example = {'name': name}
73 | for i in range(20):
74 | example[fieldnames[i + 1]] = int(labels[i])
75 | writer.writerow(example)
76 |
77 | csvfile.close()
78 |
79 |
80 | def read_object_labels_csv(file, header=True):
81 | images = []
82 | num_categories = 0
83 | print('[dataset] read', file)
84 | with open(file, 'r') as f:
85 | reader = csv.reader(f)
86 | rownum = 0
87 | for row in reader:
88 | if header and rownum == 0:
89 | header = row
90 | else:
91 | if num_categories == 0:
92 | num_categories = len(row) - 1
93 | name = row[0]
94 | labels = (np.asarray(row[1:num_categories + 1])).astype(np.float32)
95 | labels = torch.from_numpy(labels)
96 | item = (name, labels)
97 | images.append(item)
98 | rownum += 1
99 | return images
100 |
101 |
102 | def read_csv(file):
103 | images = []
104 | print('[dataset] read', file)
105 | with open(file, 'r') as f:
106 | reader = csv.reader(f)
107 | for i, row in enumerate(reader):
108 | if i == 0:
109 | continue
110 | name = row[0]
111 | labels = (np.asarray(row[1:])).astype(np.float32)
112 | labels = torch.from_numpy(labels)
113 | item = (name, labels)
114 | images.append(item)
115 | return images
116 |
117 |
118 | def find_images_classification(root, dataset, set):
119 | path_labels = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main')
120 | images = []
121 | file = os.path.join(path_labels, set + '.txt')
122 | with open(file, 'r') as f:
123 | for line in f:
124 | images.append(line)
125 | return images
126 |
127 |
128 | class VOC(data.Dataset):
129 | def __init__(self, root='./data/voc', phase='train', transform=None, target_transform=None):
130 | self.root = root
131 | # self.path_devkit = os.path.join(root, 'VOCdevkit')
132 | self.path_images = os.path.join(root, 'VOCdevkit', 'VOC2007', 'JPEGImages')
133 | self.phase = phase
134 | self.transform = transform
135 | self.target_transform = target_transform
136 |
137 | # download dataset
138 | # download_voc2007(self.root)
139 |
140 | # define path of csv file
141 | path_csv = os.path.join(self.root, 'files', 'VOC2007')
142 | # define filename of csv file
143 | file_csv = os.path.join(path_csv, 'classification_' + phase + '.csv')
144 |
145 | # create the csv file if necessary
146 | if not os.path.exists(file_csv):
147 | if not os.path.exists(path_csv): # create dir if necessary
148 | os.makedirs(path_csv)
149 | # generate csv file
150 | labeled_data = read_object_labels(self.root, 'VOC2007', self.phase)
151 | # write csv file
152 | write_object_labels_csv(file_csv, labeled_data)
153 |
154 | self.num_classes = len(object_categories)
155 | self.images = read_object_labels_csv(file_csv)
156 |         self.imgs, self.gt = zip(*self.images)
157 |
158 | # print('[dataset] VOC 2007 classification set=%s number of classes=%d number of images=%d' % (
159 | # phase, self.num_classes, len(self.images)))
160 |
161 | def __getitem__(self, index):
162 | path, target = self.images[index]
163 | target[target == -1] = 0
164 | img = Image.open(os.path.join(self.path_images, path + '.jpg')).convert('RGB')
165 | if self.transform is not None:
166 | img = self.transform(img)
167 | if self.target_transform is not None:
168 | target = self.target_transform(target)
169 |
170 | return img, target, 0, path
171 |
172 | def __len__(self):
173 | return len(self.images)
174 |
--------------------------------------------------------------------------------
/dataset/pedes_attr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/pedes_attr/__init__.py
--------------------------------------------------------------------------------
/dataset/pedes_attr/annotation.md:
--------------------------------------------------------------------------------
1 | ### dataset split
2 | - PETA 19000, train 9500, val 1900, test 7600
3 | - RAP 41585, train 33268, test 8317
4 | - PA100K 100000, train 80000, val 10000, test 10000
5 | - RAPv2 84928, train 50957, val 16986, test 16985
6 |
7 |
8 | ## Unified attribute order
9 | 1. head region
10 | 2. upper region
11 | 3. lower region
12 | 4. foot region
13 | 5. accessory/bag
14 | 6. age
15 | 7. gender
16 | 8. others
17 |
18 |
19 |
20 | ### PETA (35 in 105)
21 | num_ingroup = [5, 10, 6, 4, 5, 5]
22 |
23 | - 'accessoryHat','accessoryMuffler','accessoryNothing','accessorySunglasses','hairLong' [10, 18, 19, 30, 15] 5
24 | - 'upperBodyCasual', 'upperBodyFormal', 'upperBodyJacket', 'upperBodyLogo', 'upperBodyPlaid', 'upperBodyShortSleeve', 'upperBodyThinStripes', 'upperBodyTshirt','upperBodyOther','upperBodyVNeck' [7, 9, 11, 14, 21, 26, 29, 32, 33, 34] 10
25 | - 'lowerBodyCasual', 'lowerBodyFormal', 'lowerBodyJeans', 'lowerBodyShorts', 'lowerBodyShortSkirt','lowerBodyTrousers' [6, 8, 12, 25, 27, 31] 6
26 | - 'footwearLeatherShoes', 'footwearSandals', 'footwearShoes', 'footwearSneaker' [13, 23, 24, 28] 4
27 | - 'carryingBackpack', 'carryingOther', 'carryingMessengerBag', 'carryingNothing', 'carryingPlasticBags' [4, 5, 17, 20, 22] 5
28 |
29 | - 'personalLess30','personalLess45','personalLess60','personalLarger60', [0, 1, 2, 3] 4
30 | - 'personalMale', [16] 1
31 |
32 | permutation = [10, 18, 19, 30, 15, 7, 9, 11, 14, 21, 26, 29, 32, 33, 34, 6, 8, 12, 25, 27, 31, 13, 23, 24, 28, 4, 5, 17, 20, 22, 0, 1, 2, 3, 16]
33 |
34 | ##### not evaluated attributes
35 | - color:
36 | ['upperBodyBlack', 'upperBodyBlue', 'upperBodyBrown', 'upperBodyGreen', 'upperBodyGrey', 'upperBodyOrange', 'upperBodyPink', 'upperBodyPurple', 'upperBodyRed', 'upperBodyWhite', 'upperBodyYellow',
37 | 'lowerBodyBlack', 'lowerBodyBlue', 'lowerBodyBrown', 'lowerBodyGreen', 'lowerBodyGrey', 'lowerBodyOrange', 'lowerBodyPink', 'lowerBodyPurple', 'lowerBodyRed', 'lowerBodyWhite', 'lowerBodyYellow',
38 | 'hairBlack', 'hairBlue', 'hairBrown', 'hairGreen', 'hairGrey', 'hairOrange', 'hairPink', 'hairPurple', 'hairRed', 'hairWhite', 'hairYellow',
39 | 'footwearBlack', 'footwearBlue', 'footwearBrown', 'footwearGreen', 'footwearGrey', 'footwearOrange', 'footwearPink', 'footwearPurple', 'footwearRed', 'footwearWhite', 'footwearYellow']
40 | - extra:
41 | ['accessoryHeadphone', 'personalLess15', 'carryingBabyBuggy', 'hairBald', 'footwearBoots', 'lowerBodyCapri', 'carryingShoppingTro', 'carryingUmbrella', 'personalFemale', 'carryingFolder', 'accessoryHairBand',
42 | 'lowerBodyHotPants', 'accessoryKerchief', 'lowerBodyLongSkirt', 'upperBodyLongSleeve', 'lowerBodyPlaid', 'lowerBodyThinStripes', 'carryingLuggageCase', 'upperBodyNoSleeve', 'hairShort', 'footwearStocking',
43 | 'upperBodySuit', 'carryingSuitcase', 'lowerBodySuits', 'upperBodySweater', 'upperBodyThickStripes']
44 |
45 |
46 | ### PA100K (26)
47 | num_in_group = [2, 6, 6, 1, 4, 7]
48 |
49 | - 'Hat','Glasses', [7,8] 2
50 | - 'ShortSleeve','LongSleeve','UpperStride','UpperLogo','UpperPlaid','UpperSplice', [13,14,15,16,17,18] 6
51 | - 'LowerStripe','LowerPattern','LongCoat','Trousers','Shorts','Skirt&Dress', [19,20,21,22,23,24] 6
52 | - 'boots' [25] 1
53 | - 'HandBag','ShoulderBag','Backpack','HoldObjectsInFront', [9,10,11,12] 4
54 |
55 | - 'AgeOver60','Age18-60','AgeLess18', [1,2,3] 3
56 | - 'Female' [0] 1
57 | - 'Front','Side','Back', [4,5,6] 3
58 |
59 | permutation = [7,8,13,14,15,16,17,18,19,20,21,22,23,24,25,9,10,11,12,1,2,3,0,4,5,6]
60 |
61 | ### RAPv1 (51)
62 |
63 | num_ingroup = [6, 9, 6, 5, 8, 17]
64 |
65 | - head 6:'hs-BaldHead','hs-LongHair','hs-BlackHair','hs-Hat','hs-Glasses','hs-Muffler', [9, 10, 11, 12, 13, 14,]
66 | - upper body 9:'ub-Shirt','ub-Sweater','ub-Vest','ub-TShirt','ub-Cotton','ub-Jacket','ub-SuitUp','ub-Tight','ub-ShortSleeve',[15, 16, 17, 18, 19, 20, 21, 22, 23,]
67 | - lower body 6:'lb-LongTrousers','lb-Skirt','lb-ShortSkirt','lb-Dress','lb-Jeans','lb-TightTrousers', [24, 25,26, 27, 28, 29,]
68 | - footwear 5:'shoes-Leather','shoes-Sport','shoes-Boots','shoes-Cloth','shoes-Casual', [30, 31, 32, 33, 34,]
69 | - accessory 8 [35, 36, 37, 38, 39, 40, 41, 42] :
70 | 'attach-Backpack','attach-SingleShoulderBag','attach-HandBag','attach-Box','attach-PlasticBag','attach-PaperBag','attach-HandTrunk','attach-Other',
71 |
72 | - age 3:'AgeLess16','Age17-30','Age31-45', 1:4 [1, 2, 3,]
73 | - gender 1:'Female', 0 [0,]
74 | - body shape 3:'BodyFat','BodyNormal','BodyThin',4:7 [4, 5, 6,]
75 | - role 2:'Customer','Clerk', 7:9 [ 7, 8,]
76 | - action 8:'action-Calling','action-Talking','action-Gathering','action-Holding','action-Pusing','action-Pulling','action-CarrybyArm','action-CarrybyHand'
77 | [43, 44, 45, 46, 47, 48, 49, 50]
78 |
79 | permutation = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
80 | 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 1, 2, 3, 0, 4, 5, 6, 7, 8, 43, 44, 45, 46,
81 | 47, 48, 49, 50]
82 |
83 | - color: 29
84 | ['up-Black', 'up-White', 'up-Gray', 'up-Red', 'up-Green', 'up-Blue', 'up-Yellow', 'up-Brown', 'up-Purple', 'up-Pink', 'up-Orange', 'up-Mixture',
85 | 'low-Black', 'low-White', 'low-Gray', 'low-Red', 'low-Green', 'low-Blue', 'low-Yellow', 'low-Mixture',
86 | 'shoes-Black', 'shoes-White', 'shoes-Gray', 'shoes-Red', 'shoes-Green', 'shoes-Blue', 'shoes-Yellow', 'shoes-Brown', 'shoes-Mixture']
87 |
88 | - extra: 12
89 | ['faceFront', 'faceBack', 'faceLeft', 'faceRight',
90 | 'occlusionLeft', 'occlusionRight', 'occlusionUp', 'occlusionDown', 'occlusion-Environment', 'occlusion-Attachment', 'occlusion-Person', 'occlusion-Other']
91 |
92 | ### RAPv2 (54)
93 | num_ingroup = [5, 10, 6, 6, 8, 19]
94 |
95 | - head 5:'hs-BaldHead', 'hs-LongHair', 'hs-BlackHair', 'hs-Hat', 'hs-Glasses', [10,11,12,13,14]
96 | - upper body 10:'ub-Shirt','ub-Sweater','ub-Vest','ub-TShirt','ub-Cotton','ub-Jacket','ub-SuitUp','ub-Tight','ub-ShortSleeve','ub-Others'
97 | [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
98 | - lower body 6:'lb-LongTrousers','lb-Skirt','lb-ShortSkirt','lb-Dress','lb-Jeans','lb-TightTrousers', [25 ,26, 27, 28, 29, 30]
99 | - footwear 6:'shoes-Leather', 'shoes-Sports', 'shoes-Boots', 'shoes-Cloth', 'shoes-Casual', 'shoes-Other', [31, 32, 33, 34, 35, 36]
100 | - accessory 8 [37, 38, 39, 40, 41, 42, 43, 44] :
101 | 'attachment-Backpack','attachment-ShoulderBag','attachment-HandBag','attachment-Box','attachment-PlasticBag','attachment-PaperBag','attachment-HandTrunk','attachment-Other'
102 |
103 | - age 4:'AgeLess16', 'Age17-30', 'Age31-45', 'Age46-60', [1, 2, 3, 4]
104 | - gender 1:'Female', [0,]
105 | - body shape 3:'BodyFat','BodyNormal','BodyThin',4:7 [5, 6, 7]
106 | - role 2:'Customer','Employee', [ 8, 9,]
107 | - action 9:'action-Calling','action-Talking','action-Gathering','action-Holding','action-Pushing','action-Pulling','action-CarryingByArm','action-CarryingByHand','action-Other'
108 | [45, 46, 47, 48, 49, 50, 51, 52, 53]
109 |
110 | permutation = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
111 | 36, 37, 38, 39, 40, 41, 42, 43, 44, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 45, 46, 47, 48, 49, 50, 51, 52, 53]
112 |
--------------------------------------------------------------------------------
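The permutation lists above define how raw label columns are reordered into the grouped head/upper/lower/foot/accessory/age/gender order; a sketch with dummy labels:

```python
import numpy as np

permutation = [10, 18, 19, 30, 15, 7, 9, 11, 14, 21, 26, 29, 32, 33, 34,
               6, 8, 12, 25, 27, 31, 13, 23, 24, 28, 4, 5, 17, 20, 22,
               0, 1, 2, 3, 16]                     # PETA, from the table above
label = np.random.randint(0, 2, size=(4, 35))      # dummy (N, num_attr) labels
label = label[:, permutation]                      # columns now follow the grouped order
```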
/dataset/pedes_attr/pedes.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import pickle
4 |
5 | import numpy as np
6 | import torch.utils.data as data
7 | from PIL import Image
8 |
9 | from tools.function import get_pkl_rootpath
10 |
11 |
12 | class PedesAttr(data.Dataset):
13 |
14 | def __init__(self, cfg, split, transform=None, target_transform=None, idx=None):
15 |
16 | assert cfg.DATASET.NAME in ['PETA', 'PA100k', 'RAP', 'RAP2'], \
17 |             f'dataset name {cfg.DATASET.NAME} does not exist'
18 |
19 | data_path = get_pkl_rootpath(cfg.DATASET.NAME, cfg.DATASET.ZERO_SHOT)
20 |
21 | print("which pickle", data_path)
22 |
23 | dataset_info = pickle.load(open(data_path, 'rb+'))
24 |
25 | img_id = dataset_info.image_name
26 |
27 | attr_label = dataset_info.label
28 | attr_label[attr_label == 2] = 0
29 | self.attr_id = dataset_info.attr_name
30 | self.attr_num = len(self.attr_id)
31 |
32 | if 'label_idx' not in dataset_info.keys():
33 |             print('this is the zero-shot split')
34 | assert cfg.DATASET.ZERO_SHOT
35 | self.eval_attr_num = self.attr_num
36 | else:
37 | self.eval_attr_idx = dataset_info.label_idx.eval
38 | self.eval_attr_num = len(self.eval_attr_idx)
39 |
40 | assert cfg.DATASET.LABEL in ['all', 'eval', 'color'], f'key word {cfg.DATASET.LABEL} error'
41 | if cfg.DATASET.LABEL == 'eval':
42 | attr_label = attr_label[:, self.eval_attr_idx]
43 | self.attr_id = [self.attr_id[i] for i in self.eval_attr_idx]
44 | self.attr_num = len(self.attr_id)
45 | elif cfg.DATASET.LABEL == 'color':
46 | attr_label = attr_label[:, self.eval_attr_idx + dataset_info.label_idx.color]
47 | self.attr_id = [self.attr_id[i] for i in self.eval_attr_idx + dataset_info.label_idx.color]
48 | self.attr_num = len(self.attr_id)
49 |
50 |         assert split in dataset_info.partition.keys(), f'split {split} does not exist'
51 |
52 | self.dataset = cfg.DATASET.NAME
53 | self.transform = transform
54 | self.target_transform = target_transform
55 |
56 | self.root_path = dataset_info.root
57 |
58 | if self.target_transform:
59 | self.attr_num = len(self.target_transform)
60 | print(f'{split} target_label: {self.target_transform}')
61 | else:
62 | self.attr_num = len(self.attr_id)
63 | print(f'{split} target_label: all')
64 |
65 | self.img_idx = dataset_info.partition[split]
66 |
67 | if isinstance(self.img_idx, list):
68 | self.img_idx = self.img_idx[0] # default partition 0
69 |
70 | if idx is not None:
71 | self.img_idx = idx
72 |
73 | self.img_num = self.img_idx.shape[0]
74 | self.img_id = [img_id[i] for i in self.img_idx]
75 | self.label = attr_label[self.img_idx] # [:, [0, 12]]
76 |
77 | def __getitem__(self, index):
78 |
79 | imgname, gt_label, imgidx = self.img_id[index], self.label[index], self.img_idx[index]
80 |
81 | imgpath = os.path.join(self.root_path, imgname)
82 | img = Image.open(imgpath)
83 |
84 | if self.transform is not None:
85 | img = self.transform(img)
86 |
87 | gt_label = gt_label.astype(np.float32)
88 |
89 | if self.target_transform:
90 | gt_label = gt_label[self.target_transform]
91 |
92 | return img, gt_label, imgname, # noisy_weight
93 |
94 | def __len__(self):
95 | return len(self.img_id)
96 |
97 |
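A minimal usage sketch for PedesAttr (illustrative, not part of the file). It assumes cfg has already been populated from one of the YAMLs under configs/pedes_baseline/ and that the matching dataset pickle exists under ./data/, mirroring how infer.py builds its loaders:

    from torch.utils.data import DataLoader

    from configs import cfg  # assumed already filled via update_config(cfg, args)
    from dataset.augmentation import get_transform
    from dataset.pedes_attr.pedes import PedesAttr

    train_tsfm, valid_tsfm = get_transform(cfg)
    train_set = PedesAttr(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm)
    loader = DataLoader(train_set, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True, num_workers=4)

    imgs, gt_labels, img_names = next(iter(loader))  # gt_labels: (B, attr_num) float32 in {0, 1}
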
--------------------------------------------------------------------------------
/dataset/pedes_attr/preprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/dataset/pedes_attr/preprocess/__init__.py
--------------------------------------------------------------------------------
/dataset/pedes_attr/preprocess/format_pa100k.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 |
6 | from easydict import EasyDict
7 | from scipy.io import loadmat
8 |
9 | np.random.seed(0)
10 | random.seed(0)
11 |
12 | group_order = [7, 8, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 9, 10, 11, 12, 1, 2, 3, 0, 4, 5, 6]
13 |
14 |
15 | def make_dir(path):
16 | if os.path.exists(path):
17 | pass
18 | else:
19 | os.mkdir(path)
20 |
21 |
22 | def generate_data_description(save_dir, reorder):
23 | """
24 | create a dataset description file, which consists of images, labels
25 | """
26 | # pa100k_data = loadmat('/mnt/data1/jiajian/dataset/attribute/PA100k/annotation.mat')
27 | pa100k_data = loadmat(os.path.join(save_dir, 'annotation.mat'))
28 |
29 | dataset = EasyDict()
30 | dataset.description = 'pa100k'
31 | dataset.reorder = 'group_order'
32 | dataset.root = os.path.join(save_dir, 'data')
33 |
34 | train_image_name = [pa100k_data['train_images_name'][i][0][0] for i in range(80000)]
35 | val_image_name = [pa100k_data['val_images_name'][i][0][0] for i in range(10000)]
36 | test_image_name = [pa100k_data['test_images_name'][i][0][0] for i in range(10000)]
37 | dataset.image_name = train_image_name + val_image_name + test_image_name
38 |
39 | dataset.label = np.concatenate((pa100k_data['train_label'], pa100k_data['val_label'], pa100k_data['test_label']), axis=0)
40 | dataset.attr_name = [pa100k_data['attributes'][i][0][0] for i in range(26)]
41 |
42 | dataset.label_idx = EasyDict()
43 | dataset.label_idx.eval = list(range(26))
44 |
45 | if reorder:
46 | dataset.label_idx.eval = group_order
47 |
48 | dataset.partition = EasyDict()
49 | dataset.partition.train = np.arange(0, 80000) # np.array(range(80000))
50 | dataset.partition.val = np.arange(80000, 90000) # np.array(range(80000, 90000))
51 | dataset.partition.test = np.arange(90000, 100000) # np.array(range(90000, 100000))
52 | dataset.partition.trainval = np.arange(0, 90000) # np.array(range(90000))
53 |
54 | dataset.weight_train = np.mean(dataset.label[dataset.partition.train], axis=0).astype(np.float32)
55 | dataset.weight_trainval = np.mean(dataset.label[dataset.partition.trainval], axis=0).astype(np.float32)
56 |
57 | with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
58 | pickle.dump(dataset, f)
59 |
60 |
61 | if __name__ == "__main__":
62 | save_dir = '/mnt/data1/jiajian/datasets/attribute/PA100k/'
63 | # save_dir = './data/PA100k/'
64 |     reorder = True
65 |     generate_data_description(save_dir, reorder=reorder)
66 |
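After this script runs, the generated pickle can be sanity-checked as below (an illustrative snippet, assuming the repo layout with the pickle under ./data/PA100k/):

    import pickle

    with open('./data/PA100k/dataset_all.pkl', 'rb') as f:
        d = pickle.load(f)

    print(len(d.image_name))           # 100000 images (80k train + 10k val + 10k test)
    print(d.label.shape)               # (100000, 26)
    print(d.partition.trainval.shape)  # (90000,)
    print(d.weight_trainval[:3])       # per-attribute positive ratios used by the losses
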
--------------------------------------------------------------------------------
/dataset/pedes_attr/preprocess/format_peta.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 |
6 | from easydict import EasyDict
7 | from scipy.io import loadmat
8 |
9 | np.random.seed(0)
10 | random.seed(0)
11 |
12 | # note: ref by annotation.md
13 |
14 | group_order = [10, 18, 19, 30, 15, 7, 9, 11, 14, 21, 26, 29, 32, 33, 34, 6, 8, 12, 25, 27, 31, 13, 23, 24, 28, 4, 5,
15 | 17, 20, 22, 0, 1, 2, 3, 16]
16 |
17 |
18 | def make_dir(path):
19 | if os.path.exists(path):
20 | pass
21 | else:
22 | os.mkdir(path)
23 |
24 |
25 | def generate_data_description(save_dir, reorder, new_split_path):
26 | """
27 | create a dataset description file, which consists of images, labels
28 | """
29 | peta_data = loadmat(os.path.join(save_dir, 'PETA.mat'))
30 | dataset = EasyDict()
31 | dataset.description = 'peta'
32 | dataset.reorder = 'group_order'
33 | dataset.root = os.path.join(save_dir, 'images')
34 | dataset.image_name = [f'{i + 1:05}.png' for i in range(19000)]
35 |
36 | raw_attr_name = [i[0][0] for i in peta_data['peta'][0][0][1]]
37 | # (19000, 105)
38 | raw_label = peta_data['peta'][0][0][0][:, 4:]
39 |
40 | # (19000, 35)
41 |
42 | dataset.label = raw_label
43 | dataset.attr_name = raw_attr_name
44 |
45 | dataset.label_idx = EasyDict()
46 | dataset.label_idx.eval = list(range(35))
47 | dataset.label_idx.color = list(range(35, 79))
48 | dataset.label_idx.extra = range(79, raw_label.shape[1]) # (79, 105)
49 |
50 | if reorder:
51 | dataset.label_idx.eval = group_order
52 |
53 | dataset.partition = EasyDict()
54 | dataset.partition.train = []
55 | dataset.partition.val = []
56 | dataset.partition.trainval = []
57 | dataset.partition.test = []
58 |
59 | dataset.weight_train = []
60 | dataset.weight_trainval = []
61 |
62 | if new_split_path:
63 |
64 | with open(new_split_path, 'rb+') as f:
65 | new_split = pickle.load(f)
66 |
67 | train = np.array(new_split.train_idx)
68 | val = np.array(new_split.val_idx)
69 | test = np.array(new_split.test_idx)
70 | trainval = np.concatenate((train, val), axis=0)
71 |
72 | dataset.partition.train = train
73 | dataset.partition.val = val
74 | dataset.partition.trainval = trainval
75 | dataset.partition.test = test
76 |
77 | weight_train = np.mean(dataset.label[train], axis=0).astype(np.float32)
78 | weight_trainval = np.mean(dataset.label[trainval], axis=0).astype(np.float32)
79 |
80 | dataset.weight_train.append(weight_train)
81 | dataset.weight_trainval.append(weight_trainval)
82 | with open(os.path.join(save_dir, 'dataset_zs_run4.pkl'), 'wb+') as f:
83 | pickle.dump(dataset, f)
84 |
85 | else:
86 |
87 | for idx in range(5):
88 | train = peta_data['peta'][0][0][3][idx][0][0][0][0][:, 0] - 1
89 | val = peta_data['peta'][0][0][3][idx][0][0][0][1][:, 0] - 1
90 | test = peta_data['peta'][0][0][3][idx][0][0][0][2][:, 0] - 1
91 | trainval = np.concatenate((train, val), axis=0)
92 |
93 | dataset.partition.train.append(train)
94 | dataset.partition.val.append(val)
95 | dataset.partition.trainval.append(trainval)
96 | dataset.partition.test.append(test)
97 |
98 | weight_train = np.mean(dataset.label[train], axis=0)
99 | weight_trainval = np.mean(dataset.label[trainval], axis=0)
100 |
101 | dataset.weight_train.append(weight_train)
102 | dataset.weight_trainval.append(weight_trainval)
103 |
104 | """
105 |     dataset.pkl contains only the evaluated attributes (35 labels)
106 |     dataset_all.pkl contains all attributes (105 labels)
107 | """
108 | with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
109 | pickle.dump(dataset, f)
110 |
111 |
112 | if __name__ == "__main__":
113 | save_dir = '/mnt/data1/jiajian/datasets/attribute/PETA/'
114 | new_split_path = '/mnt/data1/jiajian/code/Rethinking_of_PAR/datasets/jian_split/index_peta_split_id50_img300_ratio0.03_4.pkl'
115 | generate_data_description(save_dir, True, new_split_path)
116 |
--------------------------------------------------------------------------------
/dataset/pedes_attr/preprocess/format_rap.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 |
6 | from easydict import EasyDict
7 | from scipy.io import loadmat
8 |
9 | np.random.seed(0)
10 | random.seed(0)
11 |
12 |
13 | group_order = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
14 | 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 1, 2, 3, 0, 4, 5, 6, 7, 8, 43, 44,
15 | 45, 46, 47, 48, 49, 50]
16 |
17 |
18 | def make_dir(path):
19 | if os.path.exists(path):
20 | pass
21 | else:
22 | os.mkdir(path)
23 |
24 |
25 | def generate_data_description(save_dir, reorder):
26 | """
27 | create a dataset description file, which consists of images, labels
28 | """
29 |
30 | data = loadmat(os.path.join(save_dir, 'RAP_annotation/RAP_annotation.mat'))
31 |
32 | dataset = EasyDict()
33 | dataset.description = 'rap'
34 | dataset.reorder = 'group_order'
35 | dataset.root = os.path.join(save_dir, 'RAP_dataset')
36 | dataset.image_name = [data['RAP_annotation'][0][0][5][i][0][0] for i in range(41585)]
37 | raw_attr_name = [data['RAP_annotation'][0][0][3][i][0][0] for i in range(92)]
38 | # (41585, 92)
39 | raw_label = data['RAP_annotation'][0][0][1]
40 |     # dataset.label = raw_label[:, np.array(range(51))]  # dead store: immediately overwritten below
41 |
42 |     dataset.label = raw_label  # keep all 92 labels; label_idx selects the eval/color/extra subsets
43 | dataset.attr_name = raw_attr_name
44 |
45 | dataset.label_idx = EasyDict()
46 | dataset.label_idx.eval = list(range(51))
47 | dataset.label_idx.color = list(range(63, raw_label.shape[1])) # (63, 92)
48 | dataset.label_idx.extra = list(range(51, 63))
49 |
50 | if reorder:
51 | dataset.label_idx.eval = group_order
52 |
53 | dataset.partition = EasyDict()
54 | dataset.partition.trainval = []
55 | dataset.partition.test = []
56 |
57 | dataset.weight_trainval = []
58 |
59 | for idx in range(5):
60 | trainval = data['RAP_annotation'][0][0][0][idx][0][0][0][0][0, :] - 1
61 | test = data['RAP_annotation'][0][0][0][idx][0][0][0][1][0, :] - 1
62 |
63 | dataset.partition.trainval.append(trainval)
64 | dataset.partition.test.append(test)
65 |
66 | weight_trainval = np.mean(dataset.label[trainval], axis=0).astype(np.float32)
67 | dataset.weight_trainval.append(weight_trainval)
68 |
69 | with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
70 | pickle.dump(dataset, f)
71 |
72 |
73 | if __name__ == "__main__":
74 | save_dir = '/mnt/data1/jiajian/datasets/attribute/RAP/'
75 | reorder = True
76 | generate_data_description(save_dir, reorder)
77 |
--------------------------------------------------------------------------------
/dataset/pedes_attr/preprocess/format_rap2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pickle
5 | from scipy.io import loadmat
6 | from easydict import EasyDict
7 |
8 | np.random.seed(0)
9 | random.seed(0)
10 |
11 | group_order = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
12 | 36, 37, 38, 39, 40, 41, 42, 43, 44, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 45, 46, 47, 48, 49, 50, 51, 52, 53]
13 |
14 |
15 | def make_dir(path):
16 | if os.path.exists(path):
17 | pass
18 | else:
19 | os.mkdir(path)
20 |
21 |
22 | def generate_data_description(save_dir, reorder, new_split_path, version):
23 | data = loadmat(os.path.join(save_dir, 'RAP_annotation/RAP_annotation.mat'))
24 | data = data['RAP_annotation']
25 | dataset = EasyDict()
26 | dataset.description = 'rap2'
27 | dataset.reorder = 'group_order'
28 | dataset.root = os.path.join(save_dir, 'RAP_dataset')
29 | dataset.image_name = [data['name'][0][0][i][0][0] for i in range(84928)]
30 | raw_attr_name = [data['attribute'][0][0][i][0][0] for i in range(152)]
31 | raw_label = data['data'][0][0]
32 | selected_attr_idx = (data['selected_attribute'][0][0][0] - 1)[group_order].tolist() # 54
33 |
34 | color_attr_idx = list(range(31, 45)) + list(range(53, 67)) + list(range(74, 88)) # 42
35 | extra_attr_idx = np.setdiff1d(range(152), color_attr_idx + selected_attr_idx).tolist()[:24]
36 | extra_attr_idx = extra_attr_idx[:15] + extra_attr_idx[16:]
37 |
38 | dataset.label = raw_label[:, selected_attr_idx + color_attr_idx + extra_attr_idx] # (n, 119)
39 | dataset.attr_name = [raw_attr_name[i] for i in selected_attr_idx + color_attr_idx + extra_attr_idx]
40 |
41 | dataset.label_idx = EasyDict()
42 | dataset.label_idx.eval = list(range(54)) # 54
43 |     dataset.label_idx.color = list(range(54, 96))  # indices into the reordered dataset.label, not the raw 152-attr label
44 |     dataset.label_idx.extra = list(range(96, 119))  # likewise indices into dataset.label
45 |
46 | if reorder:
47 |         dataset.label_idx.eval = list(range(54))  # group_order was already applied when building selected_attr_idx
48 |
49 | dataset.partition = EasyDict()
50 | dataset.partition.train = []
51 | dataset.partition.val = []
52 | dataset.partition.test = []
53 | dataset.partition.trainval = []
54 |
55 | dataset.weight_train = []
56 | dataset.weight_trainval = []
57 |
58 | if new_split_path:
59 |
60 | # remove Age46-60
61 |         dataset.label_idx.eval.remove(38)  # 53 eval attrs remain
62 |
63 | with open(new_split_path, 'rb+') as f:
64 | new_split = pickle.load(f)
65 |
66 | train = np.array(new_split.train_idx)
67 | val = np.array(new_split.val_idx)
68 | test = np.array(new_split.test_idx)
69 | trainval = np.concatenate((train, val), axis=0)
70 |
71 | print(np.concatenate([trainval, test]).shape)
72 |
73 | dataset.partition.train = train
74 | dataset.partition.val = val
75 | dataset.partition.trainval = trainval
76 | dataset.partition.test = test
77 |
78 | weight_train = np.mean(dataset.label[train], axis=0).astype(np.float32)
79 | weight_trainval = np.mean(dataset.label[trainval], axis=0).astype(np.float32)
80 |
81 | print(weight_trainval[38])
82 |
83 | dataset.weight_train.append(weight_train)
84 | dataset.weight_trainval.append(weight_trainval)
85 | with open(os.path.join(save_dir, f'dataset_zs_run{version}.pkl'), 'wb+') as f:
86 | pickle.dump(dataset, f)
87 |
88 | else:
89 | for idx in range(5):
90 | train = data['partition_attribute'][0][0][0][idx]['train_index'][0][0][0] - 1
91 | val = data['partition_attribute'][0][0][0][idx]['val_index'][0][0][0] - 1
92 | test = data['partition_attribute'][0][0][0][idx]['test_index'][0][0][0] - 1
93 | trainval = np.concatenate([train, val])
94 | dataset.partition.train.append(train)
95 | dataset.partition.val.append(val)
96 | dataset.partition.test.append(test)
97 | dataset.partition.trainval.append(trainval)
98 | # cls_weight
99 | weight_train = np.mean(dataset.label[train], axis=0)
100 | weight_trainval = np.mean(dataset.label[trainval], axis=0)
101 | dataset.weight_train.append(weight_train)
102 | dataset.weight_trainval.append(weight_trainval)
103 | with open(os.path.join(save_dir, 'dataset_all.pkl'), 'wb+') as f:
104 | pickle.dump(dataset, f)
105 |
106 |
107 | if __name__ == "__main__":
108 | save_dir = '/mnt/data1/jiajian/datasets/attribute/RAP2/'
109 | reorder = True
110 |
111 | for i in range(5):
112 | new_split_path = f'/mnt/data1/jiajian/code/Rethinking_of_PAR/datasets/jian_split/index_rap2_split_id50_img300_ratio0.03_{i}.pkl'
113 | generate_data_description(save_dir, reorder, new_split_path, i)
114 |
--------------------------------------------------------------------------------
/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/docs/__init__.py
--------------------------------------------------------------------------------
/docs/illus_zs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/docs/illus_zs.png
--------------------------------------------------------------------------------
/infer.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
5 | import pickle
6 |
7 | from dataset.augmentation import get_transform
8 | from dataset.multi_label.coco import COCO14
9 | from metrics.pedestrian_metrics import get_pedestrian_metrics
10 | from models.model_factory import build_backbone, build_classifier
11 |
12 | import numpy as np
13 | import torch
14 | from torch.utils.data import DataLoader
15 | from tqdm import tqdm
16 |
17 | from configs import cfg, update_config
18 | from dataset.pedes_attr.pedes import PedesAttr
19 | from metrics.ml_metrics import get_map_metrics, get_multilabel_metrics
20 | from models.base_block import FeatClassifier
21 | # from models.model_factory import model_dict, classifier_dict
22 |
23 | from tools.function import get_model_log_path, get_reload_weight
24 | from tools.utils import set_seed, str2bool, time_str
25 | from models.backbone import swin_transformer, resnet, bninception
26 | from models.backbone.tresnet import tresnet
27 | from losses import bceloss, scaledbceloss
28 |
29 | set_seed(605)
30 |
31 |
32 | def main(cfg, args):
33 | exp_dir = os.path.join('exp_result', cfg.DATASET.NAME)
34 | model_dir, log_dir = get_model_log_path(exp_dir, cfg.NAME)
35 |
36 | train_tsfm, valid_tsfm = get_transform(cfg)
37 | print(valid_tsfm)
38 |
39 | if cfg.DATASET.TYPE == 'multi_label':
40 | train_set = COCO14(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm,
41 | target_transform=cfg.DATASET.TARGETTRANSFORM)
42 |
43 | valid_set = COCO14(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm,
44 | target_transform=cfg.DATASET.TARGETTRANSFORM)
45 | else:
46 | train_set = PedesAttr(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=valid_tsfm,
47 | target_transform=cfg.DATASET.TARGETTRANSFORM)
48 | valid_set = PedesAttr(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm,
49 | target_transform=cfg.DATASET.TARGETTRANSFORM)
50 |
51 |
52 | train_loader = DataLoader(
53 | dataset=train_set,
54 | batch_size=cfg.TRAIN.BATCH_SIZE,
55 | shuffle=False,
56 | num_workers=4,
57 | pin_memory=True,
58 | )
59 |
60 | valid_loader = DataLoader(
61 | dataset=valid_set,
62 | batch_size=cfg.TRAIN.BATCH_SIZE,
63 | shuffle=False,
64 | num_workers=4,
65 | pin_memory=True,
66 | )
67 |
68 | print(f'{cfg.DATASET.TRAIN_SPLIT} set: {len(train_loader.dataset)}, '
69 | f'{cfg.DATASET.TEST_SPLIT} set: {len(valid_loader.dataset)}, '
70 | f'attr_num : {train_set.attr_num}')
71 |
72 | backbone, c_output = build_backbone(cfg.BACKBONE.TYPE, cfg.BACKBONE.MULTISCALE)
73 |
74 |
75 | classifier = build_classifier(cfg.CLASSIFIER.NAME)(
76 | nattr=train_set.attr_num,
77 | c_in=c_output,
78 | bn=cfg.CLASSIFIER.BN,
79 | pool=cfg.CLASSIFIER.POOLING,
80 | scale =cfg.CLASSIFIER.SCALE
81 | )
82 |
83 | model = FeatClassifier(backbone, classifier)
84 |
85 | if torch.cuda.is_available():
86 | model = torch.nn.DataParallel(model).cuda()
87 |
88 |     model = get_reload_weight(model_dir, model, pth='xxxxxxxxxxxxxxx')  # set pth to the checkpoint filename to evaluate
89 |
90 | model.eval()
91 | preds_probs = []
92 | gt_list = []
93 | path_list = []
94 |
95 | attn_list = []
96 | with torch.no_grad():
97 | for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
98 | imgs = imgs.cuda()
99 | gt_label = gt_label.cuda()
100 | valid_logits, attns = model(imgs, gt_label)
101 |
102 | valid_probs = torch.sigmoid(valid_logits[0])
103 |
104 | path_list.extend(imgname)
105 | gt_list.append(gt_label.cpu().numpy())
106 | preds_probs.append(valid_probs.cpu().numpy())
107 |
108 |
109 | gt_label = np.concatenate(gt_list, axis=0)
110 | preds_probs = np.concatenate(preds_probs, axis=0)
111 |
112 |
113 |
114 | if cfg.METRIC.TYPE == 'pedestrian':
115 | valid_result = get_pedestrian_metrics(gt_label, preds_probs)
116 | valid_map, _ = get_map_metrics(gt_label, preds_probs)
117 |
118 | print(f'Evaluation on test set, \n',
119 |               'ma: {:.4f}, map: {:.4f}, label_f1: {:.4f}, pos_recall: {:.4f}, neg_recall: {:.4f} \n'.format(
120 | valid_result.ma, valid_map, np.mean(valid_result.label_f1), np.mean(valid_result.label_pos_recall),
121 | np.mean(valid_result.label_neg_recall)),
122 | 'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
123 | valid_result.instance_acc, valid_result.instance_prec, valid_result.instance_recall,
124 | valid_result.instance_f1)
125 | )
126 |
127 | with open(os.path.join(model_dir, 'results_test_feat_best.pkl'), 'wb+') as f:
128 | pickle.dump([valid_result, gt_label, preds_probs, attn_list, path_list], f, protocol=4)
129 |
130 | elif cfg.METRIC.TYPE == 'multi_label':
131 | if not cfg.INFER.SAMPLING:
132 | valid_metric = get_multilabel_metrics(gt_label, preds_probs)
133 |
134 | print(
135 | 'Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
136 | 'CF1: {:.4f}'.format(valid_metric.map, valid_metric.OP, valid_metric.OR, valid_metric.OF1,
137 | valid_metric.CP, valid_metric.CR, valid_metric.CF1))
138 |
139 | print(f'{time_str()}')
140 | print('-' * 60)
141 |
142 | def argument_parser():
143 | parser = argparse.ArgumentParser(description="attribute recognition",
144 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
145 |
146 | parser.add_argument(
147 | "--cfg", help="decide which cfg to use", type=str,
148 | )
149 | parser.add_argument("--debug", type=str2bool, default="true")
150 |
151 | args = parser.parse_args()
152 |
153 | return args
154 |
155 |
156 | if __name__ == '__main__':
157 | args = argument_parser()
158 | update_config(cfg, args)
159 |
160 | main(cfg, args)
161 |
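Usage note (not part of the file): inference is driven entirely by the config flag, e.g. `python infer.py --cfg configs/pedes_baseline/pa100k.yaml`, after replacing the placeholder pth='xxxxxxxxxxxxxxx' above with the filename of a trained checkpoint in the experiment's model directory.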
--------------------------------------------------------------------------------
/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/losses/__init__.py
--------------------------------------------------------------------------------
/losses/bceloss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from models.registry import LOSSES
6 | from tools.function import ratio2weight
7 |
8 |
9 | @LOSSES.register("bceloss")
10 | class BCELoss(nn.Module):
11 |
12 | def __init__(self, sample_weight=None, size_sum=True, scale=None, tb_writer=None):
13 | super(BCELoss, self).__init__()
14 |
15 | self.sample_weight = sample_weight
16 | self.size_sum = size_sum
17 | self.hyper = 0.8
18 | self.smoothing = None
19 |
20 | def forward(self, logits, targets):
21 | logits = logits[0]
22 |
23 | if self.smoothing is not None:
24 | targets = (1 - self.smoothing) * targets + self.smoothing * (1 - targets)
25 |
26 | loss_m = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
27 |
28 | targets_mask = torch.where(targets.detach().cpu() > 0.5, torch.ones(1), torch.zeros(1))
29 | if self.sample_weight is not None:
30 | sample_weight = ratio2weight(targets_mask, self.sample_weight)
31 |
32 | loss_m = (loss_m * sample_weight.cuda())
33 |
34 | # losses = loss_m.sum(1).mean() if self.size_sum else loss_m.mean()
35 | loss = loss_m.sum(1).mean() if self.size_sum else loss_m.sum()
36 |
37 | return [loss], [loss_m]
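ratio2weight is imported from tools/function.py, which is not reproduced in this dump. A plausible sketch consistent with how it is called here (targets_mask in {0, 1}; sample_weight is the per-attribute positive ratio computed by the format_* scripts) is:

    import torch

    def ratio2weight_sketch(targets_mask, ratio):
        # Hypothetical stand-in for tools.function.ratio2weight: up-weight positives of
        # rare attributes and negatives of frequent ones, based on the per-attribute
        # positive ratio (weight_train / weight_trainval from the preprocess scripts).
        ratio = torch.from_numpy(ratio).type_as(targets_mask)
        pos_weights = targets_mask * (1 - ratio)   # rare attribute -> small ratio -> large positive weight
        neg_weights = (1 - targets_mask) * ratio   # common attribute -> large ratio -> large negative weight
        return torch.exp(pos_weights + neg_weights)
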
--------------------------------------------------------------------------------
/losses/label_smoothing.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F  # label-smoothing discussion: https://github.com/pytorch/pytorch/issues/7455
4 |
5 | class LabelSmoothLoss(nn.Module):
6 |
7 | def __init__(self, smoothing=0.0):
8 | super(LabelSmoothLoss, self).__init__()
9 | self.smoothing = smoothing
10 |
11 | def forward(self, input, target):
12 | log_prob = F.log_softmax(input, dim=-1)
13 | weight = input.new_ones(input.size()) * self.smoothing / (input.size(-1) - 1.)
14 | weight.scatter_(-1, target.unsqueeze(-1), (1. - self.smoothing))
15 | loss = (-weight * log_prob).sum(dim=-1).mean()
16 | return loss
17 |
18 |
19 | class LabelSmoothingLoss(nn.Module):
20 | def __init__(self, classes, smoothing=0.0, dim=-1):
21 | super(LabelSmoothingLoss, self).__init__()
22 | self.confidence = 1.0 - smoothing
23 | self.smoothing = smoothing
24 | self.cls = classes
25 | self.dim = dim
26 |
27 | def forward(self, pred, target):
28 | pred = pred.log_softmax(dim=self.dim)
29 | with torch.no_grad():
30 | # true_dist = pred.data.clone()
31 | true_dist = torch.zeros_like(pred)
32 | true_dist.fill_(self.smoothing / (self.cls - 1))
33 | true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
34 | return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))
35 |
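A quick numeric check of LabelSmoothLoss (illustrative): with smoothing=0.1 and 5 classes, the true class keeps weight 1 - 0.1 = 0.9 and each other class gets 0.1 / (5 - 1) = 0.025, so the per-sample weights still sum to 1.

    import torch

    loss_fn = LabelSmoothLoss(smoothing=0.1)
    logits = torch.randn(2, 5)          # (batch, num_classes)
    target = torch.tensor([0, 3])       # integer class labels
    print(loss_fn(logits, target))      # scalar smoothed cross-entropy
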
--------------------------------------------------------------------------------
/losses/scaledbceloss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from models.registry import LOSSES
6 | from tools.function import ratio2weight
7 |
8 |
9 | @LOSSES.register("scaledbceloss")
10 | class ScaledBCELoss(nn.Module):
11 |
12 | def __init__(self, sample_weight=None, size_sum=True, scale=30, tb_writer=None):
13 | super(ScaledBCELoss, self).__init__()
14 |
15 | self.sample_weight = sample_weight
16 | self.size_sum = size_sum
17 | self.hyper = 0.8
18 | self.smoothing = None
19 | self.pos_scale = scale
20 | self.neg_scale = scale
21 | self.tb_writer = tb_writer
22 |
23 | def forward(self, logits, targets):
24 | batch_size = logits.shape[0]
25 |
26 |         logits = logits * targets * self.pos_scale + logits * (1 - targets) * self.neg_scale  # note: with pos_scale == neg_scale this reduces to logits * scale
27 |
28 | if self.smoothing is not None:
29 | targets = (1 - self.smoothing) * targets + self.smoothing * (1 - targets)
30 |
31 | loss_m = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
32 |
33 | targets_mask = torch.where(targets.detach().cpu() > 0.5, torch.ones(1), torch.zeros(1))
34 |
35 | if self.sample_weight is not None:
36 | sample_weight = ratio2weight(targets_mask, self.sample_weight)
37 |
38 | loss_m = (loss_m * sample_weight.cuda())
39 |
40 | loss = loss_m.sum(1).mean() if self.size_sum else loss_m.mean()
41 |
42 | return [loss], [loss_m]
--------------------------------------------------------------------------------
/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/metrics/__init__.py
--------------------------------------------------------------------------------
/metrics/ml_metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from easydict import EasyDict
3 | from sklearn.metrics import average_precision_score
4 |
5 |
6 | def calc_average_precision(gt_label, probs):
7 | ndata, nattr = gt_label.shape
8 |
9 | ap_list = []
10 | for i in range(nattr):
11 | y_true = gt_label[:, i]
12 | y_score = probs[:, i]
13 |
14 | ap_list.append(average_precision_score(y_true, y_score))
15 | ap = np.array(ap_list)
16 | mAp = ap.mean()
17 | return mAp, ap
18 |
19 | def get_map_metrics(gt_label, probs):
20 | mAP, ap = calc_average_precision(gt_label, probs)
21 |
22 | return mAP, ap
23 |
24 | # same as calc_average_precision
25 | def get_mAp(gt_label: np.ndarray, probs: np.ndarray):
26 | ndata, nattr = gt_label.shape
27 | rg = np.arange(1, ndata + 1).astype(float)
28 | ap_list = []
29 | for k in range(nattr):
30 | # sort scores
31 | scores = probs[:, k]
32 | targets = gt_label[:, k]
33 | sorted_idx = np.argsort(scores)[::-1] # Descending
34 | truth = targets[sorted_idx]
35 |
36 | tp = np.cumsum(truth).astype(float)
37 | # compute precision curve
38 | precision = tp / rg
39 |
40 | # compute average precision
41 | ap_list.append(precision[truth == 1].sum() / max(truth.sum(), 1))
42 |
43 | ap = np.array(ap_list)
44 | mAp = ap.mean()
45 | return mAp, ap
46 |
47 |
48 |
49 |
50 |
51 | def prob2metric(gt_label: np.ndarray, probs: np.ndarray, th):
52 | eps = 1e-6
53 | ndata, nattr = gt_label.shape
54 |
55 | # ------------------ macro, micro ---------------
56 | # gt_label[gt_label == -1] = 0
57 | pred_label = probs > th
58 | gt_pos = gt_label.sum(0)
59 | pred_pos = pred_label.sum(0)
60 | tp = (gt_label * pred_label).sum(0)
61 |
62 | OP = tp.sum() / pred_pos.sum()
63 | OR = tp.sum() / gt_pos.sum()
64 | OF1 = (2 * OP * OR) / (OP + OR)
65 |
66 | pred_pos[pred_pos == 0] = 1
67 |
68 | CP_all = tp / pred_pos
69 | CR_all = tp / gt_pos
70 |
71 | CP_all_t = tp / pred_pos
72 | CP_all_t[CP_all_t == 0] = 1
73 | CR_all_t = tp / gt_pos
74 | CR_all_t[CR_all_t == 0] = 1
75 | CF1_all = (2 * CP_all * CR_all) / (CP_all_t + CR_all_t)
76 |
77 | CF1_mean = CF1_all.mean()
78 |
79 | CP = np.mean(tp / pred_pos)
80 | CR = np.mean(tp / gt_pos)
81 | CF1 = (2 * CP * CR) / (CP + CR)
82 |
83 | gt_neg = ndata - gt_pos
84 | tn = ((1 - gt_label) * (1 - pred_label)).sum(0)
85 |
86 | label_pos_recall = 1.0 * tp / (gt_pos + eps) # true positive
87 | label_neg_recall = 1.0 * tn / (gt_neg + eps) # true negative
88 | # mean accuracy
89 | label_ma = (label_pos_recall + label_neg_recall) / 2
90 |
91 | ma = label_ma.mean()
92 |
93 | return OP, OR, OF1, CP, CR, CF1, ma, CP_all, CR_all, CF1_all, CF1_mean
94 |
95 |
96 | def get_multilabel_metrics(gt_label, prob_pred, th=0.5):
97 |
98 | result = EasyDict()
99 |
100 |
101 | mAP, ap = calc_average_precision(gt_label, prob_pred)
102 | op, orecall, of1, cp, cr, cf1, ma, cp_all, cr_all, cf1_all, CF1_mean = prob2metric(gt_label, prob_pred, th)
103 | result.map = mAP * 100.
104 |
105 | # to json serializable
106 | result.CP_all = list(cp_all.astype(np.float64))
107 | result.CR_all = list(cr_all.astype(np.float64))
108 | result.CF1_all = list(cf1_all.astype(np.float64))
109 | result.CF1_mean = CF1_mean
110 |
111 | # simplified way
112 | # mAP, ap = calc_average_precision(gt_label, probs)
113 | # pred_label = probs > 0.5
114 | # CP, CR, _, _ = precision_recall_fscore_support(gt_label, pred_label, average='macro')
115 | # CF1 = 2 * CP * CR / (CP + CR)
116 | # OP, OR, OF1, _ = precision_recall_fscore_support(gt_label, pred_label, average='micro')
117 |
118 | result.OP = op * 100.
119 | result.OR = orecall * 100.
120 | result.OF1 = of1 * 100.
121 | result.CP = cp * 100.
122 | result.CR = cr * 100.
123 | result.CF1 = cf1 * 100.
124 |
125 | return result
126 |
127 |
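An illustrative smoke test for the multi-label metrics (random data, so the numbers only demonstrate shapes and scaling; the headline metrics are returned as percentages):

    import numpy as np

    gt = np.random.randint(0, 2, size=(100, 20))
    probs = np.random.rand(100, 20)
    res = get_multilabel_metrics(gt, probs, th=0.5)
    print(res.map, res.OF1, res.CF1)
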
--------------------------------------------------------------------------------
/metrics/pedestrian_metrics.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import numpy as np
4 | from easydict import EasyDict
5 | import torch
6 |
7 |
8 | def get_pedestrian_metrics(gt_label, preds_probs, threshold=0.5, index=None, cfg=None):
9 | """
10 | index: evaluated label index
11 | """
12 | pred_label = preds_probs > threshold
13 |
14 | eps = 1e-20
15 | result = EasyDict()
16 |
17 | if index is not None:
18 | pred_label = pred_label[:, index]
19 | gt_label = gt_label[:, index]
20 |
21 | ###############################
22 | # label metrics
23 | # TP + FN
24 | gt_pos = np.sum((gt_label == 1), axis=0).astype(float)
25 | # TN + FP
26 | gt_neg = np.sum((gt_label == 0), axis=0).astype(float)
27 | # TP
28 | true_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=0).astype(float)
29 | # TN
30 | true_neg = np.sum((gt_label == 0) * (pred_label == 0), axis=0).astype(float)
31 | # FP
32 | false_pos = np.sum(((gt_label == 0) * (pred_label == 1)), axis=0).astype(float)
33 | # FN
34 | false_neg = np.sum(((gt_label == 1) * (pred_label == 0)), axis=0).astype(float)
35 |
36 | label_pos_recall = 1.0 * true_pos / (gt_pos + eps) # true positive
37 | label_neg_recall = 1.0 * true_neg / (gt_neg + eps) # true negative
38 | # mean accuracy
39 | label_ma = (label_pos_recall + label_neg_recall) / 2
40 |
41 | result.label_pos_recall = label_pos_recall
42 | result.label_neg_recall = label_neg_recall
43 | result.label_prec = true_pos / (true_pos + false_pos + eps)
44 | result.label_acc = true_pos / (true_pos + false_pos + false_neg + eps)
45 | result.label_f1 = 2 * result.label_prec * result.label_pos_recall / (
46 | result.label_prec + result.label_pos_recall + eps)
47 |
48 | result.label_ma = label_ma
49 | result.ma = np.mean(label_ma)
50 |
51 | ################
52 | # instance metrics
53 | gt_pos = np.sum((gt_label == 1), axis=1).astype(float)
54 |     pred_pos = np.sum((pred_label == 1), axis=1).astype(float)
55 | # true positive
56 | intersect_pos = np.sum((gt_label == 1) * (pred_label == 1), axis=1).astype(float)
57 | # IOU
58 | union_pos = np.sum(((gt_label == 1) + (pred_label == 1)), axis=1).astype(float)
59 |
60 | instance_acc = intersect_pos / (union_pos + eps)
61 |     instance_prec = intersect_pos / (pred_pos + eps)
62 | instance_recall = intersect_pos / (gt_pos + eps)
63 | instance_f1 = 2 * instance_prec * instance_recall / (instance_prec + instance_recall + eps)
64 |
65 | instance_acc = np.mean(instance_acc)
66 | instance_prec = np.mean(instance_prec)
67 | instance_recall = np.mean(instance_recall)
68 | # instance_f1 = np.mean(instance_f1)
69 | instance_f1 = 2 * instance_prec * instance_recall / (instance_prec + instance_recall + eps)
70 |
71 | result.instance_acc = instance_acc
72 | result.instance_prec = instance_prec
73 | result.instance_recall = instance_recall
74 | result.instance_f1 = instance_f1
75 |
76 | result.error_num, result.fn_num, result.fp_num = false_pos + false_neg, false_neg, false_pos
77 |
78 | return result
79 |
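An illustrative smoke test (random inputs): ma is the mean over attributes of (pos_recall + neg_recall) / 2, while the instance metrics are averaged over images.

    import numpy as np

    gt = np.random.randint(0, 2, size=(100, 35)).astype(float)
    probs = np.random.rand(100, 35)
    m = get_pedestrian_metrics(gt, probs, threshold=0.5)
    print(m.ma, m.instance_acc, m.instance_f1)
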
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/models/backbone/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/models/backbone/__init__.py
--------------------------------------------------------------------------------
/models/backbone/checkpoints/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/models/backbone/checkpoints/__init__.py
--------------------------------------------------------------------------------
/models/backbone/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch.hub import load_state_dict_from_url
3 |
4 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
5 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']
6 |
7 | from models.registry import BACKBONE
8 |
9 | model_urls = {
10 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
11 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
12 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
13 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
14 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
15 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
16 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
17 | }
18 |
19 |
20 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
21 | """3x3 convolution with padding"""
22 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
23 | padding=dilation, groups=groups, bias=False, dilation=dilation)
24 |
25 |
26 | def conv1x1(in_planes, out_planes, stride=1):
27 | """1x1 convolution"""
28 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
29 |
30 |
31 | class BasicBlock(nn.Module):
32 | expansion = 1
33 |
34 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
35 | base_width=64, dilation=1, norm_layer=None):
36 | super(BasicBlock, self).__init__()
37 | if norm_layer is None:
38 | norm_layer = nn.BatchNorm2d
39 | if groups != 1 or base_width != 64:
40 | raise ValueError('BasicBlock only supports groups=1 and base_width=64')
41 | if dilation > 1:
42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
43 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1
44 | self.conv1 = conv3x3(inplanes, planes, stride)
45 | self.bn1 = norm_layer(planes)
46 | self.relu = nn.ReLU(inplace=True)
47 | self.conv2 = conv3x3(planes, planes)
48 | self.bn2 = norm_layer(planes)
49 | self.downsample = downsample
50 | self.stride = stride
51 |
52 | def forward(self, x):
53 | identity = x
54 |
55 | out = self.conv1(x)
56 | out = self.bn1(out)
57 | out = self.relu(out)
58 |
59 | out = self.conv2(out)
60 | out = self.bn2(out)
61 |
62 | if self.downsample is not None:
63 | identity = self.downsample(x)
64 |
65 | out += identity
66 | out = self.relu(out)
67 |
68 | return out
69 |
70 |
71 | class Bottleneck(nn.Module):
72 | expansion = 4
73 |
74 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
75 | base_width=64, dilation=1, norm_layer=None):
76 | super(Bottleneck, self).__init__()
77 | if norm_layer is None:
78 | norm_layer = nn.BatchNorm2d
79 | width = int(planes * (base_width / 64.)) * groups
80 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1
81 | self.conv1 = conv1x1(inplanes, width)
82 | self.bn1 = norm_layer(width)
83 | self.conv2 = conv3x3(width, width, stride, groups, dilation)
84 | self.bn2 = norm_layer(width)
85 | self.conv3 = conv1x1(width, planes * self.expansion)
86 | self.bn3 = norm_layer(planes * self.expansion)
87 | self.relu = nn.ReLU(inplace=True)
88 | self.downsample = downsample
89 | self.stride = stride
90 |
91 | def forward(self, x):
92 | identity = x
93 |
94 | out = self.conv1(x)
95 | out = self.bn1(out)
96 | out = self.relu(out)
97 |
98 | out = self.conv2(out)
99 | out = self.bn2(out)
100 | out = self.relu(out)
101 |
102 | out = self.conv3(out)
103 | out = self.bn3(out)
104 |
105 | if self.downsample is not None:
106 | identity = self.downsample(x)
107 |
108 | out += identity
109 | out = self.relu(out)
110 |
111 | return out
112 |
113 |
114 | class ResNet(nn.Module):
115 |
116 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
117 | groups=1, width_per_group=64, replace_stride_with_dilation=None,
118 | norm_layer=None, multi_scale=False):
119 | super(ResNet, self).__init__()
120 |
121 | self.multi_scale = multi_scale
122 | if norm_layer is None:
123 | norm_layer = nn.BatchNorm2d
124 | self._norm_layer = norm_layer
125 |
126 | self.inplanes = 64
127 | self.dilation = 1
128 | if replace_stride_with_dilation is None:
129 | # each element in the tuple indicates if we should replace
130 | # the 2x2 stride with a dilated convolution instead
131 |
132 | # -----------------------------
133 | # modified
134 | replace_stride_with_dilation = [False, False, False]
135 | # -----------------------------
136 |
137 | if len(replace_stride_with_dilation) != 3:
138 | raise ValueError("replace_stride_with_dilation should be None "
139 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
140 | self.groups = groups
141 | self.base_width = width_per_group
142 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
143 | bias=False)
144 | self.bn1 = norm_layer(self.inplanes)
145 | self.relu = nn.ReLU(inplace=True)
146 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
147 | self.layer1 = self._make_layer(block, 64, layers[0])
148 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
149 | dilate=replace_stride_with_dilation[0])
150 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
151 | dilate=replace_stride_with_dilation[1])
152 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
153 | dilate=replace_stride_with_dilation[2])
154 |
155 | # self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
156 | # self.fc = nn.Linear(512 * block.expansion, num_classes)
157 |
158 | for m in self.modules():
159 | if isinstance(m, nn.Conv2d):
160 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
161 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
162 | nn.init.constant_(m.weight, 1)
163 | nn.init.constant_(m.bias, 0)
164 |
165 | # Zero-initialize the last BN in each residual branch,
166 | # so that the residual branch starts with zeros, and each residual block behaves like an identity.
167 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
168 | if zero_init_residual:
169 | for m in self.modules():
170 | if isinstance(m, Bottleneck):
171 | nn.init.constant_(m.bn3.weight, 0)
172 | elif isinstance(m, BasicBlock):
173 | nn.init.constant_(m.bn2.weight, 0)
174 |
175 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
176 | norm_layer = self._norm_layer
177 | downsample = None
178 | previous_dilation = self.dilation
179 | if dilate:
180 | self.dilation *= stride
181 | stride = 1
182 | if stride != 1 or self.inplanes != planes * block.expansion:
183 | downsample = nn.Sequential(
184 | conv1x1(self.inplanes, planes * block.expansion, stride),
185 | norm_layer(planes * block.expansion),
186 | )
187 |
188 | layers = []
189 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
190 | self.base_width, previous_dilation, norm_layer))
191 | self.inplanes = planes * block.expansion
192 | for _ in range(1, blocks):
193 | layers.append(block(self.inplanes, planes, groups=self.groups,
194 | base_width=self.base_width, dilation=self.dilation,
195 | norm_layer=norm_layer))
196 |
197 | return nn.Sequential(*layers)
198 |
199 | def forward(self, x):
200 | x = self.conv1(x)
201 | x = self.bn1(x)
202 | x = self.relu(x)
203 | x = self.maxpool(x)
204 |
205 | x1 = self.layer1(x)
206 | x2 = self.layer2(x1)
207 | x3 = self.layer3(x2)
208 | x4 = self.layer4(x3)
209 |
210 | if self.multi_scale:
211 | return [x2, x3, x4]
212 | else:
213 | return x4
214 |
215 |
216 | def remove_fc(state_dict):
217 | """ Remove the fc layer parameter from state_dict. """
218 | return {key: value for key, value in state_dict.items() if not key.startswith('fc.')}
219 |
220 |
221 |
222 | def _resnet(arch, block, layers, pretrained, progress, **kwargs):
223 | model = ResNet(block, layers, **kwargs)
224 | if pretrained:
225 | state_dict = load_state_dict_from_url(model_urls[arch],
226 | progress=progress)
227 | model.load_state_dict(remove_fc(state_dict), strict=True)
228 | return model
229 |
230 | @BACKBONE.register("resnet18")
231 | def resnet18(pretrained=True, progress=True, **kwargs):
232 | """Constructs a ResNet-18 model.
233 |
234 | Args:
235 | pretrained (bool): If True, returns a model pre-trained on ImageNet
236 | progress (bool): If True, displays a progress bar of the download to stderr
237 | """
238 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
239 | **kwargs)
240 |
241 | @BACKBONE.register("resnet34")
242 | def resnet34(pretrained=True, progress=True, **kwargs):
243 | """Constructs a ResNet-34 model.
244 |
245 | Args:
246 | pretrained (bool): If True, returns a model pre-trained on ImageNet
247 | progress (bool): If True, displays a progress bar of the download to stderr
248 | """
249 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
250 | **kwargs)
251 |
252 | @BACKBONE.register("resnet50")
253 | def resnet50(pretrained=True, progress=True, **kwargs):
254 | """Constructs a ResNet-50 model.
255 |
256 | Args:
257 | pretrained (bool): If True, returns a model pre-trained on ImageNet
258 | progress (bool): If True, displays a progress bar of the download to stderr
259 | """
260 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
261 | **kwargs)
262 |
263 | @BACKBONE.register("resnet101")
264 | def resnet101(pretrained=True, progress=True, **kwargs):
265 | """Constructs a ResNet-101 model.
266 |
267 | Args:
268 | pretrained (bool): If True, returns a model pre-trained on ImageNet
269 | progress (bool): If True, displays a progress bar of the download to stderr
270 | """
271 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
272 | **kwargs)
273 |
274 | @BACKBONE.register("resnet152")
275 | def resnet152(pretrained=True, progress=True, **kwargs):
276 | """Constructs a ResNet-152 model.
277 |
278 | Args:
279 | pretrained (bool): If True, returns a model pre-trained on ImageNet
280 | progress (bool): If True, displays a progress bar of the download to stderr
281 | """
282 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
283 | **kwargs)
284 |
285 | @BACKBONE.register("resnext50_32x4d")
286 | def resnext50_32x4d(pretrained=True, progress=True, **kwargs):
287 | """Constructs a ResNeXt-50 32x4d model.
288 |
289 | Args:
290 | pretrained (bool): If True, returns a model pre-trained on ImageNet
291 | progress (bool): If True, displays a progress bar of the download to stderr
292 | """
293 | kwargs['groups'] = 32
294 | kwargs['width_per_group'] = 4
295 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
296 | pretrained, progress, **kwargs)
297 |
298 | @BACKBONE.register("resnext101_32x8d")
299 | def resnext101_32x8d(pretrained=True, progress=True, **kwargs):
300 | """Constructs a ResNeXt-101 32x8d model.
301 |
302 | Args:
303 | pretrained (bool): If True, returns a model pre-trained on ImageNet
304 | progress (bool): If True, displays a progress bar of the download to stderr
305 | """
306 | kwargs['groups'] = 32
307 | kwargs['width_per_group'] = 8
308 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
309 | pretrained, progress, **kwargs)
310 |
311 |
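Note that the classification head (avgpool/fc) is commented out above, so the registered constructors return feature maps rather than logits; with multi_scale=True the forward yields [x2, x3, x4]. An illustrative check (the 256x192 input size is a common pedestrian crop, an assumption here):

    import torch

    net = resnet50(pretrained=False, multi_scale=True)
    feats = net(torch.randn(1, 3, 256, 192))
    print([f.shape for f in feats])
    # [torch.Size([1, 512, 32, 24]), torch.Size([1, 1024, 16, 12]), torch.Size([1, 2048, 8, 6])]
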
--------------------------------------------------------------------------------
/models/backbone/resnet_ibn.py:
--------------------------------------------------------------------------------
1 | import math
2 | import warnings
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
8 | class IBN(nn.Module):
9 | r"""Instance-Batch Normalization layer from
10 | `"Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net"
11 | `
12 | Args:
13 | planes (int): Number of channels for the input tensor
14 | ratio (float): Ratio of instance normalization in the IBN layer
15 | """
16 |
17 | def __init__(self, planes, ratio=0.5):
18 | super(IBN, self).__init__()
19 | self.half = int(planes * ratio)
20 | self.IN = nn.InstanceNorm2d(self.half, affine=True)
21 | self.BN = nn.BatchNorm2d(planes - self.half)
22 |
23 | def forward(self, x):
24 | split = torch.split(x, self.half, 1)
25 | out1 = self.IN(split[0].contiguous())
26 | out2 = self.BN(split[1].contiguous())
27 | out = torch.cat((out1, out2), 1)
28 | return out
29 |
30 |
31 | __all__ = ['ResNet_IBN', 'resnet18_ibn_a', 'resnet34_ibn_a', 'resnet50_ibn_a', 'resnet101_ibn_a', 'resnet152_ibn_a',
32 | 'resnet18_ibn_b', 'resnet34_ibn_b', 'resnet50_ibn_b', 'resnet101_ibn_b', 'resnet152_ibn_b']
33 |
34 | model_urls = {
35 | 'resnet18_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_a-2f571257.pth',
36 | 'resnet34_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_a-94bc1577.pth',
37 | 'resnet50_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_a-d9d0bb7b.pth',
38 | 'resnet101_ibn_a': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_a-59ea0ac6.pth',
39 | 'resnet18_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_b-bc2f3c11.pth',
40 | 'resnet34_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_b-04134c37.pth',
41 | 'resnet50_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_b-9ca61e85.pth',
42 | 'resnet101_ibn_b': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_b-c55f6dba.pth',
43 | }
44 |
45 |
46 | class BasicBlock_IBN(nn.Module):
47 | expansion = 1
48 |
49 | def __init__(self, inplanes, planes, ibn=None, stride=1, downsample=None):
50 | super(BasicBlock_IBN, self).__init__()
51 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
52 | padding=1, bias=False)
53 | if ibn == 'a':
54 | self.bn1 = IBN(planes)
55 | else:
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.relu = nn.ReLU(inplace=True)
58 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.IN = nn.InstanceNorm2d(planes, affine=True) if ibn == 'b' else None
61 | self.downsample = downsample
62 | self.stride = stride
63 |
64 | def forward(self, x):
65 | residual = x
66 |
67 | out = self.conv1(x)
68 | out = self.bn1(out)
69 | out = self.relu(out)
70 |
71 | out = self.conv2(out)
72 | out = self.bn2(out)
73 |
74 | if self.downsample is not None:
75 | residual = self.downsample(x)
76 |
77 | out += residual
78 | if self.IN is not None:
79 | out = self.IN(out)
80 | out = self.relu(out)
81 |
82 | return out
83 |
84 |
85 | class Bottleneck_IBN(nn.Module):
86 | expansion = 4
87 |
88 | def __init__(self, inplanes, planes, ibn=None, stride=1, downsample=None):
89 | super(Bottleneck_IBN, self).__init__()
90 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
91 | if ibn == 'a':
92 | self.bn1 = IBN(planes)
93 | else:
94 | self.bn1 = nn.BatchNorm2d(planes)
95 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
96 | padding=1, bias=False)
97 | self.bn2 = nn.BatchNorm2d(planes)
98 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
99 | self.bn3 = nn.BatchNorm2d(planes * self.expansion)
100 | self.IN = nn.InstanceNorm2d(planes * 4, affine=True) if ibn == 'b' else None
101 | self.relu = nn.ReLU(inplace=True)
102 | self.downsample = downsample
103 | self.stride = stride
104 |
105 | def forward(self, x):
106 | residual = x
107 |
108 | out = self.conv1(x)
109 | out = self.bn1(out)
110 | out = self.relu(out)
111 |
112 | out = self.conv2(out)
113 | out = self.bn2(out)
114 | out = self.relu(out)
115 |
116 | out = self.conv3(out)
117 | out = self.bn3(out)
118 |
119 | if self.downsample is not None:
120 | residual = self.downsample(x)
121 |
122 | out += residual
123 | if self.IN is not None:
124 | out = self.IN(out)
125 | out = self.relu(out)
126 |
127 | return out
128 |
129 |
130 | class ResNet_IBN(nn.Module):
131 |
132 | def __init__(self,
133 | block,
134 | layers,
135 | ibn_cfg=('a', 'a', 'a', None),
136 | num_classes=1000):
137 | self.inplanes = 64
138 | super(ResNet_IBN, self).__init__()
139 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
140 | bias=False)
141 | if ibn_cfg[0] == 'b':
142 | self.bn1 = nn.InstanceNorm2d(64, affine=True)
143 | else:
144 | self.bn1 = nn.BatchNorm2d(64)
145 | self.relu = nn.ReLU(inplace=True)
146 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
147 | self.layer1 = self._make_layer(block, 64, layers[0], ibn=ibn_cfg[0])
148 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, ibn=ibn_cfg[1])
149 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, ibn=ibn_cfg[2])
150 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, ibn=ibn_cfg[3])
151 | self.avgpool = nn.AvgPool2d(7)
152 | self.fc = nn.Linear(512 * block.expansion, num_classes)
153 |
154 | for m in self.modules():
155 | if isinstance(m, nn.Conv2d):
156 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
157 | m.weight.data.normal_(0, math.sqrt(2. / n))
158 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
159 | m.weight.data.fill_(1)
160 | m.bias.data.zero_()
161 |
162 | def _make_layer(self, block, planes, blocks, stride=1, ibn=None):
163 | downsample = None
164 | if stride != 1 or self.inplanes != planes * block.expansion:
165 | downsample = nn.Sequential(
166 | nn.Conv2d(self.inplanes, planes * block.expansion,
167 | kernel_size=1, stride=stride, bias=False),
168 | nn.BatchNorm2d(planes * block.expansion),
169 | )
170 |
171 | layers = []
172 | layers.append(block(self.inplanes, planes,
173 | None if ibn == 'b' else ibn,
174 | stride, downsample))
175 | self.inplanes = planes * block.expansion
176 | for i in range(1, blocks):
177 | layers.append(block(self.inplanes, planes,
178 | None if (ibn == 'b' and i < blocks - 1) else ibn))
179 |
180 | return nn.Sequential(*layers)
181 |
182 | def forward(self, x):
183 | x = self.conv1(x)
184 | x = self.bn1(x)
185 | x = self.relu(x)
186 | x = self.maxpool(x)
187 |
188 | x = self.layer1(x)
189 | x = self.layer2(x)
190 | x = self.layer3(x)
191 | x = self.layer4(x)
192 |
193 | # x = self.avgpool(x)
194 | # x = x.view(x.size(0), -1)
195 | # x = self.fc(x)
196 |
197 | return x
198 |
199 |
200 | def resnet18_ibn_a(pretrained=False, **kwargs):
201 | """Constructs a ResNet-18-IBN-a model.
202 |
203 | Args:
204 | pretrained (bool): If True, returns a model pre-trained on ImageNet
205 | """
206 | model = ResNet_IBN(block=BasicBlock_IBN,
207 | layers=[2, 2, 2, 2],
208 | ibn_cfg=('a', 'a', 'a', None),
209 | **kwargs)
210 | if pretrained:
211 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet18_ibn_a']))
212 | return model
213 |
214 |
215 | def resnet34_ibn_a(pretrained=False, **kwargs):
216 | """Constructs a ResNet-34-IBN-a model.
217 |
218 | Args:
219 | pretrained (bool): If True, returns a model pre-trained on ImageNet
220 | """
221 | model = ResNet_IBN(block=BasicBlock_IBN,
222 | layers=[3, 4, 6, 3],
223 | ibn_cfg=('a', 'a', 'a', None),
224 | **kwargs)
225 | if pretrained:
226 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet34_ibn_a']))
227 | return model
228 |
229 |
230 | def resnet50_ibn_a(pretrained=True, **kwargs):
231 | """Constructs a ResNet-50-IBN-a model.
232 |
233 | Args:
234 | pretrained (bool): If True, returns a model pre-trained on ImageNet
235 | """
236 | model = ResNet_IBN(block=Bottleneck_IBN,
237 | layers=[3, 4, 6, 3],
238 | ibn_cfg=('a', 'a', 'a', None),
239 | **kwargs)
240 | if pretrained:
241 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet50_ibn_a']))
242 | return model
243 |
244 |
245 | def resnet101_ibn_a(pretrained=False, **kwargs):
246 | """Constructs a ResNet-101-IBN-a model.
247 |
248 | Args:
249 | pretrained (bool): If True, returns a model pre-trained on ImageNet
250 | """
251 | model = ResNet_IBN(block=Bottleneck_IBN,
252 | layers=[3, 4, 23, 3],
253 | ibn_cfg=('a', 'a', 'a', None),
254 | **kwargs)
255 | if pretrained:
256 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet101_ibn_a']))
257 | return model
258 |
259 |
260 | def resnet152_ibn_a(pretrained=False, **kwargs):
261 | """Constructs a ResNet-152-IBN-a model.
262 |
263 | Args:
264 | pretrained (bool): If True, returns a model pre-trained on ImageNet
265 | """
266 | model = ResNet_IBN(block=Bottleneck_IBN,
267 | layers=[3, 8, 36, 3],
268 | ibn_cfg=('a', 'a', 'a', None),
269 | **kwargs)
270 | if pretrained:
271 | warnings.warn("Pretrained model not available for ResNet-152-IBN-a!")
272 | return model
273 |
274 |
275 | def resnet18_ibn_b(pretrained=False, **kwargs):
276 | """Constructs a ResNet-18-IBN-b model.
277 |
278 | Args:
279 | pretrained (bool): If True, returns a model pre-trained on ImageNet
280 | """
281 | model = ResNet_IBN(block=BasicBlock_IBN,
282 | layers=[2, 2, 2, 2],
283 | ibn_cfg=('b', 'b', None, None),
284 | **kwargs)
285 | if pretrained:
286 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet18_ibn_b']))
287 | return model
288 |
289 |
290 | def resnet34_ibn_b(pretrained=False, **kwargs):
291 | """Constructs a ResNet-34-IBN-b model.
292 |
293 | Args:
294 | pretrained (bool): If True, returns a model pre-trained on ImageNet
295 | """
296 | model = ResNet_IBN(block=BasicBlock_IBN,
297 | layers=[3, 4, 6, 3],
298 | ibn_cfg=('b', 'b', None, None),
299 | **kwargs)
300 | if pretrained:
301 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet34_ibn_b']))
302 | return model
303 |
304 |
305 | def resnet50_ibn_b(pretrained=True, **kwargs):
306 | """Constructs a ResNet-50-IBN-b model.
307 |
308 | Args:
309 | pretrained (bool): If True, returns a model pre-trained on ImageNet
310 | """
311 | model = ResNet_IBN(block=Bottleneck_IBN,
312 | layers=[3, 4, 6, 3],
313 | ibn_cfg=('b', 'b', None, None),
314 | **kwargs)
315 | if pretrained:
316 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet50_ibn_b']))
317 | return model
318 |
319 |
320 | def resnet101_ibn_b(pretrained=False, **kwargs):
321 | """Constructs a ResNet-101-IBN-b model.
322 |
323 | Args:
324 | pretrained (bool): If True, returns a model pre-trained on ImageNet
325 | """
326 | model = ResNet_IBN(block=Bottleneck_IBN,
327 | layers=[3, 4, 23, 3],
328 | ibn_cfg=('b', 'b', None, None),
329 | **kwargs)
330 | if pretrained:
331 | model.load_state_dict(torch.hub.load_state_dict_from_url(model_urls['resnet101_ibn_b']))
332 | return model
333 |
334 |
335 | def resnet152_ibn_b(pretrained=False, **kwargs):
336 | """Constructs a ResNet-152-IBN-b model.
337 |
338 | Args:
339 | pretrained (bool): If True, returns a model pre-trained on ImageNet
340 | """
341 | model = ResNet_IBN(block=Bottleneck_IBN,
342 | layers=[3, 8, 36, 3],
343 | ibn_cfg=('b', 'b', None, None),
344 | **kwargs)
345 | if pretrained:
346 | warnings.warn("Pretrained model not available for ResNet-152-IBN-b!")
347 | return model
348 |
--------------------------------------------------------------------------------
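Usage sketch (not part of the repository): the IBN-Net constructors above return a backbone whose forward() yields the stride-32 layer4 feature map, since avgpool/fc are commented out. A minimal check, assuming this repo's module layout (models/backbone/resnet_ibn.py):

    import torch
    from models.backbone.resnet_ibn import resnet18_ibn_a

    model = resnet18_ibn_a(pretrained=False).eval()
    x = torch.randn(2, 3, 256, 192)            # typical pedestrian-attribute input size
    with torch.no_grad():
        feat = model(x)
    print(feat.shape)                          # torch.Size([2, 512, 8, 6]): feature map, not logits
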
/models/backbone/tresnet/__init__.py:
--------------------------------------------------------------------------------
1 | from .tresnet import TResnetM, TResnetL, TResnetXL
2 |
--------------------------------------------------------------------------------
/models/backbone/tresnet/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/models/backbone/tresnet/layers/__init__.py
--------------------------------------------------------------------------------
/models/backbone/tresnet/layers/anti_aliasing.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.parallel
3 | import numpy as np
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | class AntiAliasDownsampleLayer(nn.Module):
9 | def __init__(self, remove_model_jit: bool = False, filt_size: int = 3, stride: int = 2,
10 | channels: int = 0):
11 | super(AntiAliasDownsampleLayer, self).__init__()
12 | if not remove_model_jit:
13 | self.op = DownsampleJIT(filt_size, stride, channels)
14 | else:
15 | self.op = Downsample(filt_size, stride, channels)
16 |
17 | def forward(self, x):
18 | return self.op(x)
19 |
20 |
21 | @torch.jit.script
22 | class DownsampleJIT(object):
23 | def __init__(self, filt_size: int = 3, stride: int = 2, channels: int = 0):
24 | self.stride = stride
25 | self.filt_size = filt_size
26 | self.channels = channels
27 |
28 | assert self.filt_size == 3
29 | assert stride == 2
30 | a = torch.tensor([1., 2., 1.])
31 |
32 | filt = (a[:, None] * a[None, :]).clone().detach()
33 | filt = filt / torch.sum(filt)
34 | self.filt = filt[None, None, :, :].repeat((self.channels, 1, 1, 1)).cuda().half()
35 |
36 | def __call__(self, input: torch.Tensor):
37 | if input.dtype != self.filt.dtype:
38 | self.filt = self.filt.float()
39 | input_pad = F.pad(input, (1, 1, 1, 1), 'reflect')
40 | return F.conv2d(input_pad, self.filt, stride=2, padding=0, groups=input.shape[1])
41 |
42 |
43 | class Downsample(nn.Module):
44 | def __init__(self, filt_size=3, stride=2, channels=None):
45 | super(Downsample, self).__init__()
46 | self.filt_size = filt_size
47 | self.stride = stride
48 | self.channels = channels
49 |
50 |
51 | assert self.filt_size == 3
52 | a = torch.tensor([1., 2., 1.])
53 |
54 | filt = (a[:, None] * a[None, :]).clone().detach()
55 | filt = filt / torch.sum(filt)
56 | self.filt = filt[None, None, :, :].repeat((self.channels, 1, 1, 1))
57 |
58 | def forward(self, input):
59 | input_pad = F.pad(input, (1, 1, 1, 1), 'reflect')
60 | return F.conv2d(input_pad, self.filt, stride=self.stride, padding=0, groups=input.shape[1])
61 |
--------------------------------------------------------------------------------
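A small sketch of the anti-aliased downsampling above: the filter is the normalized outer product of [1, 2, 1] with itself, i.e. [[1, 2, 1], [2, 4, 2], [1, 2, 1]] / 16, applied depthwise after reflect padding. Passing remove_model_jit=True selects the plain Downsample module, which is the safe choice on CPU since DownsampleJIT builds its filter on CUDA in half precision:

    import torch
    from models.backbone.tresnet.layers.anti_aliasing import AntiAliasDownsampleLayer

    aa = AntiAliasDownsampleLayer(remove_model_jit=True, filt_size=3, stride=2, channels=8)
    x = torch.randn(1, 8, 32, 32)
    y = aa(x)                 # reflect-pad, blur with [[1,2,1],[2,4,2],[1,2,1]]/16, stride 2
    print(y.shape)            # torch.Size([1, 8, 16, 16])
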
/models/backbone/tresnet/layers/avg_pool.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class FastAvgPool2d(nn.Module):
5 | def __init__(self, flatten=False):
6 | super(FastAvgPool2d, self).__init__()
7 | self.flatten = flatten
8 |
9 | def forward(self, x):
10 | if self.flatten:
11 | in_size = x.size()
12 | return x.view((in_size[0], in_size[1], -1)).mean(dim=2)
13 | else:
14 | return x.view(x.size(0), x.size(1), -1).mean(-1).view(x.size(0), x.size(1), 1, 1)
15 |
--------------------------------------------------------------------------------
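A quick sketch confirming that FastAvgPool2d matches nn.AdaptiveAvgPool2d(1); the view/mean formulation simply avoids dispatching the adaptive-pooling kernel:

    import torch
    import torch.nn as nn
    from models.backbone.tresnet.layers.avg_pool import FastAvgPool2d

    x = torch.randn(2, 16, 7, 7)
    print(torch.allclose(FastAvgPool2d()(x), nn.AdaptiveAvgPool2d(1)(x), atol=1e-6))  # True
    print(FastAvgPool2d(flatten=True)(x).shape)   # torch.Size([2, 16]): pooled and flattened
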
/models/backbone/tresnet/layers/general_layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from models.backbone.tresnet.layers.avg_pool import FastAvgPool2d
6 |
7 |
8 | class Flatten(nn.Module):
9 | def forward(self, x):
10 | return x.view(x.size(0), -1)
11 |
12 |
13 | class DepthToSpace(nn.Module):
14 |
15 | def __init__(self, block_size):
16 | super().__init__()
17 | self.bs = block_size
18 |
19 | def forward(self, x):
20 | N, C, H, W = x.size()
21 | x = x.view(N, self.bs, self.bs, C // (self.bs ** 2), H, W) # (N, bs, bs, C//bs^2, H, W)
22 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # (N, C//bs^2, H, bs, W, bs)
23 | x = x.view(N, C // (self.bs ** 2), H * self.bs, W * self.bs) # (N, C//bs^2, H * bs, W * bs)
24 | return x
25 |
26 |
27 | class SpaceToDepthModule(nn.Module):
28 | def __init__(self, remove_model_jit=False):
29 | super().__init__()
30 | if not remove_model_jit:
31 | self.op = SpaceToDepthJit()
32 | else:
33 | self.op = SpaceToDepth()
34 |
35 | def forward(self, x):
36 | return self.op(x)
37 |
38 |
39 | class SpaceToDepth(nn.Module):
40 | def __init__(self, block_size=4):
41 | super().__init__()
42 | assert block_size == 4
43 | self.bs = block_size
44 |
45 | def forward(self, x):
46 | N, C, H, W = x.size()
47 | x = x.view(N, C, H // self.bs, self.bs, W // self.bs, self.bs) # (N, C, H//bs, bs, W//bs, bs)
48 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs)
49 | x = x.view(N, C * (self.bs ** 2), H // self.bs, W // self.bs) # (N, C*bs^2, H//bs, W//bs)
50 | return x
51 |
52 |
53 | @torch.jit.script
54 | class SpaceToDepthJit(object):
55 | def __call__(self, x: torch.Tensor):
56 | # assuming hard-coded that block_size==4 for acceleration
57 | N, C, H, W = x.size()
58 | x = x.view(N, C, H // 4, 4, W // 4, 4) # (N, C, H//bs, bs, W//bs, bs)
59 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs)
60 | x = x.view(N, C * 16, H // 4, W // 4) # (N, C*bs^2, H//bs, W//bs)
61 | return x
62 |
63 |
64 | class hard_sigmoid(nn.Module):
65 | def __init__(self, inplace=True):
66 | super(hard_sigmoid, self).__init__()
67 | self.inplace = inplace
68 |
69 | def forward(self, x):
70 | if self.inplace:
71 | return x.add_(3.).clamp_(0., 6.).div_(6.)
72 | else:
73 | return F.relu6(x + 3.) / 6.
74 |
75 |
76 | class SEModule(nn.Module):
77 |
78 | def __init__(self, channels, reduction_channels, inplace=True):
79 | super(SEModule, self).__init__()
80 | self.avg_pool = FastAvgPool2d()
81 | self.fc1 = nn.Conv2d(channels, reduction_channels, kernel_size=1, padding=0, bias=True)
82 | self.relu = nn.ReLU(inplace=inplace)
83 | self.fc2 = nn.Conv2d(reduction_channels, channels, kernel_size=1, padding=0, bias=True)
84 | # self.activation = hard_sigmoid(inplace=inplace)
85 | self.activation = nn.Sigmoid()
86 |
87 | def forward(self, x):
88 | x_se = self.avg_pool(x)
89 | x_se2 = self.fc1(x_se)
90 | x_se2 = self.relu(x_se2)
91 | x_se = self.fc2(x_se2)
92 | x_se = self.activation(x_se)
93 | return x * x_se
94 |
--------------------------------------------------------------------------------
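A shape sketch for the SpaceToDepth/DepthToSpace pair above: with block_size=4, SpaceToDepth maps (N, C, H, W) to (N, 16C, H/4, W/4), and DepthToSpace(block_size=4) inverts it exactly:

    import torch
    from models.backbone.tresnet.layers.general_layers import SpaceToDepth, DepthToSpace

    x = torch.randn(1, 3, 224, 224)
    y = SpaceToDepth(block_size=4)(x)                      # (1, 3*16, 224/4, 224/4)
    print(y.shape)                                         # torch.Size([1, 48, 56, 56])
    print(torch.equal(DepthToSpace(block_size=4)(y), x))   # True: the two ops are exact inverses
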
/models/backbone/tresnet/tresnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import Module as Module
4 | from collections import OrderedDict
5 | # from src.models.tresnet.layers.anti_aliasing import AntiAliasDownsampleLayer
6 | # from .layers.avg_pool import FastAvgPool2d
7 | # from .layers.general_layers import SEModule, SpaceToDepthModule
8 | from inplace_abn import InPlaceABN
9 |
10 | from models.backbone.tresnet.layers.anti_aliasing import AntiAliasDownsampleLayer
11 | from models.backbone.tresnet.layers.avg_pool import FastAvgPool2d
12 | from models.backbone.tresnet.layers.general_layers import SEModule, SpaceToDepthModule
13 | from models.registry import BACKBONE
14 |
15 |
16 | class bottleneck_head(nn.Module):
17 | def __init__(self, num_features, num_classes, bottleneck_features=200):
18 | super(bottleneck_head, self).__init__()
19 | self.embedding_generator = nn.ModuleList()
20 | self.embedding_generator.append(nn.Linear(num_features, bottleneck_features))
21 | self.embedding_generator = nn.Sequential(*self.embedding_generator)
22 | self.FC = nn.Linear(bottleneck_features, num_classes)
23 |
24 | def forward(self, x):
25 | self.embedding = self.embedding_generator(x)
26 | logits = self.FC(self.embedding)
27 | return logits
28 |
29 |
30 | def conv2d(ni, nf, stride):
31 | return nn.Sequential(
32 | nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False),
33 | nn.BatchNorm2d(nf),
34 | nn.ReLU(inplace=True)
35 | )
36 |
37 |
38 | def conv2d_ABN(ni, nf, stride, activation="leaky_relu", kernel_size=3, activation_param=1e-2, groups=1):
39 | return nn.Sequential(
40 | nn.Conv2d(ni, nf, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, groups=groups,
41 | bias=False),
42 | InPlaceABN(num_features=nf, activation=activation, activation_param=activation_param)
43 | )
44 |
45 |
46 | class BasicBlock(Module):
47 | expansion = 1
48 |
49 | def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, anti_alias_layer=None):
50 | super(BasicBlock, self).__init__()
51 | if stride == 1:
52 | self.conv1 = conv2d_ABN(inplanes, planes, stride=1, activation_param=1e-3)
53 | else:
54 | if anti_alias_layer is None:
55 | self.conv1 = conv2d_ABN(inplanes, planes, stride=2, activation_param=1e-3)
56 | else:
57 | self.conv1 = nn.Sequential(conv2d_ABN(inplanes, planes, stride=1, activation_param=1e-3),
58 | anti_alias_layer(channels=planes, filt_size=3, stride=2))
59 |
60 | self.conv2 = conv2d_ABN(planes, planes, stride=1, activation="identity")
61 | self.relu = nn.ReLU(inplace=True)
62 | self.downsample = downsample
63 | self.stride = stride
64 | reduce_layer_planes = max(planes * self.expansion // 4, 64)
65 | self.se = SEModule(planes * self.expansion, reduce_layer_planes) if use_se else None
66 |
67 | def forward(self, x):
68 | if self.downsample is not None:
69 | residual = self.downsample(x)
70 | else:
71 | residual = x
72 |
73 | out = self.conv1(x)
74 | out = self.conv2(out)
75 |
76 | if self.se is not None: out = self.se(out)
77 |
78 | out += residual
79 |
80 | out = self.relu(out)
81 |
82 | return out
83 |
84 |
85 | class Bottleneck(Module):
86 | expansion = 4
87 |
88 | def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True, anti_alias_layer=None):
89 | super(Bottleneck, self).__init__()
90 | self.conv1 = conv2d_ABN(inplanes, planes, kernel_size=1, stride=1, activation="leaky_relu",
91 | activation_param=1e-3)
92 | if stride == 1:
93 | self.conv2 = conv2d_ABN(planes, planes, kernel_size=3, stride=1, activation="leaky_relu",
94 | activation_param=1e-3)
95 | else:
96 | if anti_alias_layer is None:
97 | self.conv2 = conv2d_ABN(planes, planes, kernel_size=3, stride=2, activation="leaky_relu",
98 | activation_param=1e-3)
99 | else:
100 | self.conv2 = nn.Sequential(conv2d_ABN(planes, planes, kernel_size=3, stride=1,
101 | activation="leaky_relu", activation_param=1e-3),
102 | anti_alias_layer(channels=planes, filt_size=3, stride=2))
103 |
104 | self.conv3 = conv2d_ABN(planes, planes * self.expansion, kernel_size=1, stride=1,
105 | activation="identity")
106 |
107 | self.relu = nn.ReLU(inplace=True)
108 | self.downsample = downsample
109 | self.stride = stride
110 |
111 | reduce_layer_planes = max(planes * self.expansion // 8, 64)
112 | self.se = SEModule(planes, reduce_layer_planes) if use_se else None
113 |
114 | def forward(self, x):
115 | if self.downsample is not None:
116 | residual = self.downsample(x)
117 | else:
118 | residual = x
119 |
120 | out = self.conv1(x)
121 | out = self.conv2(out)
122 | if self.se is not None: out = self.se(out)
123 |
124 | out = self.conv3(out)
125 | out = out + residual # no inplace
126 | out = self.relu(out)
127 |
128 | return out
129 |
130 |
131 | class TResNet(Module):
132 |
133 | def __init__(self, layers, in_chans=3, width_factor=1.0,
134 | do_bottleneck_head=False, bottleneck_features=512):
135 | super(TResNet, self).__init__()
136 |
137 | # JIT layers
138 | space_to_depth = SpaceToDepthModule()
139 | anti_alias_layer = AntiAliasDownsampleLayer
140 | # global_pool_layer = FastAvgPool2d(flatten=True)
141 |
142 | # TResnet stages
143 | self.inplanes = int(64 * width_factor)
144 | self.planes = int(64 * width_factor)
145 | conv1 = conv2d_ABN(in_chans * 16, self.planes, stride=1, kernel_size=3)
146 | layer1 = self._make_layer(BasicBlock, self.planes, layers[0], stride=1, use_se=True,
147 | anti_alias_layer=anti_alias_layer) # 56x56
148 | layer2 = self._make_layer(BasicBlock, self.planes * 2, layers[1], stride=2, use_se=True,
149 | anti_alias_layer=anti_alias_layer) # 28x28
150 | layer3 = self._make_layer(Bottleneck, self.planes * 4, layers[2], stride=2, use_se=True,
151 | anti_alias_layer=anti_alias_layer) # 14x14
152 | layer4 = self._make_layer(Bottleneck, self.planes * 8, layers[3], stride=2, use_se=False,
153 | anti_alias_layer=anti_alias_layer) # 7x7
154 |
155 | # body
156 | self.body = nn.Sequential(OrderedDict([
157 | ('SpaceToDepth', space_to_depth),
158 | ('conv1', conv1),
159 | ('layer1', layer1),
160 | ('layer2', layer2),
161 | ('layer3', layer3),
162 | ('layer4', layer4)]))
163 |
164 | # head
165 | self.embeddings = []
166 | # self.global_pool = nn.Sequential(OrderedDict([('global_pool_layer', global_pool_layer)]))
167 | # self.num_features = (self.planes * 8) * Bottleneck.expansion
168 | # if do_bottleneck_head:
169 | # fc = bottleneck_head(self.num_features, num_classes,
170 | # bottleneck_features=bottleneck_features)
171 | # else:
172 | # fc = nn.Linear(self.num_features, num_classes)
173 |
174 | # self.head = nn.Sequential(OrderedDict([('fc', fc)]))
175 |
176 |         # model initialization
177 | for m in self.modules():
178 | if isinstance(m, nn.Conv2d):
179 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
180 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, InPlaceABN):
181 | nn.init.constant_(m.weight, 1)
182 | nn.init.constant_(m.bias, 0)
183 |
184 | # residual connections special initialization
185 | for m in self.modules():
186 | if isinstance(m, BasicBlock):
187 | m.conv2[1].weight = nn.Parameter(torch.zeros_like(m.conv2[1].weight)) # BN to zero
188 | if isinstance(m, Bottleneck):
189 | m.conv3[1].weight = nn.Parameter(torch.zeros_like(m.conv3[1].weight)) # BN to zero
190 | if isinstance(m, nn.Linear): m.weight.data.normal_(0, 0.01)
191 |
192 | def _make_layer(self, block, planes, blocks, stride=1, use_se=True, anti_alias_layer=None):
193 | downsample = None
194 | if stride != 1 or self.inplanes != planes * block.expansion:
195 | layers = []
196 | if stride == 2:
197 | # avg pooling before 1x1 conv
198 | layers.append(nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True, count_include_pad=False))
199 | layers += [conv2d_ABN(self.inplanes, planes * block.expansion, kernel_size=1, stride=1,
200 | activation="identity")]
201 | downsample = nn.Sequential(*layers)
202 |
203 | layers = []
204 | layers.append(block(self.inplanes, planes, stride, downsample, use_se=use_se,
205 | anti_alias_layer=anti_alias_layer))
206 | self.inplanes = planes * block.expansion
207 | for i in range(1, blocks): layers.append(
208 | block(self.inplanes, planes, use_se=use_se, anti_alias_layer=anti_alias_layer))
209 | return nn.Sequential(*layers)
210 |
211 | def forward(self, x):
212 | x = self.body(x)
213 | # self.embeddings = self.global_pool(x)
214 | # logits = self.head(self.embeddings)
215 | return x
216 |
217 | @BACKBONE.register("tresnetM")
218 | def TResnetM():
219 | """Constructs a medium TResnet model.
220 | """
221 | in_chans = 3
222 | model = TResNet(layers=[3, 4, 11, 3], in_chans=in_chans)
223 | return model
224 |
225 |
226 | @BACKBONE.register("tresnetL")
227 | def TResnetL():
228 | """Constructs a large TResnet model.
229 | """
230 | model = TResNet(layers=[4, 5, 18, 3], width_factor=1.2, do_bottleneck_head=False)
231 |
232 | state = torch.load('/mnt/data1/jiajian/code/checkpoints/tresnet_l.pth', map_location='cpu')
233 | filtered_dict = {k: v for k, v in state['model'].items() if 'head.fc' not in k}
234 | model.load_state_dict(filtered_dict, strict=True)
235 | return model
236 |
237 |
238 | def TResnetXL(model_params):
239 |     """Constructs an extra-large (XL) TResnet model.
240 | """
241 | in_chans = 3
242 |     num_classes = model_params['num_classes']  # unused here: the classification head is built outside the backbone
243 | model = TResNet(layers=[4, 5, 24, 3], in_chans=in_chans, width_factor=1.3)
244 |
245 | return model
246 |
247 |
248 | if __name__ == '__main__':
249 | a = TResnetL()
250 | b = torch.randn(1, 3, 448, 448)
251 | c = a(b)
252 | print(c.shape)
--------------------------------------------------------------------------------
/models/base_block.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.init as init
6 |
7 | import torch.nn.functional as F
8 | from torch.nn.modules.batchnorm import _BatchNorm
9 |
10 | from models.registry import CLASSIFIER
11 |
12 |
13 | class BaseClassifier(nn.Module):
14 |
15 | def fresh_params(self, bn_wd):
16 | if bn_wd:
17 | return self.parameters()
18 | else:
19 | return self.named_parameters()
20 |
21 | @CLASSIFIER.register("linear")
22 | class LinearClassifier(BaseClassifier):
23 | def __init__(self, nattr, c_in, bn=False, pool='avg', scale=1):
24 | super().__init__()
25 |
26 | self.pool = pool
27 | if pool == 'avg':
28 | self.pool = nn.AdaptiveAvgPool2d(1)
29 | elif pool == 'max':
30 | self.pool = nn.AdaptiveMaxPool2d(1)
31 |
32 | self.logits = nn.Sequential(
33 | nn.Linear(c_in, nattr),
34 | nn.BatchNorm1d(nattr) if bn else nn.Identity()
35 | )
36 |
37 |
38 | def forward(self, feature, label=None):
39 |
40 |         if len(feature.shape) == 3:  # for vit: token sequence (bt, hw, c)
41 |
42 | bt, hw, c = feature.shape
43 | # NOTE ONLY USED FOR INPUT SIZE (256, 192)
44 | h = 16
45 | w = 12
46 | feature = feature.reshape(bt, h, w, c).permute(0, 3, 1, 2)
47 |
48 | feat = self.pool(feature).view(feature.size(0), -1)
49 | x = self.logits(feat)
50 |
51 | return [x], feature
52 |
53 |
54 |
55 | @CLASSIFIER.register("cosine")
56 | class NormClassifier(BaseClassifier):
57 | def __init__(self, nattr, c_in, bn=False, pool='avg', scale=30):
58 | super().__init__()
59 |
60 | self.logits = nn.Parameter(torch.FloatTensor(nattr, c_in))
61 |
62 | stdv = 1. / math.sqrt(self.logits.data.size(1))
63 | self.logits.data.uniform_(-stdv, stdv)
64 |
65 | self.pool = pool
66 | if pool == 'avg':
67 | self.pool = nn.AdaptiveAvgPool2d(1)
68 | elif pool == 'max':
69 | self.pool = nn.AdaptiveMaxPool2d(1)
70 |
71 | def forward(self, feature, label=None):
72 | feat = self.pool(feature).view(feature.size(0), -1)
73 | feat_n = F.normalize(feat, dim=1)
74 | weight_n = F.normalize(self.logits, dim=1)
75 | x = torch.matmul(feat_n, weight_n.t())
76 | return [x], feat_n
77 |
78 |
79 | def initialize_weights(module):
80 | for m in module.children():
81 | if isinstance(m, nn.Conv2d):
82 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
83 | m.weight.data.normal_(0, math.sqrt(2. / n))
84 | elif isinstance(m, _BatchNorm):
85 | m.weight.data.fill_(1)
86 | if m.bias is not None:
87 | m.bias.data.zero_()
88 | elif isinstance(m, nn.Linear):
89 | stdv = 1. / math.sqrt(m.weight.size(1))
90 | m.weight.data.uniform_(-stdv, stdv)
91 |
92 |
93 | class FeatClassifier(nn.Module):
94 |
95 | def __init__(self, backbone, classifier, bn_wd=True):
96 | super(FeatClassifier, self).__init__()
97 |
98 | self.backbone = backbone
99 | self.classifier = classifier
100 | self.bn_wd = bn_wd
101 |
102 | def fresh_params(self):
103 | return self.classifier.fresh_params(self.bn_wd)
104 |
105 | def finetune_params(self):
106 |
107 | if self.bn_wd:
108 | return self.backbone.parameters()
109 | else:
110 | return self.backbone.named_parameters()
111 |
112 | def forward(self, x, label=None):
113 | feat_map = self.backbone(x)
114 | logits, feat = self.classifier(feat_map, label)
115 | return logits, feat
116 |
117 |
--------------------------------------------------------------------------------
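A minimal assembly sketch (argument values assumed; 26 attributes as in PA100k): a registered head is looked up in CLASSIFIER and wrapped with a backbone by FeatClassifier, whose forward returns ([logits], feature):

    import torch
    from models.registry import CLASSIFIER
    from models.base_block import FeatClassifier
    from models.backbone.resnet_ibn import resnet18_ibn_a   # any backbone that returns a feature map

    backbone = resnet18_ibn_a(pretrained=False)
    head = CLASSIFIER['linear'](nattr=26, c_in=512)          # 26 attributes, 512-d features
    model = FeatClassifier(backbone, head).eval()

    logits, feat_map = model(torch.randn(2, 3, 256, 192))
    print(logits[0].shape)    # torch.Size([2, 26]); the classifier returns ([logits], feature)
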
/models/model_ema.py:
--------------------------------------------------------------------------------
1 | """ Exponential Moving Average (EMA) of model updates
2 |
3 | Hacked together by / Copyright 2020 Ross Wightman
4 | """
5 | import logging
6 | from collections import OrderedDict
7 | from copy import deepcopy
8 |
9 | import torch
10 | import torch.nn as nn
11 |
12 | _logger = logging.getLogger(__name__)
13 |
14 |
15 | class ModelEma:
16 | """ Model Exponential Moving Average (DEPRECATED)
17 |
18 | Keep a moving average of everything in the model state_dict (parameters and buffers).
19 |     This version is deprecated; it does not work with scripted models. Will be removed eventually.
20 |
21 | This is intended to allow functionality like
22 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
23 |
24 | A smoothed version of the weights is necessary for some training schemes to perform well.
25 | E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use
26 |     RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 require EMA
27 | smoothing of weights to match results. Pay attention to the decay constant you are using
28 | relative to your update count per epoch.
29 |
30 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
31 | disable validation of the EMA weights. Validation will have to be done manually in a separate
32 | process, or after the training stops converging.
33 |
34 |     This class is sensitive to where it is initialized in the sequence of model init,
35 | GPU assignment and distributed training wrappers.
36 | """
37 | def __init__(self, model, decay=0.9999, device='', resume=''):
38 | # make a copy of the model for accumulating moving average of weights
39 | self.ema = deepcopy(model)
40 | self.ema.eval()
41 | self.decay = decay
42 | self.device = device # perform ema on different device from model if set
43 | if device:
44 | self.ema.to(device=device)
45 | self.ema_has_module = hasattr(self.ema, 'module')
46 | if resume:
47 | self._load_checkpoint(resume)
48 | for p in self.ema.parameters():
49 | p.requires_grad_(False)
50 |
51 | def _load_checkpoint(self, checkpoint_path):
52 | checkpoint = torch.load(checkpoint_path, map_location='cpu')
53 | assert isinstance(checkpoint, dict)
54 | if 'state_dict_ema' in checkpoint:
55 | new_state_dict = OrderedDict()
56 | for k, v in checkpoint['state_dict_ema'].items():
57 | # ema model may have been wrapped by DataParallel, and need module prefix
58 | if self.ema_has_module:
59 | name = 'module.' + k if not k.startswith('module') else k
60 | else:
61 | name = k
62 | new_state_dict[name] = v
63 | self.ema.load_state_dict(new_state_dict)
64 | _logger.info("Loaded state_dict_ema")
65 | else:
66 | _logger.warning("Failed to find state_dict_ema, starting from loaded model weights")
67 |
68 | def update(self, model):
69 | # correct a mismatch in state dict keys
70 | needs_module = hasattr(model, 'module') and not self.ema_has_module
71 | with torch.no_grad():
72 | msd = model.state_dict()
73 | for k, ema_v in self.ema.state_dict().items():
74 | if needs_module:
75 | k = 'module.' + k
76 | model_v = msd[k].detach()
77 | if self.device:
78 | model_v = model_v.to(device=self.device)
79 | ema_v.copy_(ema_v * self.decay + (1. - self.decay) * model_v)
80 |
81 |
82 | class ModelEmaV2(nn.Module):
83 | """ Model Exponential Moving Average V2
84 |
85 | Keep a moving average of everything in the model state_dict (parameters and buffers).
86 | V2 of this module is simpler, it does not match params/buffers based on name but simply
87 | iterates in order. It works with torchscript (JIT of full model).
88 |
89 | This is intended to allow functionality like
90 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
91 |
92 | A smoothed version of the weights is necessary for some training schemes to perform well.
93 | E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use
94 |     RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 require EMA
95 | smoothing of weights to match results. Pay attention to the decay constant you are using
96 | relative to your update count per epoch.
97 |
98 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
99 | disable validation of the EMA weights. Validation will have to be done manually in a separate
100 | process, or after the training stops converging.
101 |
102 |     This class is sensitive to where it is initialized in the sequence of model init,
103 | GPU assignment and distributed training wrappers.
104 | """
105 | def __init__(self, model, decay=0.9999, device=None):
106 | super(ModelEmaV2, self).__init__()
107 | # make a copy of the model for accumulating moving average of weights
108 | self.module = deepcopy(model)
109 | self.module.eval()
110 | self.decay = decay
111 | self.device = device # perform ema on different device from model if set
112 | if self.device is not None:
113 | self.module.to(device=device)
114 |
115 | def _update(self, model, update_fn):
116 | with torch.no_grad():
117 | for ema_v, model_v in zip(self.module.state_dict().values(), model.state_dict().values()):
118 | if self.device is not None:
119 | model_v = model_v.to(device=self.device)
120 | ema_v.copy_(update_fn(ema_v, model_v))
121 |
122 | def update(self, model):
123 | self._update(model, update_fn=lambda e, m: self.decay * e + (1. - self.decay) * m)
124 |
125 | def set(self, model):
126 | self._update(model, update_fn=lambda e, m: m)
127 |
--------------------------------------------------------------------------------
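A training-loop sketch of the intended ModelEmaV2 usage: construct it after the model is on its final device, call update() after each optimizer step, and evaluate ema.module instead of the live model:

    import torch
    import torch.nn as nn
    from models.model_ema import ModelEmaV2

    model = nn.Linear(10, 2)
    ema = ModelEmaV2(model, decay=0.999)      # create after the model is on its final device
    opt = torch.optim.SGD(model.parameters(), lr=0.1)

    for _ in range(5):
        loss = model(torch.randn(4, 10)).pow(2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()
        ema.update(model)                     # EMA over parameters and buffers, in state_dict order

    with torch.no_grad():
        out = ema.module(torch.randn(1, 10))  # evaluate the smoothed copy
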
/models/model_factory.py:
--------------------------------------------------------------------------------
1 | from models.registry import BACKBONE
2 | from models.registry import CLASSIFIER
3 | from models.registry import LOSSES
4 |
5 |
6 | def build_backbone(key, multi_scale=False):
7 |
8 | model_dict = {
9 | 'resnet34': 512,
10 | 'resnet18': 512,
11 | 'resnet50': 2048,
12 | 'resnet101': 2048,
13 | 'tresnet': 2432,
14 | 'swin_s': 768,
15 | 'swin_b': 1024,
16 | 'vit_s': 768,
17 | 'vit_b': 768,
18 | 'bninception': 1024,
19 | 'tresnetM': 2048,
20 |         'tresnetL': 2432,  # width_factor 1.2: int(64 * 1.2) * 8 * Bottleneck.expansion = 2432
21 |
22 | }
23 |
24 | model = BACKBONE[key]()
25 | output_d = model_dict[key]
26 |
27 | return model, output_d
28 |
29 |
30 | def build_classifier(key):
31 |
32 | return CLASSIFIER[key]
33 |
34 |
35 | def build_loss(key):
36 |
37 | return LOSSES[key]
38 |
39 |
--------------------------------------------------------------------------------
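A sketch of the factory API above, assuming the backbone and classifier modules have been imported so their register decorators have run. Note the asymmetry: build_backbone returns an instantiated model together with its output feature dimension, while build_classifier returns the class itself, not an instance:

    import models.backbone.tresnet   # noqa: F401 -- runs the @BACKBONE.register decorators
    import models.base_block         # noqa: F401 -- registers the 'linear'/'cosine' heads
    from models.model_factory import build_backbone, build_classifier

    backbone, c_out = build_backbone('tresnetM')   # the model plus its feature dim (2048)
    head_cls = build_classifier('linear')          # the class, not an instance
    head = head_cls(nattr=26, c_in=c_out)
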
/models/registry.py:
--------------------------------------------------------------------------------
1 | def _register_generic(module_dict, module_name, module):
2 | assert module_name not in module_dict
3 | module_dict[module_name] = module
4 |
5 |
6 | class Registry(dict):
7 | """
8 |     A helper class for managing module registration; it extends a dictionary
9 |     and provides register functions.
10 | 
11 |     Eg. creating a registry:
12 |         some_registry = Registry({"default": default_module})
13 | 
14 |     There are two ways of registering new modules:
15 |     1): the normal way is just calling the register function:
16 |         def foo():
17 |             ...
18 |         some_registry.register("foo_module", foo)
19 |     2): used as a decorator when declaring the module:
20 |         @some_registry.register("foo_module")
21 |         @some_registry.register("foo_module_nickname")
22 |         def foo():
23 |             ...
24 | 
25 |     Access of a module is just like using a dictionary, eg:
26 |         f = some_registry["foo_module"]
27 |     """
28 | def __init__(self, *args, **kwargs):
29 | super(Registry, self).__init__(*args, **kwargs)
30 |
31 | def register(self, module_name, module=None):
32 | # used as function call
33 | if module is not None:
34 | _register_generic(self, module_name, module)
35 | return
36 |
37 | # used as decorator
38 | def register_fn(fn):
39 | _register_generic(self, module_name, fn)
40 | return fn
41 |
42 | return register_fn
43 |
44 |
45 | BACKBONE = Registry()
46 | CLASSIFIER = Registry()
47 | LOSSES = Registry()
--------------------------------------------------------------------------------
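A throwaway sketch of the two registration styles the docstring describes, using a fresh registry rather than the shared BACKBONE/CLASSIFIER/LOSSES instances:

    from models.registry import Registry

    REG = Registry()
    REG.register('double', lambda x: 2 * x)   # plain function-call registration

    @REG.register('identity')                 # decorator-style registration
    def identity(x):
        return x

    print(sorted(REG.keys()))                  # ['double', 'identity']
    assert REG['double'](3) == 6               # access is ordinary dict lookup
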
/optim/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/optim/__init__.py
--------------------------------------------------------------------------------
/optim/adamw.py:
--------------------------------------------------------------------------------
1 | """ AdamW Optimizer
2 | Impl copied from PyTorch master
3 | NOTE: Builtin optim.AdamW is used by the factory, this impl only serves as a Python based reference, will be removed
4 | someday
5 | """
6 | import math
7 | import torch
8 | from torch.optim.optimizer import Optimizer
9 |
10 |
11 | class AdamW(Optimizer):
12 | r"""Implements AdamW algorithm.
13 | The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_.
14 | The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_.
15 | Arguments:
16 | params (iterable): iterable of parameters to optimize or dicts defining
17 | parameter groups
18 | lr (float, optional): learning rate (default: 1e-3)
19 | betas (Tuple[float, float], optional): coefficients used for computing
20 | running averages of gradient and its square (default: (0.9, 0.999))
21 | eps (float, optional): term added to the denominator to improve
22 | numerical stability (default: 1e-8)
23 | weight_decay (float, optional): weight decay coefficient (default: 1e-2)
24 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this
25 | algorithm from the paper `On the Convergence of Adam and Beyond`_
26 | (default: False)
27 | .. _Adam\: A Method for Stochastic Optimization:
28 | https://arxiv.org/abs/1412.6980
29 | .. _Decoupled Weight Decay Regularization:
30 | https://arxiv.org/abs/1711.05101
31 | .. _On the Convergence of Adam and Beyond:
32 | https://openreview.net/forum?id=ryQu7f-RZ
33 | """
34 |
35 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
36 | weight_decay=1e-2, amsgrad=False):
37 | if not 0.0 <= lr:
38 | raise ValueError("Invalid learning rate: {}".format(lr))
39 | if not 0.0 <= eps:
40 | raise ValueError("Invalid epsilon value: {}".format(eps))
41 | if not 0.0 <= betas[0] < 1.0:
42 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
43 | if not 0.0 <= betas[1] < 1.0:
44 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
45 | defaults = dict(lr=lr, betas=betas, eps=eps,
46 | weight_decay=weight_decay, amsgrad=amsgrad)
47 | super(AdamW, self).__init__(params, defaults)
48 |
49 | def __setstate__(self, state):
50 | super(AdamW, self).__setstate__(state)
51 | for group in self.param_groups:
52 | group.setdefault('amsgrad', False)
53 |
54 | @torch.no_grad()
55 | def step(self, closure=None):
56 | """Performs a single optimization step.
57 | Arguments:
58 | closure (callable, optional): A closure that reevaluates the model
59 | and returns the losses.
60 | """
61 | loss = None
62 | if closure is not None:
63 | with torch.enable_grad():
64 | loss = closure()
65 |
66 | for group in self.param_groups:
67 | for p in group['params']:
68 | if p.grad is None:
69 | continue
70 |
71 | # Perform stepweight decay
72 | p.data.mul_(1 - group['lr'] * group['weight_decay'])
73 |
74 | # Perform optimization step
75 | grad = p.grad
76 | if grad.is_sparse:
77 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
78 | amsgrad = group['amsgrad']
79 |
80 | state = self.state[p]
81 |
82 | # State initialization
83 | if len(state) == 0:
84 | state['step'] = 0
85 | # Exponential moving average of gradient values
86 | state['exp_avg'] = torch.zeros_like(p)
87 | # Exponential moving average of squared gradient values
88 | state['exp_avg_sq'] = torch.zeros_like(p)
89 | if amsgrad:
90 | # Maintains max of all exp. moving avg. of sq. grad. values
91 | state['max_exp_avg_sq'] = torch.zeros_like(p)
92 |
93 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
94 | if amsgrad:
95 | max_exp_avg_sq = state['max_exp_avg_sq']
96 | beta1, beta2 = group['betas']
97 |
98 | state['step'] += 1
99 | bias_correction1 = 1 - beta1 ** state['step']
100 | bias_correction2 = 1 - beta2 ** state['step']
101 |
102 | # Decay the first and second moment running average coefficient
103 | exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
104 | exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
105 | if amsgrad:
106 | # Maintains the maximum of all 2nd moment running avg. till now
107 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
108 | # Use the max. for normalizing running avg. of gradient
109 | denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
110 | else:
111 | denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
112 |
113 | step_size = group['lr'] / bias_correction1
114 |
115 | p.addcdiv_(exp_avg, denom, value=-step_size)
116 |
117 | return loss
--------------------------------------------------------------------------------
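A toy sketch of this reference AdamW; as the header notes, the factory uses the builtin torch.optim.AdamW, which should behave equivalently:

    import torch
    import torch.nn as nn
    from optim.adamw import AdamW

    model = nn.Linear(4, 1)
    opt = AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)

    x, y = torch.randn(8, 4), torch.randn(8, 1)
    for _ in range(3):
        loss = (model(x) - y).pow(2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()   # decoupled weight decay applied to the weights first, then the Adam update
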
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.4.1
2 | torch==1.4.0
3 | torchvision==0.5.0
4 | tqdm==4.43.0
5 | easydict==1.9
6 | numpy==1.18.1
7 | Pillow==7.1.2
8 |
--------------------------------------------------------------------------------
/scheduler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/scheduler/__init__.py
--------------------------------------------------------------------------------
/scheduler/cos_annealing_with_restart.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import math
3 | from torch.optim.lr_scheduler import _LRScheduler
4 | import matplotlib.pyplot as plt
5 |
6 | class CosineAnnealingLR_with_Restart(_LRScheduler):
7 |     r"""Set the learning rate of each parameter group using a cosine annealing
8 | schedule, where :math:`\eta_{max}` is set to the initial lr and
9 | :math:`T_{cur}` is the number of epochs since the last restart in SGDR:
10 |
11 | .. math::
12 |
13 | \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 +
14 | \cos(\frac{T_{cur}}{T_{max}}\pi))
15 |
16 | When last_epoch=-1, sets initial lr as lr.
17 |
18 | It has been proposed in
19 |     `SGDR: Stochastic Gradient Descent with Warm Restarts`_. The original PyTorch
20 |     implementation only implements the cosine annealing part of SGDR;
21 |     I added my own implementation of the restarts part.
22 |
23 | Args:
24 | optimizer (Optimizer): Wrapped optimizer.
25 | T_max (int): Maximum number of iterations.
26 | T_mult (float): Increase T_max by a factor of T_mult
27 | eta_min (float): Minimum learning rate. Default: 0.
28 | last_epoch (int): The index of last epoch. Default: -1.
29 |
30 | .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
31 | https://arxiv.org/abs/1608.03983
32 | """
33 |
34 | def __init__(self, optimizer, T_max, T_mult, eta_min=0, last_epoch=-1):
35 | self.T_max = T_max
36 | self.T_mult = T_mult
37 | self.Te = self.T_max
38 | self.eta_min = eta_min
39 | self.current_epoch = last_epoch
40 |
41 | self.lr_history = []
42 |
43 | super(CosineAnnealingLR_with_Restart, self).__init__(optimizer, last_epoch)
44 |
45 | def get_lr(self):
46 | new_lrs = [self.eta_min + (base_lr - self.eta_min) *
47 | (1 + math.cos(math.pi * self.current_epoch / self.Te)) / 2
48 | for base_lr in self.base_lrs]
49 |
50 | self.lr_history.append(new_lrs)
51 | return new_lrs
52 |
53 | def step(self, epoch=None):
54 | if epoch is None:
55 | epoch = self.last_epoch + 1
56 | self.last_epoch = epoch
57 | self.current_epoch += 1
58 |
59 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
60 | param_group['lr'] = lr
61 |
62 | ## restart
63 | if self.current_epoch == self.Te:
64 |
65 | ## reset epochs since the last reset
66 | self.current_epoch = 0
67 |
68 | ## reset the next goal
69 | self.Te = int(self.Te * self.T_mult)
70 | self.T_max = self.T_max + self.Te
71 |
72 |
73 | if __name__ == '__main__':
74 |
75 | params = torch.zeros(10)
76 | lr = 2e-4
77 |
78 | optim = torch.optim.SGD([params], lr, momentum=0.9, weight_decay=5e-4)
79 |
80 | num_steps = 1406
81 |
82 |
83 | scheduler = CosineAnnealingLR_with_Restart(optim, T_max=num_steps * 16, T_mult=1.0, eta_min=1e-7)
84 |
85 | lr = []
86 | lr_s = []
87 | for i in range(num_steps * 8):
88 | lr.append(optim.param_groups[0]['lr'])
89 | optim.step()
90 | scheduler.step()
91 |
92 | plt.plot(range(num_steps * 8), lr)
93 | plt.show()
94 |
95 |
--------------------------------------------------------------------------------
/scheduler/cosine_lr.py:
--------------------------------------------------------------------------------
1 | """ Cosine Scheduler
2 | Cosine LR schedule with warmup, cycle/restarts, noise.
3 | Hacked together by / Copyright 2020 Ross Wightman
4 | """
5 | import logging
6 | import math
7 | import numpy as np
8 | import torch
9 | from matplotlib import pyplot as plt
10 |
11 | from timm.scheduler.scheduler import Scheduler
12 | from torch.optim import lr_scheduler
13 | from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
14 |
15 | _logger = logging.getLogger(__name__)
16 |
17 |
18 | class CosineLRScheduler(Scheduler):
19 | """
20 | Cosine decay with restarts.
21 | This is described in the paper https://arxiv.org/abs/1608.03983.
22 | Inspiration from
23 | https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py
24 | """
25 |
26 | def __init__(self,
27 | optimizer: torch.optim.Optimizer,
28 | t_initial: int,
29 | t_mul: float = 1.,
30 | lr_min: float = 0.,
31 | decay_rate: float = 1.,
32 | warmup_t=0,
33 | warmup_lr_init=0,
34 | warmup_prefix=False,
35 | cycle_limit=0,
36 | t_in_epochs=True,
37 | noise_range_t=None,
38 | noise_pct=0.67,
39 | noise_std=1.0,
40 | noise_seed=42,
41 | initialize=True) -> None:
42 | """
43 |
44 | @param optimizer:
45 |         @param t_initial: length (in epochs or updates) of the first cosine cycle
46 |         @param t_mul: multiplier applied to the cycle length after each restart
47 |         @param lr_min: the final learning rate each cycle decays to
48 |         @param decay_rate: decay applied to the peak learning rate after each restart
49 |         @param warmup_t: number of warmup epochs (or updates)
50 |         @param warmup_lr_init: the initial learning rate of the warmup stage
51 |         @param warmup_prefix: if True, the cycle clock starts after warmup (t is offset by warmup_t)
52 |         @param cycle_limit: maximum number of cosine cycles (0 means no limit)
53 | @param t_in_epochs:
54 | @param noise_range_t:
55 | @param noise_pct:
56 | @param noise_std:
57 | @param noise_seed:
58 | @param initialize:
59 | """
60 | super().__init__(
61 | optimizer, param_group_field="lr",
62 | noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed,
63 | initialize=initialize)
64 |
65 | assert t_initial > 0
66 | assert lr_min >= 0
67 | if t_initial == 1 and t_mul == 1 and decay_rate == 1:
68 | _logger.warning("Cosine annealing scheduler will have no effect on the learning "
69 |                             "rate since t_initial = t_mul = decay_rate = 1.")
70 | self.t_initial = t_initial
71 | self.t_mul = t_mul
72 | self.lr_min = lr_min
73 | self.decay_rate = decay_rate
74 | self.cycle_limit = cycle_limit
75 | self.warmup_t = warmup_t
76 | self.warmup_lr_init = warmup_lr_init
77 | self.warmup_prefix = warmup_prefix
78 | self.t_in_epochs = t_in_epochs
79 | if self.warmup_t:
80 | self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
81 | super().update_groups(self.warmup_lr_init)
82 | else:
83 | self.warmup_steps = [1 for _ in self.base_values]
84 |
85 | def _get_lr(self, t):
86 | if t < self.warmup_t:
87 | lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps]
88 | else:
89 | if self.warmup_prefix:
90 | t = t - self.warmup_t
91 |
92 | if self.t_mul != 1:
93 |                 # math.log(1 - t / self.t_initial * (1 - self.t_mul), self.t_mul) < 1 always holds
94 | i = math.floor(math.log(1 - t / self.t_initial * (1 - self.t_mul), self.t_mul))
95 | t_i = self.t_mul ** i * self.t_initial
96 | t_curr = t - (1 - self.t_mul ** i) / (1 - self.t_mul) * self.t_initial
97 | else:
98 | i = t // self.t_initial
99 | t_i = self.t_initial
100 | t_curr = t - (self.t_initial * i)
101 |
102 | gamma = self.decay_rate ** i
103 | lr_min = self.lr_min * gamma
104 | lr_max_values = [v * gamma for v in self.base_values]
105 |
106 | if self.cycle_limit == 0 or (self.cycle_limit > 0 and i < self.cycle_limit):
107 | # 0.5 * (1 + math.cos(math.pi * t_curr / t_i)), the proportion; (lr_max - lr_min), lr scale
108 | lrs = [
109 | lr_min + 0.5 * (lr_max - lr_min) * (1 + math.cos(math.pi * t_curr / t_i)) for lr_max in
110 | lr_max_values
111 | ]
112 | else:
113 | lrs = [self.lr_min for _ in self.base_values]
114 |
115 | return lrs
116 |
117 | def get_epoch_values(self, epoch: int):
118 | if self.t_in_epochs:
119 | return self._get_lr(epoch)
120 | else:
121 | return None
122 |
123 | def get_update_values(self, num_updates: int):
124 | if not self.t_in_epochs:
125 | return self._get_lr(num_updates)
126 | else:
127 | return None
128 |
129 | def get_cycle_length(self, cycles=0):
130 | if not cycles:
131 | cycles = self.cycle_limit
132 | cycles = max(1, cycles)
133 | if self.t_mul == 1.0:
134 | return self.t_initial * cycles
135 | else:
136 | return int(math.floor(-self.t_initial * (self.t_mul ** cycles - 1) / (1 - self.t_mul)))
137 |
138 |
139 | if __name__ == '__main__':
140 |
141 | params = torch.zeros(10)
142 | lr = 2e-4
143 |
144 | optim = torch.optim.SGD([params], lr, momentum=0.9, weight_decay=5e-4)
145 |
146 | # num_epochs = 160
147 |
148 | # scheduler = CosineLRScheduler(
149 | # optim,
150 | # t_initial=30,
151 | # t_mul=1, # cosine decay epoch multiplier
152 |     # # lr_min=1e-5, # the final value the cosine lr decays to
153 | # decay_rate=0.5,
154 | # # warmup_lr_init=1e-5,
155 | # # warmup_t=3,
156 |     # cycle_limit=3, # the maximum number of cycles
157 | # # t_in_epochs=True,
158 | # # noise_range_t=None,
159 | # # noise_pct=0.67,
160 | # # noise_std=1,
161 | # # noise_seed=42
162 | # )
163 |
164 |     # scheduler.step() should be called after every batch
165 | scheduler = lr_scheduler.OneCycleLR(optim, max_lr=lr, steps_per_epoch=641, epochs=40,
166 | pct_start=0.0)
167 |
168 | # plt.figure(figsize=(8, 8))
169 | lr = []
170 | lr_s = []
171 | for i in range(40):
172 | for j in range(641):
173 | lr.append(optim.param_groups[0]['lr'])
174 | lr_s.append(scheduler.get_last_lr()[0])
175 | optim.step()
176 | scheduler.step()
177 | plt.plot(range(40 * 641), lr)
178 | plt.show()
179 |
180 | plt.plot(range(40 * 641), lr_s)
181 | plt.show()
182 |
--------------------------------------------------------------------------------
/scheduler/scheduler.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any
2 |
3 | import torch
4 |
5 |
6 | class Scheduler:
7 | """ Parameter Scheduler Base Class
8 | A scheduler base class that can be used to schedule any optimizer parameter groups.
9 | Unlike the builtin PyTorch schedulers, this is intended to be consistently called
10 | * At the END of each epoch, before incrementing the epoch count, to calculate next epoch's value
11 | * At the END of each optimizer update, after incrementing the update count, to calculate next update's value
12 | The schedulers built on this should try to remain as stateless as possible (for simplicity).
13 | This family of schedulers is attempting to avoid the confusion of the meaning of 'last_epoch'
14 | and -1 values for special behaviour. All epoch and update counts must be tracked in the training
15 | code and explicitly passed in to the schedulers on the corresponding step or step_update call.
16 | Based on ideas from:
17 | * https://github.com/pytorch/fairseq/tree/master/fairseq/optim/lr_scheduler
18 | * https://github.com/allenai/allennlp/tree/master/allennlp/training/learning_rate_schedulers
19 | """
20 |
21 | def __init__(self,
22 | optimizer: torch.optim.Optimizer,
23 | param_group_field: str,
24 | noise_range_t=None,
25 | noise_type='normal',
26 | noise_pct=0.67,
27 | noise_std=1.0,
28 | noise_seed=None,
29 | initialize: bool = True) -> None:
30 | self.optimizer = optimizer
31 | self.param_group_field = param_group_field
32 | self._initial_param_group_field = f"initial_{param_group_field}"
33 | if initialize:
34 | for i, group in enumerate(self.optimizer.param_groups):
35 | if param_group_field not in group:
36 | raise KeyError(f"{param_group_field} missing from param_groups[{i}]")
37 | group.setdefault(self._initial_param_group_field, group[param_group_field])
38 | else:
39 | for i, group in enumerate(self.optimizer.param_groups):
40 | if self._initial_param_group_field not in group:
41 | raise KeyError(f"{self._initial_param_group_field} missing from param_groups[{i}]")
42 | self.base_values = [group[self._initial_param_group_field] for group in self.optimizer.param_groups]
43 | self.metric = None # any point to having this for all?
44 | self.noise_range_t = noise_range_t
45 | self.noise_pct = noise_pct
46 | self.noise_type = noise_type
47 | self.noise_std = noise_std
48 | self.noise_seed = noise_seed if noise_seed is not None else 42
49 | self.update_groups(self.base_values)
50 |
51 | def state_dict(self) -> Dict[str, Any]:
52 | return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
53 |
54 | def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
55 | self.__dict__.update(state_dict)
56 |
57 | def get_epoch_values(self, epoch: int):
58 | return None
59 |
60 | def get_update_values(self, num_updates: int):
61 | return None
62 |
63 | def step(self, epoch: int, metric: float = None) -> None:
64 | self.metric = metric
65 | values = self.get_epoch_values(epoch)
66 | if values is not None:
67 | values = self._add_noise(values, epoch)
68 | self.update_groups(values)
69 |
70 | def step_update(self, num_updates: int, metric: float = None):
71 | self.metric = metric
72 | values = self.get_update_values(num_updates)
73 | if values is not None:
74 | values = self._add_noise(values, num_updates)
75 | self.update_groups(values)
76 |
77 | def update_groups(self, values):
78 | if not isinstance(values, (list, tuple)):
79 | values = [values] * len(self.optimizer.param_groups)
80 | for param_group, value in zip(self.optimizer.param_groups, values):
81 | param_group[self.param_group_field] = value
82 |
83 | def _add_noise(self, lrs, t):
84 | if self.noise_range_t is not None:
85 | if isinstance(self.noise_range_t, (list, tuple)):
86 | apply_noise = self.noise_range_t[0] <= t < self.noise_range_t[1]
87 | else:
88 | apply_noise = t >= self.noise_range_t
89 | if apply_noise:
90 | g = torch.Generator()
91 | g.manual_seed(self.noise_seed + t)
92 | if self.noise_type == 'normal':
93 | while True:
94 | # resample if noise out of percent limit, brute force but shouldn't spin much
95 | noise = torch.randn(1, generator=g).item()
96 | if abs(noise) < self.noise_pct:
97 | break
98 | else:
99 | noise = 2 * (torch.rand(1, generator=g).item() - 0.5) * self.noise_pct
100 | lrs = [v + v * noise for v in lrs]
101 | return lrs
102 |
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencebond/Rethinking_of_PAR/5f09ea67778ff8a3d83b2bb9a4a9b998df0c4333/tools/__init__.py
--------------------------------------------------------------------------------
/tools/distributed.py:
--------------------------------------------------------------------------------
1 | """ Distributed training/validation utils
2 |
3 | Hacked together by / Copyright 2020 Ross Wightman
4 | """
5 | import torch
6 | from torch import distributed as dist
7 |
8 | from models.model_ema import ModelEma
9 |
10 |
11 | def unwrap_model(model):
12 | if isinstance(model, ModelEma):
13 | return unwrap_model(model.ema)
14 | else:
15 | return model.module if hasattr(model, 'module') else model
16 |
17 |
18 | def reduce_tensor(tensor, n):
19 | rt = tensor.clone()
20 | dist.all_reduce(rt, op=dist.ReduceOp.SUM)
21 | rt /= n
22 | return rt
23 |
24 |
25 | def distribute_bn(model, world_size, reduce=False):
26 | # ensure every node has the same running bn stats
27 | for bn_name, bn_buf in unwrap_model(model).named_buffers(recurse=True):
28 | if ('running_mean' in bn_name) or ('running_var' in bn_name):
29 | if reduce:
30 | # average bn stats across whole group
31 | torch.distributed.all_reduce(bn_buf, op=dist.ReduceOp.SUM)
32 | bn_buf /= float(world_size)
33 | else:
34 | # broadcast bn stats from rank 0 to whole group
35 | torch.distributed.broadcast(bn_buf, 0)
36 |
--------------------------------------------------------------------------------
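A sketch of these helpers inside a DDP run; it assumes the process group is already initialized (e.g. the script was launched via torchrun), which is not shown:

    import torch
    from torch import distributed as dist
    from tools.distributed import reduce_tensor, distribute_bn

    def log_mean_loss(loss: torch.Tensor):
        # average a scalar metric across all ranks before logging on rank 0
        reduced = reduce_tensor(loss.detach(), dist.get_world_size())
        if dist.get_rank() == 0:
            print(f'loss: {reduced.item():.4f}')

    # at epoch end, synchronize BatchNorm running stats across ranks:
    # distribute_bn(model, dist.get_world_size(), reduce=True)
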
/tools/function.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import OrderedDict
3 |
4 | import numpy as np
5 | import torch
6 |
7 | from tools.utils import may_mkdirs
8 |
9 |
10 | def seperate_weight_decay(named_params, lr, weight_decay=1e-5, skip_list=()):
11 | decay = []
12 | no_decay = []
13 | for name, param in named_params:
14 | if not param.requires_grad:
15 | continue
16 | if len(param.shape) == 1 or name in skip_list:
17 | no_decay.append(param)
18 | # if 'bias' in name:
19 | # no_decay.append(param)
20 | else:
21 | decay.append(param)
22 | return [{'params': no_decay, 'lr': lr, 'weight_decay': 0.},
23 | {'params': decay, 'lr': lr, 'weight_decay': weight_decay}]
24 |
25 |
26 | def ratio2weight(targets, ratio):
27 | ratio = torch.from_numpy(ratio).type_as(targets)
28 |
29 | # --------------------- dangwei li TIP20 ---------------------
30 | pos_weights = targets * (1 - ratio)
31 | neg_weights = (1 - targets) * ratio
32 | weights = torch.exp(neg_weights + pos_weights)
33 |
34 |
35 | # --------------------- AAAI ---------------------
36 | # pos_weights = torch.sqrt(1 / (2 * ratio.sqrt())) * targets
37 | # neg_weights = torch.sqrt(1 / (2 * (1 - ratio.sqrt()))) * (1 - targets)
38 | # weights = pos_weights + neg_weights
39 |
40 |     # for the RAP dataloader, a target element may be 2; with or without label smoothing, such elements stay greater than 1, so their weights are zeroed below
41 | weights[targets > 1] = 0.0
42 |
43 | return weights
44 |
45 |
46 | def get_model_log_path(root_path, model_name):
47 | multi_attr_model_dir = os.path.join(root_path, model_name, 'img_model')
48 | may_mkdirs(multi_attr_model_dir)
49 |
50 | multi_attr_log_dir = os.path.join(root_path, model_name, 'log')
51 | may_mkdirs(multi_attr_log_dir)
52 |
53 | return multi_attr_model_dir, multi_attr_log_dir
54 |
55 |
56 | class LogVisual:
57 |
58 | def __init__(self, args):
59 | self.args = vars(args)
60 | self.train_loss = []
61 | self.val_loss = []
62 |
63 | self.ap = []
64 | self.map = []
65 | self.acc = []
66 | self.prec = []
67 | self.recall = []
68 | self.f1 = []
69 |
70 | self.error_num = []
71 | self.fn_num = []
72 | self.fp_num = []
73 |
74 | self.save = False
75 |
76 | def append(self, **kwargs):
77 | self.save = False
78 |
79 | if 'result' in kwargs:
80 | self.ap.append(kwargs['result']['label_acc'])
81 | self.map.append(np.mean(kwargs['result']['label_acc']))
82 | self.acc.append(np.mean(kwargs['result']['instance_acc']))
83 | self.prec.append(np.mean(kwargs['result']['instance_precision']))
84 | self.recall.append(np.mean(kwargs['result']['instance_recall']))
85 |             self.f1.append(np.mean(kwargs['result']['instance_F1']))
86 |
87 | self.error_num.append(kwargs['result']['error_num'])
88 | self.fn_num.append(kwargs['result']['fn_num'])
89 | self.fp_num.append(kwargs['result']['fp_num'])
90 |
91 | if 'train_loss' in kwargs:
92 | self.train_loss.append(kwargs['train_loss'])
93 | if 'val_loss' in kwargs:
94 | self.val_loss.append(kwargs['val_loss'])
95 |
96 |
97 | def get_pkl_rootpath(dataset, zero_shot):
98 | root = os.path.join("./data", f"{dataset}")
99 | if zero_shot:
100 | data_path = os.path.join(root, 'dataset_zs_run0.pkl')
101 | else:
102 |         data_path = os.path.join(root, 'dataset_all.pkl')
103 |
104 | return data_path
105 |
106 |
107 | def get_reload_weight(model_path, model, pth='ckpt_max.pth'):
108 | model_path = os.path.join(model_path, pth)
109 | load_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
110 |
111 | if isinstance(load_dict, OrderedDict):
112 | pretrain_dict = load_dict
113 | else:
114 | pretrain_dict = load_dict['state_dicts']
115 | print(f"best performance {load_dict['metric']} in epoch : {load_dict['epoch']}")
116 |
117 | model.load_state_dict(pretrain_dict, strict=True)
118 |
119 | return model
120 |
--------------------------------------------------------------------------------
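A tiny worked example of ratio2weight, which implements the exponential re-weighting referenced above ("dangwei li TIP20"): w = exp(t * (1 - r) + (1 - t) * r), where r is the positive-label ratio. With r = 0.1, a positive target receives exp(0.9) ≈ 2.46 and a negative exp(0.1) ≈ 1.11, up-weighting the rarer positives:

    import numpy as np
    import torch
    from tools.function import ratio2weight

    targets = torch.tensor([[1.0, 0.0]])    # one positive and one negative label
    ratio = np.array([0.1, 0.1])            # 10% of training samples carry each attribute
    print(ratio2weight(targets, ratio))
    # tensor([[2.4596, 1.1052]]): exp(0.9) for the rare positive, exp(0.1) for the negative
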
/tools/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import datetime
4 | import time
5 | # from contextlib import contextmanager
6 | import torch
7 | from torch.autograd import Variable
8 | import random
9 | import numpy as np
10 | from torch import distributed as dist
11 |
12 | from tools.distributed import unwrap_model
13 |
14 |
15 | def time_str(fmt=None):
16 | if fmt is None:
17 | fmt = '%Y-%m-%d_%H:%M:%S'
18 |
19 | # time.strftime(format[, t])
20 | return datetime.datetime.today().strftime(fmt)
21 |
22 |
23 | def str2bool(v):
24 | return v.lower() in ("yes", "true", "1")
25 |
26 |
27 | def is_iterable(obj):
28 | return hasattr(obj, '__len__')
29 |
30 |
31 | def to_scalar(vt):
32 | """
33 |     convert a 0-dim tensor to a Python scalar, other tensors to a numpy array; ndarrays pass through
34 | """
35 | # if isinstance(vt, Variable):
36 | # return vt.data.cpu().numpy().flatten()[0]
37 | if torch.is_tensor(vt):
38 | if vt.dim() == 0:
39 | return vt.detach().cpu().numpy().flatten().item()
40 | else:
41 | return vt.detach().cpu().numpy()
42 | elif isinstance(vt, np.ndarray):
43 | return vt
44 | else:
45 | raise TypeError('Input should be a ndarray or tensor')
46 |
47 |
48 | # makes the random numbers predictable
49 | def set_seed(rand_seed):
50 | np.random.seed(rand_seed)
51 | random.seed(rand_seed)
52 | torch.backends.cudnn.enabled = True
53 | torch.manual_seed(rand_seed)
54 | torch.cuda.manual_seed(rand_seed)
55 |
56 |
57 | def may_mkdirs(dir_name):
58 |     # create the directory (and parents) if it does not already exist;
59 |     # equivalent to os.makedirs(dir_name, exist_ok=True)
60 | if not os.path.exists(os.path.abspath(dir_name)):
61 | os.makedirs(os.path.abspath(dir_name))
62 |
63 |
64 | class AverageMeter(object):
65 | """
66 | Computes and stores the average and current value
67 |
68 | """
69 |
70 | def __init__(self):
71 | self.val = 0
72 | self.avg = 0
73 | self.sum = 0
74 | self.count = 0
75 |
76 | def reset(self):
77 | self.val = 0
78 | self.avg = 0
79 | self.sum = 0
80 | self.count = 0
81 |
82 | def update(self, val, n=1):
83 | self.val = val
84 | self.sum += val * n
85 | self.count += n
86 | self.avg = self.sum / (self.count + 1e-20)
87 |
88 |
89 | class RunningAverageMeter(object):
90 | """
91 | Computes and stores the running average and current value
92 | """
93 |
94 | def __init__(self, hist=0.99):
95 | self.val = None
96 | self.avg = None
97 | self.hist = hist
98 |
99 | def reset(self):
100 | self.val = None
101 | self.avg = None
102 |
103 | def update(self, val):
104 | if self.avg is None:
105 | self.avg = val
106 | else:
107 | self.avg = self.avg * self.hist + val * (1 - self.hist)
108 | self.val = val
109 |
110 |
111 | class RecentAverageMeter(object):
112 | """
113 | Stores and computes the average of recent values
114 | """
115 |
116 | def __init__(self, hist_size=100):
117 | self.hist_size = hist_size
118 | self.fifo = []
119 | self.val = 0
120 |
121 | def reset(self):
122 | self.fifo = []
123 | self.val = 0
124 |
125 | def update(self, value):
126 | self.val = value
127 | self.fifo.append(value)
128 | if len(self.fifo) > self.hist_size:
129 | del self.fifo[0]
130 |
131 | @property
132 | def avg(self):
133 | assert len(self.fifo) > 0
134 | return float(sum(self.fifo)) / len(self.fifo)
135 |
136 |
137 | class ReDirectSTD(object):
138 | """
139 |     redirects sys.stdout or sys.stderr to a file while still echoing to the console
140 |     Args:
141 |         fpath: log file path
142 |         console: one of ['stdout', 'stderr']
143 |         immediately_visible: if True, reopen the file on every write so output appears immediately
144 | Usage example:
145 | ReDirectSTD('stdout.txt', 'stdout', False)
146 | ReDirectSTD('stderr.txt', 'stderr', False)
147 | """
148 |
149 |     def __init__(self, fpath=None, console='stdout', immediately_visible=False):
150 | import sys
151 | import os
152 | assert console in ['stdout', 'stderr']
153 | self.console = sys.stdout if console == "stdout" else sys.stderr
154 | self.file = fpath
155 | self.f = None
156 |         self.immediately_visible = immediately_visible
157 |
158 | if fpath is not None:
159 | # Remove existing log file
160 | if os.path.exists(fpath):
161 | os.remove(fpath)
162 | if console == 'stdout':
163 | sys.stdout = self
164 | else:
165 | sys.stderr = self
166 |
167 | def __del__(self):
168 | self.close()
169 |
170 |     def __enter__(self):
171 |         return self
172 |
173 |     def __exit__(self, *args):
174 |         self.close()
175 |
176 | def write(self, msg):
177 | self.console.write(msg)
178 | if self.file is not None:
179 |             if not os.path.exists(os.path.dirname(os.path.abspath(self.file))):
180 |                 os.makedirs(os.path.dirname(os.path.abspath(self.file)))
181 |
182 |             if self.immediately_visible:
183 | # open for writing, appending to the end of the file if it exists
184 | with open(self.file, 'a') as f:
185 | f.write(msg)
186 | else:
187 | if self.f is None:
188 | self.f = open(self.file, 'w')
189 |
190 |                 # the handle is opened lazily on the first write and kept
191 |                 # open; it is flushed in flush() and closed in close()
192 |                 self.f.write(msg)
193 |
194 | def flush(self):
195 | self.console.flush()
196 | if self.f is not None:
197 | self.f.flush()
198 |             # make sure the OS flushes the file to disk as well
199 |             os.fsync(self.f.fileno())
200 |
201 |     def close(self):
202 |         # only close the log file; the original console stream stays open
203 |         if self.f is not None:
204 |             self.f.close()
205 |
206 |
207 | def find_index(seq, item):
208 | for i, x in enumerate(seq):
209 | if item == x:
210 | return i
211 | return -1
212 |
213 |
214 | def set_devices(sys_device_ids):
215 | """
216 | Args:
217 | sys_device_ids: a tuple; which GPUs to use
218 | e.g. sys_device_ids = (), only use cpu
219 | sys_device_ids = (3,), use the 4-th gpu
220 | sys_device_ids = (0, 1, 2, 3,), use the first 4 gpus
221 |         sys_device_ids = (0, 2, 4,), use the 1st, 3rd and 5th gpus
222 |     """
223 |     import os
224 |     # build e.g. '0,1,2' from (0, 1, 2); join avoids a trailing comma
225 |     visible_devices = ','.join(str(i) for i in sys_device_ids)
226 |     os.environ['CUDA_VISIBLE_DEVICES'] = visible_devices
227 |
228 |     # models and user-defined tensors are then placed on the first
229 |     # visible device by default; -1 denotes cpu
230 |     device_id = 0 if len(sys_device_ids) > 0 else -1
231 |     return device_id
232 |
233 |
234 | def transfer_optims(optims, device_id=-1):
235 | for optim in optims:
236 | if isinstance(optim, torch.optim.Optimizer):
237 | transfer_optim_state(optim.state, device_id=device_id)
238 |
239 |
240 | def transfer_optim_state(state, device_id=-1):
241 | """
242 | Transfer an optimizer.state to cpu or specified gpu, which means
243 | transferring tensors of the optimizer.state to specified device.
244 | The modification is in place for the state.
245 | Args:
246 |         state: A torch.optim.Optimizer.state
247 | device_id: gpu id, or -1 which means transferring to cpu
248 | """
249 | for key, val in state.items():
250 | if isinstance(val, dict):
251 | transfer_optim_state(val, device_id=device_id)
252 | elif isinstance(val, Variable):
253 | raise RuntimeError("Oops, state[{}] is a Variable!".format(key))
254 | elif isinstance(val, torch.nn.Parameter):
255 | raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
256 | else:
257 | try:
258 | if device_id == -1:
259 | state[key] = val.cpu()
260 | else:
261 | state[key] = val.cuda(device=device_id)
262 |             except AttributeError:
263 |                 pass  # non-tensor entries (e.g. step counters) are left as-is
264 |
265 |
266 | def load_state_dict(model, src_state_dict):
267 | """
268 |     copy parameters from src_state_dict to the model
269 | Arguments:
270 | model: A torch.nn.Module object
271 | src_state_dict: a dict containing parameters and persistent buffers
272 | """
273 | from torch.nn import Parameter
274 | dest_state_dict = model.state_dict()
275 | for name, param in src_state_dict.items():
276 | if name not in dest_state_dict:
277 | continue
278 | if isinstance(param, Parameter):
279 | param = param.data
280 | try:
281 | dest_state_dict[name].copy_(param)
282 | except Exception as msg:
283 | print("Warning: Error occurs when copying '{}': {}".format(name, str(msg)))
284 |
285 | src_missing = set(dest_state_dict.keys()) - set(src_state_dict.keys())
286 | if len(src_missing) > 0:
287 | print("Keys not found in source state_dict: ")
288 | for n in src_missing:
289 | print('\t', n)
290 |
291 |     dest_missing = set(src_state_dict.keys()) - set(dest_state_dict.keys())
292 |     if len(dest_missing):
293 |         print("Keys not found in destination state_dict: ")
294 |         for n in dest_missing:
295 |             print('\t', n)
296 |
297 |
298 | def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
299 | """
300 | load state_dict of module & optimizer from file
301 | Args:
302 | modules_optims: A two-element list which contains module and optimizer
303 | ckpt_file: the check point file
304 |         load_to_cpu: Boolean, whether to map tensors in the model & optimizer to cpu
305 | """
306 | map_location = (lambda storage, loc: storage) if load_to_cpu else None
307 | ckpt = torch.load(ckpt_file, map_location=map_location)
308 | for m, sd in zip(modules_optims, ckpt['state_dicts']):
309 | m.load_state_dict(sd)
310 | if verbose:
311 | print("Resume from ckpt {}, \nepoch: {}, scores: {}".format(
312 | ckpt_file, ckpt['ep'], ckpt['scores']))
313 | return ckpt['ep'], ckpt['scores']
314 |
315 |
316 | # def get_state_dict(model, unwrap_fn=unwrap_model):
317 | # return unwrap_fn(model).state_dict()
318 |
319 |
320 | def save_ckpt(model, ckpt_files, epoch, metric):
321 | """
322 | Note:
323 | torch.save() reserves device type and id of tensors to save.
324 | So when loading ckpt, you have to inform torch.load() to load these tensors
325 | to cpu or your desired gpu, if you change devices.
326 | """
327 |
328 | if not os.path.exists(os.path.dirname(os.path.abspath(ckpt_files))):
329 | os.makedirs(os.path.dirname(os.path.abspath(ckpt_files)))
330 |
331 | save_dict = {'state_dicts': model.state_dict(),
332 | 'state_dict_ema': unwrap_model(model).state_dict(),
333 | 'epoch': f'{time_str()} in epoch {epoch}',
334 | 'metric': metric,}
335 |
336 | torch.save(save_dict, ckpt_files)
337 |
338 |
339 | def adjust_lr_staircase(param_groups, base_lrs, ep, decay_at_epochs, factor):
340 |     """ Multiplied by a factor at the beginning of specified epochs. Different
341 |     param groups specify their own base learning rates.
342 |     Args:
343 |         param_groups: a list of param groups
344 |         base_lrs: starting learning rates, len(base_lrs) = len(param_groups)
345 |         ep: current epoch, ep >= 1
346 |         decay_at_epochs: a list or tuple; learning rates are multiplied by a factor
347 |             at the beginning of these epochs
348 |         factor: a number in range (0, 1)
349 |     Example:
350 |         base_lrs = [0.1, 0.01]
351 |         decay_at_epochs = [51, 101]
352 |         factor = 0.1
353 |     Note:
354 |         It is meant to be called at the beginning of an epoch
355 |     """
356 | assert len(base_lrs) == len(param_groups), \
357 | 'You should specify base lr for each param group.'
358 | assert ep >= 1, "Current epoch number should be >= 1"
359 |
360 | if ep not in decay_at_epochs:
361 | return
362 |
363 | ind = find_index(decay_at_epochs, ep)
364 | for i, (g, base_lr) in enumerate(zip(param_groups, base_lrs)):
365 | g['lr'] = base_lr * factor ** (ind + 1)
366 | print('=====> Param group {}: lr adjusted to {:.10f}'.format(i, g['lr']).rstrip('0'))
367 |
368 |
369 | def may_set_mode(maybe_modules, mode):
370 | """
371 | maybe_modules, an object or a list of objects.
372 | """
373 | assert mode in ['train', 'eval']
374 | if not is_iterable(maybe_modules):
375 | maybe_modules = [maybe_modules]
376 | for m in maybe_modules:
377 | if isinstance(m, torch.nn.Module):
378 | if mode == 'train':
379 | m.train()
380 | else:
381 | m.eval()
382 |
383 |
384 | def get_topk(matrix, k):
385 |     """
386 |     retain the top-k elements of a matrix and set the others to 0
387 |     Args:
388 |         matrix (np.ndarray): 2d array; ties at the threshold are kept
389 |     """
390 |     # value of the k-th largest element; keep everything if k >= size
391 |     flat = matrix.flatten()
392 |     thresh = np.partition(flat, -k)[-k] if k < flat.size else flat.min()
393 |
394 |     return np.where(matrix >= thresh, matrix, 0)
395 |
396 |
397 | class Timer:
398 |
399 | def __init__(self):
400 | self.o = time.time()
401 |
402 | def measure(self, p=1):
403 | x = (time.time() - self.o) / p
404 | x = int(x)
405 | if x >= 3600:
406 | return '{:.1f}h'.format(x / 3600)
407 | if x >= 60:
408 | return '{}m'.format(round(x / 60))
409 | return '{}s'.format(x)
410 |
411 |
412 | class data_prefetcher():
413 | def __init__(self, loader):
414 | self.loader = iter(loader)
415 | self.stream = torch.cuda.Stream()
416 | # self.mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1, 3, 1, 1)
417 | # self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1, 3, 1, 1)
418 | # With Amp, it isn't necessary to manually convert data to half.
419 | # if args.fp16:
420 | # self.mean = self.mean.half()
421 | # self.std = self.std.half()
422 | self.preload()
423 |
424 | def preload(self):
425 | try:
426 | self.next_input, self.next_target = next(self.loader)
427 | except StopIteration:
428 | self.next_input = None
429 | self.next_target = None
430 | return
431 | with torch.cuda.stream(self.stream):
432 | self.next_input = self.next_input.cuda(non_blocking=True)
433 | self.next_target = self.next_target.cuda(non_blocking=True)
434 | # With Amp, it isn't necessary to manually convert data to half.
435 | # if args.fp16:
436 | # self.next_input = self.next_input.half()
437 | # else:
438 | self.next_input = self.next_input.float()
439 | # self.next_input = self.next_input.sub_(self.mean).div_(self.std)
440 |
441 | def next(self):
442 | torch.cuda.current_stream().wait_stream(self.stream)
443 | input = self.next_input
444 | target = self.next_target
445 | self.preload()
446 | return input, target
447 |
448 |
--------------------------------------------------------------------------------
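
A small self-contained sketch of the meter and timer helpers defined above, as they would be used inside a training loop; the loss values and the batch size of 32 are dummies:

    from tools.utils import AverageMeter, RunningAverageMeter, Timer, set_seed

    set_seed(605)                              # the same seed train.py uses
    loss_mtr = AverageMeter()                  # exact mean over all updates
    loss_ema = RunningAverageMeter(hist=0.99)  # exponentially weighted mean
    timer = Timer()

    for loss in [0.9, 0.7, 0.6, 0.55]:         # dummy per-batch losses
        loss_mtr.update(loss, n=32)            # n is the batch size
        loss_ema.update(loss)

    print(f'avg {loss_mtr.avg:.3f}, ema {loss_ema.avg:.3f}, took {timer.measure()}')
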
/tools/vis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def tb_visualizer_pedes(tb_writer, lr, epoch, train_loss, valid_loss, train_result, valid_result,
5 | train_gt, valid_gt, train_loss_mtr, valid_loss_mtr, model, attr_name):
6 | tb_writer.add_scalars('train/lr', {'lr': lr}, epoch)
7 | tb_writer.add_scalars('train/losses', {'train': train_loss,
8 | 'test': valid_loss}, epoch)
9 |
10 | tb_writer.add_scalars('train/perf', {'ma': train_result.ma,
11 | 'pos_recall': np.mean(train_result.label_pos_recall),
12 | 'neg_recall': np.mean(train_result.label_neg_recall),
13 | 'Acc': train_result.instance_acc,
14 | 'Prec': train_result.instance_prec,
15 | 'Rec': train_result.instance_recall,
16 | 'F1': train_result.instance_f1}, epoch)
17 |
18 | tb_writer.add_scalars('test/perf', {'ma': valid_result.ma,
19 | 'pos_recall': np.mean(valid_result.label_pos_recall),
20 | 'neg_recall': np.mean(valid_result.label_neg_recall),
21 | 'Acc': valid_result.instance_acc,
22 | 'Prec': valid_result.instance_prec,
23 | 'Rec': valid_result.instance_recall,
24 | 'F1': valid_result.instance_f1}, epoch)
25 |
26 |
27 |
--------------------------------------------------------------------------------
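
tb_visualizer_pedes only reads scalar and per-label fields from the two result objects, so it can be exercised in isolation. The SimpleNamespace below is a stand-in for what get_pedestrian_metrics returns, and the log directory is illustrative:

    from types import SimpleNamespace

    import numpy as np
    from torch.utils.tensorboard import SummaryWriter

    from tools.vis import tb_visualizer_pedes

    fake = SimpleNamespace(ma=0.80,
                           label_pos_recall=np.array([0.80, 0.70]),
                           label_neg_recall=np.array([0.90, 0.95]),
                           instance_acc=0.78, instance_prec=0.81,
                           instance_recall=0.76, instance_f1=0.78)

    writer = SummaryWriter(log_dir='runs/demo')
    tb_visualizer_pedes(writer, lr=1e-4, epoch=0, train_loss=0.5, valid_loss=0.6,
                        train_result=fake, valid_result=fake,
                        train_gt=None, valid_gt=None,          # not read by the function
                        train_loss_mtr=None, valid_loss_mtr=None,
                        model=None, attr_name=None)            # not read either
    writer.close()
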
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | # os.environ['CUDA_VISIBLE_DEVICES'] = '3'
3 |
4 | import argparse
5 | import pickle
6 | from collections import defaultdict
7 | from datetime import datetime
8 |
9 | import numpy as np
10 | from mmcv.cnn import get_model_complexity_info
11 | from torch.utils.tensorboard import SummaryWriter
12 | from visdom import Visdom
13 |
14 | from configs import cfg, update_config
15 | from dataset.multi_label.coco import COCO14
16 | from dataset.augmentation import get_transform
17 | from metrics.ml_metrics import get_map_metrics, get_multilabel_metrics
18 | from metrics.pedestrian_metrics import get_pedestrian_metrics
19 | from models.model_ema import ModelEmaV2
20 | from optim.adamw import AdamW
21 | from scheduler.cos_annealing_with_restart import CosineAnnealingLR_with_Restart
22 | from scheduler.cosine_lr import CosineLRScheduler
23 | from tools.distributed import distribute_bn
24 | from tools.vis import tb_visualizer_pedes
25 | import torch
26 | from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
27 | from torch.utils.data import DataLoader
28 |
29 | from batch_engine import valid_trainer, batch_trainer
30 | from dataset.pedes_attr.pedes import PedesAttr
31 | from models.base_block import FeatClassifier
32 | from models.model_factory import build_loss, build_classifier, build_backbone
33 |
34 | from tools.function import get_model_log_path, get_reload_weight, seperate_weight_decay
35 | from tools.utils import time_str, save_ckpt, ReDirectSTD, set_seed, str2bool
36 | from models.backbone import swin_transformer, resnet, bninception, vit
37 | from models.backbone.tresnet import tresnet
38 | from losses import bceloss, scaledbceloss
39 | from models import base_block
40 |
41 |
42 |
43 |
44 | # torch.backends.cudnn.benchmark = True
45 | # NOTE: anomaly detection eases debugging but noticeably slows training
46 | torch.autograd.set_detect_anomaly(True)
47 |
48 |
49 | def main(cfg, args):
50 | set_seed(605)
51 | exp_dir = os.path.join('exp_result', cfg.DATASET.NAME)
52 |
53 | model_dir, log_dir = get_model_log_path(exp_dir, cfg.NAME)
54 | stdout_file = os.path.join(log_dir, f'stdout_{time_str()}.txt')
55 | save_model_path = os.path.join(model_dir, f'ckpt_max_{time_str()}.pth')
56 |
57 | visdom = None
58 | if cfg.VIS.VISDOM:
59 | visdom = Visdom(env=f'{cfg.DATASET.NAME}_' + cfg.NAME, port=8401)
60 | assert visdom.check_connection()
61 |
62 | writer = None
63 | if cfg.VIS.TENSORBOARD.ENABLE:
64 | current_time = datetime.now().strftime('%b%d_%H-%M-%S')
65 | writer_dir = os.path.join(exp_dir, cfg.NAME, 'runs', current_time)
66 | writer = SummaryWriter(log_dir=writer_dir)
67 |
68 | if cfg.REDIRECTOR:
69 | print('redirector stdout')
70 | ReDirectSTD(stdout_file, 'stdout', False)
71 |
72 |     """
73 |     runtime flags are kept on args because the CfgNode is immutable
74 |     """
75 | if 'WORLD_SIZE' in os.environ:
76 | args.distributed = int(os.environ['WORLD_SIZE']) > 1
77 | else:
78 |         args.distributed = False
79 |
80 | args.world_size = 1
81 | args.rank = 0 # global rank
82 |
83 | if args.distributed:
84 | args.device = 'cuda:%d' % args.local_rank
85 | torch.cuda.set_device(args.local_rank)
86 | torch.distributed.init_process_group(backend='nccl', init_method='env://')
87 | args.world_size = torch.distributed.get_world_size()
88 | args.rank = torch.distributed.get_rank()
89 | print(f'use GPU{args.device} for training')
90 | print(args.world_size, args.rank)
91 |
92 | if args.local_rank == 0:
93 | print(cfg)
94 |
95 | train_tsfm, valid_tsfm = get_transform(cfg)
96 | if args.local_rank == 0:
97 | print(train_tsfm)
98 |
99 | if cfg.DATASET.TYPE == 'pedes':
100 | train_set = PedesAttr(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm,
101 | target_transform=cfg.DATASET.TARGETTRANSFORM)
102 |
103 | valid_set = PedesAttr(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm,
104 | target_transform=cfg.DATASET.TARGETTRANSFORM)
105 | elif cfg.DATASET.TYPE == 'multi_label':
106 | train_set = COCO14(cfg=cfg, split=cfg.DATASET.TRAIN_SPLIT, transform=train_tsfm,
107 | target_transform=cfg.DATASET.TARGETTRANSFORM)
108 |
109 | valid_set = COCO14(cfg=cfg, split=cfg.DATASET.VAL_SPLIT, transform=valid_tsfm,
110 | target_transform=cfg.DATASET.TARGETTRANSFORM)
111 | if args.distributed:
112 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
113 | else:
114 | train_sampler = None
115 |
116 | train_loader = DataLoader(
117 | dataset=train_set,
118 | batch_size=cfg.TRAIN.BATCH_SIZE,
119 | sampler=train_sampler,
120 | shuffle=train_sampler is None,
121 | num_workers=4,
122 | pin_memory=True,
123 | drop_last=True,
124 | )
125 |
126 | valid_loader = DataLoader(
127 | dataset=valid_set,
128 | batch_size=cfg.TRAIN.BATCH_SIZE,
129 | shuffle=False,
130 | num_workers=4,
131 | pin_memory=True,
132 | )
133 |
134 | if args.local_rank == 0:
135 | print('-' * 60)
136 | print(f'{cfg.DATASET.NAME} attr_num : {train_set.attr_num}, eval_attr_num : {train_set.eval_attr_num} '
137 | f'{cfg.DATASET.TRAIN_SPLIT} set: {len(train_loader.dataset)}, '
138 | f'{cfg.DATASET.TEST_SPLIT} set: {len(valid_loader.dataset)}, '
139 | )
140 |
141 | labels = train_set.label
142 | label_ratio = labels.mean(0) if cfg.LOSS.SAMPLE_WEIGHT else None
143 |
144 | backbone, c_output = build_backbone(cfg.BACKBONE.TYPE, cfg.BACKBONE.MULTISCALE)
145 |
146 |
147 | classifier = build_classifier(cfg.CLASSIFIER.NAME)(
148 | nattr=train_set.attr_num,
149 | c_in=c_output,
150 | bn=cfg.CLASSIFIER.BN,
151 | pool=cfg.CLASSIFIER.POOLING,
152 |         scale=cfg.CLASSIFIER.SCALE
153 | )
154 |
155 | model = FeatClassifier(backbone, classifier, bn_wd=cfg.TRAIN.BN_WD)
156 | if args.local_rank == 0:
157 | print(f"backbone: {cfg.BACKBONE.TYPE}, classifier: {cfg.CLASSIFIER.NAME}")
158 | print(f"model_name: {cfg.NAME}")
159 |
160 | # flops, params = get_model_complexity_info(model, (3, 256, 128), print_per_layer_stat=True)
161 | # print('{:<30} {:<8}'.format('Computational complexity: ', flops))
162 | # print('{:<30} {:<8}'.format('Number of parameters: ', params))
163 |
164 | model = model.cuda()
165 | if args.distributed:
166 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
167 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank])
168 | else:
169 | model = torch.nn.DataParallel(model)
170 |
171 | model_ema = None
172 | if cfg.TRAIN.EMA.ENABLE:
173 | # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
174 | model_ema = ModelEmaV2(
175 | model, decay=cfg.TRAIN.EMA.DECAY, device='cpu' if cfg.TRAIN.EMA.FORCE_CPU else None)
176 |
177 | if cfg.RELOAD.TYPE:
178 | model = get_reload_weight(model_dir, model, pth=cfg.RELOAD.PTH)
179 |
180 | loss_weight = cfg.LOSS.LOSS_WEIGHT
181 |
182 |
183 | criterion = build_loss(cfg.LOSS.TYPE)(
184 | sample_weight=label_ratio, scale=cfg.CLASSIFIER.SCALE, size_sum=cfg.LOSS.SIZESUM, tb_writer=writer)
185 | criterion = criterion.cuda()
186 |
187 | if cfg.TRAIN.BN_WD:
188 | param_groups = [{'params': model.module.finetune_params(),
189 | 'lr': cfg.TRAIN.LR_SCHEDULER.LR_FT,
190 | 'weight_decay': cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY},
191 | {'params': model.module.fresh_params(),
192 | 'lr': cfg.TRAIN.LR_SCHEDULER.LR_NEW,
193 | 'weight_decay': cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY}]
194 | else:
195 | # bn parameters are not applied with weight decay
196 | ft_params = seperate_weight_decay(
197 | model.module.finetune_params(),
198 | lr=cfg.TRAIN.LR_SCHEDULER.LR_FT,
199 | weight_decay=cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY)
200 |
201 | fresh_params = seperate_weight_decay(
202 | model.module.fresh_params(),
203 | lr=cfg.TRAIN.LR_SCHEDULER.LR_NEW,
204 | weight_decay=cfg.TRAIN.OPTIMIZER.WEIGHT_DECAY)
205 |
206 | param_groups = ft_params + fresh_params
207 |
208 | if cfg.TRAIN.OPTIMIZER.TYPE.lower() == 'sgd':
209 | optimizer = torch.optim.SGD(param_groups, momentum=cfg.TRAIN.OPTIMIZER.MOMENTUM)
210 | elif cfg.TRAIN.OPTIMIZER.TYPE.lower() == 'adam':
211 | optimizer = torch.optim.Adam(param_groups)
212 | elif cfg.TRAIN.OPTIMIZER.TYPE.lower() == 'adamw':
213 | optimizer = AdamW(param_groups)
214 | else:
215 |         assert False, f'{cfg.TRAIN.OPTIMIZER.TYPE} is not implemented'
216 |
217 | if cfg.TRAIN.LR_SCHEDULER.TYPE == 'plateau':
218 | lr_scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=4)
219 | if cfg.CLASSIFIER.BN:
220 |             assert False, 'BN is not compatible with ReduceLROnPlateau'
221 | elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'multistep':
222 | lr_scheduler = MultiStepLR(optimizer, milestones=cfg.TRAIN.LR_SCHEDULER.LR_STEP, gamma=0.1)
223 | elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'annealing_cosine':
224 | lr_scheduler = CosineAnnealingLR_with_Restart(
225 | optimizer,
226 | T_max=(cfg.TRAIN.MAX_EPOCH + 5) * len(train_loader),
227 | T_mult=1,
228 | eta_min=cfg.TRAIN.LR_SCHEDULER.LR_NEW * 0.001
229 | )
230 | elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'warmup_cosine':
231 |
232 |
233 | lr_scheduler = CosineLRScheduler(
234 | optimizer,
235 | t_initial=cfg.TRAIN.MAX_EPOCH,
236 |             lr_min=1e-5,  # the final value the cosine schedule decays to
237 | warmup_lr_init=1e-4,
238 | warmup_t=cfg.TRAIN.MAX_EPOCH * cfg.TRAIN.LR_SCHEDULER.WMUP_COEF,
239 | )
240 |
241 | else:
242 |         assert False, f'{cfg.TRAIN.LR_SCHEDULER.TYPE} is not implemented'
243 |
244 | best_metric, epoch = trainer(cfg, args, epoch=cfg.TRAIN.MAX_EPOCH,
245 | model=model, model_ema=model_ema,
246 | train_loader=train_loader,
247 | valid_loader=valid_loader,
248 | criterion=criterion,
249 | optimizer=optimizer,
250 | lr_scheduler=lr_scheduler,
251 | path=save_model_path,
252 | loss_w=loss_weight,
253 | viz=visdom,
254 | tb_writer=writer)
255 | if args.local_rank == 0:
256 |         print(f'{cfg.NAME}, best_metric: {best_metric} in epoch {epoch}')
257 |
258 |
259 | def trainer(cfg, args, epoch, model, model_ema, train_loader, valid_loader, criterion, optimizer, lr_scheduler,
260 | path, loss_w, viz, tb_writer):
261 | maximum = float(-np.inf)
262 | best_epoch = 0
263 |
264 | result_list = defaultdict()
265 |
266 | result_path = path
267 | result_path = result_path.replace('ckpt_max', 'metric')
268 | result_path = result_path.replace('pth', 'pkl')
269 |
270 | for e in range(epoch):
271 |
272 | if args.distributed:
273 |             train_loader.sampler.set_epoch(e)
274 |
275 | lr = optimizer.param_groups[1]['lr']
276 |
277 | train_loss, train_gt, train_probs, train_imgs, train_logits, train_loss_mtr = batch_trainer(
278 | cfg,
279 | args=args,
280 | epoch=e,
281 | model=model,
282 | model_ema=model_ema,
283 | train_loader=train_loader,
284 | criterion=criterion,
285 | optimizer=optimizer,
286 | loss_w=loss_w,
287 | scheduler=lr_scheduler if cfg.TRAIN.LR_SCHEDULER.TYPE == 'annealing_cosine' else None,
288 | )
289 |
290 | if args.distributed:
291 | if args.local_rank == 0:
292 | print("Distributing BatchNorm running means and vars")
293 | distribute_bn(model, args.world_size, args.dist_bn == 'reduce')
294 |
295 | if model_ema is not None and not cfg.TRAIN.EMA.FORCE_CPU:
296 |
297 | if args.local_rank == 0:
298 | print('using model_ema to validate')
299 |
300 | if args.distributed:
301 | distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
302 | valid_loss, valid_gt, valid_probs, valid_imgs, valid_logits, valid_loss_mtr = valid_trainer(
303 | cfg,
304 | args=args,
305 | epoch=e,
306 | model=model_ema.module,
307 | valid_loader=valid_loader,
308 | criterion=criterion,
309 | loss_w=loss_w
310 | )
311 | else:
312 | valid_loss, valid_gt, valid_probs, valid_imgs, valid_logits, valid_loss_mtr = valid_trainer(
313 | cfg,
314 | args=args,
315 | epoch=e,
316 | model=model,
317 | valid_loader=valid_loader,
318 | criterion=criterion,
319 | loss_w=loss_w
320 | )
321 |
322 | if cfg.TRAIN.LR_SCHEDULER.TYPE == 'plateau':
323 | lr_scheduler.step(metrics=valid_loss)
324 | elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'warmup_cosine':
325 | lr_scheduler.step(epoch=e + 1)
326 | elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'multistep':
327 | lr_scheduler.step()
328 |
329 | if cfg.METRIC.TYPE == 'pedestrian':
330 |
331 | train_result = get_pedestrian_metrics(train_gt, train_probs, index=None, cfg=cfg)
332 | valid_result = get_pedestrian_metrics(valid_gt, valid_probs, index=None, cfg=cfg)
333 |
334 | if args.local_rank == 0:
335 | print(f'Evaluation on train set, train losses {train_loss}\n',
336 | 'ma: {:.4f}, label_f1: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
337 | train_result.ma, np.mean(train_result.label_f1),
338 | np.mean(train_result.label_pos_recall),
339 | np.mean(train_result.label_neg_recall)),
340 | 'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
341 | train_result.instance_acc, train_result.instance_prec, train_result.instance_recall,
342 | train_result.instance_f1))
343 |
344 | print(f'Evaluation on test set, valid losses {valid_loss}\n',
345 | 'ma: {:.4f}, label_f1: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
346 | valid_result.ma, np.mean(valid_result.label_f1),
347 | np.mean(valid_result.label_pos_recall),
348 | np.mean(valid_result.label_neg_recall)),
349 | 'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
350 | valid_result.instance_acc, valid_result.instance_prec, valid_result.instance_recall,
351 | valid_result.instance_f1))
352 |
353 | print(f'{time_str()}')
354 | print('-' * 60)
355 |
356 | if args.local_rank == 0:
357 | tb_visualizer_pedes(tb_writer, lr, e, train_loss, valid_loss, train_result, valid_result,
358 | train_gt, valid_gt, train_loss_mtr, valid_loss_mtr, model, train_loader.dataset.attr_id)
359 |
360 | cur_metric = valid_result.ma
361 | if cur_metric > maximum:
362 | maximum = cur_metric
363 | best_epoch = e
364 | save_ckpt(model, path, e, maximum)
365 |
366 | result_list[e] = {
367 | 'train_result': train_result, # 'train_map': train_map,
368 | 'valid_result': valid_result, # 'valid_map': valid_map,
369 | 'train_gt': train_gt, 'train_probs': train_probs,
370 | 'valid_gt': valid_gt, 'valid_probs': valid_probs,
371 | 'train_imgs': train_imgs, 'valid_imgs': valid_imgs
372 | }
373 |
374 | elif cfg.METRIC.TYPE == 'multi_label':
375 |
376 | train_metric = get_multilabel_metrics(train_gt, train_probs)
377 | valid_metric = get_multilabel_metrics(valid_gt, valid_probs)
378 |
379 | if args.local_rank == 0:
380 | print(
381 | 'Train Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
382 | 'CF1: {:.4f}'.format(train_metric.map, train_metric.OP, train_metric.OR, train_metric.OF1,
383 | train_metric.CP, train_metric.CR, train_metric.CF1))
384 |
385 | print(
386 | 'Test Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, '
387 | 'CF1: {:.4f}'.format(valid_metric.map, valid_metric.OP, valid_metric.OR, valid_metric.OF1,
388 | valid_metric.CP, valid_metric.CR, valid_metric.CF1))
389 | print(f'{time_str()}')
390 | print('-' * 60)
391 |
392 | tb_writer.add_scalars('train/lr', {'lr': lr}, e)
393 |
394 | tb_writer.add_scalars('train/losses', {'train': train_loss,
395 | 'test': valid_loss}, e)
396 |
397 | tb_writer.add_scalars('train/perf', {'mAP': train_metric.map,
398 | 'OP': train_metric.OP,
399 | 'OR': train_metric.OR,
400 | 'OF1': train_metric.OF1,
401 | 'CP': train_metric.CP,
402 | 'CR': train_metric.CR,
403 | 'CF1': train_metric.CF1}, e)
404 |
405 | tb_writer.add_scalars('test/perf', {'mAP': valid_metric.map,
406 | 'OP': valid_metric.OP,
407 | 'OR': valid_metric.OR,
408 | 'OF1': valid_metric.OF1,
409 | 'CP': valid_metric.CP,
410 | 'CR': valid_metric.CR,
411 | 'CF1': valid_metric.CF1}, e)
412 |
413 | cur_metric = valid_metric.map
414 | if cur_metric > maximum:
415 | maximum = cur_metric
416 | best_epoch = e
417 | save_ckpt(model, path, e, maximum)
418 |
419 | result_list[e] = {
420 | 'train_result': train_metric, 'valid_result': valid_metric,
421 | 'train_gt': train_gt, 'train_probs': train_probs,
422 | 'valid_gt': valid_gt, 'valid_probs': valid_probs
423 | }
424 | else:
425 | assert False, f'{cfg.METRIC.TYPE} is unavailable'
426 |
427 | with open(result_path, 'wb') as f:
428 | pickle.dump(result_list, f)
429 |
430 | return maximum, best_epoch
431 |
432 |
433 | def argument_parser():
434 | parser = argparse.ArgumentParser(description="attribute recognition",
435 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
436 |
437 | parser.add_argument(
438 | "--cfg", help="decide which cfg to use", type=str,
439 | default="./configs/pedes_baseline/pa100k.yaml",
440 |
441 | )
442 |
443 | parser.add_argument("--debug", type=str2bool, default="true")
444 | parser.add_argument('--local_rank', help='node rank for distributed training', default=0,
445 | type=int)
446 | parser.add_argument('--dist_bn', type=str, default='',
447 | help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")')
448 |
449 | args = parser.parse_args()
450 |
451 | return args
452 |
453 |
454 | if __name__ == '__main__':
455 | args = argument_parser()
456 |
457 | update_config(cfg, args)
458 | main(cfg, args)
459 |
--------------------------------------------------------------------------------
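
trainer() pickles the per-epoch result_list next to the checkpoint: the 'ckpt_max' part of the save path is replaced by 'metric' and the extension becomes .pkl. A sketch of inspecting that file offline; the file name is an illustrative stand-in (the real one carries a timestamp), and .ma applies to the 'pedestrian' metric type (use .map for 'multi_label'):

    import pickle

    result_path = 'exp_result/PA100k/some_model_dir/metric_example.pkl'  # illustrative
    with open(result_path, 'rb') as f:
        result_list = pickle.load(f)

    best_e = max(result_list, key=lambda e: result_list[e]['valid_result'].ma)
    print(f"best epoch {best_e}: ma = {result_list[best_e]['valid_result'].ma:.4f}")
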
/train_gpu.sh:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # for pedestrian attribute recognition
5 |
6 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/peta.yaml
7 |
8 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/peta_zs.yaml
9 |
10 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/rapv1.yaml
11 |
12 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/rapv2.yaml
13 |
14 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/rap_zs.yaml
15 |
16 | CUDA_VISIBLE_DEVICES=0 python train.py --cfg ./configs/pedes_baseline/pa100k.yaml
17 |
18 | # for swin transformer, set cfg.TRAIN.BATCH_SIZE to 32
19 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 --master_port=1233 train.py --cfg ./configs/pedes_baseline/pa100k.yaml
20 |
21 |
22 |
23 |
24 | # for multi-label classification
25 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 --master_port=1233 train.py --cfg ./configs/multilabel_baseline/coco.yaml
26 |
27 |
28 |
--------------------------------------------------------------------------------