├── models
├── __init__.py
└── detect
│ ├── pk-yolo.yaml
│ └── yolov9-e.yaml
├── utils
├── tal
│ ├── __init__.py
│ └── anchor_generator.py
├── loggers
│ ├── wandb
│ │ ├── __init__.py
│ │ ├── log_dataset.py
│ │ ├── sweep.py
│ │ └── sweep.yaml
│ ├── clearml
│ │ ├── __init__.py
│ │ ├── hpo.py
│ │ └── clearml_utils.py
│ └── comet
│ │ ├── optimizer_config.json
│ │ ├── comet_utils.py
│ │ └── hpo.py
├── __init__.py
├── lion.py
├── callbacks.py
├── autobatch.py
├── activations.py
├── coco_utils.py
├── triton.py
├── downloads.py
└── autoanchor.py
├── data
├── images
│ └── horses.jpg
├── multiplane.yaml
└── hyps
│ └── hyp.scratch-high.yaml
├── spark
├── pretrain
│ ├── viz_imgs
│ │ ├── recon.png
│ │ ├── spconv1.png
│ │ ├── spconv2.png
│ │ └── spconv3.png
│ ├── requirements.txt
│ ├── splitedata.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── resnet.py
│ │ ├── custom_detr.py
│ │ ├── custom_origin.py
│ │ ├── custom.py
│ │ └── convnext.py
│ ├── utils
│ │ ├── lr_control.py
│ │ ├── imagenet.py
│ │ └── arg_util.py
│ ├── sampler.py
│ ├── decoder.py
│ ├── dist.py
│ └── README.md
├── downstream_imagenet
│ ├── requirements.txt
│ ├── lr_decay.py
│ ├── README.md
│ ├── models
│ │ └── __init__.py
│ ├── arg.py
│ ├── util.py
│ └── data.py
├── downstream_mmdet
│ ├── configs
│ │ ├── _base_
│ │ │ ├── default_runtime.py
│ │ │ └── models
│ │ │ │ └── mask_rcnn_convnext_fpn.py
│ │ └── convnext_spark
│ │ │ └── mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py
│ ├── mmcv_custom
│ │ ├── __init__.py
│ │ ├── runner
│ │ │ └── checkpoint.py
│ │ ├── layer_decay_optimizer_constructor.py
│ │ └── customized_text.py
│ ├── mmdet
│ │ └── models
│ │ │ └── backbones
│ │ │ └── __init__.py
│ └── README.md
└── downstream_d2
│ ├── configs
│ ├── coco_R_50_FPN_CONV_1x_moco_adam.yaml
│ └── Base-RCNN-FPN.yaml
│ ├── convert-timm-to-d2.py
│ ├── README.md
│ └── lr_decay.py
├── requirements.txt
├── hubconf.py
└── benchmarks.py

/models/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/tal/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/loggers/wandb/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/loggers/clearml/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/data/images/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/data/images/horses.jpg
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/recon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/recon.png
--------------------------------------------------------------------------------
/spark/downstream_imagenet/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | Pillow
3 | typed-argument-parser
4 | timm==0.5.4
5 | tensorboardx
6 | 
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/spconv1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/spconv1.png
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/spconv2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/spconv2.png
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/spconv3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/spconv3.png
--------------------------------------------------------------------------------
/spark/pretrain/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | Pillow
4 | typed-argument-parser
5 | timm==0.5.4
6 | tensorboardx
7 | 
--------------------------------------------------------------------------------
/data/multiplane.yaml:
--------------------------------------------------------------------------------
1 | train: ./axial_t1wce_2_class/images/train
2 | val: ./axial_t1wce_2_class/images/test
3 | # train: ./coronal_t1wce_2_class/images/train
4 | # val: ./coronal_t1wce_2_class/images/test
5 | # train: ./sagittal_t1wce_2_class/images/train
6 | # val: ./sagittal_t1wce_2_class/images/test
7 | 
8 | nc: 2
9 | names: ['negative','positive']
10 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1)
2 | # yapf:disable
3 | log_config = dict(
4 |     interval=50,
5 |     hooks=[
6 |         dict(type='CustomizedTextLoggerHook'),
7 |         # dict(type='TensorboardLoggerHook')
8 |     ])
9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')]
11 | 
12 | dist_params = dict(backend='nccl')
13 | log_level = 'INFO'
14 | load_from = None
15 | resume_from = None
16 | workflow = [('train', 1)]
17 | 
--------------------------------------------------------------------------------
/spark/pretrain/splitedata.py:
--------------------------------------------------------------------------------
1 | 
2 | import glob
3 | import os
4 | import shutil
5 | 
6 | import numpy as np
7 | 
8 | # Carve out a validation split: randomly move 15% of each class's
9 | # training images into a parallel "val" directory.
10 | rootpath = "/media/dzy/deep2/detr/archive"
11 | cls = ['yes', 'no']
12 | for c in cls:
13 |     allfi = glob.glob(os.path.join(rootpath, "train", c, "*.jpg"))
14 |     np.random.shuffle(allfi)
15 |     targets = os.path.join(rootpath, "val", c)
16 |     if not os.path.exists(targets):
17 |         os.makedirs(targets)
18 |     k = 0
19 |     for file in allfi:
20 |         if k < len(allfi) * 0.15:  # only the first 15% of the shuffled list is moved
21 |             target = file.replace("train", "val")
22 |             shutil.move(file, target)
23 |         k += 1
24 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | 
3 | # All rights reserved.
4 | 
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | from .checkpoint import load_checkpoint 12 | from .layer_decay_optimizer_constructor import LearningRateDecayOptimizerConstructor 13 | from .customized_text import CustomizedTextLoggerHook 14 | 15 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook'] 16 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnest import ResNeSt 9 | from .resnet import ResNet, ResNetV1d 10 | from .resnext import ResNeXt 11 | from .ssd_vgg import SSDVGG 12 | from .trident_resnet import TridentResNet 13 | from .swin_transformer import SwinTransformer 14 | from .convnext import ConvNeXt 15 | 16 | __all__ = [ 17 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 18 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 19 | 'ResNeSt', 'TridentResNet', 'SwinTransformer', 'ConvNeXt' 20 | ] 21 | -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | from utils.general import LOGGER 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 12 | if not logger.wandb: 13 | LOGGER.info("install wandb using `pip install wandb` to log the dataset") 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 21 | parser.add_argument('--entity', default=None, help='W&B entity') 22 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 23 | 24 | opt = parser.parse_args() 25 | opt.resume = False # Explicitly disallow resume check for dataset upload job 26 | 27 | create_dataset_artifact(opt) 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements 2 | # Usage: pip install -r requirements.txt 3 | 4 | # Base ------------------------------------------------------------------------ 5 | gitpython 6 | ipython 7 | matplotlib>=3.2.2 8 | numpy>=1.18.5 9 | opencv-python>=4.1.1 10 | Pillow>=7.1.2 11 | psutil 12 | PyYAML>=5.3.1 13 | requests>=2.23.0 14 | scipy>=1.4.1 15 | thop>=0.1.1 16 | torch>=1.7.0 17 | torchvision>=0.8.1 18 | tqdm>=4.64.0 19 | # protobuf<=3.20.1 20 | 21 | # Logging --------------------------------------------------------------------- 22 | tensorboard>=2.4.1 23 | # clearml>=1.2.0 24 | # comet 25 | 26 | # Plotting -------------------------------------------------------------------- 27 | pandas>=1.1.4 28 | 
seaborn>=0.11.0 29 | 30 | # Export ---------------------------------------------------------------------- 31 | # coremltools>=6.0 32 | # onnx>=1.9.0 33 | # onnx-simplifier>=0.4.1 34 | # nvidia-pyindex 35 | # nvidia-tensorrt 36 | # scikit-learn<=1.1.2 37 | # tensorflow>=2.4.1 38 | # tensorflowjs>=3.9.0 39 | # openvino-dev 40 | 41 | # Deploy ---------------------------------------------------------------------- 42 | # tritonclient[all]~=2.24.0 43 | 44 | # Extras ---------------------------------------------------------------------- 45 | # mss 46 | albumentations>=1.0.3 47 | pycocotools>=2.0 48 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).resolve() 7 | ROOT = FILE.parents[3] # YOLOv5 root directory 8 | if str(ROOT) not in sys.path: 9 | sys.path.append(str(ROOT)) # add ROOT to PATH 10 | 11 | from train import parse_opt, train 12 | from utils.callbacks import Callbacks 13 | from utils.general import increment_path 14 | from utils.torch_utils import select_device 15 | 16 | 17 | def sweep(): 18 | wandb.init() 19 | # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb. 20 | hyp_dict = vars(wandb.config).get("_items").copy() 21 | 22 | # Workaround: get necessary opt args 23 | opt = parse_opt(known=True) 24 | opt.batch_size = hyp_dict.get("batch_size") 25 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 26 | opt.epochs = hyp_dict.get("epochs") 27 | opt.nosave = True 28 | opt.data = hyp_dict.get("data") 29 | opt.weights = str(opt.weights) 30 | opt.cfg = str(opt.cfg) 31 | opt.data = str(opt.data) 32 | opt.hyp = str(opt.hyp) 33 | opt.project = str(opt.project) 34 | device = select_device(opt.device, batch_size=opt.batch_size) 35 | 36 | # train 37 | train(hyp_dict, opt, device, callbacks=Callbacks()) 38 | 39 | 40 | if __name__ == "__main__": 41 | sweep() 42 | -------------------------------------------------------------------------------- /spark/downstream_d2/configs/coco_R_50_FPN_CONV_1x_moco_adam.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | 7 | MASK_ON: True 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | RESNETS: 11 | DEPTH: 50 12 | NORM: "SyncBN" 13 | STRIDE_IN_1X1: False 14 | FPN: 15 | NORM: "SyncBN" 16 | ROI_BOX_HEAD: 17 | NAME: "FastRCNNConvFCHead" 18 | NUM_FC: 1 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 7 21 | NORM: "SyncBN" 22 | ROI_MASK_HEAD: 23 | NAME: "MaskRCNNConvUpsampleHead" 24 | NUM_CONV: 4 25 | POOLER_RESOLUTION: 14 26 | NORM: "SyncBN" 27 | 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896) 30 | CROP: 31 | ENABLED: False 32 | TYPE: "absolute_range" 33 | SIZE: (384, 600) 34 | FORMAT: "RGB" 35 | TEST: 36 | EVAL_PERIOD: 5000 37 | PRECISE_BN: 38 | ENABLED: True 39 | 40 | SOLVER: 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | GAMMA: 0.25 44 | BASE_LR: 0.00025 45 | WARMUP_FACTOR: 0.01 46 | WARMUP_ITERS: 1000 47 | WEIGHT_DECAY: 0.0001 48 | CHECKPOINT_PERIOD: 5000 49 | CLIP_GRADIENTS: 50 | ENABLED: False 51 | CLIP_TYPE: "value" 52 | CLIP_VALUE: 1.0 53 | NORM_TYPE: 2.0 54 | 55 | # compared to standard detectron2, we add these two 
new configurations: 56 | OPTIMIZER: "ADAMW" 57 | LR_DECAY: 0.6 58 | -------------------------------------------------------------------------------- /spark/downstream_d2/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-high.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 7.5 # box loss gain 9 | cls: 0.5 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | dfl: 1.5 # dfl loss gain 14 | iou_t: 0.20 # IoU training threshold 15 | anchor_t: 5.0 # anchor-multiple threshold 16 | # anchors: 3 # anchors per output layer (0 to ignore) 17 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 18 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 19 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 20 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 21 | degrees: 0.0 # image rotation (+/- deg) 22 | translate: 0.1 # image translation (+/- fraction) 23 | scale: 0.9 # image scale (+/- gain) 24 | shear: 0.0 # image shear (+/- deg) 25 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 26 | flipud: 0.0 # image flip up-down (probability) 27 | fliplr: 0.5 # image flip left-right (probability) 28 | mosaic: 1.0 # image mosaic (probability) 29 | mixup: 0.15 # image mixup (probability) 30 | copy_paste: 0.3 # segment copy-paste (probability) 31 | -------------------------------------------------------------------------------- /utils/tal/anchor_generator.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.general import check_version 4 | 5 | TORCH_1_10 = check_version(torch.__version__, '1.10.0') 6 | 7 | 8 | def make_anchors(feats, strides, grid_cell_offset=0.5): 9 | """Generate anchors from features.""" 10 | anchor_points, stride_tensor = [], [] 11 | assert feats is not None 12 | dtype, device = feats[0].dtype, feats[0].device 13 | for i, stride in enumerate(strides): 14 | _, _, h, w = feats[i].shape 15 | sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x 16 | sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y 17 | sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) 18 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 19 | stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) 20 | return torch.cat(anchor_points), torch.cat(stride_tensor) 21 | 22 | 23 | def dist2bbox(distance, anchor_points, xywh=True, dim=-1): 24 | """Transform distance(ltrb) to box(xywh or xyxy).""" 25 | lt, rb = torch.split(distance, 2, dim) 26 | x1y1 = anchor_points - lt 27 | x2y2 = anchor_points + rb 28 | if xywh: 29 | c_xy = (x1y1 + x2y2) / 2 30 | wh = x2y2 - x1y1 31 | return torch.cat((c_xy, wh), dim) # xywh bbox 32 | return torch.cat((x1y1, x2y2), dim) # xyxy bbox 33 | 34 | 35 | def bbox2dist(anchor_points, bbox, reg_max): 36 | """Transform bbox(xyxy) to dist(ltrb).""" 37 | x1y1, x2y2 = torch.split(bbox, 2, -1) 38 | return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) 39 | -------------------------------------------------------------------------------- /spark/downstream_d2/convert-timm-to-d2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) ByteDance, Inc. and its affiliates. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | import pickle as pkl 10 | 11 | import torch 12 | 13 | 14 | # we use `timm.models.ResNet` in pre-training, so keys are timm-style 15 | def timm_resnet_to_detectron2_resnet(source_file, target_file): 16 | pretrained: dict = torch.load(source_file, map_location='cpu') 17 | for mod_k in {'state_dict', 'state', 'module', 'model'}: 18 | if mod_k in pretrained: 19 | pretrained = pretrained[mod_k] 20 | if any(k.startswith('module.encoder_q.') for k in pretrained.keys()): 21 | pretrained = {k.replace('module.encoder_q.', ''): v for k, v in pretrained.items() if k.startswith('module.encoder_q.')} 22 | 23 | pkl_state = {} 24 | for k, v in pretrained.items(): # convert resnet's keys from timm-style to d2-style 25 | if 'layer' not in k: 26 | k = 'stem.' + k 27 | for t in [1, 2, 3, 4]: 28 | k = k.replace(f'layer{t}', f'res{t+1}') 29 | for t in [1, 2, 3]: 30 | k = k.replace(f'bn{t}', f'conv{t}.norm') 31 | k = k.replace('downsample.0', 'shortcut') 32 | k = k.replace('downsample.1', 'shortcut.norm') 33 | 34 | pkl_state[k] = v.detach().numpy() 35 | 36 | with open(target_file, 'wb') as fp: 37 | print(f'[convert] .pkl is generated! 
(from `{source_file}`, to `{target_file}`, len(state)=={len(pkl_state)})') 38 | pkl.dump({'model': pkl_state, '__author__': 'https://github.com/keyu-tian/SparK', 'matching_heuristics': True}, fp) 39 | 40 | 41 | if __name__ == '__main__': 42 | import sys 43 | timm_resnet_to_detectron2_resnet(sys.argv[1], sys.argv[2]) 44 | -------------------------------------------------------------------------------- /spark/pretrain/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | from timm import create_model 9 | from timm.loss import SoftTargetCrossEntropy 10 | from timm.models.layers import drop 11 | 12 | 13 | from models.convnext import ConvNeXt 14 | from models.resnet import ResNet 15 | from models.custom import YourConvNet 16 | _import_resnets_for_timm_registration = (ResNet,) 17 | 18 | 19 | # log more 20 | def _ex_repr(self): 21 | return ', '.join( 22 | f'{k}=' + (f'{v:g}' if isinstance(v, float) else str(v)) 23 | for k, v in vars(self).items() 24 | if not k.startswith('_') and k != 'training' 25 | and not isinstance(v, (torch.nn.Module, torch.Tensor)) 26 | ) 27 | for clz in (torch.nn.CrossEntropyLoss, SoftTargetCrossEntropy, drop.DropPath): 28 | if hasattr(clz, 'extra_repr'): 29 | clz.extra_repr = _ex_repr 30 | else: 31 | clz.__repr__ = lambda self: f'{type(self).__name__}({_ex_repr(self)})' 32 | 33 | 34 | pretrain_default_model_kwargs = { 35 | 'V9back': dict(), 36 | 37 | 'resnet50': dict(drop_path_rate=0.05), 38 | 'resnet101': dict(drop_path_rate=0.08), 39 | 'resnet152': dict(drop_path_rate=0.10), 40 | 'resnet200': dict(drop_path_rate=0.15), 41 | 'convnext_small': dict(sparse=True, drop_path_rate=0.2), 42 | 'convnext_base': dict(sparse=True, drop_path_rate=0.3), 43 | 'convnext_large': dict(sparse=True, drop_path_rate=0.4), 44 | 45 | } 46 | for kw in pretrain_default_model_kwargs.values(): 47 | kw['pretrained'] = False 48 | kw['num_classes'] = 0 49 | kw['global_pool'] = '' 50 | 51 | 52 | def build_sparse_encoder(name: str, input_size: int, sbn=False, drop_path_rate=0.0, verbose=False): 53 | from encoder import SparseEncoder 54 | 55 | kwargs = pretrain_default_model_kwargs[name] 56 | if drop_path_rate != 0: 57 | kwargs['drop_path_rate'] = drop_path_rate 58 | print(f'[build_sparse_encoder] model kwargs={kwargs}') 59 | cnn = create_model(name, **kwargs) 60 | 61 | return SparseEncoder(cnn, input_size=input_size, sbn=sbn, verbose=verbose) 62 | 63 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import platform 3 | import threading 4 | 5 | 6 | def emojis(str=''): 7 | # Return platform-dependent emoji-safe version of string 8 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 9 | 10 | 11 | class TryExcept(contextlib.ContextDecorator): 12 | # YOLOv5 TryExcept class. 
Usage: @TryExcept() decorator or 'with TryExcept():' context manager 13 | def __init__(self, msg=''): 14 | self.msg = msg 15 | 16 | def __enter__(self): 17 | pass 18 | 19 | def __exit__(self, exc_type, value, traceback): 20 | if value: 21 | print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}")) 22 | return True 23 | 24 | 25 | def threaded(func): 26 | # Multi-threads a target function and returns thread. Usage: @threaded decorator 27 | def wrapper(*args, **kwargs): 28 | thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) 29 | thread.start() 30 | return thread 31 | 32 | return wrapper 33 | 34 | 35 | def join_threads(verbose=False): 36 | # Join all daemon threads, i.e. atexit.register(lambda: join_threads()) 37 | main_thread = threading.current_thread() 38 | for t in threading.enumerate(): 39 | if t is not main_thread: 40 | if verbose: 41 | print(f'Joining thread {t.name}') 42 | t.join() 43 | 44 | 45 | def notebook_init(verbose=True): 46 | # Check system software and hardware 47 | print('Checking setup...') 48 | 49 | import os 50 | import shutil 51 | 52 | from utils.general import check_font, check_requirements, is_colab 53 | from utils.torch_utils import select_device # imports 54 | 55 | check_font() 56 | 57 | import psutil 58 | from IPython import display # to display images and clear console output 59 | 60 | if is_colab(): 61 | shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory 62 | 63 | # System info 64 | if verbose: 65 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 66 | ram = psutil.virtual_memory().total 67 | total, used, free = shutil.disk_usage("/") 68 | display.clear_output() 69 | s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)' 70 | else: 71 | s = '' 72 | 73 | select_device(newline=False) 74 | print(emojis(f'Setup complete ✅ {s}')) 75 | return display 76 | -------------------------------------------------------------------------------- /spark/pretrain/utils/lr_control.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
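# Note on the schedule implemented below: the learning rate warms up linearly from
# 0.005 * peak_lr to peak_lr over the first `wp_it` iterations, then follows a half-cosine
# decay down to 0.001 * peak_lr at `max_it`; the weight decay is cosine-annealed from `wd`
# to `wd_end` over the whole run. Both are further rescaled per parameter group by the
# optional 'lr_scale' / 'weight_decay_scale' entries set up in get_param_groups().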
6 | 
7 | import math
8 | from pprint import pformat
9 | 
10 | 
11 | def lr_wd_annealing(optimizer, peak_lr, wd, wd_end, cur_it, wp_it, max_it):
12 |     wp_it = round(wp_it)
13 |     if cur_it < wp_it:
14 |         cur_lr = 0.005 * peak_lr + 0.995 * peak_lr * cur_it / wp_it
15 |     else:
16 |         ratio = (cur_it - wp_it) / (max_it - 1 - wp_it)
17 |         cur_lr = 0.001 * peak_lr + 0.999 * peak_lr * (0.5 + 0.5 * math.cos(math.pi * ratio))
18 | 
19 |     ratio = cur_it / (max_it - 1)
20 |     cur_wd = wd_end + (wd - wd_end) * (0.5 + 0.5 * math.cos(math.pi * ratio))
21 | 
22 |     min_lr, max_lr = cur_lr, cur_lr
23 |     min_wd, max_wd = cur_wd, cur_wd
24 |     for param_group in optimizer.param_groups:
25 |         scaled_lr = param_group['lr'] = cur_lr * param_group.get('lr_scale', 1)  # 'lr_scale' could be assigned
26 |         min_lr, max_lr = min(min_lr, scaled_lr), max(max_lr, scaled_lr)
27 |         scaled_wd = param_group['weight_decay'] = cur_wd * param_group.get('weight_decay_scale', 1)  # 'weight_decay_scale' could be assigned
28 |         min_wd, max_wd = min(min_wd, scaled_wd), max(max_wd, scaled_wd)
29 |     return min_lr, max_lr, min_wd, max_wd
30 | 
31 | 
32 | def get_param_groups(model, nowd_keys=()):
33 |     para_groups, para_groups_dbg = {}, {}
34 | 
35 |     for name, para in model.named_parameters():
36 |         if not para.requires_grad:
37 |             continue  # frozen weights
38 |         if len(para.shape) == 1 or name.endswith('.bias') or any(k in name for k in nowd_keys):
39 |             wd_scale, group_name = 0., 'no_decay'
40 |         else:
41 |             wd_scale, group_name = 1., 'decay'
42 | 
43 |         if group_name not in para_groups:
44 |             para_groups[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': 1.}
45 |             para_groups_dbg[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': 1.}
46 |         para_groups[group_name]['params'].append(para)
47 |         para_groups_dbg[group_name]['params'].append(name)
48 | 
49 |     for g in para_groups_dbg.values():
50 |         g['params'] = pformat(', '.join(g['params']), width=200)
51 | 
52 |     print(f'[get_ft_param_groups] param groups = \n{pformat(para_groups_dbg, indent=2, width=250)}\n')
53 |     return list(para_groups.values())
54 | 
--------------------------------------------------------------------------------
/utils/lion.py:
--------------------------------------------------------------------------------
1 | """PyTorch implementation of the Lion optimizer."""
2 | import torch
3 | from torch.optim.optimizer import Optimizer
4 | 
5 | 
6 | class Lion(Optimizer):
7 |     r"""Implements Lion algorithm."""
8 | 
9 |     def __init__(self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0.0):
10 |         """Initialize the hyperparameters.
11 |         Args:
12 |             params (iterable): iterable of parameters to optimize or dicts defining
13 |                 parameter groups
14 |             lr (float, optional): learning rate (default: 1e-4)
15 |             betas (Tuple[float, float], optional): coefficients used for computing the
16 |                 update direction and the momentum running average of the gradient (default: (0.9, 0.99))
17 |             weight_decay (float, optional): weight decay coefficient (default: 0)
18 |         """
19 | 
20 |         if not 0.0 <= lr:
21 |             raise ValueError('Invalid learning rate: {}'.format(lr))
22 |         if not 0.0 <= betas[0] < 1.0:
23 |             raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0]))
24 |         if not 0.0 <= betas[1] < 1.0:
25 |             raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1]))
26 |         defaults = dict(lr=lr, betas=betas, weight_decay=weight_decay)
27 |         super().__init__(params, defaults)
28 | 
29 |     @torch.no_grad()
30 |     def step(self, closure=None):
31 |         """Performs a single optimization step.
32 |         Args:
33 |             closure (callable, optional): A closure that reevaluates the model
34 |                 and returns the loss.
35 |         Returns:
36 |             the loss.
37 |         """
38 |         loss = None
39 |         if closure is not None:
40 |             with torch.enable_grad():
41 |                 loss = closure()
42 | 
43 |         for group in self.param_groups:
44 |             for p in group['params']:
45 |                 if p.grad is None:
46 |                     continue
47 | 
48 |                 # Perform stepweight decay
49 |                 p.data.mul_(1 - group['lr'] * group['weight_decay'])
50 | 
51 |                 grad = p.grad
52 |                 state = self.state[p]
53 |                 # State initialization
54 |                 if len(state) == 0:
55 |                     # Exponential moving average of gradient values
56 |                     state['exp_avg'] = torch.zeros_like(p)
57 | 
58 |                 exp_avg = state['exp_avg']
59 |                 beta1, beta2 = group['betas']
60 | 
61 |                 # Weight update
62 |                 update = exp_avg * beta1 + grad * (1 - beta1)
63 |                 p.add_(torch.sign(update), alpha=-group['lr'])
64 |                 # Update the momentum running average
65 |                 exp_avg.mul_(beta2).add_(grad, alpha=1 - beta2)
66 | 
67 |         return loss
--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
1 | import threading
2 | 
3 | 
4 | class Callbacks:
5 |     """
6 |     Handles all registered callbacks for YOLOv5 Hooks
7 |     """
8 | 
9 |     def __init__(self):
10 |         # Define the available callbacks
11 |         self._callbacks = {
12 |             'on_pretrain_routine_start': [],
13 |             'on_pretrain_routine_end': [],
14 |             'on_train_start': [],
15 |             'on_train_epoch_start': [],
16 |             'on_train_batch_start': [],
17 |             'optimizer_step': [],
18 |             'on_before_zero_grad': [],
19 |             'on_train_batch_end': [],
20 |             'on_train_epoch_end': [],
21 |             'on_val_start': [],
22 |             'on_val_batch_start': [],
23 |             'on_val_image_end': [],
24 |             'on_val_batch_end': [],
25 |             'on_val_end': [],
26 |             'on_fit_epoch_end': [],  # fit = train + val
27 |             'on_model_save': [],
28 |             'on_train_end': [],
29 |             'on_params_update': [],
30 |             'teardown': [],}
31 |         self.stop_training = False  # set True to interrupt training
32 | 
33 |     def register_action(self, hook, name='', callback=None):
34 |         """
35 |         Register a new action to a callback hook
36 | 
37 |         Args:
38 |             hook: The callback hook name to register the action to
39 |             name: The name of the action for later reference
40 |             callback: The callback to fire
41 |         """
42 |         assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
43 |         assert callable(callback), f"callback '{callback}' is not callable"
44 |         self._callbacks[hook].append({'name': name, 'callback': callback})
45 | 
46 |     def get_registered_actions(self, hook=None):
47 |         """
48 |         Returns all the registered actions by callback hook
49 | 
50 |         Args:
51 |             hook: The name of the hook to check, defaults to all
52 |         """
53 |         return self._callbacks[hook] if hook else self._callbacks
54 | 
55 |     def run(self, hook, *args, thread=False, **kwargs):
56 |         """
57 |         Loop through the registered actions and fire all callbacks, optionally each in a daemon thread
58 | 
59 |         Args:
60 |             hook: The name of the hook to fire the callbacks for
61 |             args: Arguments to receive from YOLOv5
62 |             thread: (boolean) Run callbacks in daemon thread
63 |             kwargs: Keyword Arguments to receive from YOLOv5
64 |         """
65 | 
66 |         assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
67 |         for logger in self._callbacks[hook]:
68 |             if thread:
69 |                 threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start()
70 |             else:
71 |                 logger['callback'](*args, **kwargs)
72 | 
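# Usage sketch (illustrative only; 'on_train_start' is one of the hooks defined above):
#   callbacks = Callbacks()
#   callbacks.register_action('on_train_start', name='greet', callback=lambda: print('training started'))
#   callbacks.run('on_train_start')  # fires every action registered on this hook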
-------------------------------------------------------------------------------- /spark/pretrain/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import random 8 | 9 | import numpy as np 10 | import torch 11 | from torch.utils.data.sampler import Sampler 12 | 13 | 14 | def worker_init_fn(worker_id): 15 | # https://pytorch.org/docs/stable/notes/randomness.html#dataloader 16 | worker_seed = torch.initial_seed() % 2 ** 32 17 | np.random.seed(worker_seed) 18 | random.seed(worker_seed) 19 | 20 | 21 | class DistInfiniteBatchSampler(Sampler): 22 | def __init__(self, world_size, rank, dataset_len, glb_batch_size, seed=1, filling=False, shuffle=True): 23 | assert glb_batch_size % world_size == 0 24 | self.world_size, self.rank = world_size, rank 25 | self.dataset_len = dataset_len 26 | self.glb_batch_size = glb_batch_size 27 | self.batch_size = glb_batch_size // world_size 28 | 29 | self.iters_per_ep = (dataset_len + glb_batch_size - 1) // glb_batch_size 30 | self.filling = filling 31 | self.shuffle = shuffle 32 | self.epoch = 0 33 | self.seed = seed 34 | self.indices = self.gener_indices() 35 | 36 | def gener_indices(self): 37 | global_max_p = self.iters_per_ep * self.glb_batch_size # global_max_p % world_size must be 0 cuz glb_batch_size % world_size == 0 38 | if self.shuffle: 39 | g = torch.Generator() 40 | g.manual_seed(self.epoch + self.seed) 41 | global_indices = torch.randperm(self.dataset_len, generator=g) 42 | else: 43 | global_indices = torch.arange(self.dataset_len) 44 | filling = global_max_p - global_indices.shape[0] 45 | if filling > 0 and self.filling: 46 | global_indices = torch.cat((global_indices, global_indices[:filling])) 47 | global_indices = tuple(global_indices.numpy().tolist()) 48 | 49 | seps = torch.linspace(0, len(global_indices), self.world_size + 1, dtype=torch.int) 50 | local_indices = global_indices[seps[self.rank]:seps[self.rank + 1]] 51 | self.max_p = len(local_indices) 52 | return local_indices 53 | 54 | def __iter__(self): 55 | self.epoch = 0 56 | while True: 57 | self.epoch += 1 58 | p, q = 0, 0 59 | while p < self.max_p: 60 | q = p + self.batch_size 61 | yield self.indices[p:q] 62 | p = q 63 | if self.shuffle: 64 | self.indices = self.gener_indices() 65 | 66 | def __len__(self): 67 | return self.iters_per_ep 68 | 69 | 70 | if __name__ == '__main__': 71 | W = 16 72 | for rk in range(W): 73 | ind = DistInfiniteBatchSampler(W, rk, 5024, 5024).gener_indices() 74 | print(rk, len(ind)) 75 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
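# These fine-tuning helpers mirror /spark/pretrain/utils/lr_control.py, with two changes:
# lr_wd_annealing keeps the weight decay constant (there is no `wd_end`), and
# get_param_groups optionally applies layer-wise learning-rate decay, scaling each
# group's lr by lr_scale ** scale_exp as reported by the model's get_layer_id_and_scale_exp().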
6 | 7 | import math 8 | from pprint import pformat 9 | 10 | 11 | def lr_wd_annealing(optimizer, peak_lr, wd, cur_it, wp_it, max_it): 12 | wp_it = round(wp_it) 13 | if cur_it < wp_it: 14 | cur_lr = 0.005 * peak_lr + 0.995 * peak_lr * cur_it / wp_it 15 | else: 16 | ratio = (cur_it - wp_it) / (max_it - 1 - wp_it) 17 | cur_lr = 0.001 * peak_lr + 0.999 * peak_lr * (0.5 + 0.5 * math.cos(math.pi * ratio)) 18 | 19 | min_lr, max_lr = cur_lr, cur_lr 20 | min_wd, max_wd = wd, wd 21 | for param_group in optimizer.param_groups: 22 | scaled_lr = param_group['lr'] = cur_lr * param_group.get('lr_scale', 1) # 'lr_scale' could be assigned 23 | min_lr, max_lr = min(min_lr, scaled_lr), max(max_lr, scaled_lr) 24 | scaled_wd = param_group['weight_decay'] = wd * param_group.get('weight_decay_scale', 1) # 'weight_decay_scale' could be assigned 25 | min_wd, max_wd = min(min_wd, scaled_wd), max(max_wd, scaled_wd) 26 | return min_lr, max_lr, min_wd, max_wd 27 | 28 | 29 | def get_param_groups(model, nowd_keys=(), lr_scale=0.0): 30 | using_lr_scale = hasattr(model, 'get_layer_id_and_scale_exp') and 0.0 < lr_scale < 1.0 31 | print(f'[get_ft_param_groups][lr decay] using_lr_scale={using_lr_scale}, ft_lr_scale={lr_scale}') 32 | para_groups, para_groups_dbg = {}, {} 33 | 34 | for name, para in model.named_parameters(): 35 | if not para.requires_grad: 36 | continue # frozen weights 37 | if len(para.shape) == 1 or name.endswith('.bias') or any(k in name for k in nowd_keys): 38 | wd_scale, group_name = 0., 'no_decay' 39 | else: 40 | wd_scale, group_name = 1., 'decay' 41 | 42 | if using_lr_scale: 43 | layer_id, scale_exp = model.get_layer_id_and_scale_exp(name) 44 | group_name = f'layer{layer_id}_' + group_name 45 | this_lr_scale = lr_scale ** scale_exp 46 | dbg = f'[layer {layer_id}][sc = {lr_scale} ** {scale_exp}]' 47 | else: 48 | this_lr_scale = 1 49 | dbg = f'[no scale]' 50 | 51 | if group_name not in para_groups: 52 | para_groups[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': this_lr_scale} 53 | para_groups_dbg[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': dbg} 54 | para_groups[group_name]['params'].append(para) 55 | para_groups_dbg[group_name]['params'].append(name) 56 | 57 | for g in para_groups_dbg.values(): 58 | g['params'] = pformat(', '.join(g['params']), width=200) 59 | 60 | print(f'[get_ft_param_groups] param groups = \n{pformat(para_groups_dbg, indent=2, width=250)}\n') 61 | return list(para_groups.values()) 62 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/README.md: -------------------------------------------------------------------------------- 1 | ## About code isolation 2 | 3 | This `downstream_imagenet` is isolated from pre-training codes. One can treat this `downstream_imagenet` as an independent codebase 🛠️. 4 | 5 | 6 | ## Preparation for ImageNet-1k fine-tuning 7 | 8 | See [INSTALL.md](https://github.com/keyu-tian/SparK/blob/main/INSTALL.md) to prepare `pip` dependencies and the ImageNet dataset. 9 | 10 | **Note: for network definitions, we directly use `timm.models.ResNet` and [official ConvNeXt](https://github.com/facebookresearch/ConvNeXt/blob/048efcea897d999aed302f2639b6270aedf8d4c8/models/convnext.py).** 11 | 12 | 13 | ## Fine-tuning on ImageNet-1k from pre-trained weights 14 | 15 | Run [/downstream_imagenet/main.py](/downstream_imagenet/main.py) via `torchrun`. 
16 | **It is required to specify** the ImageNet data folder (`--data_path`), your experiment name & log dir (`--exp_name` and `--exp_dir`, created automatically if they don't exist), the model name (`--model`; for valid choices see the keys of `HP_DEFAULT_VALUES` in [/downstream_imagenet/arg.py line14](/downstream_imagenet/arg.py#L14)), and the pretrained weight file `--resume_from` to run fine-tuning.
17 | 
18 | All the other configurations have their default values, listed in [/downstream_imagenet/arg.py#L13](/downstream_imagenet/arg.py#L13).
19 | You can override any default with, e.g., `--bs=1024`.
20 | 
21 | 
22 | Here is an example to fine-tune a ConvNeXt-Small on an 8-GPU single machine:
23 | ```shell script
24 | $ cd /path/to/SparK/downstream_imagenet
25 | $ torchrun --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr=localhost --master_port=<port> main.py \
26 |   --data_path=/path/to/imagenet --exp_name=<experiment_name> --exp_dir=/path/to/logdir \
27 |   --model=convnext_small --resume_from=/some/path/to/convnextS_1kpretrained_official_style.pth
28 | ```
29 | 
30 | For multiple machines, change the `--nnodes` and `--master_addr` to your configurations. E.g.:
31 | ```shell script
32 | $ torchrun --nproc_per_node=8 --nnodes=<num_nodes> --node_rank=<node_rank> --master_addr=<master_node_address> --master_port=<port> main.py \
33 |   ...
34 | ```
35 | 
36 | 
37 | ## Logging
38 | 
39 | See files under `--exp_dir` to track your experiment:
40 | 
41 | - `<model>_1kfinetuned_last.pth`: the latest model weights
42 | - `<model>_1kfinetuned_best.pth`: model weights with the highest acc
43 | - `<model>_1kfinetuned_best_ema.pth`: EMA weights with the highest acc
44 | - `finetune_log.txt`: records some important information such as:
45 |     - `git_commit_id`: git version
46 |     - `cmd`: all arguments passed to the script
47 | 
48 |     It also reports training loss/acc, best evaluation acc, and remaining time at each epoch.
49 | 
50 | - `tensorboard_log/`: saves a lot of tensorboard logs; you can visualize accuracies, loss values, learning rates, gradient norms, and more via `tensorboard --logdir /path/to/this/tensorboard_log/ --port 23333`.
51 | 
52 | ## Resuming
53 | 
54 | Use `--resume_from` again, like `--resume_from=path/to/<model>_1kfinetuned_last.pth`.
55 | 
--------------------------------------------------------------------------------
/spark/pretrain/models/resnet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | from typing import List 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | from timm.models.resnet import ResNet 11 | 12 | 13 | # hack: inject the `get_downsample_ratio` function into `timm.models.resnet.ResNet` 14 | def get_downsample_ratio(self: ResNet) -> int: 15 | return 32 16 | 17 | 18 | # hack: inject the `get_feature_map_channels` function into `timm.models.resnet.ResNet` 19 | def get_feature_map_channels(self: ResNet) -> List[int]: 20 | # `self.feature_info` is maintained by `timm` 21 | 22 | return [info['num_chs'] for info in self.feature_info[1:]] 23 | 24 | 25 | # hack: override the forward function of `timm.models.resnet.ResNet` 26 | def forward(self, x, hierarchical=False): 27 | """ this forward function is a modified version of `timm.models.resnet.ResNet.forward` 28 | # >>> ResNet.forward 29 | """ 30 | x = self.conv1(x) 31 | x = self.bn1(x) 32 | x = self.act1(x) 33 | x = self.maxpool(x) 34 | 35 | if hierarchical: 36 | ls = [] 37 | x = self.layer1(x); ls.append(x) 38 | x = self.layer2(x); ls.append(x) 39 | x = self.layer3(x); ls.append(x) 40 | x = self.layer4(x); ls.append(x) 41 | return ls 42 | else: 43 | x = self.global_pool(x) 44 | if self.drop_rate: 45 | x = F.dropout(x, p=float(self.drop_rate), training=self.training) 46 | x = self.fc(x) 47 | return x 48 | 49 | 50 | ResNet.get_downsample_ratio = get_downsample_ratio 51 | ResNet.get_feature_map_channels = get_feature_map_channels 52 | ResNet.forward = forward 53 | 54 | 55 | @torch.no_grad() 56 | def convnet_1(): 57 | from timm.models import create_model 58 | cnn = create_model('resnet50') 59 | print('get_downsample_ratio:', cnn.get_downsample_ratio()) 60 | print('get_feature_map_channels:', cnn.get_feature_map_channels()) 61 | 62 | downsample_ratio = cnn.get_downsample_ratio() 63 | feature_map_channels = cnn.get_feature_map_channels() 64 | 65 | # check the forward function 66 | B, C, H, W = 4, 3, 224, 224 67 | inp = torch.rand(B, C, H, W) 68 | feats = cnn(inp, hierarchical=True) 69 | assert isinstance(feats, list) 70 | assert len(feats) == len(feature_map_channels) 71 | print([tuple(t.shape) for t in feats]) 72 | 73 | # check the downsample ratio 74 | feats = cnn(inp, hierarchical=True) 75 | assert feats[-1].shape[-2] == H // downsample_ratio 76 | assert feats[-1].shape[-1] == W // downsample_ratio 77 | 78 | # check the channel number 79 | for feat, ch in zip(feats, feature_map_channels): 80 | assert feat.ndim == 4 81 | assert feat.shape[1] == ch 82 | 83 | 84 | if __name__ == '__main__': 85 | convnet_1() 86 | 87 | # 88 | # get_downsample_ratio: 32 89 | # get_feature_map_channels: [256, 512, 1024, 2048] 90 | # [(4, 256, 56, 56), (4, 512, 28, 28), (4, 1024, 14, 14), (4, 2048, 7, 7)] -------------------------------------------------------------------------------- /utils/autobatch.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from utils.general import LOGGER, colorstr 7 | from utils.torch_utils import profile 8 | 9 | 10 | def check_train_batch_size(model, imgsz=640, amp=True): 11 | # Check YOLOv5 training batch size 12 | with torch.cuda.amp.autocast(amp): 13 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 14 | 15 | 16 | def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): 17 | # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory 18 | # Usage: 19 | # import torch 20 | # from utils.autobatch import autobatch 21 | # 
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
22 |     #     print(autobatch(model))
23 | 
24 |     # Check device
25 |     prefix = colorstr('AutoBatch: ')
26 |     LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
27 |     device = next(model.parameters()).device  # get model device
28 |     if device.type == 'cpu':
29 |         LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
30 |         return batch_size
31 |     if torch.backends.cudnn.benchmark:
32 |         LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
33 |         return batch_size
34 | 
35 |     # Inspect CUDA memory
36 |     gb = 1 << 30  # bytes to GiB (1024 ** 3)
37 |     d = str(device).upper()  # 'CUDA:0'
38 |     properties = torch.cuda.get_device_properties(device)  # device properties
39 |     t = properties.total_memory / gb  # GiB total
40 |     r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
41 |     a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
42 |     f = t - (r + a)  # GiB free
43 |     LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
44 | 
45 |     # Profile batch sizes
46 |     batch_sizes = [1, 2, 4, 8, 16]
47 |     try:
48 |         img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
49 |         results = profile(img, model, n=3, device=device)
50 |     except Exception as e:
51 |         LOGGER.warning(f'{prefix}{e}')
52 |         return batch_size  # profiling failed; fall back to default ('results' would otherwise be undefined below)
53 |     # Fit a solution
54 |     y = [x[2] for x in results if x]  # memory [2]
55 |     p = np.polyfit(batch_sizes[:len(y)], y, deg=1)  # first degree polynomial fit
56 |     b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
57 |     if None in results:  # some sizes failed
58 |         i = results.index(None)  # first fail index
59 |         if b >= batch_sizes[i]:  # y intercept above failure point
60 |             b = batch_sizes[max(i - 1, 0)]  # select prior safe point
61 |     if b < 1 or b > 1024:  # b outside of safe range
62 |         b = batch_size
63 |         LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
64 | 
65 |     fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
66 |     LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
67 |     return b
68 | 
--------------------------------------------------------------------------------
/spark/pretrain/decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
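# LightDecoder (below) takes a list of encoder feature maps ordered from the most
# downsampled to the least; each UNetBlock upsamples 2x, and the matching feature map,
# when present, is added as a skip connection before being decoded (see forward()),
# ending with a 1x1 conv that projects back to 3 image channels.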
6 | 
7 | import math
8 | from typing import List
9 | 
10 | import torch
11 | import torch.nn as nn
12 | from timm.models.layers import trunc_normal_
13 | 
14 | from utils.misc import is_pow2n
15 | 
16 | 
17 | class UNetBlock(nn.Module):
18 |     def __init__(self, cin, cout, bn2d):
19 |         """
20 |         a UNet block with 2x up sampling
21 |         """
22 |         super().__init__()
23 |         self.up_sample = nn.ConvTranspose2d(cin, cin, kernel_size=4, stride=2, padding=1, bias=True)
24 |         self.conv = nn.Sequential(
25 |             nn.Conv2d(cin, cin, kernel_size=3, stride=1, padding=1, bias=False), bn2d(cin), nn.ReLU6(inplace=True),
26 |             nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False), bn2d(cout),
27 |         )
28 | 
29 |     def forward(self, x):
30 |         x = self.up_sample(x)
31 |         return self.conv(x)
32 | 
33 | 
34 | class LightDecoder(nn.Module):
35 |     def __init__(self, up_sample_ratio, width=768, sbn=True):  # todo: the decoder's width follows a simple halving rule; you can change it to any other rule
36 |         super().__init__()
37 |         self.width = width
38 |         assert is_pow2n(up_sample_ratio)
39 |         n = round(math.log2(up_sample_ratio))
40 |         channels = [self.width // 2 ** i for i in range(n + 1)]  # todo: the decoder's width follows a simple halving rule; you can change it to any other rule
41 |         bn2d = nn.SyncBatchNorm if sbn else nn.BatchNorm2d
42 |         self.dec = nn.ModuleList([UNetBlock(cin, cout, bn2d) for (cin, cout) in zip(channels[:-1], channels[1:])])
43 |         self.proj = nn.Conv2d(channels[-1], 3, kernel_size=1, stride=1, bias=True)
44 | 
45 |         self.initialize()
46 | 
47 |     def forward(self, to_dec: List[torch.Tensor]):
48 |         x = 0
49 |         for i, d in enumerate(self.dec):
50 |             if i < len(to_dec) and to_dec[i] is not None:
51 |                 x = x + to_dec[i]
52 |             x = self.dec[i](x)
53 |         return self.proj(x)
54 | 
55 |     def extra_repr(self) -> str:
56 |         return f'width={self.width}'
57 | 
58 |     def initialize(self):
59 |         for m in self.modules():
60 |             if isinstance(m, nn.Linear):
61 |                 trunc_normal_(m.weight, std=.02)
62 |                 if m.bias is not None:
63 |                     nn.init.constant_(m.bias, 0)
64 |             elif isinstance(m, nn.Conv2d):
65 |                 trunc_normal_(m.weight, std=.02)
66 |                 if m.bias is not None:
67 |                     nn.init.constant_(m.bias, 0)
68 |             elif isinstance(m, nn.ConvTranspose2d):  # nn.Conv2d is already handled by the branch above, so only transposed convs reach here
69 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
70 |                 if m.bias is not None:
71 |                     nn.init.constant_(m.bias, 0.)
72 |             elif isinstance(m, (nn.LayerNorm, nn.BatchNorm1d, nn.BatchNorm2d, nn.SyncBatchNorm)):
73 |                 nn.init.constant_(m.bias, 0)
74 |                 nn.init.constant_(m.weight, 1.0)
75 | 
--------------------------------------------------------------------------------
/utils/loggers/wandb/sweep.yaml:
--------------------------------------------------------------------------------
1 | # Hyperparameters for training
2 | # To set a range,
3 | # provide min and max values as:
4 | #      parameter:
5 | #
6 | #         min: scalar
7 | #         max: scalar
8 | #   OR
9 | #
10 | # set a specific list of values to search over:
11 | #     parameter:
12 | #         values: [scalar1, scalar2, scalar3...]
13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 4.0 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 
29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /spark/pretrain/utils/imagenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 
7 | import os
8 | from typing import Any, Callable, Optional, Tuple
9 | 
10 | import PIL.Image as PImage
11 | from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
12 | from torchvision.datasets.folder import DatasetFolder, IMG_EXTENSIONS
13 | from torchvision.transforms import transforms
14 | from torch.utils.data import Dataset
15 | 
16 | try:
17 |     from torchvision.transforms import InterpolationMode
18 |     interpolation = InterpolationMode.BICUBIC
19 | except ImportError:
20 |     import PIL
21 |     interpolation = PIL.Image.BICUBIC
22 | 
23 | 
24 | def pil_loader(path):
25 |     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
26 |     with open(path, 'rb') as f: img: PImage.Image = PImage.open(f).convert('RGB')
27 |     return img
28 | 
29 | 
30 | class ImageNetDataset(DatasetFolder):
31 |     def __init__(
32 |             self,
33 |             imagenet_folder: str,
34 |             train: bool,
35 |             transform: Callable,
36 |             is_valid_file: Optional[Callable[[str], bool]] = None,
37 |     ):
38 |         imagenet_folder = os.path.join(imagenet_folder, 'train' if train else 'val')
39 |         super(ImageNetDataset, self).__init__(
40 |             imagenet_folder,
41 |             loader=pil_loader,
42 |             extensions=IMG_EXTENSIONS if is_valid_file is None else None,
43 |             transform=transform,
44 |             target_transform=None, is_valid_file=is_valid_file
45 |         )
46 | 
47 |         self.samples = tuple(img for (img, label) in self.samples)
48 |         self.targets = None  # this is self-supervised learning so we don't need labels
49 | 
50 |     def __getitem__(self, index: int) -> Any:
51 |         img_file_path = self.samples[index]
52 |         return self.transform(self.loader(img_file_path))
53 | 
54 | 
55 | def build_dataset_to_pretrain(dataset_path, input_size) -> Dataset:
56 |     """
57 |     You may need to modify this function to return your own dataset.
58 |     Define a new class, a subclass of `Dataset`, to replace our ImageNetDataset.
59 |     Use dataset_path to build your image file path list.
60 |     Use input_size to create the transformation function for your images; you can refer to the `trans_train` below.
61 | 
62 |     :param dataset_path: the folder of the dataset
63 |     :param input_size: the input size (image resolution)
64 |     :return: the dataset used for pretraining
65 |     """
66 |     trans_train = transforms.Compose([
67 |         transforms.RandomResizedCrop(input_size, scale=(0.67, 1.0), interpolation=interpolation),
68 |         transforms.RandomHorizontalFlip(),
69 |         transforms.ToTensor(),
70 |         transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
71 |     ])
72 | 
73 |     dataset_path = os.path.abspath(dataset_path)
74 |     for postfix in ('train', 'val'):
75 |         if dataset_path.endswith(postfix):
76 |             dataset_path = dataset_path[:-len(postfix)]
77 | 
78 |     dataset_train = ImageNetDataset(imagenet_folder=dataset_path, transform=trans_train, train=True)
79 |     print_transform(trans_train, '[pre-train]')
80 |     return dataset_train
81 | 
82 | 
83 | def print_transform(transform, s):
84 |     print(f'Transform {s} = ')
85 |     for t in transform.transforms:
86 |         print(t)
87 |     print('---------------------------\n')
88 | 
--------------------------------------------------------------------------------
/spark/pretrain/dist.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
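# A minimal usage sketch of the helpers below (single-node launch; the model
# and loss names are illustrative placeholders, not part of this file):
#
#   import dist
#   dist.initialize(backend='nccl')                  # falls back gracefully on CPU / single GPU
#   net = dist.parallelize(my_model, syncbn=True)    # DDP wrapper + optional SyncBatchNorm
#   loss = torch.tensor([local_loss], device=dist.get_device())
#   dist.allreduce(loss)                             # in-place sum across ranks
#   if dist.is_master():
#       print(loss.item() / dist.get_world_size())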
6 | 7 | import os 8 | from typing import List 9 | from typing import Union 10 | 11 | import sys 12 | import torch 13 | import torch.distributed as tdist 14 | import torch.multiprocessing as mp 15 | 16 | __rank, __local_rank, __world_size, __device = 0, 0, 1, 'cpu' 17 | __initialized = False 18 | 19 | 20 | def initialized(): 21 | return __initialized 22 | 23 | 24 | def initialize(backend='nccl'): 25 | global __device 26 | if not torch.cuda.is_available(): 27 | print(f'[dist initialize] cuda is not available, use cpu instead', file=sys.stderr) 28 | return 29 | elif 'RANK' not in os.environ: 30 | __device = torch.empty(1).cuda().device 31 | print(f'[dist initialize] RANK is not set, use 1 GPU instead', file=sys.stderr) 32 | return 33 | 34 | # ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py#L29 35 | if mp.get_start_method(allow_none=True) is None: 36 | mp.set_start_method('spawn') 37 | global_rank, num_gpus = int(os.environ['RANK']), torch.cuda.device_count() 38 | local_rank = global_rank % num_gpus 39 | torch.cuda.set_device(local_rank) 40 | tdist.init_process_group(backend=backend) 41 | 42 | global __rank, __local_rank, __world_size, __initialized 43 | __local_rank = local_rank 44 | __rank, __world_size = tdist.get_rank(), tdist.get_world_size() 45 | __device = torch.empty(1).cuda().device 46 | __initialized = True 47 | 48 | assert tdist.is_initialized(), 'torch.distributed is not initialized!' 49 | 50 | 51 | def get_rank(): 52 | return __rank 53 | 54 | 55 | def get_local_rank(): 56 | return __local_rank 57 | 58 | 59 | def get_world_size(): 60 | return __world_size 61 | 62 | 63 | def get_device(): 64 | return __device 65 | 66 | 67 | def is_master(): 68 | return __rank == 0 69 | 70 | 71 | def is_local_master(): 72 | return __local_rank == 0 73 | 74 | 75 | def barrier(): 76 | if __initialized: 77 | tdist.barrier() 78 | 79 | 80 | def parallelize(net, syncbn=False): 81 | if syncbn: 82 | net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) 83 | net = net.cuda() 84 | net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[get_local_rank()], find_unused_parameters=False, broadcast_buffers=False) 85 | return net 86 | 87 | 88 | def allreduce(t: torch.Tensor) -> None: 89 | if __initialized: 90 | if not t.is_cuda: 91 | cu = t.detach().cuda() 92 | tdist.all_reduce(cu) 93 | t.copy_(cu.cpu()) 94 | else: 95 | tdist.all_reduce(t) 96 | 97 | 98 | def allgather(t: torch.Tensor, cat=True) -> Union[List[torch.Tensor], torch.Tensor]: 99 | if __initialized: 100 | if not t.is_cuda: 101 | t = t.cuda() 102 | ls = [torch.empty_like(t) for _ in range(__world_size)] 103 | tdist.all_gather(ls, t) 104 | else: 105 | ls = [t] 106 | if cat: 107 | ls = torch.cat(ls, dim=0) 108 | return ls 109 | 110 | 111 | def broadcast(t: torch.Tensor, src_rank) -> None: 112 | if __initialized: 113 | if not t.is_cuda: 114 | cu = t.detach().cuda() 115 | tdist.broadcast(cu, src=src_rank) 116 | t.copy_(cu.cpu()) 117 | else: 118 | tdist.broadcast(t, src=src_rank) 119 | -------------------------------------------------------------------------------- /spark/pretrain/models/custom_detr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
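# A minimal sanity-check sketch (mirroring `convnet_test` at the bottom of this
# file): SparK's sparse encoder only relies on the three methods implemented by
# `YourConvNet` below, and `hierarchical=True` must return one feature map per
# entry of `get_feature_map_channels()`:
#
#   from timm.models import create_model
#   cnn = create_model('HGNetv2')
#   feats = cnn(torch.rand(2, 3, 224, 224), hierarchical=True)
#   assert [f.shape[1] for f in feats] == cnn.get_feature_map_channels()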
6 | 
7 | import torch
8 | import torch.nn as nn
9 | from typing import List
10 | from timm.models.registry import register_model
11 | from HG.HGBlock import HGStem, HGBlock
12 | from HG.block import DWConv
13 | 
14 | 
15 | class YourConvNet(nn.Module):
16 |     def __init__(self, *args, **kwargs):
17 |         super().__init__()
18 | 
19 |         self.mlist = nn.ModuleList(
20 |             [HGStem(3, 32, 64),
21 |              HGBlock(64, 64, 128, 3, n=6),
22 | 
23 |              DWConv(128, 128, 3, 2, 1, False),
24 |              HGBlock(128, 128, 512, 3, n=6),
25 |              HGBlock(512, 128, 512, 3, lightconv=False, shortcut=True, n=6),
26 | 
27 |              DWConv(512, 512, 3, 2, 1, False),
28 |              HGBlock(512, 256, 1024, 5, lightconv=True, shortcut=False, n=6),
29 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
30 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
31 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
32 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
33 | 
34 |              DWConv(1024, 1024, 3, 2, 1, False),
35 |              HGBlock(1024, 512, 2048, 5, lightconv=True, shortcut=False, n=6),
36 |              HGBlock(2048, 512, 2048, 5, lightconv=True, shortcut=True, n=6)
37 |              ]
38 |         )
39 | 
40 |     def get_downsample_ratio(self) -> int:
41 |         return 32
42 | 
43 |     def get_feature_map_channels(self) -> List[int]:
44 |         return [128, 512, 1024, 2048]
45 | 
46 |     def forward(self, x: torch.Tensor, hierarchical=False):
47 |         if hierarchical:
48 |             ls = []
49 |             for index, modules in enumerate(self.mlist):
50 |                 x = modules(x)
51 |                 if index in [1, 4, 10, 13]:
52 |                     ls.append(x)
53 |             return ls
54 |         else:
55 |             for modules in self.mlist:
56 |                 x = modules(x)
57 |             return x
58 | 
59 | 
60 | @register_model
61 | def HGNetv2(pretrained=False, **kwargs):
62 |     return YourConvNet(**kwargs)
63 | 
64 | 
65 | @torch.no_grad()
66 | def convnet_test():
67 |     from timm.models import create_model
68 |     cnn = create_model('HGNetv2')
69 |     print('get_downsample_ratio:', cnn.get_downsample_ratio())
70 |     print('get_feature_map_channels:', cnn.get_feature_map_channels())
71 | 
72 |     downsample_ratio = cnn.get_downsample_ratio()
73 |     feature_map_channels = cnn.get_feature_map_channels()
74 | 
75 |     # check the forward function
76 |     B, C, H, W = 4, 3, 224, 224
77 |     inp = torch.rand(B, C, H, W)
78 |     feats = cnn(inp, hierarchical=True)
79 |     assert isinstance(feats, list)
80 |     assert len(feats) == len(feature_map_channels)
81 |     print([tuple(t.shape) for t in feats])
82 | 
83 |     # check the downsample ratio
84 |     feats = cnn(inp, hierarchical=True)
85 |     assert feats[-1].shape[-2] == H // downsample_ratio
86 |     assert feats[-1].shape[-1] == W // downsample_ratio
87 | 
88 |     # check the channel number
89 |     for feat, ch in zip(feats, feature_map_channels):
90 |         assert feat.ndim == 4
91 |         assert feat.shape[1] == ch
92 | 
93 | 
94 | if __name__ == '__main__':
95 |     convnet_test()
96 | 
--------------------------------------------------------------------------------
/utils/activations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | 
6 | class SiLU(nn.Module):
7 |     # SiLU activation https://arxiv.org/pdf/1606.08415.pdf
8 |     @staticmethod
9 |     def forward(x):
10 |         return x * torch.sigmoid(x)
11 | 
12 | 
13 | class Hardswish(nn.Module):
14 |     # Hard-SiLU activation
15 |     @staticmethod
16 |     def forward(x):
17 |         # return x * F.hardsigmoid(x)  # for TorchScript and CoreML
18 |         return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for
TorchScript, CoreML and ONNX 19 | 20 | 21 | class Mish(nn.Module): 22 | # Mish activation https://github.com/digantamisra98/Mish 23 | @staticmethod 24 | def forward(x): 25 | return x * F.softplus(x).tanh() 26 | 27 | 28 | class MemoryEfficientMish(nn.Module): 29 | # Mish activation memory-efficient 30 | class F(torch.autograd.Function): 31 | 32 | @staticmethod 33 | def forward(ctx, x): 34 | ctx.save_for_backward(x) 35 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | x = ctx.saved_tensors[0] 40 | sx = torch.sigmoid(x) 41 | fx = F.softplus(x).tanh() 42 | return grad_output * (fx + x * sx * (1 - fx * fx)) 43 | 44 | def forward(self, x): 45 | return self.F.apply(x) 46 | 47 | 48 | class FReLU(nn.Module): 49 | # FReLU activation https://arxiv.org/abs/2007.11824 50 | def __init__(self, c1, k=3): # ch_in, kernel 51 | super().__init__() 52 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 53 | self.bn = nn.BatchNorm2d(c1) 54 | 55 | def forward(self, x): 56 | return torch.max(x, self.bn(self.conv(x))) 57 | 58 | 59 | class AconC(nn.Module): 60 | r""" ACON activation (activate or not) 61 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 62 | according to "Activate or Not: Learning Customized Activation" . 63 | """ 64 | 65 | def __init__(self, c1): 66 | super().__init__() 67 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 68 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 69 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 70 | 71 | def forward(self, x): 72 | dpx = (self.p1 - self.p2) * x 73 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 74 | 75 | 76 | class MetaAconC(nn.Module): 77 | r""" ACON activation (activate or not) 78 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 79 | according to "Activate or Not: Learning Customized Activation" . 
80 |     """
81 | 
82 |     def __init__(self, c1, k=1, s=1, r=16):  # ch_in, kernel, stride, r
83 |         super().__init__()
84 |         c2 = max(r, c1 // r)
85 |         self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
86 |         self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
87 |         self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
88 |         self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
89 |         # self.bn1 = nn.BatchNorm2d(c2)
90 |         # self.bn2 = nn.BatchNorm2d(c1)
91 | 
92 |     def forward(self, x):
93 |         y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
94 |         # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
95 |         # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y)))))  # bug/unstable
96 |         beta = torch.sigmoid(self.fc2(self.fc1(y)))  # bug patch BN layers removed
97 |         dpx = (self.p1 - self.p2) * x
98 |         return dpx * torch.sigmoid(beta * dpx) + self.p2 * x
99 | 
--------------------------------------------------------------------------------
/utils/coco_utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from pycocotools.coco import COCO
4 | from pycocotools import mask as maskUtils
5 | 
6 | # coco id: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
7 | all_instances_ids = [
8 |     1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
9 |     11, 13, 14, 15, 16, 17, 18, 19, 20,
10 |     21, 22, 23, 24, 25, 27, 28,
11 |     31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
12 |     41, 42, 43, 44, 46, 47, 48, 49, 50,
13 |     51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
14 |     61, 62, 63, 64, 65, 67, 70,
15 |     72, 73, 74, 75, 76, 77, 78, 79, 80,
16 |     81, 82, 84, 85, 86, 87, 88, 89, 90,
17 | ]
18 | 
19 | all_stuff_ids = [
20 |     92, 93, 94, 95, 96, 97, 98, 99, 100,
21 |     101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
22 |     111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
23 |     121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
24 |     131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
25 |     141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
26 |     151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
27 |     161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
28 |     171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
29 |     181, 182,
30 |     # other
31 |     183,
32 |     # unlabeled
33 |     0,
34 | ]
35 | 
36 | # panoptic id: https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
37 | panoptic_stuff_ids = [
38 |     92, 93, 95, 100,
39 |     107, 109,
40 |     112, 118, 119,
41 |     122, 125, 128, 130,
42 |     133, 138,
43 |     141, 144, 145, 147, 148, 149,
44 |     151, 154, 155, 156, 159,
45 |     161, 166, 168,
46 |     171, 175, 176, 177, 178, 180,
47 |     181, 184, 185, 186, 187, 188, 189, 190,
48 |     191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
49 |     # unlabeled
50 |     0,
51 | ]
52 | 
53 | def getCocoIds(name = 'semantic'):
54 |     if 'instances' == name:
55 |         return all_instances_ids
56 |     elif 'stuff' == name:
57 |         return all_stuff_ids
58 |     elif 'panoptic' == name:
59 |         return all_instances_ids + panoptic_stuff_ids
60 |     else: # semantic
61 |         return all_instances_ids + all_stuff_ids
62 | 
63 | def getMappingId(index, name = 'semantic'):
64 |     ids = getCocoIds(name = name)
65 |     return ids[index]
66 | 
67 | def getMappingIndex(id, name = 'semantic'):
68 |     ids = getCocoIds(name = name)
69 |     return ids.index(id)
70 | 
71 | # convert ann to rle encoded string
72 | def annToRLE(ann, img_size):
73 |     h, w = img_size
74 |     segm = ann['segmentation']
75 |     if list == type(segm):
76 |         # polygon -- a single object might consist of multiple parts
77 |         # we merge all parts into one mask rle code
78 |         rles = maskUtils.frPyObjects(segm, h, w)
79 |         rle = maskUtils.merge(rles)
80 |     elif list == type(segm['counts']):
81 |         # uncompressed RLE
82 |         rle = maskUtils.frPyObjects(segm, h, w)
83 |     else:
84 |         # rle
85 |         rle = ann['segmentation']
86 |     return rle
87 | 
88 | # decode ann to mask matrix
89 | def annToMask(ann, img_size):
90 |     rle = annToRLE(ann, img_size)
91 |     m = maskUtils.decode(rle)
92 |     return m
93 | 
94 | # convert mask to polygons
95 | def convert_to_polys(mask):
96 |     # opencv 2.x / >= 4.0 return (contours, hierarchy)
97 |     contours, hierarchy = cv2.findContours((mask).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
98 | 
99 |     # opencv 3.x returns (image, contours, hierarchy):
100 |     # _, contours, hierarchy = cv2.findContours((mask).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
101 | 
102 |     segmentation = []
103 |     for contour in contours:
104 |         contour = contour.flatten().tolist()
105 |         if 4 < len(contour):
106 |             segmentation.append(contour)
107 | 
108 |     return segmentation
109 | 
--------------------------------------------------------------------------------
/utils/triton.py:
--------------------------------------------------------------------------------
1 | import typing
2 | from urllib.parse import urlparse
3 | 
4 | import torch
5 | 
6 | 
7 | class TritonRemoteModel:
8 |     """ A wrapper over a model served by the Triton Inference Server. It can
9 |     be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
10 |     as input and returns them as outputs.
11 |     """
12 | 
13 |     def __init__(self, url: str):
14 |         """
15 |         Keyword arguments:
16 |         url: Fully qualified address of the Triton server - e.g. grpc://localhost:8000
17 |         """
18 | 
19 |         parsed_url = urlparse(url)
20 |         if parsed_url.scheme == "grpc":
21 |             from tritonclient.grpc import InferenceServerClient, InferInput
22 | 
23 |             self.client = InferenceServerClient(parsed_url.netloc)  # Triton GRPC client
24 |             model_repository = self.client.get_model_repository_index()
25 |             self.model_name = model_repository.models[0].name
26 |             self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
27 | 
28 |             def create_input_placeholders() -> typing.List[InferInput]:
29 |                 return [
30 |                     InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
31 | 
32 |         else:
33 |             from tritonclient.http import InferenceServerClient, InferInput
34 | 
35 |             self.client = InferenceServerClient(parsed_url.netloc)  # Triton HTTP client
36 |             model_repository = self.client.get_model_repository_index()
37 |             self.model_name = model_repository[0]['name']
38 |             self.metadata = self.client.get_model_metadata(self.model_name)
39 | 
40 |             def create_input_placeholders() -> typing.List[InferInput]:
41 |                 return [
42 |                     InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
43 | 
44 |         self._create_input_placeholders_fn = create_input_placeholders
45 | 
46 |     @property
47 |     def runtime(self):
48 |         """Returns the model runtime"""
49 |         return self.metadata.get("backend", self.metadata.get("platform"))
50 | 
51 |     def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
52 |         """ Invokes the model. Parameters can be provided via args or kwargs.
53 |         args, if provided, are assumed to match the order of inputs of the model.
54 |         kwargs are matched with the model input names.
55 | """ 56 | inputs = self._create_inputs(*args, **kwargs) 57 | response = self.client.infer(model_name=self.model_name, inputs=inputs) 58 | result = [] 59 | for output in self.metadata['outputs']: 60 | tensor = torch.as_tensor(response.as_numpy(output['name'])) 61 | result.append(tensor) 62 | return result[0] if len(result) == 1 else result 63 | 64 | def _create_inputs(self, *args, **kwargs): 65 | args_len, kwargs_len = len(args), len(kwargs) 66 | if not args_len and not kwargs_len: 67 | raise RuntimeError("No inputs provided.") 68 | if args_len and kwargs_len: 69 | raise RuntimeError("Cannot specify args and kwargs at the same time") 70 | 71 | placeholders = self._create_input_placeholders_fn() 72 | if args_len: 73 | if args_len != len(placeholders): 74 | raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") 75 | for input, value in zip(placeholders, args): 76 | input.set_data_from_numpy(value.cpu().numpy()) 77 | else: 78 | for input in placeholders: 79 | value = kwargs[input.name] 80 | input.set_data_from_numpy(value.cpu().numpy()) 81 | return placeholders 82 | -------------------------------------------------------------------------------- /spark/pretrain/models/custom_origin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | from typing import List 10 | from timm.models.registry import register_model 11 | 12 | 13 | class YourConvNet(nn.Module): 14 | """ 15 | This is a template for your custom ConvNet. 16 | It is required to implement the following three functions: `get_downsample_ratio`, `get_feature_map_channels`, `forward`. 17 | You can refer to the implementations in `pretrain\models\resnet.py` for an example. 18 | """ 19 | 20 | def get_downsample_ratio(self) -> int: 21 | """ 22 | This func would ONLY be used in `SparseEncoder's __init__` (see `pretrain/encoder.py`). 23 | 24 | :return: the TOTAL downsample ratio of the ConvNet. 25 | E.g., for a ResNet-50, this should return 32. 26 | """ 27 | raise NotImplementedError 28 | 29 | def get_feature_map_channels(self) -> List[int]: 30 | """ 31 | This func would ONLY be used in `SparseEncoder's __init__` (see `pretrain/encoder.py`). 32 | 33 | :return: a list of the number of channels of each feature map. 34 | E.g., for a ResNet-50, this should return [256, 512, 1024, 2048]. 35 | """ 36 | raise NotImplementedError 37 | 38 | def forward(self, inp_bchw: torch.Tensor, hierarchical=False): 39 | """ 40 | The forward with `hierarchical=True` would ONLY be used in `SparseEncoder.forward` (see `pretrain/encoder.py`). 41 | 42 | :param inp_bchw: input image tensor, shape: (batch_size, channels, height, width). 43 | :param hierarchical: return the logits (not hierarchical), or the feature maps (hierarchical). 44 | :return: 45 | - hierarchical == False: return the logits of the classification task, shape: (batch_size, num_classes). 46 | - hierarchical == True: return a list of all feature maps, which should have the same length as the return value of `get_feature_map_channels`. 47 | E.g., for a ResNet-50, it should return a list [1st_feat_map, 2nd_feat_map, 3rd_feat_map, 4th_feat_map]. 
48 | for an input size of 224, the shapes are [(B, 256, 56, 56), (B, 512, 28, 28), (B, 1024, 14, 14), (B, 2048, 7, 7)] 49 | """ 50 | raise NotImplementedError 51 | 52 | 53 | @register_model 54 | def your_convnet_small(pretrained=False, **kwargs): 55 | raise NotImplementedError 56 | return YourConvNet(**kwargs) 57 | 58 | 59 | @torch.no_grad() 60 | def convnet_test(): 61 | from timm.models import create_model 62 | cnn = create_model('your_convnet_small') 63 | print('get_downsample_ratio:', cnn.get_downsample_ratio()) 64 | print('get_feature_map_channels:', cnn.get_feature_map_channels()) 65 | 66 | downsample_ratio = cnn.get_downsample_ratio() 67 | feature_map_channels = cnn.get_feature_map_channels() 68 | 69 | # check the forward function 70 | B, C, H, W = 4, 3, 224, 224 71 | inp = torch.rand(B, C, H, W) 72 | feats = cnn(inp, hierarchical=True) 73 | assert isinstance(feats, list) 74 | assert len(feats) == len(feature_map_channels) 75 | print([tuple(t.shape) for t in feats]) 76 | 77 | # check the downsample ratio 78 | feats = cnn(inp, hierarchical=True) 79 | assert feats[-1].shape[-2] == H // downsample_ratio 80 | assert feats[-1].shape[-1] == W // downsample_ratio 81 | 82 | # check the channel number 83 | for feat, ch in zip(feats, feature_map_channels): 84 | assert feat.ndim == 4 85 | assert feat.shape[1] == ch 86 | 87 | 88 | if __name__ == '__main__': 89 | convnet_test() 90 | -------------------------------------------------------------------------------- /models/detect/pk-yolo.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 2 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | # activation: nn.LeakyReLU(0.1) 8 | # activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | [-1, 1, Backbone, []], 19 | # conv down 20 | [1, 1, Down0, [64]], #2 320 1 21 | [1, 1, Down1, [128]], # 3 160 3 22 | [1, 1, Down2, [256]],# 4 80 5 23 | [1, 1, Down3, [512]], #5 40 7 24 | [1, 1, Down4, [1024]], #6 20 9 25 | 26 | # routing 27 | [ 2, 1, CBLinear, [ [ 64 ] ] ], # 10 28 | [ 3, 1, CBLinear, [ [ 64, 128 ] ] ], # 11 29 | [ 4, 1, CBLinear, [ [ 64, 128, 256 ] ] ], # 12 30 | [ 5, 1, CBLinear, [ [ 64, 128, 256, 512 ] ] ], # 13 31 | [ 6, 1, CBLinear, [ [ 64, 128, 256, 512, 1024 ] ] ], # 14 -3 32 | 33 | # conv down fuse 34 | [ 0, 1, Conv, [ 64, 3, 2 ] ], # 15-P1/2 35 | [ [ 7, 8, 9, 10, 11, -1 ], 1, CBFuse, [ [ 0, 0, 0, 0, 0 ] ] ], # 16 36 | 37 | # conv down fuse 38 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 17-P2/4 39 | [ [ 8, 9, 10, 11, -1 ], 1, CBFuse, [ [ 1, 1, 1, 1 ] ] ], # 18 40 | 41 | # elan-1 block 42 | [ -1, 1, RepNCSPELAN4, [ 256, 128, 64, 2 ] ], # 19 43 | 44 | # avg-conv down fuse 45 | [ -1, 1, ADown, [ 256 ] ], # 20-P3/8 46 | [ [ 9, 10, 11, -1 ], 1, CBFuse, [ [ 2, 2, 2 ] ] ], # 21 47 | 48 | # elan-2 block 49 | [ -1, 1, RepNCSPELAN4, [ 512, 256, 128, 2 ] ], # 22 50 | 51 | # avg-conv down fuse 52 | [ -1, 1, ADown, [ 512 ] ], # 23-P4/16 53 | [ [ 10, 11, -1 ], 1, CBFuse, [ [ 3, 3 ] ] ], # 24 54 | 55 | # elan-2 block 56 | [ -1, 1, RepNCSPELAN4, [ 1024, 512, 256, 2 ] ], # 25 57 | 58 | # avg-conv down fuse 59 | [ -1, 1, ADown, [ 1024 ] ], # 26-P5/32 60 | [ [ 11, -1 ], 1, CBFuse, [ [ 4 ] ] ], # 27 61 | 62 | # elan-2 block 63 | [ -1, 1, RepNCSPELAN4, [ 1024, 512, 256, 2 ] ], # 28 25 64 | 65 | ] 66 | 67 | # YOLOv9 head 68 | head: 69 | [ 70 | # multi-level auxiliary branch 71 | 72 | # elan-spp 
block 73 | [6, 1, SPPELAN, [512, 256]], # 29 74 | 75 | # up-concat merge 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 78 | 79 | # csp-elan block 80 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 81 | 82 | # up-concat merge 83 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 84 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 85 | 86 | # csp-elan block 87 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 88 | 89 | 90 | 91 | # main branch 92 | 93 | # elan-spp block 94 | [25, 1, SPPELAN, [512, 256]], # 36 95 | 96 | # up-concat merge 97 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 98 | [[-1, 22], 1, Concat, [1]], # cat backbone P4 99 | 100 | # csp-elan block 101 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 102 | 103 | # up-concat merge 104 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 105 | [[-1, 19], 1, Concat, [1]], # cat backbone P3 106 | 107 | # csp-elan block 108 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) 109 | 110 | # avg-conv-down merge 111 | [-1, 1, ADown, [256]], 112 | [[-1, 36], 1, Concat, [1]], # cat head P4 113 | 114 | # csp-elan block 115 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) 116 | 117 | # avg-conv-down merge 118 | [-1, 1, ADown, [512]], 119 | [[-1, 33], 1, Concat, [1]], # cat head P5 120 | 121 | # csp-elan block 122 | [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) 123 | 124 | # detect 125 | [[32, 29, 26, 39, 42, 45], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 126 | ] 127 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/README.md: -------------------------------------------------------------------------------- 1 | ## About code isolation 2 | 3 | This `downstream_mmdet` is isolated from pre-training codes. One can treat this `downstream_mmdet` as an independent codebase 🛠️. 4 | 5 | ## Fine-tuned ConvNeXt-B weights, log files, and performance 6 | 7 | 8 |
9 | 10 | [[`weights (pre-trained by SparK)`](https://drive.google.com/file/d/1ZjWbqI1qoBcqeQijI5xX9E-YNkxpJcYV/view?usp=share_link)] 11 | [[`weights (fine-tuned on COCO)`](https://drive.google.com/file/d/1t10dmzg5KOO27o2yIglK-gQepB5gR4zR/view?usp=share_link)] 12 | [[`log.json`](https://drive.google.com/file/d/1TuNboXl1qwjf1tggZ3QOssI67uU7Jtig/view?usp=share_link)] 13 | [[`log`](https://drive.google.com/file/d/1JY5CkL_MX08zJ8P1FBIeC60OJsuIiyZc/view?usp=sharing)] 14 |
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | ## Installation [MMDetection with commit 6a979e2](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/tree/6a979e2164e3fb0de0ca2546545013a4d71b2f7d) before fine-tuning ConvNeXt on COCO
23 | 
24 | We refer to the codebases of [ConvNeXt](https://github.com/facebookresearch/ConvNeXt/tree/048efcea897d999aed302f2639b6270aedf8d4c8) and [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/tree/6a979e2164e3fb0de0ca2546545013a4d71b2f7d).
25 | Please refer to [README.md](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/6a979e2164e3fb0de0ca2546545013a4d71b2f7d/README.md) for installation and dataset preparation instructions.
26 | 
27 | Note the COCO dataset folder should be at `downstream_mmdet/data/coco`.
28 | The folder should follow the directory structure required by `MMDetection`, which should look like this:
29 | ```
30 | downstream_mmdet/data/coco:
31 |     annotations/:
32 |         captions_train2017.json  captions_val2017.json
33 |         instances_train2017.json  instances_val2017.json
34 |         person_keypoints_train2017.json  person_keypoints_val2017.json
35 |     train2017/:
36 |         a_lot_images.jpg
37 |     val2017/:
38 |         a_lot_images.jpg
39 | ```
40 | 
41 | 
42 | ### Training
43 | 
44 | To train a detector with pre-trained models, run:
45 | ```
46 | # single-gpu training
47 | python tools/train.py <CONFIG_FILE> --cfg-options model.pretrained=<PRETRAIN_MODEL> [other optional arguments]
48 | 
49 | # multi-gpu training
50 | tools/dist_train.sh <CONFIG_FILE> <GPU_NUM> --cfg-options model.pretrained=<PRETRAIN_MODEL> [other optional arguments]
51 | ```
52 | For example, to train a Mask R-CNN model with a SparK pretrained `ConvNeXt-B` backbone and 4 gpus, run:
53 | ```
54 | tools/dist_train.sh configs/convnext_spark/mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py 4 \
55 |     --cfg-options model.pretrained=/some/path/to/official_convnext_base_1kpretrained.pth
56 | ```
57 | 
58 | The Mask R-CNN 3x fine-tuning config file can be found at [`configs/convnext_spark`](configs/convnext_spark). This config is basically a copy of [https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/mask_rcnn_convnext_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py](https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/mask_rcnn_convnext_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py).
59 | 
60 | ### Inference
61 | ```
62 | # single-gpu testing
63 | python tools/test.py <CONFIG_FILE> <DET_CHECKPOINT_FILE> --eval bbox segm
64 | 
65 | # multi-gpu testing
66 | tools/dist_test.sh <CONFIG_FILE> <DET_CHECKPOINT_FILE> <GPU_NUM> --eval bbox segm
67 | ```
68 | 
69 | ## Acknowledgment
70 | 
71 | We appreciate these useful codebases:
72 | 
73 | - [MMDetection](https://github.com/open-mmlab/mmdetection)
74 | - [ConvNeXt](https://github.com/facebookresearch/ConvNeXt)
75 | - [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection)
76 | 
77 | 
--------------------------------------------------------------------------------
/spark/downstream_imagenet/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
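# A minimal sketch of how the `get_layer_id_and_scale_exp` hooks installed
# below are typically consumed for layer-wise lr decay (the 0.7 decay ratio,
# `base_lr`, and the optimizer settings are illustrative, not part of this file):
#
#   groups = {}
#   for name, p in model.named_parameters():
#       layer_id, scale_exp = model.get_layer_id_and_scale_exp(name)
#       g = groups.setdefault(layer_id, {'params': [], 'lr': base_lr * 0.7 ** scale_exp})
#       g['params'].append(p)
#   optimizer = torch.optim.AdamW(list(groups.values()), weight_decay=0.05)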
6 | 7 | import math 8 | 9 | import torch 10 | from timm.data import Mixup 11 | from timm.loss import BinaryCrossEntropy, SoftTargetCrossEntropy 12 | from timm.models.layers import drop 13 | from timm.models.resnet import ResNet 14 | 15 | from .convnext_official import ConvNeXt 16 | 17 | 18 | def convnext_get_layer_id_and_scale_exp(self: ConvNeXt, para_name: str): 19 | N = 12 if len(self.stages[-2]) > 9 else 6 20 | if para_name.startswith("downsample_layers"): 21 | stage_id = int(para_name.split('.')[1]) 22 | if stage_id == 0: 23 | layer_id = 0 24 | elif stage_id == 1 or stage_id == 2: 25 | layer_id = stage_id + 1 26 | else: # stage_id == 3: 27 | layer_id = N 28 | elif para_name.startswith("stages"): 29 | stage_id = int(para_name.split('.')[1]) 30 | block_id = int(para_name.split('.')[2]) 31 | if stage_id == 0 or stage_id == 1: 32 | layer_id = stage_id + 1 33 | elif stage_id == 2: 34 | layer_id = 3 + block_id // 3 35 | else: # stage_id == 3: 36 | layer_id = N 37 | else: 38 | layer_id = N + 1 # after backbone 39 | 40 | return layer_id, N + 1 - layer_id 41 | 42 | 43 | def resnets_get_layer_id_and_scale_exp(self: ResNet, para_name: str): 44 | # stages: 45 | # 50 : [3, 4, 6, 3] 46 | # 101 : [3, 4, 23, 3] 47 | # 152 : [3, 8, 36, 3] 48 | # 200 : [3, 24, 36, 3] 49 | # eca269d: [3, 30, 48, 8] 50 | 51 | L2, L3 = len(self.layer2), len(self.layer3) 52 | if L2 == 4 and L3 == 6: 53 | blk2, blk3 = 2, 3 54 | elif L2 == 4 and L3 == 23: 55 | blk2, blk3 = 2, 3 56 | elif L2 == 8 and L3 == 36: 57 | blk2, blk3 = 4, 4 58 | elif L2 == 24 and L3 == 36: 59 | blk2, blk3 = 4, 4 60 | elif L2 == 30 and L3 == 48: 61 | blk2, blk3 = 5, 6 62 | else: 63 | raise NotImplementedError 64 | 65 | N2, N3 = math.ceil(L2 / blk2 - 1e-5), math.ceil(L3 / blk3 - 1e-5) 66 | N = 2 + N2 + N3 67 | if para_name.startswith('layer'): # 1, 2, 3, 4, 5 68 | stage_id, block_id = int(para_name.split('.')[0][5:]), int(para_name.split('.')[1]) 69 | if stage_id == 1: 70 | layer_id = 1 71 | elif stage_id == 2: 72 | layer_id = 2 + block_id // blk2 # 2, 3 73 | elif stage_id == 3: 74 | layer_id = 2 + N2 + block_id // blk3 # r50: 4, 5 r101: 4, 5, ..., 11 75 | else: # == 4 76 | layer_id = N # r50: 6 r101: 12 77 | elif para_name.startswith('fc.'): 78 | layer_id = N + 1 # r50: 7 r101: 13 79 | else: 80 | layer_id = 0 81 | 82 | return layer_id, N + 1 - layer_id # r50: 0-7, 7-0 r101: 0-13, 13-0 83 | 84 | 85 | def _ex_repr(self): 86 | return ', '.join( 87 | f'{k}=' + (f'{v:g}' if isinstance(v, float) else str(v)) 88 | for k, v in vars(self).items() 89 | if not k.startswith('_') and k != 'training' 90 | and not isinstance(v, (torch.nn.Module, torch.Tensor)) 91 | ) 92 | 93 | 94 | # IMPORTANT: update some member functions 95 | __UPDATED = False 96 | if not __UPDATED: 97 | for clz in (torch.nn.CrossEntropyLoss, SoftTargetCrossEntropy, BinaryCrossEntropy, Mixup, drop.DropPath): 98 | if hasattr(clz, 'extra_repr'): 99 | clz.extra_repr = _ex_repr 100 | else: 101 | clz.__repr__ = lambda self: f'{type(self).__name__}({_ex_repr(self)})' 102 | ResNet.get_layer_id_and_scale_exp = resnets_get_layer_id_and_scale_exp 103 | ConvNeXt.get_layer_id_and_scale_exp = convnext_get_layer_id_and_scale_exp 104 | __UPDATED = True 105 | -------------------------------------------------------------------------------- /utils/loggers/comet/optimizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": "random", 3 | "parameters": { 4 | "anchor_t": { 5 | "type": "discrete", 6 | "values": [ 7 | 2, 8 | 8 9 | ] 10 | }, 11 | 
"batch_size": { 12 | "type": "discrete", 13 | "values": [ 14 | 16, 15 | 32, 16 | 64 17 | ] 18 | }, 19 | "box": { 20 | "type": "discrete", 21 | "values": [ 22 | 0.02, 23 | 0.2 24 | ] 25 | }, 26 | "cls": { 27 | "type": "discrete", 28 | "values": [ 29 | 0.2 30 | ] 31 | }, 32 | "cls_pw": { 33 | "type": "discrete", 34 | "values": [ 35 | 0.5 36 | ] 37 | }, 38 | "copy_paste": { 39 | "type": "discrete", 40 | "values": [ 41 | 1 42 | ] 43 | }, 44 | "degrees": { 45 | "type": "discrete", 46 | "values": [ 47 | 0, 48 | 45 49 | ] 50 | }, 51 | "epochs": { 52 | "type": "discrete", 53 | "values": [ 54 | 5 55 | ] 56 | }, 57 | "fl_gamma": { 58 | "type": "discrete", 59 | "values": [ 60 | 0 61 | ] 62 | }, 63 | "fliplr": { 64 | "type": "discrete", 65 | "values": [ 66 | 0 67 | ] 68 | }, 69 | "flipud": { 70 | "type": "discrete", 71 | "values": [ 72 | 0 73 | ] 74 | }, 75 | "hsv_h": { 76 | "type": "discrete", 77 | "values": [ 78 | 0 79 | ] 80 | }, 81 | "hsv_s": { 82 | "type": "discrete", 83 | "values": [ 84 | 0 85 | ] 86 | }, 87 | "hsv_v": { 88 | "type": "discrete", 89 | "values": [ 90 | 0 91 | ] 92 | }, 93 | "iou_t": { 94 | "type": "discrete", 95 | "values": [ 96 | 0.7 97 | ] 98 | }, 99 | "lr0": { 100 | "type": "discrete", 101 | "values": [ 102 | 1e-05, 103 | 0.1 104 | ] 105 | }, 106 | "lrf": { 107 | "type": "discrete", 108 | "values": [ 109 | 0.01, 110 | 1 111 | ] 112 | }, 113 | "mixup": { 114 | "type": "discrete", 115 | "values": [ 116 | 1 117 | ] 118 | }, 119 | "momentum": { 120 | "type": "discrete", 121 | "values": [ 122 | 0.6 123 | ] 124 | }, 125 | "mosaic": { 126 | "type": "discrete", 127 | "values": [ 128 | 0 129 | ] 130 | }, 131 | "obj": { 132 | "type": "discrete", 133 | "values": [ 134 | 0.2 135 | ] 136 | }, 137 | "obj_pw": { 138 | "type": "discrete", 139 | "values": [ 140 | 0.5 141 | ] 142 | }, 143 | "optimizer": { 144 | "type": "categorical", 145 | "values": [ 146 | "SGD", 147 | "Adam", 148 | "AdamW" 149 | ] 150 | }, 151 | "perspective": { 152 | "type": "discrete", 153 | "values": [ 154 | 0 155 | ] 156 | }, 157 | "scale": { 158 | "type": "discrete", 159 | "values": [ 160 | 0 161 | ] 162 | }, 163 | "shear": { 164 | "type": "discrete", 165 | "values": [ 166 | 0 167 | ] 168 | }, 169 | "translate": { 170 | "type": "discrete", 171 | "values": [ 172 | 0 173 | ] 174 | }, 175 | "warmup_bias_lr": { 176 | "type": "discrete", 177 | "values": [ 178 | 0, 179 | 0.2 180 | ] 181 | }, 182 | "warmup_epochs": { 183 | "type": "discrete", 184 | "values": [ 185 | 5 186 | ] 187 | }, 188 | "warmup_momentum": { 189 | "type": "discrete", 190 | "values": [ 191 | 0, 192 | 0.95 193 | ] 194 | }, 195 | "weight_decay": { 196 | "type": "discrete", 197 | "values": [ 198 | 0, 199 | 0.001 200 | ] 201 | } 202 | }, 203 | "spec": { 204 | "maxCombo": 0, 205 | "metric": "metrics/mAP_0.5", 206 | "objective": "maximize" 207 | }, 208 | "trials": 1 209 | } 210 | -------------------------------------------------------------------------------- /models/detect/yolov9-e.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 2 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | # conv down 19 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 20 | 21 | # conv down 22 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 23 | 24 | # csp-elan block 25 | [-1, 1, 
RepNCSPELAN4, [256, 128, 64, 2]], # 3 26 | 27 | # avg-conv down 28 | [-1, 1, ADown, [256]], # 4-P3/8 29 | 30 | # csp-elan block 31 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 32 | 33 | # avg-conv down 34 | [-1, 1, ADown, [512]], # 6-P4/16 35 | 36 | # csp-elan block 37 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 38 | 39 | # avg-conv down 40 | [-1, 1, ADown, [1024]], # 8-P5/32 41 | 42 | # csp-elan block 43 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 44 | 45 | # routing 46 | [1, 1, CBLinear, [[64]]], # 10 47 | [3, 1, CBLinear, [[64, 128]]], # 11 48 | [5, 1, CBLinear, [[64, 128, 256]]], # 12 49 | [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 50 | [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 51 | 52 | # conv down 53 | [0, 1, Conv, [64, 3, 2]], # 15-P1/2 54 | [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 55 | 56 | # conv down 57 | [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 58 | [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 59 | 60 | # csp-elan block 61 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 62 | 63 | # avg-conv down fuse 64 | [-1, 1, ADown, [256]], # 20-P3/8 65 | [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 66 | 67 | # csp-elan block 68 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 69 | 70 | # avg-conv down fuse 71 | [-1, 1, ADown, [512]], # 23-P4/16 72 | [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 73 | 74 | # csp-elan block 75 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 76 | 77 | # avg-conv down fuse 78 | [-1, 1, ADown, [1024]], # 26-P5/32 79 | [[14, -1], 1, CBFuse, [[4]]], # 27 80 | 81 | # csp-elan block 82 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 83 | ] 84 | 85 | # YOLOv9 head 86 | head: 87 | [ 88 | # multi-level auxiliary branch 89 | 90 | # elan-spp block 91 | [9, 1, SPPELAN, [512, 256]], # 29 92 | 93 | # up-concat merge 94 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 95 | [[-1, 7], 1, Concat, [1]], # cat backbone P4 96 | 97 | # csp-elan block 98 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 99 | 100 | # up-concat merge 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [[-1, 5], 1, Concat, [1]], # cat backbone P3 103 | 104 | # csp-elan block 105 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 106 | 107 | 108 | 109 | # main branch 110 | 111 | # elan-spp block 112 | [28, 1, SPPELAN, [512, 256]], # 36 113 | 114 | # up-concat merge 115 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 116 | [[-1, 25], 1, Concat, [1]], # cat backbone P4 117 | 118 | # csp-elan block 119 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 120 | 121 | # up-concat merge 122 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 123 | [[-1, 22], 1, Concat, [1]], # cat backbone P3 124 | 125 | # csp-elan block 126 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) 127 | 128 | # avg-conv-down merge 129 | [-1, 1, ADown, [256]], 130 | [[-1, 39], 1, Concat, [1]], # cat head P4 131 | 132 | # csp-elan block 133 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) 134 | 135 | # avg-conv-down merge 136 | [-1, 1, ADown, [512]], 137 | [[-1, 36], 1, Concat, [1]], # cat head P5 138 | 139 | # csp-elan block 140 | [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) 141 | 142 | # detect 143 | [[35, 32, 29, 42, 45, 48], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 144 | ] 145 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/configs/_base_/models/mask_rcnn_convnext_fpn.py: -------------------------------------------------------------------------------- 1 
| # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | # model settings 10 | model = dict( 11 | type='MaskRCNN', 12 | pretrained=None, 13 | backbone=dict( 14 | type='ConvNeXt', 15 | in_chans=3, 16 | depths=[3, 3, 9, 3], 17 | dims=[96, 192, 384, 768], 18 | drop_path_rate=0.2, 19 | layer_scale_init_value=1e-6, 20 | out_indices=[0, 1, 2, 3], 21 | ), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[128, 256, 512, 1024], 25 | out_channels=256, 26 | num_outs=5), 27 | rpn_head=dict( 28 | type='RPNHead', 29 | in_channels=256, 30 | feat_channels=256, 31 | anchor_generator=dict( 32 | type='AnchorGenerator', 33 | scales=[8], 34 | ratios=[0.5, 1.0, 2.0], 35 | strides=[4, 8, 16, 32, 64]), 36 | bbox_coder=dict( 37 | type='DeltaXYWHBBoxCoder', 38 | target_means=[.0, .0, .0, .0], 39 | target_stds=[1.0, 1.0, 1.0, 1.0]), 40 | loss_cls=dict( 41 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 42 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 43 | roi_head=dict( 44 | type='StandardRoIHead', 45 | bbox_roi_extractor=dict( 46 | type='SingleRoIExtractor', 47 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 48 | out_channels=256, 49 | featmap_strides=[4, 8, 16, 32]), 50 | bbox_head=dict( 51 | type='Shared2FCBBoxHead', 52 | in_channels=256, 53 | fc_out_channels=1024, 54 | roi_feat_size=7, 55 | num_classes=80, 56 | bbox_coder=dict( 57 | type='DeltaXYWHBBoxCoder', 58 | target_means=[0., 0., 0., 0.], 59 | target_stds=[0.1, 0.1, 0.2, 0.2]), 60 | reg_class_agnostic=False, 61 | loss_cls=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 63 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 64 | mask_roi_extractor=dict( 65 | type='SingleRoIExtractor', 66 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 67 | out_channels=256, 68 | featmap_strides=[4, 8, 16, 32]), 69 | mask_head=dict( 70 | type='FCNMaskHead', 71 | num_convs=4, 72 | in_channels=256, 73 | conv_out_channels=256, 74 | num_classes=80, 75 | loss_mask=dict( 76 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 77 | # model training and testing settings 78 | train_cfg=dict( 79 | rpn=dict( 80 | assigner=dict( 81 | type='MaxIoUAssigner', 82 | pos_iou_thr=0.7, 83 | neg_iou_thr=0.3, 84 | min_pos_iou=0.3, 85 | match_low_quality=True, 86 | ignore_iof_thr=-1), 87 | sampler=dict( 88 | type='RandomSampler', 89 | num=256, 90 | pos_fraction=0.5, 91 | neg_pos_ub=-1, 92 | add_gt_as_proposals=False), 93 | allowed_border=-1, 94 | pos_weight=-1, 95 | debug=False), 96 | rpn_proposal=dict( 97 | nms_pre=2000, 98 | max_per_img=1000, 99 | nms=dict(type='nms', iou_threshold=0.7), 100 | min_bbox_size=0), 101 | rcnn=dict( 102 | assigner=dict( 103 | type='MaxIoUAssigner', 104 | pos_iou_thr=0.5, 105 | neg_iou_thr=0.5, 106 | min_pos_iou=0.5, 107 | match_low_quality=True, 108 | ignore_iof_thr=-1), 109 | sampler=dict( 110 | type='RandomSampler', 111 | num=512, 112 | pos_fraction=0.25, 113 | neg_pos_ub=-1, 114 | add_gt_as_proposals=True), 115 | mask_size=28, 116 | pos_weight=-1, 117 | debug=False)), 118 | test_cfg=dict( 119 | rpn=dict( 120 | nms_pre=1000, 121 | max_per_img=1000, 122 | nms=dict(type='nms', iou_threshold=0.7), 123 | min_bbox_size=0), 124 | rcnn=dict( 125 | score_thr=0.05, 126 | nms=dict(type='nms', iou_threshold=0.5), 127 | max_per_img=100, 128 | mask_thr_binary=0.5))) 129 | 
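# Note that the backbone defaults above are ConvNeXt-T sized (dims up to 768)
# while neck.in_channels is ConvNeXt-B sized ([128, 256, 512, 1024]); the
# derived config under configs/convnext_spark overrides backbone.depths/dims
# and neck.in_channels consistently for ConvNeXt-B (see its [modified] comments).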
--------------------------------------------------------------------------------
/spark/pretrain/utils/arg_util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | 
7 | import json
8 | import os
9 | import sys
10 | 
11 | from tap import Tap
12 | 
13 | import dist
14 | 
15 | 
16 | class Args(Tap):
17 |     # environment
18 |     exp_name: str = 'your_exp_name'
19 |     exp_dir: str = 'your_exp_dir'   # will be created if not exists
20 |     data_path: str = 'D://code/Br35H-Mask-RCNN/sparkdata/'
21 |     init_weight: str = ''   # use some checkpoint as model weight initialization; ONLY load model weights
22 |     resume_from: str = ''   # resume the experiment from some checkpoint.pth; load model weights, optimizer states, and last epoch
23 | 
24 |     # SparK hyperparameters
25 |     mask: float = 0.6   # mask ratio, should be in (0, 1)
26 | 
27 |     # encoder hyperparameters
28 |     model: str = 'V9back'   # resnet50
29 |     input_size: int = 224
30 |     sbn: bool = True
31 | 
32 |     # data hyperparameters
33 |     bs: int = 10
34 |     dataloader_workers: int = 8
35 | 
36 |     # pre-training hyperparameters
37 |     dp: float = 0.0
38 |     base_lr: float = 2e-4
39 |     wd: float = 0.04
40 |     wde: float = 0.2
41 |     ep: int = 100
42 |     wp_ep: int = 40
43 |     clip: float = 5.
44 |     opt: str = 'lamb'
45 |     ada: float = 0.
46 | 
47 |     # NO NEED TO SPECIFY; each of these args will be updated at runtime automatically
48 |     lr: float = None
49 |     batch_size_per_gpu: int = 0
50 |     glb_batch_size: int = 0
51 |     densify_norm: str = ''
52 |     device: str = 'cpu'
53 |     local_rank: int = 0
54 |     cmd: str = ' '.join(sys.argv[1:])
55 |     commit_id: str = os.popen('git rev-parse HEAD').read().strip() or '[unknown]'
56 |     commit_msg: str = (os.popen('git log -1').read().strip().splitlines() or ['[unknown]'])[-1].strip()
57 |     last_loss: float = 0.
58 |     cur_ep: str = ''
59 |     remain_time: str = ''
60 |     finish_time: str = ''
61 |     first_logging: bool = True
62 |     log_txt_name: str = '{args.exp_dir}/pretrain_log.txt'
63 |     tb_lg_dir: str = ''   # tensorboard log directory
64 | 
65 |     @property
66 |     def is_convnext(self):
67 |         return 'convnext' in self.model or 'cnx' in self.model
68 | 
69 |     @property
70 |     def is_resnet(self):
71 |         return 'resnet' in self.model
72 | 
73 |     def log_epoch(self):
74 |         if not dist.is_local_master():
75 |             return
76 | 
77 |         if self.first_logging:
78 |             self.first_logging = False
79 |             with open(self.log_txt_name, 'w') as fp:
80 |                 json.dump({
81 |                     'name': self.exp_name, 'cmd': self.cmd, 'git_commit_id': self.commit_id, 'git_commit_msg': self.commit_msg,
82 |                     'model': self.model,
83 |                 }, fp)
84 |                 fp.write('\n\n')
85 | 
86 |         with open(self.log_txt_name, 'a') as fp:
87 |             json.dump({
88 |                 'cur_ep': self.cur_ep,
89 |                 'last_L': self.last_loss,
90 |                 'rema': self.remain_time, 'fini': self.finish_time,
91 |             }, fp)
92 |             fp.write('\n')
93 | 
94 | 
95 | def init_dist_and_get_args():
96 |     from utils import misc
97 | 
98 |     # initialize
99 |     args = Args(explicit_bool=True).parse_args()
100 |     e = os.path.abspath(args.exp_dir)
101 |     d, e = os.path.dirname(e), os.path.basename(e)
102 |     e = ''.join(ch if (ch.isalnum() or ch == '-') else '_' for ch in e)
103 |     args.exp_dir = os.path.join(d, e)
104 | 
105 |     os.makedirs(args.exp_dir, exist_ok=True)
106 |     args.log_txt_name = os.path.join(args.exp_dir, 'pretrain_log.txt')
107 |     args.tb_lg_dir = args.tb_lg_dir or os.path.join(args.exp_dir, 'tensorboard_log')
108 |     try:
109 |         os.makedirs(args.tb_lg_dir, exist_ok=True)
110 |     except OSError:
111 |         pass
112 | 
113 |     misc.init_distributed_environ(exp_dir=args.exp_dir)
114 | 
115 |     # update args
116 |     if not dist.initialized():
117 |         args.sbn = False
118 |     args.first_logging = True
119 |     args.device = dist.get_device()
120 |     args.batch_size_per_gpu = args.bs // dist.get_world_size()
121 |     args.glb_batch_size = args.batch_size_per_gpu * dist.get_world_size()
122 | 
123 |     if args.is_resnet:
124 |         args.ada = args.ada or 0.95
125 |         args.densify_norm = 'bn'
126 | 
127 |     if args.is_convnext:
128 |         args.ada = args.ada or 0.999
129 |         args.densify_norm = 'ln'
130 | 
131 |     args.opt = args.opt.lower()
132 |     args.lr = args.base_lr * args.glb_batch_size / 256
133 |     args.wde = args.wde or args.wd
134 | 
135 |     return args
136 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/configs/convnext_spark/mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py:
--------------------------------------------------------------------------------
1 | """
2 | We directly take the ConvNeXt-T+MaskRCNN 3x recipe from https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/mask_rcnn_convnext_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py
3 | And we modify this ConvNeXt-T+MaskRCNN 3x recipe to our ConvNeXt-B+MaskRCNN 3x recipe.
4 | The modifications (commented as [modified] below) are according to:
5 | - 1. tiny-to-base: (some configs of ConvNext-T are updated to those of ConvNext-B, referring to https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/cascade_mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco_in22k.py)
6 |     - model.backbone.{depths, dims, drop_path_rate}
7 |     - model.neck
8 |     - optimizer.paramwise_cfg.num_layers
9 | 
10 | - 2.
our paper (https://openreview.net/forum?id=NRxydtWup1S, or https://arxiv.org/abs/2301.03580): 11 | - LR layer decay (optimizer.paramwise_cfg.decay_rate): 0.65 12 | - LR scheduled ratio (lr_config.gamma): 0.2 13 | - Learning rate (optimizer.lr): 0.0002 14 | - optimizer_config.use_fp16: False (we just use fp32 by default; actually we didn't test the performance of using fp16) 15 | """ 16 | 17 | _base_ = [ 18 | '../_base_/models/mask_rcnn_convnext_fpn.py', 19 | '../_base_/datasets/coco_instance.py', 20 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 21 | ] 22 | 23 | model = dict( 24 | backbone=dict( 25 | in_chans=3, 26 | depths=[3, 3, 27, 3], # [modified] according to tiny-to-base 27 | dims=[128, 256, 512, 1024], # [modified] according to tiny-to-base 28 | drop_path_rate=0.5, # [modified] according to tiny-to-base 29 | layer_scale_init_value=1.0, 30 | out_indices=[0, 1, 2, 3], 31 | ), 32 | neck=dict(in_channels=[128, 256, 512, 1024])) # [modified] according to tiny-to-base 33 | 34 | img_norm_cfg = dict( 35 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 36 | 37 | # augmentation strategy originates from DETR / Sparse RCNN 38 | train_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 41 | dict(type='RandomFlip', flip_ratio=0.5), 42 | dict(type='AutoAugment', 43 | policies=[ 44 | [ 45 | dict(type='Resize', 46 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 47 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 48 | (736, 1333), (768, 1333), (800, 1333)], 49 | multiscale_mode='value', 50 | keep_ratio=True) 51 | ], 52 | [ 53 | dict(type='Resize', 54 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='RandomCrop', 58 | crop_type='absolute_range', 59 | crop_size=(384, 600), 60 | allow_negative_crop=True), 61 | dict(type='Resize', 62 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 63 | (576, 1333), (608, 1333), (640, 1333), 64 | (672, 1333), (704, 1333), (736, 1333), 65 | (768, 1333), (800, 1333)], 66 | multiscale_mode='value', 67 | override=True, 68 | keep_ratio=True) 69 | ] 70 | ]), 71 | dict(type='Normalize', **img_norm_cfg), 72 | dict(type='Pad', size_divisor=32), 73 | dict(type='DefaultFormatBundle'), 74 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 75 | ] 76 | data = dict(train=dict(pipeline=train_pipeline)) 77 | 78 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW', 79 | lr=0.0002, betas=(0.9, 0.999), weight_decay=0.05, # [modified] according to our paper 80 | paramwise_cfg={'decay_rate': 0.65, # [modified] according to our paper 81 | 'decay_type': 'layer_wise', 82 | 'num_layers': 12}) # [modified] according to tiny-to-base 83 | lr_config = dict(step=[27, 33], gamma=0.2) # [modified] according to our paper 84 | runner = dict(type='EpochBasedRunnerAmp', max_epochs=36) 85 | 86 | # do not use mmdet version fp16 87 | fp16 = None 88 | optimizer_config = dict( 89 | type="DistOptimizerHook", 90 | update_interval=1, 91 | grad_clip=None, 92 | coalesce=True, 93 | bucket_size_mb=-1, 94 | use_fp16=False, # [modified] True => False 95 | ) -------------------------------------------------------------------------------- /utils/downloads.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess 4 | import urllib 5 | from pathlib import Path 6 | 
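# A minimal usage sketch of this module (the weight file name is illustrative):
#
#   weights = attempt_download('yolov5s.pt')   # resolves a local path, a direct URL, or a GitHub release asset
#   state = torch.load(weights, map_location='cpu')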
7 | import requests 8 | import torch 9 | 10 | 11 | def is_url(url, check=True): 12 | # Check if string is URL and check if URL exists 13 | try: 14 | url = str(url) 15 | result = urllib.parse.urlparse(url) 16 | assert all([result.scheme, result.netloc]) # check if is url 17 | return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online 18 | except (AssertionError, urllib.request.HTTPError): 19 | return False 20 | 21 | 22 | def gsutil_getsize(url=''): 23 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 24 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 25 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 26 | 27 | 28 | def url_getsize(url='https://ultralytics.com/images/bus.jpg'): 29 | # Return downloadable file size in bytes 30 | response = requests.head(url, allow_redirects=True) 31 | return int(response.headers.get('content-length', -1)) 32 | 33 | 34 | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): 35 | # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes 36 | from utils.general import LOGGER 37 | 38 | file = Path(file) 39 | assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" 40 | try: # url1 41 | LOGGER.info(f'Downloading {url} to {file}...') 42 | torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) 43 | assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check 44 | except Exception as e: # url2 45 | if file.exists(): 46 | file.unlink() # remove partial downloads 47 | LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') 48 | os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail 49 | finally: 50 | if not file.exists() or file.stat().st_size < min_bytes: # check 51 | if file.exists(): 52 | file.unlink() # remove partial downloads 53 | LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}") 54 | LOGGER.info('') 55 | 56 | 57 | def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'): 58 | # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc. 59 | from utils.general import LOGGER 60 | 61 | def github_assets(repository, version='latest'): 62 | # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...]) 63 | if version != 'latest': 64 | version = f'tags/{version}' # i.e. tags/v7.0 65 | response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api 66 | return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets 67 | 68 | file = Path(str(file).strip().replace("'", '')) 69 | if not file.exists(): 70 | # URL specified 71 | name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 72 | if str(file).startswith(('http:/', 'https:/')): # download 73 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 74 | file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 
75 | if Path(file).is_file():
76 | LOGGER.info(f'Found {url} locally at {file}') # file already exists
77 | else:
78 | safe_download(file=file, url=url, min_bytes=1E5)
79 | return file
80 | 
81 | # GitHub assets
82 | assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
83 | try:
84 | tag, assets = github_assets(repo, release)
85 | except Exception:
86 | try:
87 | tag, assets = github_assets(repo) # latest release
88 | except Exception:
89 | try:
90 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
91 | except Exception:
92 | tag = release
93 | 
94 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
95 | if name in assets:
96 | url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror
97 | safe_download(
98 | file,
99 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
100 | min_bytes=1E5,
101 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
102 | 
103 | return str(file)
104 | 
--------------------------------------------------------------------------------
/spark/downstream_d2/README.md:
--------------------------------------------------------------------------------
1 | ## About code isolation
2 | 
3 | This `downstream_d2` directory is isolated from the pre-training code; you can treat it as an independent codebase 🛠️.
4 | 
5 | 
6 | ## Fine-tuned ResNet-50 weights, log files, and performance
7 | 
8 | 

9 | 10 | [[`weights (pre-trained by SparK)`](https://drive.google.com/file/d/1H8605HbxGvrsu4x4rIoNr-Wkd7JkxFPQ/view?usp=share_link)] 11 | [[`weights (fine-tuned on COCO)`](https://drive.google.com/file/d/1Ue7SiQ1E_AwgtYo56Fm-iUlQPZ8vIwYj/view?usp=share_link)] 12 | [[`metrics.json`](https://drive.google.com/file/d/1wfbUWh4svV8sPWya_0PAhsLHVayDQRCi/view?usp=share_link)] 13 | [[`log.txt`](https://drive.google.com/file/d/11zVo_87pe9DMAmfNQK9FUfyjQWHTRKxV/view?usp=share_link)] 14 | [[`tensorboard file`](https://drive.google.com/file/d/1aM1qj8c3-Uka1dZuYmKhgp1lNJpeMDMl/view?usp=share_link)] 15 |
16 | 17 |

18 | 19 |

20 | 
21 | 
22 | ## Installing [Detectron2 v0.6](https://github.com/facebookresearch/detectron2/releases/tag/v0.6) before fine-tuning ResNet on COCO
23 | 
24 | 
25 | 1. Set up a Python environment, e.g.:
26 | ```shell script
27 | $ conda create -n spark python=3.8 -y
28 | $ conda activate spark
29 | ```
30 | 
31 | 2. Install `detectron2==0.6` (e.g., with `torch==1.10.0` and `cuda11.3`):
32 | ```shell script
33 | $ pip install detectron2==0.6 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
34 | ```
35 | 
36 | You can also find instructions for other PyTorch/CUDA versions on [this page](https://github.com/facebookresearch/detectron2/releases/tag/v0.6).
37 | 
38 | 
39 | 3. Put the COCO dataset folder at `downstream_d2/datasets/coco`.
40 | The folder should follow the [directory structure](https://github.com/facebookresearch/detectron2/tree/master/datasets) required by `Detectron2`, which should look like this:
41 | ```
42 | downstream_d2/datasets/coco:
43 | annotations/:
44 | captions_train2017.json captions_val2017.json
45 | instances_train2017.json instances_val2017.json
46 | person_keypoints_train2017.json person_keypoints_val2017.json
47 | train2017/:
48 | a_lot_of_images.jpg
49 | val2017/:
50 | a_lot_of_images.jpg
51 | ```
52 | 
53 | 
54 | ## Training from pre-trained checkpoint
55 | 
56 | The script file for COCO fine-tuning (object detection and instance segmentation) is [downstream_d2/train_net.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/train_net.py),
57 | which is a modification of [Detectron2's tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/v0.6/tools/train_net.py).
58 | 
59 | 
60 | Before fine-tuning a ResNet50 pre-trained by SparK, you should first convert our checkpoint file to a Detectron2-style `.pkl` file:
61 | 
62 | ```shell script
63 | $ cd /path/to/SparK/downstream_d2
64 | $ python3 convert-timm-to-d2.py /some/path/to/resnet50_1kpretrained_timm_style.pth d2-style.pkl
65 | ```
66 | 
67 | For a ResNet50, you should see a log reporting `len(state)==318`:
68 | ```text
69 | [convert] .pkl is generated! (from `/some/path/to/resnet50_1kpretrained_timm_style.pth`, to `d2-style.pkl`, len(state)==318)
70 | ```
71 | 
72 | Then run fine-tuning on a single machine with 8 GPUs:
73 | 
74 | ```shell script
75 | $ cd /path/to/SparK/downstream_d2
76 | $ python3 ./train_net.py --resume --num-gpus 8 --config-file ./configs/coco_R_50_FPN_CONV_1x_moco_adam.yaml \
77 |   MODEL.WEIGHTS d2-style.pkl \
78 |   OUTPUT_DIR <your_output_dir>
79 | ```
80 | 
81 | For multiple machines, add these args:
82 | ```shell script
83 | --num-machines <num_machines> --machine-rank <machine_rank> --dist-url <dist_url>
84 | ```
85 | 
86 | In `<your_output_dir>` you'll find the log files generated by `Detectron2`.
87 | 
88 | 
89 | ## Details: how we modify the official Detectron2 [tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/v0.6/tools/train_net.py) to get our [downstream_d2/train_net.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/train_net.py)
90 | 
91 | 1. We add two new hyperparameters:
92 | - str `SOLVER.OPTIMIZER`: use the 'ADAM' (same as 'ADAMW') or 'SGD' optimizer
93 | - float `SOLVER.LR_DECAY`: the decay ratio (from 0. to 1.) of the layer-wise learning rate decay trick
94 | 
95 | 2. We implement layer-wise lr decay in [downstream_d2/lr_decay.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/lr_decay.py).
96 | 
97 | 3. 
We write a script to convert our timm-style pre-trained ResNet weights to Detectron2-style in [downstream_d2/convert-timm-to-d2.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/convert-timm-to-d2.py). 98 | 99 | 4. We also add a hook for logging results to `cfg.OUTPUT_DIR/d2_coco_log.txt`. 100 | 101 | All of our modifications to the original are commented with `# [modification] ...` in [downstream_d2/train_net.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/train_net.py) or other files. 102 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): 5 | """Creates or loads a YOLO model 6 | 7 | Arguments: 8 | name (str): model name 'yolov3' or path 'path/to/best.pt' 9 | pretrained (bool): load pretrained weights into the model 10 | channels (int): number of input channels 11 | classes (int): number of model classes 12 | autoshape (bool): apply YOLO .autoshape() wrapper to model 13 | verbose (bool): print all information to screen 14 | device (str, torch.device, None): device to use for model parameters 15 | 16 | Returns: 17 | YOLO model 18 | """ 19 | from pathlib import Path 20 | 21 | from models.common import AutoShape, DetectMultiBackend 22 | from models.experimental import attempt_load 23 | from models.yolo import ClassificationModel, DetectionModel, SegmentationModel 24 | from utils.downloads import attempt_download 25 | from utils.general import LOGGER, check_requirements, intersect_dicts, logging 26 | from utils.torch_utils import select_device 27 | 28 | if not verbose: 29 | LOGGER.setLevel(logging.WARNING) 30 | check_requirements(exclude=('opencv-python', 'tensorboard', 'thop')) 31 | name = Path(name) 32 | path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name # checkpoint path 33 | try: 34 | device = select_device(device) 35 | if pretrained and channels == 3 and classes == 80: 36 | try: 37 | model = DetectMultiBackend(path, device=device, fuse=autoshape) # detection model 38 | if autoshape: 39 | if model.pt and isinstance(model.model, ClassificationModel): 40 | LOGGER.warning('WARNING ⚠️ YOLO ClassificationModel is not yet AutoShape compatible. ' 41 | 'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).') 42 | elif model.pt and isinstance(model.model, SegmentationModel): 43 | LOGGER.warning('WARNING ⚠️ YOLO SegmentationModel is not yet AutoShape compatible. 
' 44 | 'You will not be able to run inference with this model.') 45 | else: 46 | model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS 47 | except Exception: 48 | model = attempt_load(path, device=device, fuse=False) # arbitrary model 49 | else: 50 | cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path 51 | model = DetectionModel(cfg, channels, classes) # create model 52 | if pretrained: 53 | ckpt = torch.load(attempt_download(path), map_location=device) # load 54 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 55 | csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect 56 | model.load_state_dict(csd, strict=False) # load 57 | if len(ckpt['model'].names) == classes: 58 | model.names = ckpt['model'].names # set class names attribute 59 | if not verbose: 60 | LOGGER.setLevel(logging.INFO) # reset to default 61 | return model.to(device) 62 | 63 | except Exception as e: 64 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 65 | s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.' 66 | raise Exception(s) from e 67 | 68 | 69 | def custom(path='path/to/model.pt', autoshape=True, _verbose=True, device=None): 70 | # YOLO custom or local model 71 | return _create(path, autoshape=autoshape, verbose=_verbose, device=device) 72 | 73 | 74 | if __name__ == '__main__': 75 | import argparse 76 | from pathlib import Path 77 | 78 | import numpy as np 79 | from PIL import Image 80 | 81 | from utils.general import cv2, print_args 82 | 83 | # Argparser 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument('--model', type=str, default='yolo', help='model name') 86 | opt = parser.parse_args() 87 | print_args(vars(opt)) 88 | 89 | # Model 90 | model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True) 91 | # model = custom(path='path/to/model.pt') # custom 92 | 93 | # Images 94 | imgs = [ 95 | 'data/images/zidane.jpg', # filename 96 | Path('data/images/zidane.jpg'), # Path 97 | 'https://ultralytics.com/images/zidane.jpg', # URI 98 | cv2.imread('data/images/bus.jpg')[:, :, ::-1], # OpenCV 99 | Image.open('data/images/bus.jpg'), # PIL 100 | np.zeros((320, 640, 3))] # numpy 101 | 102 | # Inference 103 | results = model(imgs, size=320) # batched inference 104 | 105 | # Results 106 | results.print() 107 | results.save() 108 | -------------------------------------------------------------------------------- /spark/pretrain/models/custom.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
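# Descriptive note on this file: `YourConvNet` adapts a YOLOv9 (GELAN) backbone for
# SparK masked pre-training. It wraps blocks from `v9back.common` (Silence, Bbackbone,
# Down0..Down4, CBLinear, CBFuse, Conv, ADown, RepNCSPELAN4) and exposes the two hooks
# a SparK encoder must provide: `get_downsample_ratio()` and `get_feature_map_channels()`.
# With `hierarchical=True`, forward() returns the four CBFuse feature maps
# (strides 4/8/16/32, channels 256/512/1024/1024) intended for SparK's sparse decoder.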
6 | 
7 | import torch
8 | import torch.nn as nn
9 | from typing import List
10 | from timm.models.registry import register_model
11 | 
12 | 
13 | # repo-local modules providing the YOLOv9 (GELAN) building blocks
14 | from HG.HGBlock import HGStem, HGBlock
15 | from HG.block import DWConv
16 | from v9back.common import *
17 | 
18 | 
19 | class YourConvNet(nn.Module):
20 | def __init__(self, *args, **kwargs):
21 | super().__init__()
22 | 
23 | self.mlist = nn.ModuleList(
24 | [Silence(),
25 | Bbackbone(),
26 | ]
27 | )
28 | self.d0 = Down0(64)  # multi-scale downsamplers feeding the CBLinear branches
29 | self.d1 = Down1(128)
30 | self.d2 = Down2(256)
31 | self.d3 = Down3(512)
32 | self.d4 = Down4(1024)
33 | self.alld = [self.d0, self.d1, self.d2, self.d3, self.d4]
34 | self.cblinear1 = CBLinear(64, [64])
35 | self.cblinear3 = CBLinear(128, [64, 128])
36 | self.cblinear5 = CBLinear(256, [64, 128, 256])
37 | self.cblinear7 = CBLinear(512, [64, 128, 256, 512])
38 | self.cblinear9 = CBLinear(1024, [64, 128, 256, 512, 1024])
39 | self.allcblinear = [self.cblinear1, self.cblinear3, self.cblinear5, self.cblinear7, self.cblinear9]
40 | # conv down 1
41 | self.conv1 = Conv(3, 64, 3, 2)
42 | self.cbfuse1 = CBFuse([0, 0, 0, 0, 0])
43 | 
44 | # conv down 2
45 | self.conv2 = Conv(64, 128, 3, 2)
46 | self.cbfuse2 = CBFuse([1, 1, 1, 1])
47 | self.rep2 = RepNCSPELAN4(128, 256, 128, 64, 2)
48 | # avg-conv down fuse 1
49 | self.adown3 = ADown(256, 256)
50 | self.cbfuse3 = CBFuse([2, 2, 2])
51 | self.rep3 = RepNCSPELAN4(256, 512, 256, 128, 2)
52 | 
53 | # avg-conv down fuse 2
54 | self.adown4 = ADown(512, 512)
55 | self.cbfuse4 = CBFuse([3, 3])
56 | self.rep4 = RepNCSPELAN4(512, 1024, 512, 256, 2)
57 | 
58 | # avg-conv down fuse 3
59 | self.adown5 = ADown(1024, 1024)
60 | self.cbfuse5 = CBFuse([4])
61 | self.rep5 = RepNCSPELAN4(1024, 1024, 512, 256, 2)
62 | 
63 | def get_downsample_ratio(self) -> int:
64 | return 32
65 | 
66 | def get_feature_map_channels(self) -> List[int]:
67 | return [256, 512, 1024, 1024]
68 | 
69 | def forward(self, x: torch.Tensor, hierarchical=False):
70 | if hierarchical:
71 | origin = x.clone()  # keep the raw input for the stride-2 stem branch
72 | ls = []
73 | tmp = []
74 | bx = None
75 | for index, modules in enumerate(self.mlist):
76 | x = modules(x)
77 | if index == 1:
78 | bx = x  # backbone output reused by every CBLinear branch
79 | for i in range(5):
80 | tmp.append(self.allcblinear[i](self.alld[i](bx)))
81 | 
82 | fuse1 = self.cbfuse1([tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], self.conv1(origin)])
83 | fuse2 = self.cbfuse2([tmp[1], tmp[2], tmp[3], tmp[4], self.conv2(fuse1)])
84 | fuse2 = self.rep2(fuse2)
85 | 
86 | fuse3 = self.cbfuse3([tmp[2], tmp[3], tmp[4], self.adown3(fuse2)])
87 | fuse3 = self.rep3(fuse3)
88 | 
89 | fuse4 = self.cbfuse4([tmp[3], tmp[4], self.adown4(fuse3)])
90 | fuse4 = self.rep4(fuse4)
91 | 
92 | fuse5 = self.cbfuse5([tmp[4], self.adown5(fuse4)])
93 | fuse5 = self.rep5(fuse5)
94 | 
95 | ls.append(fuse2)
96 | ls.append(fuse3)
97 | ls.append(fuse4)
98 | ls.append(fuse5)
99 | return ls  # feature maps at strides 4/8/16/32
100 | else:
101 | for modules in self.mlist:
102 | x = modules(x)
103 | return x
104 | 
105 | 
106 | @register_model
107 | def V9back(pretrained=False, **kwargs):
108 | return YourConvNet(**kwargs)
109 | 
110 | 
111 | @torch.no_grad()
112 | def convnet_test():
113 | from timm.models import create_model
114 | cnn = create_model('V9back')
115 | print('get_downsample_ratio:', cnn.get_downsample_ratio())
116 | print('get_feature_map_channels:', cnn.get_feature_map_channels())
117 | 
118 | downsample_ratio = cnn.get_downsample_ratio()
119 | feature_map_channels = cnn.get_feature_map_channels()
120 | 
121 | # check the forward function
122 | B, C, H, W = 4, 3, 224, 224
123 | inp = torch.rand(B, C, H, 
W) 124 | feats = cnn(inp, hierarchical=True) 125 | assert isinstance(feats, list) 126 | assert len(feats) == len(feature_map_channels) 127 | print([tuple(t.shape) for t in feats]) 128 | 129 | # check the downsample ratio 130 | feats = cnn(inp, hierarchical=True) 131 | assert feats[-1].shape[-2] == H // downsample_ratio 132 | assert feats[-1].shape[-1] == W // downsample_ratio 133 | 134 | # check the channel number 135 | for feat, ch in zip(feats, feature_map_channels): 136 | assert feat.ndim == 4 137 | assert feat.shape[1] == ch 138 | 139 | 140 | if __name__ == '__main__': 141 | convnet_test() 142 | -------------------------------------------------------------------------------- /utils/loggers/comet/comet_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from urllib.parse import urlparse 4 | 5 | try: 6 | import comet_ml 7 | except (ModuleNotFoundError, ImportError): 8 | comet_ml = None 9 | 10 | import yaml 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | COMET_PREFIX = "comet://" 15 | COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") 16 | COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt") 17 | 18 | 19 | def download_model_checkpoint(opt, experiment): 20 | model_dir = f"{opt.project}/{experiment.name}" 21 | os.makedirs(model_dir, exist_ok=True) 22 | 23 | model_name = COMET_MODEL_NAME 24 | model_asset_list = experiment.get_model_asset_list(model_name) 25 | 26 | if len(model_asset_list) == 0: 27 | logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}") 28 | return 29 | 30 | model_asset_list = sorted( 31 | model_asset_list, 32 | key=lambda x: x["step"], 33 | reverse=True, 34 | ) 35 | logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list} 36 | 37 | resource_url = urlparse(opt.weights) 38 | checkpoint_filename = resource_url.query 39 | 40 | if checkpoint_filename: 41 | asset_id = logged_checkpoint_map.get(checkpoint_filename) 42 | else: 43 | asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME) 44 | checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME 45 | 46 | if asset_id is None: 47 | logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment") 48 | return 49 | 50 | try: 51 | logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}") 52 | asset_filename = checkpoint_filename 53 | 54 | model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 55 | model_download_path = f"{model_dir}/{asset_filename}" 56 | with open(model_download_path, "wb") as f: 57 | f.write(model_binary) 58 | 59 | opt.weights = model_download_path 60 | 61 | except Exception as e: 62 | logger.warning("COMET WARNING: Unable to download checkpoint from Comet") 63 | logger.exception(e) 64 | 65 | 66 | def set_opt_parameters(opt, experiment): 67 | """Update the opts Namespace with parameters 68 | from Comet's ExistingExperiment when resuming a run 69 | 70 | Args: 71 | opt (argparse.Namespace): Namespace of command line options 72 | experiment (comet_ml.APIExperiment): Comet API Experiment object 73 | """ 74 | asset_list = experiment.get_asset_list() 75 | resume_string = opt.resume 76 | 77 | for asset in asset_list: 78 | if asset["fileName"] == "opt.yaml": 79 | asset_id = asset["assetId"] 80 | asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 81 | opt_dict = yaml.safe_load(asset_binary) 82 | for key, value in 
opt_dict.items(): 83 | setattr(opt, key, value) 84 | opt.resume = resume_string 85 | 86 | # Save hyperparameters to YAML file 87 | # Necessary to pass checks in training script 88 | save_dir = f"{opt.project}/{experiment.name}" 89 | os.makedirs(save_dir, exist_ok=True) 90 | 91 | hyp_yaml_path = f"{save_dir}/hyp.yaml" 92 | with open(hyp_yaml_path, "w") as f: 93 | yaml.dump(opt.hyp, f) 94 | opt.hyp = hyp_yaml_path 95 | 96 | 97 | def check_comet_weights(opt): 98 | """Downloads model weights from Comet and updates the 99 | weights path to point to saved weights location 100 | 101 | Args: 102 | opt (argparse.Namespace): Command Line arguments passed 103 | to YOLOv5 training script 104 | 105 | Returns: 106 | None/bool: Return True if weights are successfully downloaded 107 | else return None 108 | """ 109 | if comet_ml is None: 110 | return 111 | 112 | if isinstance(opt.weights, str): 113 | if opt.weights.startswith(COMET_PREFIX): 114 | api = comet_ml.API() 115 | resource = urlparse(opt.weights) 116 | experiment_path = f"{resource.netloc}{resource.path}" 117 | experiment = api.get(experiment_path) 118 | download_model_checkpoint(opt, experiment) 119 | return True 120 | 121 | return None 122 | 123 | 124 | def check_comet_resume(opt): 125 | """Restores run parameters to its original state based on the model checkpoint 126 | and logged Experiment parameters. 127 | 128 | Args: 129 | opt (argparse.Namespace): Command Line arguments passed 130 | to YOLOv5 training script 131 | 132 | Returns: 133 | None/bool: Return True if the run is restored successfully 134 | else return None 135 | """ 136 | if comet_ml is None: 137 | return 138 | 139 | if isinstance(opt.resume, str): 140 | if opt.resume.startswith(COMET_PREFIX): 141 | api = comet_ml.API() 142 | resource = urlparse(opt.resume) 143 | experiment_path = f"{resource.netloc}{resource.path}" 144 | experiment = api.get(experiment_path) 145 | set_opt_parameters(opt, experiment) 146 | download_model_checkpoint(opt, experiment) 147 | 148 | return True 149 | 150 | return None 151 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 
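# Descriptive note on this file: layer-wise learning-rate decay for ConvNeXt backbones.
# Each parameter is mapped to a layer id (stem / downsample layers / stage blocks below)
# and its lr is scaled by decay_rate ** (num_layers - layer_id - 1), where num_layers is
# paramwise_cfg['num_layers'] + 2. For the config used earlier in this repo
# (decay_rate=0.65, num_layers=12, hence 14 levels), the stem group (layer 0) trains at
# 0.65**13 ≈ 0.0037x the base lr, while the head group (layer 13) keeps the full base lr.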
7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 
86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /utils/loggers/clearml/hpo.py: -------------------------------------------------------------------------------- 1 | from clearml import Task 2 | # Connecting ClearML with the current process, 3 | # from here on everything is logged automatically 4 | from clearml.automation import HyperParameterOptimizer, UniformParameterRange 5 | from clearml.automation.optuna import OptimizerOptuna 6 | 7 | task = Task.init(project_name='Hyper-Parameter Optimization', 8 | task_name='YOLOv5', 9 | task_type=Task.TaskTypes.optimizer, 10 | reuse_last_task_id=False) 11 | 12 | # Example use case: 13 | optimizer = HyperParameterOptimizer( 14 | # This is the experiment we want to optimize 15 | base_task_id='', 16 | # here we define the hyper-parameters to optimize 17 | # Notice: The parameter name should exactly match what you see in the UI: / 18 | # For Example, here we see in the base experiment a section Named: "General" 19 | # under it a parameter named "batch_size", this becomes "General/batch_size" 20 | # If you have `argparse` for example, then arguments will appear under the "Args" section, 21 | # and you should instead pass "Args/batch_size" 22 | hyper_parameters=[ 23 | UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), 24 | UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), 25 | UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), 26 | UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), 27 | UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), 28 | UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), 29 | UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), 30 | UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), 31 | UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), 32 | UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), 33 | UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), 34 | 
UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0),
35 | UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7),
36 | UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0),
37 | UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0),
38 | UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1),
39 | UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9),
40 | UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9),
41 | UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0),
42 | UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9),
43 | UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9),
44 | UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0),
45 | UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001),
46 | UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0),
47 | UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0),
48 | UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0),
49 | UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0),
50 | UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)],
51 | # this is the objective metric we want to maximize/minimize
52 | objective_metric_title='metrics',
53 | objective_metric_series='mAP_0.5',
54 | # now we decide if we want to maximize it or minimize it (accuracy we maximize)
55 | objective_metric_sign='max',
56 | # let us limit the number of concurrent experiments;
57 | # this in turn will make sure we don't bombard the scheduler with experiments.
58 | # if we have an auto-scaler connected, this, by proxy, will limit the number of machines
59 | max_number_of_concurrent_tasks=1,
60 | # this is the optimizer class (actually doing the optimization)
61 | # Currently, we can choose from GridSearch, RandomSearch, OptimizerBOHB (Bayesian optimization Hyper-Band) or OptimizerOptuna
62 | optimizer_class=OptimizerOptuna,
63 | # If specified, only the top K performing Tasks will be kept; the others will be automatically archived
64 | save_top_k_tasks_only=5, # 5,
65 | compute_time_limit=None,
66 | total_max_jobs=20,
67 | min_iteration_per_job=None,
68 | max_iteration_per_job=None,
69 | )
70 | 
71 | # report every 10 seconds; this is way too often, but we are testing here
72 | optimizer.set_report_period(10 / 60)
73 | # You can also use the line below instead to run all the optimizer tasks locally, without using queues or an agent
74 | # an_optimizer.start_locally(job_complete_callback=job_complete_callback)
75 | # set the time limit for the optimization process (2 hours)
76 | optimizer.set_time_limit(in_minutes=120.0)
77 | # Start the optimization process in the local environment
78 | optimizer.start_locally()
79 | # wait until the process is done (notice we are controlling the optimization process in the background)
80 | optimizer.wait()
81 | # make sure background optimization has stopped
82 | optimizer.stop()
83 | 
84 | print('We are done, good bye')
85 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/mmcv_custom/customized_text.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | 
3 | # All rights reserved.
4 | 
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | 
8 | 
9 | import datetime
10 | from collections import OrderedDict
11 | 
12 | import torch
13 | 
14 | import mmcv
15 | from mmcv.runner import HOOKS
16 | from mmcv.runner import TextLoggerHook
17 | 
18 | 
19 | @HOOKS.register_module()
20 | class CustomizedTextLoggerHook(TextLoggerHook):
21 | """Customized Text Logger hook.
22 | 
23 | This logger prints out both lr and layer_0_lr.
24 | 
25 | """
26 | 
27 | def _log_info(self, log_dict, runner):
28 | # print exp name for users to distinguish experiments
29 | # at every ``interval_exp_name`` iterations and the end of each epoch
30 | if runner.meta is not None and 'exp_name' in runner.meta:
31 | if (self.every_n_iters(runner, self.interval_exp_name)) or (
32 | self.by_epoch and self.end_of_epoch(runner)):
33 | exp_info = f'Exp name: {runner.meta["exp_name"]}'
34 | runner.logger.info(exp_info)
35 | 
36 | if log_dict['mode'] == 'train':
37 | lr_str = {}
38 | for lr_type in ['lr', 'layer_0_lr']:
39 | if isinstance(log_dict[lr_type], dict):
40 | lr_str[lr_type] = []
41 | for k, val in log_dict[lr_type].items():
42 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}')  # append to the per-type list, not the dict
43 | lr_str[lr_type] = ' '.join(lr_str[lr_type])
44 | else:
45 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}'
46 | 
47 | # by epoch: Epoch [4][100/1000]
48 | # by iter: Iter [100/100000]
49 | if self.by_epoch:
50 | log_str = f'Epoch [{log_dict["epoch"]}]' \
51 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
52 | else:
53 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
54 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, '
55 | 
56 | if 'time' in log_dict.keys():
57 | self.time_sec_tot += (log_dict['time'] * self.interval)
58 | time_sec_avg = self.time_sec_tot / (
59 | runner.iter - self.start_iter + 1)
60 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
61 | eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
62 | log_str += f'eta: {eta_str}, '
63 | log_str += f'time: {log_dict["time"]:.3f}, ' \
64 | f'data_time: {log_dict["data_time"]:.3f}, '
65 | # statistic memory
66 | if torch.cuda.is_available():
67 | log_str += f'memory: {log_dict["memory"]}, '
68 | else:
69 | # val/test time
70 | # here 1000 is the length of the val dataloader
71 | # by epoch: Epoch[val] [4][1000]
72 | # by iter: Iter[val] [1000]
73 | if self.by_epoch:
74 | log_str = f'Epoch({log_dict["mode"]}) ' \
75 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
76 | else:
77 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
78 | 
79 | log_items = []
80 | for name, val in log_dict.items():
81 | # TODO: resolve this hack
82 | # these items have already been put in log_str
83 | if name in [
84 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time',
85 | 'memory', 'epoch'
86 | ]:
87 | continue
88 | if isinstance(val, float):
89 | val = f'{val:.4f}'
90 | log_items.append(f'{name}: {val}')
91 | log_str += ', '.join(log_items)
92 | 
93 | runner.logger.info(log_str)
94 | 
95 | 
96 | def log(self, runner):
97 | if 'eval_iter_num' in runner.log_buffer.output:
98 | # this doesn't modify runner.iter and applies regardless of by_epoch
99 | cur_iter = runner.log_buffer.output.pop('eval_iter_num')
100 | else:
101 | cur_iter = self.get_iter(runner, inner_iter=True)
102 | 
103 | log_dict = OrderedDict(
104 | mode=self.get_mode(runner),
105 | epoch=self.get_epoch(runner),
106 | iter=cur_iter)
107 | 
108 | # record lr and layer_0_lr
109 | cur_lr = runner.current_lr()
110 | if 
isinstance(cur_lr, list): 111 | log_dict['layer_0_lr'] = min(cur_lr) 112 | log_dict['lr'] = max(cur_lr) 113 | else: 114 | assert isinstance(cur_lr, dict) 115 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 116 | for k, lr_ in cur_lr.items(): 117 | assert isinstance(lr_, list) 118 | log_dict['layer_0_lr'].update({k: min(lr_)}) 119 | log_dict['lr'].update({k: max(lr_)}) 120 | 121 | if 'time' in runner.log_buffer.output: 122 | # statistic memory 123 | if torch.cuda.is_available(): 124 | log_dict['memory'] = self._get_max_memory(runner) 125 | 126 | log_dict = dict(log_dict, **runner.log_buffer.output) 127 | 128 | self._log_info(log_dict, runner) 129 | self._dump_log(log_dict, runner) 130 | return log_dict 131 | -------------------------------------------------------------------------------- /spark/pretrain/models/convnext.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | # This file is basically a copy of: https://github.com/facebookresearch/ConvNeXt/blob/06f7b05f922e21914916406141f50f82b4a15852/models/convnext.py 8 | from typing import List 9 | 10 | import torch 11 | import torch.nn as nn 12 | from timm.models.layers import trunc_normal_ 13 | from timm.models.registry import register_model 14 | 15 | from encoder import SparseConvNeXtBlock, SparseConvNeXtLayerNorm 16 | 17 | 18 | class ConvNeXt(nn.Module): 19 | r""" ConvNeXt 20 | A PyTorch impl of : `A ConvNet for the 2020s` - 21 | https://arxiv.org/pdf/2201.03545.pdf 22 | Args: 23 | in_chans (int): Number of input image channels. Default: 3 24 | num_classes (int): Number of classes for classification head. Default: 1000 25 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 26 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 27 | drop_path_rate (float): Stochastic depth rate. Default: 0. 28 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 29 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. 
30 | """ 31 | 32 | def __init__(self, in_chans=3, num_classes=1000, 33 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 34 | layer_scale_init_value=1e-6, head_init_scale=1., global_pool='avg', 35 | sparse=True, 36 | ): 37 | super().__init__() 38 | self.dims: List[int] = dims 39 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 40 | stem = nn.Sequential( 41 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 42 | SparseConvNeXtLayerNorm(dims[0], eps=1e-6, data_format="channels_first", sparse=sparse) 43 | ) 44 | self.downsample_layers.append(stem) 45 | for i in range(3): 46 | downsample_layer = nn.Sequential( 47 | SparseConvNeXtLayerNorm(dims[i], eps=1e-6, data_format="channels_first", sparse=sparse), 48 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2), 49 | ) 50 | self.downsample_layers.append(downsample_layer) 51 | 52 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 53 | self.drop_path_rate = drop_path_rate 54 | self.layer_scale_init_value = layer_scale_init_value 55 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 56 | cur = 0 57 | for i in range(4): 58 | stage = nn.Sequential( 59 | *[SparseConvNeXtBlock(dim=dims[i], drop_path=dp_rates[cur + j], 60 | layer_scale_init_value=layer_scale_init_value, sparse=sparse) for j in range(depths[i])] 61 | ) 62 | self.stages.append(stage) 63 | cur += depths[i] 64 | self.depths = depths 65 | 66 | self.apply(self._init_weights) 67 | if num_classes > 0: 68 | self.norm = SparseConvNeXtLayerNorm(dims[-1], eps=1e-6, sparse=False) # final norm layer for LE/FT; should not be sparse 69 | self.fc = nn.Linear(dims[-1], num_classes) 70 | else: 71 | self.norm = nn.Identity() 72 | self.fc = nn.Identity() 73 | 74 | def _init_weights(self, m): 75 | if isinstance(m, (nn.Conv2d, nn.Linear)): 76 | trunc_normal_(m.weight, std=.02) 77 | nn.init.constant_(m.bias, 0) 78 | 79 | def get_downsample_ratio(self) -> int: 80 | return 32 81 | 82 | def get_feature_map_channels(self) -> List[int]: 83 | return self.dims 84 | 85 | def forward(self, x, hierarchical=False): 86 | if hierarchical: 87 | ls = [] 88 | for i in range(4): 89 | x = self.downsample_layers[i](x) 90 | x = self.stages[i](x) 91 | ls.append(x) 92 | return ls 93 | else: 94 | return self.fc(self.norm(x.mean([-2, -1]))) # (B, C, H, W) =mean=> (B, C) =norm&fc=> (B, NumCls) 95 | 96 | def get_classifier(self): 97 | return self.fc 98 | 99 | def extra_repr(self): 100 | return f'drop_path_rate={self.drop_path_rate}, layer_scale_init_value={self.layer_scale_init_value:g}' 101 | 102 | 103 | @register_model 104 | def convnext_tiny(pretrained=False, in_22k=False, **kwargs): 105 | model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 106 | return model 107 | 108 | 109 | @register_model 110 | def convnext_small(pretrained=False, in_22k=False, **kwargs): 111 | model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs) 112 | return model 113 | 114 | 115 | @register_model 116 | def convnext_base(pretrained=False, in_22k=False, **kwargs): 117 | model = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs) 118 | return model 119 | 120 | 121 | @register_model 122 | def convnext_large(pretrained=False, in_22k=False, **kwargs): 123 | model = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs) 124 | return model 125 | 126 | -------------------------------------------------------------------------------- 
/spark/downstream_d2/lr_decay.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Set, Optional, Callable, Any 2 | import torch 3 | import copy 4 | 5 | from detectron2.solver.build import reduce_param_groups 6 | 7 | 8 | def lr_factor_func(para_name: str, is_resnet50, dec: float, debug=False) -> float: 9 | if dec == 0: 10 | dec = 1. 11 | 12 | N = 5 if is_resnet50 else 11 13 | if '.stem.' in para_name: 14 | layer_id = 0 15 | elif '.res' in para_name: 16 | ls = para_name.split('.res')[1].split('.') 17 | if ls[0].isnumeric() and ls[1].isnumeric(): 18 | stage_id, block_id = int(ls[0]), int(ls[1]) 19 | if stage_id == 2: # res2 20 | layer_id = 1 21 | elif stage_id == 3: # res3 22 | layer_id = 2 23 | elif stage_id == 4: # res4 24 | layer_id = 3 + block_id // 3 # 3, 4 or 4, 5 25 | else: # res5 26 | layer_id = N 27 | else: 28 | assert para_name.startswith('roi_heads.res5.norm.') 29 | layer_id = N + 1 # roi_heads.res5.norm.weight and roi_heads.res5.norm.bias of C4 30 | else: 31 | layer_id = N + 1 32 | 33 | exp = N + 1 - layer_id 34 | return f'{dec:g} ** {exp}' if debug else dec ** exp 35 | 36 | 37 | # [modification] see: https://github.com/facebookresearch/detectron2/blob/v0.6/detectron2/solver/build.py#L134 38 | # add the `lr_factor_func` to implement lr decay 39 | def get_default_optimizer_params( 40 | model: torch.nn.Module, 41 | base_lr: Optional[float] = None, 42 | weight_decay: Optional[float] = None, 43 | weight_decay_norm: Optional[float] = None, 44 | bias_lr_factor: Optional[float] = 1.0, 45 | weight_decay_bias: Optional[float] = None, 46 | lr_factor_func: Optional[Callable] = None, 47 | overrides: Optional[Dict[str, Dict[str, float]]] = None, 48 | ) -> List[Dict[str, Any]]: 49 | """ 50 | Get default param list for optimizer, with support for a few types of 51 | overrides. If no overrides needed, this is equivalent to `model.parameters()`. 52 | 53 | Args: 54 | base_lr: lr for every group by default. Can be omitted to use the one in optimizer. 55 | weight_decay: weight decay for every group by default. Can be omitted to use the one 56 | in optimizer. 57 | weight_decay_norm: override weight decay for params in normalization layers 58 | bias_lr_factor: multiplier of lr for bias parameters. 59 | weight_decay_bias: override weight decay for bias parameters. 60 | lr_factor_func: function to calculate lr decay rate by mapping the parameter names to 61 | corresponding lr decay rate. Note that setting this option requires 62 | also setting ``base_lr``. 63 | overrides: if not `None`, provides values for optimizer hyperparameters 64 | (LR, weight decay) for module parameters with a given name; e.g. 65 | ``{"embedding": {"lr": 0.01, "weight_decay": 0.1}}`` will set the LR and 66 | weight decay values for all module parameters named `embedding`. 67 | 68 | For common detection models, ``weight_decay_norm`` is the only option 69 | needed to be set. ``bias_lr_factor,weight_decay_bias`` are legacy settings 70 | from Detectron1 that are not found useful. 
71 | 72 | Example: 73 | :: 74 | torch.optim.SGD(get_default_optimizer_params(model, weight_decay_norm=0), 75 | lr=0.01, weight_decay=1e-4, momentum=0.9) 76 | """ 77 | if overrides is None: 78 | overrides = {} 79 | defaults = {} 80 | if base_lr is not None: 81 | defaults["lr"] = base_lr 82 | if weight_decay is not None: 83 | defaults["weight_decay"] = weight_decay 84 | bias_overrides = {} 85 | if bias_lr_factor is not None and bias_lr_factor != 1.0: 86 | # NOTE: unlike Detectron v1, we now by default make bias hyperparameters 87 | # exactly the same as regular weights. 88 | if base_lr is None: 89 | raise ValueError("bias_lr_factor requires base_lr") 90 | bias_overrides["lr"] = base_lr * bias_lr_factor 91 | if weight_decay_bias is not None: 92 | bias_overrides["weight_decay"] = weight_decay_bias 93 | if len(bias_overrides): 94 | if "bias" in overrides: 95 | raise ValueError("Conflicting overrides for 'bias'") 96 | overrides["bias"] = bias_overrides 97 | if lr_factor_func is not None: 98 | if base_lr is None: 99 | raise ValueError("lr_factor_func requires base_lr") 100 | norm_module_types = ( 101 | torch.nn.BatchNorm1d, 102 | torch.nn.BatchNorm2d, 103 | torch.nn.BatchNorm3d, 104 | torch.nn.SyncBatchNorm, 105 | # NaiveSyncBatchNorm inherits from BatchNorm2d 106 | torch.nn.GroupNorm, 107 | torch.nn.InstanceNorm1d, 108 | torch.nn.InstanceNorm2d, 109 | torch.nn.InstanceNorm3d, 110 | torch.nn.LayerNorm, 111 | torch.nn.LocalResponseNorm, 112 | ) 113 | params: List[Dict[str, Any]] = [] 114 | memo: Set[torch.nn.parameter.Parameter] = set() 115 | for module_name, module in model.named_modules(): 116 | for module_param_name, value in module.named_parameters(recurse=False): 117 | if not value.requires_grad: 118 | continue 119 | # Avoid duplicating parameters 120 | if value in memo: 121 | continue 122 | memo.add(value) 123 | 124 | hyperparams = copy.copy(defaults) 125 | if isinstance(module, norm_module_types) and weight_decay_norm is not None: 126 | hyperparams["weight_decay"] = weight_decay_norm 127 | if lr_factor_func is not None: 128 | hyperparams["lr"] *= lr_factor_func(f"{module_name}.{module_param_name}") 129 | 130 | hyperparams.update(overrides.get(module_param_name, {})) 131 | params.append({"params": [value], **hyperparams}) 132 | return reduce_param_groups(params) 133 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/arg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
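# Descriptive note on this file: fine-tuning arguments. Each tuple in HP_DEFAULT_VALUES
# below is read positionally against HP_DEFAULT_NAMES; e.g. 'resnet50' unpacks to
# bs=4096, ep=300, wp_ep=5, opt='lamb', base_lr=0.002, lr_scale=0.7, wd=0.02,
# mixup=0.1, rep_aug=0, drop_path=0.05, ema=0.9999. Any value left falsy on the
# command line is filled from this table in get_args(), and the effective lr follows
# the linear scaling rule lr = base_lr * glb_batch_size / 256.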
6 | 7 | import json 8 | import os 9 | import sys 10 | 11 | from tap import Tap 12 | 13 | HP_DEFAULT_NAMES = ['bs', 'ep', 'wp_ep', 'opt', 'base_lr', 'lr_scale', 'wd', 'mixup', 'rep_aug', 'drop_path', 'ema'] 14 | HP_DEFAULT_VALUES = { 15 | 'convnext_small': (4096, 400, 20, 'adam', 0.0002, 0.7, 0.01, 0.8, 3, 0.3, 0.9999), 16 | 'convnext_base': (4096, 400, 20, 'adam', 0.0001, 0.7, 0.01, 0.8, 3, 0.4, 0.9999), 17 | 'convnext_large': (4096, 200, 10, 'adam', 0.0001, 0.7, 0.02, 0.8, 3, 0.5, 0.9999), 18 | 'convnext_large_384': (1024, 200, 20, 'adam', 0.00006, 0.7, 0.01, 0.8, 3, 0.5, 0.99995), 19 | 20 | 'resnet50': (4096, 300, 5, 'lamb', 0.002, 0.7, 0.02, 0.1, 0, 0.05, 0.9999), 21 | 'resnet101': (4096, 300, 5, 'lamb', 0.001, 0.8, 0.02, 0.1, 0, 0.2, 0.9999), 22 | 'resnet152': (4096, 300, 5, 'lamb', 0.001, 0.8, 0.02, 0.1, 0, 0.2, 0.9999), 23 | 'resnet200': (4096, 300, 5, 'lamb', 0.001, 0.8, 0.02, 0.1, 0, 0.2, 0.9999), 24 | } 25 | 26 | 27 | class FineTuneArgs(Tap): 28 | # environment 29 | exp_name: str 30 | exp_dir: str 31 | data_path: str 32 | model: str 33 | resume_from: str = '' # resume from some checkpoint.pth 34 | 35 | img_size: int = 640 36 | dataloader_workers: int = 8 37 | 38 | # ImageNet classification fine-tuning hyperparameters; see `HP_DEFAULT_VALUES` above for detailed default values 39 | # - batch size, epoch 40 | bs: int = 0 # global batch size (== batch_size_per_gpu * num_gpus) 41 | ep: int = 0 # number of epochs 42 | wp_ep: int = 0 # epochs for warmup 43 | 44 | # - optimization 45 | opt: str = '' # optimizer; 'adam' or 'lamb' 46 | base_lr: float = 0. # lr == base_lr * (bs) 47 | lr_scale: float = 0. # see file `lr_decay.py` for more details 48 | clip: int = -1 # use gradient clipping if clip > 0 49 | 50 | # - regularization tricks 51 | wd: float = 0. # weight decay 52 | mixup: float = 0. # use mixup if mixup > 0 53 | rep_aug: int = 0 # use repeated augmentation if rep_aug > 0 54 | drop_path: float = 0. # drop_path ratio 55 | 56 | # - other tricks 57 | ema: float = 0. # use EMA if ema > 0 58 | sbn: bool = True # use SyncBatchNorm 59 | 60 | # NO NEED TO SPECIFIED; each of these args would be updated in runtime automatically 61 | lr: float = None 62 | batch_size_per_gpu: int = 0 63 | glb_batch_size: int = 0 64 | device: str = 'cpu' 65 | world_size: int = 1 66 | global_rank: int = 0 67 | local_rank: int = 0 # we DO USE this arg 68 | is_master: bool = False 69 | is_local_master: bool = False 70 | cmd: str = ' '.join(sys.argv[1:]) 71 | commit_id: str = os.popen(f'git rev-parse HEAD').read().strip() 72 | commit_msg: str = os.popen(f'git log -1').read().strip().splitlines()[-1].strip() 73 | log_txt_name: str = '{args.exp_dir}/pretrain_log.txt' 74 | tb_lg_dir: str = '' # tensorboard log directory 75 | 76 | train_loss: float = 0. 77 | train_acc: float = 0. 78 | best_val_acc: float = 0. 
79 | cur_ep: str = '' 80 | remain_time: str = '' 81 | finish_time: str = '' 82 | first_logging: bool = True 83 | 84 | def log_epoch(self): 85 | if not self.is_local_master: 86 | return 87 | 88 | if self.first_logging: 89 | self.first_logging = False 90 | with open(self.log_txt_name, 'w') as fp: 91 | json.dump({ 92 | 'name': self.exp_name, 'cmd': self.cmd, 'git_commit_id': self.commit_id, 'git_commit_msg': self.commit_msg, 93 | 'model': self.model, 94 | }, fp) 95 | fp.write('\n\n') 96 | 97 | with open(self.log_txt_name, 'a') as fp: 98 | json.dump({ 99 | 'cur_ep': self.cur_ep, 100 | 'train_L': self.train_loss, 'train_acc': self.train_acc, 101 | 'best_val_acc': self.best_val_acc, 102 | 'rema': self.remain_time, 'fini': self.finish_time, 103 | }, fp) 104 | fp.write('\n') 105 | 106 | 107 | def get_args(world_size, global_rank, local_rank, device) -> FineTuneArgs: 108 | # parse args and prepare directories 109 | args = FineTuneArgs(explicit_bool=True).parse_args() 110 | d_name, b_name = os.path.dirname(os.path.abspath(args.exp_dir)), os.path.basename(os.path.abspath(args.exp_dir)) 111 | b_name = ''.join(ch if (ch.isalnum() or ch == '-') else '_' for ch in b_name) 112 | args.exp_dir = os.path.join(d_name, b_name) 113 | os.makedirs(args.exp_dir, exist_ok=True) 114 | args.log_txt_name = os.path.join(args.exp_dir, 'finetune_log.txt') 115 | 116 | args.tb_lg_dir = args.tb_lg_dir or os.path.join(args.exp_dir, 'tensorboard_log') 117 | try: os.makedirs(args.tb_lg_dir, exist_ok=True) 118 | except: pass 119 | 120 | # fill in args.bs, args.ep, etc. with their default values (if their values are not explicitly specified, i.e., if bool(they) == False) 121 | if args.model == 'convnext_large' and args.img_size == 384: 122 | default_values = HP_DEFAULT_VALUES['convnext_large_384'] 123 | else: 124 | default_values = HP_DEFAULT_VALUES[args.model] 125 | for k, v in zip(HP_DEFAULT_NAMES, default_values): 126 | if bool(getattr(args, k)) == False: 127 | setattr(args, k, v) 128 | 129 | # update other runtime args 130 | args.world_size, args.global_rank, args.local_rank, args.device = world_size, global_rank, local_rank, device 131 | args.is_master = global_rank == 0 132 | args.is_local_master = local_rank == 0 133 | args.batch_size_per_gpu = args.bs // world_size 134 | args.glb_batch_size = args.batch_size_per_gpu * world_size 135 | args.lr = args.base_lr * args.glb_batch_size / 256 136 | 137 | return args 138 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
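# Descriptive note on this file: distributed fine-tuning helpers. init_distributed_environ()
# reads the RANK environment variable (set by launchers such as torchrun), pins each process
# to cuda device RANK % num_gpus, initializes an NCCL process group, and patches print()
# so that only local rank 0 emits timestamped log lines.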
6 | 7 | import datetime 8 | import os 9 | import sys 10 | from functools import partial 11 | from typing import List, Tuple, Callable 12 | 13 | import pytz 14 | import torch 15 | import torch.distributed as tdist 16 | import torch.multiprocessing as tmp 17 | from timm import create_model 18 | from timm.loss import SoftTargetCrossEntropy, BinaryCrossEntropy 19 | from timm.optim import AdamW, Lamb 20 | from timm.utils import ModelEmaV2 21 | from torch.nn.parallel import DistributedDataParallel 22 | from torch.optim.optimizer import Optimizer 23 | 24 | from arg import FineTuneArgs 25 | from downstream_imagenet.mixup import BatchMixup 26 | from lr_decay import get_param_groups 27 | 28 | 29 | def time_str(for_dirname=False): 30 | return datetime.datetime.now(tz=pytz.timezone('Asia/Shanghai')).strftime('%m-%d_%H-%M-%S' if for_dirname else '[%m-%d %H:%M:%S]') 31 | 32 | 33 | def init_distributed_environ(): 34 | # ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py#L29 35 | if tmp.get_start_method(allow_none=True) is None: 36 | tmp.set_start_method('spawn') 37 | global_rank, num_gpus = int(os.environ.get('RANK', 'error')), torch.cuda.device_count() 38 | local_rank = global_rank % num_gpus 39 | torch.cuda.set_device(local_rank) 40 | 41 | tdist.init_process_group(backend='nccl') 42 | assert tdist.is_initialized(), 'torch.distributed is not initialized!' 43 | torch.backends.cudnn.benchmark = True 44 | torch.backends.cudnn.deterministic = False 45 | 46 | # print only when local_rank == 0 or print(..., force=True) 47 | import builtins as __builtin__ 48 | builtin_print = __builtin__.print 49 | 50 | def prt(msg, *args, **kwargs): 51 | force = kwargs.pop('force', False) 52 | if local_rank == 0 or force: 53 | f_back = sys._getframe().f_back 54 | file_desc = f'{f_back.f_code.co_filename:24s}'[-24:] 55 | builtin_print(f'{time_str()} ({file_desc}, line{f_back.f_lineno:-4d})=> {msg}', *args, **kwargs) 56 | 57 | __builtin__.print = prt 58 | tdist.barrier() 59 | return tdist.get_world_size(), global_rank, local_rank, torch.empty(1).cuda().device 60 | 61 | 62 | def create_model_opt(args: FineTuneArgs) -> Tuple[torch.nn.Module, Callable, torch.nn.Module, DistributedDataParallel, ModelEmaV2, Optimizer]: 63 | num_classes = 1000 64 | model_without_ddp: torch.nn.Module = create_model(args.model, num_classes=num_classes, drop_path_rate=args.drop_path).to(args.device) 65 | model_para = f'{sum(p.numel() for p in model_without_ddp.parameters() if p.requires_grad) / 1e6:.1f}M' 66 | # create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper 67 | model_ema = ModelEmaV2(model_without_ddp, decay=args.ema, device=args.device) 68 | if args.sbn: 69 | model_without_ddp = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model_without_ddp) 70 | print(f'[model={args.model}] [#para={model_para}, drop_path={args.drop_path}, ema={args.ema}] {model_without_ddp}\n') 71 | model = DistributedDataParallel(model_without_ddp, device_ids=[args.local_rank], find_unused_parameters=False, broadcast_buffers=False) 72 | model.train() 73 | opt_cls = { 74 | 'adam': AdamW, 'adamw': AdamW, 75 | 'lamb': partial(Lamb, max_grad_norm=1e7, always_adapt=True, bias_correction=False), 76 | } 77 | param_groups: List[dict] = get_param_groups(model_without_ddp, nowd_keys={'cls_token', 'pos_embed', 'mask_token', 'gamma'}, lr_scale=args.lr_scale) 78 | # param_groups[0] is like this: {'params': List[nn.Parameters], 'lr': float, 'lr_scale': float, 'weight_decay': float, 'weight_decay_scale': float} 79 | optimizer = 
opt_cls[args.opt](param_groups, lr=args.lr, weight_decay=0) 80 | print(f'[optimizer={type(optimizer)}]') 81 | mixup_fn = BatchMixup( 82 | mixup_alpha=args.mixup, cutmix_alpha=1.0, cutmix_minmax=None, 83 | prob=1.0, switch_prob=0.5, mode='batch', 84 | label_smoothing=0.1, num_classes=num_classes 85 | ) 86 | mixup_fn.mixup_enabled = args.mixup > 0.0 87 | if 'lamb' in args.opt: 88 | # label smoothing is solved in AdaptiveMixup with `label_smoothing`, so here smoothing=0 89 | criterion = BinaryCrossEntropy(smoothing=0, target_threshold=None) 90 | else: 91 | criterion = SoftTargetCrossEntropy() 92 | print(f'[loss_fn] {criterion}') 93 | print(f'[mixup_fn] {mixup_fn}') 94 | return criterion, mixup_fn, model_without_ddp, model, model_ema, optimizer 95 | 96 | 97 | def load_checkpoint(resume_from, model_without_ddp, ema_module, optimizer): 98 | if len(resume_from) == 0 or not os.path.exists(resume_from): 99 | raise AttributeError(f'ckpt `{resume_from}` not found!') 100 | # return 0, '[no performance_desc]' 101 | print(f'[try to resume from file `{resume_from}`]') 102 | checkpoint = torch.load(resume_from, map_location='cpu') 103 | assert checkpoint.get('is_pretrain', False) == False, 'Please do not use `*_withdecoder_1kpretrained_spark_style.pth`, which is ONLY for resuming the pretraining. Use `*_1kpretrained_timm_style.pth` or `*_1kfinetuned*.pth` instead.' 104 | 105 | ep_start, performance_desc = checkpoint.get('epoch', -1) + 1, checkpoint.get('performance_desc', '[no performance_desc]') 106 | missing, unexpected = model_without_ddp.load_state_dict(checkpoint.get('module', checkpoint), strict=False) 107 | print(f'[load_checkpoint] missing_keys={missing}') 108 | print(f'[load_checkpoint] unexpected_keys={unexpected}') 109 | print(f'[load_checkpoint] ep_start={ep_start}, performance_desc={performance_desc}') 110 | 111 | if 'optimizer' in checkpoint: 112 | optimizer.load_state_dict(checkpoint['optimizer']) 113 | if 'ema' in checkpoint: 114 | ema_module.load_state_dict(checkpoint['ema']) 115 | return ep_start, performance_desc 116 | 117 | 118 | def save_checkpoint(save_to, args, epoch, performance_desc, model_without_ddp_state, ema_state, optimizer_state): 119 | checkpoint_path = os.path.join(args.exp_dir, save_to) 120 | if args.is_local_master: 121 | to_save = { 122 | 'args': str(args), 123 | 'arch': args.model, 124 | 'epoch': epoch, 125 | 'performance_desc': performance_desc, 126 | 'module': model_without_ddp_state, 127 | 'ema': ema_state, 128 | 'optimizer': optimizer_state, 129 | 'is_pretrain': False, 130 | } 131 | torch.save(to_save, checkpoint_path) 132 | -------------------------------------------------------------------------------- /benchmarks.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import platform 3 | import sys 4 | import time 5 | from pathlib import Path 6 | 7 | import pandas as pd 8 | 9 | FILE = Path(__file__).resolve() 10 | ROOT = FILE.parents[0] # YOLO root directory 11 | if str(ROOT) not in sys.path: 12 | sys.path.append(str(ROOT)) # add ROOT to PATH 13 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 14 | 15 | import export 16 | from models.experimental import attempt_load 17 | from models.yolo import SegmentationModel 18 | from segment.val import run as val_seg 19 | from utils import notebook_init 20 | from utils.general import LOGGER, check_yaml, file_size, print_args 21 | from utils.torch_utils import select_device 22 | from val import run as val_det 23 | 24 | 25 | def run( 26 | weights=ROOT / 'yolo.pt', # 
--------------------------------------------------------------------------------
/benchmarks.py:
--------------------------------------------------------------------------------
import argparse
import platform
import sys
import time
from pathlib import Path

import pandas as pd

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLO root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd())  # relative

import export
from models.experimental import attempt_load
from models.yolo import SegmentationModel
from segment.val import run as val_seg
from utils import notebook_init
from utils.general import LOGGER, check_yaml, file_size, print_args
from utils.torch_utils import select_device
from val import run as val_det


def run(
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=640,  # inference size (pixels)
        batch_size=1,  # batch size
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        half=False,  # use FP16 half-precision inference
        test=False,  # test exports only
        pt_only=False,  # test PyTorch only
        hard_fail=False,  # throw error on benchmark failure
):
    y, t = [], time.time()
    device = select_device(device)
    model_type = type(attempt_load(weights, fuse=False))  # DetectionModel, SegmentationModel, etc.
    for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows():  # index, (name, file, suffix, CPU, GPU)
        try:
            assert i not in (9, 10), 'inference not supported'  # Edge TPU and TF.js are unsupported
            assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13'  # CoreML
            if 'cpu' in device.type:
                assert cpu, 'inference not supported on CPU'
            if 'cuda' in device.type:
                assert gpu, 'inference not supported on GPU'

            # Export
            if f == '-':
                w = weights  # PyTorch format
            else:
                w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # all others
            assert suffix in str(w), 'export failed'

            # Validate
            if model_type == SegmentationModel:
                result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
                metric = result[0][7]  # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls))
            else:  # DetectionModel
                result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
                metric = result[0][3]  # (p, r, map50, map, *loss(box, obj, cls))
            speed = result[2][1]  # times (preprocess, inference, postprocess)
            y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)])  # name, size (MB), mAP, t_inference (ms)
        except Exception as e:
            if hard_fail:
                assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}'
            LOGGER.warning(f'WARNING ⚠️ Benchmark failure for {name}: {e}')
            y.append([name, None, None, None])  # export/benchmark failed: no size, mAP or time
        if pt_only and i == 0:
            break  # break after PyTorch

    # Print results
    LOGGER.info('\n')
    parse_opt()  # re-parse and print run arguments
    notebook_init()  # print system info
    py = pd.DataFrame(y, columns=['Format', 'Size (MB)', 'mAP50-95', 'Inference time (ms)'])
    LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)')
    LOGGER.info(str(py))
    if hard_fail and isinstance(hard_fail, str):
        metrics = py['mAP50-95'].array  # values to compare to floor
        floor = eval(hard_fail)  # minimum metric floor to pass, e.g. --hard-fail 0.3
        assert all(x > floor for x in metrics if pd.notna(x)), f'HARD FAIL: mAP50-95 < floor {floor}'
    return py
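# Illustrative invocations (flags as defined in parse_opt() below; paths hypothetical):
#   $ python benchmarks.py --weights yolo.pt --imgsz 640 --device 0
#       exports to every supported format and validates each export, while
#   $ python benchmarks.py --test
#       only checks that each export succeeds (see test() below).
# Passing a float string to --hard-fail, e.g. --hard-fail 0.3, makes run() assert
# that every measured mAP50-95 stays above that floor.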
def test(
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=640,  # inference size (pixels)
        batch_size=1,  # batch size
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        half=False,  # use FP16 half-precision inference
        test=False,  # test exports only
        pt_only=False,  # test PyTorch only
        hard_fail=False,  # throw error on benchmark failure
):
    y, t = [], time.time()
    device = select_device(device)
    for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows():  # index, (name, file, suffix, CPU, GPU)
        try:
            w = weights if f == '-' else \
                export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # weights
            assert suffix in str(w), 'export failed'
            y.append([name, True])
        except Exception:
            y.append([name, False])  # export failed

    # Print results
    LOGGER.info('\n')
    parse_opt()  # re-parse and print run arguments
    notebook_init()  # print system info
    py = pd.DataFrame(y, columns=['Format', 'Export'])
    LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)')
    LOGGER.info(str(py))
    return py


def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolo.pt', help='weights path')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--test', action='store_true', help='test exports only')
    parser.add_argument('--pt-only', action='store_true', help='test PyTorch only')
    parser.add_argument('--hard-fail', nargs='?', const=True, default=False, help='Exception on error or < min metric')
    opt = parser.parse_args()
    opt.data = check_yaml(opt.data)  # check YAML
    print_args(vars(opt))
    return opt


def main(opt):
    test(**vars(opt)) if opt.test else run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
--------------------------------------------------------------------------------
/spark/downstream_imagenet/data.py:
--------------------------------------------------------------------------------
# Copyright (c) ByteDance, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import os
import random
import time

import PIL.Image as PImage
import numpy as np
import torch
import torchvision
from timm.data import AutoAugment as TimmAutoAugment
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, create_transform
from timm.data.distributed_sampler import RepeatAugSampler
from timm.data.transforms_factory import transforms_imagenet_eval
from torch.utils.data import DataLoader
from torch.utils.data.sampler import Sampler
from torchvision.transforms import AutoAugment as TorchAutoAugment
from torchvision.transforms import transforms, TrivialAugmentWide

try:
    from torchvision.transforms import InterpolationMode
    interpolation = InterpolationMode.BICUBIC
except ImportError:  # older torchvision without InterpolationMode
    import PIL
    interpolation = PIL.Image.BICUBIC


def create_classification_dataset(data_path, img_size, rep_aug, workers, batch_size_per_gpu, world_size, global_rank):
    import warnings
    warnings.filterwarnings('ignore', category=UserWarning)

    mean, std = IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
    trans_train = create_transform(
        is_training=True, input_size=img_size,
        auto_augment='v0', interpolation='bicubic', re_prob=0.25, re_mode='pixel', re_count=1,
        mean=mean, std=std,
    )
    if img_size < 384:
        for i, t in enumerate(trans_train.transforms):
            if isinstance(t, (TorchAutoAugment, TimmAutoAugment)):
                trans_train.transforms[i] = TrivialAugmentWide(interpolation=interpolation)
                break
        trans_val = transforms_imagenet_eval(img_size=img_size, interpolation='bicubic', crop_pct=0.95, mean=mean, std=std)
    else:
        trans_val = transforms.Compose([
            transforms.Resize((img_size, img_size), interpolation=interpolation),
            transforms.ToTensor(), transforms.Normalize(mean=mean, std=std),
        ])
    print_transform(trans_train, '[train]')
    print_transform(trans_val, '[val]')

    imagenet_folder = os.path.abspath(data_path)
    for postfix in ('train', 'val'):
        if imagenet_folder.endswith(postfix):
            imagenet_folder = imagenet_folder[:-len(postfix)]
    dataset_train = torchvision.datasets.ImageFolder(os.path.join(imagenet_folder, 'train'), trans_train)
    dataset_val = torchvision.datasets.ImageFolder(os.path.join(imagenet_folder, 'val'), trans_val)

    if rep_aug:
        print(f'[dataset] using repeated augmentation: count={rep_aug}')
        train_sp = RepeatAugSampler(dataset_train, shuffle=True, num_repeats=rep_aug)
    else:
        train_sp = torch.utils.data.distributed.DistributedSampler(dataset_train, shuffle=True, drop_last=True)

    loader_train = DataLoader(
        dataset=dataset_train, num_workers=workers, pin_memory=True,
        batch_size=batch_size_per_gpu, sampler=train_sp, persistent_workers=workers > 0,
        worker_init_fn=worker_init_fn,
    )
    iters_train = len(loader_train)
    print(f'[dataset: train] bs={world_size}x{batch_size_per_gpu}={world_size * batch_size_per_gpu}, num_iters={iters_train}')

    val_ratio = 2
    loader_val = DataLoader(
        dataset=dataset_val, num_workers=workers, pin_memory=True,
        batch_sampler=DistInfiniteBatchSampler(world_size, global_rank, len(dataset_val), glb_batch_size=val_ratio * batch_size_per_gpu, filling=False, shuffle=False),
        worker_init_fn=worker_init_fn,
    )
    iters_val = len(loader_val)
    print(f'[dataset: val] bs={world_size}x{val_ratio * batch_size_per_gpu}={val_ratio * world_size * 
batch_size_per_gpu}, num_iters={iters_val}') 85 | 86 | time.sleep(3) 87 | warnings.resetwarnings() 88 | return loader_train, iters_train, iter(loader_val), iters_val 89 | 90 | 91 | def worker_init_fn(worker_id): 92 | # see: https://pytorch.org/docs/stable/notes/randomness.html#dataloader 93 | worker_seed = torch.initial_seed() % 2 ** 32 94 | np.random.seed(worker_seed) 95 | random.seed(worker_seed) 96 | 97 | 98 | def print_transform(transform, s): 99 | print(f'Transform {s} = ') 100 | for t in transform.transforms: 101 | print(t) 102 | print('---------------------------\n') 103 | 104 | 105 | class DistInfiniteBatchSampler(Sampler): 106 | def __init__(self, world_size, global_rank, dataset_len, glb_batch_size, seed=0, filling=False, shuffle=True): 107 | assert glb_batch_size % world_size == 0 108 | self.world_size, self.rank = world_size, global_rank 109 | self.dataset_len = dataset_len 110 | self.glb_batch_size = glb_batch_size 111 | self.batch_size = glb_batch_size // world_size 112 | 113 | self.iters_per_ep = (dataset_len + glb_batch_size - 1) // glb_batch_size 114 | self.filling = filling 115 | self.shuffle = shuffle 116 | self.epoch = 0 117 | self.seed = seed 118 | self.indices = self.gener_indices() 119 | 120 | def gener_indices(self): 121 | global_max_p = self.iters_per_ep * self.glb_batch_size # global_max_p % world_size must be 0 cuz glb_batch_size % world_size == 0 122 | if self.shuffle: 123 | g = torch.Generator() 124 | g.manual_seed(self.epoch + self.seed) 125 | global_indices = torch.randperm(self.dataset_len, generator=g) 126 | else: 127 | global_indices = torch.arange(self.dataset_len) 128 | filling = global_max_p - global_indices.shape[0] 129 | if filling > 0 and self.filling: 130 | global_indices = torch.cat((global_indices, global_indices[:filling])) 131 | global_indices = tuple(global_indices.numpy().tolist()) 132 | 133 | seps = torch.linspace(0, len(global_indices), self.world_size + 1, dtype=torch.int) 134 | local_indices = global_indices[seps[self.rank]:seps[self.rank + 1]] 135 | self.max_p = len(local_indices) 136 | return local_indices 137 | 138 | def __iter__(self): 139 | self.epoch = 0 140 | while True: 141 | self.epoch += 1 142 | p, q = 0, 0 143 | while p < self.max_p: 144 | q = p + self.batch_size 145 | yield self.indices[p:q] 146 | p = q 147 | if self.shuffle: 148 | self.indices = self.gener_indices() 149 | 150 | def __len__(self): 151 | return self.iters_per_ep 152 | -------------------------------------------------------------------------------- /utils/loggers/comet/hpo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import comet_ml 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | FILE = Path(__file__).resolve() 13 | ROOT = FILE.parents[3] # YOLOv5 root directory 14 | if str(ROOT) not in sys.path: 15 | sys.path.append(str(ROOT)) # add ROOT to PATH 16 | 17 | from train import train 18 | from utils.callbacks import Callbacks 19 | from utils.general import increment_path 20 | from utils.torch_utils import select_device 21 | 22 | # Project Configuration 23 | config = comet_ml.config.get_config() 24 | COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") 25 | 26 | 27 | def get_args(known=False): 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') 30 | 
parser.add_argument('--cfg', type=str, default='', help='model.yaml path') 31 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') 32 | parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') 33 | parser.add_argument('--epochs', type=int, default=300, help='total training epochs') 34 | parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') 35 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') 36 | parser.add_argument('--rect', action='store_true', help='rectangular training') 37 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') 38 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 39 | parser.add_argument('--noval', action='store_true', help='only validate final epoch') 40 | parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') 41 | parser.add_argument('--noplots', action='store_true', help='save no plot files') 42 | parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') 43 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 44 | parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') 45 | parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') 46 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 47 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') 48 | parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') 49 | parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') 50 | parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') 51 | parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') 52 | parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') 53 | parser.add_argument('--name', default='exp', help='save to project/name') 54 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 55 | parser.add_argument('--quad', action='store_true', help='quad dataloader') 56 | parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') 57 | parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') 58 | parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') 59 | parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') 60 | parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') 61 | parser.add_argument('--seed', type=int, default=0, help='Global training seed') 62 | parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') 63 | 64 | # Weights & Biases arguments 65 | parser.add_argument('--entity', default=None, help='W&B: Entity') 66 | parser.add_argument('--upload_dataset', 
nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    # Comet Arguments
    parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
    parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
    parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
    parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
    parser.add_argument("--comet_optimizer_workers",
                        type=int,
                        default=1,
                        help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")

    return parser.parse_known_args()[0] if known else parser.parse_args()


def run(parameters, opt):
    hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}

    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.batch_size = parameters.get("batch_size")
    opt.epochs = parameters.get("epochs")

    device = select_device(opt.device, batch_size=opt.batch_size)
    train(hyp_dict, opt, device, callbacks=Callbacks())


if __name__ == "__main__":
    opt = get_args(known=True)

    opt.weights = str(opt.weights)
    opt.cfg = str(opt.cfg)
    opt.data = str(opt.data)
    opt.project = str(opt.project)

    optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
    if optimizer_id is None:
        with open(opt.comet_optimizer_config) as f:
            optimizer_config = json.load(f)
        optimizer = comet_ml.Optimizer(optimizer_config)
    else:
        optimizer = comet_ml.Optimizer(optimizer_id)

    opt.comet_optimizer_id = optimizer.id
    status = optimizer.status()

    opt.comet_optimizer_objective = status["spec"]["objective"]
    opt.comet_optimizer_metric = status["spec"]["metric"]

    logger.info("COMET INFO: Starting Hyperparameter Sweep")
    for parameter in optimizer.get_parameters():
        run(parameter["parameters"], opt)
--------------------------------------------------------------------------------
/utils/autoanchor.py:
--------------------------------------------------------------------------------
import random

import numpy as np
import torch
import yaml
from tqdm import tqdm

from utils import TryExcept
from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr

PREFIX = colorstr('AutoAnchor: ')


def check_anchor_order(m):
    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
    a = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da and (da.sign() != ds.sign()):  # anchor order does not match stride order
        LOGGER.info(f'{PREFIX}Reversing anchor order')
        m.anchors[:] = m.anchors.flip(0)


@TryExcept(f'{PREFIX}ERROR')
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    # Check anchor fit to data, recompute if necessary
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    shapes = imgsz * dataset.shapes / 
dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1 / thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides 41 | anchors = m.anchors.clone() * stride # current anchors 42 | bpr, aat = metric(anchors.cpu().view(-1, 2)) 43 | s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). ' 44 | if bpr > 0.98: # threshold to recompute 45 | LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅') 46 | else: 47 | LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...') 48 | na = m.anchors.numel() // 2 # number of anchors 49 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 50 | new_bpr = metric(anchors)[0] 51 | if new_bpr > bpr: # replace anchors 52 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 53 | m.anchors[:] = anchors.clone().view_as(m.anchors) 54 | check_anchor_order(m) # must be in pixel-space (not grid-space) 55 | m.anchors /= stride 56 | s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)' 57 | else: 58 | s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)' 59 | LOGGER.info(s) 60 | 61 | 62 | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 63 | """ Creates kmeans-evolved anchors from training dataset 64 | 65 | Arguments: 66 | dataset: path to data.yaml, or a loaded dataset 67 | n: number of anchors 68 | img_size: image size used for training 69 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 70 | gen: generations to evolve anchors using genetic algorithm 71 | verbose: print all results 72 | 73 | Return: 74 | k: kmeans evolved anchors 75 | 76 | Usage: 77 | from utils.autoanchor import *; _ = kmean_anchors() 78 | """ 79 | from scipy.cluster.vq import kmeans 80 | 81 | npr = np.random 82 | thr = 1 / thr 83 | 84 | def metric(k, wh): # compute metrics 85 | r = wh[:, None] / k[None] 86 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 87 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 88 | return x, x.max(1)[0] # x, best_x 89 | 90 | def anchor_fitness(k): # mutation fitness 91 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 92 | return (best * (best > thr).float()).mean() # fitness 93 | 94 | def print_results(k, verbose=True): 95 | k = k[np.argsort(k.prod(1))] # sort small to large 96 | x, best = metric(k, wh0) 97 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 98 | s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \ 99 | f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \ 100 | f'past_thr={x[x > thr].mean():.3f}-mean: ' 101 | for x in k: 102 | s += '%i,%i, ' % (round(x[0]), round(x[1])) 103 | if verbose: 104 | LOGGER.info(s[:-2]) 105 | return k 106 | 107 | if isinstance(dataset, str): # *.yaml file 108 | with 
open(dataset, errors='ignore') as f:
            data_dict = yaml.safe_load(f)  # data dict
        from utils.dataloaders import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
    wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32)  # keep labels with any side >= 2 pixels
    # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans init
    try:
        LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
        assert n <= len(wh)  # apply overdetermined constraint
        s = wh.std(0)  # sigmas for whitening
        k = kmeans(wh / s, n, iter=30)[0] * s  # points
        assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
    except Exception:
        LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
    wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
    k = print_results(k, verbose=False)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0], 400)
    # ax[1].hist(wh[wh[:, 1]<100, 1], 400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor shape, mutation prob, sigma
    pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT)  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k, verbose)

    return print_results(k).astype(np.float32)
--------------------------------------------------------------------------------
/spark/pretrain/README.md:
--------------------------------------------------------------------------------
## Preparation for ImageNet-1k pretraining

See [/INSTALL.md](/INSTALL.md) to prepare `pip` dependencies and the ImageNet dataset.

**Note: for neural network definitions, we directly use `timm.models.ResNet` and the [official ConvNeXt](https://github.com/facebookresearch/ConvNeXt/blob/048efcea897d999aed302f2639b6270aedf8d4c8/models/convnext.py).**


## Tutorial for pretraining your own CNN model

See [/pretrain/models/custom.py](/pretrain/models/custom.py). Your todo list is:

- implement `get_downsample_ratio` in [/pretrain/models/custom.py line20](/pretrain/models/custom.py#L20).
- implement `get_feature_map_channels` in [/pretrain/models/custom.py line29](/pretrain/models/custom.py#L29).
- implement `forward` in [/pretrain/models/custom.py line38](/pretrain/models/custom.py#L38).
- define `your_convnet(...)` with `@register_model` in [/pretrain/models/custom.py line54](/pretrain/models/custom.py#L53-L54).
- add default kwargs of `your_convnet(...)` in [/pretrain/models/\_\_init\_\_.py line34](/pretrain/models/__init__.py#L34).
- **Note: see [#54](/../../issues/54) if your CNN contains an SE module or a global average pooling layer, and see [#56](/../../issues/56) if it contains GroupNorm.**

A minimal sketch of these pieces is shown below; then run the experiment with `--model=your_convnet`.
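This is a hedged skeleton, not the official template: the stage layout, channel counts and the `hierarchical` keyword are illustrative assumptions; [/pretrain/models/custom.py](/pretrain/models/custom.py) remains the authoritative reference.

```python
from typing import List

import torch
import torch.nn as nn
from timm.models.registry import register_model


class YourConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        # four stages: a /4 stem followed by three /2 stages => overall stride 32
        self.stages = nn.ModuleList([
            nn.Conv2d(3, 32, kernel_size=4, stride=4),     # /4
            nn.Conv2d(32, 64, kernel_size=2, stride=2),    # /8
            nn.Conv2d(64, 128, kernel_size=2, stride=2),   # /16
            nn.Conv2d(128, 256, kernel_size=2, stride=2),  # /32
        ])

    def get_downsample_ratio(self) -> int:
        return 32  # overall stride of the deepest feature map

    def get_feature_map_channels(self) -> List[int]:
        return [32, 64, 128, 256]  # channels of each pyramid level, shallow to deep

    def forward(self, x: torch.Tensor, hierarchical=False):
        feats = []
        for stage in self.stages:
            x = stage(x)
            feats.append(x)
        return feats if hierarchical else x  # SparK needs all pyramid levels


@register_model
def your_convnet(pretrained=False, **kwargs):
    return YourConvNet()
```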

## Tutorial for pretraining on your own dataset

See the comment of `build_dataset_to_pretrain` in [line55 of /pretrain/utils/imagenet.py](/pretrain/utils/imagenet.py#L55). Your todo list:

- Define a subclass of `torch.utils.data.Dataset` for your own unlabeled dataset, to replace our `ImageNetDataset`.
- Use `args.data_path` and `args.input_size` to help build your dataset, with `--data_path=... --input_size=...` to specify them.
- Note that the batch size `--bs` is the total batch size across all GPUs, and may need to be adjusted based on your dataset size. FYI: we use `--bs=4096` for ImageNet, which contains 1.28 million images.

**If your dataset is relatively small**, you can try `--init_weight=/path/to/res50_withdecoder_1kpretrained_spark_style.pth` to do your pretraining *from our pretrained weights*, rather than *from scratch*.

## Debug on 1 GPU (without DistributedDataParallel)

Use a small batch size such as `--bs=32` to avoid OOM.

```shell script
python3 main.py --exp_name=debug --data_path=/path/to/imagenet --model=resnet50 --bs=32
```


## Pretraining Any Model on ImageNet-1k (224x224)

For pretraining, run [/pretrain/main.py](/pretrain/main.py) with `torchrun`.
**It is required to specify** the ImageNet data folder (`--data_path`), your experiment name & log dir (`--exp_name` and `--exp_dir`, automatically created if they do not exist), and the model name (`--model`; for valid choices, see the keys of `pretrain_default_model_kwargs` in [/pretrain/models/\_\_init\_\_.py line34](/pretrain/models/__init__.py#L34)).

We use the **same** pretraining configurations (lr, batch size, etc.) for all models (ResNets and ConvNeXts) in 224 pretraining.
Their **names** and **default values** are in [/pretrain/utils/arg_util.py line23-44](/pretrain/utils/arg_util.py#L23-L44).
All of these defaults (like batch size 4096) are used unless you override them, e.g. with `--bs=512`.

**Note: the batch size `--bs` is the total batch size across all GPUs, and `--base_lr` is the base learning rate. The actual lr is `lr = base_lr * bs / 256`, as in [/pretrain/utils/arg_util.py line131](/pretrain/utils/arg_util.py#L131), so do not use `--lr` to specify the lr (it would be ignored).**
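For example, under this linear scaling rule (the values below are illustrative, not necessarily the repo defaults):

```python
base_lr = 2e-4           # hypothetical --base_lr
bs = 4096                # total batch size across all GPUs (--bs)
lr = base_lr * bs / 256  # = 3.2e-3: the lr the optimizer actually receives
```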

Here is an example to pretrain a ResNet50 on an 8-GPU single machine (we use DistributedDataParallel), overriding the default batch size to 512:
```shell script
$ cd /path/to/SparK/pretrain
$ torchrun --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr=localhost --master_port= main.py \
  --data_path=/path/to/imagenet --exp_name= --exp_dir=/path/to/logdir \
  --model=resnet50 --bs=512
```

For multiple machines, change the `--nnodes`, `--node_rank`, `--master_addr` and `--master_port` to your configurations. E.g.:
```shell script
$ torchrun --nproc_per_node=8 --nnodes= --node_rank= --master_addr= --master_port= main.py \
  ...
```

## Pretraining ConvNeXt-Large on ImageNet-1k (384x384)

For 384 pretraining we use a larger mask ratio (0.75), half the batch size (2048), and double the base learning rate (4e-4):

```shell script
$ cd /path/to/SparK/pretrain
$ torchrun --nproc_per_node=8 --nnodes= --node_rank= --master_addr= --master_port= main.py \
  --data_path=/path/to/imagenet --exp_name= --exp_dir=/path/to/logdir \
  --model=convnext_large --input_size=384 --mask=0.75 --bs=2048 --base_lr=4e-4
```

## Logging

See files in your `--exp_dir` to track your experiment:

- `_withdecoder_1kpretrained_spark_style.pth`: saves model and optimizer states, the current epoch, the current reconstruction loss, etc.; can be used to resume pretraining; can also be used for visualization in [/pretrain/viz_reconstruction.ipynb](/pretrain/viz_reconstruction.ipynb)
- `_1kpretrained_timm_style.pth`: can be used for downstream finetuning
- `pretrain_log.txt`: records some important information such as:
    - `git_commit_id`: the git version
    - `cmd`: the command of this experiment

  It also reports the loss and remaining pretraining time.

- `tensorboard_log/`: saves many tensorboard logs including loss values, learning rates, gradient norms, and more. Use `tensorboard --logdir /path/to/this/tensorboard_log/ --port 23333` for visualization.
- `stdout_backup.txt` and `stderr_backup.txt`: back up stdout/stderr.

## Resuming

Specify `--resume_from=path/to/_withdecoder_1kpretrained_spark_style.pth` to resume pretraining. Note this is different from `--init_weight`:

- `--resume_from` loads three things: model weights, optimizer states, and the current epoch, so it is used to resume an interrupted experiment (it starts from that 'current epoch').
- `--init_weight` ONLY loads the model weights, so it is just a model initialization (it starts from epoch 0).


## Regarding sparse convolution

We do not use sparse convolutions in this PyTorch implementation, due to their limited optimization on modern hardware.
As can be found in [/pretrain/encoder.py](/pretrain/encoder.py), we use masked dense convolution to simulate submanifold sparse convolution.
We also define some sparse pooling and normalization layers in [/pretrain/encoder.py](/pretrain/encoder.py).
All these "sparse" layers are implemented through PyTorch built-in operators.


## Some details: how we mask images and how to set the patch size

In SparK, the mask patch size **equals** the downsample ratio of the CNN model (so there is no configuration like `--patch_size=32`).
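For intuition, here is a minimal sketch of the resulting mask geometry (the model, mask ratio and sizes are illustrative; the two steps explained next do exactly this):

```python
import torch

B, input_size, ratio = 2, 224, 32            # ratio = CNN downsample ratio (e.g. ResNet-50)
fmap_h = fmap_w = input_size // ratio        # 7: the mask is generated at this resolution
active_b1ff = torch.rand(B, 1, fmap_h, fmap_w) > 0.6  # keep ~40%, i.e. a mask ratio of 0.6
# upsample the binary mask back to input resolution: each mask cell covers a
# ratio x ratio patch, so the effective "patch size" equals the downsample ratio
mask_b1hw = active_b1ff.repeat_interleave(ratio, dim=2).repeat_interleave(ratio, dim=3)
print(mask_b1hw.shape)  # torch.Size([2, 1, 224, 224])
```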

Here is the reason: when masking, we:

1. first generate the binary mask for the **smallest**-resolution feature map, i.e., the `_cur_active` or `active_b1ff` in [/pretrain/spark.py line86-87](/pretrain/spark.py#L86-L87), a `torch.BoolTensor` shaped as `[B, 1, fmap_h, fmap_w]` that is used to mask the smallest feature map;
2. then progressively upsample it (i.e., expand its 2nd and 3rd dimensions by calling `repeat_interleave(..., dim=2)` and `repeat_interleave(..., dim=3)` in [/pretrain/encoder.py line16](/pretrain/encoder.py#L16)) to mask the feature maps ([`x` in line21](/pretrain/encoder.py#L21)) with larger resolutions.

So if you want a patch size of 16 or 8, you should actually define a new CNN model with a downsample ratio of 16 or 8.
See [Tutorial for pretraining your own CNN model (above)](https://github.com/keyu-tian/SparK/tree/main/pretrain/#tutorial-for-pretraining-your-own-cnn-model).
--------------------------------------------------------------------------------
/utils/loggers/clearml/clearml_utils.py:
--------------------------------------------------------------------------------
"""Main Logger class for ClearML experiment tracking."""
import glob
import re
from pathlib import Path

import numpy as np
import yaml

from utils.plots import Annotator, colors

try:
    import clearml
    from clearml import Dataset, Task

    assert hasattr(clearml, '__version__')  # verify package import not local dir
except (ImportError, AssertionError):
    clearml = None


def construct_dataset(clearml_info_string):
    """Load a ClearML dataset and fill the internal data_dict with its contents."""
    dataset_id = clearml_info_string.replace('clearml://', '')
    dataset = Dataset.get(dataset_id=dataset_id)
    dataset_root_path = Path(dataset.get_local_copy())

    # We'll search for the yaml file definition in the dataset
    yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml")))
    if len(yaml_filenames) > 1:
        raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains '
                         'the dataset definition this way.')
    elif len(yaml_filenames) == 0:
        raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file '
                         'inside the dataset root path.')
    with open(yaml_filenames[0]) as f:
        dataset_definition = yaml.safe_load(f)

    assert set(dataset_definition.keys()).issuperset(
        {'train', 'test', 'val', 'nc', 'names'}
    ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')"

    data_dict = dict()
    data_dict['train'] = str(
        (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None
    data_dict['test'] = str(
        (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None
    data_dict['val'] = str(
        (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None
    data_dict['nc'] = dataset_definition['nc']
    data_dict['names'] = dataset_definition['names']

    return data_dict


class ClearmlLogger:
    """Log training runs, datasets, models, and predictions to ClearML.

    This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default,
    this information includes hyperparameters, system configuration and metrics, model metrics, code information and
    basic data metrics and analyses.

    By providing additional command line arguments to train.py, datasets,
    models and predictions can also be logged.
    """

    def __init__(self, opt, hyp):
        """
        - Initialize the ClearML Task; this object will capture the experiment
        - Upload dataset version to ClearML Data if opt.upload_dataset is True

        arguments:
        opt (namespace) -- Commandline arguments for this run
        hyp (dict) -- Hyperparameters for this run

        """
        self.current_epoch = 0
        # Keep track of the number of logged images, to enforce a limit
        self.current_epoch_logged_images = set()
        # Maximum number of images to log to ClearML per epoch
        self.max_imgs_to_log_per_epoch = 16
        # Get the interval of epochs when bounding box images should be logged
        self.bbox_interval = opt.bbox_interval
        self.clearml = clearml
        self.task = None
        self.data_dict = None
        if self.clearml:
            self.task = Task.init(
                project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5',
                task_name=opt.name if opt.name != 'exp' else 'Training',
                tags=['YOLOv5'],
                output_uri=True,
                auto_connect_frameworks={'pytorch': False}
                # We disconnect pytorch auto-detection, because we added manual model save points in the code
            )
            # ClearML's hooks will already grab all general parameters
            # Only the hyperparameters coming from the yaml config file
            # will have to be added manually!
            self.task.connect(hyp, name='Hyperparameters')

            # Get ClearML Dataset Version if requested
            if opt.data.startswith('clearml://'):
                # data_dict should have the following keys:
                # names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
                self.data_dict = construct_dataset(opt.data)
                # Set data to data_dict because wandb will crash without this information and opt is the best way
                # to give it to them
                opt.data = self.data_dict

    def log_debug_samples(self, files, title='Debug Samples'):
        """
        Log files (images) as debug samples in the ClearML task.

        arguments:
        files (List(PosixPath)) a list of file paths in PosixPath format
        title (str) A title that groups together images with the same values
        """
        for f in files:
            if f.exists():
                it = re.search(r'_batch(\d+)', f.name)
                iteration = int(it.groups()[0]) if it else 0
                self.task.get_logger().report_image(title=title,
                                                    series=f.name.replace(it.group(), '') if it else f.name,
                                                    local_path=str(f),
                                                    iteration=iteration)

    def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
        """
        Draw the bounding boxes on a single image and report the result as a ClearML debug sample.

        arguments:
        image_path (PosixPath) the path to the original image file
        boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
        class_names (dict): dict containing mapping of class int to class name
        image (Tensor): A torch tensor containing the actual image data
        """
        if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0:
            # Log every bbox_interval epochs and deduplicate for any intermittent extra eval runs
            if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images:
                im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
                annotator = Annotator(im=im, pil=True)
                for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
                    color = colors(i)

                    class_name = class_names[int(class_nr)]
                    confidence_percentage = round(float(conf) * 100, 2)
                    label = f"{class_name}: {confidence_percentage}%"

                    if conf > conf_threshold:
                        annotator.rectangle(box.cpu().numpy(), outline=color)
                        annotator.box_label(box.cpu().numpy(), label=label, color=color)

                annotated_image = annotator.result()
                self.task.get_logger().report_image(title='Bounding Boxes',
                                                    series=image_path.name,
                                                    iteration=self.current_epoch,
                                                    image=annotated_image)
                self.current_epoch_logged_images.add(image_path)
--------------------------------------------------------------------------------
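For reference, a hedged sketch of how `construct_dataset` above is typically driven (the dataset id is a placeholder); the YAML it locates must carry at least the asserted keys:

```python
# Hypothetical usage; 'abc123' stands in for a real ClearML dataset id.
data_dict = construct_dataset('clearml://abc123')
# This succeeds only if the dataset root contains exactly one YAML file with at
# least these keys (train/test/val may be null, but must be present), e.g.:
#   train: images/train
#   val: images/val
#   test: null
#   nc: 2
#   names: ['class_a', 'class_b']
print(data_dict['nc'], data_dict['names'])
```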