├── models
├── __init__.py
└── detect
│ ├── pk-yolo.yaml
│ └── yolov9-e.yaml
├── utils
├── tal
│ ├── __init__.py
│ └── anchor_generator.py
├── loggers
│ ├── wandb
│ │ ├── __init__.py
│ │ ├── log_dataset.py
│ │ ├── sweep.py
│ │ └── sweep.yaml
│ ├── clearml
│ │ ├── __init__.py
│ │ ├── hpo.py
│ │ └── clearml_utils.py
│ └── comet
│ │ ├── optimizer_config.json
│ │ ├── comet_utils.py
│ │ └── hpo.py
├── __init__.py
├── lion.py
├── callbacks.py
├── autobatch.py
├── activations.py
├── coco_utils.py
├── triton.py
├── downloads.py
└── autoanchor.py
├── data
├── images
│ └── horses.jpg
├── multiplane.yaml
└── hyps
│ └── hyp.scratch-high.yaml
├── spark
├── pretrain
│ ├── viz_imgs
│ │ ├── recon.png
│ │ ├── spconv1.png
│ │ ├── spconv2.png
│ │ └── spconv3.png
│ ├── requirements.txt
│ ├── splitedata.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── resnet.py
│ │ ├── custom_detr.py
│ │ ├── custom_origin.py
│ │ ├── custom.py
│ │ └── convnext.py
│ ├── utils
│ │ ├── lr_control.py
│ │ ├── imagenet.py
│ │ └── arg_util.py
│ ├── sampler.py
│ ├── decoder.py
│ ├── dist.py
│ └── README.md
├── downstream_imagenet
│ ├── requirements.txt
│ ├── lr_decay.py
│ ├── README.md
│ ├── models
│ │ └── __init__.py
│ ├── arg.py
│ ├── util.py
│ └── data.py
├── downstream_mmdet
│ ├── configs
│ │ ├── _base_
│ │ │ ├── default_runtime.py
│ │ │ └── models
│ │ │ │ └── mask_rcnn_convnext_fpn.py
│ │ └── convnext_spark
│ │ │ └── mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py
│ ├── mmcv_custom
│ │ ├── __init__.py
│ │ ├── runner
│ │ │ └── checkpoint.py
│ │ ├── layer_decay_optimizer_constructor.py
│ │ └── customized_text.py
│ ├── mmdet
│ │ └── models
│ │ │ └── backbones
│ │ │ └── __init__.py
│ └── README.md
└── downstream_d2
│ ├── configs
│ ├── coco_R_50_FPN_CONV_1x_moco_adam.yaml
│ └── Base-RCNN-FPN.yaml
│ ├── convert-timm-to-d2.py
│ ├── README.md
│ └── lr_decay.py
├── requirements.txt
├── hubconf.py
└── benchmarks.py

/models/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/tal/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/loggers/wandb/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/loggers/clearml/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/data/images/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/data/images/horses.jpg
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/recon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/recon.png
--------------------------------------------------------------------------------
/spark/downstream_imagenet/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | Pillow
3 | typed-argument-parser
4 | timm==0.5.4
5 | tensorboardx
6 | 
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/spconv1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/spconv1.png
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/spconv2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/spconv2.png
--------------------------------------------------------------------------------
/spark/pretrain/viz_imgs/spconv3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mkang315/PK-YOLO/HEAD/spark/pretrain/viz_imgs/spconv3.png
--------------------------------------------------------------------------------
/spark/pretrain/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | Pillow
4 | typed-argument-parser
5 | timm==0.5.4
6 | tensorboardx
7 | 
--------------------------------------------------------------------------------
/data/multiplane.yaml:
--------------------------------------------------------------------------------
1 | train: ./axial_t1wce_2_class/images/train
2 | val: ./axial_t1wce_2_class/images/test
3 | # train: ./coronal_t1wce_2_class/images/train
4 | # val: ./coronal_t1wce_2_class/images/test
5 | # train: ./sagittal_t1wce_2_class/images/train
6 | # val: ./sagittal_t1wce_2_class/images/test
7 | 
8 | nc: 2
9 | names: ['negative','positive']
10 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1)
2 | # yapf:disable
3 | log_config = dict(
4 |     interval=50,
5 |     hooks=[
6 |         dict(type='CustomizedTextLoggerHook'),
7 |         # dict(type='TensorboardLoggerHook')
8 |     ])
9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')]
11 | 
12 | dist_params = dict(backend='nccl')
13 | log_level = 'INFO'
14 | load_from = None
15 | resume_from = None
16 | workflow = [('train', 1)]
17 | 
--------------------------------------------------------------------------------
/spark/pretrain/splitedata.py:
--------------------------------------------------------------------------------
1 | 
2 | import glob
3 | import os
4 | import shutil
5 | 
6 | import numpy as np
7 | 
8 | # Carve out a validation split: randomly move 15% of each class's
9 | # training images into a parallel "val" directory.
10 | rootpath = "/media/dzy/deep2/detr/archive"
11 | cls = ['yes', 'no']
12 | for c in cls:
13 |     allfi = glob.glob(os.path.join(rootpath, "train", c, "*.jpg"))
14 |     np.random.shuffle(allfi)
15 |     targets = os.path.join(rootpath, "val", c)
16 |     if not os.path.exists(targets):
17 |         os.makedirs(targets)
18 |     k = 0
19 |     for file in allfi:
20 |         if k < len(allfi) * 0.15:  # only the first 15% of the shuffled list is moved
21 |             target = file.replace("train", "val")
22 |             shutil.move(file, target)
23 |         k += 1
24 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | 
3 | # All rights reserved.
4 | 
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | from .checkpoint import load_checkpoint 12 | from .layer_decay_optimizer_constructor import LearningRateDecayOptimizerConstructor 13 | from .customized_text import CustomizedTextLoggerHook 14 | 15 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook'] 16 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnest import ResNeSt 9 | from .resnet import ResNet, ResNetV1d 10 | from .resnext import ResNeXt 11 | from .ssd_vgg import SSDVGG 12 | from .trident_resnet import TridentResNet 13 | from .swin_transformer import SwinTransformer 14 | from .convnext import ConvNeXt 15 | 16 | __all__ = [ 17 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 18 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 19 | 'ResNeSt', 'TridentResNet', 'SwinTransformer', 'ConvNeXt' 20 | ] 21 | -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | from utils.general import LOGGER 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 12 | if not logger.wandb: 13 | LOGGER.info("install wandb using `pip install wandb` to log the dataset") 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 21 | parser.add_argument('--entity', default=None, help='W&B entity') 22 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 23 | 24 | opt = parser.parse_args() 25 | opt.resume = False # Explicitly disallow resume check for dataset upload job 26 | 27 | create_dataset_artifact(opt) 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements 2 | # Usage: pip install -r requirements.txt 3 | 4 | # Base ------------------------------------------------------------------------ 5 | gitpython 6 | ipython 7 | matplotlib>=3.2.2 8 | numpy>=1.18.5 9 | opencv-python>=4.1.1 10 | Pillow>=7.1.2 11 | psutil 12 | PyYAML>=5.3.1 13 | requests>=2.23.0 14 | scipy>=1.4.1 15 | thop>=0.1.1 16 | torch>=1.7.0 17 | torchvision>=0.8.1 18 | tqdm>=4.64.0 19 | # protobuf<=3.20.1 20 | 21 | # Logging --------------------------------------------------------------------- 22 | tensorboard>=2.4.1 23 | # clearml>=1.2.0 24 | # comet 25 | 26 | # Plotting -------------------------------------------------------------------- 27 | pandas>=1.1.4 28 | 
seaborn>=0.11.0 29 | 30 | # Export ---------------------------------------------------------------------- 31 | # coremltools>=6.0 32 | # onnx>=1.9.0 33 | # onnx-simplifier>=0.4.1 34 | # nvidia-pyindex 35 | # nvidia-tensorrt 36 | # scikit-learn<=1.1.2 37 | # tensorflow>=2.4.1 38 | # tensorflowjs>=3.9.0 39 | # openvino-dev 40 | 41 | # Deploy ---------------------------------------------------------------------- 42 | # tritonclient[all]~=2.24.0 43 | 44 | # Extras ---------------------------------------------------------------------- 45 | # mss 46 | albumentations>=1.0.3 47 | pycocotools>=2.0 48 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).resolve() 7 | ROOT = FILE.parents[3] # YOLOv5 root directory 8 | if str(ROOT) not in sys.path: 9 | sys.path.append(str(ROOT)) # add ROOT to PATH 10 | 11 | from train import parse_opt, train 12 | from utils.callbacks import Callbacks 13 | from utils.general import increment_path 14 | from utils.torch_utils import select_device 15 | 16 | 17 | def sweep(): 18 | wandb.init() 19 | # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb. 20 | hyp_dict = vars(wandb.config).get("_items").copy() 21 | 22 | # Workaround: get necessary opt args 23 | opt = parse_opt(known=True) 24 | opt.batch_size = hyp_dict.get("batch_size") 25 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 26 | opt.epochs = hyp_dict.get("epochs") 27 | opt.nosave = True 28 | opt.data = hyp_dict.get("data") 29 | opt.weights = str(opt.weights) 30 | opt.cfg = str(opt.cfg) 31 | opt.data = str(opt.data) 32 | opt.hyp = str(opt.hyp) 33 | opt.project = str(opt.project) 34 | device = select_device(opt.device, batch_size=opt.batch_size) 35 | 36 | # train 37 | train(hyp_dict, opt, device, callbacks=Callbacks()) 38 | 39 | 40 | if __name__ == "__main__": 41 | sweep() 42 | -------------------------------------------------------------------------------- /spark/downstream_d2/configs/coco_R_50_FPN_CONV_1x_moco_adam.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | 7 | MASK_ON: True 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | RESNETS: 11 | DEPTH: 50 12 | NORM: "SyncBN" 13 | STRIDE_IN_1X1: False 14 | FPN: 15 | NORM: "SyncBN" 16 | ROI_BOX_HEAD: 17 | NAME: "FastRCNNConvFCHead" 18 | NUM_FC: 1 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 7 21 | NORM: "SyncBN" 22 | ROI_MASK_HEAD: 23 | NAME: "MaskRCNNConvUpsampleHead" 24 | NUM_CONV: 4 25 | POOLER_RESOLUTION: 14 26 | NORM: "SyncBN" 27 | 28 | INPUT: 29 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896) 30 | CROP: 31 | ENABLED: False 32 | TYPE: "absolute_range" 33 | SIZE: (384, 600) 34 | FORMAT: "RGB" 35 | TEST: 36 | EVAL_PERIOD: 5000 37 | PRECISE_BN: 38 | ENABLED: True 39 | 40 | SOLVER: 41 | STEPS: (60000, 80000) 42 | MAX_ITER: 90000 43 | GAMMA: 0.25 44 | BASE_LR: 0.00025 45 | WARMUP_FACTOR: 0.01 46 | WARMUP_ITERS: 1000 47 | WEIGHT_DECAY: 0.0001 48 | CHECKPOINT_PERIOD: 5000 49 | CLIP_GRADIENTS: 50 | ENABLED: False 51 | CLIP_TYPE: "value" 52 | CLIP_VALUE: 1.0 53 | NORM_TYPE: 2.0 54 | 55 | # compared to standard detectron2, we add these two 
new configurations: 56 | OPTIMIZER: "ADAMW" 57 | LR_DECAY: 0.6 58 | -------------------------------------------------------------------------------- /spark/downstream_d2/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-high.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 7.5 # box loss gain 9 | cls: 0.5 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | dfl: 1.5 # dfl loss gain 14 | iou_t: 0.20 # IoU training threshold 15 | anchor_t: 5.0 # anchor-multiple threshold 16 | # anchors: 3 # anchors per output layer (0 to ignore) 17 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 18 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 19 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 20 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 21 | degrees: 0.0 # image rotation (+/- deg) 22 | translate: 0.1 # image translation (+/- fraction) 23 | scale: 0.9 # image scale (+/- gain) 24 | shear: 0.0 # image shear (+/- deg) 25 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 26 | flipud: 0.0 # image flip up-down (probability) 27 | fliplr: 0.5 # image flip left-right (probability) 28 | mosaic: 1.0 # image mosaic (probability) 29 | mixup: 0.15 # image mixup (probability) 30 | copy_paste: 0.3 # segment copy-paste (probability) 31 | -------------------------------------------------------------------------------- /utils/tal/anchor_generator.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.general import check_version 4 | 5 | TORCH_1_10 = check_version(torch.__version__, '1.10.0') 6 | 7 | 8 | def make_anchors(feats, strides, grid_cell_offset=0.5): 9 | """Generate anchors from features.""" 10 | anchor_points, stride_tensor = [], [] 11 | assert feats is not None 12 | dtype, device = feats[0].dtype, feats[0].device 13 | for i, stride in enumerate(strides): 14 | _, _, h, w = feats[i].shape 15 | sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x 16 | sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y 17 | sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) 18 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 19 | stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) 20 | return torch.cat(anchor_points), torch.cat(stride_tensor) 21 | 22 | 23 | def dist2bbox(distance, anchor_points, xywh=True, dim=-1): 24 | """Transform distance(ltrb) to box(xywh or xyxy).""" 25 | lt, rb = torch.split(distance, 2, dim) 26 | x1y1 = anchor_points - lt 27 | x2y2 = anchor_points + rb 28 | if xywh: 29 | c_xy = (x1y1 + x2y2) / 2 30 | wh = x2y2 - x1y1 31 | return torch.cat((c_xy, wh), dim) # xywh bbox 32 | return torch.cat((x1y1, x2y2), dim) # xyxy bbox 33 | 34 | 35 | def bbox2dist(anchor_points, bbox, reg_max): 36 | """Transform bbox(xyxy) to dist(ltrb).""" 37 | x1y1, x2y2 = torch.split(bbox, 2, -1) 38 | return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) 39 | -------------------------------------------------------------------------------- /spark/downstream_d2/convert-timm-to-d2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) ByteDance, Inc. and its affiliates. 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | import pickle as pkl 10 | 11 | import torch 12 | 13 | 14 | # we use `timm.models.ResNet` in pre-training, so keys are timm-style 15 | def timm_resnet_to_detectron2_resnet(source_file, target_file): 16 | pretrained: dict = torch.load(source_file, map_location='cpu') 17 | for mod_k in {'state_dict', 'state', 'module', 'model'}: 18 | if mod_k in pretrained: 19 | pretrained = pretrained[mod_k] 20 | if any(k.startswith('module.encoder_q.') for k in pretrained.keys()): 21 | pretrained = {k.replace('module.encoder_q.', ''): v for k, v in pretrained.items() if k.startswith('module.encoder_q.')} 22 | 23 | pkl_state = {} 24 | for k, v in pretrained.items(): # convert resnet's keys from timm-style to d2-style 25 | if 'layer' not in k: 26 | k = 'stem.' + k 27 | for t in [1, 2, 3, 4]: 28 | k = k.replace(f'layer{t}', f'res{t+1}') 29 | for t in [1, 2, 3]: 30 | k = k.replace(f'bn{t}', f'conv{t}.norm') 31 | k = k.replace('downsample.0', 'shortcut') 32 | k = k.replace('downsample.1', 'shortcut.norm') 33 | 34 | pkl_state[k] = v.detach().numpy() 35 | 36 | with open(target_file, 'wb') as fp: 37 | print(f'[convert] .pkl is generated! 
(from `{source_file}`, to `{target_file}`, len(state)=={len(pkl_state)})') 38 | pkl.dump({'model': pkl_state, '__author__': 'https://github.com/keyu-tian/SparK', 'matching_heuristics': True}, fp) 39 | 40 | 41 | if __name__ == '__main__': 42 | import sys 43 | timm_resnet_to_detectron2_resnet(sys.argv[1], sys.argv[2]) 44 | -------------------------------------------------------------------------------- /spark/pretrain/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | from timm import create_model 9 | from timm.loss import SoftTargetCrossEntropy 10 | from timm.models.layers import drop 11 | 12 | 13 | from models.convnext import ConvNeXt 14 | from models.resnet import ResNet 15 | from models.custom import YourConvNet 16 | _import_resnets_for_timm_registration = (ResNet,) 17 | 18 | 19 | # log more 20 | def _ex_repr(self): 21 | return ', '.join( 22 | f'{k}=' + (f'{v:g}' if isinstance(v, float) else str(v)) 23 | for k, v in vars(self).items() 24 | if not k.startswith('_') and k != 'training' 25 | and not isinstance(v, (torch.nn.Module, torch.Tensor)) 26 | ) 27 | for clz in (torch.nn.CrossEntropyLoss, SoftTargetCrossEntropy, drop.DropPath): 28 | if hasattr(clz, 'extra_repr'): 29 | clz.extra_repr = _ex_repr 30 | else: 31 | clz.__repr__ = lambda self: f'{type(self).__name__}({_ex_repr(self)})' 32 | 33 | 34 | pretrain_default_model_kwargs = { 35 | 'V9back': dict(), 36 | 37 | 'resnet50': dict(drop_path_rate=0.05), 38 | 'resnet101': dict(drop_path_rate=0.08), 39 | 'resnet152': dict(drop_path_rate=0.10), 40 | 'resnet200': dict(drop_path_rate=0.15), 41 | 'convnext_small': dict(sparse=True, drop_path_rate=0.2), 42 | 'convnext_base': dict(sparse=True, drop_path_rate=0.3), 43 | 'convnext_large': dict(sparse=True, drop_path_rate=0.4), 44 | 45 | } 46 | for kw in pretrain_default_model_kwargs.values(): 47 | kw['pretrained'] = False 48 | kw['num_classes'] = 0 49 | kw['global_pool'] = '' 50 | 51 | 52 | def build_sparse_encoder(name: str, input_size: int, sbn=False, drop_path_rate=0.0, verbose=False): 53 | from encoder import SparseEncoder 54 | 55 | kwargs = pretrain_default_model_kwargs[name] 56 | if drop_path_rate != 0: 57 | kwargs['drop_path_rate'] = drop_path_rate 58 | print(f'[build_sparse_encoder] model kwargs={kwargs}') 59 | cnn = create_model(name, **kwargs) 60 | 61 | return SparseEncoder(cnn, input_size=input_size, sbn=sbn, verbose=verbose) 62 | 63 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import platform 3 | import threading 4 | 5 | 6 | def emojis(str=''): 7 | # Return platform-dependent emoji-safe version of string 8 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 9 | 10 | 11 | class TryExcept(contextlib.ContextDecorator): 12 | # YOLOv5 TryExcept class. 
Usage: @TryExcept() decorator or 'with TryExcept():' context manager 13 | def __init__(self, msg=''): 14 | self.msg = msg 15 | 16 | def __enter__(self): 17 | pass 18 | 19 | def __exit__(self, exc_type, value, traceback): 20 | if value: 21 | print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}")) 22 | return True 23 | 24 | 25 | def threaded(func): 26 | # Multi-threads a target function and returns thread. Usage: @threaded decorator 27 | def wrapper(*args, **kwargs): 28 | thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) 29 | thread.start() 30 | return thread 31 | 32 | return wrapper 33 | 34 | 35 | def join_threads(verbose=False): 36 | # Join all daemon threads, i.e. atexit.register(lambda: join_threads()) 37 | main_thread = threading.current_thread() 38 | for t in threading.enumerate(): 39 | if t is not main_thread: 40 | if verbose: 41 | print(f'Joining thread {t.name}') 42 | t.join() 43 | 44 | 45 | def notebook_init(verbose=True): 46 | # Check system software and hardware 47 | print('Checking setup...') 48 | 49 | import os 50 | import shutil 51 | 52 | from utils.general import check_font, check_requirements, is_colab 53 | from utils.torch_utils import select_device # imports 54 | 55 | check_font() 56 | 57 | import psutil 58 | from IPython import display # to display images and clear console output 59 | 60 | if is_colab(): 61 | shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory 62 | 63 | # System info 64 | if verbose: 65 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 66 | ram = psutil.virtual_memory().total 67 | total, used, free = shutil.disk_usage("/") 68 | display.clear_output() 69 | s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)' 70 | else: 71 | s = '' 72 | 73 | select_device(newline=False) 74 | print(emojis(f'Setup complete ✅ {s}')) 75 | return display 76 | -------------------------------------------------------------------------------- /spark/pretrain/utils/lr_control.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
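# Note on the schedule implemented below: the learning rate warms up linearly from
# 0.005 * peak_lr to peak_lr over the first `wp_it` iterations, then follows a half-cosine
# decay down to 0.001 * peak_lr at `max_it`; the weight decay is cosine-annealed from `wd`
# to `wd_end` over the whole run. Both are further rescaled per parameter group by the
# optional 'lr_scale' / 'weight_decay_scale' entries set up in get_param_groups().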
6 | 
7 | import math
8 | from pprint import pformat
9 | 
10 | 
11 | def lr_wd_annealing(optimizer, peak_lr, wd, wd_end, cur_it, wp_it, max_it):
12 |     wp_it = round(wp_it)
13 |     if cur_it < wp_it:
14 |         cur_lr = 0.005 * peak_lr + 0.995 * peak_lr * cur_it / wp_it
15 |     else:
16 |         ratio = (cur_it - wp_it) / (max_it - 1 - wp_it)
17 |         cur_lr = 0.001 * peak_lr + 0.999 * peak_lr * (0.5 + 0.5 * math.cos(math.pi * ratio))
18 | 
19 |     ratio = cur_it / (max_it - 1)
20 |     cur_wd = wd_end + (wd - wd_end) * (0.5 + 0.5 * math.cos(math.pi * ratio))
21 | 
22 |     min_lr, max_lr = cur_lr, cur_lr
23 |     min_wd, max_wd = cur_wd, cur_wd
24 |     for param_group in optimizer.param_groups:
25 |         scaled_lr = param_group['lr'] = cur_lr * param_group.get('lr_scale', 1)  # 'lr_scale' could be assigned
26 |         min_lr, max_lr = min(min_lr, scaled_lr), max(max_lr, scaled_lr)
27 |         scaled_wd = param_group['weight_decay'] = cur_wd * param_group.get('weight_decay_scale', 1)  # 'weight_decay_scale' could be assigned
28 |         min_wd, max_wd = min(min_wd, scaled_wd), max(max_wd, scaled_wd)
29 |     return min_lr, max_lr, min_wd, max_wd
30 | 
31 | 
32 | def get_param_groups(model, nowd_keys=()):
33 |     para_groups, para_groups_dbg = {}, {}
34 | 
35 |     for name, para in model.named_parameters():
36 |         if not para.requires_grad:
37 |             continue  # frozen weights
38 |         if len(para.shape) == 1 or name.endswith('.bias') or any(k in name for k in nowd_keys):
39 |             wd_scale, group_name = 0., 'no_decay'
40 |         else:
41 |             wd_scale, group_name = 1., 'decay'
42 | 
43 |         if group_name not in para_groups:
44 |             para_groups[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': 1.}
45 |             para_groups_dbg[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': 1.}
46 |         para_groups[group_name]['params'].append(para)
47 |         para_groups_dbg[group_name]['params'].append(name)
48 | 
49 |     for g in para_groups_dbg.values():
50 |         g['params'] = pformat(', '.join(g['params']), width=200)
51 | 
52 |     print(f'[get_ft_param_groups] param groups = \n{pformat(para_groups_dbg, indent=2, width=250)}\n')
53 |     return list(para_groups.values())
54 | 
--------------------------------------------------------------------------------
/utils/lion.py:
--------------------------------------------------------------------------------
1 | """PyTorch implementation of the Lion optimizer."""
2 | import torch
3 | from torch.optim.optimizer import Optimizer
4 | 
5 | 
6 | class Lion(Optimizer):
7 |     r"""Implements Lion algorithm."""
8 | 
9 |     def __init__(self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0.0):
10 |         """Initialize the hyperparameters.
11 |         Args:
12 |             params (iterable): iterable of parameters to optimize or dicts defining
13 |                 parameter groups
14 |             lr (float, optional): learning rate (default: 1e-4)
15 |             betas (Tuple[float, float], optional): coefficients used for computing the
16 |                 update direction and the momentum running average of the gradient (default: (0.9, 0.99))
17 |             weight_decay (float, optional): weight decay coefficient (default: 0)
18 |         """
19 | 
20 |         if not 0.0 <= lr:
21 |             raise ValueError('Invalid learning rate: {}'.format(lr))
22 |         if not 0.0 <= betas[0] < 1.0:
23 |             raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0]))
24 |         if not 0.0 <= betas[1] < 1.0:
25 |             raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1]))
26 |         defaults = dict(lr=lr, betas=betas, weight_decay=weight_decay)
27 |         super().__init__(params, defaults)
28 | 
29 |     @torch.no_grad()
30 |     def step(self, closure=None):
31 |         """Performs a single optimization step.
32 |         Args:
33 |             closure (callable, optional): A closure that reevaluates the model
34 |                 and returns the loss.
35 |         Returns:
36 |             the loss.
37 |         """
38 |         loss = None
39 |         if closure is not None:
40 |             with torch.enable_grad():
41 |                 loss = closure()
42 | 
43 |         for group in self.param_groups:
44 |             for p in group['params']:
45 |                 if p.grad is None:
46 |                     continue
47 | 
48 |                 # Perform stepweight decay
49 |                 p.data.mul_(1 - group['lr'] * group['weight_decay'])
50 | 
51 |                 grad = p.grad
52 |                 state = self.state[p]
53 |                 # State initialization
54 |                 if len(state) == 0:
55 |                     # Exponential moving average of gradient values
56 |                     state['exp_avg'] = torch.zeros_like(p)
57 | 
58 |                 exp_avg = state['exp_avg']
59 |                 beta1, beta2 = group['betas']
60 | 
61 |                 # Weight update
62 |                 update = exp_avg * beta1 + grad * (1 - beta1)
63 |                 p.add_(torch.sign(update), alpha=-group['lr'])
64 |                 # Update the momentum running average
65 |                 exp_avg.mul_(beta2).add_(grad, alpha=1 - beta2)
66 | 
67 |         return loss
--------------------------------------------------------------------------------
/utils/callbacks.py:
--------------------------------------------------------------------------------
1 | import threading
2 | 
3 | 
4 | class Callbacks:
5 |     """
6 |     Handles all registered callbacks for YOLOv5 Hooks
7 |     """
8 | 
9 |     def __init__(self):
10 |         # Define the available callbacks
11 |         self._callbacks = {
12 |             'on_pretrain_routine_start': [],
13 |             'on_pretrain_routine_end': [],
14 |             'on_train_start': [],
15 |             'on_train_epoch_start': [],
16 |             'on_train_batch_start': [],
17 |             'optimizer_step': [],
18 |             'on_before_zero_grad': [],
19 |             'on_train_batch_end': [],
20 |             'on_train_epoch_end': [],
21 |             'on_val_start': [],
22 |             'on_val_batch_start': [],
23 |             'on_val_image_end': [],
24 |             'on_val_batch_end': [],
25 |             'on_val_end': [],
26 |             'on_fit_epoch_end': [],  # fit = train + val
27 |             'on_model_save': [],
28 |             'on_train_end': [],
29 |             'on_params_update': [],
30 |             'teardown': [],}
31 |         self.stop_training = False  # set True to interrupt training
32 | 
33 |     def register_action(self, hook, name='', callback=None):
34 |         """
35 |         Register a new action to a callback hook
36 | 
37 |         Args:
38 |             hook: The callback hook name to register the action to
39 |             name: The name of the action for later reference
40 |             callback: The callback to fire
41 |         """
42 |         assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
43 |         assert callable(callback), f"callback '{callback}' is not callable"
44 |         self._callbacks[hook].append({'name': name, 'callback': callback})
45 | 
46 |     def get_registered_actions(self, hook=None):
47 |         """
48 |         Returns all the registered actions by callback hook
49 | 
50 |         Args:
51 |             hook: The name of the hook to check, defaults to all
52 |         """
53 |         return self._callbacks[hook] if hook else self._callbacks
54 | 
55 |     def run(self, hook, *args, thread=False, **kwargs):
56 |         """
57 |         Loop through the registered actions and fire all callbacks, optionally each in a daemon thread
58 | 
59 |         Args:
60 |             hook: The name of the hook to fire the callbacks for
61 |             args: Arguments to receive from YOLOv5
62 |             thread: (boolean) Run callbacks in daemon thread
63 |             kwargs: Keyword Arguments to receive from YOLOv5
64 |         """
65 | 
66 |         assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
67 |         for logger in self._callbacks[hook]:
68 |             if thread:
69 |                 threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start()
70 |             else:
71 |                 logger['callback'](*args, **kwargs)
72 | 
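# Usage sketch (illustrative only; 'on_train_start' is one of the hooks defined above):
#   callbacks = Callbacks()
#   callbacks.register_action('on_train_start', name='greet', callback=lambda: print('training started'))
#   callbacks.run('on_train_start')  # fires every action registered on this hook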
-------------------------------------------------------------------------------- /spark/pretrain/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import random 8 | 9 | import numpy as np 10 | import torch 11 | from torch.utils.data.sampler import Sampler 12 | 13 | 14 | def worker_init_fn(worker_id): 15 | # https://pytorch.org/docs/stable/notes/randomness.html#dataloader 16 | worker_seed = torch.initial_seed() % 2 ** 32 17 | np.random.seed(worker_seed) 18 | random.seed(worker_seed) 19 | 20 | 21 | class DistInfiniteBatchSampler(Sampler): 22 | def __init__(self, world_size, rank, dataset_len, glb_batch_size, seed=1, filling=False, shuffle=True): 23 | assert glb_batch_size % world_size == 0 24 | self.world_size, self.rank = world_size, rank 25 | self.dataset_len = dataset_len 26 | self.glb_batch_size = glb_batch_size 27 | self.batch_size = glb_batch_size // world_size 28 | 29 | self.iters_per_ep = (dataset_len + glb_batch_size - 1) // glb_batch_size 30 | self.filling = filling 31 | self.shuffle = shuffle 32 | self.epoch = 0 33 | self.seed = seed 34 | self.indices = self.gener_indices() 35 | 36 | def gener_indices(self): 37 | global_max_p = self.iters_per_ep * self.glb_batch_size # global_max_p % world_size must be 0 cuz glb_batch_size % world_size == 0 38 | if self.shuffle: 39 | g = torch.Generator() 40 | g.manual_seed(self.epoch + self.seed) 41 | global_indices = torch.randperm(self.dataset_len, generator=g) 42 | else: 43 | global_indices = torch.arange(self.dataset_len) 44 | filling = global_max_p - global_indices.shape[0] 45 | if filling > 0 and self.filling: 46 | global_indices = torch.cat((global_indices, global_indices[:filling])) 47 | global_indices = tuple(global_indices.numpy().tolist()) 48 | 49 | seps = torch.linspace(0, len(global_indices), self.world_size + 1, dtype=torch.int) 50 | local_indices = global_indices[seps[self.rank]:seps[self.rank + 1]] 51 | self.max_p = len(local_indices) 52 | return local_indices 53 | 54 | def __iter__(self): 55 | self.epoch = 0 56 | while True: 57 | self.epoch += 1 58 | p, q = 0, 0 59 | while p < self.max_p: 60 | q = p + self.batch_size 61 | yield self.indices[p:q] 62 | p = q 63 | if self.shuffle: 64 | self.indices = self.gener_indices() 65 | 66 | def __len__(self): 67 | return self.iters_per_ep 68 | 69 | 70 | if __name__ == '__main__': 71 | W = 16 72 | for rk in range(W): 73 | ind = DistInfiniteBatchSampler(W, rk, 5024, 5024).gener_indices() 74 | print(rk, len(ind)) 75 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
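# These fine-tuning helpers mirror /spark/pretrain/utils/lr_control.py, with two changes:
# lr_wd_annealing keeps the weight decay constant (there is no `wd_end`), and
# get_param_groups optionally applies layer-wise learning-rate decay, scaling each
# group's lr by lr_scale ** scale_exp as reported by the model's get_layer_id_and_scale_exp().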
6 | 7 | import math 8 | from pprint import pformat 9 | 10 | 11 | def lr_wd_annealing(optimizer, peak_lr, wd, cur_it, wp_it, max_it): 12 | wp_it = round(wp_it) 13 | if cur_it < wp_it: 14 | cur_lr = 0.005 * peak_lr + 0.995 * peak_lr * cur_it / wp_it 15 | else: 16 | ratio = (cur_it - wp_it) / (max_it - 1 - wp_it) 17 | cur_lr = 0.001 * peak_lr + 0.999 * peak_lr * (0.5 + 0.5 * math.cos(math.pi * ratio)) 18 | 19 | min_lr, max_lr = cur_lr, cur_lr 20 | min_wd, max_wd = wd, wd 21 | for param_group in optimizer.param_groups: 22 | scaled_lr = param_group['lr'] = cur_lr * param_group.get('lr_scale', 1) # 'lr_scale' could be assigned 23 | min_lr, max_lr = min(min_lr, scaled_lr), max(max_lr, scaled_lr) 24 | scaled_wd = param_group['weight_decay'] = wd * param_group.get('weight_decay_scale', 1) # 'weight_decay_scale' could be assigned 25 | min_wd, max_wd = min(min_wd, scaled_wd), max(max_wd, scaled_wd) 26 | return min_lr, max_lr, min_wd, max_wd 27 | 28 | 29 | def get_param_groups(model, nowd_keys=(), lr_scale=0.0): 30 | using_lr_scale = hasattr(model, 'get_layer_id_and_scale_exp') and 0.0 < lr_scale < 1.0 31 | print(f'[get_ft_param_groups][lr decay] using_lr_scale={using_lr_scale}, ft_lr_scale={lr_scale}') 32 | para_groups, para_groups_dbg = {}, {} 33 | 34 | for name, para in model.named_parameters(): 35 | if not para.requires_grad: 36 | continue # frozen weights 37 | if len(para.shape) == 1 or name.endswith('.bias') or any(k in name for k in nowd_keys): 38 | wd_scale, group_name = 0., 'no_decay' 39 | else: 40 | wd_scale, group_name = 1., 'decay' 41 | 42 | if using_lr_scale: 43 | layer_id, scale_exp = model.get_layer_id_and_scale_exp(name) 44 | group_name = f'layer{layer_id}_' + group_name 45 | this_lr_scale = lr_scale ** scale_exp 46 | dbg = f'[layer {layer_id}][sc = {lr_scale} ** {scale_exp}]' 47 | else: 48 | this_lr_scale = 1 49 | dbg = f'[no scale]' 50 | 51 | if group_name not in para_groups: 52 | para_groups[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': this_lr_scale} 53 | para_groups_dbg[group_name] = {'params': [], 'weight_decay_scale': wd_scale, 'lr_scale': dbg} 54 | para_groups[group_name]['params'].append(para) 55 | para_groups_dbg[group_name]['params'].append(name) 56 | 57 | for g in para_groups_dbg.values(): 58 | g['params'] = pformat(', '.join(g['params']), width=200) 59 | 60 | print(f'[get_ft_param_groups] param groups = \n{pformat(para_groups_dbg, indent=2, width=250)}\n') 61 | return list(para_groups.values()) 62 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/README.md: -------------------------------------------------------------------------------- 1 | ## About code isolation 2 | 3 | This `downstream_imagenet` is isolated from pre-training codes. One can treat this `downstream_imagenet` as an independent codebase 🛠️. 4 | 5 | 6 | ## Preparation for ImageNet-1k fine-tuning 7 | 8 | See [INSTALL.md](https://github.com/keyu-tian/SparK/blob/main/INSTALL.md) to prepare `pip` dependencies and the ImageNet dataset. 9 | 10 | **Note: for network definitions, we directly use `timm.models.ResNet` and [official ConvNeXt](https://github.com/facebookresearch/ConvNeXt/blob/048efcea897d999aed302f2639b6270aedf8d4c8/models/convnext.py).** 11 | 12 | 13 | ## Fine-tuning on ImageNet-1k from pre-trained weights 14 | 15 | Run [/downstream_imagenet/main.py](/downstream_imagenet/main.py) via `torchrun`. 
16 | **It is required to specify** the ImageNet data folder (`--data_path`), your experiment name & log dir (`--exp_name` and `--exp_dir`, created automatically if they don't exist), the model name (`--model`; for valid choices see the keys of `HP_DEFAULT_VALUES` in [/downstream_imagenet/arg.py line14](/downstream_imagenet/arg.py#L14)), and the pretrained weight file `--resume_from` to run fine-tuning.
17 | 
18 | All the other configurations have their default values, listed in [/downstream_imagenet/arg.py#L13](/downstream_imagenet/arg.py#L13).
19 | You can override any default with, e.g., `--bs=1024`.
20 | 
21 | 
22 | Here is an example to fine-tune a ConvNeXt-Small on an 8-GPU single machine:
23 | ```shell script
24 | $ cd /path/to/SparK/downstream_imagenet
25 | $ torchrun --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr=localhost --master_port=<port> main.py \
26 |   --data_path=/path/to/imagenet --exp_name=<experiment_name> --exp_dir=/path/to/logdir \
27 |   --model=convnext_small --resume_from=/some/path/to/convnextS_1kpretrained_official_style.pth
28 | ```
29 | 
30 | For multiple machines, change the `--nnodes` and `--master_addr` to your configurations. E.g.:
31 | ```shell script
32 | $ torchrun --nproc_per_node=8 --nnodes=<num_nodes> --node_rank=<node_rank> --master_addr=<master_node_address> --master_port=<port> main.py \
33 |   ...
34 | ```
35 | 
36 | 
37 | ## Logging
38 | 
39 | See files under `--exp_dir` to track your experiment:
40 | 
41 | - `<model>_1kfinetuned_last.pth`: the latest model weights
42 | - `<model>_1kfinetuned_best.pth`: model weights with the highest acc
43 | - `<model>_1kfinetuned_best_ema.pth`: EMA weights with the highest acc
44 | - `finetune_log.txt`: records some important information such as:
45 |     - `git_commit_id`: git version
46 |     - `cmd`: all arguments passed to the script
47 | 
48 |     It also reports training loss/acc, best evaluation acc, and remaining time at each epoch.
49 | 
50 | - `tensorboard_log/`: saves a lot of tensorboard logs; you can visualize accuracies, loss values, learning rates, gradient norms, and more via `tensorboard --logdir /path/to/this/tensorboard_log/ --port 23333`.
51 | 
52 | ## Resuming
53 | 
54 | Use `--resume_from` again, like `--resume_from=path/to/<model>_1kfinetuned_last.pth`.
55 | 
--------------------------------------------------------------------------------
/spark/pretrain/models/resnet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | from typing import List 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | from timm.models.resnet import ResNet 11 | 12 | 13 | # hack: inject the `get_downsample_ratio` function into `timm.models.resnet.ResNet` 14 | def get_downsample_ratio(self: ResNet) -> int: 15 | return 32 16 | 17 | 18 | # hack: inject the `get_feature_map_channels` function into `timm.models.resnet.ResNet` 19 | def get_feature_map_channels(self: ResNet) -> List[int]: 20 | # `self.feature_info` is maintained by `timm` 21 | 22 | return [info['num_chs'] for info in self.feature_info[1:]] 23 | 24 | 25 | # hack: override the forward function of `timm.models.resnet.ResNet` 26 | def forward(self, x, hierarchical=False): 27 | """ this forward function is a modified version of `timm.models.resnet.ResNet.forward` 28 | # >>> ResNet.forward 29 | """ 30 | x = self.conv1(x) 31 | x = self.bn1(x) 32 | x = self.act1(x) 33 | x = self.maxpool(x) 34 | 35 | if hierarchical: 36 | ls = [] 37 | x = self.layer1(x); ls.append(x) 38 | x = self.layer2(x); ls.append(x) 39 | x = self.layer3(x); ls.append(x) 40 | x = self.layer4(x); ls.append(x) 41 | return ls 42 | else: 43 | x = self.global_pool(x) 44 | if self.drop_rate: 45 | x = F.dropout(x, p=float(self.drop_rate), training=self.training) 46 | x = self.fc(x) 47 | return x 48 | 49 | 50 | ResNet.get_downsample_ratio = get_downsample_ratio 51 | ResNet.get_feature_map_channels = get_feature_map_channels 52 | ResNet.forward = forward 53 | 54 | 55 | @torch.no_grad() 56 | def convnet_1(): 57 | from timm.models import create_model 58 | cnn = create_model('resnet50') 59 | print('get_downsample_ratio:', cnn.get_downsample_ratio()) 60 | print('get_feature_map_channels:', cnn.get_feature_map_channels()) 61 | 62 | downsample_ratio = cnn.get_downsample_ratio() 63 | feature_map_channels = cnn.get_feature_map_channels() 64 | 65 | # check the forward function 66 | B, C, H, W = 4, 3, 224, 224 67 | inp = torch.rand(B, C, H, W) 68 | feats = cnn(inp, hierarchical=True) 69 | assert isinstance(feats, list) 70 | assert len(feats) == len(feature_map_channels) 71 | print([tuple(t.shape) for t in feats]) 72 | 73 | # check the downsample ratio 74 | feats = cnn(inp, hierarchical=True) 75 | assert feats[-1].shape[-2] == H // downsample_ratio 76 | assert feats[-1].shape[-1] == W // downsample_ratio 77 | 78 | # check the channel number 79 | for feat, ch in zip(feats, feature_map_channels): 80 | assert feat.ndim == 4 81 | assert feat.shape[1] == ch 82 | 83 | 84 | if __name__ == '__main__': 85 | convnet_1() 86 | 87 | # 88 | # get_downsample_ratio: 32 89 | # get_feature_map_channels: [256, 512, 1024, 2048] 90 | # [(4, 256, 56, 56), (4, 512, 28, 28), (4, 1024, 14, 14), (4, 2048, 7, 7)] -------------------------------------------------------------------------------- /utils/autobatch.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from utils.general import LOGGER, colorstr 7 | from utils.torch_utils import profile 8 | 9 | 10 | def check_train_batch_size(model, imgsz=640, amp=True): 11 | # Check YOLOv5 training batch size 12 | with torch.cuda.amp.autocast(amp): 13 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 14 | 15 | 16 | def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): 17 | # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory 18 | # Usage: 19 | # import torch 20 | # from utils.autobatch import autobatch 21 | # 
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
22 |     #     print(autobatch(model))
23 | 
24 |     # Check device
25 |     prefix = colorstr('AutoBatch: ')
26 |     LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
27 |     device = next(model.parameters()).device  # get model device
28 |     if device.type == 'cpu':
29 |         LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
30 |         return batch_size
31 |     if torch.backends.cudnn.benchmark:
32 |         LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
33 |         return batch_size
34 | 
35 |     # Inspect CUDA memory
36 |     gb = 1 << 30  # bytes to GiB (1024 ** 3)
37 |     d = str(device).upper()  # 'CUDA:0'
38 |     properties = torch.cuda.get_device_properties(device)  # device properties
39 |     t = properties.total_memory / gb  # GiB total
40 |     r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
41 |     a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
42 |     f = t - (r + a)  # GiB free
43 |     LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
44 | 
45 |     # Profile batch sizes
46 |     batch_sizes = [1, 2, 4, 8, 16]
47 |     try:
48 |         img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
49 |         results = profile(img, model, n=3, device=device)
50 |     except Exception as e:
51 |         LOGGER.warning(f'{prefix}{e}')
52 |         return batch_size  # profiling failed; fall back to default ('results' would otherwise be undefined below)
53 |     # Fit a solution
54 |     y = [x[2] for x in results if x]  # memory [2]
55 |     p = np.polyfit(batch_sizes[:len(y)], y, deg=1)  # first degree polynomial fit
56 |     b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
57 |     if None in results:  # some sizes failed
58 |         i = results.index(None)  # first fail index
59 |         if b >= batch_sizes[i]:  # y intercept above failure point
60 |             b = batch_sizes[max(i - 1, 0)]  # select prior safe point
61 |     if b < 1 or b > 1024:  # b outside of safe range
62 |         b = batch_size
63 |         LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')
64 | 
65 |     fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
66 |     LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
67 |     return b
68 | 
--------------------------------------------------------------------------------
/spark/pretrain/decoder.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
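# LightDecoder (below) takes a list of encoder feature maps ordered from the most
# downsampled to the least; each UNetBlock upsamples 2x, and the matching feature map,
# when present, is added as a skip connection before being decoded (see forward()),
# ending with a 1x1 conv that projects back to 3 image channels.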
6 | 
7 | import math
8 | from typing import List
9 | 
10 | import torch
11 | import torch.nn as nn
12 | from timm.models.layers import trunc_normal_
13 | 
14 | from utils.misc import is_pow2n
15 | 
16 | 
17 | class UNetBlock(nn.Module):
18 |     def __init__(self, cin, cout, bn2d):
19 |         """
20 |         a UNet block with 2x up sampling
21 |         """
22 |         super().__init__()
23 |         self.up_sample = nn.ConvTranspose2d(cin, cin, kernel_size=4, stride=2, padding=1, bias=True)
24 |         self.conv = nn.Sequential(
25 |             nn.Conv2d(cin, cin, kernel_size=3, stride=1, padding=1, bias=False), bn2d(cin), nn.ReLU6(inplace=True),
26 |             nn.Conv2d(cin, cout, kernel_size=3, stride=1, padding=1, bias=False), bn2d(cout),
27 |         )
28 | 
29 |     def forward(self, x):
30 |         x = self.up_sample(x)
31 |         return self.conv(x)
32 | 
33 | 
34 | class LightDecoder(nn.Module):
35 |     def __init__(self, up_sample_ratio, width=768, sbn=True):  # todo: the decoder's width follows a simple halving rule; you can change it to any other rule
36 |         super().__init__()
37 |         self.width = width
38 |         assert is_pow2n(up_sample_ratio)
39 |         n = round(math.log2(up_sample_ratio))
40 |         channels = [self.width // 2 ** i for i in range(n + 1)]  # todo: the decoder's width follows a simple halving rule; you can change it to any other rule
41 |         bn2d = nn.SyncBatchNorm if sbn else nn.BatchNorm2d
42 |         self.dec = nn.ModuleList([UNetBlock(cin, cout, bn2d) for (cin, cout) in zip(channels[:-1], channels[1:])])
43 |         self.proj = nn.Conv2d(channels[-1], 3, kernel_size=1, stride=1, bias=True)
44 | 
45 |         self.initialize()
46 | 
47 |     def forward(self, to_dec: List[torch.Tensor]):
48 |         x = 0
49 |         for i, d in enumerate(self.dec):
50 |             if i < len(to_dec) and to_dec[i] is not None:
51 |                 x = x + to_dec[i]
52 |             x = self.dec[i](x)
53 |         return self.proj(x)
54 | 
55 |     def extra_repr(self) -> str:
56 |         return f'width={self.width}'
57 | 
58 |     def initialize(self):
59 |         for m in self.modules():
60 |             if isinstance(m, nn.Linear):
61 |                 trunc_normal_(m.weight, std=.02)
62 |                 if m.bias is not None:
63 |                     nn.init.constant_(m.bias, 0)
64 |             elif isinstance(m, nn.Conv2d):
65 |                 trunc_normal_(m.weight, std=.02)
66 |                 if m.bias is not None:
67 |                     nn.init.constant_(m.bias, 0)
68 |             elif isinstance(m, nn.ConvTranspose2d):  # nn.Conv2d is already handled by the branch above, so only transposed convs reach here
69 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
70 |                 if m.bias is not None:
71 |                     nn.init.constant_(m.bias, 0.)
72 |             elif isinstance(m, (nn.LayerNorm, nn.BatchNorm1d, nn.BatchNorm2d, nn.SyncBatchNorm)):
73 |                 nn.init.constant_(m.bias, 0)
74 |                 nn.init.constant_(m.weight, 1.0)
75 | 
--------------------------------------------------------------------------------
/utils/loggers/wandb/sweep.yaml:
--------------------------------------------------------------------------------
1 | # Hyperparameters for training
2 | # To set a range,
3 | # provide min and max values as:
4 | #      parameter:
5 | #
6 | #         min: scalar
7 | #         max: scalar
8 | #   OR
9 | #
10 | # set a specific list of values to search over:
11 | #     parameter:
12 | #         values: [scalar1, scalar2, scalar3...]
13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 4.0 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 
29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /spark/pretrain/utils/imagenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 
7 | import os
8 | from typing import Any, Callable, Optional, Tuple
9 | 
10 | import PIL.Image as PImage
11 | from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
12 | from torchvision.datasets.folder import DatasetFolder, IMG_EXTENSIONS
13 | from torchvision.transforms import transforms
14 | from torch.utils.data import Dataset
15 | 
16 | try:
17 |     from torchvision.transforms import InterpolationMode
18 |     interpolation = InterpolationMode.BICUBIC
19 | except ImportError:
20 |     import PIL
21 |     interpolation = PIL.Image.BICUBIC
22 | 
23 | 
24 | def pil_loader(path):
25 |     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
26 |     with open(path, 'rb') as f: img: PImage.Image = PImage.open(f).convert('RGB')
27 |     return img
28 | 
29 | 
30 | class ImageNetDataset(DatasetFolder):
31 |     def __init__(
32 |             self,
33 |             imagenet_folder: str,
34 |             train: bool,
35 |             transform: Callable,
36 |             is_valid_file: Optional[Callable[[str], bool]] = None,
37 |     ):
38 |         imagenet_folder = os.path.join(imagenet_folder, 'train' if train else 'val')
39 |         super(ImageNetDataset, self).__init__(
40 |             imagenet_folder,
41 |             loader=pil_loader,
42 |             extensions=IMG_EXTENSIONS if is_valid_file is None else None,
43 |             transform=transform,
44 |             target_transform=None, is_valid_file=is_valid_file
45 |         )
46 | 
47 |         self.samples = tuple(img for (img, label) in self.samples)
48 |         self.targets = None  # this is self-supervised learning so we don't need labels
49 | 
50 |     def __getitem__(self, index: int) -> Any:
51 |         img_file_path = self.samples[index]
52 |         return self.transform(self.loader(img_file_path))
53 | 
54 | 
55 | def build_dataset_to_pretrain(dataset_path, input_size) -> Dataset:
56 |     """
57 |     You may need to modify this function to return your own dataset.
58 |     Define a new class, a subclass of `Dataset`, to replace our ImageNetDataset.
59 |     Use dataset_path to build your image file path list.
60 |     Use input_size to create the transformation function for your images; you can refer to the `trans_train` below.
61 | 
62 |     :param dataset_path: the folder of the dataset
63 |     :param input_size: the input size (image resolution)
64 |     :return: the dataset used for pretraining
65 |     """
66 |     trans_train = transforms.Compose([
67 |         transforms.RandomResizedCrop(input_size, scale=(0.67, 1.0), interpolation=interpolation),
68 |         transforms.RandomHorizontalFlip(),
69 |         transforms.ToTensor(),
70 |         transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
71 |     ])
72 | 
73 |     dataset_path = os.path.abspath(dataset_path)
74 |     for postfix in ('train', 'val'):
75 |         if dataset_path.endswith(postfix):
76 |             dataset_path = dataset_path[:-len(postfix)]
77 | 
78 |     dataset_train = ImageNetDataset(imagenet_folder=dataset_path, transform=trans_train, train=True)
79 |     print_transform(trans_train, '[pre-train]')
80 |     return dataset_train
81 | 
82 | 
83 | def print_transform(transform, s):
84 |     print(f'Transform {s} = ')
85 |     for t in transform.transforms:
86 |         print(t)
87 |     print('---------------------------\n')
88 | 
--------------------------------------------------------------------------------
/spark/pretrain/dist.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
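# A minimal usage sketch of the helpers below (single-node launch; the model
# and loss names are illustrative placeholders, not part of this file):
#
#   import dist
#   dist.initialize(backend='nccl')                  # falls back gracefully on CPU / single GPU
#   net = dist.parallelize(my_model, syncbn=True)    # DDP wrapper + optional SyncBatchNorm
#   loss = torch.tensor([local_loss], device=dist.get_device())
#   dist.allreduce(loss)                             # in-place sum across ranks
#   if dist.is_master():
#       print(loss.item() / dist.get_world_size())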
6 | 7 | import os 8 | from typing import List 9 | from typing import Union 10 | 11 | import sys 12 | import torch 13 | import torch.distributed as tdist 14 | import torch.multiprocessing as mp 15 | 16 | __rank, __local_rank, __world_size, __device = 0, 0, 1, 'cpu' 17 | __initialized = False 18 | 19 | 20 | def initialized(): 21 | return __initialized 22 | 23 | 24 | def initialize(backend='nccl'): 25 | global __device 26 | if not torch.cuda.is_available(): 27 | print(f'[dist initialize] cuda is not available, use cpu instead', file=sys.stderr) 28 | return 29 | elif 'RANK' not in os.environ: 30 | __device = torch.empty(1).cuda().device 31 | print(f'[dist initialize] RANK is not set, use 1 GPU instead', file=sys.stderr) 32 | return 33 | 34 | # ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py#L29 35 | if mp.get_start_method(allow_none=True) is None: 36 | mp.set_start_method('spawn') 37 | global_rank, num_gpus = int(os.environ['RANK']), torch.cuda.device_count() 38 | local_rank = global_rank % num_gpus 39 | torch.cuda.set_device(local_rank) 40 | tdist.init_process_group(backend=backend) 41 | 42 | global __rank, __local_rank, __world_size, __initialized 43 | __local_rank = local_rank 44 | __rank, __world_size = tdist.get_rank(), tdist.get_world_size() 45 | __device = torch.empty(1).cuda().device 46 | __initialized = True 47 | 48 | assert tdist.is_initialized(), 'torch.distributed is not initialized!' 49 | 50 | 51 | def get_rank(): 52 | return __rank 53 | 54 | 55 | def get_local_rank(): 56 | return __local_rank 57 | 58 | 59 | def get_world_size(): 60 | return __world_size 61 | 62 | 63 | def get_device(): 64 | return __device 65 | 66 | 67 | def is_master(): 68 | return __rank == 0 69 | 70 | 71 | def is_local_master(): 72 | return __local_rank == 0 73 | 74 | 75 | def barrier(): 76 | if __initialized: 77 | tdist.barrier() 78 | 79 | 80 | def parallelize(net, syncbn=False): 81 | if syncbn: 82 | net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) 83 | net = net.cuda() 84 | net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[get_local_rank()], find_unused_parameters=False, broadcast_buffers=False) 85 | return net 86 | 87 | 88 | def allreduce(t: torch.Tensor) -> None: 89 | if __initialized: 90 | if not t.is_cuda: 91 | cu = t.detach().cuda() 92 | tdist.all_reduce(cu) 93 | t.copy_(cu.cpu()) 94 | else: 95 | tdist.all_reduce(t) 96 | 97 | 98 | def allgather(t: torch.Tensor, cat=True) -> Union[List[torch.Tensor], torch.Tensor]: 99 | if __initialized: 100 | if not t.is_cuda: 101 | t = t.cuda() 102 | ls = [torch.empty_like(t) for _ in range(__world_size)] 103 | tdist.all_gather(ls, t) 104 | else: 105 | ls = [t] 106 | if cat: 107 | ls = torch.cat(ls, dim=0) 108 | return ls 109 | 110 | 111 | def broadcast(t: torch.Tensor, src_rank) -> None: 112 | if __initialized: 113 | if not t.is_cuda: 114 | cu = t.detach().cuda() 115 | tdist.broadcast(cu, src=src_rank) 116 | t.copy_(cu.cpu()) 117 | else: 118 | tdist.broadcast(t, src=src_rank) 119 | -------------------------------------------------------------------------------- /spark/pretrain/models/custom_detr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
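# A minimal sanity-check sketch (mirroring `convnet_test` at the bottom of this
# file): SparK's sparse encoder only relies on the three methods implemented by
# `YourConvNet` below, and `hierarchical=True` must return one feature map per
# entry of `get_feature_map_channels()`:
#
#   from timm.models import create_model
#   cnn = create_model('HGNetv2')
#   feats = cnn(torch.rand(2, 3, 224, 224), hierarchical=True)
#   assert [f.shape[1] for f in feats] == cnn.get_feature_map_channels()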
6 | 
7 | import torch
8 | import torch.nn as nn
9 | from typing import List
10 | from timm.models.registry import register_model
11 | from HG.HGBlock import HGStem, HGBlock
12 | from HG.block import DWConv
13 | 
14 | 
15 | class YourConvNet(nn.Module):
16 |     def __init__(self, *args, **kwargs):
17 |         super().__init__()
18 | 
19 |         self.mlist = nn.ModuleList(
20 |             [HGStem(3, 32, 64),
21 |              HGBlock(64, 64, 128, 3, n=6),
22 | 
23 |              DWConv(128, 128, 3, 2, 1, False),
24 |              HGBlock(128, 128, 512, 3, n=6),
25 |              HGBlock(512, 128, 512, 3, lightconv=False, shortcut=True, n=6),
26 | 
27 |              DWConv(512, 512, 3, 2, 1, False),
28 |              HGBlock(512, 256, 1024, 5, lightconv=True, shortcut=False, n=6),
29 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
30 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
31 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
32 |              HGBlock(1024, 256, 1024, 5, lightconv=True, shortcut=True, n=6),
33 | 
34 |              DWConv(1024, 1024, 3, 2, 1, False),
35 |              HGBlock(1024, 512, 2048, 5, lightconv=True, shortcut=False, n=6),
36 |              HGBlock(2048, 512, 2048, 5, lightconv=True, shortcut=True, n=6)
37 |              ]
38 |         )
39 | 
40 |     def get_downsample_ratio(self) -> int:
41 |         return 32
42 | 
43 |     def get_feature_map_channels(self) -> List[int]:
44 |         return [128, 512, 1024, 2048]
45 | 
46 |     def forward(self, x: torch.Tensor, hierarchical=False):
47 |         if hierarchical:
48 |             ls = []
49 |             for index, modules in enumerate(self.mlist):
50 |                 x = modules(x)
51 |                 if index in [1, 4, 10, 13]:
52 |                     ls.append(x)
53 |             return ls
54 |         else:
55 |             for modules in self.mlist:
56 |                 x = modules(x)
57 |             return x
58 | 
59 | 
60 | @register_model
61 | def HGNetv2(pretrained=False, **kwargs):
62 |     return YourConvNet(**kwargs)
63 | 
64 | 
65 | @torch.no_grad()
66 | def convnet_test():
67 |     from timm.models import create_model
68 |     cnn = create_model('HGNetv2')
69 |     print('get_downsample_ratio:', cnn.get_downsample_ratio())
70 |     print('get_feature_map_channels:', cnn.get_feature_map_channels())
71 | 
72 |     downsample_ratio = cnn.get_downsample_ratio()
73 |     feature_map_channels = cnn.get_feature_map_channels()
74 | 
75 |     # check the forward function
76 |     B, C, H, W = 4, 3, 224, 224
77 |     inp = torch.rand(B, C, H, W)
78 |     feats = cnn(inp, hierarchical=True)
79 |     assert isinstance(feats, list)
80 |     assert len(feats) == len(feature_map_channels)
81 |     print([tuple(t.shape) for t in feats])
82 | 
83 |     # check the downsample ratio
84 |     feats = cnn(inp, hierarchical=True)
85 |     assert feats[-1].shape[-2] == H // downsample_ratio
86 |     assert feats[-1].shape[-1] == W // downsample_ratio
87 | 
88 |     # check the channel number
89 |     for feat, ch in zip(feats, feature_map_channels):
90 |         assert feat.ndim == 4
91 |         assert feat.shape[1] == ch
92 | 
93 | 
94 | if __name__ == '__main__':
95 |     convnet_test()
96 | 
--------------------------------------------------------------------------------
/utils/activations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | 
6 | class SiLU(nn.Module):
7 |     # SiLU activation https://arxiv.org/pdf/1606.08415.pdf
8 |     @staticmethod
9 |     def forward(x):
10 |         return x * torch.sigmoid(x)
11 | 
12 | 
13 | class Hardswish(nn.Module):
14 |     # Hard-SiLU activation
15 |     @staticmethod
16 |     def forward(x):
17 |         # return x * F.hardsigmoid(x)  # for TorchScript and CoreML
18 |         return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for
TorchScript, CoreML and ONNX 19 | 20 | 21 | class Mish(nn.Module): 22 | # Mish activation https://github.com/digantamisra98/Mish 23 | @staticmethod 24 | def forward(x): 25 | return x * F.softplus(x).tanh() 26 | 27 | 28 | class MemoryEfficientMish(nn.Module): 29 | # Mish activation memory-efficient 30 | class F(torch.autograd.Function): 31 | 32 | @staticmethod 33 | def forward(ctx, x): 34 | ctx.save_for_backward(x) 35 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | x = ctx.saved_tensors[0] 40 | sx = torch.sigmoid(x) 41 | fx = F.softplus(x).tanh() 42 | return grad_output * (fx + x * sx * (1 - fx * fx)) 43 | 44 | def forward(self, x): 45 | return self.F.apply(x) 46 | 47 | 48 | class FReLU(nn.Module): 49 | # FReLU activation https://arxiv.org/abs/2007.11824 50 | def __init__(self, c1, k=3): # ch_in, kernel 51 | super().__init__() 52 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 53 | self.bn = nn.BatchNorm2d(c1) 54 | 55 | def forward(self, x): 56 | return torch.max(x, self.bn(self.conv(x))) 57 | 58 | 59 | class AconC(nn.Module): 60 | r""" ACON activation (activate or not) 61 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 62 | according to "Activate or Not: Learning Customized Activation" . 63 | """ 64 | 65 | def __init__(self, c1): 66 | super().__init__() 67 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 68 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 69 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 70 | 71 | def forward(self, x): 72 | dpx = (self.p1 - self.p2) * x 73 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 74 | 75 | 76 | class MetaAconC(nn.Module): 77 | r""" ACON activation (activate or not) 78 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 79 | according to "Activate or Not: Learning Customized Activation" . 
80 |     """
81 | 
82 |     def __init__(self, c1, k=1, s=1, r=16):  # ch_in, kernel, stride, r
83 |         super().__init__()
84 |         c2 = max(r, c1 // r)
85 |         self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
86 |         self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
87 |         self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
88 |         self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
89 |         # self.bn1 = nn.BatchNorm2d(c2)
90 |         # self.bn2 = nn.BatchNorm2d(c1)
91 | 
92 |     def forward(self, x):
93 |         y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
94 |         # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
95 |         # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y)))))  # bug/unstable
96 |         beta = torch.sigmoid(self.fc2(self.fc1(y)))  # bug patch BN layers removed
97 |         dpx = (self.p1 - self.p2) * x
98 |         return dpx * torch.sigmoid(beta * dpx) + self.p2 * x
99 | 
--------------------------------------------------------------------------------
/utils/coco_utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from pycocotools.coco import COCO
4 | from pycocotools import mask as maskUtils
5 | 
6 | # coco id: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
7 | all_instances_ids = [
8 |     1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
9 |     11, 13, 14, 15, 16, 17, 18, 19, 20,
10 |     21, 22, 23, 24, 25, 27, 28,
11 |     31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
12 |     41, 42, 43, 44, 46, 47, 48, 49, 50,
13 |     51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
14 |     61, 62, 63, 64, 65, 67, 70,
15 |     72, 73, 74, 75, 76, 77, 78, 79, 80,
16 |     81, 82, 84, 85, 86, 87, 88, 89, 90,
17 | ]
18 | 
19 | all_stuff_ids = [
20 |     92, 93, 94, 95, 96, 97, 98, 99, 100,
21 |     101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
22 |     111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
23 |     121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
24 |     131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
25 |     141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
26 |     151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
27 |     161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
28 |     171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
29 |     181, 182,
30 |     # other
31 |     183,
32 |     # unlabeled
33 |     0,
34 | ]
35 | 
36 | # panoptic id: https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
37 | panoptic_stuff_ids = [
38 |     92, 93, 95, 100,
39 |     107, 109,
40 |     112, 118, 119,
41 |     122, 125, 128, 130,
42 |     133, 138,
43 |     141, 144, 145, 147, 148, 149,
44 |     151, 154, 155, 156, 159,
45 |     161, 166, 168,
46 |     171, 175, 176, 177, 178, 180,
47 |     181, 184, 185, 186, 187, 188, 189, 190,
48 |     191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
49 |     # unlabeled
50 |     0,
51 | ]
52 | 
53 | def getCocoIds(name = 'semantic'):
54 |     if 'instances' == name:
55 |         return all_instances_ids
56 |     elif 'stuff' == name:
57 |         return all_stuff_ids
58 |     elif 'panoptic' == name:
59 |         return all_instances_ids + panoptic_stuff_ids
60 |     else: # semantic
61 |         return all_instances_ids + all_stuff_ids
62 | 
63 | def getMappingId(index, name = 'semantic'):
64 |     ids = getCocoIds(name = name)
65 |     return ids[index]
66 | 
67 | def getMappingIndex(id, name = 'semantic'):
68 |     ids = getCocoIds(name = name)
69 |     return ids.index(id)
70 | 
71 | # convert ann to rle encoded string
72 | def annToRLE(ann, img_size):
73 |     h, w = img_size
74 |     segm = ann['segmentation']
75 |     if list == type(segm):
76 |         # polygon -- a single object might consist of multiple parts
77 |         # we merge all parts into one mask rle code
78 |         rles = maskUtils.frPyObjects(segm, h, w)
79 |         rle = maskUtils.merge(rles)
80 |     elif list == type(segm['counts']):
81 |         # uncompressed RLE
82 |         rle = maskUtils.frPyObjects(segm, h, w)
83 |     else:
84 |         # rle
85 |         rle = ann['segmentation']
86 |     return rle
87 | 
88 | # decode ann to mask matrix
89 | def annToMask(ann, img_size):
90 |     rle = annToRLE(ann, img_size)
91 |     m = maskUtils.decode(rle)
92 |     return m
93 | 
94 | # convert mask to polygons
95 | def convert_to_polys(mask):
96 |     # opencv 2.x / >= 4.0 return (contours, hierarchy)
97 |     contours, hierarchy = cv2.findContours((mask).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
98 | 
99 |     # opencv 3.x returns (image, contours, hierarchy):
100 |     # _, contours, hierarchy = cv2.findContours((mask).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
101 | 
102 |     segmentation = []
103 |     for contour in contours:
104 |         contour = contour.flatten().tolist()
105 |         if 4 < len(contour):
106 |             segmentation.append(contour)
107 | 
108 |     return segmentation
109 | 
--------------------------------------------------------------------------------
/utils/triton.py:
--------------------------------------------------------------------------------
1 | import typing
2 | from urllib.parse import urlparse
3 | 
4 | import torch
5 | 
6 | 
7 | class TritonRemoteModel:
8 |     """ A wrapper over a model served by the Triton Inference Server. It can
9 |     be configured to communicate over GRPC or HTTP. It accepts Torch Tensors
10 |     as input and returns them as outputs.
11 |     """
12 | 
13 |     def __init__(self, url: str):
14 |         """
15 |         Keyword arguments:
16 |         url: Fully qualified address of the Triton server - e.g. grpc://localhost:8000
17 |         """
18 | 
19 |         parsed_url = urlparse(url)
20 |         if parsed_url.scheme == "grpc":
21 |             from tritonclient.grpc import InferenceServerClient, InferInput
22 | 
23 |             self.client = InferenceServerClient(parsed_url.netloc)  # Triton GRPC client
24 |             model_repository = self.client.get_model_repository_index()
25 |             self.model_name = model_repository.models[0].name
26 |             self.metadata = self.client.get_model_metadata(self.model_name, as_json=True)
27 | 
28 |             def create_input_placeholders() -> typing.List[InferInput]:
29 |                 return [
30 |                     InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
31 | 
32 |         else:
33 |             from tritonclient.http import InferenceServerClient, InferInput
34 | 
35 |             self.client = InferenceServerClient(parsed_url.netloc)  # Triton HTTP client
36 |             model_repository = self.client.get_model_repository_index()
37 |             self.model_name = model_repository[0]['name']
38 |             self.metadata = self.client.get_model_metadata(self.model_name)
39 | 
40 |             def create_input_placeholders() -> typing.List[InferInput]:
41 |                 return [
42 |                     InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']]
43 | 
44 |         self._create_input_placeholders_fn = create_input_placeholders
45 | 
46 |     @property
47 |     def runtime(self):
48 |         """Returns the model runtime"""
49 |         return self.metadata.get("backend", self.metadata.get("platform"))
50 | 
51 |     def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]:
52 |         """ Invokes the model. Parameters can be provided via args or kwargs.
53 |         args, if provided, are assumed to match the order of inputs of the model.
54 |         kwargs are matched with the model input names.
55 | """ 56 | inputs = self._create_inputs(*args, **kwargs) 57 | response = self.client.infer(model_name=self.model_name, inputs=inputs) 58 | result = [] 59 | for output in self.metadata['outputs']: 60 | tensor = torch.as_tensor(response.as_numpy(output['name'])) 61 | result.append(tensor) 62 | return result[0] if len(result) == 1 else result 63 | 64 | def _create_inputs(self, *args, **kwargs): 65 | args_len, kwargs_len = len(args), len(kwargs) 66 | if not args_len and not kwargs_len: 67 | raise RuntimeError("No inputs provided.") 68 | if args_len and kwargs_len: 69 | raise RuntimeError("Cannot specify args and kwargs at the same time") 70 | 71 | placeholders = self._create_input_placeholders_fn() 72 | if args_len: 73 | if args_len != len(placeholders): 74 | raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") 75 | for input, value in zip(placeholders, args): 76 | input.set_data_from_numpy(value.cpu().numpy()) 77 | else: 78 | for input in placeholders: 79 | value = kwargs[input.name] 80 | input.set_data_from_numpy(value.cpu().numpy()) 81 | return placeholders 82 | -------------------------------------------------------------------------------- /spark/pretrain/models/custom_origin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | from typing import List 10 | from timm.models.registry import register_model 11 | 12 | 13 | class YourConvNet(nn.Module): 14 | """ 15 | This is a template for your custom ConvNet. 16 | It is required to implement the following three functions: `get_downsample_ratio`, `get_feature_map_channels`, `forward`. 17 | You can refer to the implementations in `pretrain\models\resnet.py` for an example. 18 | """ 19 | 20 | def get_downsample_ratio(self) -> int: 21 | """ 22 | This func would ONLY be used in `SparseEncoder's __init__` (see `pretrain/encoder.py`). 23 | 24 | :return: the TOTAL downsample ratio of the ConvNet. 25 | E.g., for a ResNet-50, this should return 32. 26 | """ 27 | raise NotImplementedError 28 | 29 | def get_feature_map_channels(self) -> List[int]: 30 | """ 31 | This func would ONLY be used in `SparseEncoder's __init__` (see `pretrain/encoder.py`). 32 | 33 | :return: a list of the number of channels of each feature map. 34 | E.g., for a ResNet-50, this should return [256, 512, 1024, 2048]. 35 | """ 36 | raise NotImplementedError 37 | 38 | def forward(self, inp_bchw: torch.Tensor, hierarchical=False): 39 | """ 40 | The forward with `hierarchical=True` would ONLY be used in `SparseEncoder.forward` (see `pretrain/encoder.py`). 41 | 42 | :param inp_bchw: input image tensor, shape: (batch_size, channels, height, width). 43 | :param hierarchical: return the logits (not hierarchical), or the feature maps (hierarchical). 44 | :return: 45 | - hierarchical == False: return the logits of the classification task, shape: (batch_size, num_classes). 46 | - hierarchical == True: return a list of all feature maps, which should have the same length as the return value of `get_feature_map_channels`. 47 | E.g., for a ResNet-50, it should return a list [1st_feat_map, 2nd_feat_map, 3rd_feat_map, 4th_feat_map]. 
48 | for an input size of 224, the shapes are [(B, 256, 56, 56), (B, 512, 28, 28), (B, 1024, 14, 14), (B, 2048, 7, 7)] 49 | """ 50 | raise NotImplementedError 51 | 52 | 53 | @register_model 54 | def your_convnet_small(pretrained=False, **kwargs): 55 | raise NotImplementedError 56 | return YourConvNet(**kwargs) 57 | 58 | 59 | @torch.no_grad() 60 | def convnet_test(): 61 | from timm.models import create_model 62 | cnn = create_model('your_convnet_small') 63 | print('get_downsample_ratio:', cnn.get_downsample_ratio()) 64 | print('get_feature_map_channels:', cnn.get_feature_map_channels()) 65 | 66 | downsample_ratio = cnn.get_downsample_ratio() 67 | feature_map_channels = cnn.get_feature_map_channels() 68 | 69 | # check the forward function 70 | B, C, H, W = 4, 3, 224, 224 71 | inp = torch.rand(B, C, H, W) 72 | feats = cnn(inp, hierarchical=True) 73 | assert isinstance(feats, list) 74 | assert len(feats) == len(feature_map_channels) 75 | print([tuple(t.shape) for t in feats]) 76 | 77 | # check the downsample ratio 78 | feats = cnn(inp, hierarchical=True) 79 | assert feats[-1].shape[-2] == H // downsample_ratio 80 | assert feats[-1].shape[-1] == W // downsample_ratio 81 | 82 | # check the channel number 83 | for feat, ch in zip(feats, feature_map_channels): 84 | assert feat.ndim == 4 85 | assert feat.shape[1] == ch 86 | 87 | 88 | if __name__ == '__main__': 89 | convnet_test() 90 | -------------------------------------------------------------------------------- /models/detect/pk-yolo.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 2 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | # activation: nn.LeakyReLU(0.1) 8 | # activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | [-1, 1, Backbone, []], 19 | # conv down 20 | [1, 1, Down0, [64]], #2 320 1 21 | [1, 1, Down1, [128]], # 3 160 3 22 | [1, 1, Down2, [256]],# 4 80 5 23 | [1, 1, Down3, [512]], #5 40 7 24 | [1, 1, Down4, [1024]], #6 20 9 25 | 26 | # routing 27 | [ 2, 1, CBLinear, [ [ 64 ] ] ], # 10 28 | [ 3, 1, CBLinear, [ [ 64, 128 ] ] ], # 11 29 | [ 4, 1, CBLinear, [ [ 64, 128, 256 ] ] ], # 12 30 | [ 5, 1, CBLinear, [ [ 64, 128, 256, 512 ] ] ], # 13 31 | [ 6, 1, CBLinear, [ [ 64, 128, 256, 512, 1024 ] ] ], # 14 -3 32 | 33 | # conv down fuse 34 | [ 0, 1, Conv, [ 64, 3, 2 ] ], # 15-P1/2 35 | [ [ 7, 8, 9, 10, 11, -1 ], 1, CBFuse, [ [ 0, 0, 0, 0, 0 ] ] ], # 16 36 | 37 | # conv down fuse 38 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 17-P2/4 39 | [ [ 8, 9, 10, 11, -1 ], 1, CBFuse, [ [ 1, 1, 1, 1 ] ] ], # 18 40 | 41 | # elan-1 block 42 | [ -1, 1, RepNCSPELAN4, [ 256, 128, 64, 2 ] ], # 19 43 | 44 | # avg-conv down fuse 45 | [ -1, 1, ADown, [ 256 ] ], # 20-P3/8 46 | [ [ 9, 10, 11, -1 ], 1, CBFuse, [ [ 2, 2, 2 ] ] ], # 21 47 | 48 | # elan-2 block 49 | [ -1, 1, RepNCSPELAN4, [ 512, 256, 128, 2 ] ], # 22 50 | 51 | # avg-conv down fuse 52 | [ -1, 1, ADown, [ 512 ] ], # 23-P4/16 53 | [ [ 10, 11, -1 ], 1, CBFuse, [ [ 3, 3 ] ] ], # 24 54 | 55 | # elan-2 block 56 | [ -1, 1, RepNCSPELAN4, [ 1024, 512, 256, 2 ] ], # 25 57 | 58 | # avg-conv down fuse 59 | [ -1, 1, ADown, [ 1024 ] ], # 26-P5/32 60 | [ [ 11, -1 ], 1, CBFuse, [ [ 4 ] ] ], # 27 61 | 62 | # elan-2 block 63 | [ -1, 1, RepNCSPELAN4, [ 1024, 512, 256, 2 ] ], # 28 25 64 | 65 | ] 66 | 67 | # YOLOv9 head 68 | head: 69 | [ 70 | # multi-level auxiliary branch 71 | 72 | # elan-spp 
block 73 | [6, 1, SPPELAN, [512, 256]], # 29 74 | 75 | # up-concat merge 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 78 | 79 | # csp-elan block 80 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 81 | 82 | # up-concat merge 83 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 84 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 85 | 86 | # csp-elan block 87 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 88 | 89 | 90 | 91 | # main branch 92 | 93 | # elan-spp block 94 | [25, 1, SPPELAN, [512, 256]], # 36 95 | 96 | # up-concat merge 97 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 98 | [[-1, 22], 1, Concat, [1]], # cat backbone P4 99 | 100 | # csp-elan block 101 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 102 | 103 | # up-concat merge 104 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 105 | [[-1, 19], 1, Concat, [1]], # cat backbone P3 106 | 107 | # csp-elan block 108 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) 109 | 110 | # avg-conv-down merge 111 | [-1, 1, ADown, [256]], 112 | [[-1, 36], 1, Concat, [1]], # cat head P4 113 | 114 | # csp-elan block 115 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) 116 | 117 | # avg-conv-down merge 118 | [-1, 1, ADown, [512]], 119 | [[-1, 33], 1, Concat, [1]], # cat head P5 120 | 121 | # csp-elan block 122 | [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) 123 | 124 | # detect 125 | [[32, 29, 26, 39, 42, 45], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 126 | ] 127 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/README.md: -------------------------------------------------------------------------------- 1 | ## About code isolation 2 | 3 | This `downstream_mmdet` is isolated from pre-training codes. One can treat this `downstream_mmdet` as an independent codebase 🛠️. 4 | 5 | ## Fine-tuned ConvNeXt-B weights, log files, and performance 6 | 7 | 8 |
9 | 10 | [[`weights (pre-trained by SparK)`](https://drive.google.com/file/d/1ZjWbqI1qoBcqeQijI5xX9E-YNkxpJcYV/view?usp=share_link)] 11 | [[`weights (fine-tuned on COCO)`](https://drive.google.com/file/d/1t10dmzg5KOO27o2yIglK-gQepB5gR4zR/view?usp=share_link)] 12 | [[`log.json`](https://drive.google.com/file/d/1TuNboXl1qwjf1tggZ3QOssI67uU7Jtig/view?usp=share_link)] 13 | [[`log`](https://drive.google.com/file/d/1JY5CkL_MX08zJ8P1FBIeC60OJsuIiyZc/view?usp=sharing)] 14 |
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | ## Installation [MMDetection with commit 6a979e2](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/tree/6a979e2164e3fb0de0ca2546545013a4d71b2f7d) before fine-tuning ConvNeXt on COCO
23 | 
24 | We refer to the codebases of [ConvNeXt](https://github.com/facebookresearch/ConvNeXt/tree/048efcea897d999aed302f2639b6270aedf8d4c8) and [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/tree/6a979e2164e3fb0de0ca2546545013a4d71b2f7d).
25 | Please refer to [README.md](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/6a979e2164e3fb0de0ca2546545013a4d71b2f7d/README.md) for installation and dataset preparation instructions.
26 | 
27 | Note the COCO dataset folder should be at `downstream_mmdet/data/coco`.
28 | The folder should follow the directory structure required by `MMDetection`, which should look like this:
29 | ```
30 | downstream_mmdet/data/coco:
31 |     annotations/:
32 |         captions_train2017.json  captions_val2017.json
33 |         instances_train2017.json  instances_val2017.json
34 |         person_keypoints_train2017.json  person_keypoints_val2017.json
35 |     train2017/:
36 |         a_lot_images.jpg
37 |     val2017/:
38 |         a_lot_images.jpg
39 | ```
40 | 
41 | 
42 | ### Training
43 | 
44 | To train a detector with pre-trained models, run:
45 | ```
46 | # single-gpu training
47 | python tools/train.py <CONFIG_FILE> --cfg-options model.pretrained=<PRETRAIN_MODEL> [other optional arguments]
48 | 
49 | # multi-gpu training
50 | tools/dist_train.sh <CONFIG_FILE> <GPU_NUM> --cfg-options model.pretrained=<PRETRAIN_MODEL> [other optional arguments]
51 | ```
52 | For example, to train a Mask R-CNN model with a SparK pretrained `ConvNeXt-B` backbone and 4 gpus, run:
53 | ```
54 | tools/dist_train.sh configs/convnext_spark/mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py 4 \
55 |     --cfg-options model.pretrained=/some/path/to/official_convnext_base_1kpretrained.pth
56 | ```
57 | 
58 | The Mask R-CNN 3x fine-tuning config file can be found at [`configs/convnext_spark`](configs/convnext_spark). This config is basically a copy of [https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/mask_rcnn_convnext_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py](https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/mask_rcnn_convnext_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py).
59 | 
60 | ### Inference
61 | ```
62 | # single-gpu testing
63 | python tools/test.py <CONFIG_FILE> <DET_CHECKPOINT_FILE> --eval bbox segm
64 | 
65 | # multi-gpu testing
66 | tools/dist_test.sh <CONFIG_FILE> <DET_CHECKPOINT_FILE> <GPU_NUM> --eval bbox segm
67 | ```
68 | 
69 | ## Acknowledgment
70 | 
71 | We appreciate these useful codebases:
72 | 
73 | - [MMDetection](https://github.com/open-mmlab/mmdetection)
74 | - [ConvNeXt](https://github.com/facebookresearch/ConvNeXt)
75 | - [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection)
76 | 
77 | 
--------------------------------------------------------------------------------
/spark/downstream_imagenet/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
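# A minimal sketch of how the `get_layer_id_and_scale_exp` hooks installed
# below are typically consumed for layer-wise lr decay (the 0.7 decay ratio,
# `base_lr`, and the optimizer settings are illustrative, not part of this file):
#
#   groups = {}
#   for name, p in model.named_parameters():
#       layer_id, scale_exp = model.get_layer_id_and_scale_exp(name)
#       g = groups.setdefault(layer_id, {'params': [], 'lr': base_lr * 0.7 ** scale_exp})
#       g['params'].append(p)
#   optimizer = torch.optim.AdamW(list(groups.values()), weight_decay=0.05)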
6 | 7 | import math 8 | 9 | import torch 10 | from timm.data import Mixup 11 | from timm.loss import BinaryCrossEntropy, SoftTargetCrossEntropy 12 | from timm.models.layers import drop 13 | from timm.models.resnet import ResNet 14 | 15 | from .convnext_official import ConvNeXt 16 | 17 | 18 | def convnext_get_layer_id_and_scale_exp(self: ConvNeXt, para_name: str): 19 | N = 12 if len(self.stages[-2]) > 9 else 6 20 | if para_name.startswith("downsample_layers"): 21 | stage_id = int(para_name.split('.')[1]) 22 | if stage_id == 0: 23 | layer_id = 0 24 | elif stage_id == 1 or stage_id == 2: 25 | layer_id = stage_id + 1 26 | else: # stage_id == 3: 27 | layer_id = N 28 | elif para_name.startswith("stages"): 29 | stage_id = int(para_name.split('.')[1]) 30 | block_id = int(para_name.split('.')[2]) 31 | if stage_id == 0 or stage_id == 1: 32 | layer_id = stage_id + 1 33 | elif stage_id == 2: 34 | layer_id = 3 + block_id // 3 35 | else: # stage_id == 3: 36 | layer_id = N 37 | else: 38 | layer_id = N + 1 # after backbone 39 | 40 | return layer_id, N + 1 - layer_id 41 | 42 | 43 | def resnets_get_layer_id_and_scale_exp(self: ResNet, para_name: str): 44 | # stages: 45 | # 50 : [3, 4, 6, 3] 46 | # 101 : [3, 4, 23, 3] 47 | # 152 : [3, 8, 36, 3] 48 | # 200 : [3, 24, 36, 3] 49 | # eca269d: [3, 30, 48, 8] 50 | 51 | L2, L3 = len(self.layer2), len(self.layer3) 52 | if L2 == 4 and L3 == 6: 53 | blk2, blk3 = 2, 3 54 | elif L2 == 4 and L3 == 23: 55 | blk2, blk3 = 2, 3 56 | elif L2 == 8 and L3 == 36: 57 | blk2, blk3 = 4, 4 58 | elif L2 == 24 and L3 == 36: 59 | blk2, blk3 = 4, 4 60 | elif L2 == 30 and L3 == 48: 61 | blk2, blk3 = 5, 6 62 | else: 63 | raise NotImplementedError 64 | 65 | N2, N3 = math.ceil(L2 / blk2 - 1e-5), math.ceil(L3 / blk3 - 1e-5) 66 | N = 2 + N2 + N3 67 | if para_name.startswith('layer'): # 1, 2, 3, 4, 5 68 | stage_id, block_id = int(para_name.split('.')[0][5:]), int(para_name.split('.')[1]) 69 | if stage_id == 1: 70 | layer_id = 1 71 | elif stage_id == 2: 72 | layer_id = 2 + block_id // blk2 # 2, 3 73 | elif stage_id == 3: 74 | layer_id = 2 + N2 + block_id // blk3 # r50: 4, 5 r101: 4, 5, ..., 11 75 | else: # == 4 76 | layer_id = N # r50: 6 r101: 12 77 | elif para_name.startswith('fc.'): 78 | layer_id = N + 1 # r50: 7 r101: 13 79 | else: 80 | layer_id = 0 81 | 82 | return layer_id, N + 1 - layer_id # r50: 0-7, 7-0 r101: 0-13, 13-0 83 | 84 | 85 | def _ex_repr(self): 86 | return ', '.join( 87 | f'{k}=' + (f'{v:g}' if isinstance(v, float) else str(v)) 88 | for k, v in vars(self).items() 89 | if not k.startswith('_') and k != 'training' 90 | and not isinstance(v, (torch.nn.Module, torch.Tensor)) 91 | ) 92 | 93 | 94 | # IMPORTANT: update some member functions 95 | __UPDATED = False 96 | if not __UPDATED: 97 | for clz in (torch.nn.CrossEntropyLoss, SoftTargetCrossEntropy, BinaryCrossEntropy, Mixup, drop.DropPath): 98 | if hasattr(clz, 'extra_repr'): 99 | clz.extra_repr = _ex_repr 100 | else: 101 | clz.__repr__ = lambda self: f'{type(self).__name__}({_ex_repr(self)})' 102 | ResNet.get_layer_id_and_scale_exp = resnets_get_layer_id_and_scale_exp 103 | ConvNeXt.get_layer_id_and_scale_exp = convnext_get_layer_id_and_scale_exp 104 | __UPDATED = True 105 | -------------------------------------------------------------------------------- /utils/loggers/comet/optimizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": "random", 3 | "parameters": { 4 | "anchor_t": { 5 | "type": "discrete", 6 | "values": [ 7 | 2, 8 | 8 9 | ] 10 | }, 11 | 
"batch_size": { 12 | "type": "discrete", 13 | "values": [ 14 | 16, 15 | 32, 16 | 64 17 | ] 18 | }, 19 | "box": { 20 | "type": "discrete", 21 | "values": [ 22 | 0.02, 23 | 0.2 24 | ] 25 | }, 26 | "cls": { 27 | "type": "discrete", 28 | "values": [ 29 | 0.2 30 | ] 31 | }, 32 | "cls_pw": { 33 | "type": "discrete", 34 | "values": [ 35 | 0.5 36 | ] 37 | }, 38 | "copy_paste": { 39 | "type": "discrete", 40 | "values": [ 41 | 1 42 | ] 43 | }, 44 | "degrees": { 45 | "type": "discrete", 46 | "values": [ 47 | 0, 48 | 45 49 | ] 50 | }, 51 | "epochs": { 52 | "type": "discrete", 53 | "values": [ 54 | 5 55 | ] 56 | }, 57 | "fl_gamma": { 58 | "type": "discrete", 59 | "values": [ 60 | 0 61 | ] 62 | }, 63 | "fliplr": { 64 | "type": "discrete", 65 | "values": [ 66 | 0 67 | ] 68 | }, 69 | "flipud": { 70 | "type": "discrete", 71 | "values": [ 72 | 0 73 | ] 74 | }, 75 | "hsv_h": { 76 | "type": "discrete", 77 | "values": [ 78 | 0 79 | ] 80 | }, 81 | "hsv_s": { 82 | "type": "discrete", 83 | "values": [ 84 | 0 85 | ] 86 | }, 87 | "hsv_v": { 88 | "type": "discrete", 89 | "values": [ 90 | 0 91 | ] 92 | }, 93 | "iou_t": { 94 | "type": "discrete", 95 | "values": [ 96 | 0.7 97 | ] 98 | }, 99 | "lr0": { 100 | "type": "discrete", 101 | "values": [ 102 | 1e-05, 103 | 0.1 104 | ] 105 | }, 106 | "lrf": { 107 | "type": "discrete", 108 | "values": [ 109 | 0.01, 110 | 1 111 | ] 112 | }, 113 | "mixup": { 114 | "type": "discrete", 115 | "values": [ 116 | 1 117 | ] 118 | }, 119 | "momentum": { 120 | "type": "discrete", 121 | "values": [ 122 | 0.6 123 | ] 124 | }, 125 | "mosaic": { 126 | "type": "discrete", 127 | "values": [ 128 | 0 129 | ] 130 | }, 131 | "obj": { 132 | "type": "discrete", 133 | "values": [ 134 | 0.2 135 | ] 136 | }, 137 | "obj_pw": { 138 | "type": "discrete", 139 | "values": [ 140 | 0.5 141 | ] 142 | }, 143 | "optimizer": { 144 | "type": "categorical", 145 | "values": [ 146 | "SGD", 147 | "Adam", 148 | "AdamW" 149 | ] 150 | }, 151 | "perspective": { 152 | "type": "discrete", 153 | "values": [ 154 | 0 155 | ] 156 | }, 157 | "scale": { 158 | "type": "discrete", 159 | "values": [ 160 | 0 161 | ] 162 | }, 163 | "shear": { 164 | "type": "discrete", 165 | "values": [ 166 | 0 167 | ] 168 | }, 169 | "translate": { 170 | "type": "discrete", 171 | "values": [ 172 | 0 173 | ] 174 | }, 175 | "warmup_bias_lr": { 176 | "type": "discrete", 177 | "values": [ 178 | 0, 179 | 0.2 180 | ] 181 | }, 182 | "warmup_epochs": { 183 | "type": "discrete", 184 | "values": [ 185 | 5 186 | ] 187 | }, 188 | "warmup_momentum": { 189 | "type": "discrete", 190 | "values": [ 191 | 0, 192 | 0.95 193 | ] 194 | }, 195 | "weight_decay": { 196 | "type": "discrete", 197 | "values": [ 198 | 0, 199 | 0.001 200 | ] 201 | } 202 | }, 203 | "spec": { 204 | "maxCombo": 0, 205 | "metric": "metrics/mAP_0.5", 206 | "objective": "maximize" 207 | }, 208 | "trials": 1 209 | } 210 | -------------------------------------------------------------------------------- /models/detect/yolov9-e.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 2 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | # conv down 19 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 20 | 21 | # conv down 22 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 23 | 24 | # csp-elan block 25 | [-1, 1, 
RepNCSPELAN4, [256, 128, 64, 2]], # 3 26 | 27 | # avg-conv down 28 | [-1, 1, ADown, [256]], # 4-P3/8 29 | 30 | # csp-elan block 31 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 32 | 33 | # avg-conv down 34 | [-1, 1, ADown, [512]], # 6-P4/16 35 | 36 | # csp-elan block 37 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 38 | 39 | # avg-conv down 40 | [-1, 1, ADown, [1024]], # 8-P5/32 41 | 42 | # csp-elan block 43 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 44 | 45 | # routing 46 | [1, 1, CBLinear, [[64]]], # 10 47 | [3, 1, CBLinear, [[64, 128]]], # 11 48 | [5, 1, CBLinear, [[64, 128, 256]]], # 12 49 | [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 50 | [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 51 | 52 | # conv down 53 | [0, 1, Conv, [64, 3, 2]], # 15-P1/2 54 | [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 55 | 56 | # conv down 57 | [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 58 | [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 59 | 60 | # csp-elan block 61 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 62 | 63 | # avg-conv down fuse 64 | [-1, 1, ADown, [256]], # 20-P3/8 65 | [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 66 | 67 | # csp-elan block 68 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 69 | 70 | # avg-conv down fuse 71 | [-1, 1, ADown, [512]], # 23-P4/16 72 | [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 73 | 74 | # csp-elan block 75 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 76 | 77 | # avg-conv down fuse 78 | [-1, 1, ADown, [1024]], # 26-P5/32 79 | [[14, -1], 1, CBFuse, [[4]]], # 27 80 | 81 | # csp-elan block 82 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 83 | ] 84 | 85 | # YOLOv9 head 86 | head: 87 | [ 88 | # multi-level auxiliary branch 89 | 90 | # elan-spp block 91 | [9, 1, SPPELAN, [512, 256]], # 29 92 | 93 | # up-concat merge 94 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 95 | [[-1, 7], 1, Concat, [1]], # cat backbone P4 96 | 97 | # csp-elan block 98 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 99 | 100 | # up-concat merge 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [[-1, 5], 1, Concat, [1]], # cat backbone P3 103 | 104 | # csp-elan block 105 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 106 | 107 | 108 | 109 | # main branch 110 | 111 | # elan-spp block 112 | [28, 1, SPPELAN, [512, 256]], # 36 113 | 114 | # up-concat merge 115 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 116 | [[-1, 25], 1, Concat, [1]], # cat backbone P4 117 | 118 | # csp-elan block 119 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 120 | 121 | # up-concat merge 122 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 123 | [[-1, 22], 1, Concat, [1]], # cat backbone P3 124 | 125 | # csp-elan block 126 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) 127 | 128 | # avg-conv-down merge 129 | [-1, 1, ADown, [256]], 130 | [[-1, 39], 1, Concat, [1]], # cat head P4 131 | 132 | # csp-elan block 133 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) 134 | 135 | # avg-conv-down merge 136 | [-1, 1, ADown, [512]], 137 | [[-1, 36], 1, Concat, [1]], # cat head P5 138 | 139 | # csp-elan block 140 | [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) 141 | 142 | # detect 143 | [[35, 32, 29, 42, 45, 48], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 144 | ] 145 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/configs/_base_/models/mask_rcnn_convnext_fpn.py: -------------------------------------------------------------------------------- 1 
| # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | # model settings 10 | model = dict( 11 | type='MaskRCNN', 12 | pretrained=None, 13 | backbone=dict( 14 | type='ConvNeXt', 15 | in_chans=3, 16 | depths=[3, 3, 9, 3], 17 | dims=[96, 192, 384, 768], 18 | drop_path_rate=0.2, 19 | layer_scale_init_value=1e-6, 20 | out_indices=[0, 1, 2, 3], 21 | ), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[128, 256, 512, 1024], 25 | out_channels=256, 26 | num_outs=5), 27 | rpn_head=dict( 28 | type='RPNHead', 29 | in_channels=256, 30 | feat_channels=256, 31 | anchor_generator=dict( 32 | type='AnchorGenerator', 33 | scales=[8], 34 | ratios=[0.5, 1.0, 2.0], 35 | strides=[4, 8, 16, 32, 64]), 36 | bbox_coder=dict( 37 | type='DeltaXYWHBBoxCoder', 38 | target_means=[.0, .0, .0, .0], 39 | target_stds=[1.0, 1.0, 1.0, 1.0]), 40 | loss_cls=dict( 41 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 42 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 43 | roi_head=dict( 44 | type='StandardRoIHead', 45 | bbox_roi_extractor=dict( 46 | type='SingleRoIExtractor', 47 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 48 | out_channels=256, 49 | featmap_strides=[4, 8, 16, 32]), 50 | bbox_head=dict( 51 | type='Shared2FCBBoxHead', 52 | in_channels=256, 53 | fc_out_channels=1024, 54 | roi_feat_size=7, 55 | num_classes=80, 56 | bbox_coder=dict( 57 | type='DeltaXYWHBBoxCoder', 58 | target_means=[0., 0., 0., 0.], 59 | target_stds=[0.1, 0.1, 0.2, 0.2]), 60 | reg_class_agnostic=False, 61 | loss_cls=dict( 62 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 63 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 64 | mask_roi_extractor=dict( 65 | type='SingleRoIExtractor', 66 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 67 | out_channels=256, 68 | featmap_strides=[4, 8, 16, 32]), 69 | mask_head=dict( 70 | type='FCNMaskHead', 71 | num_convs=4, 72 | in_channels=256, 73 | conv_out_channels=256, 74 | num_classes=80, 75 | loss_mask=dict( 76 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 77 | # model training and testing settings 78 | train_cfg=dict( 79 | rpn=dict( 80 | assigner=dict( 81 | type='MaxIoUAssigner', 82 | pos_iou_thr=0.7, 83 | neg_iou_thr=0.3, 84 | min_pos_iou=0.3, 85 | match_low_quality=True, 86 | ignore_iof_thr=-1), 87 | sampler=dict( 88 | type='RandomSampler', 89 | num=256, 90 | pos_fraction=0.5, 91 | neg_pos_ub=-1, 92 | add_gt_as_proposals=False), 93 | allowed_border=-1, 94 | pos_weight=-1, 95 | debug=False), 96 | rpn_proposal=dict( 97 | nms_pre=2000, 98 | max_per_img=1000, 99 | nms=dict(type='nms', iou_threshold=0.7), 100 | min_bbox_size=0), 101 | rcnn=dict( 102 | assigner=dict( 103 | type='MaxIoUAssigner', 104 | pos_iou_thr=0.5, 105 | neg_iou_thr=0.5, 106 | min_pos_iou=0.5, 107 | match_low_quality=True, 108 | ignore_iof_thr=-1), 109 | sampler=dict( 110 | type='RandomSampler', 111 | num=512, 112 | pos_fraction=0.25, 113 | neg_pos_ub=-1, 114 | add_gt_as_proposals=True), 115 | mask_size=28, 116 | pos_weight=-1, 117 | debug=False)), 118 | test_cfg=dict( 119 | rpn=dict( 120 | nms_pre=1000, 121 | max_per_img=1000, 122 | nms=dict(type='nms', iou_threshold=0.7), 123 | min_bbox_size=0), 124 | rcnn=dict( 125 | score_thr=0.05, 126 | nms=dict(type='nms', iou_threshold=0.5), 127 | max_per_img=100, 128 | mask_thr_binary=0.5))) 129 | 
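# Note that the backbone defaults above are ConvNeXt-T sized (dims up to 768)
# while neck.in_channels is ConvNeXt-B sized ([128, 256, 512, 1024]); the
# derived config under configs/convnext_spark overrides backbone.depths/dims
# and neck.in_channels consistently for ConvNeXt-B (see its [modified] comments).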
--------------------------------------------------------------------------------
/spark/pretrain/utils/arg_util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) ByteDance, Inc. and its affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | 
7 | import json
8 | import os
9 | import sys
10 | 
11 | from tap import Tap
12 | 
13 | import dist
14 | 
15 | 
16 | class Args(Tap):
17 |     # environment
18 |     exp_name: str = 'your_exp_name'
19 |     exp_dir: str = 'your_exp_dir'   # will be created if not exists
20 |     data_path: str = 'D://code/Br35H-Mask-RCNN/sparkdata/'
21 |     init_weight: str = ''   # use some checkpoint as model weight initialization; ONLY load model weights
22 |     resume_from: str = ''   # resume the experiment from some checkpoint.pth; load model weights, optimizer states, and last epoch
23 | 
24 |     # SparK hyperparameters
25 |     mask: float = 0.6   # mask ratio, should be in (0, 1)
26 | 
27 |     # encoder hyperparameters
28 |     model: str = 'V9back'   # resnet50
29 |     input_size: int = 224
30 |     sbn: bool = True
31 | 
32 |     # data hyperparameters
33 |     bs: int = 10
34 |     dataloader_workers: int = 8
35 | 
36 |     # pre-training hyperparameters
37 |     dp: float = 0.0
38 |     base_lr: float = 2e-4
39 |     wd: float = 0.04
40 |     wde: float = 0.2
41 |     ep: int = 100
42 |     wp_ep: int = 40
43 |     clip: float = 5.
44 |     opt: str = 'lamb'
45 |     ada: float = 0.
46 | 
47 |     # NO NEED TO SPECIFY; each of these args will be updated at runtime automatically
48 |     lr: float = None
49 |     batch_size_per_gpu: int = 0
50 |     glb_batch_size: int = 0
51 |     densify_norm: str = ''
52 |     device: str = 'cpu'
53 |     local_rank: int = 0
54 |     cmd: str = ' '.join(sys.argv[1:])
55 |     commit_id: str = os.popen('git rev-parse HEAD').read().strip() or '[unknown]'
56 |     commit_msg: str = (os.popen('git log -1').read().strip().splitlines() or ['[unknown]'])[-1].strip()
57 |     last_loss: float = 0.
58 |     cur_ep: str = ''
59 |     remain_time: str = ''
60 |     finish_time: str = ''
61 |     first_logging: bool = True
62 |     log_txt_name: str = '{args.exp_dir}/pretrain_log.txt'
63 |     tb_lg_dir: str = ''   # tensorboard log directory
64 | 
65 |     @property
66 |     def is_convnext(self):
67 |         return 'convnext' in self.model or 'cnx' in self.model
68 | 
69 |     @property
70 |     def is_resnet(self):
71 |         return 'resnet' in self.model
72 | 
73 |     def log_epoch(self):
74 |         if not dist.is_local_master():
75 |             return
76 | 
77 |         if self.first_logging:
78 |             self.first_logging = False
79 |             with open(self.log_txt_name, 'w') as fp:
80 |                 json.dump({
81 |                     'name': self.exp_name, 'cmd': self.cmd, 'git_commit_id': self.commit_id, 'git_commit_msg': self.commit_msg,
82 |                     'model': self.model,
83 |                 }, fp)
84 |                 fp.write('\n\n')
85 | 
86 |         with open(self.log_txt_name, 'a') as fp:
87 |             json.dump({
88 |                 'cur_ep': self.cur_ep,
89 |                 'last_L': self.last_loss,
90 |                 'rema': self.remain_time, 'fini': self.finish_time,
91 |             }, fp)
92 |             fp.write('\n')
93 | 
94 | 
95 | def init_dist_and_get_args():
96 |     from utils import misc
97 | 
98 |     # initialize
99 |     args = Args(explicit_bool=True).parse_args()
100 |     e = os.path.abspath(args.exp_dir)
101 |     d, e = os.path.dirname(e), os.path.basename(e)
102 |     e = ''.join(ch if (ch.isalnum() or ch == '-') else '_' for ch in e)
103 |     args.exp_dir = os.path.join(d, e)
104 | 
105 |     os.makedirs(args.exp_dir, exist_ok=True)
106 |     args.log_txt_name = os.path.join(args.exp_dir, 'pretrain_log.txt')
107 |     args.tb_lg_dir = args.tb_lg_dir or os.path.join(args.exp_dir, 'tensorboard_log')
108 |     try:
109 |         os.makedirs(args.tb_lg_dir, exist_ok=True)
110 |     except OSError:
111 |         pass
112 | 
113 |     misc.init_distributed_environ(exp_dir=args.exp_dir)
114 | 
115 |     # update args
116 |     if not dist.initialized():
117 |         args.sbn = False
118 |     args.first_logging = True
119 |     args.device = dist.get_device()
120 |     args.batch_size_per_gpu = args.bs // dist.get_world_size()
121 |     args.glb_batch_size = args.batch_size_per_gpu * dist.get_world_size()
122 | 
123 |     if args.is_resnet:
124 |         args.ada = args.ada or 0.95
125 |         args.densify_norm = 'bn'
126 | 
127 |     if args.is_convnext:
128 |         args.ada = args.ada or 0.999
129 |         args.densify_norm = 'ln'
130 | 
131 |     args.opt = args.opt.lower()
132 |     args.lr = args.base_lr * args.glb_batch_size / 256
133 |     args.wde = args.wde or args.wd
134 | 
135 |     return args
136 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/configs/convnext_spark/mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py:
--------------------------------------------------------------------------------
1 | """
2 | We directly take the ConvNeXt-T+MaskRCNN 3x recipe from https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/mask_rcnn_convnext_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco_in1k.py
3 | And we modify this ConvNeXt-T+MaskRCNN 3x recipe to our ConvNeXt-B+MaskRCNN 3x recipe.
4 | The modifications (commented as [modified] below) are according to:
5 | - 1. tiny-to-base: (some configs of ConvNext-T are updated to those of ConvNext-B, referring to https://github.com/facebookresearch/ConvNeXt/blob/main/object_detection/configs/convnext/cascade_mask_rcnn_convnext_base_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco_in22k.py)
6 |     - model.backbone.{depths, dims, drop_path_rate}
7 |     - model.neck
8 |     - optimizer.paramwise_cfg.num_layers
9 | 
10 | - 2.
our paper (https://openreview.net/forum?id=NRxydtWup1S, or https://arxiv.org/abs/2301.03580): 11 | - LR layer decay (optimizer.paramwise_cfg.decay_rate): 0.65 12 | - LR scheduled ratio (lr_config.gamma): 0.2 13 | - Learning rate (optimizer.lr): 0.0002 14 | - optimizer_config.use_fp16: False (we just use fp32 by default; actually we didn't test the performance of using fp16) 15 | """ 16 | 17 | _base_ = [ 18 | '../_base_/models/mask_rcnn_convnext_fpn.py', 19 | '../_base_/datasets/coco_instance.py', 20 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 21 | ] 22 | 23 | model = dict( 24 | backbone=dict( 25 | in_chans=3, 26 | depths=[3, 3, 27, 3], # [modified] according to tiny-to-base 27 | dims=[128, 256, 512, 1024], # [modified] according to tiny-to-base 28 | drop_path_rate=0.5, # [modified] according to tiny-to-base 29 | layer_scale_init_value=1.0, 30 | out_indices=[0, 1, 2, 3], 31 | ), 32 | neck=dict(in_channels=[128, 256, 512, 1024])) # [modified] according to tiny-to-base 33 | 34 | img_norm_cfg = dict( 35 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 36 | 37 | # augmentation strategy originates from DETR / Sparse RCNN 38 | train_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 41 | dict(type='RandomFlip', flip_ratio=0.5), 42 | dict(type='AutoAugment', 43 | policies=[ 44 | [ 45 | dict(type='Resize', 46 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 47 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 48 | (736, 1333), (768, 1333), (800, 1333)], 49 | multiscale_mode='value', 50 | keep_ratio=True) 51 | ], 52 | [ 53 | dict(type='Resize', 54 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='RandomCrop', 58 | crop_type='absolute_range', 59 | crop_size=(384, 600), 60 | allow_negative_crop=True), 61 | dict(type='Resize', 62 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 63 | (576, 1333), (608, 1333), (640, 1333), 64 | (672, 1333), (704, 1333), (736, 1333), 65 | (768, 1333), (800, 1333)], 66 | multiscale_mode='value', 67 | override=True, 68 | keep_ratio=True) 69 | ] 70 | ]), 71 | dict(type='Normalize', **img_norm_cfg), 72 | dict(type='Pad', size_divisor=32), 73 | dict(type='DefaultFormatBundle'), 74 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 75 | ] 76 | data = dict(train=dict(pipeline=train_pipeline)) 77 | 78 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW', 79 | lr=0.0002, betas=(0.9, 0.999), weight_decay=0.05, # [modified] according to our paper 80 | paramwise_cfg={'decay_rate': 0.65, # [modified] according to our paper 81 | 'decay_type': 'layer_wise', 82 | 'num_layers': 12}) # [modified] according to tiny-to-base 83 | lr_config = dict(step=[27, 33], gamma=0.2) # [modified] according to our paper 84 | runner = dict(type='EpochBasedRunnerAmp', max_epochs=36) 85 | 86 | # do not use mmdet version fp16 87 | fp16 = None 88 | optimizer_config = dict( 89 | type="DistOptimizerHook", 90 | update_interval=1, 91 | grad_clip=None, 92 | coalesce=True, 93 | bucket_size_mb=-1, 94 | use_fp16=False, # [modified] True => False 95 | ) -------------------------------------------------------------------------------- /utils/downloads.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess 4 | import urllib 5 | from pathlib import Path 6 | 
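# A minimal usage sketch of this module (the weight file name is illustrative):
#
#   weights = attempt_download('yolov5s.pt')   # resolves a local path, a direct URL, or a GitHub release asset
#   state = torch.load(weights, map_location='cpu')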
7 | import requests 8 | import torch 9 | 10 | 11 | def is_url(url, check=True): 12 | # Check if string is URL and check if URL exists 13 | try: 14 | url = str(url) 15 | result = urllib.parse.urlparse(url) 16 | assert all([result.scheme, result.netloc]) # check if is url 17 | return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online 18 | except (AssertionError, urllib.request.HTTPError): 19 | return False 20 | 21 | 22 | def gsutil_getsize(url=''): 23 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 24 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 25 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 26 | 27 | 28 | def url_getsize(url='https://ultralytics.com/images/bus.jpg'): 29 | # Return downloadable file size in bytes 30 | response = requests.head(url, allow_redirects=True) 31 | return int(response.headers.get('content-length', -1)) 32 | 33 | 34 | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): 35 | # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes 36 | from utils.general import LOGGER 37 | 38 | file = Path(file) 39 | assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" 40 | try: # url1 41 | LOGGER.info(f'Downloading {url} to {file}...') 42 | torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) 43 | assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check 44 | except Exception as e: # url2 45 | if file.exists(): 46 | file.unlink() # remove partial downloads 47 | LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') 48 | os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail 49 | finally: 50 | if not file.exists() or file.stat().st_size < min_bytes: # check 51 | if file.exists(): 52 | file.unlink() # remove partial downloads 53 | LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}") 54 | LOGGER.info('') 55 | 56 | 57 | def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'): 58 | # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc. 59 | from utils.general import LOGGER 60 | 61 | def github_assets(repository, version='latest'): 62 | # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...]) 63 | if version != 'latest': 64 | version = f'tags/{version}' # i.e. tags/v7.0 65 | response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api 66 | return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets 67 | 68 | file = Path(str(file).strip().replace("'", '')) 69 | if not file.exists(): 70 | # URL specified 71 | name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 72 | if str(file).startswith(('http:/', 'https:/')): # download 73 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 74 | file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 
75 | if Path(file).is_file():
76 | LOGGER.info(f'Found {url} locally at {file}') # file already exists
77 | else:
78 | safe_download(file=file, url=url, min_bytes=1E5)
79 | return file
80 | 
81 | # GitHub assets
82 | assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
83 | try:
84 | tag, assets = github_assets(repo, release)
85 | except Exception:
86 | try:
87 | tag, assets = github_assets(repo) # latest release
88 | except Exception:
89 | try:
90 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
91 | except Exception:
92 | tag = release
93 | 
94 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
95 | if name in assets:
96 | url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror
97 | safe_download(
98 | file,
99 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
100 | min_bytes=1E5,
101 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
102 | 
103 | return str(file)
104 | 
--------------------------------------------------------------------------------
/spark/downstream_d2/README.md:
--------------------------------------------------------------------------------
1 | ## About code isolation
2 | 
3 | This `downstream_d2` directory is isolated from the pre-training code; you can treat it as an independent codebase 🛠️.
4 | 
5 | 
6 | ## Fine-tuned ResNet-50 weights, log files, and performance
7 | 
8 | 

9 | 10 | [[`weights (pre-trained by SparK)`](https://drive.google.com/file/d/1H8605HbxGvrsu4x4rIoNr-Wkd7JkxFPQ/view?usp=share_link)] 11 | [[`weights (fine-tuned on COCO)`](https://drive.google.com/file/d/1Ue7SiQ1E_AwgtYo56Fm-iUlQPZ8vIwYj/view?usp=share_link)] 12 | [[`metrics.json`](https://drive.google.com/file/d/1wfbUWh4svV8sPWya_0PAhsLHVayDQRCi/view?usp=share_link)] 13 | [[`log.txt`](https://drive.google.com/file/d/11zVo_87pe9DMAmfNQK9FUfyjQWHTRKxV/view?usp=share_link)] 14 | [[`tensorboard file`](https://drive.google.com/file/d/1aM1qj8c3-Uka1dZuYmKhgp1lNJpeMDMl/view?usp=share_link)] 15 |
16 | 17 |

18 | 19 |

20 | 
21 | 
22 | ## Installing [Detectron2 v0.6](https://github.com/facebookresearch/detectron2/releases/tag/v0.6) before fine-tuning ResNet on COCO
23 | 
24 | 
25 | 1. Set up a Python environment, e.g.:
26 | ```shell script
27 | $ conda create -n spark python=3.8 -y
28 | $ conda activate spark
29 | ```
30 | 
31 | 2. Install `detectron2==0.6` (e.g., with `torch==1.10.0` and `cuda11.3`):
32 | ```shell script
33 | $ pip install detectron2==0.6 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
34 | ```
35 | 
36 | You can also find instructions for other PyTorch/CUDA versions on [this page](https://github.com/facebookresearch/detectron2/releases/tag/v0.6).
37 | 
38 | 
39 | 3. Put the COCO dataset folder at `downstream_d2/datasets/coco`.
40 | The folder should follow the [directory structure](https://github.com/facebookresearch/detectron2/tree/master/datasets) required by `Detectron2`, which should look like this:
41 | ```
42 | downstream_d2/datasets/coco:
43 | annotations/:
44 | captions_train2017.json captions_val2017.json
45 | instances_train2017.json instances_val2017.json
46 | person_keypoints_train2017.json person_keypoints_val2017.json
47 | train2017/:
48 | a_lot_of_images.jpg
49 | val2017/:
50 | a_lot_of_images.jpg
51 | ```
52 | 
53 | 
54 | ## Training from pre-trained checkpoint
55 | 
56 | The script file for COCO fine-tuning (object detection and instance segmentation) is [downstream_d2/train_net.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/train_net.py),
57 | which is a modification of [Detectron2's tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/v0.6/tools/train_net.py).
58 | 
59 | 
60 | Before fine-tuning a ResNet50 pre-trained by SparK, you should first convert our checkpoint file to a Detectron2-style `.pkl` file:
61 | 
62 | ```shell script
63 | $ cd /path/to/SparK/downstream_d2
64 | $ python3 convert-timm-to-d2.py /some/path/to/resnet50_1kpretrained_timm_style.pth d2-style.pkl
65 | ```
66 | 
67 | For a ResNet50, you should see a log reporting `len(state)==318`:
68 | ```text
69 | [convert] .pkl is generated! (from `/some/path/to/resnet50_1kpretrained_timm_style.pth`, to `d2-style.pkl`, len(state)==318)
70 | ```
71 | 
72 | Then run fine-tuning on a single machine with 8 GPUs:
73 | 
74 | ```shell script
75 | $ cd /path/to/SparK/downstream_d2
76 | $ python3 ./train_net.py --resume --num-gpus 8 --config-file ./configs/coco_R_50_FPN_CONV_1x_moco_adam.yaml \
77 |   MODEL.WEIGHTS d2-style.pkl \
78 |   OUTPUT_DIR <your_output_dir>
79 | ```
80 | 
81 | For multiple machines, add these args:
82 | ```shell script
83 | --num-machines <num_machines> --machine-rank <machine_rank> --dist-url <dist_url>
84 | ```
85 | 
86 | In `<your_output_dir>` you'll find the log files generated by `Detectron2`.
87 | 
88 | 
89 | ## Details: how we modify the official Detectron2 [tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/v0.6/tools/train_net.py) to get our [downstream_d2/train_net.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/train_net.py)
90 | 
91 | 1. We add two new hyperparameters:
92 | - str `SOLVER.OPTIMIZER`: use the 'ADAM' (same as 'ADAMW') or 'SGD' optimizer
93 | - float `SOLVER.LR_DECAY`: the decay ratio (from 0. to 1.) of the layer-wise learning rate decay trick
94 | 
95 | 2. We implement layer-wise lr decay in [downstream_d2/lr_decay.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/lr_decay.py).
96 | 
97 | 3. 
We write a script to convert our timm-style pre-trained ResNet weights to Detectron2-style in [downstream_d2/convert-timm-to-d2.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/convert-timm-to-d2.py). 98 | 99 | 4. We also add a hook for logging results to `cfg.OUTPUT_DIR/d2_coco_log.txt`. 100 | 101 | All of our modifications to the original are commented with `# [modification] ...` in [downstream_d2/train_net.py](https://github.com/keyu-tian/SparK/blob/main/downstream_d2/train_net.py) or other files. 102 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): 5 | """Creates or loads a YOLO model 6 | 7 | Arguments: 8 | name (str): model name 'yolov3' or path 'path/to/best.pt' 9 | pretrained (bool): load pretrained weights into the model 10 | channels (int): number of input channels 11 | classes (int): number of model classes 12 | autoshape (bool): apply YOLO .autoshape() wrapper to model 13 | verbose (bool): print all information to screen 14 | device (str, torch.device, None): device to use for model parameters 15 | 16 | Returns: 17 | YOLO model 18 | """ 19 | from pathlib import Path 20 | 21 | from models.common import AutoShape, DetectMultiBackend 22 | from models.experimental import attempt_load 23 | from models.yolo import ClassificationModel, DetectionModel, SegmentationModel 24 | from utils.downloads import attempt_download 25 | from utils.general import LOGGER, check_requirements, intersect_dicts, logging 26 | from utils.torch_utils import select_device 27 | 28 | if not verbose: 29 | LOGGER.setLevel(logging.WARNING) 30 | check_requirements(exclude=('opencv-python', 'tensorboard', 'thop')) 31 | name = Path(name) 32 | path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name # checkpoint path 33 | try: 34 | device = select_device(device) 35 | if pretrained and channels == 3 and classes == 80: 36 | try: 37 | model = DetectMultiBackend(path, device=device, fuse=autoshape) # detection model 38 | if autoshape: 39 | if model.pt and isinstance(model.model, ClassificationModel): 40 | LOGGER.warning('WARNING ⚠️ YOLO ClassificationModel is not yet AutoShape compatible. ' 41 | 'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).') 42 | elif model.pt and isinstance(model.model, SegmentationModel): 43 | LOGGER.warning('WARNING ⚠️ YOLO SegmentationModel is not yet AutoShape compatible. 
' 44 | 'You will not be able to run inference with this model.') 45 | else: 46 | model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS 47 | except Exception: 48 | model = attempt_load(path, device=device, fuse=False) # arbitrary model 49 | else: 50 | cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path 51 | model = DetectionModel(cfg, channels, classes) # create model 52 | if pretrained: 53 | ckpt = torch.load(attempt_download(path), map_location=device) # load 54 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 55 | csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect 56 | model.load_state_dict(csd, strict=False) # load 57 | if len(ckpt['model'].names) == classes: 58 | model.names = ckpt['model'].names # set class names attribute 59 | if not verbose: 60 | LOGGER.setLevel(logging.INFO) # reset to default 61 | return model.to(device) 62 | 63 | except Exception as e: 64 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 65 | s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.' 66 | raise Exception(s) from e 67 | 68 | 69 | def custom(path='path/to/model.pt', autoshape=True, _verbose=True, device=None): 70 | # YOLO custom or local model 71 | return _create(path, autoshape=autoshape, verbose=_verbose, device=device) 72 | 73 | 74 | if __name__ == '__main__': 75 | import argparse 76 | from pathlib import Path 77 | 78 | import numpy as np 79 | from PIL import Image 80 | 81 | from utils.general import cv2, print_args 82 | 83 | # Argparser 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument('--model', type=str, default='yolo', help='model name') 86 | opt = parser.parse_args() 87 | print_args(vars(opt)) 88 | 89 | # Model 90 | model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True) 91 | # model = custom(path='path/to/model.pt') # custom 92 | 93 | # Images 94 | imgs = [ 95 | 'data/images/zidane.jpg', # filename 96 | Path('data/images/zidane.jpg'), # Path 97 | 'https://ultralytics.com/images/zidane.jpg', # URI 98 | cv2.imread('data/images/bus.jpg')[:, :, ::-1], # OpenCV 99 | Image.open('data/images/bus.jpg'), # PIL 100 | np.zeros((320, 640, 3))] # numpy 101 | 102 | # Inference 103 | results = model(imgs, size=320) # batched inference 104 | 105 | # Results 106 | results.print() 107 | results.save() 108 | -------------------------------------------------------------------------------- /spark/pretrain/models/custom.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
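# Descriptive note on this file: `YourConvNet` adapts a YOLOv9 (GELAN) backbone for
# SparK masked pre-training. It wraps blocks from `v9back.common` (Silence, Bbackbone,
# Down0..Down4, CBLinear, CBFuse, Conv, ADown, RepNCSPELAN4) and exposes the two hooks
# a SparK encoder must provide: `get_downsample_ratio()` and `get_feature_map_channels()`.
# With `hierarchical=True`, forward() returns the four CBFuse feature maps
# (strides 4/8/16/32, channels 256/512/1024/1024) intended for SparK's sparse decoder.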
6 | 
7 | import torch
8 | import torch.nn as nn
9 | from typing import List
10 | from timm.models.registry import register_model
11 | 
12 | 
13 | # repo-local modules providing the YOLOv9 (GELAN) building blocks
14 | from HG.HGBlock import HGStem, HGBlock
15 | from HG.block import DWConv
16 | from v9back.common import *
17 | 
18 | 
19 | class YourConvNet(nn.Module):
20 | def __init__(self, *args, **kwargs):
21 | super().__init__()
22 | 
23 | self.mlist = nn.ModuleList(
24 | [Silence(),
25 | Bbackbone(),
26 | ]
27 | )
28 | self.d0 = Down0(64)  # multi-scale downsamplers feeding the CBLinear branches
29 | self.d1 = Down1(128)
30 | self.d2 = Down2(256)
31 | self.d3 = Down3(512)
32 | self.d4 = Down4(1024)
33 | self.alld = [self.d0, self.d1, self.d2, self.d3, self.d4]
34 | self.cblinear1 = CBLinear(64, [64])
35 | self.cblinear3 = CBLinear(128, [64, 128])
36 | self.cblinear5 = CBLinear(256, [64, 128, 256])
37 | self.cblinear7 = CBLinear(512, [64, 128, 256, 512])
38 | self.cblinear9 = CBLinear(1024, [64, 128, 256, 512, 1024])
39 | self.allcblinear = [self.cblinear1, self.cblinear3, self.cblinear5, self.cblinear7, self.cblinear9]
40 | # conv down 1
41 | self.conv1 = Conv(3, 64, 3, 2)
42 | self.cbfuse1 = CBFuse([0, 0, 0, 0, 0])
43 | 
44 | # conv down 2
45 | self.conv2 = Conv(64, 128, 3, 2)
46 | self.cbfuse2 = CBFuse([1, 1, 1, 1])
47 | self.rep2 = RepNCSPELAN4(128, 256, 128, 64, 2)
48 | # avg-conv down fuse 1
49 | self.adown3 = ADown(256, 256)
50 | self.cbfuse3 = CBFuse([2, 2, 2])
51 | self.rep3 = RepNCSPELAN4(256, 512, 256, 128, 2)
52 | 
53 | # avg-conv down fuse 2
54 | self.adown4 = ADown(512, 512)
55 | self.cbfuse4 = CBFuse([3, 3])
56 | self.rep4 = RepNCSPELAN4(512, 1024, 512, 256, 2)
57 | 
58 | # avg-conv down fuse 3
59 | self.adown5 = ADown(1024, 1024)
60 | self.cbfuse5 = CBFuse([4])
61 | self.rep5 = RepNCSPELAN4(1024, 1024, 512, 256, 2)
62 | 
63 | def get_downsample_ratio(self) -> int:
64 | return 32
65 | 
66 | def get_feature_map_channels(self) -> List[int]:
67 | return [256, 512, 1024, 1024]
68 | 
69 | def forward(self, x: torch.Tensor, hierarchical=False):
70 | if hierarchical:
71 | origin = x.clone()  # keep the raw input for the stride-2 stem branch
72 | ls = []
73 | tmp = []
74 | bx = None
75 | for index, modules in enumerate(self.mlist):
76 | x = modules(x)
77 | if index == 1:
78 | bx = x  # backbone output reused by every CBLinear branch
79 | for i in range(5):
80 | tmp.append(self.allcblinear[i](self.alld[i](bx)))
81 | 
82 | fuse1 = self.cbfuse1([tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], self.conv1(origin)])
83 | fuse2 = self.cbfuse2([tmp[1], tmp[2], tmp[3], tmp[4], self.conv2(fuse1)])
84 | fuse2 = self.rep2(fuse2)
85 | 
86 | fuse3 = self.cbfuse3([tmp[2], tmp[3], tmp[4], self.adown3(fuse2)])
87 | fuse3 = self.rep3(fuse3)
88 | 
89 | fuse4 = self.cbfuse4([tmp[3], tmp[4], self.adown4(fuse3)])
90 | fuse4 = self.rep4(fuse4)
91 | 
92 | fuse5 = self.cbfuse5([tmp[4], self.adown5(fuse4)])
93 | fuse5 = self.rep5(fuse5)
94 | 
95 | ls.append(fuse2)
96 | ls.append(fuse3)
97 | ls.append(fuse4)
98 | ls.append(fuse5)
99 | return ls  # feature maps at strides 4/8/16/32
100 | else:
101 | for modules in self.mlist:
102 | x = modules(x)
103 | return x
104 | 
105 | 
106 | @register_model
107 | def V9back(pretrained=False, **kwargs):
108 | return YourConvNet(**kwargs)
109 | 
110 | 
111 | @torch.no_grad()
112 | def convnet_test():
113 | from timm.models import create_model
114 | cnn = create_model('V9back')
115 | print('get_downsample_ratio:', cnn.get_downsample_ratio())
116 | print('get_feature_map_channels:', cnn.get_feature_map_channels())
117 | 
118 | downsample_ratio = cnn.get_downsample_ratio()
119 | feature_map_channels = cnn.get_feature_map_channels()
120 | 
121 | # check the forward function
122 | B, C, H, W = 4, 3, 224, 224
123 | inp = torch.rand(B, C, H, 
W) 124 | feats = cnn(inp, hierarchical=True) 125 | assert isinstance(feats, list) 126 | assert len(feats) == len(feature_map_channels) 127 | print([tuple(t.shape) for t in feats]) 128 | 129 | # check the downsample ratio 130 | feats = cnn(inp, hierarchical=True) 131 | assert feats[-1].shape[-2] == H // downsample_ratio 132 | assert feats[-1].shape[-1] == W // downsample_ratio 133 | 134 | # check the channel number 135 | for feat, ch in zip(feats, feature_map_channels): 136 | assert feat.ndim == 4 137 | assert feat.shape[1] == ch 138 | 139 | 140 | if __name__ == '__main__': 141 | convnet_test() 142 | -------------------------------------------------------------------------------- /utils/loggers/comet/comet_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from urllib.parse import urlparse 4 | 5 | try: 6 | import comet_ml 7 | except (ModuleNotFoundError, ImportError): 8 | comet_ml = None 9 | 10 | import yaml 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | COMET_PREFIX = "comet://" 15 | COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") 16 | COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt") 17 | 18 | 19 | def download_model_checkpoint(opt, experiment): 20 | model_dir = f"{opt.project}/{experiment.name}" 21 | os.makedirs(model_dir, exist_ok=True) 22 | 23 | model_name = COMET_MODEL_NAME 24 | model_asset_list = experiment.get_model_asset_list(model_name) 25 | 26 | if len(model_asset_list) == 0: 27 | logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}") 28 | return 29 | 30 | model_asset_list = sorted( 31 | model_asset_list, 32 | key=lambda x: x["step"], 33 | reverse=True, 34 | ) 35 | logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list} 36 | 37 | resource_url = urlparse(opt.weights) 38 | checkpoint_filename = resource_url.query 39 | 40 | if checkpoint_filename: 41 | asset_id = logged_checkpoint_map.get(checkpoint_filename) 42 | else: 43 | asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME) 44 | checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME 45 | 46 | if asset_id is None: 47 | logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment") 48 | return 49 | 50 | try: 51 | logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}") 52 | asset_filename = checkpoint_filename 53 | 54 | model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 55 | model_download_path = f"{model_dir}/{asset_filename}" 56 | with open(model_download_path, "wb") as f: 57 | f.write(model_binary) 58 | 59 | opt.weights = model_download_path 60 | 61 | except Exception as e: 62 | logger.warning("COMET WARNING: Unable to download checkpoint from Comet") 63 | logger.exception(e) 64 | 65 | 66 | def set_opt_parameters(opt, experiment): 67 | """Update the opts Namespace with parameters 68 | from Comet's ExistingExperiment when resuming a run 69 | 70 | Args: 71 | opt (argparse.Namespace): Namespace of command line options 72 | experiment (comet_ml.APIExperiment): Comet API Experiment object 73 | """ 74 | asset_list = experiment.get_asset_list() 75 | resume_string = opt.resume 76 | 77 | for asset in asset_list: 78 | if asset["fileName"] == "opt.yaml": 79 | asset_id = asset["assetId"] 80 | asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 81 | opt_dict = yaml.safe_load(asset_binary) 82 | for key, value in 
opt_dict.items(): 83 | setattr(opt, key, value) 84 | opt.resume = resume_string 85 | 86 | # Save hyperparameters to YAML file 87 | # Necessary to pass checks in training script 88 | save_dir = f"{opt.project}/{experiment.name}" 89 | os.makedirs(save_dir, exist_ok=True) 90 | 91 | hyp_yaml_path = f"{save_dir}/hyp.yaml" 92 | with open(hyp_yaml_path, "w") as f: 93 | yaml.dump(opt.hyp, f) 94 | opt.hyp = hyp_yaml_path 95 | 96 | 97 | def check_comet_weights(opt): 98 | """Downloads model weights from Comet and updates the 99 | weights path to point to saved weights location 100 | 101 | Args: 102 | opt (argparse.Namespace): Command Line arguments passed 103 | to YOLOv5 training script 104 | 105 | Returns: 106 | None/bool: Return True if weights are successfully downloaded 107 | else return None 108 | """ 109 | if comet_ml is None: 110 | return 111 | 112 | if isinstance(opt.weights, str): 113 | if opt.weights.startswith(COMET_PREFIX): 114 | api = comet_ml.API() 115 | resource = urlparse(opt.weights) 116 | experiment_path = f"{resource.netloc}{resource.path}" 117 | experiment = api.get(experiment_path) 118 | download_model_checkpoint(opt, experiment) 119 | return True 120 | 121 | return None 122 | 123 | 124 | def check_comet_resume(opt): 125 | """Restores run parameters to its original state based on the model checkpoint 126 | and logged Experiment parameters. 127 | 128 | Args: 129 | opt (argparse.Namespace): Command Line arguments passed 130 | to YOLOv5 training script 131 | 132 | Returns: 133 | None/bool: Return True if the run is restored successfully 134 | else return None 135 | """ 136 | if comet_ml is None: 137 | return 138 | 139 | if isinstance(opt.resume, str): 140 | if opt.resume.startswith(COMET_PREFIX): 141 | api = comet_ml.API() 142 | resource = urlparse(opt.resume) 143 | experiment_path = f"{resource.netloc}{resource.path}" 144 | experiment = api.get(experiment_path) 145 | set_opt_parameters(opt, experiment) 146 | download_model_checkpoint(opt, experiment) 147 | 148 | return True 149 | 150 | return None 151 | -------------------------------------------------------------------------------- /spark/downstream_mmdet/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 
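# Descriptive note on this file: layer-wise learning-rate decay for ConvNeXt backbones.
# Each parameter is mapped to a layer id (stem / downsample layers / stage blocks below)
# and its lr is scaled by decay_rate ** (num_layers - layer_id - 1), where num_layers is
# paramwise_cfg['num_layers'] + 2. For the config used earlier in this repo
# (decay_rate=0.65, num_layers=12, hence 14 levels), the stem group (layer 0) trains at
# 0.65**13 ≈ 0.0037x the base lr, while the head group (layer 13) keeps the full base lr.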
7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 
86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /utils/loggers/clearml/hpo.py: -------------------------------------------------------------------------------- 1 | from clearml import Task 2 | # Connecting ClearML with the current process, 3 | # from here on everything is logged automatically 4 | from clearml.automation import HyperParameterOptimizer, UniformParameterRange 5 | from clearml.automation.optuna import OptimizerOptuna 6 | 7 | task = Task.init(project_name='Hyper-Parameter Optimization', 8 | task_name='YOLOv5', 9 | task_type=Task.TaskTypes.optimizer, 10 | reuse_last_task_id=False) 11 | 12 | # Example use case: 13 | optimizer = HyperParameterOptimizer( 14 | # This is the experiment we want to optimize 15 | base_task_id='', 16 | # here we define the hyper-parameters to optimize 17 | # Notice: The parameter name should exactly match what you see in the UI: / 18 | # For Example, here we see in the base experiment a section Named: "General" 19 | # under it a parameter named "batch_size", this becomes "General/batch_size" 20 | # If you have `argparse` for example, then arguments will appear under the "Args" section, 21 | # and you should instead pass "Args/batch_size" 22 | hyper_parameters=[ 23 | UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), 24 | UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), 25 | UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), 26 | UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), 27 | UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), 28 | UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), 29 | UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), 30 | UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), 31 | UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), 32 | UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), 33 | UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), 34 | 
UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0),
35 | UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7),
36 | UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0),
37 | UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0),
38 | UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1),
39 | UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9),
40 | UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9),
41 | UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0),
42 | UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9),
43 | UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9),
44 | UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0),
45 | UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001),
46 | UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0),
47 | UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0),
48 | UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0),
49 | UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0),
50 | UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)],
51 | # this is the objective metric we want to maximize/minimize
52 | objective_metric_title='metrics',
53 | objective_metric_series='mAP_0.5',
54 | # now we decide if we want to maximize it or minimize it (accuracy we maximize)
55 | objective_metric_sign='max',
56 | # let us limit the number of concurrent experiments;
57 | # this in turn will make sure we don't bombard the scheduler with experiments.
58 | # if we have an auto-scaler connected, this, by proxy, will limit the number of machines
59 | max_number_of_concurrent_tasks=1,
60 | # this is the optimizer class (actually doing the optimization)
61 | # Currently, we can choose from GridSearch, RandomSearch, OptimizerBOHB (Bayesian optimization Hyper-Band) or OptimizerOptuna
62 | optimizer_class=OptimizerOptuna,
63 | # If specified, only the top K performing Tasks will be kept; the others will be automatically archived
64 | save_top_k_tasks_only=5, # 5,
65 | compute_time_limit=None,
66 | total_max_jobs=20,
67 | min_iteration_per_job=None,
68 | max_iteration_per_job=None,
69 | )
70 | 
71 | # report every 10 seconds; this is way too often, but we are testing here
72 | optimizer.set_report_period(10 / 60)
73 | # You can also use the line below instead to run all the optimizer tasks locally, without using queues or an agent
74 | # an_optimizer.start_locally(job_complete_callback=job_complete_callback)
75 | # set the time limit for the optimization process (2 hours)
76 | optimizer.set_time_limit(in_minutes=120.0)
77 | # Start the optimization process in the local environment
78 | optimizer.start_locally()
79 | # wait until the process is done (notice we are controlling the optimization process in the background)
80 | optimizer.wait()
81 | # make sure background optimization has stopped
82 | optimizer.stop()
83 | 
84 | print('We are done, good bye')
85 | 
--------------------------------------------------------------------------------
/spark/downstream_mmdet/mmcv_custom/customized_text.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | 
3 | # All rights reserved.
4 | 
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | 
8 | 
9 | import datetime
10 | from collections import OrderedDict
11 | 
12 | import torch
13 | 
14 | import mmcv
15 | from mmcv.runner import HOOKS
16 | from mmcv.runner import TextLoggerHook
17 | 
18 | 
19 | @HOOKS.register_module()
20 | class CustomizedTextLoggerHook(TextLoggerHook):
21 | """Customized Text Logger hook.
22 | 
23 | This logger prints out both lr and layer_0_lr.
24 | 
25 | """
26 | 
27 | def _log_info(self, log_dict, runner):
28 | # print exp name for users to distinguish experiments
29 | # at every ``interval_exp_name`` iterations and the end of each epoch
30 | if runner.meta is not None and 'exp_name' in runner.meta:
31 | if (self.every_n_iters(runner, self.interval_exp_name)) or (
32 | self.by_epoch and self.end_of_epoch(runner)):
33 | exp_info = f'Exp name: {runner.meta["exp_name"]}'
34 | runner.logger.info(exp_info)
35 | 
36 | if log_dict['mode'] == 'train':
37 | lr_str = {}
38 | for lr_type in ['lr', 'layer_0_lr']:
39 | if isinstance(log_dict[lr_type], dict):
40 | lr_str[lr_type] = []
41 | for k, val in log_dict[lr_type].items():
42 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}')  # append to the per-type list, not the dict
43 | lr_str[lr_type] = ' '.join(lr_str[lr_type])
44 | else:
45 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}'
46 | 
47 | # by epoch: Epoch [4][100/1000]
48 | # by iter: Iter [100/100000]
49 | if self.by_epoch:
50 | log_str = f'Epoch [{log_dict["epoch"]}]' \
51 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
52 | else:
53 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
54 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, '
55 | 
56 | if 'time' in log_dict.keys():
57 | self.time_sec_tot += (log_dict['time'] * self.interval)
58 | time_sec_avg = self.time_sec_tot / (
59 | runner.iter - self.start_iter + 1)
60 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
61 | eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
62 | log_str += f'eta: {eta_str}, '
63 | log_str += f'time: {log_dict["time"]:.3f}, ' \
64 | f'data_time: {log_dict["data_time"]:.3f}, '
65 | # statistic memory
66 | if torch.cuda.is_available():
67 | log_str += f'memory: {log_dict["memory"]}, '
68 | else:
69 | # val/test time
70 | # here 1000 is the length of the val dataloader
71 | # by epoch: Epoch[val] [4][1000]
72 | # by iter: Iter[val] [1000]
73 | if self.by_epoch:
74 | log_str = f'Epoch({log_dict["mode"]}) ' \
75 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
76 | else:
77 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
78 | 
79 | log_items = []
80 | for name, val in log_dict.items():
81 | # TODO: resolve this hack
82 | # these items have already been put in log_str
83 | if name in [
84 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time',
85 | 'memory', 'epoch'
86 | ]:
87 | continue
88 | if isinstance(val, float):
89 | val = f'{val:.4f}'
90 | log_items.append(f'{name}: {val}')
91 | log_str += ', '.join(log_items)
92 | 
93 | runner.logger.info(log_str)
94 | 
95 | 
96 | def log(self, runner):
97 | if 'eval_iter_num' in runner.log_buffer.output:
98 | # this doesn't modify runner.iter and applies regardless of by_epoch
99 | cur_iter = runner.log_buffer.output.pop('eval_iter_num')
100 | else:
101 | cur_iter = self.get_iter(runner, inner_iter=True)
102 | 
103 | log_dict = OrderedDict(
104 | mode=self.get_mode(runner),
105 | epoch=self.get_epoch(runner),
106 | iter=cur_iter)
107 | 
108 | # record lr and layer_0_lr
109 | cur_lr = runner.current_lr()
110 | if 
isinstance(cur_lr, list): 111 | log_dict['layer_0_lr'] = min(cur_lr) 112 | log_dict['lr'] = max(cur_lr) 113 | else: 114 | assert isinstance(cur_lr, dict) 115 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 116 | for k, lr_ in cur_lr.items(): 117 | assert isinstance(lr_, list) 118 | log_dict['layer_0_lr'].update({k: min(lr_)}) 119 | log_dict['lr'].update({k: max(lr_)}) 120 | 121 | if 'time' in runner.log_buffer.output: 122 | # statistic memory 123 | if torch.cuda.is_available(): 124 | log_dict['memory'] = self._get_max_memory(runner) 125 | 126 | log_dict = dict(log_dict, **runner.log_buffer.output) 127 | 128 | self._log_info(log_dict, runner) 129 | self._dump_log(log_dict, runner) 130 | return log_dict 131 | -------------------------------------------------------------------------------- /spark/pretrain/models/convnext.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | # This file is basically a copy of: https://github.com/facebookresearch/ConvNeXt/blob/06f7b05f922e21914916406141f50f82b4a15852/models/convnext.py 8 | from typing import List 9 | 10 | import torch 11 | import torch.nn as nn 12 | from timm.models.layers import trunc_normal_ 13 | from timm.models.registry import register_model 14 | 15 | from encoder import SparseConvNeXtBlock, SparseConvNeXtLayerNorm 16 | 17 | 18 | class ConvNeXt(nn.Module): 19 | r""" ConvNeXt 20 | A PyTorch impl of : `A ConvNet for the 2020s` - 21 | https://arxiv.org/pdf/2201.03545.pdf 22 | Args: 23 | in_chans (int): Number of input image channels. Default: 3 24 | num_classes (int): Number of classes for classification head. Default: 1000 25 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 26 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 27 | drop_path_rate (float): Stochastic depth rate. Default: 0. 28 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 29 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. 
30 | """ 31 | 32 | def __init__(self, in_chans=3, num_classes=1000, 33 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 34 | layer_scale_init_value=1e-6, head_init_scale=1., global_pool='avg', 35 | sparse=True, 36 | ): 37 | super().__init__() 38 | self.dims: List[int] = dims 39 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 40 | stem = nn.Sequential( 41 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 42 | SparseConvNeXtLayerNorm(dims[0], eps=1e-6, data_format="channels_first", sparse=sparse) 43 | ) 44 | self.downsample_layers.append(stem) 45 | for i in range(3): 46 | downsample_layer = nn.Sequential( 47 | SparseConvNeXtLayerNorm(dims[i], eps=1e-6, data_format="channels_first", sparse=sparse), 48 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2), 49 | ) 50 | self.downsample_layers.append(downsample_layer) 51 | 52 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 53 | self.drop_path_rate = drop_path_rate 54 | self.layer_scale_init_value = layer_scale_init_value 55 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 56 | cur = 0 57 | for i in range(4): 58 | stage = nn.Sequential( 59 | *[SparseConvNeXtBlock(dim=dims[i], drop_path=dp_rates[cur + j], 60 | layer_scale_init_value=layer_scale_init_value, sparse=sparse) for j in range(depths[i])] 61 | ) 62 | self.stages.append(stage) 63 | cur += depths[i] 64 | self.depths = depths 65 | 66 | self.apply(self._init_weights) 67 | if num_classes > 0: 68 | self.norm = SparseConvNeXtLayerNorm(dims[-1], eps=1e-6, sparse=False) # final norm layer for LE/FT; should not be sparse 69 | self.fc = nn.Linear(dims[-1], num_classes) 70 | else: 71 | self.norm = nn.Identity() 72 | self.fc = nn.Identity() 73 | 74 | def _init_weights(self, m): 75 | if isinstance(m, (nn.Conv2d, nn.Linear)): 76 | trunc_normal_(m.weight, std=.02) 77 | nn.init.constant_(m.bias, 0) 78 | 79 | def get_downsample_ratio(self) -> int: 80 | return 32 81 | 82 | def get_feature_map_channels(self) -> List[int]: 83 | return self.dims 84 | 85 | def forward(self, x, hierarchical=False): 86 | if hierarchical: 87 | ls = [] 88 | for i in range(4): 89 | x = self.downsample_layers[i](x) 90 | x = self.stages[i](x) 91 | ls.append(x) 92 | return ls 93 | else: 94 | return self.fc(self.norm(x.mean([-2, -1]))) # (B, C, H, W) =mean=> (B, C) =norm&fc=> (B, NumCls) 95 | 96 | def get_classifier(self): 97 | return self.fc 98 | 99 | def extra_repr(self): 100 | return f'drop_path_rate={self.drop_path_rate}, layer_scale_init_value={self.layer_scale_init_value:g}' 101 | 102 | 103 | @register_model 104 | def convnext_tiny(pretrained=False, in_22k=False, **kwargs): 105 | model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 106 | return model 107 | 108 | 109 | @register_model 110 | def convnext_small(pretrained=False, in_22k=False, **kwargs): 111 | model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs) 112 | return model 113 | 114 | 115 | @register_model 116 | def convnext_base(pretrained=False, in_22k=False, **kwargs): 117 | model = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs) 118 | return model 119 | 120 | 121 | @register_model 122 | def convnext_large(pretrained=False, in_22k=False, **kwargs): 123 | model = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs) 124 | return model 125 | 126 | -------------------------------------------------------------------------------- 
/spark/downstream_d2/lr_decay.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Set, Optional, Callable, Any 2 | import torch 3 | import copy 4 | 5 | from detectron2.solver.build import reduce_param_groups 6 | 7 | 8 | def lr_factor_func(para_name: str, is_resnet50, dec: float, debug=False) -> float: 9 | if dec == 0: 10 | dec = 1. 11 | 12 | N = 5 if is_resnet50 else 11 13 | if '.stem.' in para_name: 14 | layer_id = 0 15 | elif '.res' in para_name: 16 | ls = para_name.split('.res')[1].split('.') 17 | if ls[0].isnumeric() and ls[1].isnumeric(): 18 | stage_id, block_id = int(ls[0]), int(ls[1]) 19 | if stage_id == 2: # res2 20 | layer_id = 1 21 | elif stage_id == 3: # res3 22 | layer_id = 2 23 | elif stage_id == 4: # res4 24 | layer_id = 3 + block_id // 3 # 3, 4 or 4, 5 25 | else: # res5 26 | layer_id = N 27 | else: 28 | assert para_name.startswith('roi_heads.res5.norm.') 29 | layer_id = N + 1 # roi_heads.res5.norm.weight and roi_heads.res5.norm.bias of C4 30 | else: 31 | layer_id = N + 1 32 | 33 | exp = N + 1 - layer_id 34 | return f'{dec:g} ** {exp}' if debug else dec ** exp 35 | 36 | 37 | # [modification] see: https://github.com/facebookresearch/detectron2/blob/v0.6/detectron2/solver/build.py#L134 38 | # add the `lr_factor_func` to implement lr decay 39 | def get_default_optimizer_params( 40 | model: torch.nn.Module, 41 | base_lr: Optional[float] = None, 42 | weight_decay: Optional[float] = None, 43 | weight_decay_norm: Optional[float] = None, 44 | bias_lr_factor: Optional[float] = 1.0, 45 | weight_decay_bias: Optional[float] = None, 46 | lr_factor_func: Optional[Callable] = None, 47 | overrides: Optional[Dict[str, Dict[str, float]]] = None, 48 | ) -> List[Dict[str, Any]]: 49 | """ 50 | Get default param list for optimizer, with support for a few types of 51 | overrides. If no overrides needed, this is equivalent to `model.parameters()`. 52 | 53 | Args: 54 | base_lr: lr for every group by default. Can be omitted to use the one in optimizer. 55 | weight_decay: weight decay for every group by default. Can be omitted to use the one 56 | in optimizer. 57 | weight_decay_norm: override weight decay for params in normalization layers 58 | bias_lr_factor: multiplier of lr for bias parameters. 59 | weight_decay_bias: override weight decay for bias parameters. 60 | lr_factor_func: function to calculate lr decay rate by mapping the parameter names to 61 | corresponding lr decay rate. Note that setting this option requires 62 | also setting ``base_lr``. 63 | overrides: if not `None`, provides values for optimizer hyperparameters 64 | (LR, weight decay) for module parameters with a given name; e.g. 65 | ``{"embedding": {"lr": 0.01, "weight_decay": 0.1}}`` will set the LR and 66 | weight decay values for all module parameters named `embedding`. 67 | 68 | For common detection models, ``weight_decay_norm`` is the only option 69 | needed to be set. ``bias_lr_factor,weight_decay_bias`` are legacy settings 70 | from Detectron1 that are not found useful. 
71 | 72 | Example: 73 | :: 74 | torch.optim.SGD(get_default_optimizer_params(model, weight_decay_norm=0), 75 | lr=0.01, weight_decay=1e-4, momentum=0.9) 76 | """ 77 | if overrides is None: 78 | overrides = {} 79 | defaults = {} 80 | if base_lr is not None: 81 | defaults["lr"] = base_lr 82 | if weight_decay is not None: 83 | defaults["weight_decay"] = weight_decay 84 | bias_overrides = {} 85 | if bias_lr_factor is not None and bias_lr_factor != 1.0: 86 | # NOTE: unlike Detectron v1, we now by default make bias hyperparameters 87 | # exactly the same as regular weights. 88 | if base_lr is None: 89 | raise ValueError("bias_lr_factor requires base_lr") 90 | bias_overrides["lr"] = base_lr * bias_lr_factor 91 | if weight_decay_bias is not None: 92 | bias_overrides["weight_decay"] = weight_decay_bias 93 | if len(bias_overrides): 94 | if "bias" in overrides: 95 | raise ValueError("Conflicting overrides for 'bias'") 96 | overrides["bias"] = bias_overrides 97 | if lr_factor_func is not None: 98 | if base_lr is None: 99 | raise ValueError("lr_factor_func requires base_lr") 100 | norm_module_types = ( 101 | torch.nn.BatchNorm1d, 102 | torch.nn.BatchNorm2d, 103 | torch.nn.BatchNorm3d, 104 | torch.nn.SyncBatchNorm, 105 | # NaiveSyncBatchNorm inherits from BatchNorm2d 106 | torch.nn.GroupNorm, 107 | torch.nn.InstanceNorm1d, 108 | torch.nn.InstanceNorm2d, 109 | torch.nn.InstanceNorm3d, 110 | torch.nn.LayerNorm, 111 | torch.nn.LocalResponseNorm, 112 | ) 113 | params: List[Dict[str, Any]] = [] 114 | memo: Set[torch.nn.parameter.Parameter] = set() 115 | for module_name, module in model.named_modules(): 116 | for module_param_name, value in module.named_parameters(recurse=False): 117 | if not value.requires_grad: 118 | continue 119 | # Avoid duplicating parameters 120 | if value in memo: 121 | continue 122 | memo.add(value) 123 | 124 | hyperparams = copy.copy(defaults) 125 | if isinstance(module, norm_module_types) and weight_decay_norm is not None: 126 | hyperparams["weight_decay"] = weight_decay_norm 127 | if lr_factor_func is not None: 128 | hyperparams["lr"] *= lr_factor_func(f"{module_name}.{module_param_name}") 129 | 130 | hyperparams.update(overrides.get(module_param_name, {})) 131 | params.append({"params": [value], **hyperparams}) 132 | return reduce_param_groups(params) 133 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/arg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
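# Descriptive note on this file: fine-tuning arguments. Each tuple in HP_DEFAULT_VALUES
# below is read positionally against HP_DEFAULT_NAMES; e.g. 'resnet50' unpacks to
# bs=4096, ep=300, wp_ep=5, opt='lamb', base_lr=0.002, lr_scale=0.7, wd=0.02,
# mixup=0.1, rep_aug=0, drop_path=0.05, ema=0.9999. Any value left falsy on the
# command line is filled from this table in get_args(), and the effective lr follows
# the linear scaling rule lr = base_lr * glb_batch_size / 256.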
6 | 7 | import json 8 | import os 9 | import sys 10 | 11 | from tap import Tap 12 | 13 | HP_DEFAULT_NAMES = ['bs', 'ep', 'wp_ep', 'opt', 'base_lr', 'lr_scale', 'wd', 'mixup', 'rep_aug', 'drop_path', 'ema'] 14 | HP_DEFAULT_VALUES = { 15 | 'convnext_small': (4096, 400, 20, 'adam', 0.0002, 0.7, 0.01, 0.8, 3, 0.3, 0.9999), 16 | 'convnext_base': (4096, 400, 20, 'adam', 0.0001, 0.7, 0.01, 0.8, 3, 0.4, 0.9999), 17 | 'convnext_large': (4096, 200, 10, 'adam', 0.0001, 0.7, 0.02, 0.8, 3, 0.5, 0.9999), 18 | 'convnext_large_384': (1024, 200, 20, 'adam', 0.00006, 0.7, 0.01, 0.8, 3, 0.5, 0.99995), 19 | 20 | 'resnet50': (4096, 300, 5, 'lamb', 0.002, 0.7, 0.02, 0.1, 0, 0.05, 0.9999), 21 | 'resnet101': (4096, 300, 5, 'lamb', 0.001, 0.8, 0.02, 0.1, 0, 0.2, 0.9999), 22 | 'resnet152': (4096, 300, 5, 'lamb', 0.001, 0.8, 0.02, 0.1, 0, 0.2, 0.9999), 23 | 'resnet200': (4096, 300, 5, 'lamb', 0.001, 0.8, 0.02, 0.1, 0, 0.2, 0.9999), 24 | } 25 | 26 | 27 | class FineTuneArgs(Tap): 28 | # environment 29 | exp_name: str 30 | exp_dir: str 31 | data_path: str 32 | model: str 33 | resume_from: str = '' # resume from some checkpoint.pth 34 | 35 | img_size: int = 640 36 | dataloader_workers: int = 8 37 | 38 | # ImageNet classification fine-tuning hyperparameters; see `HP_DEFAULT_VALUES` above for detailed default values 39 | # - batch size, epoch 40 | bs: int = 0 # global batch size (== batch_size_per_gpu * num_gpus) 41 | ep: int = 0 # number of epochs 42 | wp_ep: int = 0 # epochs for warmup 43 | 44 | # - optimization 45 | opt: str = '' # optimizer; 'adam' or 'lamb' 46 | base_lr: float = 0. # lr == base_lr * (bs) 47 | lr_scale: float = 0. # see file `lr_decay.py` for more details 48 | clip: int = -1 # use gradient clipping if clip > 0 49 | 50 | # - regularization tricks 51 | wd: float = 0. # weight decay 52 | mixup: float = 0. # use mixup if mixup > 0 53 | rep_aug: int = 0 # use repeated augmentation if rep_aug > 0 54 | drop_path: float = 0. # drop_path ratio 55 | 56 | # - other tricks 57 | ema: float = 0. # use EMA if ema > 0 58 | sbn: bool = True # use SyncBatchNorm 59 | 60 | # NO NEED TO SPECIFIED; each of these args would be updated in runtime automatically 61 | lr: float = None 62 | batch_size_per_gpu: int = 0 63 | glb_batch_size: int = 0 64 | device: str = 'cpu' 65 | world_size: int = 1 66 | global_rank: int = 0 67 | local_rank: int = 0 # we DO USE this arg 68 | is_master: bool = False 69 | is_local_master: bool = False 70 | cmd: str = ' '.join(sys.argv[1:]) 71 | commit_id: str = os.popen(f'git rev-parse HEAD').read().strip() 72 | commit_msg: str = os.popen(f'git log -1').read().strip().splitlines()[-1].strip() 73 | log_txt_name: str = '{args.exp_dir}/pretrain_log.txt' 74 | tb_lg_dir: str = '' # tensorboard log directory 75 | 76 | train_loss: float = 0. 77 | train_acc: float = 0. 78 | best_val_acc: float = 0. 
79 | cur_ep: str = '' 80 | remain_time: str = '' 81 | finish_time: str = '' 82 | first_logging: bool = True 83 | 84 | def log_epoch(self): 85 | if not self.is_local_master: 86 | return 87 | 88 | if self.first_logging: 89 | self.first_logging = False 90 | with open(self.log_txt_name, 'w') as fp: 91 | json.dump({ 92 | 'name': self.exp_name, 'cmd': self.cmd, 'git_commit_id': self.commit_id, 'git_commit_msg': self.commit_msg, 93 | 'model': self.model, 94 | }, fp) 95 | fp.write('\n\n') 96 | 97 | with open(self.log_txt_name, 'a') as fp: 98 | json.dump({ 99 | 'cur_ep': self.cur_ep, 100 | 'train_L': self.train_loss, 'train_acc': self.train_acc, 101 | 'best_val_acc': self.best_val_acc, 102 | 'rema': self.remain_time, 'fini': self.finish_time, 103 | }, fp) 104 | fp.write('\n') 105 | 106 | 107 | def get_args(world_size, global_rank, local_rank, device) -> FineTuneArgs: 108 | # parse args and prepare directories 109 | args = FineTuneArgs(explicit_bool=True).parse_args() 110 | d_name, b_name = os.path.dirname(os.path.abspath(args.exp_dir)), os.path.basename(os.path.abspath(args.exp_dir)) 111 | b_name = ''.join(ch if (ch.isalnum() or ch == '-') else '_' for ch in b_name) 112 | args.exp_dir = os.path.join(d_name, b_name) 113 | os.makedirs(args.exp_dir, exist_ok=True) 114 | args.log_txt_name = os.path.join(args.exp_dir, 'finetune_log.txt') 115 | 116 | args.tb_lg_dir = args.tb_lg_dir or os.path.join(args.exp_dir, 'tensorboard_log') 117 | try: os.makedirs(args.tb_lg_dir, exist_ok=True) 118 | except: pass 119 | 120 | # fill in args.bs, args.ep, etc. with their default values (if their values are not explicitly specified, i.e., if bool(they) == False) 121 | if args.model == 'convnext_large' and args.img_size == 384: 122 | default_values = HP_DEFAULT_VALUES['convnext_large_384'] 123 | else: 124 | default_values = HP_DEFAULT_VALUES[args.model] 125 | for k, v in zip(HP_DEFAULT_NAMES, default_values): 126 | if bool(getattr(args, k)) == False: 127 | setattr(args, k, v) 128 | 129 | # update other runtime args 130 | args.world_size, args.global_rank, args.local_rank, args.device = world_size, global_rank, local_rank, device 131 | args.is_master = global_rank == 0 132 | args.is_local_master = local_rank == 0 133 | args.batch_size_per_gpu = args.bs // world_size 134 | args.glb_batch_size = args.batch_size_per_gpu * world_size 135 | args.lr = args.base_lr * args.glb_batch_size / 256 136 | 137 | return args 138 | -------------------------------------------------------------------------------- /spark/downstream_imagenet/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ByteDance, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
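# Descriptive note on this file: distributed fine-tuning helpers. init_distributed_environ()
# reads the RANK environment variable (set by launchers such as torchrun), pins each process
# to cuda device RANK % num_gpus, initializes an NCCL process group, and patches print()
# so that only local rank 0 emits timestamped log lines.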
6 | 7 | import datetime 8 | import os 9 | import sys 10 | from functools import partial 11 | from typing import List, Tuple, Callable 12 | 13 | import pytz 14 | import torch 15 | import torch.distributed as tdist 16 | import torch.multiprocessing as tmp 17 | from timm import create_model 18 | from timm.loss import SoftTargetCrossEntropy, BinaryCrossEntropy 19 | from timm.optim import AdamW, Lamb 20 | from timm.utils import ModelEmaV2 21 | from torch.nn.parallel import DistributedDataParallel 22 | from torch.optim.optimizer import Optimizer 23 | 24 | from arg import FineTuneArgs 25 | from downstream_imagenet.mixup import BatchMixup 26 | from lr_decay import get_param_groups 27 | 28 | 29 | def time_str(for_dirname=False): 30 | return datetime.datetime.now(tz=pytz.timezone('Asia/Shanghai')).strftime('%m-%d_%H-%M-%S' if for_dirname else '[%m-%d %H:%M:%S]') 31 | 32 | 33 | def init_distributed_environ(): 34 | # ref: https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py#L29 35 | if tmp.get_start_method(allow_none=True) is None: 36 | tmp.set_start_method('spawn') 37 | global_rank, num_gpus = int(os.environ.get('RANK', 'error')), torch.cuda.device_count() 38 | local_rank = global_rank % num_gpus 39 | torch.cuda.set_device(local_rank) 40 | 41 | tdist.init_process_group(backend='nccl') 42 | assert tdist.is_initialized(), 'torch.distributed is not initialized!' 43 | torch.backends.cudnn.benchmark = True 44 | torch.backends.cudnn.deterministic = False 45 | 46 | # print only when local_rank == 0 or print(..., force=True) 47 | import builtins as __builtin__ 48 | builtin_print = __builtin__.print 49 | 50 | def prt(msg, *args, **kwargs): 51 | force = kwargs.pop('force', False) 52 | if local_rank == 0 or force: 53 | f_back = sys._getframe().f_back 54 | file_desc = f'{f_back.f_code.co_filename:24s}'[-24:] 55 | builtin_print(f'{time_str()} ({file_desc}, line{f_back.f_lineno:-4d})=> {msg}', *args, **kwargs) 56 | 57 | __builtin__.print = prt 58 | tdist.barrier() 59 | return tdist.get_world_size(), global_rank, local_rank, torch.empty(1).cuda().device 60 | 61 | 62 | def create_model_opt(args: FineTuneArgs) -> Tuple[torch.nn.Module, Callable, torch.nn.Module, DistributedDataParallel, ModelEmaV2, Optimizer]: 63 | num_classes = 1000 64 | model_without_ddp: torch.nn.Module = create_model(args.model, num_classes=num_classes, drop_path_rate=args.drop_path).to(args.device) 65 | model_para = f'{sum(p.numel() for p in model_without_ddp.parameters() if p.requires_grad) / 1e6:.1f}M' 66 | # create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper 67 | model_ema = ModelEmaV2(model_without_ddp, decay=args.ema, device=args.device) 68 | if args.sbn: 69 | model_without_ddp = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model_without_ddp) 70 | print(f'[model={args.model}] [#para={model_para}, drop_path={args.drop_path}, ema={args.ema}] {model_without_ddp}\n') 71 | model = DistributedDataParallel(model_without_ddp, device_ids=[args.local_rank], find_unused_parameters=False, broadcast_buffers=False) 72 | model.train() 73 | opt_cls = { 74 | 'adam': AdamW, 'adamw': AdamW, 75 | 'lamb': partial(Lamb, max_grad_norm=1e7, always_adapt=True, bias_correction=False), 76 | } 77 | param_groups: List[dict] = get_param_groups(model_without_ddp, nowd_keys={'cls_token', 'pos_embed', 'mask_token', 'gamma'}, lr_scale=args.lr_scale) 78 | # param_groups[0] is like this: {'params': List[nn.Parameters], 'lr': float, 'lr_scale': float, 'weight_decay': float, 'weight_decay_scale': float} 79 | optimizer = 
opt_cls[args.opt](param_groups, lr=args.lr, weight_decay=0) 80 | print(f'[optimizer={type(optimizer)}]') 81 | mixup_fn = BatchMixup( 82 | mixup_alpha=args.mixup, cutmix_alpha=1.0, cutmix_minmax=None, 83 | prob=1.0, switch_prob=0.5, mode='batch', 84 | label_smoothing=0.1, num_classes=num_classes 85 | ) 86 | mixup_fn.mixup_enabled = args.mixup > 0.0 87 | if 'lamb' in args.opt: 88 | # label smoothing is solved in AdaptiveMixup with `label_smoothing`, so here smoothing=0 89 | criterion = BinaryCrossEntropy(smoothing=0, target_threshold=None) 90 | else: 91 | criterion = SoftTargetCrossEntropy() 92 | print(f'[loss_fn] {criterion}') 93 | print(f'[mixup_fn] {mixup_fn}') 94 | return criterion, mixup_fn, model_without_ddp, model, model_ema, optimizer 95 | 96 | 97 | def load_checkpoint(resume_from, model_without_ddp, ema_module, optimizer): 98 | if len(resume_from) == 0 or not os.path.exists(resume_from): 99 | raise AttributeError(f'ckpt `{resume_from}` not found!') 100 | # return 0, '[no performance_desc]' 101 | print(f'[try to resume from file `{resume_from}`]') 102 | checkpoint = torch.load(resume_from, map_location='cpu') 103 | assert checkpoint.get('is_pretrain', False) == False, 'Please do not use `*_withdecoder_1kpretrained_spark_style.pth`, which is ONLY for resuming the pretraining. Use `*_1kpretrained_timm_style.pth` or `*_1kfinetuned*.pth` instead.' 104 | 105 | ep_start, performance_desc = checkpoint.get('epoch', -1) + 1, checkpoint.get('performance_desc', '[no performance_desc]') 106 | missing, unexpected = model_without_ddp.load_state_dict(checkpoint.get('module', checkpoint), strict=False) 107 | print(f'[load_checkpoint] missing_keys={missing}') 108 | print(f'[load_checkpoint] unexpected_keys={unexpected}') 109 | print(f'[load_checkpoint] ep_start={ep_start}, performance_desc={performance_desc}') 110 | 111 | if 'optimizer' in checkpoint: 112 | optimizer.load_state_dict(checkpoint['optimizer']) 113 | if 'ema' in checkpoint: 114 | ema_module.load_state_dict(checkpoint['ema']) 115 | return ep_start, performance_desc 116 | 117 | 118 | def save_checkpoint(save_to, args, epoch, performance_desc, model_without_ddp_state, ema_state, optimizer_state): 119 | checkpoint_path = os.path.join(args.exp_dir, save_to) 120 | if args.is_local_master: 121 | to_save = { 122 | 'args': str(args), 123 | 'arch': args.model, 124 | 'epoch': epoch, 125 | 'performance_desc': performance_desc, 126 | 'module': model_without_ddp_state, 127 | 'ema': ema_state, 128 | 'optimizer': optimizer_state, 129 | 'is_pretrain': False, 130 | } 131 | torch.save(to_save, checkpoint_path) 132 | -------------------------------------------------------------------------------- /benchmarks.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import platform 3 | import sys 4 | import time 5 | from pathlib import Path 6 | 7 | import pandas as pd 8 | 9 | FILE = Path(__file__).resolve() 10 | ROOT = FILE.parents[0] # YOLO root directory 11 | if str(ROOT) not in sys.path: 12 | sys.path.append(str(ROOT)) # add ROOT to PATH 13 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 14 | 15 | import export 16 | from models.experimental import attempt_load 17 | from models.yolo import SegmentationModel 18 | from segment.val import run as val_seg 19 | from utils import notebook_init 20 | from utils.general import LOGGER, check_yaml, file_size, print_args 21 | from utils.torch_utils import select_device 22 | from val import run as val_det 23 | 24 | 25 | def run( 26 | weights=ROOT / 'yolo.pt', # 
--------------------------------------------------------------------------------
/benchmarks.py:
--------------------------------------------------------------------------------
import argparse
import platform
import sys
import time
from pathlib import Path

import pandas as pd

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLO root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd())  # relative

import export
from models.experimental import attempt_load
from models.yolo import SegmentationModel
from segment.val import run as val_seg
from utils import notebook_init
from utils.general import LOGGER, check_yaml, file_size, print_args
from utils.torch_utils import select_device
from val import run as val_det


def run(
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=640,  # inference size (pixels)
        batch_size=1,  # batch size
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        half=False,  # use FP16 half-precision inference
        test=False,  # test exports only
        pt_only=False,  # test PyTorch only
        hard_fail=False,  # throw error on benchmark failure
):
    y, t = [], time.time()
    device = select_device(device)
    model_type = type(attempt_load(weights, fuse=False))  # DetectionModel, SegmentationModel, etc.
    for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows():  # index, (name, file, suffix, CPU, GPU)
        try:
            assert i not in (9, 10), 'inference not supported'  # Edge TPU and TF.js are unsupported
            assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13'  # CoreML
            if 'cpu' in device.type:
                assert cpu, 'inference not supported on CPU'
            if 'cuda' in device.type:
                assert gpu, 'inference not supported on GPU'

            # Export
            if f == '-':
                w = weights  # PyTorch format
            else:
                w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # all others
            assert suffix in str(w), 'export failed'

            # Validate
            if model_type == SegmentationModel:
                result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
                metric = result[0][7]  # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls))
            else:  # DetectionModel
                result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half)
                metric = result[0][3]  # (p, r, map50, map, *loss(box, obj, cls))
            speed = result[2][1]  # times (preprocess, inference, postprocess)
            y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)])  # name, size (MB), mAP, t_inference (ms)
        except Exception as e:
            if hard_fail:
                assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}'
            LOGGER.warning(f'WARNING ⚠️ Benchmark failure for {name}: {e}')
            y.append([name, None, None, None])  # export/benchmark failed: no size, mAP or time
        if pt_only and i == 0:
            break  # break after PyTorch

    # Print results
    LOGGER.info('\n')
    parse_opt()  # re-parse and print run arguments
    notebook_init()  # print system info
    py = pd.DataFrame(y, columns=['Format', 'Size (MB)', 'mAP50-95', 'Inference time (ms)'])
    LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)')
    LOGGER.info(str(py))
    if hard_fail and isinstance(hard_fail, str):
        metrics = py['mAP50-95'].array  # values to compare to floor
        floor = eval(hard_fail)  # minimum metric floor to pass, e.g. --hard-fail 0.3
        assert all(x > floor for x in metrics if pd.notna(x)), f'HARD FAIL: mAP50-95 < floor {floor}'
    return py
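# Illustrative invocations (flags as defined in parse_opt() below; paths hypothetical):
#   $ python benchmarks.py --weights yolo.pt --imgsz 640 --device 0
#       exports to every supported format and validates each export, while
#   $ python benchmarks.py --test
#       only checks that each export succeeds (see test() below).
# Passing a float string to --hard-fail, e.g. --hard-fail 0.3, makes run() assert
# that every measured mAP50-95 stays above that floor.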
def test(
        weights=ROOT / 'yolo.pt',  # weights path
        imgsz=640,  # inference size (pixels)
        batch_size=1,  # batch size
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        half=False,  # use FP16 half-precision inference
        test=False,  # test exports only
        pt_only=False,  # test PyTorch only
        hard_fail=False,  # throw error on benchmark failure
):
    y, t = [], time.time()
    device = select_device(device)
    for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows():  # index, (name, file, suffix, CPU, GPU)
        try:
            w = weights if f == '-' else \
                export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]  # weights
            assert suffix in str(w), 'export failed'
            y.append([name, True])
        except Exception:
            y.append([name, False])  # export failed

    # Print results
    LOGGER.info('\n')
    parse_opt()  # re-parse and print run arguments
    notebook_init()  # print system info
    py = pd.DataFrame(y, columns=['Format', 'Export'])
    LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)')
    LOGGER.info(str(py))
    return py


def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolo.pt', help='weights path')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--test', action='store_true', help='test exports only')
    parser.add_argument('--pt-only', action='store_true', help='test PyTorch only')
    parser.add_argument('--hard-fail', nargs='?', const=True, default=False, help='Exception on error or < min metric')
    opt = parser.parse_args()
    opt.data = check_yaml(opt.data)  # check YAML
    print_args(vars(opt))
    return opt


def main(opt):
    test(**vars(opt)) if opt.test else run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
--------------------------------------------------------------------------------
/spark/downstream_imagenet/data.py:
--------------------------------------------------------------------------------
# Copyright (c) ByteDance, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import os
import random
import time

import PIL.Image as PImage
import numpy as np
import torch
import torchvision
from timm.data import AutoAugment as TimmAutoAugment
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, create_transform
from timm.data.distributed_sampler import RepeatAugSampler
from timm.data.transforms_factory import transforms_imagenet_eval
from torch.utils.data import DataLoader
from torch.utils.data.sampler import Sampler
from torchvision.transforms import AutoAugment as TorchAutoAugment
from torchvision.transforms import transforms, TrivialAugmentWide

try:
    from torchvision.transforms import InterpolationMode
    interpolation = InterpolationMode.BICUBIC
except ImportError:  # older torchvision without InterpolationMode
    import PIL
    interpolation = PIL.Image.BICUBIC


def create_classification_dataset(data_path, img_size, rep_aug, workers, batch_size_per_gpu, world_size, global_rank):
    import warnings
    warnings.filterwarnings('ignore', category=UserWarning)

    mean, std = IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
    trans_train = create_transform(
        is_training=True, input_size=img_size,
        auto_augment='v0', interpolation='bicubic', re_prob=0.25, re_mode='pixel', re_count=1,
        mean=mean, std=std,
    )
    if img_size < 384:
        for i, t in enumerate(trans_train.transforms):
            if isinstance(t, (TorchAutoAugment, TimmAutoAugment)):
                trans_train.transforms[i] = TrivialAugmentWide(interpolation=interpolation)
                break
        trans_val = transforms_imagenet_eval(img_size=img_size, interpolation='bicubic', crop_pct=0.95, mean=mean, std=std)
    else:
        trans_val = transforms.Compose([
            transforms.Resize((img_size, img_size), interpolation=interpolation),
            transforms.ToTensor(), transforms.Normalize(mean=mean, std=std),
        ])
    print_transform(trans_train, '[train]')
    print_transform(trans_val, '[val]')

    imagenet_folder = os.path.abspath(data_path)
    for postfix in ('train', 'val'):
        if imagenet_folder.endswith(postfix):
            imagenet_folder = imagenet_folder[:-len(postfix)]
    dataset_train = torchvision.datasets.ImageFolder(os.path.join(imagenet_folder, 'train'), trans_train)
    dataset_val = torchvision.datasets.ImageFolder(os.path.join(imagenet_folder, 'val'), trans_val)

    if rep_aug:
        print(f'[dataset] using repeated augmentation: count={rep_aug}')
        train_sp = RepeatAugSampler(dataset_train, shuffle=True, num_repeats=rep_aug)
    else:
        train_sp = torch.utils.data.distributed.DistributedSampler(dataset_train, shuffle=True, drop_last=True)

    loader_train = DataLoader(
        dataset=dataset_train, num_workers=workers, pin_memory=True,
        batch_size=batch_size_per_gpu, sampler=train_sp, persistent_workers=workers > 0,
        worker_init_fn=worker_init_fn,
    )
    iters_train = len(loader_train)
    print(f'[dataset: train] bs={world_size}x{batch_size_per_gpu}={world_size * batch_size_per_gpu}, num_iters={iters_train}')

    val_ratio = 2
    loader_val = DataLoader(
        dataset=dataset_val, num_workers=workers, pin_memory=True,
        batch_sampler=DistInfiniteBatchSampler(world_size, global_rank, len(dataset_val), glb_batch_size=val_ratio * batch_size_per_gpu, filling=False, shuffle=False),
        worker_init_fn=worker_init_fn,
    )
    iters_val = len(loader_val)
    print(f'[dataset: val] bs={world_size}x{val_ratio * batch_size_per_gpu}={val_ratio * world_size * 
batch_size_per_gpu}, num_iters={iters_val}') 85 | 86 | time.sleep(3) 87 | warnings.resetwarnings() 88 | return loader_train, iters_train, iter(loader_val), iters_val 89 | 90 | 91 | def worker_init_fn(worker_id): 92 | # see: https://pytorch.org/docs/stable/notes/randomness.html#dataloader 93 | worker_seed = torch.initial_seed() % 2 ** 32 94 | np.random.seed(worker_seed) 95 | random.seed(worker_seed) 96 | 97 | 98 | def print_transform(transform, s): 99 | print(f'Transform {s} = ') 100 | for t in transform.transforms: 101 | print(t) 102 | print('---------------------------\n') 103 | 104 | 105 | class DistInfiniteBatchSampler(Sampler): 106 | def __init__(self, world_size, global_rank, dataset_len, glb_batch_size, seed=0, filling=False, shuffle=True): 107 | assert glb_batch_size % world_size == 0 108 | self.world_size, self.rank = world_size, global_rank 109 | self.dataset_len = dataset_len 110 | self.glb_batch_size = glb_batch_size 111 | self.batch_size = glb_batch_size // world_size 112 | 113 | self.iters_per_ep = (dataset_len + glb_batch_size - 1) // glb_batch_size 114 | self.filling = filling 115 | self.shuffle = shuffle 116 | self.epoch = 0 117 | self.seed = seed 118 | self.indices = self.gener_indices() 119 | 120 | def gener_indices(self): 121 | global_max_p = self.iters_per_ep * self.glb_batch_size # global_max_p % world_size must be 0 cuz glb_batch_size % world_size == 0 122 | if self.shuffle: 123 | g = torch.Generator() 124 | g.manual_seed(self.epoch + self.seed) 125 | global_indices = torch.randperm(self.dataset_len, generator=g) 126 | else: 127 | global_indices = torch.arange(self.dataset_len) 128 | filling = global_max_p - global_indices.shape[0] 129 | if filling > 0 and self.filling: 130 | global_indices = torch.cat((global_indices, global_indices[:filling])) 131 | global_indices = tuple(global_indices.numpy().tolist()) 132 | 133 | seps = torch.linspace(0, len(global_indices), self.world_size + 1, dtype=torch.int) 134 | local_indices = global_indices[seps[self.rank]:seps[self.rank + 1]] 135 | self.max_p = len(local_indices) 136 | return local_indices 137 | 138 | def __iter__(self): 139 | self.epoch = 0 140 | while True: 141 | self.epoch += 1 142 | p, q = 0, 0 143 | while p < self.max_p: 144 | q = p + self.batch_size 145 | yield self.indices[p:q] 146 | p = q 147 | if self.shuffle: 148 | self.indices = self.gener_indices() 149 | 150 | def __len__(self): 151 | return self.iters_per_ep 152 | -------------------------------------------------------------------------------- /utils/loggers/comet/hpo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import comet_ml 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | FILE = Path(__file__).resolve() 13 | ROOT = FILE.parents[3] # YOLOv5 root directory 14 | if str(ROOT) not in sys.path: 15 | sys.path.append(str(ROOT)) # add ROOT to PATH 16 | 17 | from train import train 18 | from utils.callbacks import Callbacks 19 | from utils.general import increment_path 20 | from utils.torch_utils import select_device 21 | 22 | # Project Configuration 23 | config = comet_ml.config.get_config() 24 | COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") 25 | 26 | 27 | def get_args(known=False): 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') 30 | 
parser.add_argument('--cfg', type=str, default='', help='model.yaml path') 31 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') 32 | parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') 33 | parser.add_argument('--epochs', type=int, default=300, help='total training epochs') 34 | parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') 35 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') 36 | parser.add_argument('--rect', action='store_true', help='rectangular training') 37 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') 38 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 39 | parser.add_argument('--noval', action='store_true', help='only validate final epoch') 40 | parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') 41 | parser.add_argument('--noplots', action='store_true', help='save no plot files') 42 | parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') 43 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 44 | parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') 45 | parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') 46 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 47 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') 48 | parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') 49 | parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') 50 | parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') 51 | parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') 52 | parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') 53 | parser.add_argument('--name', default='exp', help='save to project/name') 54 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 55 | parser.add_argument('--quad', action='store_true', help='quad dataloader') 56 | parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') 57 | parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') 58 | parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') 59 | parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') 60 | parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') 61 | parser.add_argument('--seed', type=int, default=0, help='Global training seed') 62 | parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') 63 | 64 | # Weights & Biases arguments 65 | parser.add_argument('--entity', default=None, help='W&B: Entity') 66 | parser.add_argument('--upload_dataset', 
nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    # Comet Arguments
    parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.")
    parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.")
    parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.")
    parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.")
    parser.add_argument("--comet_optimizer_workers",
                        type=int,
                        default=1,
                        help="Comet: Number of Parallel Workers to use with the Comet Optimizer.")

    return parser.parse_known_args()[0] if known else parser.parse_args()


def run(parameters, opt):
    hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}

    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.batch_size = parameters.get("batch_size")
    opt.epochs = parameters.get("epochs")

    device = select_device(opt.device, batch_size=opt.batch_size)
    train(hyp_dict, opt, device, callbacks=Callbacks())


if __name__ == "__main__":
    opt = get_args(known=True)

    opt.weights = str(opt.weights)
    opt.cfg = str(opt.cfg)
    opt.data = str(opt.data)
    opt.project = str(opt.project)

    optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
    if optimizer_id is None:
        with open(opt.comet_optimizer_config) as f:
            optimizer_config = json.load(f)
        optimizer = comet_ml.Optimizer(optimizer_config)
    else:
        optimizer = comet_ml.Optimizer(optimizer_id)

    opt.comet_optimizer_id = optimizer.id
    status = optimizer.status()

    opt.comet_optimizer_objective = status["spec"]["objective"]
    opt.comet_optimizer_metric = status["spec"]["metric"]

    logger.info("COMET INFO: Starting Hyperparameter Sweep")
    for parameter in optimizer.get_parameters():
        run(parameter["parameters"], opt)
--------------------------------------------------------------------------------
/utils/autoanchor.py:
--------------------------------------------------------------------------------
import random

import numpy as np
import torch
import yaml
from tqdm import tqdm

from utils import TryExcept
from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr

PREFIX = colorstr('AutoAnchor: ')


def check_anchor_order(m):
    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
    a = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da and (da.sign() != ds.sign()):  # anchor order does not match stride order
        LOGGER.info(f'{PREFIX}Reversing anchor order')
        m.anchors[:] = m.anchors.flip(0)


@TryExcept(f'{PREFIX}ERROR')
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    # Check anchor fit to data, recompute if necessary
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    shapes = imgsz * dataset.shapes / 
dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1 / thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides 41 | anchors = m.anchors.clone() * stride # current anchors 42 | bpr, aat = metric(anchors.cpu().view(-1, 2)) 43 | s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). ' 44 | if bpr > 0.98: # threshold to recompute 45 | LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅') 46 | else: 47 | LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...') 48 | na = m.anchors.numel() // 2 # number of anchors 49 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 50 | new_bpr = metric(anchors)[0] 51 | if new_bpr > bpr: # replace anchors 52 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 53 | m.anchors[:] = anchors.clone().view_as(m.anchors) 54 | check_anchor_order(m) # must be in pixel-space (not grid-space) 55 | m.anchors /= stride 56 | s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)' 57 | else: 58 | s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)' 59 | LOGGER.info(s) 60 | 61 | 62 | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 63 | """ Creates kmeans-evolved anchors from training dataset 64 | 65 | Arguments: 66 | dataset: path to data.yaml, or a loaded dataset 67 | n: number of anchors 68 | img_size: image size used for training 69 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 70 | gen: generations to evolve anchors using genetic algorithm 71 | verbose: print all results 72 | 73 | Return: 74 | k: kmeans evolved anchors 75 | 76 | Usage: 77 | from utils.autoanchor import *; _ = kmean_anchors() 78 | """ 79 | from scipy.cluster.vq import kmeans 80 | 81 | npr = np.random 82 | thr = 1 / thr 83 | 84 | def metric(k, wh): # compute metrics 85 | r = wh[:, None] / k[None] 86 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 87 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 88 | return x, x.max(1)[0] # x, best_x 89 | 90 | def anchor_fitness(k): # mutation fitness 91 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 92 | return (best * (best > thr).float()).mean() # fitness 93 | 94 | def print_results(k, verbose=True): 95 | k = k[np.argsort(k.prod(1))] # sort small to large 96 | x, best = metric(k, wh0) 97 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 98 | s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \ 99 | f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \ 100 | f'past_thr={x[x > thr].mean():.3f}-mean: ' 101 | for x in k: 102 | s += '%i,%i, ' % (round(x[0]), round(x[1])) 103 | if verbose: 104 | LOGGER.info(s[:-2]) 105 | return k 106 | 107 | if isinstance(dataset, str): # *.yaml file 108 | with 
open(dataset, errors='ignore') as f:
            data_dict = yaml.safe_load(f)  # data dict
        from utils.dataloaders import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size')
    wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32)  # keep labels with any side >= 2 pixels
    # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans init
    try:
        LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
        assert n <= len(wh)  # apply overdetermined constraint
        s = wh.std(0)  # sigmas for whitening
        k = kmeans(wh / s, n, iter=30)[0] * s  # points
        assert n == len(k)  # kmeans may return fewer points than requested if wh is insufficient or too similar
    except Exception:
        LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init')
        k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size  # random init
    wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
    k = print_results(k, verbose=False)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0], 400)
    # ax[1].hist(wh[wh[:, 1]<100, 1], 400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor shape, mutation prob, sigma
    pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT)  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k, verbose)

    return print_results(k).astype(np.float32)
--------------------------------------------------------------------------------
/spark/pretrain/README.md:
--------------------------------------------------------------------------------
## Preparation for ImageNet-1k pretraining

See [/INSTALL.md](/INSTALL.md) to prepare `pip` dependencies and the ImageNet dataset.

**Note: for neural network definitions, we directly use `timm.models.ResNet` and the [official ConvNeXt](https://github.com/facebookresearch/ConvNeXt/blob/048efcea897d999aed302f2639b6270aedf8d4c8/models/convnext.py).**


## Tutorial for pretraining your own CNN model

See [/pretrain/models/custom.py](/pretrain/models/custom.py). Your todo list is:

- implement `get_downsample_ratio` in [/pretrain/models/custom.py line20](/pretrain/models/custom.py#L20).
- implement `get_feature_map_channels` in [/pretrain/models/custom.py line29](/pretrain/models/custom.py#L29).
- implement `forward` in [/pretrain/models/custom.py line38](/pretrain/models/custom.py#L38).
- define `your_convnet(...)` with `@register_model` in [/pretrain/models/custom.py line54](/pretrain/models/custom.py#L53-L54).
- add default kwargs of `your_convnet(...)` in [/pretrain/models/\_\_init\_\_.py line34](/pretrain/models/__init__.py#L34).
- **Note: see [#54](/../../issues/54) if your CNN contains an SE module or a global average pooling layer, and see [#56](/../../issues/56) if it contains GroupNorm.**

A minimal sketch of these pieces is shown below; then run the experiment with `--model=your_convnet`.
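This is a hedged skeleton, not the official template: the stage layout, channel counts and the `hierarchical` keyword are illustrative assumptions; [/pretrain/models/custom.py](/pretrain/models/custom.py) remains the authoritative reference.

```python
from typing import List

import torch
import torch.nn as nn
from timm.models.registry import register_model


class YourConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        # four stages: a /4 stem followed by three /2 stages => overall stride 32
        self.stages = nn.ModuleList([
            nn.Conv2d(3, 32, kernel_size=4, stride=4),     # /4
            nn.Conv2d(32, 64, kernel_size=2, stride=2),    # /8
            nn.Conv2d(64, 128, kernel_size=2, stride=2),   # /16
            nn.Conv2d(128, 256, kernel_size=2, stride=2),  # /32
        ])

    def get_downsample_ratio(self) -> int:
        return 32  # overall stride of the deepest feature map

    def get_feature_map_channels(self) -> List[int]:
        return [32, 64, 128, 256]  # channels of each pyramid level, shallow to deep

    def forward(self, x: torch.Tensor, hierarchical=False):
        feats = []
        for stage in self.stages:
            x = stage(x)
            feats.append(x)
        return feats if hierarchical else x  # SparK needs all pyramid levels


@register_model
def your_convnet(pretrained=False, **kwargs):
    return YourConvNet()
```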

## Tutorial for pretraining on your own dataset

See the comment of `build_dataset_to_pretrain` in [line55 of /pretrain/utils/imagenet.py](/pretrain/utils/imagenet.py#L55). Your todo list:

- Define a subclass of `torch.utils.data.Dataset` for your own unlabeled dataset, to replace our `ImageNetDataset`.
- Use `args.data_path` and `args.input_size` to help build your dataset, with `--data_path=... --input_size=...` to specify them.
- Note that the batch size `--bs` is the total batch size across all GPUs, and may need to be adjusted based on your dataset size. FYI: we use `--bs=4096` for ImageNet, which contains 1.28 million images.

**If your dataset is relatively small**, you can try `--init_weight=/path/to/res50_withdecoder_1kpretrained_spark_style.pth` to do your pretraining *from our pretrained weights*, rather than *from scratch*.

## Debug on 1 GPU (without DistributedDataParallel)

Use a small batch size such as `--bs=32` to avoid OOM.

```shell script
python3 main.py --exp_name=debug --data_path=/path/to/imagenet --model=resnet50 --bs=32
```


## Pretraining Any Model on ImageNet-1k (224x224)

For pretraining, run [/pretrain/main.py](/pretrain/main.py) with `torchrun`.
**It is required to specify** the ImageNet data folder (`--data_path`), your experiment name & log dir (`--exp_name` and `--exp_dir`, automatically created if they do not exist), and the model name (`--model`; for valid choices, see the keys of `pretrain_default_model_kwargs` in [/pretrain/models/\_\_init\_\_.py line34](/pretrain/models/__init__.py#L34)).

We use the **same** pretraining configurations (lr, batch size, etc.) for all models (ResNets and ConvNeXts) in 224 pretraining.
Their **names** and **default values** are in [/pretrain/utils/arg_util.py line23-44](/pretrain/utils/arg_util.py#L23-L44).
All of these defaults (like batch size 4096) are used unless you override them, e.g. with `--bs=512`.

**Note: the batch size `--bs` is the total batch size across all GPUs, and `--base_lr` is the base learning rate. The actual lr is `lr = base_lr * bs / 256`, as in [/pretrain/utils/arg_util.py line131](/pretrain/utils/arg_util.py#L131), so do not use `--lr` to specify the lr (it would be ignored).**
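For example, under this linear scaling rule (the values below are illustrative, not necessarily the repo defaults):

```python
base_lr = 2e-4           # hypothetical --base_lr
bs = 4096                # total batch size across all GPUs (--bs)
lr = base_lr * bs / 256  # = 3.2e-3: the lr the optimizer actually receives
```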

Here is an example to pretrain a ResNet50 on an 8-GPU single machine (we use DistributedDataParallel), overriding the default batch size to 512:
```shell script
$ cd /path/to/SparK/pretrain
$ torchrun --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr=localhost --master_port= main.py \
  --data_path=/path/to/imagenet --exp_name= --exp_dir=/path/to/logdir \
  --model=resnet50 --bs=512
```

For multiple machines, change the `--nnodes`, `--node_rank`, `--master_addr` and `--master_port` to your configurations. E.g.:
```shell script
$ torchrun --nproc_per_node=8 --nnodes= --node_rank= --master_addr= --master_port= main.py \
  ...
```

## Pretraining ConvNeXt-Large on ImageNet-1k (384x384)

For 384 pretraining we use a larger mask ratio (0.75), half the batch size (2048), and double the base learning rate (4e-4):

```shell script
$ cd /path/to/SparK/pretrain
$ torchrun --nproc_per_node=8 --nnodes= --node_rank= --master_addr= --master_port= main.py \
  --data_path=/path/to/imagenet --exp_name= --exp_dir=/path/to/logdir \
  --model=convnext_large --input_size=384 --mask=0.75 --bs=2048 --base_lr=4e-4
```

## Logging

See files in your `--exp_dir` to track your experiment:

- `_withdecoder_1kpretrained_spark_style.pth`: saves model and optimizer states, the current epoch, the current reconstruction loss, etc.; can be used to resume pretraining; can also be used for visualization in [/pretrain/viz_reconstruction.ipynb](/pretrain/viz_reconstruction.ipynb)
- `_1kpretrained_timm_style.pth`: can be used for downstream finetuning
- `pretrain_log.txt`: records some important information such as:
    - `git_commit_id`: the git version
    - `cmd`: the command of this experiment

  It also reports the loss and remaining pretraining time.

- `tensorboard_log/`: saves many tensorboard logs including loss values, learning rates, gradient norms, and more. Use `tensorboard --logdir /path/to/this/tensorboard_log/ --port 23333` for visualization.
- `stdout_backup.txt` and `stderr_backup.txt`: back up stdout/stderr.

## Resuming

Specify `--resume_from=path/to/_withdecoder_1kpretrained_spark_style.pth` to resume pretraining. Note this is different from `--init_weight`:

- `--resume_from` loads three things: model weights, optimizer states, and the current epoch, so it is used to resume an interrupted experiment (it starts from that 'current epoch').
- `--init_weight` ONLY loads the model weights, so it is just a model initialization (it starts from epoch 0).


## Regarding sparse convolution

We do not use sparse convolutions in this PyTorch implementation, due to their limited optimization on modern hardware.
As can be found in [/pretrain/encoder.py](/pretrain/encoder.py), we use masked dense convolution to simulate submanifold sparse convolution.
We also define some sparse pooling and normalization layers in [/pretrain/encoder.py](/pretrain/encoder.py).
All these "sparse" layers are implemented through PyTorch built-in operators.


## Some details: how we mask images and how to set the patch size

In SparK, the mask patch size **equals** the downsample ratio of the CNN model (so there is no configuration like `--patch_size=32`).
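For intuition, here is a minimal sketch of the resulting mask geometry (the model, mask ratio and sizes are illustrative; the two steps explained next do exactly this):

```python
import torch

B, input_size, ratio = 2, 224, 32            # ratio = CNN downsample ratio (e.g. ResNet-50)
fmap_h = fmap_w = input_size // ratio        # 7: the mask is generated at this resolution
active_b1ff = torch.rand(B, 1, fmap_h, fmap_w) > 0.6  # keep ~40%, i.e. a mask ratio of 0.6
# upsample the binary mask back to input resolution: each mask cell covers a
# ratio x ratio patch, so the effective "patch size" equals the downsample ratio
mask_b1hw = active_b1ff.repeat_interleave(ratio, dim=2).repeat_interleave(ratio, dim=3)
print(mask_b1hw.shape)  # torch.Size([2, 1, 224, 224])
```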

Here is the reason: when masking, we:

1. first generate the binary mask for the **smallest**-resolution feature map, i.e., the `_cur_active` or `active_b1ff` in [/pretrain/spark.py line86-87](/pretrain/spark.py#L86-L87), a `torch.BoolTensor` shaped as `[B, 1, fmap_h, fmap_w]` that is used to mask the smallest feature map;
2. then progressively upsample it (i.e., expand its 2nd and 3rd dimensions by calling `repeat_interleave(..., dim=2)` and `repeat_interleave(..., dim=3)` in [/pretrain/encoder.py line16](/pretrain/encoder.py#L16)) to mask the feature maps ([`x` in line21](/pretrain/encoder.py#L21)) with larger resolutions.

So if you want a patch size of 16 or 8, you should actually define a new CNN model with a downsample ratio of 16 or 8.
See [Tutorial for pretraining your own CNN model (above)](https://github.com/keyu-tian/SparK/tree/main/pretrain/#tutorial-for-pretraining-your-own-cnn-model).
--------------------------------------------------------------------------------
/utils/loggers/clearml/clearml_utils.py:
--------------------------------------------------------------------------------
"""Main Logger class for ClearML experiment tracking."""
import glob
import re
from pathlib import Path

import numpy as np
import yaml

from utils.plots import Annotator, colors

try:
    import clearml
    from clearml import Dataset, Task

    assert hasattr(clearml, '__version__')  # verify package import not local dir
except (ImportError, AssertionError):
    clearml = None


def construct_dataset(clearml_info_string):
    """Load a ClearML dataset and fill the internal data_dict with its contents."""
    dataset_id = clearml_info_string.replace('clearml://', '')
    dataset = Dataset.get(dataset_id=dataset_id)
    dataset_root_path = Path(dataset.get_local_copy())

    # We'll search for the yaml file definition in the dataset
    yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml")))
    if len(yaml_filenames) > 1:
        raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains '
                         'the dataset definition this way.')
    elif len(yaml_filenames) == 0:
        raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file '
                         'inside the dataset root path.')
    with open(yaml_filenames[0]) as f:
        dataset_definition = yaml.safe_load(f)

    assert set(dataset_definition.keys()).issuperset(
        {'train', 'test', 'val', 'nc', 'names'}
    ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')"

    data_dict = dict()
    data_dict['train'] = str(
        (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None
    data_dict['test'] = str(
        (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None
    data_dict['val'] = str(
        (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None
    data_dict['nc'] = dataset_definition['nc']
    data_dict['names'] = dataset_definition['names']

    return data_dict


class ClearmlLogger:
    """Log training runs, datasets, models, and predictions to ClearML.

    This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default,
    this information includes hyperparameters, system configuration and metrics, model metrics, code information and
    basic data metrics and analyses.

    By providing additional command line arguments to train.py, datasets,
    models and predictions can also be logged.
    """

    def __init__(self, opt, hyp):
        """
        - Initialize the ClearML Task; this object will capture the experiment
        - Upload dataset version to ClearML Data if opt.upload_dataset is True

        arguments:
        opt (namespace) -- Commandline arguments for this run
        hyp (dict) -- Hyperparameters for this run

        """
        self.current_epoch = 0
        # Keep track of the number of logged images, to enforce a limit
        self.current_epoch_logged_images = set()
        # Maximum number of images to log to ClearML per epoch
        self.max_imgs_to_log_per_epoch = 16
        # Get the interval of epochs when bounding box images should be logged
        self.bbox_interval = opt.bbox_interval
        self.clearml = clearml
        self.task = None
        self.data_dict = None
        if self.clearml:
            self.task = Task.init(
                project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5',
                task_name=opt.name if opt.name != 'exp' else 'Training',
                tags=['YOLOv5'],
                output_uri=True,
                auto_connect_frameworks={'pytorch': False}
                # We disconnect pytorch auto-detection, because we added manual model save points in the code
            )
            # ClearML's hooks will already grab all general parameters
            # Only the hyperparameters coming from the yaml config file
            # will have to be added manually!
            self.task.connect(hyp, name='Hyperparameters')

            # Get ClearML Dataset Version if requested
            if opt.data.startswith('clearml://'):
                # data_dict should have the following keys:
                # names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
                self.data_dict = construct_dataset(opt.data)
                # Set data to data_dict because wandb will crash without this information and opt is the best way
                # to give it to them
                opt.data = self.data_dict

    def log_debug_samples(self, files, title='Debug Samples'):
        """
        Log files (images) as debug samples in the ClearML task.

        arguments:
        files (List(PosixPath)) a list of file paths in PosixPath format
        title (str) A title that groups together images with the same values
        """
        for f in files:
            if f.exists():
                it = re.search(r'_batch(\d+)', f.name)
                iteration = int(it.groups()[0]) if it else 0
                self.task.get_logger().report_image(title=title,
                                                    series=f.name.replace(it.group(), '') if it else f.name,
                                                    local_path=str(f),
                                                    iteration=iteration)

    def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
        """
        Draw the bounding boxes on a single image and report the result as a ClearML debug sample.

        arguments:
        image_path (PosixPath) the path to the original image file
        boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
        class_names (dict): dict containing mapping of class int to class name
        image (Tensor): A torch tensor containing the actual image data
        """
        if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0:
            # Log every bbox_interval epochs and deduplicate for any intermittent extra eval runs
            if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images:
                im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
                annotator = Annotator(im=im, pil=True)
                for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
                    color = colors(i)

                    class_name = class_names[int(class_nr)]
                    confidence_percentage = round(float(conf) * 100, 2)
                    label = f"{class_name}: {confidence_percentage}%"

                    if conf > conf_threshold:
                        annotator.rectangle(box.cpu().numpy(), outline=color)
                        annotator.box_label(box.cpu().numpy(), label=label, color=color)

                annotated_image = annotator.result()
                self.task.get_logger().report_image(title='Bounding Boxes',
                                                    series=image_path.name,
                                                    iteration=self.current_epoch,
                                                    image=annotated_image)
                self.current_epoch_logged_images.add(image_path)
--------------------------------------------------------------------------------
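For reference, a hedged sketch of how `construct_dataset` above is typically driven (the dataset id is a placeholder); the YAML it locates must carry at least the asserted keys:

```python
# Hypothetical usage; 'abc123' stands in for a real ClearML dataset id.
data_dict = construct_dataset('clearml://abc123')
# This succeeds only if the dataset root contains exactly one YAML file with at
# least these keys (train/test/val may be null, but must be present), e.g.:
#   train: images/train
#   val: images/val
#   test: null
#   nc: 2
#   names: ['class_a', 'class_b']
print(data_dict['nc'], data_dict['names'])
```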