├── README.md ├── benchmarks.py ├── classify ├── predict.py ├── train.py └── val.py ├── data ├── coco.yaml └── hyps │ └── hyp.scratch-high.yaml ├── detect.py ├── detect_and aim.py ├── detect_and_blur.py ├── detect_and_mask.py ├── detect_and_mask_stable_diffusion.py ├── export.py ├── figure └── performance.png ├── hubconf.py ├── llm.py ├── models ├── __init__.py ├── common.py ├── detect │ ├── gelan-c.yaml │ ├── gelan-e.yaml │ ├── gelan.yaml │ ├── yolov7-af.yaml │ ├── yolov9-c.yaml │ ├── yolov9-e.yaml │ └── yolov9.yaml ├── experimental.py ├── hub │ ├── anchors.yaml │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ └── yolov3.yaml ├── panoptic │ └── yolov7-af-pan.yaml ├── segment │ └── yolov7-af-seg.yaml ├── tf.py └── yolo.py ├── panoptic ├── predict.py ├── train.py └── val.py ├── requirements.txt ├── scripts └── get_coco.sh ├── segment ├── predict.py ├── train.py └── val.py ├── train.py ├── train_dual.py ├── train_triple.py ├── utils ├── __init__.py ├── activations.py ├── augmentations.py ├── autoanchor.py ├── autobatch.py ├── callbacks.py ├── dataloaders.py ├── downloads.py ├── general.py ├── lion.py ├── loggers │ ├── __init__.py │ ├── clearml │ │ ├── __init__.py │ │ ├── clearml_utils.py │ │ └── hpo.py │ ├── comet │ │ ├── __init__.py │ │ ├── comet_utils.py │ │ ├── hpo.py │ │ └── optimizer_config.json │ └── wandb │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ ├── sweep.py │ │ ├── sweep.yaml │ │ └── wandb_utils.py ├── loss.py ├── loss_tal.py ├── loss_tal_dual.py ├── loss_tal_triple.py ├── metrics.py ├── panoptic │ ├── __init__.py │ ├── augmentations.py │ ├── dataloaders.py │ ├── general.py │ ├── loss.py │ ├── loss_tal.py │ ├── metrics.py │ ├── plots.py │ └── tal │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ └── assigner.py ├── plots.py ├── segment │ ├── __init__.py │ ├── augmentations.py │ ├── dataloaders.py │ ├── general.py │ ├── loss.py │ ├── loss_tal.py │ ├── loss_tal_dual.py │ ├── metrics.py │ ├── plots.py │ └── tal │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ └── assigner.py ├── tal │ ├── __init__.py │ ├── anchor_generator.py │ └── assigner.py ├── torch_utils.py └── triton.py ├── val.py ├── val_dual.py └── val_triple.py /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv9 Custom Functions 2 | The additional files are "detect_and_mask.py", "detect_and_blur.py", "detect_and aim.py", and "llm.py". 3 | 4 | #### Change the API key in the "llm.py" file 5 | 6 | **Features** 7 | * Blur any unwanted object in videos/images with the help of GPT 8 | * Count all detected objects by class; works on every image or video 9 | * Code can run on both CPU and GPU 10 | * Video/webcam/external camera/IP stream supported 11 | * Inpaint images with DALL·E 2 12 | 13 | 14 | 15 | ### Video Inpainting 16 | 17 | 18 | https://github.com/user-attachments/assets/24b5da9d-df1c-463d-8874-6a29930eb4d3 19 | 20 | ### Video Blurring 21 | 22 | https://github.com/muratali016/Yolov9-Custom-Functions/assets/77502485/f46700a1-7aa2-4e2b-883f-b040c7d17181 23 | 24 | 25 | 26 | ### Ready-to-use blurring Google Colab notebook: https://colab.research.google.com/drive/153_sLjfXgfdnApeBhossIVwDnCVExPL9?usp=sharing 27 | 28 | ### Ready-to-use inpainting Google Colab notebook: https://colab.research.google.com/drive/1LMXgwq13C4mxsbV5vlMgGsSVoHpLIXpt?usp=sharing 29 | 30 | 31 | -------------------------------------------------------------------------------- /benchmarks.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import 
platform 3 | import sys 4 | import time 5 | from pathlib import Path 6 | 7 | import pandas as pd 8 | 9 | FILE = Path(__file__).resolve() 10 | ROOT = FILE.parents[0] # YOLO root directory 11 | if str(ROOT) not in sys.path: 12 | sys.path.append(str(ROOT)) # add ROOT to PATH 13 | # ROOT = ROOT.relative_to(Path.cwd()) # relative 14 | 15 | import export 16 | from models.experimental import attempt_load 17 | from models.yolo import SegmentationModel 18 | from segment.val import run as val_seg 19 | from utils import notebook_init 20 | from utils.general import LOGGER, check_yaml, file_size, print_args 21 | from utils.torch_utils import select_device 22 | from val import run as val_det 23 | 24 | 25 | def run( 26 | weights=ROOT / 'yolo.pt', # weights path 27 | imgsz=640, # inference size (pixels) 28 | batch_size=1, # batch size 29 | data=ROOT / 'data/coco.yaml', # dataset.yaml path 30 | device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu 31 | half=False, # use FP16 half-precision inference 32 | test=False, # test exports only 33 | pt_only=False, # test PyTorch only 34 | hard_fail=False, # throw error on benchmark failure 35 | ): 36 | y, t = [], time.time() 37 | device = select_device(device) 38 | model_type = type(attempt_load(weights, fuse=False)) # DetectionModel, SegmentationModel, etc. 39 | for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU) 40 | try: 41 | assert i not in (9, 10), 'inference not supported' # Edge TPU and TF.js are unsupported 42 | assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13' # CoreML 43 | if 'cpu' in device.type: 44 | assert cpu, 'inference not supported on CPU' 45 | if 'cuda' in device.type: 46 | assert gpu, 'inference not supported on GPU' 47 | 48 | # Export 49 | if f == '-': 50 | w = weights # PyTorch format 51 | else: 52 | w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # all others 53 | assert suffix in str(w), 'export failed' 54 | 55 | # Validate 56 | if model_type == SegmentationModel: 57 | result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half) 58 | metric = result[0][7] # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls)) 59 | else: # DetectionModel: 60 | result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half) 61 | metric = result[0][3] # (p, r, map50, map, *loss(box, obj, cls)) 62 | speed = result[2][1] # times (preprocess, inference, postprocess) 63 | y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)]) # MB, mAP, t_inference 64 | except Exception as e: 65 | if hard_fail: 66 | assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}' 67 | LOGGER.warning(f'WARNING ⚠️ Benchmark failure for {name}: {e}') 68 | y.append([name, None, None, None]) # mAP, t_inference 69 | if pt_only and i == 0: 70 | break # break after PyTorch 71 | 72 | # Print results 73 | LOGGER.info('\n') 74 | parse_opt() 75 | notebook_init() # print system info 76 | c = ['Format', 'Size (MB)', 'mAP50-95', 'Inference time (ms)'] if map else ['Format', 'Export', '', ''] 77 | py = pd.DataFrame(y, columns=c) 78 | LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)') 79 | LOGGER.info(str(py if map else py.iloc[:, :2])) 80 | if hard_fail and isinstance(hard_fail, str): 81 | metrics = py['mAP50-95'].array # values to compare to floor 82 | floor = eval(hard_fail) # minimum metric floor to pass 83 | assert all(x > 
floor for x in metrics if pd.notna(x)), f'HARD FAIL: mAP50-95 < floor {floor}' 84 | return py 85 | 86 | 87 | def test( 88 | weights=ROOT / 'yolo.pt', # weights path 89 | imgsz=640, # inference size (pixels) 90 | batch_size=1, # batch size 91 | data=ROOT / 'data/coco128.yaml', # dataset.yaml path 92 | device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu 93 | half=False, # use FP16 half-precision inference 94 | test=False, # test exports only 95 | pt_only=False, # test PyTorch only 96 | hard_fail=False, # throw error on benchmark failure 97 | ): 98 | y, t = [], time.time() 99 | device = select_device(device) 100 | for i, (name, f, suffix, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, gpu-capable) 101 | try: 102 | w = weights if f == '-' else \ 103 | export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # weights 104 | assert suffix in str(w), 'export failed' 105 | y.append([name, True]) 106 | except Exception: 107 | y.append([name, False]) # mAP, t_inference 108 | 109 | # Print results 110 | LOGGER.info('\n') 111 | parse_opt() 112 | notebook_init() # print system info 113 | py = pd.DataFrame(y, columns=['Format', 'Export']) 114 | LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)') 115 | LOGGER.info(str(py)) 116 | return py 117 | 118 | 119 | def parse_opt(): 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument('--weights', type=str, default=ROOT / 'yolo.pt', help='weights path') 122 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') 123 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 124 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') 125 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 126 | parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') 127 | parser.add_argument('--test', action='store_true', help='test exports only') 128 | parser.add_argument('--pt-only', action='store_true', help='test PyTorch only') 129 | parser.add_argument('--hard-fail', nargs='?', const=True, default=False, help='Exception on error or < min metric') 130 | opt = parser.parse_args() 131 | opt.data = check_yaml(opt.data) # check YAML 132 | print_args(vars(opt)) 133 | return opt 134 | 135 | 136 | def main(opt): 137 | test(**vars(opt)) if opt.test else run(**vars(opt)) 138 | 139 | 140 | if __name__ == "__main__": 141 | opt = parse_opt() 142 | main(opt) 143 | -------------------------------------------------------------------------------- /classify/val.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Validate a trained YOLOv5 classification model on a classification dataset 4 | 5 | Usage: 6 | $ bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images) 7 | $ python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate ImageNet 8 | 9 | Usage - formats: 10 | $ python classify/val.py --weights yolov5s-cls.pt # PyTorch 11 | yolov5s-cls.torchscript # TorchScript 12 | yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn 13 | yolov5s-cls_openvino_model # OpenVINO 14 | yolov5s-cls.engine # TensorRT 15 | yolov5s-cls.mlmodel # CoreML (macOS-only) 16 | yolov5s-cls_saved_model # TensorFlow SavedModel 17 | yolov5s-cls.pb # TensorFlow GraphDef 18 | yolov5s-cls.tflite # TensorFlow Lite 19 | yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU 20 | yolov5s-cls_paddle_model # PaddlePaddle 21 | """ 22 | 23 | import argparse 24 | import os 25 | import sys 26 | from pathlib import Path 27 | 28 | import torch 29 | from tqdm import tqdm 30 | 31 | FILE = Path(__file__).resolve() 32 | ROOT = FILE.parents[1] # YOLOv5 root directory 33 | if str(ROOT) not in sys.path: 34 | sys.path.append(str(ROOT)) # add ROOT to PATH 35 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative 36 | 37 | from models.common import DetectMultiBackend 38 | from utils.dataloaders import create_classification_dataloader 39 | from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_img_size, check_requirements, colorstr, 40 | increment_path, print_args) 41 | from utils.torch_utils import select_device, smart_inference_mode 42 | 43 | 44 | @smart_inference_mode() 45 | def run( 46 | data=ROOT / '../datasets/mnist', # dataset dir 47 | weights=ROOT / 'yolov5s-cls.pt', # model.pt path(s) 48 | batch_size=128, # batch size 49 | imgsz=224, # inference size (pixels) 50 | device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu 51 | workers=8, # max dataloader workers (per RANK in DDP mode) 52 | verbose=False, # verbose output 53 | project=ROOT / 'runs/val-cls', # save to project/name 54 | name='exp', # save to project/name 55 | exist_ok=False, # existing project/name ok, do not increment 56 | half=False, # use FP16 half-precision inference 57 | dnn=False, # use OpenCV DNN for ONNX inference 58 | model=None, 59 | dataloader=None, 60 | criterion=None, 61 | pbar=None, 62 | ): 63 | # Initialize/load model and set device 64 | training = model is not None 65 | if training: # called by train.py 66 | device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model 67 | half &= device.type != 'cpu' # half precision only supported on CUDA 68 | model.half() if half else model.float() 69 | else: # called directly 70 | device = select_device(device, batch_size=batch_size) 71 | 72 | # Directories 73 | save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run 74 | save_dir.mkdir(parents=True, exist_ok=True) # make dir 75 | 76 | # Load model 77 | model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half) 78 | stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine 79 | imgsz = check_img_size(imgsz, s=stride) # check image size 80 | half = model.fp16 # FP16 supported on limited backends with CUDA 81 | if engine: 82 | batch_size = model.batch_size 83 | else: 84 | device = model.device 85 | if not (pt or jit): 86 | batch_size = 1 # export.py models default to batch-size 1 87 | LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') 88 | 89 | # Dataloader 90 | data = Path(data) 91 | test_dir = data / 'test' if (data / 'test').exists() else data / 'val' # data/test or data/val 92 | dataloader = create_classification_dataloader(path=test_dir, 93 | imgsz=imgsz, 94 | batch_size=batch_size, 95 | augment=False, 96 | rank=-1, 97 | workers=workers) 98 | 99 | model.eval() 100 | pred, targets, loss, dt = [], [], 0, (Profile(), Profile(), Profile()) 101 | n = len(dataloader) # number of batches 102 | action = 'validating' if dataloader.dataset.root.stem == 'val' else 'testing' 103 | desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}" 104 | bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0) 105 | with torch.cuda.amp.autocast(enabled=device.type != 'cpu'): 106 | for images, labels in bar: 107 | with dt[0]: 108 | images, labels = images.to(device, non_blocking=True), labels.to(device) 109 | 110 | with dt[1]: 111 | y = model(images) 112 | 113 | with dt[2]: 114 | pred.append(y.argsort(1, descending=True)[:, :5]) 115 | targets.append(labels) 116 | if criterion: 117 | loss += criterion(y, labels) 118 | 119 | loss /= n 120 | pred, targets = torch.cat(pred), torch.cat(targets) 121 | correct = (targets[:, None] == pred).float() 122 | acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy 123 | top1, top5 = acc.mean(0).tolist() 124 | 125 | if pbar: 126 | pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}" 127 | if verbose: # all classes 128 | LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}") 129 | LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}") 130 | for i, c in model.names.items(): 131 | aci = acc[targets == i] 132 | top1i, top5i = aci.mean(0).tolist() 133 | LOGGER.info(f"{c:>24}{aci.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}") 134 | 
135 | # Print results 136 | t = tuple(x.t / len(dataloader.dataset.samples) * 1E3 for x in dt) # speeds per image 137 | shape = (1, 3, imgsz, imgsz) 138 | LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}' % t) 139 | LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") 140 | 141 | return top1, top5, loss 142 | 143 | 144 | def parse_opt(): 145 | parser = argparse.ArgumentParser() 146 | parser.add_argument('--data', type=str, default=ROOT / '../datasets/mnist', help='dataset path') 147 | parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-cls.pt', help='model.pt path(s)') 148 | parser.add_argument('--batch-size', type=int, default=128, help='batch size') 149 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='inference size (pixels)') 150 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 151 | parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') 152 | parser.add_argument('--verbose', nargs='?', const=True, default=True, help='verbose output') 153 | parser.add_argument('--project', default=ROOT / 'runs/val-cls', help='save to project/name') 154 | parser.add_argument('--name', default='exp', help='save to project/name') 155 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 156 | parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') 157 | parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') 158 | opt = parser.parse_args() 159 | print_args(vars(opt)) 160 | return opt 161 | 162 | 163 | def main(opt): 164 | check_requirements(exclude=('tensorboard', 'thop')) 165 | run(**vars(opt)) 166 | 167 | 168 | if __name__ == "__main__": 169 | opt = parse_opt() 170 | main(opt) 171 | -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | path: ../datasets/coco # dataset root dir 2 | train: train2017.txt # train images (relative to 'path') 118287 images 3 | val: val2017.txt # val images (relative to 'path') 5000 images 4 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 5 | 6 | # Classes 7 | names: 8 | 0: person 9 | 1: bicycle 10 | 2: car 11 | 3: motorcycle 12 | 4: airplane 13 | 5: bus 14 | 6: train 15 | 7: truck 16 | 8: boat 17 | 9: traffic light 18 | 10: fire hydrant 19 | 11: stop sign 20 | 12: parking meter 21 | 13: bench 22 | 14: bird 23 | 15: cat 24 | 16: dog 25 | 17: horse 26 | 18: sheep 27 | 19: cow 28 | 20: elephant 29 | 21: bear 30 | 22: zebra 31 | 23: giraffe 32 | 24: backpack 33 | 25: umbrella 34 | 26: handbag 35 | 27: tie 36 | 28: suitcase 37 | 29: frisbee 38 | 30: skis 39 | 31: snowboard 40 | 32: sports ball 41 | 33: kite 42 | 34: baseball bat 43 | 35: baseball glove 44 | 36: skateboard 45 | 37: surfboard 46 | 38: tennis racket 47 | 39: bottle 48 | 40: wine glass 49 | 41: cup 50 | 42: fork 51 | 43: knife 52 | 44: spoon 53 | 45: bowl 54 | 46: banana 55 | 47: apple 56 | 48: sandwich 57 | 49: orange 58 | 50: broccoli 59 | 51: carrot 60 | 52: hot dog 61 | 53: pizza 62 | 54: donut 63 | 55: cake 64 | 56: chair 65 | 57: couch 66 | 58: potted plant 67 | 59: bed 68 | 60: dining table 69 | 61: toilet 70 | 62: tv 71 | 63: laptop 72 | 64: mouse 73 | 65: remote 74 | 66: 
keyboard 75 | 67: cell phone 76 | 68: microwave 77 | 69: oven 78 | 70: toaster 79 | 71: sink 80 | 72: refrigerator 81 | 73: book 82 | 74: clock 83 | 75: vase 84 | 76: scissors 85 | 77: teddy bear 86 | 78: hair drier 87 | 79: toothbrush 88 | 89 | 90 | # Download script/URL (optional) 91 | download: | 92 | from utils.general import download, Path 93 | 94 | 95 | # Download labels 96 | #segments = True # segment or box labels 97 | #dir = Path(yaml['path']) # dataset root dir 98 | #url = 'https://github.com/WongKinYiu/yolov7/releases/download/v0.1/' 99 | #urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels 100 | #download(urls, dir=dir.parent) 101 | 102 | # Download data 103 | #urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 104 | # 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 105 | # 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 106 | #download(urls, dir=dir / 'images', threads=3) 107 | -------------------------------------------------------------------------------- /data/hyps/hyp.scratch-high.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 7.5 # box loss gain 9 | cls: 0.5 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | dfl: 1.5 # dfl loss gain 14 | iou_t: 0.20 # IoU training threshold 15 | anchor_t: 5.0 # anchor-multiple threshold 16 | # anchors: 3 # anchors per output layer (0 to ignore) 17 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 18 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 19 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 20 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 21 | degrees: 0.0 # image rotation (+/- deg) 22 | translate: 0.1 # image translation (+/- fraction) 23 | scale: 0.9 # image scale (+/- gain) 24 | shear: 0.0 # image shear (+/- deg) 25 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 26 | flipud: 0.0 # image flip up-down (probability) 27 | fliplr: 0.5 # image flip left-right (probability) 28 | mosaic: 1.0 # image mosaic (probability) 29 | mixup: 0.15 # image mixup (probability) 30 | copy_paste: 0.3 # segment copy-paste (probability) 31 | -------------------------------------------------------------------------------- /figure/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muratali016/Yolov9-Custom-Functions/a0a60a1cc9d43a4853f82bbc32bab27915fe0463/figure/performance.png -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None): 5 | """Creates or loads a YOLO model 6 | 7 | Arguments: 8 | name (str): model name 'yolov3' or path 'path/to/best.pt' 9 | pretrained (bool): load pretrained weights into the model 10 | 
channels (int): number of input channels 11 | classes (int): number of model classes 12 | autoshape (bool): apply YOLO .autoshape() wrapper to model 13 | verbose (bool): print all information to screen 14 | device (str, torch.device, None): device to use for model parameters 15 | 16 | Returns: 17 | YOLO model 18 | """ 19 | from pathlib import Path 20 | 21 | from models.common import AutoShape, DetectMultiBackend 22 | from models.experimental import attempt_load 23 | from models.yolo import ClassificationModel, DetectionModel, SegmentationModel 24 | from utils.downloads import attempt_download 25 | from utils.general import LOGGER, check_requirements, intersect_dicts, logging 26 | from utils.torch_utils import select_device 27 | 28 | if not verbose: 29 | LOGGER.setLevel(logging.WARNING) 30 | check_requirements(exclude=('opencv-python', 'tensorboard', 'thop')) 31 | name = Path(name) 32 | path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name # checkpoint path 33 | try: 34 | device = select_device(device) 35 | if pretrained and channels == 3 and classes == 80: 36 | try: 37 | model = DetectMultiBackend(path, device=device, fuse=autoshape) # detection model 38 | if autoshape: 39 | if model.pt and isinstance(model.model, ClassificationModel): 40 | LOGGER.warning('WARNING ⚠️ YOLO ClassificationModel is not yet AutoShape compatible. ' 41 | 'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).') 42 | elif model.pt and isinstance(model.model, SegmentationModel): 43 | LOGGER.warning('WARNING ⚠️ YOLO SegmentationModel is not yet AutoShape compatible. ' 44 | 'You will not be able to run inference with this model.') 45 | else: 46 | model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS 47 | except Exception: 48 | model = attempt_load(path, device=device, fuse=False) # arbitrary model 49 | else: 50 | cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0] # model.yaml path 51 | model = DetectionModel(cfg, channels, classes) # create model 52 | if pretrained: 53 | ckpt = torch.load(attempt_download(path), map_location=device) # load 54 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 55 | csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors']) # intersect 56 | model.load_state_dict(csd, strict=False) # load 57 | if len(ckpt['model'].names) == classes: 58 | model.names = ckpt['model'].names # set class names attribute 59 | if not verbose: 60 | LOGGER.setLevel(logging.INFO) # reset to default 61 | return model.to(device) 62 | 63 | except Exception as e: 64 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 65 | s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.' 
66 | raise Exception(s) from e 67 | 68 | 69 | def custom(path='path/to/model.pt', autoshape=True, _verbose=True, device=None): 70 | # YOLO custom or local model 71 | return _create(path, autoshape=autoshape, verbose=_verbose, device=device) 72 | 73 | 74 | if __name__ == '__main__': 75 | import argparse 76 | from pathlib import Path 77 | 78 | import numpy as np 79 | from PIL import Image 80 | 81 | from utils.general import cv2, print_args 82 | 83 | # Argparser 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument('--model', type=str, default='yolo', help='model name') 86 | opt = parser.parse_args() 87 | print_args(vars(opt)) 88 | 89 | # Model 90 | model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True) 91 | # model = custom(path='path/to/model.pt') # custom 92 | 93 | # Images 94 | imgs = [ 95 | 'data/images/zidane.jpg', # filename 96 | Path('data/images/zidane.jpg'), # Path 97 | 'https://ultralytics.com/images/zidane.jpg', # URI 98 | cv2.imread('data/images/bus.jpg')[:, :, ::-1], # OpenCV 99 | Image.open('data/images/bus.jpg'), # PIL 100 | np.zeros((320, 640, 3))] # numpy 101 | 102 | # Inference 103 | results = model(imgs, size=320) # batched inference 104 | 105 | # Results 106 | results.print() 107 | results.save() 108 | -------------------------------------------------------------------------------- /llm.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | client = OpenAI(api_key ="sk-xxx") 3 | def get_response(prompt): 4 | completion = client.chat.completions.create( 5 | model="gpt-3.5-turbo-0125", 6 | messages=[ 7 | {"role": "system", "content": "You are my helpful Yolov9 assistant in blurring harmful objects in images. I will provide you a list of objects and you will respond like this: 'Cigarette, Beer'. Just give me the censored object names in a python list. Examples of some harmful objects: Things related to Alcohol and smoking. Knives. 
Adult explicit, naked images etc"}, 8 | {"role": "user", "content": f"{prompt}"} 9 | ] 10 | ) 11 | 12 | if completion.choices[0].message.content!=None: 13 | print(completion.choices[0].message.content) 14 | return completion.choices[0].message.content 15 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /models/detect/gelan-c.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # gelan backbone 14 | backbone: 15 | [ 16 | # conv down 17 | [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 18 | 19 | # conv down 20 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 21 | 22 | # elan-1 block 23 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 24 | 25 | # avg-conv down 26 | [-1, 1, ADown, [256]], # 3-P3/8 27 | 28 | # elan-2 block 29 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 30 | 31 | # avg-conv down 32 | [-1, 1, ADown, [512]], # 5-P4/16 33 | 34 | # elan-2 block 35 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 36 | 37 | # avg-conv down 38 | [-1, 1, ADown, [512]], # 7-P5/32 39 | 40 | # elan-2 block 41 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 42 | ] 43 | 44 | # gelan head 45 | head: 46 | [ 47 | # elan-spp block 48 | [-1, 1, SPPELAN, [512, 256]], # 9 49 | 50 | # up-concat merge 51 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 52 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 53 | 54 | # elan-2 block 55 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 56 | 57 | # up-concat merge 58 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 59 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 60 | 61 | # elan-2 block 62 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) 63 | 64 | # avg-conv-down merge 65 | [-1, 1, ADown, [256]], 66 | [[-1, 12], 1, Concat, [1]], # cat head P4 67 | 68 | # elan-2 block 69 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) 70 | 71 | # avg-conv-down merge 72 | [-1, 1, ADown, [512]], 73 | [[-1, 9], 1, Concat, [1]], # cat head P5 74 | 75 | # elan-2 block 76 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) 77 | 78 | # detect 79 | [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) 80 | ] 81 | -------------------------------------------------------------------------------- /models/detect/gelan-e.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # gelan backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | # conv down 19 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 20 | 21 | # conv down 22 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 23 | 24 | # elan-1 block 25 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3 26 | 27 | # avg-conv down 28 | [-1, 1, ADown, [256]], # 4-P3/8 29 | 30 | # elan-2 block 31 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 32 | 33 | # avg-conv down 34 | [-1, 1, ADown, [512]], # 6-P4/16 35 | 36 | # elan-2 block 37 | 
[-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 38 | 39 | # avg-conv down 40 | [-1, 1, ADown, [1024]], # 8-P5/32 41 | 42 | # elan-2 block 43 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 44 | 45 | # routing 46 | [1, 1, CBLinear, [[64]]], # 10 47 | [3, 1, CBLinear, [[64, 128]]], # 11 48 | [5, 1, CBLinear, [[64, 128, 256]]], # 12 49 | [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 50 | [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 51 | 52 | # conv down fuse 53 | [0, 1, Conv, [64, 3, 2]], # 15-P1/2 54 | [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 55 | 56 | # conv down fuse 57 | [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 58 | [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 59 | 60 | # elan-1 block 61 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 62 | 63 | # avg-conv down fuse 64 | [-1, 1, ADown, [256]], # 20-P3/8 65 | [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 66 | 67 | # elan-2 block 68 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 69 | 70 | # avg-conv down fuse 71 | [-1, 1, ADown, [512]], # 23-P4/16 72 | [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 73 | 74 | # elan-2 block 75 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 76 | 77 | # avg-conv down fuse 78 | [-1, 1, ADown, [1024]], # 26-P5/32 79 | [[14, -1], 1, CBFuse, [[4]]], # 27 80 | 81 | # elan-2 block 82 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 83 | ] 84 | 85 | # gelan head 86 | head: 87 | [ 88 | # elan-spp block 89 | [28, 1, SPPELAN, [512, 256]], # 29 90 | 91 | # up-concat merge 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [[-1, 25], 1, Concat, [1]], # cat backbone P4 94 | 95 | # elan-2 block 96 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 97 | 98 | # up-concat merge 99 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 100 | [[-1, 22], 1, Concat, [1]], # cat backbone P3 101 | 102 | # elan-2 block 103 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 (P3/8-small) 104 | 105 | # avg-conv-down merge 106 | [-1, 1, ADown, [256]], 107 | [[-1, 32], 1, Concat, [1]], # cat head P4 108 | 109 | # elan-2 block 110 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 38 (P4/16-medium) 111 | 112 | # avg-conv-down merge 113 | [-1, 1, ADown, [512]], 114 | [[-1, 29], 1, Concat, [1]], # cat head P5 115 | 116 | # elan-2 block 117 | [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 41 (P5/32-large) 118 | 119 | # detect 120 | [[35, 38, 41], 1, DDetect, [nc]], # Detect(P3, P4, P5) 121 | ] 122 | -------------------------------------------------------------------------------- /models/detect/gelan.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # gelan backbone 14 | backbone: 15 | [ 16 | # conv down 17 | [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 18 | 19 | # conv down 20 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 21 | 22 | # elan-1 block 23 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 24 | 25 | # avg-conv down 26 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 27 | 28 | # elan-2 block 29 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 30 | 31 | # avg-conv down 32 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 33 | 34 | # elan-2 block 35 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 36 | 37 | # avg-conv down 38 | [-1, 1, Conv, [512, 3, 2]], # 7-P5/32 39 | 40 | # elan-2 block 41 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 
42 | ] 43 | 44 | # gelan head 45 | head: 46 | [ 47 | # elan-spp block 48 | [-1, 1, SPPELAN, [512, 256]], # 9 49 | 50 | # up-concat merge 51 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 52 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 53 | 54 | # elan-2 block 55 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 56 | 57 | # up-concat merge 58 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 59 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 60 | 61 | # elan-2 block 62 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) 63 | 64 | # avg-conv-down merge 65 | [-1, 1, Conv, [256, 3, 2]], 66 | [[-1, 12], 1, Concat, [1]], # cat head P4 67 | 68 | # elan-2 block 69 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) 70 | 71 | # avg-conv-down merge 72 | [-1, 1, Conv, [512, 3, 2]], 73 | [[-1, 9], 1, Concat, [1]], # cat head P5 74 | 75 | # elan-2 block 76 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) 77 | 78 | # detect 79 | [[15, 18, 21], 1, DDetect, [nc]], # Detect(P3, P4, P5) 80 | ] 81 | -------------------------------------------------------------------------------- /models/detect/yolov7-af.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv7 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1. # model depth multiple 6 | width_multiple: 1. # layer channel multiple 7 | anchors: 3 8 | 9 | # YOLOv7 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [32, 3, 1]], # 0 13 | 14 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 15 | [-1, 1, Conv, [64, 3, 1]], 16 | 17 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 18 | [-1, 1, Conv, [64, 1, 1]], 19 | [-2, 1, Conv, [64, 1, 1]], 20 | [-1, 1, Conv, [64, 3, 1]], 21 | [-1, 1, Conv, [64, 3, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [[-1, -3, -5, -6], 1, Concat, [1]], 25 | [-1, 1, Conv, [256, 1, 1]], # 11 26 | 27 | [-1, 1, MP, []], 28 | [-1, 1, Conv, [128, 1, 1]], 29 | [-3, 1, Conv, [128, 1, 1]], 30 | [-1, 1, Conv, [128, 3, 2]], 31 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [[-1, -3, -5, -6], 1, Concat, [1]], 39 | [-1, 1, Conv, [512, 1, 1]], # 24 40 | 41 | [-1, 1, MP, []], 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-3, 1, Conv, [256, 1, 1]], 44 | [-1, 1, Conv, [256, 3, 2]], 45 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 46 | [-1, 1, Conv, [256, 1, 1]], 47 | [-2, 1, Conv, [256, 1, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [[-1, -3, -5, -6], 1, Concat, [1]], 53 | [-1, 1, Conv, [1024, 1, 1]], # 37 54 | 55 | [-1, 1, MP, []], 56 | [-1, 1, Conv, [512, 1, 1]], 57 | [-3, 1, Conv, [512, 1, 1]], 58 | [-1, 1, Conv, [512, 3, 2]], 59 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 60 | [-1, 1, Conv, [256, 1, 1]], 61 | [-2, 1, Conv, [256, 1, 1]], 62 | [-1, 1, Conv, [256, 3, 1]], 63 | [-1, 1, Conv, [256, 3, 1]], 64 | [-1, 1, Conv, [256, 3, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [[-1, -3, -5, -6], 1, Concat, [1]], 67 | [-1, 1, Conv, [1024, 1, 1]], # 50 68 | ] 69 | 70 | # yolov7 head 71 | head: 72 | [[-1, 1, SPPCSPC, [512]], # 51 73 | 74 | [-1, 1, Conv, [256, 1, 1]], 75 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 76 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 77 | [[-1, -2], 1, Concat, [1]], 78 | 79 | [-1, 1, Conv, [256, 1, 1]], 80 | [-2, 1, 
Conv, [256, 1, 1]], 81 | [-1, 1, Conv, [128, 3, 1]], 82 | [-1, 1, Conv, [128, 3, 1]], 83 | [-1, 1, Conv, [128, 3, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 86 | [-1, 1, Conv, [256, 1, 1]], # 63 87 | 88 | [-1, 1, Conv, [128, 1, 1]], 89 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 90 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 91 | [[-1, -2], 1, Concat, [1]], 92 | 93 | [-1, 1, Conv, [128, 1, 1]], 94 | [-2, 1, Conv, [128, 1, 1]], 95 | [-1, 1, Conv, [64, 3, 1]], 96 | [-1, 1, Conv, [64, 3, 1]], 97 | [-1, 1, Conv, [64, 3, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 100 | [-1, 1, Conv, [128, 1, 1]], # 75 101 | 102 | [-1, 1, MP, []], 103 | [-1, 1, Conv, [128, 1, 1]], 104 | [-3, 1, Conv, [128, 1, 1]], 105 | [-1, 1, Conv, [128, 3, 2]], 106 | [[-1, -3, 63], 1, Concat, [1]], 107 | 108 | [-1, 1, Conv, [256, 1, 1]], 109 | [-2, 1, Conv, [256, 1, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 115 | [-1, 1, Conv, [256, 1, 1]], # 88 116 | 117 | [-1, 1, MP, []], 118 | [-1, 1, Conv, [256, 1, 1]], 119 | [-3, 1, Conv, [256, 1, 1]], 120 | [-1, 1, Conv, [256, 3, 2]], 121 | [[-1, -3, 51], 1, Concat, [1]], 122 | 123 | [-1, 1, Conv, [512, 1, 1]], 124 | [-2, 1, Conv, [512, 1, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 130 | [-1, 1, Conv, [512, 1, 1]], # 101 131 | 132 | [75, 1, Conv, [256, 3, 1]], 133 | [88, 1, Conv, [512, 3, 1]], 134 | [101, 1, Conv, [1024, 3, 1]], 135 | 136 | [[102, 103, 104], 1, Detect, [nc]], # Detect(P3, P4, P5) 137 | ] 138 | -------------------------------------------------------------------------------- /models/detect/yolov9-c.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | # conv down 19 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 20 | 21 | # conv down 22 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 23 | 24 | # elan-1 block 25 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 26 | 27 | # avg-conv down 28 | [-1, 1, ADown, [256]], # 4-P3/8 29 | 30 | # elan-2 block 31 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 32 | 33 | # avg-conv down 34 | [-1, 1, ADown, [512]], # 6-P4/16 35 | 36 | # elan-2 block 37 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 38 | 39 | # avg-conv down 40 | [-1, 1, ADown, [512]], # 8-P5/32 41 | 42 | # elan-2 block 43 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 44 | ] 45 | 46 | # YOLOv9 head 47 | head: 48 | [ 49 | # elan-spp block 50 | [-1, 1, SPPELAN, [512, 256]], # 10 51 | 52 | # up-concat merge 53 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 54 | [[-1, 7], 1, Concat, [1]], # cat backbone P4 55 | 56 | # elan-2 block 57 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 58 | 59 | # up-concat merge 60 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 61 | [[-1, 5], 1, Concat, [1]], # cat backbone P3 62 | 63 | # elan-2 block 64 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) 65 | 66 | # avg-conv-down merge 67 | 
[-1, 1, ADown, [256]], 68 | [[-1, 13], 1, Concat, [1]], # cat head P4 69 | 70 | # elan-2 block 71 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) 72 | 73 | # avg-conv-down merge 74 | [-1, 1, ADown, [512]], 75 | [[-1, 10], 1, Concat, [1]], # cat head P5 76 | 77 | # elan-2 block 78 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) 79 | 80 | 81 | # multi-level reversible auxiliary branch 82 | 83 | # routing 84 | [5, 1, CBLinear, [[256]]], # 23 85 | [7, 1, CBLinear, [[256, 512]]], # 24 86 | [9, 1, CBLinear, [[256, 512, 512]]], # 25 87 | 88 | # conv down 89 | [0, 1, Conv, [64, 3, 2]], # 26-P1/2 90 | 91 | # conv down 92 | [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 93 | 94 | # elan-1 block 95 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 96 | 97 | # avg-conv down fuse 98 | [-1, 1, ADown, [256]], # 29-P3/8 99 | [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 100 | 101 | # elan-2 block 102 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 103 | 104 | # avg-conv down fuse 105 | [-1, 1, ADown, [512]], # 32-P4/16 106 | [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 107 | 108 | # elan-2 block 109 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 110 | 111 | # avg-conv down fuse 112 | [-1, 1, ADown, [512]], # 35-P5/32 113 | [[25, -1], 1, CBFuse, [[2]]], # 36 114 | 115 | # elan-2 block 116 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 117 | 118 | 119 | 120 | # detection head 121 | 122 | # detect 123 | [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 124 | ] 125 | -------------------------------------------------------------------------------- /models/detect/yolov9-e.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | # conv down 19 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 20 | 21 | # conv down 22 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 23 | 24 | # csp-elan block 25 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3 26 | 27 | # avg-conv down 28 | [-1, 1, ADown, [256]], # 4-P3/8 29 | 30 | # csp-elan block 31 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 32 | 33 | # avg-conv down 34 | [-1, 1, ADown, [512]], # 6-P4/16 35 | 36 | # csp-elan block 37 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 38 | 39 | # avg-conv down 40 | [-1, 1, ADown, [1024]], # 8-P5/32 41 | 42 | # csp-elan block 43 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 44 | 45 | # routing 46 | [1, 1, CBLinear, [[64]]], # 10 47 | [3, 1, CBLinear, [[64, 128]]], # 11 48 | [5, 1, CBLinear, [[64, 128, 256]]], # 12 49 | [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 50 | [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 51 | 52 | # conv down 53 | [0, 1, Conv, [64, 3, 2]], # 15-P1/2 54 | [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 55 | 56 | # conv down 57 | [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 58 | [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 59 | 60 | # csp-elan block 61 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 62 | 63 | # avg-conv down fuse 64 | [-1, 1, ADown, [256]], # 20-P3/8 65 | [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 66 | 67 | # csp-elan block 68 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 69 | 70 | # avg-conv down fuse 71 | [-1, 1, ADown, [512]], # 
23-P4/16 72 | [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 73 | 74 | # csp-elan block 75 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 76 | 77 | # avg-conv down fuse 78 | [-1, 1, ADown, [1024]], # 26-P5/32 79 | [[14, -1], 1, CBFuse, [[4]]], # 27 80 | 81 | # csp-elan block 82 | [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 83 | ] 84 | 85 | # YOLOv9 head 86 | head: 87 | [ 88 | # multi-level auxiliary branch 89 | 90 | # elan-spp block 91 | [9, 1, SPPELAN, [512, 256]], # 29 92 | 93 | # up-concat merge 94 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 95 | [[-1, 7], 1, Concat, [1]], # cat backbone P4 96 | 97 | # csp-elan block 98 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 99 | 100 | # up-concat merge 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [[-1, 5], 1, Concat, [1]], # cat backbone P3 103 | 104 | # csp-elan block 105 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 106 | 107 | 108 | 109 | # main branch 110 | 111 | # elan-spp block 112 | [28, 1, SPPELAN, [512, 256]], # 36 113 | 114 | # up-concat merge 115 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 116 | [[-1, 25], 1, Concat, [1]], # cat backbone P4 117 | 118 | # csp-elan block 119 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 120 | 121 | # up-concat merge 122 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 123 | [[-1, 22], 1, Concat, [1]], # cat backbone P3 124 | 125 | # csp-elan block 126 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) 127 | 128 | # avg-conv-down merge 129 | [-1, 1, ADown, [256]], 130 | [[-1, 39], 1, Concat, [1]], # cat head P4 131 | 132 | # csp-elan block 133 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) 134 | 135 | # avg-conv-down merge 136 | [-1, 1, ADown, [512]], 137 | [[-1, 36], 1, Concat, [1]], # cat head P5 138 | 139 | # csp-elan block 140 | [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) 141 | 142 | # detect 143 | [[35, 32, 29, 42, 45, 48], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 144 | ] 145 | -------------------------------------------------------------------------------- /models/detect/yolov9.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv9 2 | 3 | # parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | #activation: nn.LeakyReLU(0.1) 8 | #activation: nn.ReLU() 9 | 10 | # anchors 11 | anchors: 3 12 | 13 | # YOLOv9 backbone 14 | backbone: 15 | [ 16 | [-1, 1, Silence, []], 17 | 18 | # conv down 19 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 20 | 21 | # conv down 22 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 23 | 24 | # elan-1 block 25 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 26 | 27 | # conv down 28 | [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 29 | 30 | # elan-2 block 31 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 32 | 33 | # conv down 34 | [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 35 | 36 | # elan-2 block 37 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 38 | 39 | # conv down 40 | [-1, 1, Conv, [512, 3, 2]], # 8-P5/32 41 | 42 | # elan-2 block 43 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 44 | ] 45 | 46 | # YOLOv9 head 47 | head: 48 | [ 49 | # elan-spp block 50 | [-1, 1, SPPELAN, [512, 256]], # 10 51 | 52 | # up-concat merge 53 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 54 | [[-1, 7], 1, Concat, [1]], # cat backbone P4 55 | 56 | # elan-2 block 57 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 58 | 59 | # up-concat merge 60 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 61 | [[-1, 
5], 1, Concat, [1]], # cat backbone P3 62 | 63 | # elan-2 block 64 | [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) 65 | 66 | # conv-down merge 67 | [-1, 1, Conv, [256, 3, 2]], 68 | [[-1, 13], 1, Concat, [1]], # cat head P4 69 | 70 | # elan-2 block 71 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) 72 | 73 | # conv-down merge 74 | [-1, 1, Conv, [512, 3, 2]], 75 | [[-1, 10], 1, Concat, [1]], # cat head P5 76 | 77 | # elan-2 block 78 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) 79 | 80 | # routing 81 | [5, 1, CBLinear, [[256]]], # 23 82 | [7, 1, CBLinear, [[256, 512]]], # 24 83 | [9, 1, CBLinear, [[256, 512, 512]]], # 25 84 | 85 | # conv down 86 | [0, 1, Conv, [64, 3, 2]], # 26-P1/2 87 | 88 | # conv down 89 | [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 90 | 91 | # elan-1 block 92 | [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 93 | 94 | # conv down fuse 95 | [-1, 1, Conv, [256, 3, 2]], # 29-P3/8 96 | [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 97 | 98 | # elan-2 block 99 | [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 100 | 101 | # conv down fuse 102 | [-1, 1, Conv, [512, 3, 2]], # 32-P4/16 103 | [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 104 | 105 | # elan-2 block 106 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 107 | 108 | # conv down fuse 109 | [-1, 1, Conv, [512, 3, 2]], # 35-P5/32 110 | [[25, -1], 1, CBFuse, [[2]]], # 36 111 | 112 | # elan-2 block 113 | [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 114 | 115 | # detect 116 | [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) 117 | ] 118 | -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from utils.downloads import attempt_download 8 | 9 | 10 | class Sum(nn.Module): 11 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 12 | def __init__(self, n, weight=False): # n: number of inputs 13 | super().__init__() 14 | self.weight = weight # apply weights boolean 15 | self.iter = range(n - 1) # iter object 16 | if weight: 17 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights 18 | 19 | def forward(self, x): 20 | y = x[0] # no weight 21 | if self.weight: 22 | w = torch.sigmoid(self.w) * 2 23 | for i in self.iter: 24 | y = y + x[i + 1] * w[i] 25 | else: 26 | for i in self.iter: 27 | y = y + x[i + 1] 28 | return y 29 | 30 | 31 | class MixConv2d(nn.Module): 32 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 33 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy 34 | super().__init__() 35 | n = len(k) # number of convolutions 36 | if equal_ch: # equal c_ per group 37 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices 38 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels 39 | else: # equal weight.numel() per group 40 | b = [c2] + [0] * n 41 | a = np.eye(n + 1, n, k=-1) 42 | a -= np.roll(a, 1, axis=1) 43 | a *= np.array(k) ** 2 44 | a[0] = 1 45 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 46 | 47 | self.m = nn.ModuleList([ 48 | nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) 49 | self.bn = nn.BatchNorm2d(c2) 50 | self.act = nn.SiLU() 51 | 52 | def forward(self, x): 53 | return 
self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 54 | 55 | 56 | class Ensemble(nn.ModuleList): 57 | # Ensemble of models 58 | def __init__(self): 59 | super().__init__() 60 | 61 | def forward(self, x, augment=False, profile=False, visualize=False): 62 | y = [module(x, augment, profile, visualize)[0] for module in self] 63 | # y = torch.stack(y).max(0)[0] # max ensemble 64 | # y = torch.stack(y).mean(0) # mean ensemble 65 | y = torch.cat(y, 1) # nms ensemble 66 | return y, None # inference, train output 67 | 68 | 69 | def attempt_load(weights, device=None, inplace=True, fuse=True): 70 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 71 | from models.yolo import Detect, Model 72 | 73 | model = Ensemble() 74 | for w in weights if isinstance(weights, list) else [weights]: 75 | ckpt = torch.load(attempt_download(w), map_location='cpu') # load 76 | ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model 77 | 78 | # Model compatibility updates 79 | if not hasattr(ckpt, 'stride'): 80 | ckpt.stride = torch.tensor([32.]) 81 | if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)): 82 | ckpt.names = dict(enumerate(ckpt.names)) # convert to dict 83 | 84 | model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode 85 | 86 | # Module compatibility updates 87 | for m in model.modules(): 88 | t = type(m) 89 | if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model): 90 | m.inplace = inplace # torch 1.7.0 compatibility 91 | # if t is Detect and not isinstance(m.anchor_grid, list): 92 | # delattr(m, 'anchor_grid') 93 | # setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) 94 | elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): 95 | m.recompute_scale_factor = None # torch 1.11.0 compatibility 96 | 97 | # Return model 98 | if len(model) == 1: 99 | return model[-1] 100 | 101 | # Return detection ensemble 102 | print(f'Ensemble created with {weights}\n') 103 | for k in 'names', 'nc', 'yaml': 104 | setattr(model, k, getattr(model[0], k)) 105 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 106 | assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' 107 | return model 108 | -------------------------------------------------------------------------------- /models/hub/anchors.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv3 & YOLOv5 2 | # Default anchors for COCO data 3 | 4 | 5 | # P5 ------------------------------------------------------------------------------------------------------------------- 6 | # P5-640: 7 | anchors_p5_640: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | 13 | # P6 ------------------------------------------------------------------------------------------------------------------- 14 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 15 | anchors_p6_640: 16 | - [9,11, 21,19, 17,41] # P3/8 17 | - [43,32, 39,70, 86,64] # P4/16 18 | - [65,131, 134,130, 120,265] # P5/32 19 | - [282,180, 247,354, 512,387] # P6/64 20 | 21 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, 
metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 22 | anchors_p6_1280: 23 | - [19,27, 44,40, 38,94] # P3/8 24 | - [96,68, 86,152, 180,137] # P4/16 25 | - [140,301, 303,264, 238,542] # P5/32 26 | - [436,615, 739,380, 925,792] # P6/64 27 | 28 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 29 | anchors_p6_1920: 30 | - [28,41, 67,59, 57,141] # P3/8 31 | - [144,103, 129,227, 270,205] # P4/16 32 | - [209,452, 455,396, 358,812] # P5/32 33 | - [653,922, 1109,570, 1387,1187] # P6/64 34 | 35 | 36 | # P7 ------------------------------------------------------------------------------------------------------------------- 37 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 38 | anchors_p7_640: 39 | - [11,11, 13,30, 29,20] # P3/8 40 | - [30,46, 61,38, 39,92] # P4/16 41 | - [78,80, 146,66, 79,163] # P5/32 42 | - [149,150, 321,143, 157,303] # P6/64 43 | - [257,402, 359,290, 524,372] # P7/128 44 | 45 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 46 | anchors_p7_1280: 47 | - [19,22, 54,36, 32,77] # P3/8 48 | - [70,83, 138,71, 75,173] # P4/16 49 | - [165,159, 148,334, 375,151] # P5/32 50 | - [334,317, 251,626, 499,474] # P6/64 51 | - [750,326, 534,814, 1079,818] # P7/128 52 | 53 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 54 | anchors_p7_1920: 55 | - [29,34, 81,55, 47,115] # P3/8 56 | - [105,124, 207,107, 113,259] # P4/16 57 | - [247,238, 222,500, 563,227] # P5/32 58 | - [501,476, 376,939, 749,711] # P6/64 59 | - [1126,489, 801,1222, 1618,1227] # P7/128 60 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv3 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, 
[512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv3 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv3 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, 
Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/panoptic/yolov7-af-pan.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv7 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | sem_nc: 93 # number of stuff classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | anchors: 3 9 | 10 | # YOLOv7 backbone 11 | backbone: 12 | [[-1, 1, Conv, [32, 3, 1]], # 0 13 | 14 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 15 | [-1, 1, Conv, [64, 3, 1]], 16 | 17 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 18 | [-1, 1, Conv, [64, 1, 1]], 19 | [-2, 1, Conv, [64, 1, 1]], 20 | [-1, 1, Conv, [64, 3, 1]], 21 | [-1, 1, Conv, [64, 3, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [[-1, -3, -5, -6], 1, Concat, [1]], 25 | [-1, 1, Conv, [256, 1, 1]], # 11 26 | 27 | [-1, 1, MP, []], 28 | [-1, 1, Conv, [128, 1, 1]], 29 | [-3, 1, Conv, [128, 1, 1]], 30 | [-1, 1, Conv, [128, 3, 2]], 31 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [[-1, -3, -5, -6], 1, Concat, [1]], 39 | [-1, 1, Conv, [512, 1, 1]], # 24 40 | 41 | [-1, 1, MP, []], 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-3, 1, Conv, [256, 1, 1]], 44 | [-1, 1, Conv, [256, 3, 2]], 45 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 46 | [-1, 1, Conv, [256, 1, 1]], 47 | [-2, 1, Conv, [256, 1, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [[-1, -3, -5, -6], 1, Concat, [1]], 53 | [-1, 1, Conv, [1024, 1, 1]], # 37 54 | 55 | [-1, 1, MP, []], 56 | [-1, 1, Conv, [512, 1, 1]], 57 | [-3, 1, Conv, [512, 1, 1]], 58 | [-1, 1, Conv, [512, 3, 2]], 59 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 60 | [-1, 1, Conv, [256, 1, 1]], 61 | [-2, 1, Conv, [256, 1, 1]], 62 | [-1, 1, Conv, [256, 3, 1]], 63 | [-1, 1, Conv, [256, 3, 1]], 64 | [-1, 1, Conv, [256, 3, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [[-1, -3, -5, -6], 1, Concat, [1]], 67 | [-1, 1, Conv, [1024, 1, 1]], # 50 68 | ] 69 | 70 | # yolov7 head 71 | head: 72 | [[-1, 1, SPPCSPC, [512]], # 51 73 | 74 | [-1, 1, Conv, [256, 1, 1]], 75 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 76 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 77 | [[-1, -2], 1, Concat, [1]], 78 | 79 | [-1, 1, Conv, [256, 1, 1]], 80 | [-2, 1, Conv, [256, 1, 1]], 81 | [-1, 1, Conv, [128, 3, 1]], 82 | [-1, 1, Conv, [128, 3, 1]], 83 | [-1, 1, Conv, [128, 3, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 86 | [-1, 1, Conv, [256, 1, 1]], # 63 87 | 88 | [-1, 1, Conv, [128, 1, 1]], 89 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 90 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 91 | [[-1, -2], 1, Concat, [1]], 92 | 93 | [-1, 1, Conv, [128, 1, 1]], 94 | [-2, 1, Conv, [128, 1, 1]], 95 | [-1, 1, Conv, [64, 3, 1]], 96 | [-1, 1, Conv, [64, 3, 1]], 
97 | [-1, 1, Conv, [64, 3, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 100 | [-1, 1, Conv, [128, 1, 1]], # 75 101 | 102 | [-1, 1, MP, []], 103 | [-1, 1, Conv, [128, 1, 1]], 104 | [-3, 1, Conv, [128, 1, 1]], 105 | [-1, 1, Conv, [128, 3, 2]], 106 | [[-1, -3, 63], 1, Concat, [1]], 107 | 108 | [-1, 1, Conv, [256, 1, 1]], 109 | [-2, 1, Conv, [256, 1, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 115 | [-1, 1, Conv, [256, 1, 1]], # 88 116 | 117 | [-1, 1, MP, []], 118 | [-1, 1, Conv, [256, 1, 1]], 119 | [-3, 1, Conv, [256, 1, 1]], 120 | [-1, 1, Conv, [256, 3, 2]], 121 | [[-1, -3, 51], 1, Concat, [1]], 122 | 123 | [-1, 1, Conv, [512, 1, 1]], 124 | [-2, 1, Conv, [512, 1, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 130 | [-1, 1, Conv, [512, 1, 1]], # 101 131 | 132 | [75, 1, Conv, [256, 3, 1]], 133 | [88, 1, Conv, [512, 3, 1]], 134 | [101, 1, Conv, [1024, 3, 1]], 135 | 136 | [[102, 103, 104], 1, Panoptic, [nc, 93, 32, 256]], # Panoptic(P3, P4, P5) 137 | ] 138 | -------------------------------------------------------------------------------- /models/segment/yolov7-af-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv7 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 8 | 9 | # YOLOv7 backbone 10 | backbone: 11 | [[-1, 1, Conv, [32, 3, 1]], # 0 12 | 13 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 14 | [-1, 1, Conv, [64, 3, 1]], 15 | 16 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 17 | [-1, 1, Conv, [64, 1, 1]], 18 | [-2, 1, Conv, [64, 1, 1]], 19 | [-1, 1, Conv, [64, 3, 1]], 20 | [-1, 1, Conv, [64, 3, 1]], 21 | [-1, 1, Conv, [64, 3, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [[-1, -3, -5, -6], 1, Concat, [1]], 24 | [-1, 1, Conv, [256, 1, 1]], # 11 25 | 26 | [-1, 1, MP, []], 27 | [-1, 1, Conv, [128, 1, 1]], 28 | [-3, 1, Conv, [128, 1, 1]], 29 | [-1, 1, Conv, [128, 3, 2]], 30 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-2, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [[-1, -3, -5, -6], 1, Concat, [1]], 38 | [-1, 1, Conv, [512, 1, 1]], # 24 39 | 40 | [-1, 1, MP, []], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-3, 1, Conv, [256, 1, 1]], 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-2, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [[-1, -3, -5, -6], 1, Concat, [1]], 52 | [-1, 1, Conv, [1024, 1, 1]], # 37 53 | 54 | [-1, 1, MP, []], 55 | [-1, 1, Conv, [512, 1, 1]], 56 | [-3, 1, Conv, [512, 1, 1]], 57 | [-1, 1, Conv, [512, 3, 2]], 58 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 59 | [-1, 1, Conv, [256, 1, 1]], 60 | [-2, 1, Conv, [256, 1, 1]], 61 | [-1, 1, Conv, [256, 3, 1]], 62 | [-1, 1, Conv, [256, 3, 1]], 63 | [-1, 1, Conv, [256, 3, 1]], 64 | [-1, 1, Conv, [256, 3, 1]], 65 | [[-1, -3, -5, -6], 1, Concat, [1]], 66 | [-1, 1, Conv, [1024, 1, 1]], # 50 67 | ] 68 | 69 | # yolov7 head 70 | head: 71 | [[-1, 1, SPPCSPC, 
[512]], # 51 72 | 73 | [-1, 1, Conv, [256, 1, 1]], 74 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 75 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 76 | [[-1, -2], 1, Concat, [1]], 77 | 78 | [-1, 1, Conv, [256, 1, 1]], 79 | [-2, 1, Conv, [256, 1, 1]], 80 | [-1, 1, Conv, [128, 3, 1]], 81 | [-1, 1, Conv, [128, 3, 1]], 82 | [-1, 1, Conv, [128, 3, 1]], 83 | [-1, 1, Conv, [128, 3, 1]], 84 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 85 | [-1, 1, Conv, [256, 1, 1]], # 63 86 | 87 | [-1, 1, Conv, [128, 1, 1]], 88 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 89 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 90 | [[-1, -2], 1, Concat, [1]], 91 | 92 | [-1, 1, Conv, [128, 1, 1]], 93 | [-2, 1, Conv, [128, 1, 1]], 94 | [-1, 1, Conv, [64, 3, 1]], 95 | [-1, 1, Conv, [64, 3, 1]], 96 | [-1, 1, Conv, [64, 3, 1]], 97 | [-1, 1, Conv, [64, 3, 1]], 98 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 99 | [-1, 1, Conv, [128, 1, 1]], # 75 100 | 101 | [-1, 1, MP, []], 102 | [-1, 1, Conv, [128, 1, 1]], 103 | [-3, 1, Conv, [128, 1, 1]], 104 | [-1, 1, Conv, [128, 3, 2]], 105 | [[-1, -3, 63], 1, Concat, [1]], 106 | 107 | [-1, 1, Conv, [256, 1, 1]], 108 | [-2, 1, Conv, [256, 1, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 114 | [-1, 1, Conv, [256, 1, 1]], # 88 115 | 116 | [-1, 1, MP, []], 117 | [-1, 1, Conv, [256, 1, 1]], 118 | [-3, 1, Conv, [256, 1, 1]], 119 | [-1, 1, Conv, [256, 3, 2]], 120 | [[-1, -3, 51], 1, Concat, [1]], 121 | 122 | [-1, 1, Conv, [512, 1, 1]], 123 | [-2, 1, Conv, [512, 1, 1]], 124 | [-1, 1, Conv, [256, 3, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 129 | [-1, 1, Conv, [512, 1, 1]], # 101 130 | 131 | [75, 1, Conv, [256, 3, 1]], 132 | [88, 1, Conv, [512, 3, 1]], 133 | [101, 1, Conv, [1024, 3, 1]], 134 | 135 | [[102, 103, 104], 1, Segment, [nc, 32, 256]], # Segment(P3, P4, P5) 136 | ] 137 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements 2 | # Usage: pip install -r requirements.txt 3 | 4 | # Base ------------------------------------------------------------------------ 5 | gitpython 6 | ipython 7 | matplotlib>=3.2.2 8 | numpy>=1.18.5 9 | opencv-python>=4.1.1 10 | Pillow>=7.1.2 11 | psutil 12 | PyYAML>=5.3.1 13 | requests>=2.23.0 14 | scipy>=1.4.1 15 | thop>=0.1.1 16 | torch>=1.7.0 17 | torchvision>=0.8.1 18 | tqdm>=4.64.0 19 | # protobuf<=3.20.1 20 | 21 | # Logging --------------------------------------------------------------------- 22 | tensorboard>=2.4.1 23 | # clearml>=1.2.0 24 | # comet 25 | 26 | # Plotting -------------------------------------------------------------------- 27 | pandas>=1.1.4 28 | seaborn>=0.11.0 29 | 30 | # Export ---------------------------------------------------------------------- 31 | # coremltools>=6.0 32 | # onnx>=1.9.0 33 | # onnx-simplifier>=0.4.1 34 | # nvidia-pyindex 35 | # nvidia-tensorrt 36 | # scikit-learn<=1.1.2 37 | # tensorflow>=2.4.1 38 | # tensorflowjs>=3.9.0 39 | # openvino-dev 40 | 41 | # Deploy ---------------------------------------------------------------------- 42 | # tritonclient[all]~=2.24.0 43 | 44 | # Extras ---------------------------------------------------------------------- 45 | # mss 46 | albumentations>=1.0.3 47 | pycocotools>=2.0 48 | 
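After installing the requirements above (pip install -r requirements.txt), it can help to confirm that the core packages are importable before launching training. The repository's own utils.general.check_requirements() performs this check at runtime; the snippet below is only an illustrative, standalone sketch, and the package list is a subset I picked from the base section above rather than anything mandated by the repo:

from importlib.metadata import PackageNotFoundError, version

# Illustrative subset of the base packages pinned in requirements.txt
packages = ['torch', 'torchvision', 'numpy', 'opencv-python', 'Pillow', 'PyYAML', 'scipy', 'tqdm']

for name in packages:
    try:
        print(f'{name}=={version(name)}')  # report the installed version
    except PackageNotFoundError:
        print(f'{name} missing -- run: pip install -r requirements.txt')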
-------------------------------------------------------------------------------- /scripts/get_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # COCO 2017 dataset http://cocodataset.org 3 | # Download command: bash ./scripts/get_coco.sh 4 | 5 | # Download/unzip labels 6 | d='./' # unzip directory 7 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 8 | f='coco2017labels-segments.zip' # or 'coco2017labels.zip', 68 MB 9 | echo 'Downloading' $url$f ' ...' 10 | curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background 11 | 12 | # Download/unzip images 13 | d='./coco/images' # unzip directory 14 | url=http://images.cocodataset.org/zips/ 15 | f1='train2017.zip' # 19G, 118k images 16 | f2='val2017.zip' # 1G, 5k images 17 | f3='test2017.zip' # 7G, 41k images (optional) 18 | for f in $f1 $f2 $f3; do 19 | echo 'Downloading' $url$f '...' 20 | curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background 21 | done 22 | wait # finish background tasks 23 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import platform 3 | import threading 4 | 5 | 6 | def emojis(str=''): 7 | # Return platform-dependent emoji-safe version of string 8 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 9 | 10 | 11 | class TryExcept(contextlib.ContextDecorator): 12 | # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager 13 | def __init__(self, msg=''): 14 | self.msg = msg 15 | 16 | def __enter__(self): 17 | pass 18 | 19 | def __exit__(self, exc_type, value, traceback): 20 | if value: 21 | print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}")) 22 | return True 23 | 24 | 25 | def threaded(func): 26 | # Multi-threads a target function and returns thread. Usage: @threaded decorator 27 | def wrapper(*args, **kwargs): 28 | thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) 29 | thread.start() 30 | return thread 31 | 32 | return wrapper 33 | 34 | 35 | def join_threads(verbose=False): 36 | # Join all daemon threads, i.e. 
atexit.register(lambda: join_threads()) 37 | main_thread = threading.current_thread() 38 | for t in threading.enumerate(): 39 | if t is not main_thread: 40 | if verbose: 41 | print(f'Joining thread {t.name}') 42 | t.join() 43 | 44 | 45 | def notebook_init(verbose=True): 46 | # Check system software and hardware 47 | print('Checking setup...') 48 | 49 | import os 50 | import shutil 51 | 52 | from utils.general import check_font, check_requirements, is_colab 53 | from utils.torch_utils import select_device # imports 54 | 55 | check_font() 56 | 57 | import psutil 58 | from IPython import display # to display images and clear console output 59 | 60 | if is_colab(): 61 | shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory 62 | 63 | # System info 64 | if verbose: 65 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 66 | ram = psutil.virtual_memory().total 67 | total, used, free = shutil.disk_usage("/") 68 | display.clear_output() 69 | s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)' 70 | else: 71 | s = '' 72 | 73 | select_device(newline=False) 74 | print(emojis(f'Setup complete ✅ {s}')) 75 | return display 76 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SiLU(nn.Module): 7 | # SiLU activation https://arxiv.org/pdf/1606.08415.pdf 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | class Hardswish(nn.Module): 14 | # Hard-SiLU activation 15 | @staticmethod 16 | def forward(x): 17 | # return x * F.hardsigmoid(x) # for TorchScript and CoreML 18 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX 19 | 20 | 21 | class Mish(nn.Module): 22 | # Mish activation https://github.com/digantamisra98/Mish 23 | @staticmethod 24 | def forward(x): 25 | return x * F.softplus(x).tanh() 26 | 27 | 28 | class MemoryEfficientMish(nn.Module): 29 | # Mish activation memory-efficient 30 | class F(torch.autograd.Function): 31 | 32 | @staticmethod 33 | def forward(ctx, x): 34 | ctx.save_for_backward(x) 35 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | x = ctx.saved_tensors[0] 40 | sx = torch.sigmoid(x) 41 | fx = F.softplus(x).tanh() 42 | return grad_output * (fx + x * sx * (1 - fx * fx)) 43 | 44 | def forward(self, x): 45 | return self.F.apply(x) 46 | 47 | 48 | class FReLU(nn.Module): 49 | # FReLU activation https://arxiv.org/abs/2007.11824 50 | def __init__(self, c1, k=3): # ch_in, kernel 51 | super().__init__() 52 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 53 | self.bn = nn.BatchNorm2d(c1) 54 | 55 | def forward(self, x): 56 | return torch.max(x, self.bn(self.conv(x))) 57 | 58 | 59 | class AconC(nn.Module): 60 | r""" ACON activation (activate or not) 61 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 62 | according to "Activate or Not: Learning Customized Activation" . 
63 | """ 64 | 65 | def __init__(self, c1): 66 | super().__init__() 67 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 68 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 69 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 70 | 71 | def forward(self, x): 72 | dpx = (self.p1 - self.p2) * x 73 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 74 | 75 | 76 | class MetaAconC(nn.Module): 77 | r""" ACON activation (activate or not) 78 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 79 | according to "Activate or Not: Learning Customized Activation" . 80 | """ 81 | 82 | def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r 83 | super().__init__() 84 | c2 = max(r, c1 // r) 85 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 86 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 87 | self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) 88 | self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) 89 | # self.bn1 = nn.BatchNorm2d(c2) 90 | # self.bn2 = nn.BatchNorm2d(c1) 91 | 92 | def forward(self, x): 93 | y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) 94 | # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 95 | # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable 96 | beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed 97 | dpx = (self.p1 - self.p2) * x 98 | return dpx * torch.sigmoid(beta * dpx) + self.p2 * x 99 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from tqdm import tqdm 7 | 8 | from utils import TryExcept 9 | from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr 10 | 11 | PREFIX = colorstr('AutoAnchor: ') 12 | 13 | 14 | def check_anchor_order(m): 15 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 16 | a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer 17 | da = a[-1] - a[0] # delta a 18 | ds = m.stride[-1] - m.stride[0] # delta s 19 | if da and (da.sign() != ds.sign()): # same order 20 | LOGGER.info(f'{PREFIX}Reversing anchor order') 21 | m.anchors[:] = m.anchors.flip(0) 22 | 23 | 24 | @TryExcept(f'{PREFIX}ERROR') 25 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 26 | # Check anchor fit to data, recompute if necessary 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1 / thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides 41 | anchors = m.anchors.clone() * stride # current anchors 42 | bpr, aat = metric(anchors.cpu().view(-1, 2)) 43 | s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). 
' 44 | if bpr > 0.98: # threshold to recompute 45 | LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅') 46 | else: 47 | LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...') 48 | na = m.anchors.numel() // 2 # number of anchors 49 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 50 | new_bpr = metric(anchors)[0] 51 | if new_bpr > bpr: # replace anchors 52 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 53 | m.anchors[:] = anchors.clone().view_as(m.anchors) 54 | check_anchor_order(m) # must be in pixel-space (not grid-space) 55 | m.anchors /= stride 56 | s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)' 57 | else: 58 | s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)' 59 | LOGGER.info(s) 60 | 61 | 62 | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 63 | """ Creates kmeans-evolved anchors from training dataset 64 | 65 | Arguments: 66 | dataset: path to data.yaml, or a loaded dataset 67 | n: number of anchors 68 | img_size: image size used for training 69 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 70 | gen: generations to evolve anchors using genetic algorithm 71 | verbose: print all results 72 | 73 | Return: 74 | k: kmeans evolved anchors 75 | 76 | Usage: 77 | from utils.autoanchor import *; _ = kmean_anchors() 78 | """ 79 | from scipy.cluster.vq import kmeans 80 | 81 | npr = np.random 82 | thr = 1 / thr 83 | 84 | def metric(k, wh): # compute metrics 85 | r = wh[:, None] / k[None] 86 | x = torch.min(r, 1 / r).min(2)[0] # ratio metric 87 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 88 | return x, x.max(1)[0] # x, best_x 89 | 90 | def anchor_fitness(k): # mutation fitness 91 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 92 | return (best * (best > thr).float()).mean() # fitness 93 | 94 | def print_results(k, verbose=True): 95 | k = k[np.argsort(k.prod(1))] # sort small to large 96 | x, best = metric(k, wh0) 97 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 98 | s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \ 99 | f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \ 100 | f'past_thr={x[x > thr].mean():.3f}-mean: ' 101 | for x in k: 102 | s += '%i,%i, ' % (round(x[0]), round(x[1])) 103 | if verbose: 104 | LOGGER.info(s[:-2]) 105 | return k 106 | 107 | if isinstance(dataset, str): # *.yaml file 108 | with open(dataset, errors='ignore') as f: 109 | data_dict = yaml.safe_load(f) # model dict 110 | from utils.dataloaders import LoadImagesAndLabels 111 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 112 | 113 | # Get label wh 114 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 115 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 116 | 117 | # Filter 118 | i = (wh0 < 3.0).any(1).sum() 119 | if i: 120 | LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size') 121 | wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels 122 | # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 123 | 124 | # Kmeans init 125 | try: 126 | 
LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...') 127 | assert n <= len(wh) # apply overdetermined constraint 128 | s = wh.std(0) # sigmas for whitening 129 | k = kmeans(wh / s, n, iter=30)[0] * s # points 130 | assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar 131 | except Exception: 132 | LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init') 133 | k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init 134 | wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) 135 | k = print_results(k, verbose=False) 136 | 137 | # Plot 138 | # k, d = [None] * 20, [None] * 20 139 | # for i in tqdm(range(1, 21)): 140 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 141 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 142 | # ax = ax.ravel() 143 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 144 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 145 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 146 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 147 | # fig.savefig('wh.png', dpi=200) 148 | 149 | # Evolve 150 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 151 | pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT) # progress bar 152 | for _ in pbar: 153 | v = np.ones(sh) 154 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 155 | v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 156 | kg = (k.copy() * v).clip(min=2.0) 157 | fg = anchor_fitness(kg) 158 | if fg > f: 159 | f, k = fg, kg.copy() 160 | pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 161 | if verbose: 162 | print_results(k, verbose) 163 | 164 | return print_results(k).astype(np.float32) 165 | -------------------------------------------------------------------------------- /utils/autobatch.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from utils.general import LOGGER, colorstr 7 | from utils.torch_utils import profile 8 | 9 | 10 | def check_train_batch_size(model, imgsz=640, amp=True): 11 | # Check YOLOv5 training batch size 12 | with torch.cuda.amp.autocast(amp): 13 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 14 | 15 | 16 | def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): 17 | # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory 18 | # Usage: 19 | # import torch 20 | # from utils.autobatch import autobatch 21 | # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) 22 | # print(autobatch(model)) 23 | 24 | # Check device 25 | prefix = colorstr('AutoBatch: ') 26 | LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') 27 | device = next(model.parameters()).device # get model device 28 | if device.type == 'cpu': 29 | LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') 30 | return batch_size 31 | if torch.backends.cudnn.benchmark: 32 | LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}') 33 | return batch_size 34 | 35 | # Inspect CUDA memory 36 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 37 | d = str(device).upper() # 'CUDA:0' 38 | properties = torch.cuda.get_device_properties(device) # device 
properties 39 | t = properties.total_memory / gb # GiB total 40 | r = torch.cuda.memory_reserved(device) / gb # GiB reserved 41 | a = torch.cuda.memory_allocated(device) / gb # GiB allocated 42 | f = t - (r + a) # GiB free 43 | LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free') 44 | 45 | # Profile batch sizes 46 | batch_sizes = [1, 2, 4, 8, 16] 47 | try: 48 | img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] 49 | results = profile(img, model, n=3, device=device) 50 | except Exception as e: 51 | LOGGER.warning(f'{prefix}{e}') 52 | 53 | # Fit a solution 54 | y = [x[2] for x in results if x] # memory [2] 55 | p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit 56 | b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) 57 | if None in results: # some sizes failed 58 | i = results.index(None) # first fail index 59 | if b >= batch_sizes[i]: # y intercept above failure point 60 | b = batch_sizes[max(i - 1, 0)] # select prior safe point 61 | if b < 1 or b > 1024: # b outside of safe range 62 | b = batch_size 63 | LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') 64 | 65 | fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted 66 | LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅') 67 | return b 68 | -------------------------------------------------------------------------------- /utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | class Callbacks: 5 | """" 6 | Handles all registered callbacks for YOLOv5 Hooks 7 | """ 8 | 9 | def __init__(self): 10 | # Define the available callbacks 11 | self._callbacks = { 12 | 'on_pretrain_routine_start': [], 13 | 'on_pretrain_routine_end': [], 14 | 'on_train_start': [], 15 | 'on_train_epoch_start': [], 16 | 'on_train_batch_start': [], 17 | 'optimizer_step': [], 18 | 'on_before_zero_grad': [], 19 | 'on_train_batch_end': [], 20 | 'on_train_epoch_end': [], 21 | 'on_val_start': [], 22 | 'on_val_batch_start': [], 23 | 'on_val_image_end': [], 24 | 'on_val_batch_end': [], 25 | 'on_val_end': [], 26 | 'on_fit_epoch_end': [], # fit = train + val 27 | 'on_model_save': [], 28 | 'on_train_end': [], 29 | 'on_params_update': [], 30 | 'teardown': [],} 31 | self.stop_training = False # set True to interrupt training 32 | 33 | def register_action(self, hook, name='', callback=None): 34 | """ 35 | Register a new action to a callback hook 36 | 37 | Args: 38 | hook: The callback hook name to register the action to 39 | name: The name of the action for later reference 40 | callback: The callback to fire 41 | """ 42 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 43 | assert callable(callback), f"callback '{callback}' is not callable" 44 | self._callbacks[hook].append({'name': name, 'callback': callback}) 45 | 46 | def get_registered_actions(self, hook=None): 47 | """" 48 | Returns all the registered actions by callback hook 49 | 50 | Args: 51 | hook: The name of the hook to check, defaults to all 52 | """ 53 | return self._callbacks[hook] if hook else self._callbacks 54 | 55 | def run(self, hook, *args, thread=False, **kwargs): 56 | """ 57 | Loop through the registered actions and fire all callbacks on main thread 58 | 59 | Args: 60 | hook: The name of the hook to check, defaults to all 61 | args: Arguments to 
receive from YOLOv5 62 | thread: (boolean) Run callbacks in daemon thread 63 | kwargs: Keyword Arguments to receive from YOLOv5 64 | """ 65 | 66 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 67 | for logger in self._callbacks[hook]: 68 | if thread: 69 | threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start() 70 | else: 71 | logger['callback'](*args, **kwargs) 72 | -------------------------------------------------------------------------------- /utils/downloads.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import subprocess 4 | import urllib 5 | from pathlib import Path 6 | 7 | import requests 8 | import torch 9 | 10 | 11 | def is_url(url, check=True): 12 | # Check if string is URL and check if URL exists 13 | try: 14 | url = str(url) 15 | result = urllib.parse.urlparse(url) 16 | assert all([result.scheme, result.netloc]) # check if is url 17 | return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online 18 | except (AssertionError, urllib.request.HTTPError): 19 | return False 20 | 21 | 22 | def gsutil_getsize(url=''): 23 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 24 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 25 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 26 | 27 | 28 | def url_getsize(url='https://ultralytics.com/images/bus.jpg'): 29 | # Return downloadable file size in bytes 30 | response = requests.head(url, allow_redirects=True) 31 | return int(response.headers.get('content-length', -1)) 32 | 33 | 34 | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): 35 | # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes 36 | from utils.general import LOGGER 37 | 38 | file = Path(file) 39 | assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" 40 | try: # url1 41 | LOGGER.info(f'Downloading {url} to {file}...') 42 | torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) 43 | assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check 44 | except Exception as e: # url2 45 | if file.exists(): 46 | file.unlink() # remove partial downloads 47 | LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') 48 | os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail 49 | finally: 50 | if not file.exists() or file.stat().st_size < min_bytes: # check 51 | if file.exists(): 52 | file.unlink() # remove partial downloads 53 | LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}") 54 | LOGGER.info('') 55 | 56 | 57 | def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'): 58 | # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc. 59 | from utils.general import LOGGER 60 | 61 | def github_assets(repository, version='latest'): 62 | # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...]) 63 | if version != 'latest': 64 | version = f'tags/{version}' # i.e. 
tags/v7.0 65 | response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api 66 | return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets 67 | 68 | file = Path(str(file).strip().replace("'", '')) 69 | if not file.exists(): 70 | # URL specified 71 | name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. 72 | if str(file).startswith(('http:/', 'https:/')): # download 73 | url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ 74 | file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... 75 | if Path(file).is_file(): 76 | LOGGER.info(f'Found {url} locally at {file}') # file already exists 77 | else: 78 | safe_download(file=file, url=url, min_bytes=1E5) 79 | return file 80 | 81 | # GitHub assets 82 | assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default 83 | try: 84 | tag, assets = github_assets(repo, release) 85 | except Exception: 86 | try: 87 | tag, assets = github_assets(repo) # latest release 88 | except Exception: 89 | try: 90 | tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] 91 | except Exception: 92 | tag = release 93 | 94 | file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) 95 | if name in assets: 96 | url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror 97 | safe_download( 98 | file, 99 | url=f'https://github.com/{repo}/releases/download/{tag}/{name}', 100 | min_bytes=1E5, 101 | error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') 102 | 103 | return str(file) 104 | -------------------------------------------------------------------------------- /utils/lion.py: -------------------------------------------------------------------------------- 1 | """PyTorch implementation of the Lion optimizer.""" 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | 6 | class Lion(Optimizer): 7 | r"""Implements Lion algorithm.""" 8 | 9 | def __init__(self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0.0): 10 | """Initialize the hyperparameters. 11 | Args: 12 | params (iterable): iterable of parameters to optimize or dicts defining 13 | parameter groups 14 | lr (float, optional): learning rate (default: 1e-4) 15 | betas (Tuple[float, float], optional): coefficients used for computing 16 | running averages of gradient and its square (default: (0.9, 0.99)) 17 | weight_decay (float, optional): weight decay coefficient (default: 0) 18 | """ 19 | 20 | if not 0.0 <= lr: 21 | raise ValueError('Invalid learning rate: {}'.format(lr)) 22 | if not 0.0 <= betas[0] < 1.0: 23 | raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0])) 24 | if not 0.0 <= betas[1] < 1.0: 25 | raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1])) 26 | defaults = dict(lr=lr, betas=betas, weight_decay=weight_decay) 27 | super().__init__(params, defaults) 28 | 29 | @torch.no_grad() 30 | def step(self, closure=None): 31 | """Performs a single optimization step. 32 | Args: 33 | closure (callable, optional): A closure that reevaluates the model 34 | and returns the loss. 35 | Returns: 36 | the loss. 
37 | """ 38 | loss = None 39 | if closure is not None: 40 | with torch.enable_grad(): 41 | loss = closure() 42 | 43 | for group in self.param_groups: 44 | for p in group['params']: 45 | if p.grad is None: 46 | continue 47 | 48 | # Perform stepweight decay 49 | p.data.mul_(1 - group['lr'] * group['weight_decay']) 50 | 51 | grad = p.grad 52 | state = self.state[p] 53 | # State initialization 54 | if len(state) == 0: 55 | # Exponential moving average of gradient values 56 | state['exp_avg'] = torch.zeros_like(p) 57 | 58 | exp_avg = state['exp_avg'] 59 | beta1, beta2 = group['betas'] 60 | 61 | # Weight update 62 | update = exp_avg * beta1 + grad * (1 - beta1) 63 | p.add_(torch.sign(update), alpha=-group['lr']) 64 | # Decay the momentum running average coefficient 65 | exp_avg.mul_(beta2).add_(grad, alpha=1 - beta2) 66 | 67 | return loss -------------------------------------------------------------------------------- /utils/loggers/clearml/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/loggers/clearml/clearml_utils.py: -------------------------------------------------------------------------------- 1 | """Main Logger class for ClearML experiment tracking.""" 2 | import glob 3 | import re 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import yaml 8 | 9 | from utils.plots import Annotator, colors 10 | 11 | try: 12 | import clearml 13 | from clearml import Dataset, Task 14 | 15 | assert hasattr(clearml, '__version__') # verify package import not local dir 16 | except (ImportError, AssertionError): 17 | clearml = None 18 | 19 | 20 | def construct_dataset(clearml_info_string): 21 | """Load in a clearml dataset and fill the internal data_dict with its contents. 
22 | """ 23 | dataset_id = clearml_info_string.replace('clearml://', '') 24 | dataset = Dataset.get(dataset_id=dataset_id) 25 | dataset_root_path = Path(dataset.get_local_copy()) 26 | 27 | # We'll search for the yaml file definition in the dataset 28 | yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml"))) 29 | if len(yaml_filenames) > 1: 30 | raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains ' 31 | 'the dataset definition this way.') 32 | elif len(yaml_filenames) == 0: 33 | raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file ' 34 | 'inside the dataset root path.') 35 | with open(yaml_filenames[0]) as f: 36 | dataset_definition = yaml.safe_load(f) 37 | 38 | assert set(dataset_definition.keys()).issuperset( 39 | {'train', 'test', 'val', 'nc', 'names'} 40 | ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')" 41 | 42 | data_dict = dict() 43 | data_dict['train'] = str( 44 | (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None 45 | data_dict['test'] = str( 46 | (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None 47 | data_dict['val'] = str( 48 | (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None 49 | data_dict['nc'] = dataset_definition['nc'] 50 | data_dict['names'] = dataset_definition['names'] 51 | 52 | return data_dict 53 | 54 | 55 | class ClearmlLogger: 56 | """Log training runs, datasets, models, and predictions to ClearML. 57 | 58 | This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default, 59 | this information includes hyperparameters, system configuration and metrics, model metrics, code information and 60 | basic data metrics and analyses. 61 | 62 | By providing additional command line arguments to train.py, datasets, 63 | models and predictions can also be logged. 64 | """ 65 | 66 | def __init__(self, opt, hyp): 67 | """ 68 | - Initialize ClearML Task, this object will capture the experiment 69 | - Upload dataset version to ClearML Data if opt.upload_dataset is True 70 | 71 | arguments: 72 | opt (namespace) -- Commandline arguments for this run 73 | hyp (dict) -- Hyperparameters for this run 74 | 75 | """ 76 | self.current_epoch = 0 77 | # Keep tracked of amount of logged images to enforce a limit 78 | self.current_epoch_logged_images = set() 79 | # Maximum number of images to log to clearML per epoch 80 | self.max_imgs_to_log_per_epoch = 16 81 | # Get the interval of epochs when bounding box images should be logged 82 | self.bbox_interval = opt.bbox_interval 83 | self.clearml = clearml 84 | self.task = None 85 | self.data_dict = None 86 | if self.clearml: 87 | self.task = Task.init( 88 | project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5', 89 | task_name=opt.name if opt.name != 'exp' else 'Training', 90 | tags=['YOLOv5'], 91 | output_uri=True, 92 | auto_connect_frameworks={'pytorch': False} 93 | # We disconnect pytorch auto-detection, because we added manual model save points in the code 94 | ) 95 | # ClearML's hooks will already grab all general parameters 96 | # Only the hyperparameters coming from the yaml config file 97 | # will have to be added manually! 
98 | self.task.connect(hyp, name='Hyperparameters') 99 | 100 | # Get ClearML Dataset Version if requested 101 | if opt.data.startswith('clearml://'): 102 | # data_dict should have the following keys: 103 | # names, nc (number of classes), test, train, val (all three relative paths to ../datasets) 104 | self.data_dict = construct_dataset(opt.data) 105 | # Set data to data_dict because wandb will crash without this information and opt is the best way 106 | # to give it to them 107 | opt.data = self.data_dict 108 | 109 | def log_debug_samples(self, files, title='Debug Samples'): 110 | """ 111 | Log files (images) as debug samples in the ClearML task. 112 | 113 | arguments: 114 | files (List(PosixPath)) a list of file paths in PosixPath format 115 | title (str) A title that groups together images with the same values 116 | """ 117 | for f in files: 118 | if f.exists(): 119 | it = re.search(r'_batch(\d+)', f.name) 120 | iteration = int(it.groups()[0]) if it else 0 121 | self.task.get_logger().report_image(title=title, 122 | series=f.name.replace(it.group(), ''), 123 | local_path=str(f), 124 | iteration=iteration) 125 | 126 | def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25): 127 | """ 128 | Draw the bounding boxes on a single image and report the result as a ClearML debug sample. 129 | 130 | arguments: 131 | image_path (PosixPath) the path the original image file 132 | boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] 133 | class_names (dict): dict containing mapping of class int to class name 134 | image (Tensor): A torch tensor containing the actual image data 135 | """ 136 | if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0: 137 | # Log every bbox_interval times and deduplicate for any intermittend extra eval runs 138 | if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images: 139 | im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2)) 140 | annotator = Annotator(im=im, pil=True) 141 | for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])): 142 | color = colors(i) 143 | 144 | class_name = class_names[int(class_nr)] 145 | confidence_percentage = round(float(conf) * 100, 2) 146 | label = f"{class_name}: {confidence_percentage}%" 147 | 148 | if conf > conf_threshold: 149 | annotator.rectangle(box.cpu().numpy(), outline=color) 150 | annotator.box_label(box.cpu().numpy(), label=label, color=color) 151 | 152 | annotated_image = annotator.result() 153 | self.task.get_logger().report_image(title='Bounding Boxes', 154 | series=image_path.name, 155 | iteration=self.current_epoch, 156 | image=annotated_image) 157 | self.current_epoch_logged_images.add(image_path) 158 | -------------------------------------------------------------------------------- /utils/loggers/clearml/hpo.py: -------------------------------------------------------------------------------- 1 | from clearml import Task 2 | # Connecting ClearML with the current process, 3 | # from here on everything is logged automatically 4 | from clearml.automation import HyperParameterOptimizer, UniformParameterRange 5 | from clearml.automation.optuna import OptimizerOptuna 6 | 7 | task = Task.init(project_name='Hyper-Parameter Optimization', 8 | task_name='YOLOv5', 9 | task_type=Task.TaskTypes.optimizer, 10 | reuse_last_task_id=False) 11 | 12 | # Example use case: 13 | optimizer = 
HyperParameterOptimizer( 14 | # This is the experiment we want to optimize 15 | base_task_id='', 16 | # here we define the hyper-parameters to optimize 17 | # Notice: The parameter name should exactly match what you see in the UI: / 18 | # For Example, here we see in the base experiment a section Named: "General" 19 | # under it a parameter named "batch_size", this becomes "General/batch_size" 20 | # If you have `argparse` for example, then arguments will appear under the "Args" section, 21 | # and you should instead pass "Args/batch_size" 22 | hyper_parameters=[ 23 | UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), 24 | UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), 25 | UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), 26 | UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), 27 | UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), 28 | UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), 29 | UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), 30 | UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), 31 | UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), 32 | UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), 33 | UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), 34 | UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0), 35 | UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7), 36 | UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0), 37 | UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0), 38 | UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1), 39 | UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9), 40 | UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9), 41 | UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0), 42 | UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9), 43 | UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9), 44 | UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0), 45 | UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001), 46 | UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0), 47 | UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0), 48 | UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0), 49 | UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0), 50 | UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)], 51 | # this is the objective metric we want to maximize/minimize 52 | objective_metric_title='metrics', 53 | objective_metric_series='mAP_0.5', 54 | # now we decide if we want to maximize it or minimize it (accuracy we maximize) 55 | objective_metric_sign='max', 56 | # let us limit the number of concurrent experiments, 57 | # this in turn will make sure we do dont bombard the scheduler with experiments. 
58 | # if we have an auto-scaler connected, this, by proxy, will limit the number of machine 59 | max_number_of_concurrent_tasks=1, 60 | # this is the optimizer class (actually doing the optimization) 61 | # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band) 62 | optimizer_class=OptimizerOptuna, 63 | # If specified only the top K performing Tasks will be kept, the others will be automatically archived 64 | save_top_k_tasks_only=5, # 5, 65 | compute_time_limit=None, 66 | total_max_jobs=20, 67 | min_iteration_per_job=None, 68 | max_iteration_per_job=None, 69 | ) 70 | 71 | # report every 10 seconds, this is way too often, but we are testing here 72 | optimizer.set_report_period(10 / 60) 73 | # You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent 74 | # an_optimizer.start_locally(job_complete_callback=job_complete_callback) 75 | # set the time limit for the optimization process (2 hours) 76 | optimizer.set_time_limit(in_minutes=120.0) 77 | # Start the optimization process in the local environment 78 | optimizer.start_locally() 79 | # wait until process is done (notice we are controlling the optimization process in the background) 80 | optimizer.wait() 81 | # make sure background optimization stopped 82 | optimizer.stop() 83 | 84 | print('We are done, good bye') 85 | -------------------------------------------------------------------------------- /utils/loggers/comet/comet_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from urllib.parse import urlparse 4 | 5 | try: 6 | import comet_ml 7 | except (ModuleNotFoundError, ImportError): 8 | comet_ml = None 9 | 10 | import yaml 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | COMET_PREFIX = "comet://" 15 | COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") 16 | COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt") 17 | 18 | 19 | def download_model_checkpoint(opt, experiment): 20 | model_dir = f"{opt.project}/{experiment.name}" 21 | os.makedirs(model_dir, exist_ok=True) 22 | 23 | model_name = COMET_MODEL_NAME 24 | model_asset_list = experiment.get_model_asset_list(model_name) 25 | 26 | if len(model_asset_list) == 0: 27 | logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}") 28 | return 29 | 30 | model_asset_list = sorted( 31 | model_asset_list, 32 | key=lambda x: x["step"], 33 | reverse=True, 34 | ) 35 | logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list} 36 | 37 | resource_url = urlparse(opt.weights) 38 | checkpoint_filename = resource_url.query 39 | 40 | if checkpoint_filename: 41 | asset_id = logged_checkpoint_map.get(checkpoint_filename) 42 | else: 43 | asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME) 44 | checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME 45 | 46 | if asset_id is None: 47 | logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment") 48 | return 49 | 50 | try: 51 | logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}") 52 | asset_filename = checkpoint_filename 53 | 54 | model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 55 | model_download_path = f"{model_dir}/{asset_filename}" 56 | with open(model_download_path, "wb") as f: 57 | f.write(model_binary) 58 | 59 | opt.weights = model_download_path 60 | 61 | 
except Exception as e: 62 | logger.warning("COMET WARNING: Unable to download checkpoint from Comet") 63 | logger.exception(e) 64 | 65 | 66 | def set_opt_parameters(opt, experiment): 67 | """Update the opts Namespace with parameters 68 | from Comet's ExistingExperiment when resuming a run 69 | 70 | Args: 71 | opt (argparse.Namespace): Namespace of command line options 72 | experiment (comet_ml.APIExperiment): Comet API Experiment object 73 | """ 74 | asset_list = experiment.get_asset_list() 75 | resume_string = opt.resume 76 | 77 | for asset in asset_list: 78 | if asset["fileName"] == "opt.yaml": 79 | asset_id = asset["assetId"] 80 | asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 81 | opt_dict = yaml.safe_load(asset_binary) 82 | for key, value in opt_dict.items(): 83 | setattr(opt, key, value) 84 | opt.resume = resume_string 85 | 86 | # Save hyperparameters to YAML file 87 | # Necessary to pass checks in training script 88 | save_dir = f"{opt.project}/{experiment.name}" 89 | os.makedirs(save_dir, exist_ok=True) 90 | 91 | hyp_yaml_path = f"{save_dir}/hyp.yaml" 92 | with open(hyp_yaml_path, "w") as f: 93 | yaml.dump(opt.hyp, f) 94 | opt.hyp = hyp_yaml_path 95 | 96 | 97 | def check_comet_weights(opt): 98 | """Downloads model weights from Comet and updates the 99 | weights path to point to saved weights location 100 | 101 | Args: 102 | opt (argparse.Namespace): Command Line arguments passed 103 | to YOLOv5 training script 104 | 105 | Returns: 106 | None/bool: Return True if weights are successfully downloaded 107 | else return None 108 | """ 109 | if comet_ml is None: 110 | return 111 | 112 | if isinstance(opt.weights, str): 113 | if opt.weights.startswith(COMET_PREFIX): 114 | api = comet_ml.API() 115 | resource = urlparse(opt.weights) 116 | experiment_path = f"{resource.netloc}{resource.path}" 117 | experiment = api.get(experiment_path) 118 | download_model_checkpoint(opt, experiment) 119 | return True 120 | 121 | return None 122 | 123 | 124 | def check_comet_resume(opt): 125 | """Restores run parameters to its original state based on the model checkpoint 126 | and logged Experiment parameters. 
127 | 128 | Args: 129 | opt (argparse.Namespace): Command Line arguments passed 130 | to YOLOv5 training script 131 | 132 | Returns: 133 | None/bool: Return True if the run is restored successfully 134 | else return None 135 | """ 136 | if comet_ml is None: 137 | return 138 | 139 | if isinstance(opt.resume, str): 140 | if opt.resume.startswith(COMET_PREFIX): 141 | api = comet_ml.API() 142 | resource = urlparse(opt.resume) 143 | experiment_path = f"{resource.netloc}{resource.path}" 144 | experiment = api.get(experiment_path) 145 | set_opt_parameters(opt, experiment) 146 | download_model_checkpoint(opt, experiment) 147 | 148 | return True 149 | 150 | return None 151 | -------------------------------------------------------------------------------- /utils/loggers/comet/hpo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import comet_ml 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | FILE = Path(__file__).resolve() 13 | ROOT = FILE.parents[3] # YOLOv5 root directory 14 | if str(ROOT) not in sys.path: 15 | sys.path.append(str(ROOT)) # add ROOT to PATH 16 | 17 | from train import train 18 | from utils.callbacks import Callbacks 19 | from utils.general import increment_path 20 | from utils.torch_utils import select_device 21 | 22 | # Project Configuration 23 | config = comet_ml.config.get_config() 24 | COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") 25 | 26 | 27 | def get_args(known=False): 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') 30 | parser.add_argument('--cfg', type=str, default='', help='model.yaml path') 31 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') 32 | parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') 33 | parser.add_argument('--epochs', type=int, default=300, help='total training epochs') 34 | parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') 35 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') 36 | parser.add_argument('--rect', action='store_true', help='rectangular training') 37 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') 38 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 39 | parser.add_argument('--noval', action='store_true', help='only validate final epoch') 40 | parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') 41 | parser.add_argument('--noplots', action='store_true', help='save no plot files') 42 | parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') 43 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 44 | parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') 45 | parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') 46 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 47 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') 48 | parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') 49 | parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') 50 | parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') 51 | parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') 52 | parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') 53 | parser.add_argument('--name', default='exp', help='save to project/name') 54 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 55 | parser.add_argument('--quad', action='store_true', help='quad dataloader') 56 | parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') 57 | parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') 58 | parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') 59 | parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') 60 | parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') 61 | parser.add_argument('--seed', type=int, default=0, help='Global training seed') 62 | parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') 63 | 64 | # Weights & Biases arguments 65 | parser.add_argument('--entity', default=None, help='W&B: Entity') 66 | parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') 67 | parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') 68 | parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') 69 | 70 | # Comet Arguments 71 | parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.") 72 | parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.") 73 | parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.") 74 | parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.") 75 | parser.add_argument("--comet_optimizer_workers", 76 | type=int, 77 | default=1, 78 | help="Comet: Number of Parallel Workers to use with the Comet Optimizer.") 79 | 80 | return parser.parse_known_args()[0] if known else parser.parse_args() 81 | 82 | 83 | def run(parameters, opt): 84 | hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]} 85 | 86 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 87 | opt.batch_size = parameters.get("batch_size") 88 | opt.epochs = parameters.get("epochs") 89 | 90 | device = select_device(opt.device, batch_size=opt.batch_size) 91 | train(hyp_dict, opt, device, callbacks=Callbacks()) 92 | 93 | 94 | if __name__ == "__main__": 95 | opt = get_args(known=True) 96 | 97 | opt.weights = str(opt.weights) 98 | opt.cfg = str(opt.cfg) 99 | opt.data = str(opt.data) 100 | opt.project = str(opt.project) 101 | 
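# Descriptive note (added for clarity): the block below resolves the Comet sweep. If
# COMET_OPTIMIZER_ID is set in the environment (typically when the script is launched by
# Comet's sweep tooling), the existing Optimizer is reused; otherwise a new Optimizer is
# built from the JSON file passed via --comet_optimizer_config.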
102 | optimizer_id = os.getenv("COMET_OPTIMIZER_ID") 103 | if optimizer_id is None: 104 | with open(opt.comet_optimizer_config) as f: 105 | optimizer_config = json.load(f) 106 | optimizer = comet_ml.Optimizer(optimizer_config) 107 | else: 108 | optimizer = comet_ml.Optimizer(optimizer_id) 109 | 110 | opt.comet_optimizer_id = optimizer.id 111 | status = optimizer.status() 112 | 113 | opt.comet_optimizer_objective = status["spec"]["objective"] 114 | opt.comet_optimizer_metric = status["spec"]["metric"] 115 | 116 | logger.info("COMET INFO: Starting Hyperparameter Sweep") 117 | for parameter in optimizer.get_parameters(): 118 | run(parameter["parameters"], opt) 119 | -------------------------------------------------------------------------------- /utils/loggers/comet/optimizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": "random", 3 | "parameters": { 4 | "anchor_t": { 5 | "type": "discrete", 6 | "values": [ 7 | 2, 8 | 8 9 | ] 10 | }, 11 | "batch_size": { 12 | "type": "discrete", 13 | "values": [ 14 | 16, 15 | 32, 16 | 64 17 | ] 18 | }, 19 | "box": { 20 | "type": "discrete", 21 | "values": [ 22 | 0.02, 23 | 0.2 24 | ] 25 | }, 26 | "cls": { 27 | "type": "discrete", 28 | "values": [ 29 | 0.2 30 | ] 31 | }, 32 | "cls_pw": { 33 | "type": "discrete", 34 | "values": [ 35 | 0.5 36 | ] 37 | }, 38 | "copy_paste": { 39 | "type": "discrete", 40 | "values": [ 41 | 1 42 | ] 43 | }, 44 | "degrees": { 45 | "type": "discrete", 46 | "values": [ 47 | 0, 48 | 45 49 | ] 50 | }, 51 | "epochs": { 52 | "type": "discrete", 53 | "values": [ 54 | 5 55 | ] 56 | }, 57 | "fl_gamma": { 58 | "type": "discrete", 59 | "values": [ 60 | 0 61 | ] 62 | }, 63 | "fliplr": { 64 | "type": "discrete", 65 | "values": [ 66 | 0 67 | ] 68 | }, 69 | "flipud": { 70 | "type": "discrete", 71 | "values": [ 72 | 0 73 | ] 74 | }, 75 | "hsv_h": { 76 | "type": "discrete", 77 | "values": [ 78 | 0 79 | ] 80 | }, 81 | "hsv_s": { 82 | "type": "discrete", 83 | "values": [ 84 | 0 85 | ] 86 | }, 87 | "hsv_v": { 88 | "type": "discrete", 89 | "values": [ 90 | 0 91 | ] 92 | }, 93 | "iou_t": { 94 | "type": "discrete", 95 | "values": [ 96 | 0.7 97 | ] 98 | }, 99 | "lr0": { 100 | "type": "discrete", 101 | "values": [ 102 | 1e-05, 103 | 0.1 104 | ] 105 | }, 106 | "lrf": { 107 | "type": "discrete", 108 | "values": [ 109 | 0.01, 110 | 1 111 | ] 112 | }, 113 | "mixup": { 114 | "type": "discrete", 115 | "values": [ 116 | 1 117 | ] 118 | }, 119 | "momentum": { 120 | "type": "discrete", 121 | "values": [ 122 | 0.6 123 | ] 124 | }, 125 | "mosaic": { 126 | "type": "discrete", 127 | "values": [ 128 | 0 129 | ] 130 | }, 131 | "obj": { 132 | "type": "discrete", 133 | "values": [ 134 | 0.2 135 | ] 136 | }, 137 | "obj_pw": { 138 | "type": "discrete", 139 | "values": [ 140 | 0.5 141 | ] 142 | }, 143 | "optimizer": { 144 | "type": "categorical", 145 | "values": [ 146 | "SGD", 147 | "Adam", 148 | "AdamW" 149 | ] 150 | }, 151 | "perspective": { 152 | "type": "discrete", 153 | "values": [ 154 | 0 155 | ] 156 | }, 157 | "scale": { 158 | "type": "discrete", 159 | "values": [ 160 | 0 161 | ] 162 | }, 163 | "shear": { 164 | "type": "discrete", 165 | "values": [ 166 | 0 167 | ] 168 | }, 169 | "translate": { 170 | "type": "discrete", 171 | "values": [ 172 | 0 173 | ] 174 | }, 175 | "warmup_bias_lr": { 176 | "type": "discrete", 177 | "values": [ 178 | 0, 179 | 0.2 180 | ] 181 | }, 182 | "warmup_epochs": { 183 | "type": "discrete", 184 | "values": [ 185 | 5 186 | ] 187 | }, 188 | "warmup_momentum": { 189 | "type": 
"discrete", 190 | "values": [ 191 | 0, 192 | 0.95 193 | ] 194 | }, 195 | "weight_decay": { 196 | "type": "discrete", 197 | "values": [ 198 | 0, 199 | 0.001 200 | ] 201 | } 202 | }, 203 | "spec": { 204 | "maxCombo": 0, 205 | "metric": "metrics/mAP_0.5", 206 | "objective": "maximize" 207 | }, 208 | "trials": 1 209 | } 210 | -------------------------------------------------------------------------------- /utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | from utils.general import LOGGER 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 12 | if not logger.wandb: 13 | LOGGER.info("install wandb using `pip install wandb` to log the dataset") 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 21 | parser.add_argument('--entity', default=None, help='W&B entity') 22 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 23 | 24 | opt = parser.parse_args() 25 | opt.resume = False # Explicitly disallow resume check for dataset upload job 26 | 27 | create_dataset_artifact(opt) 28 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).resolve() 7 | ROOT = FILE.parents[3] # YOLOv5 root directory 8 | if str(ROOT) not in sys.path: 9 | sys.path.append(str(ROOT)) # add ROOT to PATH 10 | 11 | from train import parse_opt, train 12 | from utils.callbacks import Callbacks 13 | from utils.general import increment_path 14 | from utils.torch_utils import select_device 15 | 16 | 17 | def sweep(): 18 | wandb.init() 19 | # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb. 
20 | hyp_dict = vars(wandb.config).get("_items").copy() 21 | 22 | # Workaround: get necessary opt args 23 | opt = parse_opt(known=True) 24 | opt.batch_size = hyp_dict.get("batch_size") 25 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 26 | opt.epochs = hyp_dict.get("epochs") 27 | opt.nosave = True 28 | opt.data = hyp_dict.get("data") 29 | opt.weights = str(opt.weights) 30 | opt.cfg = str(opt.cfg) 31 | opt.data = str(opt.data) 32 | opt.hyp = str(opt.hyp) 33 | opt.project = str(opt.project) 34 | device = select_device(opt.device, batch_size=opt.batch_size) 35 | 36 | # train 37 | train(hyp_dict, opt, device, callbacks=Callbacks()) 38 | 39 | 40 | if __name__ == "__main__": 41 | sweep() 42 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for training 2 | # To set range- 3 | # Provide min and max values as: 4 | # parameter: 5 | # 6 | # min: scalar 7 | # max: scalar 8 | # OR 9 | # 10 | # Set a specific list of search space- 11 | # parameter: 12 | # values: [scalar1, scalar2, scalar3...] 13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 4.0 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | 
max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /utils/panoptic/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/panoptic/augmentations.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | from ..augmentations import box_candidates 8 | from ..general import resample_segments, segment2box 9 | from ..metrics import bbox_ioa 10 | 11 | 12 | def mixup(im, labels, segments, seg_cls, semantic_masks, im2, labels2, segments2, seg_cls2, semantic_masks2): 13 | # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf 14 | r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 15 | im = (im * r + im2 * (1 - r)).astype(np.uint8) 16 | labels = np.concatenate((labels, labels2), 0) 17 | segments = np.concatenate((segments, segments2), 0) 18 | seg_cls = np.concatenate((seg_cls, seg_cls2), 0) 19 | semantic_masks = np.concatenate((semantic_masks, semantic_masks2), 0) 20 | return im, labels, segments, seg_cls, semantic_masks 21 | 22 | 23 | def random_perspective(im, 24 | targets=(), 25 | segments=(), 26 | semantic_masks = (), 27 | degrees=10, 28 | translate=.1, 29 | scale=.1, 30 | shear=10, 31 | perspective=0.0, 32 | border=(0, 0)): 33 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 34 | # targets = [cls, xyxy] 35 | 36 | height = im.shape[0] + border[0] * 2 # shape(h,w,c) 37 | width = im.shape[1] + border[1] * 2 38 | 39 | # Center 40 | C = np.eye(3) 41 | C[0, 2] = -im.shape[1] / 2 # x translation (pixels) 42 | C[1, 2] = -im.shape[0] / 2 # y translation (pixels) 43 | 44 | # Perspective 45 | P = np.eye(3) 46 | P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) 47 | P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) 48 | 49 | # Rotation and Scale 50 | R = np.eye(3) 51 | a = random.uniform(-degrees, degrees) 52 | # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations 53 | s = random.uniform(1 - scale, 1 + scale) 54 | # s = 2 ** random.uniform(-scale, scale) 55 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) 56 | 57 | # Shear 58 | S = np.eye(3) 59 | S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) 60 | S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) 61 | 62 | # Translation 63 | T = np.eye(3) 64 | T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) 65 | T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) 66 | 67 | # Combined rotation matrix 68 | M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT 69 | if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed 70 | if perspective: 71 | im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) 72 | else: # affine 73 | im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) 74 | 75 | # Visualize 76 | # import matplotlib.pyplot as plt 77 | # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() 78 | # 
ax[0].imshow(im[:, :, ::-1]) # base 79 | # ax[1].imshow(im2[:, :, ::-1]) # warped 80 | 81 | # Transform label coordinates 82 | n = len(targets) 83 | new_segments = [] 84 | new_semantic_masks = [] 85 | if n: 86 | new = np.zeros((n, 4)) 87 | segments = resample_segments(segments) # upsample 88 | for i, segment in enumerate(segments): 89 | xy = np.ones((len(segment), 3)) 90 | xy[:, :2] = segment 91 | xy = xy @ M.T # transform 92 | xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine 93 | 94 | # clip 95 | new[i] = segment2box(xy, width, height) 96 | new_segments.append(xy) 97 | 98 | semantic_masks = resample_segments(semantic_masks) 99 | for i, semantic_mask in enumerate(semantic_masks): 100 | #if i < n: 101 | # xy = np.ones((len(segments[i]), 3)) 102 | # xy[:, :2] = segments[i] 103 | # xy = xy @ M.T # transform 104 | # xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine 105 | 106 | # new[i] = segment2box(xy, width, height) 107 | # new_segments.append(xy) 108 | 109 | xy_s = np.ones((len(semantic_mask), 3)) 110 | xy_s[:, :2] = semantic_mask 111 | xy_s = xy_s @ M.T # transform 112 | xy_s = (xy_s[:, :2] / xy_s[:, 2:3] if perspective else xy_s[:, :2]) # perspective rescale or affine 113 | 114 | new_semantic_masks.append(xy_s) 115 | 116 | # filter candidates 117 | i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) 118 | targets = targets[i] 119 | targets[:, 1:5] = new[i] 120 | new_segments = np.array(new_segments)[i] 121 | new_semantic_masks = np.array(new_semantic_masks) 122 | 123 | return im, targets, new_segments, new_semantic_masks 124 | 125 | 126 | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): 127 | # Resize and pad image while meeting stride-multiple constraints 128 | shape = im.shape[:2] # current shape [height, width] 129 | if isinstance(new_shape, int): 130 | new_shape = (new_shape, new_shape) 131 | 132 | # Scale ratio (new / old) 133 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 134 | if not scaleup: # only scale down, do not scale up (for better val mAP) 135 | r = min(r, 1.0) 136 | 137 | # Compute padding 138 | ratio = r, r # width, height ratios 139 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 140 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 141 | if auto: # minimum rectangle 142 | dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding 143 | elif scaleFill: # stretch 144 | dw, dh = 0.0, 0.0 145 | new_unpad = (new_shape[1], new_shape[0]) 146 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios 147 | 148 | dw /= 2 # divide padding into 2 sides 149 | dh /= 2 150 | 151 | if shape[::-1] != new_unpad: # resize 152 | im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) 153 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 154 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 155 | im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 156 | return im, ratio, (dw, dh) 157 | 158 | 159 | def copy_paste(im, labels, segments, seg_cls, semantic_masks, p=0.5): 160 | # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) 161 | n = len(segments) 162 | if p and n: 163 | h, w, _ = im.shape # height, width, channels 164 | im_new = np.zeros(im.shape, np.uint8) 165 | 166 | # calculate ioa first then select indexes 
randomly 167 | boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) 168 | ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area 169 | indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) 170 | n = len(indexes) 171 | for j in random.sample(list(indexes), k=round(p * n)): 172 | l, box, s = labels[j], boxes[j], segments[j] 173 | labels = np.concatenate((labels, [[l[0], *box]]), 0) 174 | segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) 175 | seg_cls.append(l[0].astype(int)) 176 | semantic_masks.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) 177 | cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED) 178 | 179 | result = cv2.flip(im, 1) # augment segments (flip left-right) 180 | i = cv2.flip(im_new, 1).astype(bool) 181 | im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug 182 | 183 | return im, labels, segments, seg_cls, semantic_masks -------------------------------------------------------------------------------- /utils/panoptic/general.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | def crop_mask(masks, boxes): 8 | """ 9 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 10 | Vectorized by Chong (thanks Chong). 11 | 12 | Args: 13 | - masks should be a size [h, w, n] tensor of masks 14 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 15 | """ 16 | 17 | n, h, w = masks.shape 18 | x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) 19 | r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 20 | c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 21 | 22 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 23 | 24 | 25 | def process_mask_upsample(protos, masks_in, bboxes, shape): 26 | """ 27 | Crop after upsample. 28 | proto_out: [mask_dim, mask_h, mask_w] 29 | out_masks: [n, mask_dim], n is number of masks after nms 30 | bboxes: [n, 4], n is number of masks after nms 31 | shape:input_image_size, (h, w) 32 | 33 | return: h, w, n 34 | """ 35 | 36 | c, mh, mw = protos.shape # CHW 37 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) 38 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 39 | masks = crop_mask(masks, bboxes) # CHW 40 | return masks.gt_(0.5) 41 | 42 | 43 | def process_mask(protos, masks_in, bboxes, shape, upsample=False): 44 | """ 45 | Crop before upsample. 
46 | proto_out: [mask_dim, mask_h, mask_w] 47 | out_masks: [n, mask_dim], n is number of masks after nms 48 | bboxes: [n, 4], n is number of masks after nms 49 | shape:input_image_size, (h, w) 50 | 51 | return: h, w, n 52 | """ 53 | 54 | c, mh, mw = protos.shape # CHW 55 | ih, iw = shape 56 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW 57 | 58 | downsampled_bboxes = bboxes.clone() 59 | downsampled_bboxes[:, 0] *= mw / iw 60 | downsampled_bboxes[:, 2] *= mw / iw 61 | downsampled_bboxes[:, 3] *= mh / ih 62 | downsampled_bboxes[:, 1] *= mh / ih 63 | 64 | masks = crop_mask(masks, downsampled_bboxes) # CHW 65 | if upsample: 66 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 67 | return masks.gt_(0.5) 68 | 69 | 70 | def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): 71 | """ 72 | img1_shape: model input shape, [h, w] 73 | img0_shape: origin pic shape, [h, w, 3] 74 | masks: [h, w, num] 75 | """ 76 | # Rescale coordinates (xyxy) from im1_shape to im0_shape 77 | if ratio_pad is None: # calculate from im0_shape 78 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 79 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 80 | else: 81 | pad = ratio_pad[1] 82 | top, left = int(pad[1]), int(pad[0]) # y, x 83 | bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) 84 | 85 | if len(masks.shape) < 2: 86 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 87 | masks = masks[top:bottom, left:right] 88 | # masks = masks.permute(2, 0, 1).contiguous() 89 | # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] 90 | # masks = masks.permute(1, 2, 0).contiguous() 91 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) 92 | 93 | if len(masks.shape) == 2: 94 | masks = masks[:, :, None] 95 | return masks 96 | 97 | 98 | def mask_iou(mask1, mask2, eps=1e-7): 99 | """ 100 | mask1: [N, n] m1 means number of predicted objects 101 | mask2: [M, n] m2 means number of gt objects 102 | Note: n means image_w x image_h 103 | 104 | return: masks iou, [N, M] 105 | """ 106 | intersection = torch.matmul(mask1, mask2.t()).clamp(0) 107 | union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection 108 | return intersection / (union + eps) 109 | 110 | 111 | def masks_iou(mask1, mask2, eps=1e-7): 112 | """ 113 | mask1: [N, n] m1 means number of predicted objects 114 | mask2: [N, n] m2 means number of gt objects 115 | Note: n means image_w x image_h 116 | 117 | return: masks iou, (N, ) 118 | """ 119 | intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) 120 | union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection 121 | return intersection / (union + eps) 122 | 123 | 124 | def masks2segments(masks, strategy='largest'): 125 | # Convert masks(n,160,160) into segments(n,xy) 126 | segments = [] 127 | for x in masks.int().cpu().numpy().astype('uint8'): 128 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] 129 | if c: 130 | if strategy == 'concat': # concatenate all segments 131 | c = np.concatenate([x.reshape(-1, 2) for x in c]) 132 | elif strategy == 'largest': # select largest segment 133 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 134 | else: 135 | c = np.zeros((0, 2)) # no segments found 136 | segments.append(c.astype('float32')) 137 | 
return segments 138 | -------------------------------------------------------------------------------- /utils/panoptic/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..general import xywh2xyxy 6 | from ..loss import FocalLoss, smooth_BCE 7 | from ..metrics import bbox_iou 8 | from ..torch_utils import de_parallel 9 | from .general import crop_mask 10 | 11 | 12 | class ComputeLoss: 13 | # Compute losses 14 | def __init__(self, model, autobalance=False, overlap=False): 15 | self.sort_obj_iou = False 16 | self.overlap = overlap 17 | device = next(model.parameters()).device # get model device 18 | h = model.hyp # hyperparameters 19 | self.device = device 20 | 21 | # Define criteria 22 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) 23 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 24 | 25 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 26 | self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets 27 | 28 | # Focal loss 29 | g = h['fl_gamma'] # focal loss gamma 30 | if g > 0: 31 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 32 | 33 | m = de_parallel(model).model[-1] # Detect() module 34 | self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 35 | self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index 36 | self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance 37 | self.na = m.na # number of anchors 38 | self.nc = m.nc # number of classes 39 | self.nl = m.nl # number of layers 40 | self.nm = m.nm # number of masks 41 | self.anchors = m.anchors 42 | self.device = device 43 | 44 | def __call__(self, preds, targets, masks): # predictions, targets, model 45 | p, proto = preds 46 | bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width 47 | lcls = torch.zeros(1, device=self.device) 48 | lbox = torch.zeros(1, device=self.device) 49 | lobj = torch.zeros(1, device=self.device) 50 | lseg = torch.zeros(1, device=self.device) 51 | tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets 52 | 53 | # Losses 54 | for i, pi in enumerate(p): # layer index, layer predictions 55 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 56 | tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj 57 | 58 | n = b.shape[0] # number of targets 59 | if n: 60 | pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions 61 | 62 | # Box regression 63 | pxy = pxy.sigmoid() * 2 - 0.5 64 | pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] 65 | pbox = torch.cat((pxy, pwh), 1) # predicted box 66 | iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) 67 | lbox += (1.0 - iou).mean() # iou loss 68 | 69 | # Objectness 70 | iou = iou.detach().clamp(0).type(tobj.dtype) 71 | if self.sort_obj_iou: 72 | j = iou.argsort() 73 | b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] 74 | if self.gr < 1: 75 | iou = (1.0 - self.gr) + self.gr * iou 76 | tobj[b, a, gj, gi] = iou # iou ratio 77 | 78 | # Classification 79 | if self.nc > 1: # cls loss (only if multiple classes) 80 | t = torch.full_like(pcls, self.cn, device=self.device) # targets 81 | t[range(n), tcls[i]] = self.cp 82 | lcls += self.BCEcls(pcls, t) # 
BCE 83 | 84 | # Mask regression 85 | if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample 86 | masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0] 87 | marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized 88 | mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) 89 | for bi in b.unique(): 90 | j = b == bi # matching index 91 | if self.overlap: 92 | mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) 93 | else: 94 | mask_gti = masks[tidxs[i]][j] 95 | lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) 96 | 97 | obji = self.BCEobj(pi[..., 4], tobj) 98 | lobj += obji * self.balance[i] # obj loss 99 | if self.autobalance: 100 | self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() 101 | 102 | if self.autobalance: 103 | self.balance = [x / self.balance[self.ssi] for x in self.balance] 104 | lbox *= self.hyp["box"] 105 | lobj *= self.hyp["obj"] 106 | lcls *= self.hyp["cls"] 107 | lseg *= self.hyp["box"] / bs 108 | 109 | loss = lbox + lobj + lcls + lseg 110 | return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() 111 | 112 | def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): 113 | # Mask loss for one image 114 | pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) 115 | loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") 116 | return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() 117 | 118 | def build_targets(self, p, targets): 119 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 120 | na, nt = self.na, targets.shape[0] # number of anchors, targets 121 | tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] 122 | gain = torch.ones(8, device=self.device) # normalized to gridspace gain 123 | ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 124 | if self.overlap: 125 | batch = p[0].shape[0] 126 | ti = [] 127 | for i in range(batch): 128 | num = (targets[:, 0] == i).sum() # find number of targets of each image 129 | ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) 130 | ti = torch.cat(ti, 1) # (na, nt) 131 | else: 132 | ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1) 133 | targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices 134 | 135 | g = 0.5 # bias 136 | off = torch.tensor( 137 | [ 138 | [0, 0], 139 | [1, 0], 140 | [0, 1], 141 | [-1, 0], 142 | [0, -1], # j,k,l,m 143 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 144 | ], 145 | device=self.device).float() * g # offsets 146 | 147 | for i in range(self.nl): 148 | anchors, shape = self.anchors[i], p[i].shape 149 | gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain 150 | 151 | # Match targets to anchors 152 | t = targets * gain # shape(3,n,7) 153 | if nt: 154 | # Matches 155 | r = t[..., 4:6] / anchors[:, None] # wh ratio 156 | j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare 157 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 158 | t = t[j] # filter 159 | 160 | # Offsets 161 | gxy = t[:, 2:4] # grid xy 162 | gxi = gain[[2, 3]] - gxy # inverse 163 | j, k = ((gxy % 1 < g) & (gxy > 1)).T 164 | l, m = ((gxi % 1 < g) & (gxi > 1)).T 165 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 
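# Descriptive note (added for clarity): each candidate target is duplicated for its centre
# cell plus the four neighbouring cells defined in `off` (left/up/right/down); the stacked
# boolean mask `j` keeps the centre copy unconditionally and keeps a neighbour copy only
# when the target centre falls in the half of its cell (bias g=0.5) closest to that neighbour.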
166 | t = t.repeat((5, 1, 1))[j] 167 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 168 | else: 169 | t = targets[0] 170 | offsets = 0 171 | 172 | # Define 173 | bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors 174 | (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class 175 | gij = (gxy - offsets).long() 176 | gi, gj = gij.T # grid indices 177 | 178 | # Append 179 | indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid 180 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 181 | anch.append(anchors[a]) # anchors 182 | tcls.append(c) # class 183 | tidxs.append(tidx) 184 | xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized 185 | 186 | return tcls, tbox, indices, anch, tidxs, xywhn 187 | -------------------------------------------------------------------------------- /utils/panoptic/plots.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import math 3 | from pathlib import Path 4 | 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | import torch 10 | from torchvision.utils import draw_segmentation_masks, save_image 11 | 12 | from .. import threaded 13 | from ..general import xywh2xyxy 14 | from ..plots import Annotator, colors 15 | 16 | 17 | @threaded 18 | def plot_images_and_masks(images, targets, masks, semasks, paths=None, fname='images.jpg', names=None): 19 | 20 | try: 21 | if images.shape[-2:] != semasks.shape[-2:]: 22 | m = torch.nn.Upsample(scale_factor=4, mode='nearest') 23 | semasks = m(semasks) 24 | 25 | for idx in range(images.shape[0]): 26 | output_img = draw_segmentation_masks( 27 | image = images[idx, :, :, :].cpu().to(dtype = torch.uint8), 28 | masks = semasks[idx, :, :, :].cpu().to(dtype = torch.bool), 29 | alpha = 1) 30 | cv2.imwrite( 31 | '{}_{}.jpg'.format(fname, idx), 32 | torch.permute(output_img, (1, 2, 0)).numpy() 33 | ) 34 | except: 35 | pass 36 | 37 | # Plot image grid with labels 38 | if isinstance(images, torch.Tensor): 39 | images = images.cpu().float().numpy() 40 | if isinstance(targets, torch.Tensor): 41 | targets = targets.cpu().numpy() 42 | if isinstance(masks, torch.Tensor): 43 | masks = masks.cpu().numpy().astype(int) 44 | if isinstance(semasks, torch.Tensor): 45 | semasks = semasks.cpu().numpy().astype(int) 46 | 47 | max_size = 1920 # max image size 48 | max_subplots = 16 # max image subplots, i.e. 
4x4 49 | bs, _, h, w = images.shape # batch size, _, height, width 50 | bs = min(bs, max_subplots) # limit plot images 51 | ns = np.ceil(bs ** 0.5) # number of subplots (square) 52 | if np.max(images[0]) <= 1: 53 | images *= 255 # de-normalise (optional) 54 | 55 | # Build Image 56 | mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init 57 | for i, im in enumerate(images): 58 | if i == max_subplots: # if last batch has fewer images than we expect 59 | break 60 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 61 | im = im.transpose(1, 2, 0) 62 | mosaic[y:y + h, x:x + w, :] = im 63 | 64 | # Resize (optional) 65 | scale = max_size / ns / max(h, w) 66 | if scale < 1: 67 | h = math.ceil(scale * h) 68 | w = math.ceil(scale * w) 69 | mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) 70 | 71 | # Annotate 72 | fs = int((h + w) * ns * 0.01) # font size 73 | annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) 74 | for i in range(i + 1): 75 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 76 | annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders 77 | if paths: 78 | annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames 79 | if len(targets) > 0: 80 | idx = targets[:, 0] == i 81 | ti = targets[idx] # image targets 82 | 83 | boxes = xywh2xyxy(ti[:, 2:6]).T 84 | classes = ti[:, 1].astype('int') 85 | labels = ti.shape[1] == 6 # labels if no conf column 86 | conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) 87 | 88 | if boxes.shape[1]: 89 | if boxes.max() <= 1.01: # if normalized with tolerance 0.01 90 | boxes[[0, 2]] *= w # scale to pixels 91 | boxes[[1, 3]] *= h 92 | elif scale < 1: # absolute coords need scale if image scales 93 | boxes *= scale 94 | boxes[[0, 2]] += x 95 | boxes[[1, 3]] += y 96 | for j, box in enumerate(boxes.T.tolist()): 97 | cls = classes[j] 98 | color = colors(cls) 99 | cls = names[cls] if names else cls 100 | if labels or conf[j] > 0.25: # 0.25 conf thresh 101 | label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' 102 | annotator.box_label(box, label, color=color) 103 | 104 | # Plot masks 105 | if len(masks): 106 | if masks.max() > 1.0: # mean that masks are overlap 107 | image_masks = masks[[i]] # (1, 640, 640) 108 | nl = len(ti) 109 | index = np.arange(nl).reshape(nl, 1, 1) + 1 110 | image_masks = np.repeat(image_masks, nl, axis=0) 111 | image_masks = np.where(image_masks == index, 1.0, 0.0) 112 | else: 113 | image_masks = masks[idx] 114 | 115 | im = np.asarray(annotator.im).copy() 116 | for j, box in enumerate(boxes.T.tolist()): 117 | if labels or conf[j] > 0.25: # 0.25 conf thresh 118 | color = colors(classes[j]) 119 | mh, mw = image_masks[j].shape 120 | if mh != h or mw != w: 121 | mask = image_masks[j].astype(np.uint8) 122 | mask = cv2.resize(mask, (w, h)) 123 | mask = mask.astype(bool) 124 | else: 125 | mask = image_masks[j].astype(bool) 126 | with contextlib.suppress(Exception): 127 | im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 128 | annotator.fromarray(im) 129 | annotator.im.save(fname) # save 130 | 131 | 132 | def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): 133 | # Plot training results.csv. 
Usage: from utils.plots import *; plot_results('path/to/results.csv') 134 | save_dir = Path(file).parent if file else Path(dir) 135 | fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) 136 | ax = ax.ravel() 137 | files = list(save_dir.glob("results*.csv")) 138 | assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 139 | for f in files: 140 | try: 141 | data = pd.read_csv(f) 142 | index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 143 | 0.1 * data.values[:, 11]) 144 | s = [x.strip() for x in data.columns] 145 | x = data.values[:, 0] 146 | for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): 147 | y = data.values[:, j] 148 | # y[y == 0] = np.nan # don't show zero values 149 | ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) 150 | if best: 151 | # best 152 | ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) 153 | ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") 154 | else: 155 | # last 156 | ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) 157 | ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") 158 | # if j in [8, 9, 10]: # share train and val loss y axes 159 | # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) 160 | except Exception as e: 161 | print(f"Warning: Plotting error for {f}: {e}") 162 | ax[1].legend() 163 | fig.savefig(save_dir / "results.png", dpi=200) 164 | plt.close() 165 | -------------------------------------------------------------------------------- /utils/panoptic/tal/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/panoptic/tal/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.general import check_version 4 | 5 | TORCH_1_10 = check_version(torch.__version__, '1.10.0') 6 | 7 | 8 | def make_anchors(feats, strides, grid_cell_offset=0.5): 9 | """Generate anchors from features.""" 10 | anchor_points, stride_tensor = [], [] 11 | assert feats is not None 12 | dtype, device = feats[0].dtype, feats[0].device 13 | for i, stride in enumerate(strides): 14 | _, _, h, w = feats[i].shape 15 | sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x 16 | sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y 17 | sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) 18 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 19 | stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) 20 | return torch.cat(anchor_points), torch.cat(stride_tensor) 21 | 22 | 23 | def dist2bbox(distance, anchor_points, xywh=True, dim=-1): 24 | """Transform distance(ltrb) to box(xywh or xyxy).""" 25 | lt, rb = torch.split(distance, 2, dim) 26 | x1y1 = anchor_points - lt 27 | x2y2 = anchor_points + rb 28 | if xywh: 29 | c_xy = (x1y1 + x2y2) / 2 30 | wh = x2y2 - x1y1 31 | return torch.cat((c_xy, wh), dim) # xywh bbox 32 | return torch.cat((x1y1, x2y2), dim) # xyxy bbox 33 | 34 | 35 | def bbox2dist(anchor_points, bbox, reg_max): 36 | """Transform bbox(xyxy) to dist(ltrb).""" 37 | x1y1, x2y2 = torch.split(bbox, 2, -1) 38 | return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) 39 | 
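# Usage sketch (illustrative only, not part of the original file; assumes two FPN levels
# with strides 8 and 16, and `ltrb` is a hypothetical tensor of predicted distances):
#   feats = [torch.zeros(1, 64, 80, 80), torch.zeros(1, 64, 40, 40)]
#   anchor_points, stride_tensor = make_anchors(feats, [8, 16])
#   # anchor_points: (6400 + 1600, 2) grid-cell centres; stride_tensor: (8000, 1)
#   boxes_xyxy = dist2bbox(ltrb, anchor_points, xywh=False)  # ltrb: (1, 8000, 4) left/top/right/bottom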
-------------------------------------------------------------------------------- /utils/panoptic/tal/assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from utils.metrics import bbox_iou 6 | 7 | 8 | def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): 9 | """select the positive anchor center in gt 10 | 11 | Args: 12 | xy_centers (Tensor): shape(h*w, 4) 13 | gt_bboxes (Tensor): shape(b, n_boxes, 4) 14 | Return: 15 | (Tensor): shape(b, n_boxes, h*w) 16 | """ 17 | n_anchors = xy_centers.shape[0] 18 | bs, n_boxes, _ = gt_bboxes.shape 19 | lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom 20 | bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) 21 | # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) 22 | return bbox_deltas.amin(3).gt_(eps) 23 | 24 | 25 | def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): 26 | """if an anchor box is assigned to multiple gts, 27 | the one with the highest iou will be selected. 28 | 29 | Args: 30 | mask_pos (Tensor): shape(b, n_max_boxes, h*w) 31 | overlaps (Tensor): shape(b, n_max_boxes, h*w) 32 | Return: 33 | target_gt_idx (Tensor): shape(b, h*w) 34 | fg_mask (Tensor): shape(b, h*w) 35 | mask_pos (Tensor): shape(b, n_max_boxes, h*w) 36 | """ 37 | # (b, n_max_boxes, h*w) -> (b, h*w) 38 | fg_mask = mask_pos.sum(-2) 39 | if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes 40 | mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) 41 | max_overlaps_idx = overlaps.argmax(1) # (b, h*w) 42 | is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) 43 | is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) 44 | mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) 45 | fg_mask = mask_pos.sum(-2) 46 | # find each grid serve which gt(index) 47 | target_gt_idx = mask_pos.argmax(-2) # (b, h*w) 48 | return target_gt_idx, fg_mask, mask_pos 49 | 50 | 51 | class TaskAlignedAssigner(nn.Module): 52 | def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): 53 | super().__init__() 54 | self.topk = topk 55 | self.num_classes = num_classes 56 | self.bg_idx = num_classes 57 | self.alpha = alpha 58 | self.beta = beta 59 | self.eps = eps 60 | 61 | @torch.no_grad() 62 | def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): 63 | """This code referenced to 64 | https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py 65 | 66 | Args: 67 | pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) 68 | pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) 69 | anc_points (Tensor): shape(num_total_anchors, 2) 70 | gt_labels (Tensor): shape(bs, n_max_boxes, 1) 71 | gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) 72 | mask_gt (Tensor): shape(bs, n_max_boxes, 1) 73 | Returns: 74 | target_labels (Tensor): shape(bs, num_total_anchors) 75 | target_bboxes (Tensor): shape(bs, num_total_anchors, 4) 76 | target_scores (Tensor): shape(bs, num_total_anchors, num_classes) 77 | fg_mask (Tensor): shape(bs, num_total_anchors) 78 | """ 79 | self.bs = pd_scores.size(0) 80 | self.n_max_boxes = gt_bboxes.size(1) 81 | 82 | if self.n_max_boxes == 0: 83 | device = gt_bboxes.device 84 | return (torch.full_like(pd_scores[..., 0], 
self.bg_idx).to(device), 85 | torch.zeros_like(pd_bboxes).to(device), 86 | torch.zeros_like(pd_scores).to(device), 87 | torch.zeros_like(pd_scores[..., 0]).to(device), 88 | torch.zeros_like(pd_scores[..., 0]).to(device)) 89 | 90 | mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, 91 | mask_gt) 92 | 93 | target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) 94 | 95 | # assigned target 96 | target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) 97 | 98 | # normalize 99 | align_metric *= mask_pos 100 | pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj 101 | pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj 102 | norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) 103 | target_scores = target_scores * norm_align_metric 104 | 105 | return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx 106 | 107 | def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): 108 | 109 | # get anchor_align metric, (b, max_num_obj, h*w) 110 | align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) 111 | # get in_gts mask, (b, max_num_obj, h*w) 112 | mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) 113 | # get topk_metric mask, (b, max_num_obj, h*w) 114 | mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, 115 | topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) 116 | # merge all mask to a final mask, (b, max_num_obj, h*w) 117 | mask_pos = mask_topk * mask_in_gts * mask_gt 118 | 119 | return mask_pos, align_metric, overlaps 120 | 121 | def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): 122 | 123 | gt_labels = gt_labels.to(torch.long) # b, max_num_obj, 1 124 | ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj 125 | ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj 126 | ind[1] = gt_labels.squeeze(-1) # b, max_num_obj 127 | # get the scores of each grid for each gt cls 128 | bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w 129 | 130 | overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) 131 | #overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, WIoU=True, scale=True)[-1].squeeze(3).clamp(0) 132 | align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) 133 | return align_metric, overlaps 134 | 135 | def select_topk_candidates(self, metrics, largest=True, topk_mask=None): 136 | """ 137 | Args: 138 | metrics: (b, max_num_obj, h*w). 
139 | topk_mask: (b, max_num_obj, topk) or None 140 | """ 141 | 142 | num_anchors = metrics.shape[-1] # h*w 143 | # (b, max_num_obj, topk) 144 | topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) 145 | if topk_mask is None: 146 | topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) 147 | # (b, max_num_obj, topk) 148 | topk_idxs = torch.where(topk_mask, topk_idxs, 0) 149 | # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) 150 | is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) 151 | # filter invalid bboxes 152 | # assigned topk should be unique, this is for dealing with empty labels 153 | # since empty labels will generate index `0` through `F.one_hot` 154 | # NOTE: but what if the topk_idxs include `0`? 155 | is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) 156 | return is_in_topk.to(metrics.dtype) 157 | 158 | def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): 159 | """ 160 | Args: 161 | gt_labels: (b, max_num_obj, 1) 162 | gt_bboxes: (b, max_num_obj, 4) 163 | target_gt_idx: (b, h*w) 164 | fg_mask: (b, h*w) 165 | """ 166 | 167 | # assigned target labels, (b, 1) 168 | batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] 169 | target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) 170 | target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) 171 | 172 | # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) 173 | target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] 174 | 175 | # assigned target scores 176 | target_labels.clamp(0) 177 | target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) 178 | fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) 179 | target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) 180 | 181 | return target_labels, target_bboxes, target_scores 182 | -------------------------------------------------------------------------------- /utils/segment/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/segment/augmentations.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | from ..augmentations import box_candidates 8 | from ..general import resample_segments, segment2box 9 | 10 | 11 | def mixup(im, labels, segments, im2, labels2, segments2): 12 | # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf 13 | r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 14 | im = (im * r + im2 * (1 - r)).astype(np.uint8) 15 | labels = np.concatenate((labels, labels2), 0) 16 | segments = np.concatenate((segments, segments2), 0) 17 | return im, labels, segments 18 | 19 | 20 | def random_perspective(im, 21 | targets=(), 22 | segments=(), 23 | degrees=10, 24 | translate=.1, 25 | scale=.1, 26 | shear=10, 27 | perspective=0.0, 28 | border=(0, 0)): 29 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 30 | # targets = [cls, xyxy] 31 | 32 | height = im.shape[0] + border[0] * 2 # shape(h,w,c) 33 | width = im.shape[1] + border[1] * 2 34 | 35 | # Center 36 | C = np.eye(3) 37 | C[0, 2] = -im.shape[1] / 2 # x translation (pixels) 38 | C[1, 2] = -im.shape[0] / 2 # y translation (pixels) 39 | 40 | # Perspective 41 | P = np.eye(3) 42 | P[2, 
0] = random.uniform(-perspective, perspective) # x perspective (about y) 43 | P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) 44 | 45 | # Rotation and Scale 46 | R = np.eye(3) 47 | a = random.uniform(-degrees, degrees) 48 | # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations 49 | s = random.uniform(1 - scale, 1 + scale) 50 | # s = 2 ** random.uniform(-scale, scale) 51 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) 52 | 53 | # Shear 54 | S = np.eye(3) 55 | S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) 56 | S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) 57 | 58 | # Translation 59 | T = np.eye(3) 60 | T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) 61 | T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) 62 | 63 | # Combined rotation matrix 64 | M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT 65 | if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed 66 | if perspective: 67 | im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) 68 | else: # affine 69 | im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) 70 | 71 | # Visualize 72 | # import matplotlib.pyplot as plt 73 | # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() 74 | # ax[0].imshow(im[:, :, ::-1]) # base 75 | # ax[1].imshow(im2[:, :, ::-1]) # warped 76 | 77 | # Transform label coordinates 78 | n = len(targets) 79 | new_segments = [] 80 | if n: 81 | new = np.zeros((n, 4)) 82 | segments = resample_segments(segments) # upsample 83 | for i, segment in enumerate(segments): 84 | xy = np.ones((len(segment), 3)) 85 | xy[:, :2] = segment 86 | xy = xy @ M.T # transform 87 | xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine 88 | 89 | # clip 90 | new[i] = segment2box(xy, width, height) 91 | new_segments.append(xy) 92 | 93 | # filter candidates 94 | i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) 95 | targets = targets[i] 96 | targets[:, 1:5] = new[i] 97 | new_segments = np.array(new_segments)[i] 98 | 99 | return im, targets, new_segments 100 | -------------------------------------------------------------------------------- /utils/segment/general.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | def crop_mask(masks, boxes): 8 | """ 9 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 10 | Vectorized by Chong (thanks Chong). 11 | 12 | Args: 13 | - masks should be a size [h, w, n] tensor of masks 14 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 15 | """ 16 | 17 | n, h, w = masks.shape 18 | x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) 19 | r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) 20 | c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) 21 | 22 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 23 | 24 | 25 | def process_mask_upsample(protos, masks_in, bboxes, shape): 26 | """ 27 | Crop after upsample. 
28 | proto_out: [mask_dim, mask_h, mask_w] 29 | out_masks: [n, mask_dim], n is number of masks after nms 30 | bboxes: [n, 4], n is number of masks after nms 31 | shape:input_image_size, (h, w) 32 | 33 | return: h, w, n 34 | """ 35 | 36 | c, mh, mw = protos.shape # CHW 37 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) 38 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 39 | masks = crop_mask(masks, bboxes) # CHW 40 | return masks.gt_(0.5) 41 | 42 | 43 | def process_mask(protos, masks_in, bboxes, shape, upsample=False): 44 | """ 45 | Crop before upsample. 46 | proto_out: [mask_dim, mask_h, mask_w] 47 | out_masks: [n, mask_dim], n is number of masks after nms 48 | bboxes: [n, 4], n is number of masks after nms 49 | shape:input_image_size, (h, w) 50 | 51 | return: h, w, n 52 | """ 53 | 54 | c, mh, mw = protos.shape # CHW 55 | ih, iw = shape 56 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW 57 | 58 | downsampled_bboxes = bboxes.clone() 59 | downsampled_bboxes[:, 0] *= mw / iw 60 | downsampled_bboxes[:, 2] *= mw / iw 61 | downsampled_bboxes[:, 3] *= mh / ih 62 | downsampled_bboxes[:, 1] *= mh / ih 63 | 64 | masks = crop_mask(masks, downsampled_bboxes) # CHW 65 | if upsample: 66 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 67 | return masks.gt_(0.5) 68 | 69 | 70 | def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): 71 | """ 72 | img1_shape: model input shape, [h, w] 73 | img0_shape: origin pic shape, [h, w, 3] 74 | masks: [h, w, num] 75 | """ 76 | # Rescale coordinates (xyxy) from im1_shape to im0_shape 77 | if ratio_pad is None: # calculate from im0_shape 78 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 79 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 80 | else: 81 | pad = ratio_pad[1] 82 | top, left = int(pad[1]), int(pad[0]) # y, x 83 | bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) 84 | 85 | if len(masks.shape) < 2: 86 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 87 | masks = masks[top:bottom, left:right] 88 | # masks = masks.permute(2, 0, 1).contiguous() 89 | # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] 90 | # masks = masks.permute(1, 2, 0).contiguous() 91 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) 92 | 93 | if len(masks.shape) == 2: 94 | masks = masks[:, :, None] 95 | return masks 96 | 97 | 98 | def mask_iou(mask1, mask2, eps=1e-7): 99 | """ 100 | mask1: [N, n] m1 means number of predicted objects 101 | mask2: [M, n] m2 means number of gt objects 102 | Note: n means image_w x image_h 103 | 104 | return: masks iou, [N, M] 105 | """ 106 | intersection = torch.matmul(mask1, mask2.t()).clamp(0) 107 | union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection 108 | return intersection / (union + eps) 109 | 110 | 111 | def masks_iou(mask1, mask2, eps=1e-7): 112 | """ 113 | mask1: [N, n] m1 means number of predicted objects 114 | mask2: [N, n] m2 means number of gt objects 115 | Note: n means image_w x image_h 116 | 117 | return: masks iou, (N, ) 118 | """ 119 | intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) 120 | union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection 121 | return intersection / (union + eps) 122 | 123 | 
124 | def masks2segments(masks, strategy='largest'): 125 | # Convert masks(n,160,160) into segments(n,xy) 126 | segments = [] 127 | for x in masks.int().cpu().numpy().astype('uint8'): 128 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] 129 | if c: 130 | if strategy == 'concat': # concatenate all segments 131 | c = np.concatenate([x.reshape(-1, 2) for x in c]) 132 | elif strategy == 'largest': # select largest segment 133 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 134 | else: 135 | c = np.zeros((0, 2)) # no segments found 136 | segments.append(c.astype('float32')) 137 | return segments 138 | -------------------------------------------------------------------------------- /utils/segment/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..metrics import ap_per_class 4 | 5 | 6 | def fitness(x): 7 | # Model fitness as a weighted combination of metrics 8 | w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] 9 | return (x[:, :8] * w).sum(1) 10 | 11 | 12 | def ap_per_class_box_and_mask( 13 | tp_m, 14 | tp_b, 15 | conf, 16 | pred_cls, 17 | target_cls, 18 | plot=False, 19 | save_dir=".", 20 | names=(), 21 | ): 22 | """ 23 | Args: 24 | tp_b: tp of boxes. 25 | tp_m: tp of masks. 26 | other arguments see `func: ap_per_class`. 27 | """ 28 | results_boxes = ap_per_class(tp_b, 29 | conf, 30 | pred_cls, 31 | target_cls, 32 | plot=plot, 33 | save_dir=save_dir, 34 | names=names, 35 | prefix="Box")[2:] 36 | results_masks = ap_per_class(tp_m, 37 | conf, 38 | pred_cls, 39 | target_cls, 40 | plot=plot, 41 | save_dir=save_dir, 42 | names=names, 43 | prefix="Mask")[2:] 44 | 45 | results = { 46 | "boxes": { 47 | "p": results_boxes[0], 48 | "r": results_boxes[1], 49 | "ap": results_boxes[3], 50 | "f1": results_boxes[2], 51 | "ap_class": results_boxes[4]}, 52 | "masks": { 53 | "p": results_masks[0], 54 | "r": results_masks[1], 55 | "ap": results_masks[3], 56 | "f1": results_masks[2], 57 | "ap_class": results_masks[4]}} 58 | return results 59 | 60 | 61 | class Metric: 62 | 63 | def __init__(self) -> None: 64 | self.p = [] # (nc, ) 65 | self.r = [] # (nc, ) 66 | self.f1 = [] # (nc, ) 67 | self.all_ap = [] # (nc, 10) 68 | self.ap_class_index = [] # (nc, ) 69 | 70 | @property 71 | def ap50(self): 72 | """AP@0.5 of all classes. 73 | Return: 74 | (nc, ) or []. 75 | """ 76 | return self.all_ap[:, 0] if len(self.all_ap) else [] 77 | 78 | @property 79 | def ap(self): 80 | """AP@0.5:0.95 81 | Return: 82 | (nc, ) or []. 83 | """ 84 | return self.all_ap.mean(1) if len(self.all_ap) else [] 85 | 86 | @property 87 | def mp(self): 88 | """mean precision of all classes. 89 | Return: 90 | float. 91 | """ 92 | return self.p.mean() if len(self.p) else 0.0 93 | 94 | @property 95 | def mr(self): 96 | """mean recall of all classes. 97 | Return: 98 | float. 99 | """ 100 | return self.r.mean() if len(self.r) else 0.0 101 | 102 | @property 103 | def map50(self): 104 | """Mean AP@0.5 of all classes. 105 | Return: 106 | float. 107 | """ 108 | return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 109 | 110 | @property 111 | def map(self): 112 | """Mean AP@0.5:0.95 of all classes. 113 | Return: 114 | float. 
115 | """ 116 | return self.all_ap.mean() if len(self.all_ap) else 0.0 117 | 118 | def mean_results(self): 119 | """Mean of results, return mp, mr, map50, map""" 120 | return (self.mp, self.mr, self.map50, self.map) 121 | 122 | def class_result(self, i): 123 | """class-aware result, return p[i], r[i], ap50[i], ap[i]""" 124 | return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) 125 | 126 | def get_maps(self, nc): 127 | maps = np.zeros(nc) + self.map 128 | for i, c in enumerate(self.ap_class_index): 129 | maps[c] = self.ap[i] 130 | return maps 131 | 132 | def update(self, results): 133 | """ 134 | Args: 135 | results: tuple(p, r, ap, f1, ap_class) 136 | """ 137 | p, r, all_ap, f1, ap_class_index = results 138 | self.p = p 139 | self.r = r 140 | self.all_ap = all_ap 141 | self.f1 = f1 142 | self.ap_class_index = ap_class_index 143 | 144 | 145 | class Metrics: 146 | """Metric for boxes and masks.""" 147 | 148 | def __init__(self) -> None: 149 | self.metric_box = Metric() 150 | self.metric_mask = Metric() 151 | 152 | def update(self, results): 153 | """ 154 | Args: 155 | results: Dict{'boxes': Dict{}, 'masks': Dict{}} 156 | """ 157 | self.metric_box.update(list(results["boxes"].values())) 158 | self.metric_mask.update(list(results["masks"].values())) 159 | 160 | def mean_results(self): 161 | return self.metric_box.mean_results() + self.metric_mask.mean_results() 162 | 163 | def class_result(self, i): 164 | return self.metric_box.class_result(i) + self.metric_mask.class_result(i) 165 | 166 | def get_maps(self, nc): 167 | return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) 168 | 169 | @property 170 | def ap_class_index(self): 171 | # boxes and masks have the same ap_class_index 172 | return self.metric_box.ap_class_index 173 | 174 | 175 | KEYS = [ 176 | "train/box_loss", 177 | "train/seg_loss", # train loss 178 | "train/obj_loss", 179 | "train/cls_loss", 180 | "metrics/precision(B)", 181 | "metrics/recall(B)", 182 | "metrics/mAP_0.5(B)", 183 | "metrics/mAP_0.5:0.95(B)", # metrics 184 | "metrics/precision(M)", 185 | "metrics/recall(M)", 186 | "metrics/mAP_0.5(M)", 187 | "metrics/mAP_0.5:0.95(M)", # metrics 188 | "val/box_loss", 189 | "val/seg_loss", # val loss 190 | "val/obj_loss", 191 | "val/cls_loss", 192 | "x/lr0", 193 | "x/lr1", 194 | "x/lr2",] 195 | 196 | BEST_KEYS = [ 197 | "best/epoch", 198 | "best/precision(B)", 199 | "best/recall(B)", 200 | "best/mAP_0.5(B)", 201 | "best/mAP_0.5:0.95(B)", 202 | "best/precision(M)", 203 | "best/recall(M)", 204 | "best/mAP_0.5(M)", 205 | "best/mAP_0.5:0.95(M)",] 206 | -------------------------------------------------------------------------------- /utils/segment/plots.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import math 3 | from pathlib import Path 4 | 5 | import cv2 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | import torch 10 | 11 | from .. 
import threaded 12 | from ..general import xywh2xyxy 13 | from ..plots import Annotator, colors 14 | 15 | 16 | @threaded 17 | def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): 18 | # Plot image grid with labels 19 | if isinstance(images, torch.Tensor): 20 | images = images.cpu().float().numpy() 21 | if isinstance(targets, torch.Tensor): 22 | targets = targets.cpu().numpy() 23 | if isinstance(masks, torch.Tensor): 24 | masks = masks.cpu().numpy().astype(int) 25 | 26 | max_size = 1920 # max image size 27 | max_subplots = 16 # max image subplots, i.e. 4x4 28 | bs, _, h, w = images.shape # batch size, _, height, width 29 | bs = min(bs, max_subplots) # limit plot images 30 | ns = np.ceil(bs ** 0.5) # number of subplots (square) 31 | if np.max(images[0]) <= 1: 32 | images *= 255 # de-normalise (optional) 33 | 34 | # Build Image 35 | mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init 36 | for i, im in enumerate(images): 37 | if i == max_subplots: # if last batch has fewer images than we expect 38 | break 39 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 40 | im = im.transpose(1, 2, 0) 41 | mosaic[y:y + h, x:x + w, :] = im 42 | 43 | # Resize (optional) 44 | scale = max_size / ns / max(h, w) 45 | if scale < 1: 46 | h = math.ceil(scale * h) 47 | w = math.ceil(scale * w) 48 | mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) 49 | 50 | # Annotate 51 | fs = int((h + w) * ns * 0.01) # font size 52 | annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) 53 | for i in range(i + 1): 54 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 55 | annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders 56 | if paths: 57 | annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames 58 | if len(targets) > 0: 59 | idx = targets[:, 0] == i 60 | ti = targets[idx] # image targets 61 | 62 | boxes = xywh2xyxy(ti[:, 2:6]).T 63 | classes = ti[:, 1].astype('int') 64 | labels = ti.shape[1] == 6 # labels if no conf column 65 | conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) 66 | 67 | if boxes.shape[1]: 68 | if boxes.max() <= 1.01: # if normalized with tolerance 0.01 69 | boxes[[0, 2]] *= w # scale to pixels 70 | boxes[[1, 3]] *= h 71 | elif scale < 1: # absolute coords need scale if image scales 72 | boxes *= scale 73 | boxes[[0, 2]] += x 74 | boxes[[1, 3]] += y 75 | for j, box in enumerate(boxes.T.tolist()): 76 | cls = classes[j] 77 | color = colors(cls) 78 | cls = names[cls] if names else cls 79 | if labels or conf[j] > 0.25: # 0.25 conf thresh 80 | label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' 81 | annotator.box_label(box, label, color=color) 82 | 83 | # Plot masks 84 | if len(masks): 85 | if masks.max() > 1.0: # mean that masks are overlap 86 | image_masks = masks[[i]] # (1, 640, 640) 87 | nl = len(ti) 88 | index = np.arange(nl).reshape(nl, 1, 1) + 1 89 | image_masks = np.repeat(image_masks, nl, axis=0) 90 | image_masks = np.where(image_masks == index, 1.0, 0.0) 91 | else: 92 | image_masks = masks[idx] 93 | 94 | im = np.asarray(annotator.im).copy() 95 | for j, box in enumerate(boxes.T.tolist()): 96 | if labels or conf[j] > 0.25: # 0.25 conf thresh 97 | color = colors(classes[j]) 98 | mh, mw = image_masks[j].shape 99 | if mh != h or mw != w: 100 | mask = image_masks[j].astype(np.uint8) 101 | mask = cv2.resize(mask, (w, h)) 102 | mask = mask.astype(bool) 103 
| else: 104 | mask = image_masks[j].astype(bool) 105 | with contextlib.suppress(Exception): 106 | im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 107 | annotator.fromarray(im) 108 | annotator.im.save(fname) # save 109 | 110 | 111 | def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): 112 | # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') 113 | save_dir = Path(file).parent if file else Path(dir) 114 | fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) 115 | ax = ax.ravel() 116 | files = list(save_dir.glob("results*.csv")) 117 | assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 118 | for f in files: 119 | try: 120 | data = pd.read_csv(f) 121 | index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 122 | 0.1 * data.values[:, 11]) 123 | s = [x.strip() for x in data.columns] 124 | x = data.values[:, 0] 125 | for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): 126 | y = data.values[:, j] 127 | # y[y == 0] = np.nan # don't show zero values 128 | ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) 129 | if best: 130 | # best 131 | ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) 132 | ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") 133 | else: 134 | # last 135 | ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) 136 | ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") 137 | # if j in [8, 9, 10]: # share train and val loss y axes 138 | # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) 139 | except Exception as e: 140 | print(f"Warning: Plotting error for {f}: {e}") 141 | ax[1].legend() 142 | fig.savefig(save_dir / "results.png", dpi=200) 143 | plt.close() 144 | -------------------------------------------------------------------------------- /utils/segment/tal/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/segment/tal/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.general import check_version 4 | 5 | TORCH_1_10 = check_version(torch.__version__, '1.10.0') 6 | 7 | 8 | def make_anchors(feats, strides, grid_cell_offset=0.5): 9 | """Generate anchors from features.""" 10 | anchor_points, stride_tensor = [], [] 11 | assert feats is not None 12 | dtype, device = feats[0].dtype, feats[0].device 13 | for i, stride in enumerate(strides): 14 | _, _, h, w = feats[i].shape 15 | sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x 16 | sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y 17 | sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) 18 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 19 | stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) 20 | return torch.cat(anchor_points), torch.cat(stride_tensor) 21 | 22 | 23 | def dist2bbox(distance, anchor_points, xywh=True, dim=-1): 24 | """Transform distance(ltrb) to box(xywh or xyxy).""" 25 | lt, rb = torch.split(distance, 2, dim) 26 | x1y1 = anchor_points - lt 27 | x2y2 = anchor_points + rb 28 | if xywh: 29 | c_xy = (x1y1 + x2y2) / 2 30 | wh = x2y2 - x1y1 31 | return 
torch.cat((c_xy, wh), dim) # xywh bbox 32 | return torch.cat((x1y1, x2y2), dim) # xyxy bbox 33 | 34 | 35 | def bbox2dist(anchor_points, bbox, reg_max): 36 | """Transform bbox(xyxy) to dist(ltrb).""" 37 | x1y1, x2y2 = torch.split(bbox, 2, -1) 38 | return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) 39 | -------------------------------------------------------------------------------- /utils/segment/tal/assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from utils.metrics import bbox_iou 6 | 7 | 8 | def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): 9 | """select the positive anchor center in gt 10 | 11 | Args: 12 | xy_centers (Tensor): shape(h*w, 4) 13 | gt_bboxes (Tensor): shape(b, n_boxes, 4) 14 | Return: 15 | (Tensor): shape(b, n_boxes, h*w) 16 | """ 17 | n_anchors = xy_centers.shape[0] 18 | bs, n_boxes, _ = gt_bboxes.shape 19 | lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom 20 | bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) 21 | # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) 22 | return bbox_deltas.amin(3).gt_(eps) 23 | 24 | 25 | def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): 26 | """if an anchor box is assigned to multiple gts, 27 | the one with the highest iou will be selected. 28 | 29 | Args: 30 | mask_pos (Tensor): shape(b, n_max_boxes, h*w) 31 | overlaps (Tensor): shape(b, n_max_boxes, h*w) 32 | Return: 33 | target_gt_idx (Tensor): shape(b, h*w) 34 | fg_mask (Tensor): shape(b, h*w) 35 | mask_pos (Tensor): shape(b, n_max_boxes, h*w) 36 | """ 37 | # (b, n_max_boxes, h*w) -> (b, h*w) 38 | fg_mask = mask_pos.sum(-2) 39 | if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes 40 | mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) 41 | max_overlaps_idx = overlaps.argmax(1) # (b, h*w) 42 | is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) 43 | is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) 44 | mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) 45 | fg_mask = mask_pos.sum(-2) 46 | # find each grid serve which gt(index) 47 | target_gt_idx = mask_pos.argmax(-2) # (b, h*w) 48 | return target_gt_idx, fg_mask, mask_pos 49 | 50 | 51 | class TaskAlignedAssigner(nn.Module): 52 | def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): 53 | super().__init__() 54 | self.topk = topk 55 | self.num_classes = num_classes 56 | self.bg_idx = num_classes 57 | self.alpha = alpha 58 | self.beta = beta 59 | self.eps = eps 60 | 61 | @torch.no_grad() 62 | def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): 63 | """This code referenced to 64 | https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py 65 | 66 | Args: 67 | pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) 68 | pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) 69 | anc_points (Tensor): shape(num_total_anchors, 2) 70 | gt_labels (Tensor): shape(bs, n_max_boxes, 1) 71 | gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) 72 | mask_gt (Tensor): shape(bs, n_max_boxes, 1) 73 | Returns: 74 | target_labels (Tensor): shape(bs, num_total_anchors) 75 | target_bboxes (Tensor): shape(bs, 
num_total_anchors, 4) 76 | target_scores (Tensor): shape(bs, num_total_anchors, num_classes) 77 | fg_mask (Tensor): shape(bs, num_total_anchors) 78 | """ 79 | self.bs = pd_scores.size(0) 80 | self.n_max_boxes = gt_bboxes.size(1) 81 | 82 | if self.n_max_boxes == 0: 83 | device = gt_bboxes.device 84 | return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), 85 | torch.zeros_like(pd_bboxes).to(device), 86 | torch.zeros_like(pd_scores).to(device), 87 | torch.zeros_like(pd_scores[..., 0]).to(device), 88 | torch.zeros_like(pd_scores[..., 0]).to(device)) 89 | 90 | mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, 91 | mask_gt) 92 | 93 | target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) 94 | 95 | # assigned target 96 | target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) 97 | 98 | # normalize 99 | align_metric *= mask_pos 100 | pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj 101 | pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj 102 | norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) 103 | target_scores = target_scores * norm_align_metric 104 | 105 | return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx 106 | 107 | def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): 108 | 109 | # get anchor_align metric, (b, max_num_obj, h*w) 110 | align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) 111 | # get in_gts mask, (b, max_num_obj, h*w) 112 | mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) 113 | # get topk_metric mask, (b, max_num_obj, h*w) 114 | mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, 115 | topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) 116 | # merge all mask to a final mask, (b, max_num_obj, h*w) 117 | mask_pos = mask_topk * mask_in_gts * mask_gt 118 | 119 | return mask_pos, align_metric, overlaps 120 | 121 | def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): 122 | 123 | gt_labels = gt_labels.to(torch.long) # b, max_num_obj, 1 124 | ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj 125 | ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj 126 | ind[1] = gt_labels.squeeze(-1) # b, max_num_obj 127 | # get the scores of each grid for each gt cls 128 | bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w 129 | 130 | overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) 131 | align_metric = bbox_scores.pow(self.alpha) * (overlaps).pow(self.beta) 132 | return align_metric, overlaps 133 | 134 | def select_topk_candidates(self, metrics, largest=True, topk_mask=None): 135 | """ 136 | Args: 137 | metrics: (b, max_num_obj, h*w). 
138 | topk_mask: (b, max_num_obj, topk) or None 139 | """ 140 | 141 | num_anchors = metrics.shape[-1] # h*w 142 | # (b, max_num_obj, topk) 143 | topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) 144 | if topk_mask is None: 145 | topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) 146 | # (b, max_num_obj, topk) 147 | topk_idxs = torch.where(topk_mask, topk_idxs, 0) 148 | # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) 149 | is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) 150 | # filter invalid bboxes 151 | # assigned topk should be unique, this is for dealing with empty labels 152 | # since empty labels will generate index `0` through `F.one_hot` 153 | # NOTE: but what if the topk_idxs include `0`? 154 | is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) 155 | return is_in_topk.to(metrics.dtype) 156 | 157 | def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): 158 | """ 159 | Args: 160 | gt_labels: (b, max_num_obj, 1) 161 | gt_bboxes: (b, max_num_obj, 4) 162 | target_gt_idx: (b, h*w) 163 | fg_mask: (b, h*w) 164 | """ 165 | 166 | # assigned target labels, (b, 1) 167 | batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] 168 | target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) 169 | target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) 170 | 171 | # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) 172 | target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] 173 | 174 | # assigned target scores 175 | target_labels.clamp(0) 176 | target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) 177 | fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) 178 | target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) 179 | 180 | return target_labels, target_bboxes, target_scores 181 | -------------------------------------------------------------------------------- /utils/tal/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /utils/tal/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from utils.general import check_version 4 | 5 | TORCH_1_10 = check_version(torch.__version__, '1.10.0') 6 | 7 | 8 | def make_anchors(feats, strides, grid_cell_offset=0.5): 9 | """Generate anchors from features.""" 10 | anchor_points, stride_tensor = [], [] 11 | assert feats is not None 12 | dtype, device = feats[0].dtype, feats[0].device 13 | for i, stride in enumerate(strides): 14 | _, _, h, w = feats[i].shape 15 | sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x 16 | sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y 17 | sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) 18 | anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) 19 | stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) 20 | return torch.cat(anchor_points), torch.cat(stride_tensor) 21 | 22 | 23 | def dist2bbox(distance, anchor_points, xywh=True, dim=-1): 24 | """Transform distance(ltrb) to box(xywh or xyxy).""" 25 | lt, rb = torch.split(distance, 2, dim) 26 | x1y1 = anchor_points - lt 27 | x2y2 = anchor_points + rb 28 | if xywh: 29 | c_xy = (x1y1 + x2y2) / 2 30 | wh = x2y2 - x1y1 31 | 
return torch.cat((c_xy, wh), dim) # xywh bbox 32 | return torch.cat((x1y1, x2y2), dim) # xyxy bbox 33 | 34 | 35 | def bbox2dist(anchor_points, bbox, reg_max): 36 | """Transform bbox(xyxy) to dist(ltrb).""" 37 | x1y1, x2y2 = torch.split(bbox, 2, -1) 38 | return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) 39 | -------------------------------------------------------------------------------- /utils/tal/assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from utils.metrics import bbox_iou 6 | 7 | 8 | def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): 9 | """select the positive anchor center in gt 10 | 11 | Args: 12 | xy_centers (Tensor): shape(h*w, 4) 13 | gt_bboxes (Tensor): shape(b, n_boxes, 4) 14 | Return: 15 | (Tensor): shape(b, n_boxes, h*w) 16 | """ 17 | n_anchors = xy_centers.shape[0] 18 | bs, n_boxes, _ = gt_bboxes.shape 19 | lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom 20 | bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) 21 | # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) 22 | return bbox_deltas.amin(3).gt_(eps) 23 | 24 | 25 | def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): 26 | """if an anchor box is assigned to multiple gts, 27 | the one with the highest iou will be selected. 28 | 29 | Args: 30 | mask_pos (Tensor): shape(b, n_max_boxes, h*w) 31 | overlaps (Tensor): shape(b, n_max_boxes, h*w) 32 | Return: 33 | target_gt_idx (Tensor): shape(b, h*w) 34 | fg_mask (Tensor): shape(b, h*w) 35 | mask_pos (Tensor): shape(b, n_max_boxes, h*w) 36 | """ 37 | # (b, n_max_boxes, h*w) -> (b, h*w) 38 | fg_mask = mask_pos.sum(-2) 39 | if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes 40 | mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) 41 | max_overlaps_idx = overlaps.argmax(1) # (b, h*w) 42 | is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) 43 | is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) 44 | mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) 45 | fg_mask = mask_pos.sum(-2) 46 | # find each grid serve which gt(index) 47 | target_gt_idx = mask_pos.argmax(-2) # (b, h*w) 48 | return target_gt_idx, fg_mask, mask_pos 49 | 50 | 51 | class TaskAlignedAssigner(nn.Module): 52 | def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): 53 | super().__init__() 54 | self.topk = topk 55 | self.num_classes = num_classes 56 | self.bg_idx = num_classes 57 | self.alpha = alpha 58 | self.beta = beta 59 | self.eps = eps 60 | 61 | @torch.no_grad() 62 | def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): 63 | """This code referenced to 64 | https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py 65 | 66 | Args: 67 | pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) 68 | pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) 69 | anc_points (Tensor): shape(num_total_anchors, 2) 70 | gt_labels (Tensor): shape(bs, n_max_boxes, 1) 71 | gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) 72 | mask_gt (Tensor): shape(bs, n_max_boxes, 1) 73 | Returns: 74 | target_labels (Tensor): shape(bs, num_total_anchors) 75 | target_bboxes (Tensor): shape(bs, 
num_total_anchors, 4) 76 | target_scores (Tensor): shape(bs, num_total_anchors, num_classes) 77 | fg_mask (Tensor): shape(bs, num_total_anchors) 78 | """ 79 | self.bs = pd_scores.size(0) 80 | self.n_max_boxes = gt_bboxes.size(1) 81 | 82 | if self.n_max_boxes == 0: 83 | device = gt_bboxes.device 84 | return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), 85 | torch.zeros_like(pd_bboxes).to(device), 86 | torch.zeros_like(pd_scores).to(device), 87 | torch.zeros_like(pd_scores[..., 0]).to(device)) 88 | 89 | mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, 90 | mask_gt) 91 | 92 | target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) 93 | 94 | # assigned target 95 | target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) 96 | 97 | # normalize 98 | align_metric *= mask_pos 99 | pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj 100 | pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj 101 | norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) 102 | target_scores = target_scores * norm_align_metric 103 | 104 | return target_labels, target_bboxes, target_scores, fg_mask.bool() 105 | 106 | def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): 107 | 108 | # get anchor_align metric, (b, max_num_obj, h*w) 109 | align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) 110 | # get in_gts mask, (b, max_num_obj, h*w) 111 | mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) 112 | # get topk_metric mask, (b, max_num_obj, h*w) 113 | mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, 114 | topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) 115 | # merge all mask to a final mask, (b, max_num_obj, h*w) 116 | mask_pos = mask_topk * mask_in_gts * mask_gt 117 | 118 | return mask_pos, align_metric, overlaps 119 | 120 | def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): 121 | 122 | gt_labels = gt_labels.to(torch.long) # b, max_num_obj, 1 123 | ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj 124 | ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj 125 | ind[1] = gt_labels.squeeze(-1) # b, max_num_obj 126 | # get the scores of each grid for each gt cls 127 | bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w 128 | 129 | overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) 130 | align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) 131 | return align_metric, overlaps 132 | 133 | def select_topk_candidates(self, metrics, largest=True, topk_mask=None): 134 | """ 135 | Args: 136 | metrics: (b, max_num_obj, h*w). 
137 | topk_mask: (b, max_num_obj, topk) or None 138 | """ 139 | 140 | num_anchors = metrics.shape[-1] # h*w 141 | # (b, max_num_obj, topk) 142 | topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) 143 | if topk_mask is None: 144 | topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) 145 | # (b, max_num_obj, topk) 146 | topk_idxs = torch.where(topk_mask, topk_idxs, 0) 147 | # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) 148 | is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) 149 | # filter invalid bboxes 150 | # assigned topk should be unique, this is for dealing with empty labels 151 | # since empty labels will generate index `0` through `F.one_hot` 152 | # NOTE: but what if the topk_idxs include `0`? 153 | is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) 154 | return is_in_topk.to(metrics.dtype) 155 | 156 | def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): 157 | """ 158 | Args: 159 | gt_labels: (b, max_num_obj, 1) 160 | gt_bboxes: (b, max_num_obj, 4) 161 | target_gt_idx: (b, h*w) 162 | fg_mask: (b, h*w) 163 | """ 164 | 165 | # assigned target labels, (b, 1) 166 | batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] 167 | target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) 168 | target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) 169 | 170 | # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) 171 | target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] 172 | 173 | # assigned target scores 174 | target_labels.clamp(0) 175 | target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) 176 | fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) 177 | target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) 178 | 179 | return target_labels, target_bboxes, target_scores 180 | -------------------------------------------------------------------------------- /utils/triton.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from urllib.parse import urlparse 3 | 4 | import torch 5 | 6 | 7 | class TritonRemoteModel: 8 | """ A wrapper over a model served by the Triton Inference Server. It can 9 | be configured to communicate over GRPC or HTTP. It accepts Torch Tensors 10 | as input and returns them as outputs. 11 | """ 12 | 13 | def __init__(self, url: str): 14 | """ 15 | Keyword arguments: 16 | url: Fully qualified address of the Triton server - for e.g. 
grpc://localhost:8000 17 | """ 18 | 19 | parsed_url = urlparse(url) 20 | if parsed_url.scheme == "grpc": 21 | from tritonclient.grpc import InferenceServerClient, InferInput 22 | 23 | self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client 24 | model_repository = self.client.get_model_repository_index() 25 | self.model_name = model_repository.models[0].name 26 | self.metadata = self.client.get_model_metadata(self.model_name, as_json=True) 27 | 28 | def create_input_placeholders() -> typing.List[InferInput]: 29 | return [ 30 | InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] 31 | 32 | else: 33 | from tritonclient.http import InferenceServerClient, InferInput 34 | 35 | self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client 36 | model_repository = self.client.get_model_repository_index() 37 | self.model_name = model_repository[0]['name'] 38 | self.metadata = self.client.get_model_metadata(self.model_name) 39 | 40 | def create_input_placeholders() -> typing.List[InferInput]: 41 | return [ 42 | InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] 43 | 44 | self._create_input_placeholders_fn = create_input_placeholders 45 | 46 | @property 47 | def runtime(self): 48 | """Returns the model runtime""" 49 | return self.metadata.get("backend", self.metadata.get("platform")) 50 | 51 | def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]: 52 | """ Invokes the model. Parameters can be provided via args or kwargs. 53 | args, if provided, are assumed to match the order of inputs of the model. 54 | kwargs are matched with the model input names. 55 | """ 56 | inputs = self._create_inputs(*args, **kwargs) 57 | response = self.client.infer(model_name=self.model_name, inputs=inputs) 58 | result = [] 59 | for output in self.metadata['outputs']: 60 | tensor = torch.as_tensor(response.as_numpy(output['name'])) 61 | result.append(tensor) 62 | return result[0] if len(result) == 1 else result 63 | 64 | def _create_inputs(self, *args, **kwargs): 65 | args_len, kwargs_len = len(args), len(kwargs) 66 | if not args_len and not kwargs_len: 67 | raise RuntimeError("No inputs provided.") 68 | if args_len and kwargs_len: 69 | raise RuntimeError("Cannot specify args and kwargs at the same time") 70 | 71 | placeholders = self._create_input_placeholders_fn() 72 | if args_len: 73 | if args_len != len(placeholders): 74 | raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") 75 | for input, value in zip(placeholders, args): 76 | input.set_data_from_numpy(value.cpu().numpy()) 77 | else: 78 | for input in placeholders: 79 | value = kwargs[input.name] 80 | input.set_data_from_numpy(value.cpu().numpy()) 81 | return placeholders 82 | --------------------------------------------------------------------------------
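
For quick orientation, below is a minimal usage sketch of the `TritonRemoteModel` wrapper defined in `utils/triton.py` above. The server address, port, and input shape are illustrative assumptions only (they are not taken from this repository) and must be adapted to whatever model is actually deployed on the Triton server.

    # Hedged example: assumes a Triton server is reachable at this address and
    # that it serves a single model that accepts a 1x3x640x640 float tensor.
    import torch
    from utils.triton import TritonRemoteModel

    # HTTP and GRPC schemes are both supported by the wrapper; the ports shown
    # here are assumptions, not values defined anywhere in this repo.
    model = TritonRemoteModel("http://localhost:8000")   # or "grpc://localhost:8001"
    print(model.runtime)                                  # serving backend/platform reported by Triton

    # Positional tensors are matched to the model inputs in order; keyword
    # arguments would instead be matched by input name from the model metadata.
    im = torch.zeros(1, 3, 640, 640)                      # placeholder batch (assumed shape)
    out = model(im)                                       # torch.Tensor, or a tuple if the model has several outputs

Because the wrapper reads the first entry of the server's model repository index, this sketch assumes exactly one model is loaded; with multiple models the selection logic in `__init__` would need to be adjusted.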