├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── README.md ├── detect_plate.py ├── detect_rec_plate.py ├── export_onnx.py ├── fonts ├── cv_puttext.py └── platech.ttf ├── imgs ├── Quicker_20220930_180919.png ├── Quicker_20220930_180938.png ├── double_yellow.jpg ├── hongkang1.jpg ├── police.jpg ├── shi_lin_guan.jpg ├── single_blue.jpg ├── single_green.jpg ├── single_yellow.jpg ├── tmpA5E3.png └── xue.jpg ├── plate_recognition ├── double_plate_split_merge.py ├── plateNet.py └── plate_rec.py ├── readme ├── 105384078.png └── README.md ├── tests ├── conftest.py ├── test_cli.py ├── test_cuda.py ├── test_engine.py ├── test_explorer.py ├── test_integrations.py └── test_python.py ├── train.py ├── train.sh ├── ultralytics ├── __init__.py ├── assets │ ├── bus.jpg │ └── zidane.jpg ├── cfg │ ├── __init__.py │ ├── datasets │ │ ├── Argoverse.yaml │ │ ├── DOTAv1.5.yaml │ │ ├── DOTAv1.yaml │ │ ├── GlobalWheat2020.yaml │ │ ├── ImageNet.yaml │ │ ├── Objects365.yaml │ │ ├── SKU-110K.yaml │ │ ├── VOC.yaml │ │ ├── VisDrone.yaml │ │ ├── coco-pose.yaml │ │ ├── coco.yaml │ │ ├── coco128-seg.yaml │ │ ├── coco128.yaml │ │ ├── coco8-pose.yaml │ │ ├── coco8-seg.yaml │ │ ├── coco8.yaml │ │ ├── dota8.yaml │ │ ├── open-images-v7.yaml │ │ ├── plate.yaml │ │ ├── tiger-pose.yaml │ │ └── xView.yaml │ ├── default.yaml │ ├── models │ │ ├── README.md │ │ ├── rt-detr │ │ │ ├── rtdetr-l.yaml │ │ │ ├── rtdetr-resnet101.yaml │ │ │ ├── rtdetr-resnet50.yaml │ │ │ └── rtdetr-x.yaml │ │ ├── v3 │ │ │ ├── yolov3-spp.yaml │ │ │ ├── yolov3-tiny.yaml │ │ │ └── yolov3.yaml │ │ ├── v5 │ │ │ ├── yolov5-p6.yaml │ │ │ └── yolov5.yaml │ │ ├── v6 │ │ │ └── yolov6.yaml │ │ └── v8 │ │ │ ├── yolov8-cls.yaml │ │ │ ├── yolov8-ghost-p2.yaml │ │ │ ├── yolov8-ghost-p6.yaml │ │ │ ├── yolov8-ghost.yaml │ │ │ ├── yolov8-obb.yaml │ │ │ ├── yolov8-p2.yaml │ │ │ ├── yolov8-p6.yaml │ │ │ ├── yolov8-pose-p6.yaml │ │ │ ├── yolov8-pose.yaml │ │ │ ├── yolov8-rtdetr.yaml │ │ │ ├── yolov8-seg-p6.yaml │ │ │ ├── yolov8-seg.yaml │ │ │ └── yolov8.yaml │ └── trackers │ │ ├── botsort.yaml │ │ └── bytetrack.yaml ├── data │ ├── __init__.py │ ├── annotator.py │ ├── augment.py │ ├── base.py │ ├── build.py │ ├── converter.py │ ├── dataset.py │ ├── explorer │ │ ├── __init__.py │ │ ├── explorer.py │ │ ├── gui │ │ │ ├── __init__.py │ │ │ └── dash.py │ │ └── utils.py │ ├── loaders.py │ ├── scripts │ │ ├── download_weights.sh │ │ ├── get_coco.sh │ │ ├── get_coco128.sh │ │ └── get_imagenet.sh │ ├── split_dota.py │ └── utils.py ├── engine │ ├── __init__.py │ ├── exporter.py │ ├── model.py │ ├── predictor.py │ ├── results.py │ ├── trainer.py │ ├── tuner.py │ └── validator.py ├── hub │ ├── __init__.py │ ├── auth.py │ ├── session.py │ └── utils.py ├── models │ ├── __init__.py │ ├── fastsam │ │ ├── __init__.py │ │ ├── model.py │ │ ├── predict.py │ │ ├── prompt.py │ │ ├── utils.py │ │ └── val.py │ ├── nas │ │ ├── __init__.py │ │ ├── model.py │ │ ├── predict.py │ │ └── val.py │ ├── rtdetr │ │ ├── __init__.py │ │ ├── model.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ ├── sam │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── build.py │ │ ├── model.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── decoders.py │ │ │ ├── encoders.py │ │ │ ├── sam.py │ │ │ ├── tiny_encoder.py │ │ │ └── transformer.py │ │ └── predict.py │ ├── utils │ │ ├── __init__.py │ │ ├── loss.py │ │ └── ops.py │ └── yolo │ │ ├── __init__.py │ │ ├── classify │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ │ ├── detect │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── 
val.py │ │ ├── model.py │ │ ├── obb │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ │ ├── pose │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ │ └── segment │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py ├── nn │ ├── __init__.py │ ├── autobackend.py │ ├── modules │ │ ├── __init__.py │ │ ├── block.py │ │ ├── conv.py │ │ ├── head.py │ │ ├── transformer.py │ │ └── utils.py │ └── tasks.py ├── solutions │ ├── __init__.py │ ├── ai_gym.py │ ├── distance_calculation.py │ ├── heatmap.py │ ├── object_counter.py │ └── speed_estimation.py ├── trackers │ ├── README.md │ ├── __init__.py │ ├── basetrack.py │ ├── bot_sort.py │ ├── byte_tracker.py │ ├── track.py │ └── utils │ │ ├── __init__.py │ │ ├── gmc.py │ │ ├── kalman_filter.py │ │ └── matching.py └── utils │ ├── __init__.py │ ├── autobatch.py │ ├── benchmarks.py │ ├── callbacks │ ├── __init__.py │ ├── base.py │ ├── clearml.py │ ├── comet.py │ ├── dvc.py │ ├── hub.py │ ├── mlflow.py │ ├── neptune.py │ ├── raytune.py │ ├── tensorboard.py │ └── wb.py │ ├── checks.py │ ├── dist.py │ ├── downloads.py │ ├── errors.py │ ├── files.py │ ├── instance.py │ ├── loss.py │ ├── metrics.py │ ├── ops.py │ ├── patches.py │ ├── plotting.py │ ├── tal.py │ ├── torch_utils.py │ ├── triton.py │ └── tuner.py └── weights ├── plate_rec_color.pth └── yolov8s.pt /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore 2 | # 首先忽略所有的文件 3 | * 4 | # 但是不忽略目录 5 | !*/ 6 | # 忽略一些指定的目录名 7 | ut/ 8 | runs/ 9 | .vscode/ 10 | build/ 11 | result/ 12 | onnx/ 13 | *.pyc 14 | pretrained_model/ 15 | # 不忽略下面指定的文件类型 16 | !*.cpp 17 | !*.h 18 | !*.hpp 19 | !*.c 20 | !.gitignore 21 | !*.py 22 | !*.sh 23 | !*.npy 24 | !*.jpg 25 | !*.pt 26 | !*.npy 27 | !*.pth 28 | !*.png 29 | !*.md 30 | !*.txt 31 | !*.yaml 32 | !*.ttf 33 | !*.cu -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Pre-commit hooks. 
For more information see https://github.com/pre-commit/pre-commit-hooks/blob/main/README.md 3 | # Optionally remove from local hooks with 'rm .git/hooks/pre-commit' 4 | 5 | # Define bot property if installed via https://github.com/marketplace/pre-commit-ci 6 | ci: 7 | autofix_prs: true 8 | autoupdate_commit_msg: "[pre-commit.ci] pre-commit suggestions" 9 | autoupdate_schedule: monthly 10 | submodules: true 11 | 12 | # Exclude directories (optional) 13 | # exclude: 'docs/' 14 | 15 | # Define repos to run 16 | repos: 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v4.5.0 19 | hooks: 20 | - id: end-of-file-fixer 21 | - id: trailing-whitespace 22 | - id: check-case-conflict 23 | # - id: check-yaml 24 | - id: check-docstring-first 25 | - id: detect-private-key 26 | 27 | - repo: https://github.com/asottile/pyupgrade 28 | rev: v3.15.0 29 | hooks: 30 | - id: pyupgrade 31 | name: Upgrade code 32 | 33 | - repo: https://github.com/astral-sh/ruff-pre-commit 34 | rev: v0.1.11 35 | hooks: 36 | - id: ruff 37 | args: [--fix] 38 | 39 | - repo: https://github.com/executablebooks/mdformat 40 | rev: 0.7.17 41 | hooks: 42 | - id: mdformat 43 | name: MD formatting 44 | additional_dependencies: 45 | - mdformat-gfm 46 | - mdformat-frontmatter 47 | - mdformat-mkdocs 48 | args: 49 | - --wrap=no 50 | - --number 51 | exclude: 'docs/.*\.md' 52 | # exclude: "README.md|README.zh-CN.md|CONTRIBUTING.md" 53 | 54 | - repo: https://github.com/codespell-project/codespell 55 | rev: v2.2.6 56 | hooks: 57 | - id: codespell 58 | exclude: "docs/de|docs/fr|docs/pt|docs/es|docs/mkdocs_de.yml" 59 | args: 60 | - --ignore-words-list=crate,nd,ned,strack,dota,ane,segway,fo,gool,winn,commend,bloc,nam,afterall 61 | 62 | - repo: https://github.com/hadialqattan/pycln 63 | rev: v2.4.0 64 | hooks: 65 | - id: pycln 66 | args: [--all] 67 | # 68 | # - repo: https://github.com/PyCQA/docformatter 69 | # rev: v1.7.5 70 | # hooks: 71 | # - id: docformatter 72 | 73 | # - repo: https://github.com/asottile/yesqa 74 | # rev: v1.4.0 75 | # hooks: 76 | # - id: yesqa 77 | 78 | # - repo: https://github.com/asottile/dead 79 | # rev: v1.5.0 80 | # hooks: 81 | # - id: dead 82 | 83 | # - repo: https://github.com/ultralytics/pre-commit 84 | # rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23 85 | # hooks: 86 | # - id: capitalize-comments 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## **yolov8车牌识别算法,支持12种中文车牌类型** 2 | 3 | #### **图片测试demo:** 4 | 5 | 直接运行detect_plate.py 或者运行如下命令行: 6 | 7 | ``` 8 | python detect_rec_plate.py --detect_model weights/yolov8s.pt --rec_model weights/plate_rec_color.pth --image_path imgs --output result 9 | ``` 10 | 11 | 测试文件夹imgs,结果保存再 result 文件夹中 12 | 13 | ## **车牌检测训练** 14 | 15 | 车牌检测训练链接如下: 16 | 17 | [车牌检测训练](https://github.com/we0091234/yolov8-plate/tree/master/readme) 18 | 19 | ## **车牌识别训练** 20 | 21 | 车牌识别训练链接如下: 22 | 23 | [车牌识别训练](https://github.com/we0091234/crnn_plate_recognition) 24 | 25 | #### **支持如下:** 26 | 27 | - [X] 1.单行蓝牌 28 | - [X] 2.单行黄牌 29 | - [X] 3.新能源车牌 30 | - [X] 4.白色警用车牌 31 | - [X] 5.教练车牌 32 | - [X] 6.武警车牌 33 | - [X] 7.双层黄牌 34 | - [X] 8.双层白牌 35 | - [X] 9.使馆车牌 36 | - [X] 10.港澳粤Z牌 37 | - [X] 11.双层绿牌 38 | - [X] 12.民航车牌 39 | 40 | ## References 41 | 42 | * [https://github.com/derronqi/yolov8-face](https://github.com/derronqi/yolov8-face) 43 | * [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) 44 | 45 | ## 联系 46 | 47 | **有问题可以提issues 
或者加qq群:769809695(新群) 837982567(已满) 询问** 48 | -------------------------------------------------------------------------------- /detect_plate.py: -------------------------------------------------------------------------------- 1 | from ultralytics import YOLO 2 | 3 | # 加载预训练的YOLOv8n模型 4 | model = YOLO('runs/detect/train2/weights/best.pt') 5 | 6 | # 在'bus.jpg'上运行推理,并附加参数 7 | model.predict('/mnt/mydisk/xiaolei/code/plate/plate_detect/Chinese_license_plate_detection_recognition/imgs/double_yellow.jpg', save=True, imgsz=320, conf=0.5) -------------------------------------------------------------------------------- /export_onnx.py: -------------------------------------------------------------------------------- 1 | from ultralytics import YOLO 2 | 3 | # Load a model 4 | model = YOLO("yolov8n.yaml") # build a new model from scratch 5 | model = YOLO("runs/detect/train2/weights/best.pt") # load a pretrained model (recommended for training) 6 | 7 | # Use the model 8 | # model.train(data="coco128.yaml", epochs=3) # train the model 9 | # metrics = model.val() # evaluate model performance on the validation set 10 | # results = model("https://ultralytics.com/images/bus.jpg") # predict on an image 11 | path = model.export(format="onnx") # export the model to ONNX format -------------------------------------------------------------------------------- /fonts/cv_puttext.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=20): 6 | if (isinstance(img, np.ndarray)): #判断是否OpenCV图片类型 7 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 8 | draw = ImageDraw.Draw(img) 9 | fontText = ImageFont.truetype( 10 | "fonts/platech.ttf", textSize, encoding="utf-8") 11 | draw.text((left, top), text, textColor, font=fontText) 12 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 13 | 14 | if __name__ == '__main__': 15 | imgPath = "result.jpg" 16 | img = cv2.imread(imgPath) 17 | 18 | saveImg = cv2ImgAddText(img, '中国加油!', 50, 100, (255, 0, 0), 50) 19 | 20 | # cv2.imshow('display',saveImg) 21 | cv2.imwrite('save.jpg',saveImg) 22 | # cv2.waitKey() -------------------------------------------------------------------------------- /fonts/platech.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/fonts/platech.ttf -------------------------------------------------------------------------------- /imgs/Quicker_20220930_180919.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/Quicker_20220930_180919.png -------------------------------------------------------------------------------- /imgs/Quicker_20220930_180938.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/Quicker_20220930_180938.png -------------------------------------------------------------------------------- /imgs/double_yellow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/double_yellow.jpg 
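
The detection demo above (detect_plate.py) only localizes plates; character recognition lives in plate_recognition/plate_rec.py further down in this repo. Below is a minimal sketch of how the two could be wired together in Python — an assumption-laden illustration, not the project's reference code (detect_rec_plate.py is the actual entry point). It assumes the weight paths from the README (weights/yolov8s.pt and weights/plate_rec_color.pth) and single-layer plates; double-layer plates would additionally need get_split_merge from plate_recognition/double_plate_split_merge.py.

```
# Hypothetical sketch: combine the YOLOv8 plate detector with the CRNN-style recognizer.
# Weight paths and the sample image are assumptions taken from the README / imgs folder.
import cv2
import torch
from ultralytics import YOLO
from plate_recognition.plate_rec import init_model, get_plate_result

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = YOLO('weights/yolov8s.pt')                                  # plate detector (README weights)
rec_model = init_model(device, 'weights/plate_rec_color.pth', is_color=True)  # recognizer + color head

img = cv2.imread('imgs/single_blue.jpg')
results = detector.predict(img, conf=0.5)                              # one image -> one Results object
for x1, y1, x2, y2 in results[0].boxes.xyxy.cpu().numpy().astype(int):
    roi = img[y1:y2, x1:x2]                                            # crop the detected plate region
    plate, prob, plate_color, color_conf = get_plate_result(roi, device, rec_model, is_color=True)
    print(plate, plate_color)                                          # e.g. plate string and its color
```
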
-------------------------------------------------------------------------------- /imgs/hongkang1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/hongkang1.jpg -------------------------------------------------------------------------------- /imgs/police.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/police.jpg -------------------------------------------------------------------------------- /imgs/shi_lin_guan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/shi_lin_guan.jpg -------------------------------------------------------------------------------- /imgs/single_blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/single_blue.jpg -------------------------------------------------------------------------------- /imgs/single_green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/single_green.jpg -------------------------------------------------------------------------------- /imgs/single_yellow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/single_yellow.jpg -------------------------------------------------------------------------------- /imgs/tmpA5E3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/tmpA5E3.png -------------------------------------------------------------------------------- /imgs/xue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/xue.jpg -------------------------------------------------------------------------------- /plate_recognition/double_plate_split_merge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | def get_split_merge(img): 5 | h,w,c = img.shape 6 | img_upper = img[0:int(5/12*h),:] 7 | img_lower = img[int(1/3*h):,:] 8 | img_upper = cv2.resize(img_upper,(img_lower.shape[1],img_lower.shape[0])) 9 | new_img = np.hstack((img_upper,img_lower)) 10 | return new_img 11 | 12 | if __name__=="__main__": 13 | img = cv2.imread("double_plate/tmp8078.png") 14 | new_img =get_split_merge(img) 15 | cv2.imwrite("double_plate/new.jpg",new_img) 16 | -------------------------------------------------------------------------------- /plate_recognition/plate_rec.py: -------------------------------------------------------------------------------- 1 | from plate_recognition.plateNet import myNet_ocr,myNet_ocr_color 2 | import torch 3 | import torch.nn as nn 4 | import cv2 5 | import numpy as np 6 | import os 7 | import time 8 | import sys 9 | 10 | def cv_imread(path): #可以读取中文路径的图片 11 | 
img=cv2.imdecode(np.fromfile(path,dtype=np.uint8),-1) 12 | return img 13 | 14 | def allFilePath(rootPath,allFIleList): 15 | fileList = os.listdir(rootPath) 16 | for temp in fileList: 17 | if os.path.isfile(os.path.join(rootPath,temp)): 18 | if temp.endswith('.jpg') or temp.endswith('.png') or temp.endswith('.JPG'): 19 | allFIleList.append(os.path.join(rootPath,temp)) 20 | else: 21 | allFilePath(os.path.join(rootPath,temp),allFIleList) 22 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu") 23 | color=['黑色','蓝色','绿色','白色','黄色'] 24 | plateName=r"#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品" 25 | mean_value,std_value=(0.588,0.193) 26 | def decodePlate(preds): 27 | pre=0 28 | newPreds=[] 29 | index=[] 30 | for i in range(len(preds)): 31 | if preds[i]!=0 and preds[i]!=pre: 32 | newPreds.append(preds[i]) 33 | index.append(i) 34 | pre=preds[i] 35 | return newPreds,index 36 | 37 | def image_processing(img,device): 38 | img = cv2.resize(img, (168,48)) 39 | img = np.reshape(img, (48, 168, 3)) 40 | 41 | # normalize 42 | img = img.astype(np.float32) 43 | img = (img / 255. - mean_value) / std_value 44 | img = img.transpose([2, 0, 1]) 45 | img = torch.from_numpy(img) 46 | 47 | img = img.to(device) 48 | img = img.view(1, *img.size()) 49 | return img 50 | 51 | def get_plate_result(img,device,model,is_color=False): 52 | input = image_processing(img,device) 53 | if is_color: #是否识别颜色 54 | preds,color_preds = model(input) 55 | color_preds = torch.softmax(color_preds,dim=-1) 56 | color_conf,color_index = torch.max(color_preds,dim=-1) 57 | color_conf=color_conf.item() 58 | else: 59 | preds = model(input) 60 | preds=torch.softmax(preds,dim=-1) 61 | prob,index=preds.max(dim=-1) 62 | index = index.view(-1).detach().cpu().numpy() 63 | prob=prob.view(-1).detach().cpu().numpy() 64 | 65 | 66 | # preds=preds.view(-1).detach().cpu().numpy() 67 | newPreds,new_index=decodePlate(index) 68 | prob=prob[new_index] 69 | plate="" 70 | for i in newPreds: 71 | plate+=plateName[i] 72 | # if not (plate[0] in plateName[1:44] ): 73 | # return "" 74 | if is_color: 75 | return plate,prob,color[color_index],color_conf #返回车牌号以及每个字符的概率,以及颜色,和颜色的概率 76 | else: 77 | return plate,prob 78 | 79 | def init_model(device,model_path,is_color = False): 80 | # print( print(sys.path)) 81 | # model_path ="plate_recognition/model/checkpoint_61_acc_0.9715.pth" 82 | check_point = torch.load(model_path,map_location=device) 83 | model_state=check_point['state_dict'] 84 | cfg=check_point['cfg'] 85 | color_classes=0 86 | if is_color: 87 | color_classes=5 #颜色类别数 88 | model = myNet_ocr_color(num_classes=len(plateName),export=True,cfg=cfg,color_num=color_classes) 89 | 90 | model.load_state_dict(model_state,strict=False) 91 | model.to(device) 92 | model.eval() 93 | return model 94 | 95 | # model = init_model(device) 96 | if __name__ == '__main__': 97 | model_path = r"weights/plate_rec_color.pth" 98 | image_path ="images/tmp2424.png" 99 | testPath = r"/mnt/Gpan/Mydata/pytorchPorject/CRNN/crnn_plate_recognition/images" 100 | fileList=[] 101 | allFilePath(testPath,fileList) 102 | # result = get_plate_result(image_path,device) 103 | # print(result) 104 | is_color = False 105 | model = init_model(device,model_path,is_color=is_color) 106 | right=0 107 | begin = time.time() 108 | 109 | for imge_path in fileList: 110 | img=cv2.imread(imge_path) 111 | if is_color: 112 | plate,_,plate_color,_=get_plate_result(img,device,model,is_color=is_color) 113 | print(plate) 114 | else: 115 | 
plate,_=get_plate_result(img,device,model,is_color=is_color) 116 | print(plate,imge_path) 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /readme/105384078.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/readme/105384078.png -------------------------------------------------------------------------------- /readme/README.md: -------------------------------------------------------------------------------- 1 | ### **车牌检测训练** 2 | 3 | 1. **下载数据集:** 数据集可以添加vx:we0091234 (注明来意)获取 收费30 介意勿扰 数据从CCPD数据集中选取的一部分,也有自己收集的一部分并转换的 4 | 数据集格式为yolo格式: 5 | 6 | ``` 7 | label x y w h 8 | ``` 9 | 2. **修改ultralytics/datasets/yolov8-plate.yaml train和val路径,换成你的数据路径** 10 | 11 | ``` 12 | train: /mnt/mydisk/xiaolei/plate_detect/new_train_data # train images (relative to 'path') 4 images 13 | val: /mnt/mydisk/xiaolei/plate_detect/new_val_data # val images (relative to 'path') 4 images 14 | 15 | # Classes for DOTA 1.0 16 | names: 17 | 0: single 18 | 1: double 19 | 20 | ``` 21 | 3. **训练** 22 | 23 | ``` 24 | yolo task=detect mode=train model=yolov8s.yaml data=./ultralytics/cfg/datasets/plate.yaml epochs=120 batch=32 imgsz=640 pretrained=False optimizer=SGD 25 | ``` 26 | 27 | 结果存在run文件夹中 28 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import shutil 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | TMP = Path(__file__).resolve().parent / 'tmp' # temp directory for test files 9 | 10 | 11 | def pytest_addoption(parser): 12 | """ 13 | Add custom command-line options to pytest. 14 | 15 | Args: 16 | parser (pytest.config.Parser): The pytest parser object. 17 | """ 18 | parser.addoption('--slow', action='store_true', default=False, help='Run slow tests') 19 | 20 | 21 | def pytest_configure(config): 22 | """ 23 | Register custom markers to avoid pytest warnings. 24 | 25 | Args: 26 | config (pytest.config.Config): The pytest config object. 27 | """ 28 | config.addinivalue_line('markers', 'slow: mark test as slow to run') 29 | 30 | 31 | def pytest_runtest_setup(item): 32 | """ 33 | Setup hook to skip tests marked as slow if the --slow option is not provided. 34 | 35 | Args: 36 | item (pytest.Item): The test item object. 37 | """ 38 | if 'slow' in item.keywords and not item.config.getoption('--slow'): 39 | pytest.skip('skip slow tests unless --slow is set') 40 | 41 | 42 | def pytest_collection_modifyitems(config, items): 43 | """ 44 | Modify the list of test items to remove tests marked as slow if the --slow option is not provided. 45 | 46 | Args: 47 | config (pytest.config.Config): The pytest config object. 48 | items (list): List of test items to be executed. 49 | """ 50 | if not config.getoption('--slow'): 51 | # Remove the item entirely from the list of test items if it's marked as 'slow' 52 | items[:] = [item for item in items if 'slow' not in item.keywords] 53 | 54 | 55 | def pytest_sessionstart(session): 56 | """ 57 | Initialize session configurations for pytest. 58 | 59 | This function is automatically called by pytest after the 'Session' object has been created but before performing 60 | test collection. It sets the initial seeds and prepares the temporary directory for the test session. 
61 | 62 | Args: 63 | session (pytest.Session): The pytest session object. 64 | """ 65 | from ultralytics.utils.torch_utils import init_seeds 66 | 67 | init_seeds() 68 | shutil.rmtree(TMP, ignore_errors=True) # delete any existing tests/tmp directory 69 | TMP.mkdir(parents=True, exist_ok=True) # create a new empty directory 70 | 71 | 72 | def pytest_terminal_summary(terminalreporter, exitstatus, config): 73 | """ 74 | Cleanup operations after pytest session. 75 | 76 | This function is automatically called by pytest at the end of the entire test session. It removes certain files 77 | and directories used during testing. 78 | 79 | Args: 80 | terminalreporter (pytest.terminal.TerminalReporter): The terminal reporter object. 81 | exitstatus (int): The exit status of the test run. 82 | config (pytest.config.Config): The pytest config object. 83 | """ 84 | from ultralytics.utils import WEIGHTS_DIR 85 | 86 | # Remove files 87 | models = [path for x in ['*.onnx', '*.torchscript'] for path in WEIGHTS_DIR.rglob(x)] 88 | for file in ['bus.jpg', 'yolov8n.onnx', 'yolov8n.torchscript'] + models: 89 | Path(file).unlink(missing_ok=True) 90 | 91 | # Remove directories 92 | models = [path for x in ['*.mlpackage', '*_openvino_model'] for path in WEIGHTS_DIR.rglob(x)] 93 | for directory in [TMP.parents[1] / '.pytest_cache', TMP] + models: 94 | shutil.rmtree(directory, ignore_errors=True) 95 | -------------------------------------------------------------------------------- /tests/test_cuda.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import pytest 4 | import torch 5 | 6 | from ultralytics import YOLO 7 | from ultralytics.utils import ASSETS, WEIGHTS_DIR, checks 8 | 9 | CUDA_IS_AVAILABLE = checks.cuda_is_available() 10 | CUDA_DEVICE_COUNT = checks.cuda_device_count() 11 | 12 | MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt' # test spaces in path 13 | DATA = 'coco8.yaml' 14 | BUS = ASSETS / 'bus.jpg' 15 | 16 | 17 | def test_checks(): 18 | """Validate CUDA settings against torch CUDA functions.""" 19 | assert torch.cuda.is_available() == CUDA_IS_AVAILABLE 20 | assert torch.cuda.device_count() == CUDA_DEVICE_COUNT 21 | 22 | 23 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 24 | def test_train(): 25 | """Test model training on a minimal dataset.""" 26 | device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1] 27 | YOLO(MODEL).train(data=DATA, imgsz=64, epochs=1, device=device) # requires imgsz>=64 28 | 29 | 30 | @pytest.mark.slow 31 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 32 | def test_predict_multiple_devices(): 33 | """Validate model prediction on multiple devices.""" 34 | model = YOLO('yolov8n.pt') 35 | model = model.cpu() 36 | assert str(model.device) == 'cpu' 37 | _ = model(BUS) # CPU inference 38 | assert str(model.device) == 'cpu' 39 | 40 | model = model.to('cuda:0') 41 | assert str(model.device) == 'cuda:0' 42 | _ = model(BUS) # CUDA inference 43 | assert str(model.device) == 'cuda:0' 44 | 45 | model = model.cpu() 46 | assert str(model.device) == 'cpu' 47 | _ = model(BUS) # CPU inference 48 | assert str(model.device) == 'cpu' 49 | 50 | model = model.cuda() 51 | assert str(model.device) == 'cuda:0' 52 | _ = model(BUS) # CUDA inference 53 | assert str(model.device) == 'cuda:0' 54 | 55 | 56 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 57 | def test_autobatch(): 58 | """Check batch size for YOLO model using autobatch.""" 59 | 
from ultralytics.utils.autobatch import check_train_batch_size 60 | 61 | check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True) 62 | 63 | 64 | @pytest.mark.slow 65 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 66 | def test_utils_benchmarks(): 67 | """Profile YOLO models for performance benchmarks.""" 68 | from ultralytics.utils.benchmarks import ProfileModels 69 | 70 | # Pre-export a dynamic engine model to use dynamic inference 71 | YOLO(MODEL).export(format='engine', imgsz=32, dynamic=True, batch=1) 72 | ProfileModels([MODEL], imgsz=32, half=False, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile() 73 | 74 | 75 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 76 | def test_predict_sam(): 77 | """Test SAM model prediction with various prompts.""" 78 | from ultralytics import SAM 79 | from ultralytics.models.sam import Predictor as SAMPredictor 80 | 81 | # Load a model 82 | model = SAM(WEIGHTS_DIR / 'sam_b.pt') 83 | 84 | # Display model information (optional) 85 | model.info() 86 | 87 | # Run inference 88 | model(BUS, device=0) 89 | 90 | # Run inference with bboxes prompt 91 | model(BUS, bboxes=[439, 437, 524, 709], device=0) 92 | 93 | # Run inference with points prompt 94 | model(ASSETS / 'zidane.jpg', points=[900, 370], labels=[1], device=0) 95 | 96 | # Create SAMPredictor 97 | overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model=WEIGHTS_DIR / 'mobile_sam.pt') 98 | predictor = SAMPredictor(overrides=overrides) 99 | 100 | # Set image 101 | predictor.set_image(ASSETS / 'zidane.jpg') # set with image file 102 | # predictor(bboxes=[439, 437, 524, 709]) 103 | # predictor(points=[900, 370], labels=[1]) 104 | 105 | # Reset image 106 | predictor.reset_image() 107 | -------------------------------------------------------------------------------- /tests/test_explorer.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics import Explorer 4 | from ultralytics.utils import ASSETS 5 | 6 | import PIL 7 | 8 | 9 | def test_similarity(): 10 | """Test similarity calculations and SQL queries for correctness and response length.""" 11 | exp = Explorer() 12 | exp.create_embeddings_table() 13 | similar = exp.get_similar(idx=1) 14 | assert len(similar) == 25 15 | similar = exp.get_similar(img=ASSETS / 'zidane.jpg') 16 | assert len(similar) == 25 17 | similar = exp.get_similar(idx=[1, 2], limit=10) 18 | assert len(similar) == 10 19 | sim_idx = exp.similarity_index() 20 | assert len(sim_idx) > 0 21 | sql = exp.sql_query("WHERE labels LIKE '%person%'") 22 | assert len(sql) > 0 23 | 24 | 25 | def test_det(): 26 | """Test detection functionalities and ensure the embedding table has bounding boxes.""" 27 | exp = Explorer(data='coco8.yaml', model='yolov8n.pt') 28 | exp.create_embeddings_table(force=True) 29 | assert len(exp.table.head()['bboxes']) > 0 30 | similar = exp.get_similar(idx=[1, 2], limit=10) 31 | assert len(similar) > 0 32 | # This is a loose test, just checks errors not correctness 33 | similar = exp.plot_similar(idx=[1, 2], limit=10) 34 | assert isinstance(similar, PIL.Image.Image) 35 | 36 | 37 | def test_seg(): 38 | """Test segmentation functionalities and verify the embedding table includes masks.""" 39 | exp = Explorer(data='coco8-seg.yaml', model='yolov8n-seg.pt') 40 | exp.create_embeddings_table(force=True) 41 | assert len(exp.table.head()['masks']) > 0 42 | similar = exp.get_similar(idx=[1, 2], 
limit=10) 43 | assert len(similar) > 0 44 | similar = exp.plot_similar(idx=[1, 2], limit=10) 45 | assert isinstance(similar, PIL.Image.Image) 46 | 47 | 48 | def test_pose(): 49 | """Test pose estimation functionalities and check the embedding table for keypoints.""" 50 | exp = Explorer(data='coco8-pose.yaml', model='yolov8n-pose.pt') 51 | exp.create_embeddings_table(force=True) 52 | assert len(exp.table.head()['keypoints']) > 0 53 | similar = exp.get_similar(idx=[1, 2], limit=10) 54 | assert len(similar) > 0 55 | similar = exp.plot_similar(idx=[1, 2], limit=10) 56 | assert isinstance(similar, PIL.Image.Image) 57 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | # os.environ["OMP_NUM_THREADS"]='2' 3 | 4 | from ultralytics import YOLO 5 | # Load a model 6 | model = YOLO('yolov8n.yaml') # build a new model from YAML 7 | model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) 8 | 9 | # Train the model 10 | model.train(data='/mnt/mydisk/xiaolei/code/plate/plate_detect/ultralytics-main/ultralytics/cfg/datasets/plate.yaml', epochs=120, imgsz=640, batch=32, device=[0]) -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | yolo task=detect mode=train model=yolov8s.yaml data=./ultralytics/cfg/datasets/plate.yaml epochs=120 batch=32 imgsz=640 pretrained=False optimizer=SGD -------------------------------------------------------------------------------- /ultralytics/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | __version__ = "8.1.2" 4 | 5 | from ultralytics.data.explorer.explorer import Explorer 6 | from ultralytics.models import RTDETR, SAM, YOLO 7 | from ultralytics.models.fastsam import FastSAM 8 | from ultralytics.models.nas import NAS 9 | from ultralytics.utils import SETTINGS as settings 10 | from ultralytics.utils.checks import check_yolo as checks 11 | from ultralytics.utils.downloads import download 12 | 13 | __all__ = "__version__", "YOLO", "NAS", "SAM", "FastSAM", "RTDETR", "checks", "download", "settings", "Explorer" 14 | -------------------------------------------------------------------------------- /ultralytics/assets/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/ultralytics/assets/bus.jpg -------------------------------------------------------------------------------- /ultralytics/assets/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/ultralytics/assets/zidane.jpg -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/Argoverse.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/ 4 | # Example usage: yolo train data=Argoverse.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── Argoverse ← 
downloads here (31.5 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/Argoverse # dataset root dir 12 | train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images 13 | val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images 14 | test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: bus 23 | 5: truck 24 | 6: traffic_light 25 | 7: stop_sign 26 | 27 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 28 | download: | 29 | import json 30 | from tqdm import tqdm 31 | from ultralytics.utils.downloads import download 32 | from pathlib import Path 33 | 34 | def argoverse2yolo(set): 35 | labels = {} 36 | a = json.load(open(set, "rb")) 37 | for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."): 38 | img_id = annot['image_id'] 39 | img_name = a['images'][img_id]['name'] 40 | img_label_name = f'{img_name[:-3]}txt' 41 | 42 | cls = annot['category_id'] # instance class id 43 | x_center, y_center, width, height = annot['bbox'] 44 | x_center = (x_center + width / 2) / 1920.0 # offset and scale 45 | y_center = (y_center + height / 2) / 1200.0 # offset and scale 46 | width /= 1920.0 # scale 47 | height /= 1200.0 # scale 48 | 49 | img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']] 50 | if not img_dir.exists(): 51 | img_dir.mkdir(parents=True, exist_ok=True) 52 | 53 | k = str(img_dir / img_label_name) 54 | if k not in labels: 55 | labels[k] = [] 56 | labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n") 57 | 58 | for k in labels: 59 | with open(k, "w") as f: 60 | f.writelines(labels[k]) 61 | 62 | 63 | # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link) 64 | dir = Path(yaml['path']) # dataset root dir 65 | urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link'] 66 | download(urls, dir=dir) 67 | 68 | # Convert 69 | annotations_dir = 'Argoverse-HD/annotations/' 70 | (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images' 71 | for d in "train.json", "val.json": 72 | argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels 73 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/DOTAv1.5.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University 3 | # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/ 4 | # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── dota1.5 ← downloads here (2GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/DOTAv1.5 # dataset root dir 12 | train: images/train # train images (relative to 'path') 1411 images 13 | val: images/val # val images (relative to 'path') 458 images 14 | test: images/test # test images (optional) 937 images 15 | 16 | # Classes for DOTA 1.5 17 | names: 18 | 0: plane 19 | 1: ship 20 | 2: storage tank 21 | 3: baseball diamond 22 | 4: tennis court 23 | 5: basketball court 24 | 6: ground track field 25 | 7: harbor 26 | 8: bridge 27 | 9: large vehicle 28 | 10: small vehicle 29 | 11: helicopter 30 | 12: roundabout 31 | 13: soccer ball field 32 | 14: swimming pool 33 | 15: container crane 34 | 35 | # Download script/URL (optional) 36 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip 37 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/DOTAv1.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University 3 | # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/ 4 | # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── dota1 ← downloads here (2GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/DOTAv1 # dataset root dir 12 | train: images/train # train images (relative to 'path') 1411 images 13 | val: images/val # val images (relative to 'path') 458 images 14 | test: images/test # test images (optional) 937 images 15 | 16 | # Classes for DOTA 1.0 17 | names: 18 | 0: plane 19 | 1: ship 20 | 2: storage tank 21 | 3: baseball diamond 22 | 4: tennis court 23 | 5: basketball court 24 | 6: ground track field 25 | 7: harbor 26 | 8: bridge 27 | 9: large vehicle 28 | 10: small vehicle 29 | 11: helicopter 30 | 12: roundabout 31 | 13: soccer ball field 32 | 14: swimming pool 33 | 34 | # Download script/URL (optional) 35 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip 36 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/GlobalWheat2020.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/ 4 | # Example usage: yolo train data=GlobalWheat2020.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── GlobalWheat2020 ← downloads here (7.0 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/GlobalWheat2020 # dataset root dir 12 | train: # train images (relative to 'path') 3422 images 13 | - images/arvalis_1 14 | - images/arvalis_2 15 | - images/arvalis_3 16 | - images/ethz_1 17 | - images/rres_1 18 | - images/inrae_1 19 | - images/usask_1 20 | val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1) 21 | - images/ethz_1 22 | test: # test images (optional) 1276 images 23 | - images/utokyo_1 24 | - images/utokyo_2 25 | - images/nau_1 26 | - images/uq_1 27 | 28 | # Classes 29 | names: 30 | 0: wheat_head 31 | 32 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 33 | download: | 34 | from ultralytics.utils.downloads import download 35 | from pathlib import Path 36 | 37 | # Download 38 | dir = Path(yaml['path']) # dataset root dir 39 | urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', 40 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] 41 | download(urls, dir=dir) 42 | 43 | # Make Directories 44 | for p in 'annotations', 'images', 'labels': 45 | (dir / p).mkdir(parents=True, exist_ok=True) 46 | 47 | # Move 48 | for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \ 49 | 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1': 50 | (dir / 'global-wheat-codalab-official' / p).rename(dir / 'images' / p) # move to /images 51 | f = (dir / 'global-wheat-codalab-official' / p).with_suffix('.json') # json file 52 | if f.exists(): 53 | f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations 54 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/SKU-110K.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/ 4 | # Example usage: yolo train data=SKU-110K.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── SKU-110K ← downloads here (13.6 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/SKU-110K # dataset root dir 12 | train: train.txt # train images (relative to 'path') 8219 images 13 | val: val.txt # val images (relative to 'path') 588 images 14 | test: test.txt # test images (optional) 2936 images 15 | 16 | # Classes 17 | names: 18 | 0: object 19 | 20 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 21 | download: | 22 | import shutil 23 | from pathlib import Path 24 | 25 | import numpy as np 26 | import pandas as pd 27 | from tqdm import tqdm 28 | 29 | from ultralytics.utils.downloads import download 30 | from ultralytics.utils.ops import xyxy2xywh 31 | 32 | # Download 33 | dir = Path(yaml['path']) # dataset root dir 34 | parent = Path(dir.parent) # download dir 35 | urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'] 36 | download(urls, dir=parent) 37 | 38 | # Rename directories 39 | if dir.exists(): 40 | shutil.rmtree(dir) 41 | (parent / 'SKU110K_fixed').rename(dir) # rename dir 42 | (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir 43 | 44 | # Convert labels 45 | names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names 46 | for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv': 47 | x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations 48 | images, unique_images = x[:, 0], np.unique(x[:, 0]) 49 | with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f: 50 | f.writelines(f'./images/{s}\n' for s in unique_images) 51 | for im in tqdm(unique_images, desc=f'Converting {dir / d}'): 52 | cls = 0 # single-class dataset 53 | with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f: 54 | for r in x[images == im]: 55 | w, h = r[6], r[7] # image width, height 56 | xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance 57 | f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label 58 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/VOC.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford 3 | # Documentation: # Documentation: https://docs.ultralytics.com/datasets/detect/voc/ 4 | # Example usage: yolo train data=VOC.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── VOC ← downloads here (2.8 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/VOC 12 | train: # train images (relative to 'path') 16551 images 13 | - images/train2012 14 | - images/train2007 15 | - images/val2012 16 | - images/val2007 17 | val: # val images (relative to 'path') 4952 images 18 | - images/test2007 19 | test: # test images (optional) 20 | - images/test2007 21 | 22 | # Classes 23 | names: 24 | 0: aeroplane 25 | 1: bicycle 26 | 2: bird 27 | 3: boat 28 | 4: bottle 29 | 5: bus 30 | 6: car 31 | 7: cat 32 | 8: chair 33 | 9: cow 34 | 10: diningtable 35 | 11: dog 36 | 12: horse 37 | 13: motorbike 38 | 14: person 39 | 15: pottedplant 40 | 16: sheep 41 | 17: sofa 42 | 18: train 43 | 19: tvmonitor 44 | 45 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 46 | download: | 47 | import xml.etree.ElementTree as ET 48 | 49 | from tqdm import tqdm 50 | from ultralytics.utils.downloads import download 51 | from pathlib import Path 52 | 53 | def convert_label(path, lb_path, year, image_id): 54 | def convert_box(size, box): 55 | dw, dh = 1. / size[0], 1. / size[1] 56 | x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] 57 | return x * dw, y * dh, w * dw, h * dh 58 | 59 | in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml') 60 | out_file = open(lb_path, 'w') 61 | tree = ET.parse(in_file) 62 | root = tree.getroot() 63 | size = root.find('size') 64 | w = int(size.find('width').text) 65 | h = int(size.find('height').text) 66 | 67 | names = list(yaml['names'].values()) # names list 68 | for obj in root.iter('object'): 69 | cls = obj.find('name').text 70 | if cls in names and int(obj.find('difficult').text) != 1: 71 | xmlbox = obj.find('bndbox') 72 | bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')]) 73 | cls_id = names.index(cls) # class id 74 | out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n') 75 | 76 | 77 | # Download 78 | dir = Path(yaml['path']) # dataset root dir 79 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 80 | urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images 81 | f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images 82 | f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images 83 | download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True) # download and unzip over existing paths (required) 84 | 85 | # Convert 86 | path = dir / 'images/VOCdevkit' 87 | for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'): 88 | imgs_path = dir / 'images' / f'{image_set}{year}' 89 | lbs_path = dir / 'labels' / f'{image_set}{year}' 90 | imgs_path.mkdir(exist_ok=True, parents=True) 91 | lbs_path.mkdir(exist_ok=True, parents=True) 92 | 93 | with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f: 94 | image_ids = f.read().strip().split() 95 | for id in tqdm(image_ids, desc=f'{image_set}{year}'): 96 | f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path 97 | lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path 98 | f.rename(imgs_path / f.name) # move image 99 | convert_label(path, lb_path, year, id) # convert labels to YOLO format 100 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/VisDrone.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # VisDrone2019-DET dataset 
https://github.com/VisDrone/VisDrone-Dataset by Tianjin University 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/ 4 | # Example usage: yolo train data=VisDrone.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── VisDrone ← downloads here (2.3 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/VisDrone # dataset root dir 12 | train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images 13 | val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images 14 | test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images 15 | 16 | # Classes 17 | names: 18 | 0: pedestrian 19 | 1: people 20 | 2: bicycle 21 | 3: car 22 | 4: van 23 | 5: truck 24 | 6: tricycle 25 | 7: awning-tricycle 26 | 8: bus 27 | 9: motor 28 | 29 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 30 | download: | 31 | import os 32 | from pathlib import Path 33 | 34 | from ultralytics.utils.downloads import download 35 | 36 | def visdrone2yolo(dir): 37 | from PIL import Image 38 | from tqdm import tqdm 39 | 40 | def convert_box(size, box): 41 | # Convert VisDrone box to YOLO xywh box 42 | dw = 1. / size[0] 43 | dh = 1. / size[1] 44 | return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh 45 | 46 | (dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory 47 | pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}') 48 | for f in pbar: 49 | img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size 50 | lines = [] 51 | with open(f, 'r') as file: # read annotation.txt 52 | for row in [x.split(',') for x in file.read().strip().splitlines()]: 53 | if row[4] == '0': # VisDrone 'ignored regions' class 0 54 | continue 55 | cls = int(row[5]) - 1 56 | box = convert_box(img_size, tuple(map(int, row[:4]))) 57 | lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n") 58 | with open(str(f).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}'), 'w') as fl: 59 | fl.writelines(lines) # write label.txt 60 | 61 | 62 | # Download 63 | dir = Path(yaml['path']) # dataset root dir 64 | urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip', 65 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip', 66 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip', 67 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip'] 68 | download(urls, dir=dir, curl=True, threads=4) 69 | 70 | # Convert 71 | for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev': 72 | visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels 73 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO 2017 dataset https://cocodataset.org by Microsoft 3 | # Documentation: https://docs.ultralytics.com/datasets/pose/coco/ 4 | # Example usage: yolo train data=coco-pose.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco-pose ← downloads here (20.1 GB) 9 | 10 | # Train/val/test sets as 1) dir: 
path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco-pose # dataset root dir 12 | train: train2017.txt # train images (relative to 'path') 118287 images 13 | val: val2017.txt # val images (relative to 'path') 5000 images 14 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 15 | 16 | # Keypoints 17 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 18 | flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 19 | 20 | # Classes 21 | names: 22 | 0: person 23 | 24 | # Download script/URL (optional) 25 | download: | 26 | from ultralytics.utils.downloads import download 27 | from pathlib import Path 28 | 29 | # Download labels 30 | dir = Path(yaml['path']) # dataset root dir 31 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 32 | urls = [url + 'coco2017labels-pose.zip'] # labels 33 | download(urls, dir=dir.parent) 34 | # Download data 35 | urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 36 | 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 37 | 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 38 | download(urls, dir=dir / 'images', threads=3) 39 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO 2017 dataset https://cocodataset.org by Microsoft 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/coco/ 4 | # Example usage: yolo train data=coco.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco ← downloads here (20.1 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco # dataset root dir 12 | train: train2017.txt # train images (relative to 'path') 118287 images 13 | val: val2017.txt # val images (relative to 'path') 5000 images 14 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: | 101 | from ultralytics.utils.downloads import download 102 | from pathlib import Path 103 | 104 | # Download labels 105 | segments = True # segment or box labels 106 | dir = Path(yaml['path']) # dataset root dir 107 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 108 | urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels 109 | download(urls, dir=dir.parent) 110 | # Download data 111 | urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 112 | 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 113 | 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 114 | download(urls, dir=dir / 'images', threads=3) 115 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco128-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/segment/coco/ 4 | # Example usage: yolo train data=coco128.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco128-seg ← downloads here (7 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco128-seg # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco128-seg.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco128.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/coco/ 4 | # Example usage: yolo train data=coco128.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco128 ← downloads here (7 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco128 # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco128.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco8-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/ 4 | # Example usage: yolo train data=coco8-pose.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco8-pose ← downloads here (1 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
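The `download:` entry appears in two flavours above: a plain zip URL (coco128.yaml, coco128-seg.yaml) or an inline Python snippet (coco.yaml, coco-pose.yaml). A rough, hypothetical dispatcher for that convention, not the Ultralytics implementation, could look like this:

```python
from pathlib import Path
from urllib.request import urlretrieve
import zipfile

def fetch_dataset(download_entry: str, root: Path) -> None:
    """Sketch: handle a dataset YAML 'download' entry (URL or inline script)."""
    root.mkdir(parents=True, exist_ok=True)
    entry = download_entry.strip()
    if entry.startswith(("http://", "https://")):       # URL style, e.g. coco128.yaml
        zip_path = root / entry.rsplit("/", 1)[-1]
        urlretrieve(entry, zip_path)                     # download the archive
        zipfile.ZipFile(zip_path).extractall(root)       # unpack next to it
    else:                                                # script style, e.g. coco.yaml
        # The snippet expects a `yaml` dict in scope (it reads yaml['path'] above).
        exec(entry, {"yaml": {"path": str(root)}})
```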
11 | path: ../datasets/coco8-pose # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | test: # test images (optional) 15 | 16 | # Keypoints 17 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 18 | flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 19 | 20 | # Classes 21 | names: 22 | 0: person 23 | 24 | # Download script/URL (optional) 25 | download: https://ultralytics.com/assets/coco8-pose.zip 26 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco8-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/ 4 | # Example usage: yolo train data=coco8-seg.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco8-seg ← downloads here (1 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco8-seg # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco8-seg.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco8.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/coco8/ 4 | # Example usage: yolo train data=coco8.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco8 ← downloads here (1 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: 
path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco8 # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco8.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/dota8.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # DOTA8 dataset 8 images from split DOTAv1 dataset by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/obb/dota8/ 4 | # Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── dota8 ← downloads here (1MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
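The "Example usage" headers in these small configs translate directly to the Python API shown later in the models README. A minimal smoke-test on the tiny COCO8 split defined above (4 train / 4 val images), with illustrative hyperparameters rather than this repository's training settings:

```python
from ultralytics import YOLO

# Quick end-to-end check before committing to a full-size dataset.
model = YOLO("yolov8n.yaml")                 # build from config (or "yolov8n.pt" for pretrained weights)
model.train(data="coco8.yaml", epochs=3, imgsz=640)
metrics = model.val()                        # evaluate on the val split
print(metrics.box.map50)                     # mAP@0.5 on the 4 val images
```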
11 | path: ../datasets/dota8 # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | 15 | # Classes for DOTA 1.0 16 | names: 17 | 0: plane 18 | 1: ship 19 | 2: storage tank 20 | 3: baseball diamond 21 | 4: tennis court 22 | 5: basketball court 23 | 6: ground track field 24 | 7: harbor 25 | 8: bridge 26 | 9: large vehicle 27 | 10: small vehicle 28 | 11: helicopter 29 | 12: roundabout 30 | 13: soccer ball field 31 | 14: swimming pool 32 | 33 | # Download script/URL (optional) 34 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip 35 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/plate.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # License plate detection dataset (single-row and double-row plates) used by this repository 3 | # Labels follow the standard Ultralytics YOLO detection format 4 | # Example usage: yolo train data=plate.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── plate_detect ← your plate images/labels (or keep the absolute paths below) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | # path: # dataset root dir (optional when train/val are absolute paths, as below) 12 | train: /mnt/mydisk/xiaolei/plate_detect/new_train_data # train images (absolute path) 13 | val: /mnt/mydisk/xiaolei/plate_detect/new_val_data # val images (absolute path) 14 | 15 | # Classes (plate layouts) 16 | names: 17 | 0: single 18 | 1: double 19 | 20 | 21 | # Download script/URL (optional) 22 | # download: # none; the plate dataset is user-provided 23 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/tiger-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Tiger Pose dataset by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/ 4 | # Example usage: yolo train data=tiger-pose.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── tiger-pose ← downloads here (75.3 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/tiger-pose # dataset root dir 12 | train: train # train images (relative to 'path') 210 images 13 | val: val # val images (relative to 'path') 53 images 14 | 15 | # Keypoints 16 | kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 17 | flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 18 | 19 | # Classes 20 | names: 21 | 0: tiger 22 | 23 | # Download script/URL (optional) 24 | download: https://ultralytics.com/assets/tiger-pose.zip 25 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/README.md: -------------------------------------------------------------------------------- 1 | ## Models 2 | 3 | Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml` files) that can be used to create custom YOLO models.
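For the repo-specific plate.yaml above (classes 0: single, 1: double), a hypothetical training call through the same API could look like the sketch below. The repository's own train.py and train.sh may set things up differently; the model choice and hyperparameters are assumptions, and the dataset paths must point at your own plate data.

```python
from ultralytics import YOLO

# Two-class plate detector trained on the splits declared in plate.yaml.
model = YOLO("yolov8s.yaml")                 # assumed base config; swap for the repo's choice
model.train(
    data="ultralytics/cfg/datasets/plate.yaml",
    epochs=120,                              # illustrative values, not the repo's settings
    imgsz=640,
    batch=16,
)
model.val()                                  # report per-class results for 'single' and 'double'
```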
The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks. 4 | 5 | These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs. 6 | 7 | To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So don't wait: start creating your custom YOLO model now! 8 | 9 | ### Usage 10 | 11 | Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command: 12 | 13 | ```bash 14 | yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100 15 | ``` 16 | 17 | They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above: 18 | 19 | ```python 20 | from ultralytics import YOLO 21 | 22 | model = YOLO("model.yaml") # build a new model from a YAML config 23 | # model = YOLO("model.pt") # or load a pre-trained model if available 24 | model.info() # display model information 25 | model.train(data="coco128.yaml", epochs=100) # train the model 26 | ``` 27 | 28 | ## Pre-trained Model Architectures 29 | 30 | Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available. 31 | 32 | ## Contribute New Models 33 | 34 | Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository. 35 | 36 | By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile. 37 | 38 | To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited! 39 | 40 | Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏! 41 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-l.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e.
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | l: [1.00, 1.00, 1024] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, HGStem, [32, 48]] # 0-P2/4 13 | - [-1, 6, HGBlock, [48, 128, 3]] # stage 1 14 | 15 | - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8 16 | - [-1, 6, HGBlock, [96, 512, 3]] # stage 2 17 | 18 | - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16 19 | - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut 20 | - [-1, 6, HGBlock, [192, 1024, 5, True, True]] 21 | - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3 22 | 23 | - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32 24 | - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4 25 | 26 | head: 27 | - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2 28 | - [-1, 1, AIFI, [1024, 8]] 29 | - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0 30 | 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1 33 | - [[-2, -1], 1, Concat, [1]] 34 | - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0 35 | - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1 36 | 37 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 38 | - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0 39 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 40 | - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1 41 | 42 | - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0 43 | - [[-1, 17], 1, Concat, [1]] # cat Y4 44 | - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0 45 | 46 | - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1 47 | - [[-1, 12], 1, Concat, [1]] # cat Y5 48 | - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1 49 | 50 | - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 51 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-ResNet101 object detection model with P3-P5 outputs. 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | l: [1.00, 1.00, 1024] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0 13 | - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1 14 | - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2 15 | - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3 16 | - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4 17 | 18 | head: 19 | - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5 20 | - [-1, 1, AIFI, [1024, 8]] 21 | - [-1, 1, Conv, [256, 1, 1]] # 7 22 | 23 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 24 | - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9 25 | - [[-2, -1], 1, Concat, [1]] 26 | - [-1, 3, RepC3, [256]] # 11 27 | - [-1, 1, Conv, [256, 1, 1]] # 12 28 | 29 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 30 | - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 31 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1 33 | 34 | - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0 35 | - [[-1, 12], 1, Concat, [1]] # cat Y4 36 | - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1 39 | - [[-1, 7], 1, Concat, [1]] # cat Y5 40 | - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1 41 | 42 | - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 43 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-ResNet50 object detection model with P3-P5 outputs. 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | l: [1.00, 1.00, 1024] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0 13 | - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1 14 | - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2 15 | - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3 16 | - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4 17 | 18 | head: 19 | - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5 20 | - [-1, 1, AIFI, [1024, 8]] 21 | - [-1, 1, Conv, [256, 1, 1]] # 7 22 | 23 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 24 | - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9 25 | - [[-2, -1], 1, Concat, [1]] 26 | - [-1, 3, RepC3, [256]] # 11 27 | - [-1, 1, Conv, [256, 1, 1]] # 12 28 | 29 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 30 | - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 31 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1 33 | 34 | - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0 35 | - [[-1, 12], 1, Concat, [1]] # cat Y4 36 | - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1 39 | - [[-1, 7], 1, Concat, [1]] # cat Y5 40 | - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1 41 | 42 | - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 43 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-x.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | x: [1.00, 1.00, 2048] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, HGStem, [32, 64]] # 0-P2/4 13 | - [-1, 6, HGBlock, [64, 128, 3]] # stage 1 14 | 15 | - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8 16 | - [-1, 6, HGBlock, [128, 512, 3]] 17 | - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2 18 | 19 | - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16 20 | - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut 21 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] 22 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] 23 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] 24 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3 25 | 26 | - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32 27 | - [-1, 6, HGBlock, [512, 2048, 5, True, False]] 28 | - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4 29 | 30 | head: 31 | - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2 32 | - [-1, 1, AIFI, [2048, 8]] 33 | - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0 34 | 35 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 36 | - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1 37 | - [[-2, -1], 1, Concat, [1]] 38 | - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0 39 | - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1 40 | 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0 43 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 44 | - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1 45 | 46 | - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0 47 | - [[-1, 21], 1, Concat, [1]] # cat Y4 48 | - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0 49 | 50 | - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1 51 | - [[-1, 16], 1, Concat, [1]] # cat Y5 52 | - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1 53 | 54 | - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 55 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v3/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv3-SPP object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov3 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | 9 | # darknet53 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | - [-1, 1, Conv, [32, 3, 1]] # 0 13 | - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2 14 | - [-1, 1, Bottleneck, [64]] 15 | - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4 16 | - [-1, 2, Bottleneck, [128]] 17 | - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8 18 | - [-1, 8, Bottleneck, [256]] 19 | - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16 20 | - [-1, 8, Bottleneck, [512]] 21 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32 22 | - [-1, 4, Bottleneck, [1024]] # 10 23 | 24 | # YOLOv3-SPP head 25 | head: 26 | - [-1, 1, Bottleneck, [1024, False]] 27 | - [-1, 1, SPP, [512, [5, 9, 13]]] 28 | - [-1, 1, Conv, [1024, 3, 1]] 29 | - [-1, 1, Conv, [512, 1, 1]] 30 | - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large) 31 | 32 | - [-2, 1, Conv, [256, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P4 35 | - [-1, 1, Bottleneck, [512, False]] 36 | - [-1, 1, Bottleneck, [512, False]] 37 | - [-1, 1, Conv, [256, 1, 1]] 38 | - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium) 39 | 40 | - [-2, 1, Conv, [128, 1, 1]] 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [[-1, 6], 1, Concat, [1]] # cat backbone P3 43 | - [-1, 1, Bottleneck, [256, False]] 44 | - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small) 45 | 46 | - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v3/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | 9 | # YOLOv3-tiny backbone 10 | backbone: 11 | # [from, number, module, args] 12 | - [-1, 1, Conv, [16, 3, 1]] # 0 13 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2 14 | - [-1, 1, Conv, [32, 3, 1]] 15 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4 16 | - [-1, 1, Conv, [64, 3, 1]] 17 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8 18 | - [-1, 1, Conv, [128, 3, 1]] 19 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16 20 | - [-1, 1, Conv, [256, 3, 1]] 21 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32 22 | - [-1, 1, Conv, [512, 3, 1]] 23 | - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11 24 | - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12 25 | 26 | # YOLOv3-tiny head 27 | head: 28 | - [-1, 1, Conv, [1024, 3, 1]] 29 | - [-1, 1, Conv, [256, 1, 1]] 30 | - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large) 31 | 32 | - [-2, 1, Conv, [128, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P4 35 | - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium) 36 | 37 | - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5) 38 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v3/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv3 object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov3 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | 9 | # darknet53 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | - [-1, 1, Conv, [32, 3, 1]] # 0 13 | - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2 14 | - [-1, 1, Bottleneck, [64]] 15 | - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4 16 | - [-1, 2, Bottleneck, [128]] 17 | - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8 18 | - [-1, 8, Bottleneck, [256]] 19 | - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16 20 | - [-1, 8, Bottleneck, [512]] 21 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32 22 | - [-1, 4, Bottleneck, [1024]] # 10 23 | 24 | # YOLOv3 head 25 | head: 26 | - [-1, 1, Bottleneck, [1024, False]] 27 | - [-1, 1, Conv, [512, 1, 1]] 28 | - [-1, 1, Conv, [1024, 3, 1]] 29 | - [-1, 1, Conv, [512, 1, 1]] 30 | - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large) 31 | 32 | - [-2, 1, Conv, [256, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P4 35 | - [-1, 1, Bottleneck, [512, False]] 36 | - [-1, 1, Bottleneck, [512, False]] 37 | - [-1, 1, Conv, [256, 1, 1]] 38 | - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium) 39 | 40 | - [-2, 1, Conv, [128, 1, 1]] 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [[-1, 6], 1, Concat, [1]] # cat backbone P3 43 | - [-1, 1, Bottleneck, [256, False]] 44 | - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small) 45 | 46 | - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v5/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 1024] 11 | l: [1.00, 1.00, 1024] 12 | x: [1.33, 1.25, 1024] 13 | 14 | # YOLOv5 v6.0 backbone 15 | backbone: 16 | # [from, number, module, args] 17 | - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3, [128]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3, [256]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 9, C3, [512]] 24 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3, [768]] 26 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C3, [1024]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | - [-1, 1, Conv, [768, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 35 | - [-1, 3, C3, [768, False]] # 15 36 | 37 | - [-1, 1, Conv, [512, 1, 1]] 38 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 39 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 40 | - [-1, 3, C3, [512, False]] # 19 41 | 42 | - [-1, 1, Conv, [256, 1, 1]] 43 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 44 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 45 | - [-1, 3, C3, [256, False]] # 23 (P3/8-small) 46 | 47 | - [-1, 1, Conv, [256, 3, 2]] 48 | - [[-1, 20], 1, Concat, [1]] # cat head P4 49 | - [-1, 3, C3, [512, False]] # 26 (P4/16-medium) 50 | 51 | - [-1, 1, Conv, [512, 3, 2]] 52 | - [[-1, 16], 1, Concat, [1]] # cat head P5 53 | - [-1, 3, C3, [768, False]] # 29 (P5/32-large) 54 | 55 | - [-1, 1, Conv, [768, 3, 2]] 56 | - [[-1, 12], 1, Concat, [1]] # cat head P6 57 | - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge) 58 | 59 | - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 60 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v5/yolov5.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov5n.yaml' will call yolov5.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 1024] 11 | l: [1.00, 1.00, 1024] 12 | x: [1.33, 1.25, 1024] 13 | 14 | # YOLOv5 v6.0 backbone 15 | backbone: 16 | # [from, number, module, args] 17 | - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3, [128]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3, [256]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 9, C3, [512]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3, [1024]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | - [-1, 1, Conv, [512, 1, 1]] 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 33 | - [-1, 3, C3, [512, False]] # 13 34 | 35 | - [-1, 1, Conv, [256, 1, 1]] 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 38 | - [-1, 3, C3, [256, False]] # 17 (P3/8-small) 39 | 40 | - [-1, 1, Conv, [256, 3, 2]] 41 | - [[-1, 14], 1, Concat, [1]] # cat head P4 42 | - [-1, 3, C3, [512, False]] # 20 (P4/16-medium) 43 | 44 | - [-1, 1, Conv, [512, 3, 2]] 45 | - [[-1, 10], 1, Concat, [1]] # cat head P5 46 | - [-1, 3, C3, [1024, False]] # 23 (P5/32-large) 47 | 48 | - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5) 49 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v6/yolov6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | activation: nn.ReLU() # (optional) model default activation function 7 | scales: # model compound scaling constants, i.e. 
'model=yolov6n.yaml' will call yolov8.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] 10 | s: [0.33, 0.50, 1024] 11 | m: [0.67, 0.75, 768] 12 | l: [1.00, 1.00, 512] 13 | x: [1.00, 1.25, 512] 14 | 15 | # YOLOv6-3.0s backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 6, Conv, [128, 3, 1]] 21 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 12, Conv, [256, 3, 1]] 23 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 18, Conv, [512, 3, 1]] 25 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 26 | - [-1, 6, Conv, [1024, 3, 1]] 27 | - [-1, 1, SPPF, [1024, 5]] # 9 28 | 29 | # YOLOv6-3.0s head 30 | head: 31 | - [-1, 1, Conv, [256, 1, 1]] 32 | - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]] 33 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 34 | - [-1, 1, Conv, [256, 3, 1]] 35 | - [-1, 9, Conv, [256, 3, 1]] # 14 36 | 37 | - [-1, 1, Conv, [128, 1, 1]] 38 | - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]] 39 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 40 | - [-1, 1, Conv, [128, 3, 1]] 41 | - [-1, 9, Conv, [128, 3, 1]] # 19 42 | 43 | - [-1, 1, Conv, [128, 3, 2]] 44 | - [[-1, 15], 1, Concat, [1]] # cat head P4 45 | - [-1, 1, Conv, [256, 3, 1]] 46 | - [-1, 9, Conv, [256, 3, 1]] # 23 47 | 48 | - [-1, 1, Conv, [256, 3, 2]] 49 | - [[-1, 10], 1, Concat, [1]] # cat head P5 50 | - [-1, 1, Conv, [512, 3, 1]] 51 | - [-1, 9, Conv, [512, 3, 1]] # 27 52 | 53 | - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5) 54 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify 3 | 4 | # Parameters 5 | nc: 1000 # number of classes 6 | scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 1024] 11 | l: [1.00, 1.00, 1024] 12 | x: [1.00, 1.25, 1024] 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | 27 | # YOLOv8.0n head 28 | head: 29 | - [-1, 1, Classify, [nc]] # Classify 30 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs 13 | 14 | # YOLOv8.0-ghost backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3Ghost, [128, True]] 20 | - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3Ghost, [256, True]] 22 | - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C3Ghost, [512, True]] 24 | - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3Ghost, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0-ghost-p2 head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C3Ghost, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 39 | - [[-1, 2], 1, Concat, [1]] # cat backbone P2 40 | - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall) 41 | 42 | - [-1, 1, GhostConv, [128, 3, 2]] 43 | - [[-1, 15], 1, Concat, [1]] # cat head P3 44 | - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small) 45 | 46 | - [-1, 1, GhostConv, [256, 3, 2]] 47 | - [[-1, 12], 1, Concat, [1]] # cat head P4 48 | - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium) 49 | 50 | - [-1, 1, GhostConv, [512, 3, 2]] 51 | - [[-1, 9], 1, Concat, [1]] # cat head P5 52 | - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large) 53 | 54 | - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5) 55 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs 13 | 14 | # YOLOv8.0-ghost backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3Ghost, [128, True]] 20 | - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3Ghost, [256, True]] 22 | - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C3Ghost, [512, True]] 24 | - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3Ghost, [768, True]] 26 | - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C3Ghost, [1024, True]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv8.0-ghost-p6 head 31 | head: 32 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 33 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 34 | - [-1, 3, C3Ghost, [768]] # 14 35 | 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 38 | - [-1, 3, C3Ghost, [512]] # 17 39 | 40 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 41 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 42 | - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small) 43 | 44 | - [-1, 1, GhostConv, [256, 3, 2]] 45 | - [[-1, 17], 1, Concat, [1]] # cat head P4 46 | - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium) 47 | 48 | - [-1, 1, GhostConv, [512, 3, 2]] 49 | - [[-1, 14], 1, Concat, [1]] # cat head P5 50 | - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large) 51 | 52 | - [-1, 1, GhostConv, [768, 3, 2]] 53 | - [[-1, 11], 1, Concat, [1]] # cat head P6 54 | - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge) 55 | 56 | - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 57 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-ghost.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2 4 | 5 | # Parameters 6 | nc: 80 # number of classes 7 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs 10 | s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs 11 | m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs 12 | l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs 13 | x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs 14 | 15 | # YOLOv8.0n-ghost backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 3, C3Ghost, [128, True]] 21 | - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 6, C3Ghost, [256, True]] 23 | - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 6, C3Ghost, [512, True]] 25 | - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32 26 | - [-1, 3, C3Ghost, [1024, True]] 27 | - [-1, 1, SPPF, [1024, 5]] # 9 28 | 29 | # YOLOv8.0n head 30 | head: 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 33 | - [-1, 3, C3Ghost, [512]] # 12 34 | 35 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 36 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 37 | - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small) 38 | 39 | - [-1, 1, GhostConv, [256, 3, 2]] 40 | - [[-1, 12], 1, Concat, [1]] # cat head P4 41 | - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium) 42 | 43 | - [-1, 1, GhostConv, [512, 3, 2]] 44 | - [[-1, 9], 1, Concat, [1]] # cat head P5 45 | - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large) 46 | 47 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 48 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-obb.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-p2.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0 backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0-p2 head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 39 | - [[-1, 2], 1, Concat, [1]] # cat backbone P2 40 | - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall) 41 | 42 | - [-1, 1, Conv, [128, 3, 2]] 43 | - [[-1, 15], 1, Concat, [1]] # cat head P3 44 | - [-1, 3, C2f, [256]] # 21 (P3/8-small) 45 | 46 | - [-1, 1, Conv, [256, 3, 2]] 47 | - [[-1, 12], 1, Concat, [1]] # cat head P4 48 | - [-1, 3, C2f, [512]] # 24 (P4/16-medium) 49 | 50 | - [-1, 1, Conv, [512, 3, 2]] 51 | - [[-1, 9], 1, Concat, [1]] # cat head P5 52 | - [-1, 3, C2f, [1024]] # 27 (P5/32-large) 53 | 54 | - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5) 55 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0x6 backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [768, True]] 26 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C2f, [1024, True]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv8.0x6 head 31 | head: 32 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 33 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 34 | - [-1, 3, C2, [768, False]] # 14 35 | 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 38 | - [-1, 3, C2, [512, False]] # 17 39 | 40 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 41 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 42 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 43 | 44 | - [-1, 1, Conv, [256, 3, 2]] 45 | - [[-1, 17], 1, Concat, [1]] # cat head P4 46 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 47 | 48 | - [-1, 1, Conv, [512, 3, 2]] 49 | - [[-1, 14], 1, Concat, [1]] # cat head P5 50 | - [-1, 3, C2, [768, False]] # 26 (P5/32-large) 51 | 52 | - [-1, 1, Conv, [768, 3, 2]] 53 | - [[-1, 11], 1, Concat, [1]] # cat head P6 54 | - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) 55 | 56 | - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 57 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-pose-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose 3 | 4 | # Parameters 5 | nc: 1 # number of classes 6 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 7 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] 10 | s: [0.33, 0.50, 1024] 11 | m: [0.67, 0.75, 768] 12 | l: [1.00, 1.00, 512] 13 | x: [1.00, 1.25, 512] 14 | 15 | # YOLOv8.0x6 backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 3, C2f, [128, True]] 21 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 6, C2f, [256, True]] 23 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 6, C2f, [512, True]] 25 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 26 | - [-1, 3, C2f, [768, True]] 27 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 28 | - [-1, 3, C2f, [1024, True]] 29 | - [-1, 1, SPPF, [1024, 5]] # 11 30 | 31 | # YOLOv8.0x6 head 32 | head: 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 35 | - [-1, 3, C2, [768, False]] # 14 36 | 37 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 38 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 39 | - [-1, 3, C2, [512, False]] # 17 40 | 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 43 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 44 | 45 | - [-1, 1, Conv, [256, 3, 2]] 46 | - [[-1, 17], 1, Concat, [1]] # cat head P4 47 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 48 | 49 | - [-1, 1, Conv, [512, 3, 2]] 50 | - [[-1, 14], 1, Concat, [1]] # cat head P5 51 | - [-1, 3, C2, [768, False]] # 26 (P5/32-large) 52 | 53 | - [-1, 1, Conv, [768, 3, 2]] 54 | - [[-1, 11], 1, Concat, [1]] # cat head P6 55 | - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) 56 | 57 | - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6) 58 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose 3 | 4 | # Parameters 5 | nc: 1 # number of classes 6 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 7 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] 10 | s: [0.33, 0.50, 1024] 11 | m: [0.67, 0.75, 768] 12 | l: [1.00, 1.00, 512] 13 | x: [1.00, 1.25, 512] 14 | 15 | # YOLOv8.0n backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 3, C2f, [128, True]] 21 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 6, C2f, [256, True]] 23 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 6, C2f, [512, True]] 25 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 26 | - [-1, 3, C2f, [1024, True]] 27 | - [-1, 1, SPPF, [1024, 5]] # 9 28 | 29 | # YOLOv8.0n head 30 | head: 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 33 | - [-1, 3, C2f, [512]] # 12 34 | 35 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 36 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 37 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 38 | 39 | - [-1, 1, Conv, [256, 3, 2]] 40 | - [[-1, 12], 1, Concat, [1]] # cat head P4 41 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 42 | 43 | - [-1, 1, Conv, [512, 3, 2]] 44 | - [[-1, 9], 1, Concat, [1]] # cat head P5 45 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 46 | 47 | - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5) 48 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-rtdetr.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-seg-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0x6 backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [768, True]] 26 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C2f, [1024, True]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv8.0x6 head 31 | head: 32 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 33 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 34 | - [-1, 3, C2, [768, False]] # 14 35 | 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 38 | - [-1, 3, C2, [512, False]] # 17 39 | 40 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 41 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 42 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 43 | 44 | - [-1, 1, Conv, [256, 3, 2]] 45 | - [[-1, 17], 1, Concat, [1]] # cat head P4 46 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 47 | 48 | - [-1, 1, Conv, [512, 3, 2]] 49 | - [[-1, 14], 1, Concat, [1]] # cat head P5 50 | - [-1, 3, C2, [768, False]] # 26 (P5/32-large) 51 | 52 | - [-1, 1, Conv, [768, 3, 2]] 53 | - [[-1, 11], 1, Concat, [1]] # cat head P6 54 | - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) 55 | 56 | - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6) 57 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e.
'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/trackers/botsort.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT 3 | 4 | tracker_type: botsort # tracker type, ['botsort', 'bytetrack'] 5 | track_high_thresh: 0.5 # threshold for the first association 6 | track_low_thresh: 0.1 # threshold for the second association 7 | new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks 8 | track_buffer: 30 # buffer to calculate the time when to remove tracks 9 | match_thresh: 0.8 # threshold for matching tracks 10 | # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) 11 | # mot20: False # for tracker evaluation(not used for now) 12 | 13 | # BoT-SORT settings 14 | gmc_method: sparseOptFlow # method of global motion compensation 15 | # ReID model related thresh (not supported yet) 16 | proximity_thresh: 0.5 17 | appearance_thresh: 0.25 18 | with_reid: False 19 | -------------------------------------------------------------------------------- /ultralytics/cfg/trackers/bytetrack.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack 3 | 4 | tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack'] 5 | track_high_thresh: 0.5 # threshold for the first association 6 | track_low_thresh: 0.1 # threshold for the second association 7 | new_track_thresh: 0.6 # threshold for init new track if the detection 
does not match any tracks 8 | track_buffer: 30 # buffer to calculate the time when to remove tracks 9 | match_thresh: 0.8 # threshold for matching tracks 10 | # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) 11 | # mot20: False # for tracker evaluation(not used for now) 12 | -------------------------------------------------------------------------------- /ultralytics/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .base import BaseDataset 4 | from .build import build_dataloader, build_yolo_dataset, load_inference_source 5 | from .dataset import ClassificationDataset, SemanticDataset, YOLODataset 6 | 7 | __all__ = ( 8 | "BaseDataset", 9 | "ClassificationDataset", 10 | "SemanticDataset", 11 | "YOLODataset", 12 | "build_yolo_dataset", 13 | "build_dataloader", 14 | "load_inference_source", 15 | ) 16 | -------------------------------------------------------------------------------- /ultralytics/data/annotator.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from pathlib import Path 4 | 5 | from ultralytics import SAM, YOLO 6 | 7 | 8 | def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None): 9 | """ 10 | Automatically annotates images using a YOLO object detection model and a SAM segmentation model. 11 | 12 | Args: 13 | data (str): Path to a folder containing images to be annotated. 14 | det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'. 15 | sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'. 16 | device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available). 17 | output_dir (str | None | optional): Directory to save the annotated results. 18 | Defaults to a 'labels' folder in the same directory as 'data'. 
19 | 20 | Example: 21 | ```python 22 | from ultralytics.data.annotator import auto_annotate 23 | 24 | auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt') 25 | ``` 26 | """ 27 | det_model = YOLO(det_model) 28 | sam_model = SAM(sam_model) 29 | 30 | data = Path(data) 31 | if not output_dir: 32 | output_dir = data.parent / f"{data.stem}_auto_annotate_labels" 33 | Path(output_dir).mkdir(exist_ok=True, parents=True) 34 | 35 | det_results = det_model(data, stream=True, device=device) 36 | 37 | for result in det_results: 38 | class_ids = result.boxes.cls.int().tolist() # noqa 39 | if len(class_ids): 40 | boxes = result.boxes.xyxy # Boxes object for bbox outputs 41 | sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device) 42 | segments = sam_results[0].masks.xyn # noqa 43 | 44 | with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f: 45 | for i in range(len(segments)): 46 | s = segments[i] 47 | if len(s) == 0: 48 | continue 49 | segment = map(str, segments[i].reshape(-1).tolist()) 50 | f.write(f"{class_ids[i]} " + " ".join(segment) + "\n") 51 | -------------------------------------------------------------------------------- /ultralytics/data/explorer/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .utils import plot_query_result 4 | 5 | __all__ = ["plot_query_result"] 6 | -------------------------------------------------------------------------------- /ultralytics/data/explorer/gui/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/data/scripts/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ultralytics YOLO 🚀, AGPL-3.0 license 3 | # Download latest models from https://github.com/ultralytics/assets/releases 4 | # Example usage: bash ultralytics/data/scripts/download_weights.sh 5 | # parent 6 | # └── weights 7 | # ├── yolov8n.pt ← downloads here 8 | # ├── yolov8s.pt 9 | # └── ... 10 | 11 | python - < w - threshold, 2] = w # x2 26 | boxes[boxes[:, 3] > h - threshold, 3] = h # y2 27 | return boxes 28 | 29 | 30 | def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False): 31 | """ 32 | Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes. 
33 | 34 | Args: 35 | box1 (torch.Tensor): (4, ) 36 | boxes (torch.Tensor): (n, 4) 37 | iou_thres (float): IoU threshold 38 | image_shape (tuple): (height, width) 39 | raw_output (bool): If True, return the raw IoU values instead of the indices 40 | 41 | Returns: 42 | high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres 43 | """ 44 | boxes = adjust_bboxes_to_image_border(boxes, image_shape) 45 | # Obtain coordinates for intersections 46 | x1 = torch.max(box1[0], boxes[:, 0]) 47 | y1 = torch.max(box1[1], boxes[:, 1]) 48 | x2 = torch.min(box1[2], boxes[:, 2]) 49 | y2 = torch.min(box1[3], boxes[:, 3]) 50 | 51 | # Compute the area of intersection 52 | intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0) 53 | 54 | # Compute the area of both individual boxes 55 | box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) 56 | box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 57 | 58 | # Compute the area of union 59 | union = box1_area + box2_area - intersection 60 | 61 | # Compute the IoU 62 | iou = intersection / union # Should be shape (n, ) 63 | if raw_output: 64 | return 0 if iou.numel() == 0 else iou 65 | 66 | # return indices of boxes with IoU > thres 67 | return torch.nonzero(iou > iou_thres).flatten() 68 | -------------------------------------------------------------------------------- /ultralytics/models/fastsam/val.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.models.yolo.segment import SegmentationValidator 4 | from ultralytics.utils.metrics import SegmentMetrics 5 | 6 | 7 | class FastSAMValidator(SegmentationValidator): 8 | """ 9 | Custom validation class for fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework. 10 | 11 | Extends the SegmentationValidator class, customizing the validation process specifically for fast SAM. This class 12 | sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled 13 | to avoid errors during validation. 14 | 15 | Attributes: 16 | dataloader: The data loader object used for validation. 17 | save_dir (str): The directory where validation results will be saved. 18 | pbar: A progress bar object. 19 | args: Additional arguments for customization. 20 | _callbacks: List of callback functions to be invoked during validation. 21 | """ 22 | 23 | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): 24 | """ 25 | Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics. 26 | 27 | Args: 28 | dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation. 29 | save_dir (Path, optional): Directory to save results. 30 | pbar (tqdm.tqdm): Progress bar for displaying progress. 31 | args (SimpleNamespace): Configuration for the validator. 32 | _callbacks (dict): Dictionary to store various callback functions. 33 | 34 | Notes: 35 | Plots for ConfusionMatrix and other related metrics are disabled in this class to avoid errors. 
36 | """ 37 | super().__init__(dataloader, save_dir, pbar, args, _callbacks) 38 | self.args.task = "segment" 39 | self.args.plots = False # disable ConfusionMatrix and other plots to avoid errors 40 | self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot) 41 | -------------------------------------------------------------------------------- /ultralytics/models/nas/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .model import NAS 4 | from .predict import NASPredictor 5 | from .val import NASValidator 6 | 7 | __all__ = "NASPredictor", "NASValidator", "NAS" 8 | -------------------------------------------------------------------------------- /ultralytics/models/nas/model.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """ 3 | YOLO-NAS model interface. 4 | 5 | Example: 6 | ```python 7 | from ultralytics import NAS 8 | 9 | model = NAS('yolo_nas_s') 10 | results = model.predict('ultralytics/assets/bus.jpg') 11 | ``` 12 | """ 13 | 14 | from pathlib import Path 15 | 16 | import torch 17 | 18 | from ultralytics.engine.model import Model 19 | from ultralytics.utils.torch_utils import model_info, smart_inference_mode 20 | from .predict import NASPredictor 21 | from .val import NASValidator 22 | 23 | 24 | class NAS(Model): 25 | """ 26 | YOLO NAS model for object detection. 27 | 28 | This class provides an interface for the YOLO-NAS models and extends the `Model` class from Ultralytics engine. 29 | It is designed to facilitate the task of object detection using pre-trained or custom-trained YOLO-NAS models. 30 | 31 | Example: 32 | ```python 33 | from ultralytics import NAS 34 | 35 | model = NAS('yolo_nas_s') 36 | results = model.predict('ultralytics/assets/bus.jpg') 37 | ``` 38 | 39 | Attributes: 40 | model (str): Path to the pre-trained model or model name. Defaults to 'yolo_nas_s.pt'. 41 | 42 | Note: 43 | YOLO-NAS models only support pre-trained models. Do not provide YAML configuration files. 44 | """ 45 | 46 | def __init__(self, model="yolo_nas_s.pt") -> None: 47 | """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model.""" 48 | assert Path(model).suffix not in (".yaml", ".yml"), "YOLO-NAS models only support pre-trained models." 49 | super().__init__(model, task="detect") 50 | 51 | @smart_inference_mode() 52 | def _load(self, weights: str, task: str): 53 | """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided.""" 54 | import super_gradients 55 | 56 | suffix = Path(weights).suffix 57 | if suffix == ".pt": 58 | self.model = torch.load(weights) 59 | elif suffix == "": 60 | self.model = super_gradients.training.models.get(weights, pretrained_weights="coco") 61 | # Standardize model 62 | self.model.fuse = lambda verbose=True: self.model 63 | self.model.stride = torch.tensor([32]) 64 | self.model.names = dict(enumerate(self.model._class_names)) 65 | self.model.is_fused = lambda: False # for info() 66 | self.model.yaml = {} # for info() 67 | self.model.pt_path = weights # for export() 68 | self.model.task = "detect" # for export() 69 | 70 | def info(self, detailed=False, verbose=True): 71 | """ 72 | Logs model info. 73 | 74 | Args: 75 | detailed (bool): Show detailed information about model. 76 | verbose (bool): Controls verbosity. 
77 | """ 78 | return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640) 79 | 80 | @property 81 | def task_map(self): 82 | """Returns a dictionary mapping tasks to respective predictor and validator classes.""" 83 | return {"detect": {"predictor": NASPredictor, "validator": NASValidator}} 84 | -------------------------------------------------------------------------------- /ultralytics/models/nas/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.engine.predictor import BasePredictor 6 | from ultralytics.engine.results import Results 7 | from ultralytics.utils import ops 8 | 9 | 10 | class NASPredictor(BasePredictor): 11 | """ 12 | Ultralytics YOLO NAS Predictor for object detection. 13 | 14 | This class extends the `BasePredictor` from Ultralytics engine and is responsible for post-processing the 15 | raw predictions generated by the YOLO NAS models. It applies operations like non-maximum suppression and 16 | scaling the bounding boxes to fit the original image dimensions. 17 | 18 | Attributes: 19 | args (Namespace): Namespace containing various configurations for post-processing. 20 | 21 | Example: 22 | ```python 23 | from ultralytics import NAS 24 | 25 | model = NAS('yolo_nas_s') 26 | predictor = model.predictor 27 | # Assumes that raw_preds, img, orig_imgs are available 28 | results = predictor.postprocess(raw_preds, img, orig_imgs) 29 | ``` 30 | 31 | Note: 32 | Typically, this class is not instantiated directly. It is used internally within the `NAS` class. 33 | """ 34 | 35 | def postprocess(self, preds_in, img, orig_imgs): 36 | """Postprocess predictions and returns a list of Results objects.""" 37 | 38 | # Cat boxes and class scores 39 | boxes = ops.xyxy2xywh(preds_in[0][0]) 40 | preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1) 41 | 42 | preds = ops.non_max_suppression( 43 | preds, 44 | self.args.conf, 45 | self.args.iou, 46 | agnostic=self.args.agnostic_nms, 47 | max_det=self.args.max_det, 48 | classes=self.args.classes, 49 | ) 50 | 51 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 52 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 53 | 54 | results = [] 55 | for i, pred in enumerate(preds): 56 | orig_img = orig_imgs[i] 57 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 58 | img_path = self.batch[0][i] 59 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) 60 | return results 61 | -------------------------------------------------------------------------------- /ultralytics/models/nas/val.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.models.yolo.detect import DetectionValidator 6 | from ultralytics.utils import ops 7 | 8 | __all__ = ["NASValidator"] 9 | 10 | 11 | class NASValidator(DetectionValidator): 12 | """ 13 | Ultralytics YOLO NAS Validator for object detection. 14 | 15 | Extends `DetectionValidator` from the Ultralytics models package and is designed to post-process the raw predictions 16 | generated by YOLO NAS models. It performs non-maximum suppression to remove overlapping and low-confidence boxes, 17 | ultimately producing the final detections. 
18 | 19 | Attributes: 20 | args (Namespace): Namespace containing various configurations for post-processing, such as confidence and IoU thresholds. 21 | lb (torch.Tensor): Optional tensor for multilabel NMS. 22 | 23 | Example: 24 | ```python 25 | from ultralytics import NAS 26 | 27 | model = NAS('yolo_nas_s') 28 | validator = model.validator 29 | # Assumes that raw_preds are available 30 | final_preds = validator.postprocess(raw_preds) 31 | ``` 32 | 33 | Note: 34 | This class is generally not instantiated directly but is used internally within the `NAS` class. 35 | """ 36 | 37 | def postprocess(self, preds_in): 38 | """Apply Non-maximum suppression to prediction outputs.""" 39 | boxes = ops.xyxy2xywh(preds_in[0][0]) 40 | preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1) 41 | return ops.non_max_suppression( 42 | preds, 43 | self.args.conf, 44 | self.args.iou, 45 | labels=self.lb, 46 | multi_label=False, 47 | agnostic=self.args.single_cls, 48 | max_det=self.args.max_det, 49 | max_time_img=0.5, 50 | ) 51 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .model import RTDETR 4 | from .predict import RTDETRPredictor 5 | from .val import RTDETRValidator 6 | 7 | __all__ = "RTDETRPredictor", "RTDETRValidator", "RTDETR" 8 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/model.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """ 3 | Interface for Baidu's RT-DETR, a Vision Transformer-based real-time object detector. RT-DETR offers real-time 4 | performance and high accuracy, excelling in accelerated backends like CUDA with TensorRT. It features an efficient 5 | hybrid encoder and IoU-aware query selection for enhanced detection accuracy. 6 | 7 | For more information on RT-DETR, visit: https://arxiv.org/pdf/2304.08069.pdf 8 | """ 9 | 10 | from ultralytics.engine.model import Model 11 | from ultralytics.nn.tasks import RTDETRDetectionModel 12 | 13 | from .predict import RTDETRPredictor 14 | from .train import RTDETRTrainer 15 | from .val import RTDETRValidator 16 | 17 | 18 | class RTDETR(Model): 19 | """ 20 | Interface for Baidu's RT-DETR model. This Vision Transformer-based object detector provides real-time performance 21 | with high accuracy. It supports efficient hybrid encoding, IoU-aware query selection, and adaptable inference speed. 22 | 23 | Attributes: 24 | model (str): Path to the pre-trained model. Defaults to 'rtdetr-l.pt'. 25 | """ 26 | 27 | def __init__(self, model="rtdetr-l.pt") -> None: 28 | """ 29 | Initializes the RT-DETR model with the given pre-trained model file. Supports .pt and .yaml formats. 30 | 31 | Args: 32 | model (str): Path to the pre-trained model. Defaults to 'rtdetr-l.pt'. 33 | 34 | Raises: 35 | NotImplementedError: If the model file extension is not 'pt', 'yaml', or 'yml'. 36 | """ 37 | if model and model.split(".")[-1] not in ("pt", "yaml", "yml"): 38 | raise NotImplementedError("RT-DETR only supports creating from *.pt, *.yaml, or *.yml files.") 39 | super().__init__(model=model, task="detect") 40 | 41 | @property 42 | def task_map(self) -> dict: 43 | """ 44 | Returns a task map for RT-DETR, associating tasks with corresponding Ultralytics classes. 
45 | 46 | Returns: 47 | dict: A dictionary mapping task names to Ultralytics task classes for the RT-DETR model. 48 | """ 49 | return { 50 | "detect": { 51 | "predictor": RTDETRPredictor, 52 | "validator": RTDETRValidator, 53 | "trainer": RTDETRTrainer, 54 | "model": RTDETRDetectionModel, 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.data.augment import LetterBox 6 | from ultralytics.engine.predictor import BasePredictor 7 | from ultralytics.engine.results import Results 8 | from ultralytics.utils import ops 9 | 10 | 11 | class RTDETRPredictor(BasePredictor): 12 | """ 13 | RT-DETR (Real-Time Detection Transformer) Predictor extending the BasePredictor class for making predictions using 14 | Baidu's RT-DETR model. 15 | 16 | This class leverages the power of Vision Transformers to provide real-time object detection while maintaining 17 | high accuracy. It supports key features like efficient hybrid encoding and IoU-aware query selection. 18 | 19 | Example: 20 | ```python 21 | from ultralytics.utils import ASSETS 22 | from ultralytics.models.rtdetr import RTDETRPredictor 23 | 24 | args = dict(model='rtdetr-l.pt', source=ASSETS) 25 | predictor = RTDETRPredictor(overrides=args) 26 | predictor.predict_cli() 27 | ``` 28 | 29 | Attributes: 30 | imgsz (int): Image size for inference (must be square and scale-filled). 31 | args (dict): Argument overrides for the predictor. 32 | """ 33 | 34 | def postprocess(self, preds, img, orig_imgs): 35 | """ 36 | Postprocess the raw predictions from the model to generate bounding boxes and confidence scores. 37 | 38 | The method filters detections based on confidence and class if specified in `self.args`. 39 | 40 | Args: 41 | preds (torch.Tensor): Raw predictions from the model. 42 | img (torch.Tensor): Processed input images. 43 | orig_imgs (list or torch.Tensor): Original, unprocessed images. 44 | 45 | Returns: 46 | (list[Results]): A list of Results objects containing the post-processed bounding boxes, confidence scores, 47 | and class labels. 48 | """ 49 | nd = preds[0].shape[-1] 50 | bboxes, scores = preds[0].split((4, nd - 4), dim=-1) 51 | 52 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 53 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 54 | 55 | results = [] 56 | for i, bbox in enumerate(bboxes): # (300, 4) 57 | bbox = ops.xywh2xyxy(bbox) 58 | score, cls = scores[i].max(-1, keepdim=True) # (300, 1) 59 | idx = score.squeeze(-1) > self.args.conf # (300, ) 60 | if self.args.classes is not None: 61 | idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx 62 | pred = torch.cat([bbox, score, cls], dim=-1)[idx] # filter 63 | orig_img = orig_imgs[i] 64 | oh, ow = orig_img.shape[:2] 65 | pred[..., [0, 2]] *= ow 66 | pred[..., [1, 3]] *= oh 67 | img_path = self.batch[0][i] 68 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) 69 | return results 70 | 71 | def pre_transform(self, im): 72 | """ 73 | Pre-transforms the input images before feeding them into the model for inference. The input images are 74 | letterboxed to ensure a square aspect ratio and scale-filled. The size must be square(640) and scaleFilled. 
75 | 76 | Args: 77 | im (list[np.ndarray] |torch.Tensor): Input images of shape (N,3,h,w) for tensor, [(h,w,3) x N] for list. 78 | 79 | Returns: 80 | (list): List of pre-transformed images ready for model inference. 81 | """ 82 | letterbox = LetterBox(self.imgsz, auto=False, scaleFill=True) 83 | return [letterbox(image=x) for x in im] 84 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | import torch 6 | 7 | from ultralytics.models.yolo.detect import DetectionTrainer 8 | from ultralytics.nn.tasks import RTDETRDetectionModel 9 | from ultralytics.utils import RANK, colorstr 10 | from .val import RTDETRDataset, RTDETRValidator 11 | 12 | 13 | class RTDETRTrainer(DetectionTrainer): 14 | """ 15 | Trainer class for the RT-DETR model developed by Baidu for real-time object detection. Extends the DetectionTrainer 16 | class for YOLO to adapt to the specific features and architecture of RT-DETR. This model leverages Vision 17 | Transformers and has capabilities like IoU-aware query selection and adaptable inference speed. 18 | 19 | Notes: 20 | - F.grid_sample used in RT-DETR does not support the `deterministic=True` argument. 21 | - AMP training can lead to NaN outputs and may produce errors during bipartite graph matching. 22 | 23 | Example: 24 | ```python 25 | from ultralytics.models.rtdetr.train import RTDETRTrainer 26 | 27 | args = dict(model='rtdetr-l.yaml', data='coco8.yaml', imgsz=640, epochs=3) 28 | trainer = RTDETRTrainer(overrides=args) 29 | trainer.train() 30 | ``` 31 | """ 32 | 33 | def get_model(self, cfg=None, weights=None, verbose=True): 34 | """ 35 | Initialize and return an RT-DETR model for object detection tasks. 36 | 37 | Args: 38 | cfg (dict, optional): Model configuration. Defaults to None. 39 | weights (str, optional): Path to pre-trained model weights. Defaults to None. 40 | verbose (bool): Verbose logging if True. Defaults to True. 41 | 42 | Returns: 43 | (RTDETRDetectionModel): Initialized model. 44 | """ 45 | model = RTDETRDetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1) 46 | if weights: 47 | model.load(weights) 48 | return model 49 | 50 | def build_dataset(self, img_path, mode="val", batch=None): 51 | """ 52 | Build and return an RT-DETR dataset for training or validation. 53 | 54 | Args: 55 | img_path (str): Path to the folder containing images. 56 | mode (str): Dataset mode, either 'train' or 'val'. 57 | batch (int, optional): Batch size for rectangle training. Defaults to None. 58 | 59 | Returns: 60 | (RTDETRDataset): Dataset object for the specific mode. 61 | """ 62 | return RTDETRDataset( 63 | img_path=img_path, 64 | imgsz=self.args.imgsz, 65 | batch_size=batch, 66 | augment=mode == "train", 67 | hyp=self.args, 68 | rect=False, 69 | cache=self.args.cache or None, 70 | prefix=colorstr(f"{mode}: "), 71 | data=self.data, 72 | ) 73 | 74 | def get_validator(self): 75 | """ 76 | Returns a DetectionValidator suitable for RT-DETR model validation. 77 | 78 | Returns: 79 | (RTDETRValidator): Validator object for model validation. 80 | """ 81 | self.loss_names = "giou_loss", "cls_loss", "l1_loss" 82 | return RTDETRValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) 83 | 84 | def preprocess_batch(self, batch): 85 | """ 86 | Preprocess a batch of images. Scales and converts the images to float format. 
87 | 88 | Args: 89 | batch (dict): Dictionary containing a batch of images, bboxes, and labels. 90 | 91 | Returns: 92 | (dict): Preprocessed batch. 93 | """ 94 | batch = super().preprocess_batch(batch) 95 | bs = len(batch["img"]) 96 | batch_idx = batch["batch_idx"] 97 | gt_bbox, gt_class = [], [] 98 | for i in range(bs): 99 | gt_bbox.append(batch["bboxes"][batch_idx == i].to(batch_idx.device)) 100 | gt_class.append(batch["cls"][batch_idx == i].to(device=batch_idx.device, dtype=torch.long)) 101 | return batch 102 | -------------------------------------------------------------------------------- /ultralytics/models/sam/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .model import SAM 4 | from .predict import Predictor 5 | 6 | __all__ = "SAM", "Predictor" # tuple or list 7 | -------------------------------------------------------------------------------- /ultralytics/models/sam/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/models/sam/modules/sam.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | # Copyright (c) Meta Platforms, Inc. and affiliates. 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | from typing import List 10 | 11 | import torch 12 | from torch import nn 13 | 14 | from .decoders import MaskDecoder 15 | from .encoders import ImageEncoderViT, PromptEncoder 16 | 17 | 18 | class Sam(nn.Module): 19 | """ 20 | Sam (Segment Anything Model) is designed for object segmentation tasks. It uses image encoders to generate image 21 | embeddings, and prompt encoders to encode various types of input prompts. These embeddings are then used by the mask 22 | decoder to predict object masks. 23 | 24 | Attributes: 25 | mask_threshold (float): Threshold value for mask prediction. 26 | image_format (str): Format of the input image, default is 'RGB'. 27 | image_encoder (ImageEncoderViT): The backbone used to encode the image into embeddings. 28 | prompt_encoder (PromptEncoder): Encodes various types of input prompts. 29 | mask_decoder (MaskDecoder): Predicts object masks from the image and prompt embeddings. 30 | pixel_mean (List[float]): Mean pixel values for image normalization. 31 | pixel_std (List[float]): Standard deviation values for image normalization. 32 | """ 33 | 34 | mask_threshold: float = 0.0 35 | image_format: str = "RGB" 36 | 37 | def __init__( 38 | self, 39 | image_encoder: ImageEncoderViT, 40 | prompt_encoder: PromptEncoder, 41 | mask_decoder: MaskDecoder, 42 | pixel_mean: List[float] = (123.675, 116.28, 103.53), 43 | pixel_std: List[float] = (58.395, 57.12, 57.375), 44 | ) -> None: 45 | """ 46 | Initialize the Sam class to predict object masks from an image and input prompts. 47 | 48 | Note: 49 | All forward() operations moved to SAMPredictor. 50 | 51 | Args: 52 | image_encoder (ImageEncoderViT): The backbone used to encode the image into image embeddings. 53 | prompt_encoder (PromptEncoder): Encodes various types of input prompts. 54 | mask_decoder (MaskDecoder): Predicts masks from the image embeddings and encoded prompts. 
55 | pixel_mean (List[float], optional): Mean values for normalizing pixels in the input image. Defaults to 56 | (123.675, 116.28, 103.53). 57 | pixel_std (List[float], optional): Std values for normalizing pixels in the input image. Defaults to 58 | (58.395, 57.12, 57.375). 59 | """ 60 | super().__init__() 61 | self.image_encoder = image_encoder 62 | self.prompt_encoder = prompt_encoder 63 | self.mask_decoder = mask_decoder 64 | self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False) 65 | self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False) 66 | -------------------------------------------------------------------------------- /ultralytics/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.models.yolo import classify, detect, obb, pose, segment 4 | 5 | from .model import YOLO 6 | 7 | __all__ = "classify", "segment", "detect", "pose", "obb", "YOLO" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/classify/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.models.yolo.classify.predict import ClassificationPredictor 4 | from ultralytics.models.yolo.classify.train import ClassificationTrainer 5 | from ultralytics.models.yolo.classify.val import ClassificationValidator 6 | 7 | __all__ = "ClassificationPredictor", "ClassificationTrainer", "ClassificationValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/classify/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import cv2 4 | import torch 5 | from PIL import Image 6 | 7 | from ultralytics.engine.predictor import BasePredictor 8 | from ultralytics.engine.results import Results 9 | from ultralytics.utils import DEFAULT_CFG, ops 10 | 11 | 12 | class ClassificationPredictor(BasePredictor): 13 | """ 14 | A class extending the BasePredictor class for prediction based on a classification model. 15 | 16 | Notes: 17 | - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'. 
18 | 19 | Example: 20 | ```python 21 | from ultralytics.utils import ASSETS 22 | from ultralytics.models.yolo.classify import ClassificationPredictor 23 | 24 | args = dict(model='yolov8n-cls.pt', source=ASSETS) 25 | predictor = ClassificationPredictor(overrides=args) 26 | predictor.predict_cli() 27 | ``` 28 | """ 29 | 30 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 31 | """Initializes ClassificationPredictor setting the task to 'classify'.""" 32 | super().__init__(cfg, overrides, _callbacks) 33 | self.args.task = "classify" 34 | self._legacy_transform_name = "ultralytics.yolo.data.augment.ToTensor" 35 | 36 | def preprocess(self, img): 37 | """Converts input image to model-compatible data type.""" 38 | if not isinstance(img, torch.Tensor): 39 | is_legacy_transform = any( 40 | self._legacy_transform_name in str(transform) for transform in self.transforms.transforms 41 | ) 42 | if is_legacy_transform: # to handle legacy transforms 43 | img = torch.stack([self.transforms(im) for im in img], dim=0) 44 | else: 45 | img = torch.stack( 46 | [self.transforms(Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))) for im in img], dim=0 47 | ) 48 | img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device) 49 | return img.half() if self.model.fp16 else img.float() # uint8 to fp16/32 50 | 51 | def postprocess(self, preds, img, orig_imgs): 52 | """Post-processes predictions to return Results objects.""" 53 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 54 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 55 | 56 | results = [] 57 | for i, pred in enumerate(preds): 58 | orig_img = orig_imgs[i] 59 | img_path = self.batch[0][i] 60 | results.append(Results(orig_img, path=img_path, names=self.model.names, probs=pred)) 61 | return results 62 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/detect/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import DetectionPredictor 4 | from .train import DetectionTrainer 5 | from .val import DetectionValidator 6 | 7 | __all__ = "DetectionPredictor", "DetectionTrainer", "DetectionValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/detect/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.predictor import BasePredictor 4 | from ultralytics.engine.results import Results 5 | from ultralytics.utils import ops 6 | 7 | 8 | class DetectionPredictor(BasePredictor): 9 | """ 10 | A class extending the BasePredictor class for prediction based on a detection model. 
11 | 12 | Example: 13 | ```python 14 | from ultralytics.utils import ASSETS 15 | from ultralytics.models.yolo.detect import DetectionPredictor 16 | 17 | args = dict(model='yolov8n.pt', source=ASSETS) 18 | predictor = DetectionPredictor(overrides=args) 19 | predictor.predict_cli() 20 | ``` 21 | """ 22 | 23 | def postprocess(self, preds, img, orig_imgs): 24 | """Post-processes predictions and returns a list of Results objects.""" 25 | preds = ops.non_max_suppression( 26 | preds, 27 | self.args.conf, 28 | self.args.iou, 29 | agnostic=self.args.agnostic_nms, 30 | max_det=self.args.max_det, 31 | classes=self.args.classes, 32 | ) 33 | 34 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 35 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 36 | 37 | results = [] 38 | for i, pred in enumerate(preds): 39 | orig_img = orig_imgs[i] 40 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 41 | img_path = self.batch[0][i] 42 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) 43 | return results 44 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/model.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.model import Model 4 | from ultralytics.models import yolo 5 | from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel 6 | 7 | 8 | class YOLO(Model): 9 | """YOLO (You Only Look Once) object detection model.""" 10 | 11 | @property 12 | def task_map(self): 13 | """Map head to model, trainer, validator, and predictor classes.""" 14 | return { 15 | "classify": { 16 | "model": ClassificationModel, 17 | "trainer": yolo.classify.ClassificationTrainer, 18 | "validator": yolo.classify.ClassificationValidator, 19 | "predictor": yolo.classify.ClassificationPredictor, 20 | }, 21 | "detect": { 22 | "model": DetectionModel, 23 | "trainer": yolo.detect.DetectionTrainer, 24 | "validator": yolo.detect.DetectionValidator, 25 | "predictor": yolo.detect.DetectionPredictor, 26 | }, 27 | "segment": { 28 | "model": SegmentationModel, 29 | "trainer": yolo.segment.SegmentationTrainer, 30 | "validator": yolo.segment.SegmentationValidator, 31 | "predictor": yolo.segment.SegmentationPredictor, 32 | }, 33 | "pose": { 34 | "model": PoseModel, 35 | "trainer": yolo.pose.PoseTrainer, 36 | "validator": yolo.pose.PoseValidator, 37 | "predictor": yolo.pose.PosePredictor, 38 | }, 39 | "obb": { 40 | "model": OBBModel, 41 | "trainer": yolo.obb.OBBTrainer, 42 | "validator": yolo.obb.OBBValidator, 43 | "predictor": yolo.obb.OBBPredictor, 44 | }, 45 | } 46 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/obb/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import OBBPredictor 4 | from .train import OBBTrainer 5 | from .val import OBBValidator 6 | 7 | __all__ = "OBBPredictor", "OBBTrainer", "OBBValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/obb/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.engine.results import Results 6 | from 
ultralytics.models.yolo.detect.predict import DetectionPredictor 7 | from ultralytics.utils import DEFAULT_CFG, ops 8 | 9 | 10 | class OBBPredictor(DetectionPredictor): 11 | """ 12 | A class extending the DetectionPredictor class for prediction based on an Oriented Bounding Box (OBB) model. 13 | 14 | Example: 15 | ```python 16 | from ultralytics.utils import ASSETS 17 | from ultralytics.models.yolo.obb import OBBPredictor 18 | 19 | args = dict(model='yolov8n-obb.pt', source=ASSETS) 20 | predictor = OBBPredictor(overrides=args) 21 | predictor.predict_cli() 22 | ``` 23 | """ 24 | 25 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 26 | """Initializes OBBPredictor with optional model and data configuration overrides.""" 27 | super().__init__(cfg, overrides, _callbacks) 28 | self.args.task = "obb" 29 | 30 | def postprocess(self, preds, img, orig_imgs): 31 | """Post-processes predictions and returns a list of Results objects.""" 32 | preds = ops.non_max_suppression( 33 | preds, 34 | self.args.conf, 35 | self.args.iou, 36 | agnostic=self.args.agnostic_nms, 37 | max_det=self.args.max_det, 38 | nc=len(self.model.names), 39 | classes=self.args.classes, 40 | rotated=True, 41 | ) 42 | 43 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 44 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 45 | 46 | results = [] 47 | for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]): 48 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape, xywh=True) 49 | # xywh, r, conf, cls 50 | obb = torch.cat([pred[:, :4], pred[:, -1:], pred[:, 4:6]], dim=-1) 51 | results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb)) 52 | return results 53 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/obb/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | from ultralytics.models import yolo 6 | from ultralytics.nn.tasks import OBBModel 7 | from ultralytics.utils import DEFAULT_CFG, RANK 8 | 9 | 10 | class OBBTrainer(yolo.detect.DetectionTrainer): 11 | """ 12 | A class extending the DetectionTrainer class for training based on an Oriented Bounding Box (OBB) model. 
13 | 14 | Example: 15 | ```python 16 | from ultralytics.models.yolo.obb import OBBTrainer 17 | 18 | args = dict(model='yolov8n-obb.pt', data='dota8.yaml', epochs=3) 19 | trainer = OBBTrainer(overrides=args) 20 | trainer.train() 21 | ``` 22 | """ 23 | 24 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 25 | """Initialize an OBBTrainer object with given arguments.""" 26 | if overrides is None: 27 | overrides = {} 28 | overrides["task"] = "obb" 29 | super().__init__(cfg, overrides, _callbacks) 30 | 31 | def get_model(self, cfg=None, weights=None, verbose=True): 32 | """Return OBBModel initialized with specified config and weights.""" 33 | model = OBBModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose and RANK == -1) 34 | if weights: 35 | model.load(weights) 36 | 37 | return model 38 | 39 | def get_validator(self): 40 | """Return an instance of OBBValidator for validation of YOLO model.""" 41 | self.loss_names = "box_loss", "cls_loss", "dfl_loss" 42 | return yolo.obb.OBBValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) 43 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/pose/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import PosePredictor 4 | from .train import PoseTrainer 5 | from .val import PoseValidator 6 | 7 | __all__ = "PoseTrainer", "PoseValidator", "PosePredictor" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/pose/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.results import Results 4 | from ultralytics.models.yolo.detect.predict import DetectionPredictor 5 | from ultralytics.utils import DEFAULT_CFG, LOGGER, ops 6 | 7 | 8 | class PosePredictor(DetectionPredictor): 9 | """ 10 | A class extending the DetectionPredictor class for prediction based on a pose model. 11 | 12 | Example: 13 | ```python 14 | from ultralytics.utils import ASSETS 15 | from ultralytics.models.yolo.pose import PosePredictor 16 | 17 | args = dict(model='yolov8n-pose.pt', source=ASSETS) 18 | predictor = PosePredictor(overrides=args) 19 | predictor.predict_cli() 20 | ``` 21 | """ 22 | 23 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 24 | """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device.""" 25 | super().__init__(cfg, overrides, _callbacks) 26 | self.args.task = "pose" 27 | if isinstance(self.args.device, str) and self.args.device.lower() == "mps": 28 | LOGGER.warning( 29 | "WARNING ⚠️ Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. " 30 | "See https://github.com/ultralytics/ultralytics/issues/4031."
31 | ) 32 | 33 | def postprocess(self, preds, img, orig_imgs): 34 | """Return detection results for a given input image or list of images.""" 35 | preds = ops.non_max_suppression( 36 | preds, 37 | self.args.conf, 38 | self.args.iou, 39 | agnostic=self.args.agnostic_nms, 40 | max_det=self.args.max_det, 41 | classes=self.args.classes, 42 | nc=len(self.model.names), 43 | ) 44 | 45 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 46 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 47 | 48 | results = [] 49 | for i, pred in enumerate(preds): 50 | orig_img = orig_imgs[i] 51 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape).round() 52 | pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:] 53 | pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape) 54 | img_path = self.batch[0][i] 55 | results.append( 56 | Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts) 57 | ) 58 | return results 59 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/pose/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | from ultralytics.models import yolo 6 | from ultralytics.nn.tasks import PoseModel 7 | from ultralytics.utils import DEFAULT_CFG, LOGGER 8 | from ultralytics.utils.plotting import plot_images, plot_results 9 | 10 | 11 | class PoseTrainer(yolo.detect.DetectionTrainer): 12 | """ 13 | A class extending the DetectionTrainer class for training based on a pose model. 14 | 15 | Example: 16 | ```python 17 | from ultralytics.models.yolo.pose import PoseTrainer 18 | 19 | args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml', epochs=3) 20 | trainer = PoseTrainer(overrides=args) 21 | trainer.train() 22 | ``` 23 | """ 24 | 25 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 26 | """Initialize a PoseTrainer object with specified configurations and overrides.""" 27 | if overrides is None: 28 | overrides = {} 29 | overrides["task"] = "pose" 30 | super().__init__(cfg, overrides, _callbacks) 31 | 32 | if isinstance(self.args.device, str) and self.args.device.lower() == "mps": 33 | LOGGER.warning( 34 | "WARNING ⚠️ Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. " 35 | "See https://github.com/ultralytics/ultralytics/issues/4031." 
36 | ) 37 | 38 | def get_model(self, cfg=None, weights=None, verbose=True): 39 | """Get pose estimation model with specified configuration and weights.""" 40 | model = PoseModel(cfg, ch=3, nc=self.data["nc"], data_kpt_shape=self.data["kpt_shape"], verbose=verbose) 41 | if weights: 42 | model.load(weights) 43 | 44 | return model 45 | 46 | def set_model_attributes(self): 47 | """Sets keypoints shape attribute of PoseModel.""" 48 | super().set_model_attributes() 49 | self.model.kpt_shape = self.data["kpt_shape"] 50 | 51 | def get_validator(self): 52 | """Returns an instance of the PoseValidator class for validation.""" 53 | self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss" 54 | return yolo.pose.PoseValidator( 55 | self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks 56 | ) 57 | 58 | def plot_training_samples(self, batch, ni): 59 | """Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints.""" 60 | images = batch["img"] 61 | kpts = batch["keypoints"] 62 | cls = batch["cls"].squeeze(-1) 63 | bboxes = batch["bboxes"] 64 | paths = batch["im_file"] 65 | batch_idx = batch["batch_idx"] 66 | plot_images( 67 | images, 68 | batch_idx, 69 | cls, 70 | bboxes, 71 | kpts=kpts, 72 | paths=paths, 73 | fname=self.save_dir / f"train_batch{ni}.jpg", 74 | on_plot=self.on_plot, 75 | ) 76 | 77 | def plot_metrics(self): 78 | """Plots training/val metrics.""" 79 | plot_results(file=self.csv, pose=True, on_plot=self.on_plot) # save results.png 80 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/segment/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import SegmentationPredictor 4 | from .train import SegmentationTrainer 5 | from .val import SegmentationValidator 6 | 7 | __all__ = "SegmentationPredictor", "SegmentationTrainer", "SegmentationValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/segment/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.results import Results 4 | from ultralytics.models.yolo.detect.predict import DetectionPredictor 5 | from ultralytics.utils import DEFAULT_CFG, ops 6 | 7 | 8 | class SegmentationPredictor(DetectionPredictor): 9 | """ 10 | A class extending the DetectionPredictor class for prediction based on a segmentation model. 
11 | 12 | Example: 13 | ```python 14 | from ultralytics.utils import ASSETS 15 | from ultralytics.models.yolo.segment import SegmentationPredictor 16 | 17 | args = dict(model='yolov8n-seg.pt', source=ASSETS) 18 | predictor = SegmentationPredictor(overrides=args) 19 | predictor.predict_cli() 20 | ``` 21 | """ 22 | 23 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 24 | """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks.""" 25 | super().__init__(cfg, overrides, _callbacks) 26 | self.args.task = "segment" 27 | 28 | def postprocess(self, preds, img, orig_imgs): 29 | """Applies non-max suppression and processes detections for each image in an input batch.""" 30 | p = ops.non_max_suppression( 31 | preds[0], 32 | self.args.conf, 33 | self.args.iou, 34 | agnostic=self.args.agnostic_nms, 35 | max_det=self.args.max_det, 36 | nc=len(self.model.names), 37 | classes=self.args.classes, 38 | ) 39 | 40 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 41 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 42 | 43 | results = [] 44 | proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported 45 | for i, pred in enumerate(p): 46 | orig_img = orig_imgs[i] 47 | img_path = self.batch[0][i] 48 | if not len(pred): # save empty boxes 49 | masks = None 50 | elif self.args.retina_masks: 51 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 52 | masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC 53 | else: 54 | masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC 55 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 56 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) 57 | return results 58 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/segment/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | from ultralytics.models import yolo 6 | from ultralytics.nn.tasks import SegmentationModel 7 | from ultralytics.utils import DEFAULT_CFG, RANK 8 | from ultralytics.utils.plotting import plot_images, plot_results 9 | 10 | 11 | class SegmentationTrainer(yolo.detect.DetectionTrainer): 12 | """ 13 | A class extending the DetectionTrainer class for training based on a segmentation model. 
14 | 15 | Example: 16 | ```python 17 | from ultralytics.models.yolo.segment import SegmentationTrainer 18 | 19 | args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3) 20 | trainer = SegmentationTrainer(overrides=args) 21 | trainer.train() 22 | ``` 23 | """ 24 | 25 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 26 | """Initialize a SegmentationTrainer object with given arguments.""" 27 | if overrides is None: 28 | overrides = {} 29 | overrides["task"] = "segment" 30 | super().__init__(cfg, overrides, _callbacks) 31 | 32 | def get_model(self, cfg=None, weights=None, verbose=True): 33 | """Return SegmentationModel initialized with specified config and weights.""" 34 | model = SegmentationModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose and RANK == -1) 35 | if weights: 36 | model.load(weights) 37 | 38 | return model 39 | 40 | def get_validator(self): 41 | """Return an instance of SegmentationValidator for validation of YOLO model.""" 42 | self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss" 43 | return yolo.segment.SegmentationValidator( 44 | self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks 45 | ) 46 | 47 | def plot_training_samples(self, batch, ni): 48 | """Creates a plot of training sample images with labels and box coordinates.""" 49 | plot_images( 50 | batch["img"], 51 | batch["batch_idx"], 52 | batch["cls"].squeeze(-1), 53 | batch["bboxes"], 54 | masks=batch["masks"], 55 | paths=batch["im_file"], 56 | fname=self.save_dir / f"train_batch{ni}.jpg", 57 | on_plot=self.on_plot, 58 | ) 59 | 60 | def plot_metrics(self): 61 | """Plots training/val metrics.""" 62 | plot_results(file=self.csv, segment=True, on_plot=self.on_plot) # save results.png 63 | -------------------------------------------------------------------------------- /ultralytics/nn/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .tasks import ( 4 | BaseModel, 5 | ClassificationModel, 6 | DetectionModel, 7 | SegmentationModel, 8 | attempt_load_one_weight, 9 | attempt_load_weights, 10 | guess_model_scale, 11 | guess_model_task, 12 | parse_model, 13 | torch_safe_load, 14 | yaml_model_load, 15 | ) 16 | 17 | __all__ = ( 18 | "attempt_load_one_weight", 19 | "attempt_load_weights", 20 | "parse_model", 21 | "yaml_model_load", 22 | "guess_model_task", 23 | "guess_model_scale", 24 | "torch_safe_load", 25 | "DetectionModel", 26 | "SegmentationModel", 27 | "ClassificationModel", 28 | "BaseModel", 29 | ) 30 | -------------------------------------------------------------------------------- /ultralytics/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """ 3 | Ultralytics modules. 4 | 5 | Example: 6 | Visualize a module with Netron. 
7 | ```python 8 | from ultralytics.nn.modules import * 9 | import torch 10 | import os 11 | 12 | x = torch.ones(1, 128, 40, 40) 13 | m = Conv(128, 128) 14 | f = f'{m._get_name()}.onnx' 15 | torch.onnx.export(m, x, f) 16 | os.system(f'onnxsim {f} {f} && open {f}') 17 | ``` 18 | """ 19 | 20 | from .block import ( 21 | C1, 22 | C2, 23 | C3, 24 | C3TR, 25 | DFL, 26 | SPP, 27 | SPPF, 28 | Bottleneck, 29 | BottleneckCSP, 30 | C2f, 31 | C3Ghost, 32 | C3x, 33 | GhostBottleneck, 34 | HGBlock, 35 | HGStem, 36 | Proto, 37 | RepC3, 38 | ResNetLayer, 39 | ) 40 | from .conv import ( 41 | CBAM, 42 | ChannelAttention, 43 | Concat, 44 | Conv, 45 | Conv2, 46 | ConvTranspose, 47 | DWConv, 48 | DWConvTranspose2d, 49 | Focus, 50 | GhostConv, 51 | LightConv, 52 | RepConv, 53 | SpatialAttention, 54 | ) 55 | from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment 56 | from .transformer import ( 57 | AIFI, 58 | MLP, 59 | DeformableTransformerDecoder, 60 | DeformableTransformerDecoderLayer, 61 | LayerNorm2d, 62 | MLPBlock, 63 | MSDeformAttn, 64 | TransformerBlock, 65 | TransformerEncoderLayer, 66 | TransformerLayer, 67 | ) 68 | 69 | __all__ = ( 70 | "Conv", 71 | "Conv2", 72 | "LightConv", 73 | "RepConv", 74 | "DWConv", 75 | "DWConvTranspose2d", 76 | "ConvTranspose", 77 | "Focus", 78 | "GhostConv", 79 | "ChannelAttention", 80 | "SpatialAttention", 81 | "CBAM", 82 | "Concat", 83 | "TransformerLayer", 84 | "TransformerBlock", 85 | "MLPBlock", 86 | "LayerNorm2d", 87 | "DFL", 88 | "HGBlock", 89 | "HGStem", 90 | "SPP", 91 | "SPPF", 92 | "C1", 93 | "C2", 94 | "C3", 95 | "C2f", 96 | "C3x", 97 | "C3TR", 98 | "C3Ghost", 99 | "GhostBottleneck", 100 | "Bottleneck", 101 | "BottleneckCSP", 102 | "Proto", 103 | "Detect", 104 | "Segment", 105 | "Pose", 106 | "Classify", 107 | "TransformerEncoderLayer", 108 | "RepC3", 109 | "RTDETRDecoder", 110 | "AIFI", 111 | "DeformableTransformerDecoder", 112 | "DeformableTransformerDecoderLayer", 113 | "MSDeformAttn", 114 | "MLP", 115 | "ResNetLayer", 116 | "OBB", 117 | ) 118 | -------------------------------------------------------------------------------- /ultralytics/nn/modules/utils.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """Module utils.""" 3 | 4 | import copy 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torch.nn.init import uniform_ 12 | 13 | __all__ = "multi_scale_deformable_attn_pytorch", "inverse_sigmoid" 14 | 15 | 16 | def _get_clones(module, n): 17 | """Create a list of cloned modules from the given module.""" 18 | return nn.ModuleList([copy.deepcopy(module) for _ in range(n)]) 19 | 20 | 21 | def bias_init_with_prob(prior_prob=0.01): 22 | """Initialize conv/fc bias value according to a given probability value.""" 23 | return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init 24 | 25 | 26 | def linear_init(module): 27 | """Initialize the weights and biases of a linear module.""" 28 | bound = 1 / math.sqrt(module.weight.shape[0]) 29 | uniform_(module.weight, -bound, bound) 30 | if hasattr(module, "bias") and module.bias is not None: 31 | uniform_(module.bias, -bound, bound) 32 | 33 | 34 | def inverse_sigmoid(x, eps=1e-5): 35 | """Calculate the inverse sigmoid function for a tensor.""" 36 | x = x.clamp(min=0, max=1) 37 | x1 = x.clamp(min=eps) 38 | x2 = (1 - x).clamp(min=eps) 39 | return torch.log(x1 / x2) 40 | 41 | 42 | def multi_scale_deformable_attn_pytorch( 43 | value: 
torch.Tensor, 44 | value_spatial_shapes: torch.Tensor, 45 | sampling_locations: torch.Tensor, 46 | attention_weights: torch.Tensor, 47 | ) -> torch.Tensor: 48 | """ 49 | Multi-scale deformable attention. 50 | 51 | https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py 52 | """ 53 | 54 | bs, _, num_heads, embed_dims = value.shape 55 | _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape 56 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 57 | sampling_grids = 2 * sampling_locations - 1 58 | sampling_value_list = [] 59 | for level, (H_, W_) in enumerate(value_spatial_shapes): 60 | # bs, H_*W_, num_heads, embed_dims -> 61 | # bs, H_*W_, num_heads*embed_dims -> 62 | # bs, num_heads*embed_dims, H_*W_ -> 63 | # bs*num_heads, embed_dims, H_, W_ 64 | value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_) 65 | # bs, num_queries, num_heads, num_points, 2 -> 66 | # bs, num_heads, num_queries, num_points, 2 -> 67 | # bs*num_heads, num_queries, num_points, 2 68 | sampling_grid_l_ = sampling_grids[:, :, :, level].transpose(1, 2).flatten(0, 1) 69 | # bs*num_heads, embed_dims, num_queries, num_points 70 | sampling_value_l_ = F.grid_sample( 71 | value_l_, sampling_grid_l_, mode="bilinear", padding_mode="zeros", align_corners=False 72 | ) 73 | sampling_value_list.append(sampling_value_l_) 74 | # (bs, num_queries, num_heads, num_levels, num_points) -> 75 | # (bs, num_heads, num_queries, num_levels, num_points) -> 76 | # (bs, num_heads, 1, num_queries, num_levels*num_points) 77 | attention_weights = attention_weights.transpose(1, 2).reshape( 78 | bs * num_heads, 1, num_queries, num_levels * num_points 79 | ) 80 | output = ( 81 | (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights) 82 | .sum(-1) 83 | .view(bs, num_heads * embed_dims, num_queries) 84 | ) 85 | return output.transpose(1, 2).contiguous() 86 | -------------------------------------------------------------------------------- /ultralytics/solutions/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .bot_sort import BOTSORT 4 | from .byte_tracker import BYTETracker 5 | from .track import register_tracker 6 | 7 | __all__ = "register_tracker", "BOTSORT", "BYTETracker" # allow simpler import 8 | -------------------------------------------------------------------------------- /ultralytics/trackers/basetrack.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """This module defines the base classes and structures for object tracking in YOLO.""" 3 | 4 | from collections import OrderedDict 5 | 6 | import numpy as np 7 | 8 | 9 | class TrackState: 10 | """ 11 | Enumeration class representing the possible states of an object being tracked. 12 | 13 | Attributes: 14 | New (int): State when the object is newly detected. 15 | Tracked (int): State when the object is successfully tracked in subsequent frames. 16 | Lost (int): State when the object is no longer tracked. 17 | Removed (int): State when the object is removed from tracking. 
18 | """ 19 | 20 | New = 0 21 | Tracked = 1 22 | Lost = 2 23 | Removed = 3 24 | 25 | 26 | class BaseTrack: 27 | """ 28 | Base class for object tracking, providing foundational attributes and methods. 29 | 30 | Attributes: 31 | _count (int): Class-level counter for unique track IDs. 32 | track_id (int): Unique identifier for the track. 33 | is_activated (bool): Flag indicating whether the track is currently active. 34 | state (TrackState): Current state of the track. 35 | history (OrderedDict): Ordered history of the track's states. 36 | features (list): List of features extracted from the object for tracking. 37 | curr_feature (any): The current feature of the object being tracked. 38 | score (float): The confidence score of the tracking. 39 | start_frame (int): The frame number where tracking started. 40 | frame_id (int): The most recent frame ID processed by the track. 41 | time_since_update (int): Frames passed since the last update. 42 | location (tuple): The location of the object in the context of multi-camera tracking. 43 | 44 | Methods: 45 | end_frame: Returns the ID of the last frame where the object was tracked. 46 | next_id: Increments and returns the next global track ID. 47 | activate: Abstract method to activate the track. 48 | predict: Abstract method to predict the next state of the track. 49 | update: Abstract method to update the track with new data. 50 | mark_lost: Marks the track as lost. 51 | mark_removed: Marks the track as removed. 52 | reset_id: Resets the global track ID counter. 53 | """ 54 | 55 | _count = 0 56 | 57 | def __init__(self): 58 | """Initializes a new track with unique ID and foundational tracking attributes.""" 59 | self.track_id = 0 60 | self.is_activated = False 61 | self.state = TrackState.New 62 | self.history = OrderedDict() 63 | self.features = [] 64 | self.curr_feature = None 65 | self.score = 0 66 | self.start_frame = 0 67 | self.frame_id = 0 68 | self.time_since_update = 0 69 | self.location = (np.inf, np.inf) 70 | 71 | @property 72 | def end_frame(self): 73 | """Return the last frame ID of the track.""" 74 | return self.frame_id 75 | 76 | @staticmethod 77 | def next_id(): 78 | """Increment and return the global track ID counter.""" 79 | BaseTrack._count += 1 80 | return BaseTrack._count 81 | 82 | def activate(self, *args): 83 | """Abstract method to activate the track with provided arguments.""" 84 | raise NotImplementedError 85 | 86 | def predict(self): 87 | """Abstract method to predict the next state of the track.""" 88 | raise NotImplementedError 89 | 90 | def update(self, *args, **kwargs): 91 | """Abstract method to update the track with new observations.""" 92 | raise NotImplementedError 93 | 94 | def mark_lost(self): 95 | """Mark the track as lost.""" 96 | self.state = TrackState.Lost 97 | 98 | def mark_removed(self): 99 | """Mark the track as removed.""" 100 | self.state = TrackState.Removed 101 | 102 | @staticmethod 103 | def reset_id(): 104 | """Reset the global track ID counter.""" 105 | BaseTrack._count = 0 106 | -------------------------------------------------------------------------------- /ultralytics/trackers/track.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from functools import partial 4 | from pathlib import Path 5 | 6 | import torch 7 | 8 | from ultralytics.utils import IterableSimpleNamespace, yaml_load 9 | from ultralytics.utils.checks import check_yaml 10 | from .bot_sort import BOTSORT 11 | from .byte_tracker import 
BYTETracker 12 | 13 | # A mapping of tracker types to corresponding tracker classes 14 | TRACKER_MAP = {"bytetrack": BYTETracker, "botsort": BOTSORT} 15 | 16 | 17 | def on_predict_start(predictor: object, persist: bool = False) -> None: 18 | """ 19 | Initialize trackers for object tracking during prediction. 20 | 21 | Args: 22 | predictor (object): The predictor object to initialize trackers for. 23 | persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False. 24 | 25 | Raises: 26 | AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'. 27 | """ 28 | if predictor.args.task == "obb": 29 | raise NotImplementedError("ERROR ❌ OBB task does not support track mode!") 30 | if hasattr(predictor, "trackers") and persist: 31 | return 32 | 33 | tracker = check_yaml(predictor.args.tracker) 34 | cfg = IterableSimpleNamespace(**yaml_load(tracker)) 35 | 36 | if cfg.tracker_type not in ["bytetrack", "botsort"]: 37 | raise AssertionError(f"Only 'bytetrack' and 'botsort' are supported for now, but got '{cfg.tracker_type}'") 38 | 39 | trackers = [] 40 | for _ in range(predictor.dataset.bs): 41 | tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30) 42 | trackers.append(tracker) 43 | predictor.trackers = trackers 44 | 45 | 46 | def on_predict_postprocess_end(predictor: object, persist: bool = False) -> None: 47 | """ 48 | Postprocess detected boxes and update with object tracking. 49 | 50 | Args: 51 | predictor (object): The predictor object containing the predictions. 52 | persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False. 53 | """ 54 | bs = predictor.dataset.bs 55 | path, im0s = predictor.batch[:2] 56 | 57 | for i in range(bs): 58 | if not persist and predictor.vid_path[i] != str(predictor.save_dir / Path(path[i]).name): # new video 59 | predictor.trackers[i].reset() 60 | 61 | det = predictor.results[i].boxes.cpu().numpy() 62 | if len(det) == 0: 63 | continue 64 | tracks = predictor.trackers[i].update(det, im0s[i]) 65 | if len(tracks) == 0: 66 | continue 67 | idx = tracks[:, -1].astype(int) 68 | predictor.results[i] = predictor.results[i][idx] 69 | predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1])) 70 | 71 | 72 | def register_tracker(model: object, persist: bool) -> None: 73 | """ 74 | Register tracking callbacks to the model for object tracking during prediction. 75 | 76 | Args: 77 | model (object): The model object to register tracking callbacks for. 78 | persist (bool): Whether to persist the trackers if they already exist. 
79 | """ 80 | model.add_callback("on_predict_start", partial(on_predict_start, persist=persist)) 81 | model.add_callback("on_predict_postprocess_end", partial(on_predict_postprocess_end, persist=persist)) 82 | -------------------------------------------------------------------------------- /ultralytics/trackers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/utils/autobatch.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.""" 3 | 4 | from copy import deepcopy 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr 10 | from ultralytics.utils.torch_utils import profile 11 | 12 | 13 | def check_train_batch_size(model, imgsz=640, amp=True): 14 | """ 15 | Check YOLO training batch size using the autobatch() function. 16 | 17 | Args: 18 | model (torch.nn.Module): YOLO model to check batch size for. 19 | imgsz (int): Image size used for training. 20 | amp (bool): If True, use automatic mixed precision (AMP) for training. 21 | 22 | Returns: 23 | (int): Optimal batch size computed using the autobatch() function. 24 | """ 25 | 26 | with torch.cuda.amp.autocast(amp): 27 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 28 | 29 | 30 | def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): 31 | """ 32 | Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory. 33 | 34 | Args: 35 | model (torch.nn.module): YOLO model to compute batch size for. 36 | imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640. 37 | fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60. 38 | batch_size (int, optional): The default batch size to use if an error is detected. Defaults to 16. 39 | 40 | Returns: 41 | (int): The optimal batch size. 
42 | """ 43 | 44 | # Check device 45 | prefix = colorstr("AutoBatch: ") 46 | LOGGER.info(f"{prefix}Computing optimal batch size for imgsz={imgsz}") 47 | device = next(model.parameters()).device # get model device 48 | if device.type == "cpu": 49 | LOGGER.info(f"{prefix}CUDA not detected, using default CPU batch-size {batch_size}") 50 | return batch_size 51 | if torch.backends.cudnn.benchmark: 52 | LOGGER.info(f"{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}") 53 | return batch_size 54 | 55 | # Inspect CUDA memory 56 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 57 | d = str(device).upper() # 'CUDA:0' 58 | properties = torch.cuda.get_device_properties(device) # device properties 59 | t = properties.total_memory / gb # GiB total 60 | r = torch.cuda.memory_reserved(device) / gb # GiB reserved 61 | a = torch.cuda.memory_allocated(device) / gb # GiB allocated 62 | f = t - (r + a) # GiB free 63 | LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free") 64 | 65 | # Profile batch sizes 66 | batch_sizes = [1, 2, 4, 8, 16] 67 | try: 68 | img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] 69 | results = profile(img, model, n=3, device=device) 70 | 71 | # Fit a solution 72 | y = [x[2] for x in results if x] # memory [2] 73 | p = np.polyfit(batch_sizes[: len(y)], y, deg=1) # first degree polynomial fit 74 | b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) 75 | if None in results: # some sizes failed 76 | i = results.index(None) # first fail index 77 | if b >= batch_sizes[i]: # y intercept above failure point 78 | b = batch_sizes[max(i - 1, 0)] # select prior safe point 79 | if b < 1 or b > 1024: # b outside of safe range 80 | b = batch_size 81 | LOGGER.info(f"{prefix}WARNING ⚠️ CUDA anomaly detected, using default batch-size {batch_size}.") 82 | 83 | fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted 84 | LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅") 85 | return b 86 | except Exception as e: 87 | LOGGER.warning(f"{prefix}WARNING ⚠️ error detected: {e}, using default batch-size {batch_size}.") 88 | return batch_size 89 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .base import add_integration_callbacks, default_callbacks, get_default_callbacks 4 | 5 | __all__ = "add_integration_callbacks", "default_callbacks", "get_default_callbacks" 6 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/hub.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import json 4 | from time import time 5 | 6 | from ultralytics.hub.utils import HUB_WEB_ROOT, PREFIX, events 7 | from ultralytics.utils import LOGGER, SETTINGS 8 | 9 | 10 | def on_pretrain_routine_end(trainer): 11 | """Logs info before starting timer for upload rate limit.""" 12 | session = getattr(trainer, "hub_session", None) 13 | if session: 14 | # Start timer for upload rate limit 15 | session.timers = { 16 | "metrics": time(), 17 | "ckpt": time(), 18 | } # start timer on session.rate_limit 19 | 20 | 21 | def on_fit_epoch_end(trainer): 22 | """Uploads training progress metrics at the end of 
each epoch.""" 23 | session = getattr(trainer, "hub_session", None) 24 | if session: 25 | # Upload metrics after val end 26 | all_plots = { 27 | **trainer.label_loss_items(trainer.tloss, prefix="train"), 28 | **trainer.metrics, 29 | } 30 | if trainer.epoch == 0: 31 | from ultralytics.utils.torch_utils import model_info_for_loggers 32 | 33 | all_plots = {**all_plots, **model_info_for_loggers(trainer)} 34 | 35 | session.metrics_queue[trainer.epoch] = json.dumps(all_plots) 36 | if time() - session.timers["metrics"] > session.rate_limits["metrics"]: 37 | session.upload_metrics() 38 | session.timers["metrics"] = time() # reset timer 39 | session.metrics_queue = {} # reset queue 40 | 41 | 42 | def on_model_save(trainer): 43 | """Saves checkpoints to Ultralytics HUB with rate limiting.""" 44 | session = getattr(trainer, "hub_session", None) 45 | if session: 46 | # Upload checkpoints with rate limiting 47 | is_best = trainer.best_fitness == trainer.fitness 48 | if time() - session.timers["ckpt"] > session.rate_limits["ckpt"]: 49 | LOGGER.info(f"{PREFIX}Uploading checkpoint {HUB_WEB_ROOT}/models/{session.model_file}") 50 | session.upload_model(trainer.epoch, trainer.last, is_best) 51 | session.timers["ckpt"] = time() # reset timer 52 | 53 | 54 | def on_train_end(trainer): 55 | """Upload final model and metrics to Ultralytics HUB at the end of training.""" 56 | session = getattr(trainer, "hub_session", None) 57 | if session: 58 | # Upload final model and metrics with exponential standoff 59 | LOGGER.info(f"{PREFIX}Syncing final model...") 60 | session.upload_model( 61 | trainer.epoch, 62 | trainer.best, 63 | map=trainer.metrics.get("metrics/mAP50-95(B)", 0), 64 | final=True, 65 | ) 66 | session.alive = False # stop heartbeats 67 | LOGGER.info(f"{PREFIX}Done ✅\n" f"{PREFIX}View model at {session.model_url} 🚀") 68 | 69 | 70 | def on_train_start(trainer): 71 | """Run events on train start.""" 72 | events(trainer.args) 73 | 74 | 75 | def on_val_start(validator): 76 | """Runs events on validation start.""" 77 | events(validator.args) 78 | 79 | 80 | def on_predict_start(predictor): 81 | """Run events on predict start.""" 82 | events(predictor.args) 83 | 84 | 85 | def on_export_start(exporter): 86 | """Run events on export start.""" 87 | events(exporter.args) 88 | 89 | 90 | callbacks = ( 91 | { 92 | "on_pretrain_routine_end": on_pretrain_routine_end, 93 | "on_fit_epoch_end": on_fit_epoch_end, 94 | "on_model_save": on_model_save, 95 | "on_train_end": on_train_end, 96 | "on_train_start": on_train_start, 97 | "on_val_start": on_val_start, 98 | "on_predict_start": on_predict_start, 99 | "on_export_start": on_export_start, 100 | } 101 | if SETTINGS["hub"] is True 102 | else {} 103 | ) # verify enabled 104 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/neptune.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING 4 | 5 | try: 6 | assert not TESTS_RUNNING # do not log pytest 7 | assert SETTINGS["neptune"] is True # verify integration is enabled 8 | import neptune 9 | from neptune.types import File 10 | 11 | assert hasattr(neptune, "__version__") 12 | 13 | run = None # NeptuneAI experiment logger instance 14 | 15 | except (ImportError, AssertionError): 16 | neptune = None 17 | 18 | 19 | def _log_scalars(scalars, step=0): 20 | """Log scalars to the NeptuneAI experiment logger.""" 21 | if run: 22 | for k, v in 
scalars.items(): 23 | run[k].append(value=v, step=step) 24 | 25 | 26 | def _log_images(imgs_dict, group=""): 27 | """Log scalars to the NeptuneAI experiment logger.""" 28 | if run: 29 | for k, v in imgs_dict.items(): 30 | run[f"{group}/{k}"].upload(File(v)) 31 | 32 | 33 | def _log_plot(title, plot_path): 34 | """ 35 | Log plots to the NeptuneAI experiment logger. 36 | 37 | Args: 38 | title (str): Title of the plot. 39 | plot_path (PosixPath | str): Path to the saved image file. 40 | """ 41 | import matplotlib.image as mpimg 42 | import matplotlib.pyplot as plt 43 | 44 | img = mpimg.imread(plot_path) 45 | fig = plt.figure() 46 | ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect="auto", xticks=[], yticks=[]) # no ticks 47 | ax.imshow(img) 48 | run[f"Plots/{title}"].upload(fig) 49 | 50 | 51 | def on_pretrain_routine_start(trainer): 52 | """Callback function called before the training routine starts.""" 53 | try: 54 | global run 55 | run = neptune.init_run(project=trainer.args.project or "YOLOv8", name=trainer.args.name, tags=["YOLOv8"]) 56 | run["Configuration/Hyperparameters"] = {k: "" if v is None else v for k, v in vars(trainer.args).items()} 57 | except Exception as e: 58 | LOGGER.warning(f"WARNING ⚠️ NeptuneAI installed but not initialized correctly, not logging this run. {e}") 59 | 60 | 61 | def on_train_epoch_end(trainer): 62 | """Callback function called at end of each training epoch.""" 63 | _log_scalars(trainer.label_loss_items(trainer.tloss, prefix="train"), trainer.epoch + 1) 64 | _log_scalars(trainer.lr, trainer.epoch + 1) 65 | if trainer.epoch == 1: 66 | _log_images({f.stem: str(f) for f in trainer.save_dir.glob("train_batch*.jpg")}, "Mosaic") 67 | 68 | 69 | def on_fit_epoch_end(trainer): 70 | """Callback function called at end of each fit (train+val) epoch.""" 71 | if run and trainer.epoch == 0: 72 | from ultralytics.utils.torch_utils import model_info_for_loggers 73 | 74 | run["Configuration/Model"] = model_info_for_loggers(trainer) 75 | _log_scalars(trainer.metrics, trainer.epoch + 1) 76 | 77 | 78 | def on_val_end(validator): 79 | """Callback function called at end of each validation.""" 80 | if run: 81 | # Log val_labels and val_pred 82 | _log_images({f.stem: str(f) for f in validator.save_dir.glob("val*.jpg")}, "Validation") 83 | 84 | 85 | def on_train_end(trainer): 86 | """Callback function called at end of training.""" 87 | if run: 88 | # Log final results, CM matrix + PR plots 89 | files = [ 90 | "results.png", 91 | "confusion_matrix.png", 92 | "confusion_matrix_normalized.png", 93 | *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R")), 94 | ] 95 | files = [(trainer.save_dir / f) for f in files if (trainer.save_dir / f).exists()] # filter 96 | for f in files: 97 | _log_plot(title=f.stem, plot_path=f) 98 | # Log the final model 99 | run[f"weights/{trainer.args.name or trainer.args.task}/{trainer.best.name}"].upload(File(str(trainer.best))) 100 | 101 | 102 | callbacks = ( 103 | { 104 | "on_pretrain_routine_start": on_pretrain_routine_start, 105 | "on_train_epoch_end": on_train_epoch_end, 106 | "on_fit_epoch_end": on_fit_epoch_end, 107 | "on_val_end": on_val_end, 108 | "on_train_end": on_train_end, 109 | } 110 | if neptune 111 | else {} 112 | ) 113 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/raytune.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import SETTINGS 4 | 5 | try: 6 | assert 
SETTINGS["raytune"] is True # verify integration is enabled 7 | import ray 8 | from ray import tune 9 | from ray.air import session 10 | 11 | except (ImportError, AssertionError): 12 | tune = None 13 | 14 | 15 | def on_fit_epoch_end(trainer): 16 | """Sends training metrics to Ray Tune at end of each epoch.""" 17 | if ray.tune.is_session_enabled(): 18 | metrics = trainer.metrics 19 | metrics["epoch"] = trainer.epoch 20 | session.report(metrics) 21 | 22 | 23 | callbacks = ( 24 | { 25 | "on_fit_epoch_end": on_fit_epoch_end, 26 | } 27 | if tune 28 | else {} 29 | ) 30 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/tensorboard.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr 4 | 5 | try: 6 | # WARNING: do not move import due to protobuf issue in https://github.com/ultralytics/ultralytics/pull/4674 7 | from torch.utils.tensorboard import SummaryWriter 8 | 9 | assert not TESTS_RUNNING # do not log pytest 10 | assert SETTINGS["tensorboard"] is True # verify integration is enabled 11 | WRITER = None # TensorBoard SummaryWriter instance 12 | 13 | except (ImportError, AssertionError, TypeError, AttributeError): 14 | # TypeError for handling 'Descriptors cannot not be created directly.' protobuf errors in Windows 15 | # AttributeError: module 'tensorflow' has no attribute 'io' if 'tensorflow' not installed 16 | SummaryWriter = None 17 | 18 | 19 | def _log_scalars(scalars, step=0): 20 | """Logs scalar values to TensorBoard.""" 21 | if WRITER: 22 | for k, v in scalars.items(): 23 | WRITER.add_scalar(k, v, step) 24 | 25 | 26 | def _log_tensorboard_graph(trainer): 27 | """Log model graph to TensorBoard.""" 28 | try: 29 | import warnings 30 | 31 | from ultralytics.utils.torch_utils import de_parallel, torch 32 | 33 | imgsz = trainer.args.imgsz 34 | imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz 35 | p = next(trainer.model.parameters()) # for device, type 36 | im = torch.zeros((1, 3, *imgsz), device=p.device, dtype=p.dtype) # input image (must be zeros, not empty) 37 | with warnings.catch_warnings(): 38 | warnings.simplefilter("ignore", category=UserWarning) # suppress jit trace warning 39 | WRITER.add_graph(torch.jit.trace(de_parallel(trainer.model), im, strict=False), []) 40 | except Exception as e: 41 | LOGGER.warning(f"WARNING ⚠️ TensorBoard graph visualization failure {e}") 42 | 43 | 44 | def on_pretrain_routine_start(trainer): 45 | """Initialize TensorBoard logging with SummaryWriter.""" 46 | if SummaryWriter: 47 | try: 48 | global WRITER 49 | WRITER = SummaryWriter(str(trainer.save_dir)) 50 | prefix = colorstr("TensorBoard: ") 51 | LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {trainer.save_dir}', view at http://localhost:6006/") 52 | except Exception as e: 53 | LOGGER.warning(f"WARNING ⚠️ TensorBoard not initialized correctly, not logging this run. 
{e}") 54 | 55 | 56 | def on_train_start(trainer): 57 | """Log TensorBoard graph.""" 58 | if WRITER: 59 | _log_tensorboard_graph(trainer) 60 | 61 | 62 | def on_train_epoch_end(trainer): 63 | """Logs scalar statistics at the end of a training epoch.""" 64 | _log_scalars(trainer.label_loss_items(trainer.tloss, prefix="train"), trainer.epoch + 1) 65 | _log_scalars(trainer.lr, trainer.epoch + 1) 66 | 67 | 68 | def on_fit_epoch_end(trainer): 69 | """Logs epoch metrics at end of training epoch.""" 70 | _log_scalars(trainer.metrics, trainer.epoch + 1) 71 | 72 | 73 | callbacks = ( 74 | { 75 | "on_pretrain_routine_start": on_pretrain_routine_start, 76 | "on_train_start": on_train_start, 77 | "on_fit_epoch_end": on_fit_epoch_end, 78 | "on_train_epoch_end": on_train_epoch_end, 79 | } 80 | if SummaryWriter 81 | else {} 82 | ) 83 | -------------------------------------------------------------------------------- /ultralytics/utils/dist.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import os 4 | import shutil 5 | import socket 6 | import sys 7 | import tempfile 8 | 9 | from . import USER_CONFIG_DIR 10 | from .torch_utils import TORCH_1_9 11 | 12 | 13 | def find_free_network_port() -> int: 14 | """ 15 | Finds a free port on localhost. 16 | 17 | It is useful in single-node training when we don't want to connect to a real main node but have to set the 18 | `MASTER_PORT` environment variable. 19 | """ 20 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 21 | s.bind(("127.0.0.1", 0)) 22 | return s.getsockname()[1] # port 23 | 24 | 25 | def generate_ddp_file(trainer): 26 | """Generates a DDP file and returns its file name.""" 27 | module, name = f"{trainer.__class__.__module__}.{trainer.__class__.__name__}".rsplit(".", 1) 28 | 29 | content = f""" 30 | # Ultralytics Multi-GPU training temp file (should be automatically deleted after use) 31 | overrides = {vars(trainer.args)} 32 | 33 | if __name__ == "__main__": 34 | from {module} import {name} 35 | from ultralytics.utils import DEFAULT_CFG_DICT 36 | 37 | cfg = DEFAULT_CFG_DICT.copy() 38 | cfg.update(save_dir='') # handle the extra key 'save_dir' 39 | trainer = {name}(cfg=cfg, overrides=overrides) 40 | results = trainer.train() 41 | """ 42 | (USER_CONFIG_DIR / "DDP").mkdir(exist_ok=True) 43 | with tempfile.NamedTemporaryFile( 44 | prefix="_temp_", 45 | suffix=f"{id(trainer)}.py", 46 | mode="w+", 47 | encoding="utf-8", 48 | dir=USER_CONFIG_DIR / "DDP", 49 | delete=False, 50 | ) as file: 51 | file.write(content) 52 | return file.name 53 | 54 | 55 | def generate_ddp_command(world_size, trainer): 56 | """Generates and returns command for distributed training.""" 57 | import __main__ # noqa local import to avoid https://github.com/Lightning-AI/lightning/issues/15218 58 | 59 | if not trainer.resume: 60 | shutil.rmtree(trainer.save_dir) # remove the save_dir 61 | file = generate_ddp_file(trainer) 62 | dist_cmd = "torch.distributed.run" if TORCH_1_9 else "torch.distributed.launch" 63 | port = find_free_network_port() 64 | cmd = [sys.executable, "-m", dist_cmd, "--nproc_per_node", f"{world_size}", "--master_port", f"{port}", file] 65 | return cmd, file 66 | 67 | 68 | def ddp_cleanup(trainer, file): 69 | """Delete temp file if created.""" 70 | if f"{id(trainer)}.py" in file: # if temp_file suffix in file 71 | os.remove(file) 72 | -------------------------------------------------------------------------------- /ultralytics/utils/errors.py: 
-------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import emojis 4 | 5 | 6 | class HUBModelError(Exception): 7 | """ 8 | Custom exception class for handling errors related to model fetching in Ultralytics YOLO. 9 | 10 | This exception is raised when a requested model is not found or cannot be retrieved. 11 | The message is also processed to include emojis for better user experience. 12 | 13 | Attributes: 14 | message (str): The error message displayed when the exception is raised. 15 | 16 | Note: 17 | The message is automatically processed through the 'emojis' function from the 'ultralytics.utils' package. 18 | """ 19 | 20 | def __init__(self, message="Model not found. Please check model URL and try again."): 21 | """Create an exception for when a model is not found.""" 22 | super().__init__(emojis(message)) 23 | -------------------------------------------------------------------------------- /ultralytics/utils/patches.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """Monkey patches to update/extend functionality of existing functions.""" 3 | 4 | import time 5 | from pathlib import Path 6 | 7 | import cv2 8 | import numpy as np 9 | import torch 10 | 11 | # OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------ 12 | _imshow = cv2.imshow # copy to avoid recursion errors 13 | 14 | 15 | def imread(filename: str, flags: int = cv2.IMREAD_COLOR): 16 | """ 17 | Read an image from a file. 18 | 19 | Args: 20 | filename (str): Path to the file to read. 21 | flags (int, optional): Flag that can take values of cv2.IMREAD_*. Defaults to cv2.IMREAD_COLOR. 22 | 23 | Returns: 24 | (np.ndarray): The read image. 25 | """ 26 | return cv2.imdecode(np.fromfile(filename, np.uint8), flags) 27 | 28 | 29 | def imwrite(filename: str, img: np.ndarray, params=None): 30 | """ 31 | Write an image to a file. 32 | 33 | Args: 34 | filename (str): Path to the file to write. 35 | img (np.ndarray): Image to write. 36 | params (list of ints, optional): Additional parameters. See OpenCV documentation. 37 | 38 | Returns: 39 | (bool): True if the file was written, False otherwise. 40 | """ 41 | try: 42 | cv2.imencode(Path(filename).suffix, img, params)[1].tofile(filename) 43 | return True 44 | except Exception: 45 | return False 46 | 47 | 48 | def imshow(winname: str, mat: np.ndarray): 49 | """ 50 | Displays an image in the specified window. 51 | 52 | Args: 53 | winname (str): Name of the window. 54 | mat (np.ndarray): Image to be shown. 55 | """ 56 | _imshow(winname.encode("unicode_escape").decode(), mat) 57 | 58 | 59 | # PyTorch functions ---------------------------------------------------------------------------------------------------- 60 | _torch_save = torch.save # copy to avoid recursion errors 61 | 62 | 63 | def torch_save(*args, **kwargs): 64 | """ 65 | Use dill (if exists) to serialize the lambda functions where pickle does not do this. Also adds 3 retries with 66 | exponential standoff in case of save failure to improve robustness to transient issues. 67 | 68 | Args: 69 | *args (tuple): Positional arguments to pass to torch.save. 70 | **kwargs (dict): Keyword arguments to pass to torch.save. 
71 | """ 72 | try: 73 | import dill as pickle # noqa 74 | except ImportError: 75 | import pickle 76 | 77 | if "pickle_module" not in kwargs: 78 | kwargs["pickle_module"] = pickle # noqa 79 | 80 | for i in range(4): # 3 retries 81 | try: 82 | return _torch_save(*args, **kwargs) 83 | except RuntimeError: # unable to save, possibly waiting for device to flush or anti-virus to finish scanning 84 | if i == 3: 85 | raise 86 | time.sleep((2**i) / 2) # exponential standoff 0.5s, 1.0s, 2.0s 87 | -------------------------------------------------------------------------------- /ultralytics/utils/triton.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from typing import List 4 | from urllib.parse import urlsplit 5 | 6 | import numpy as np 7 | 8 | 9 | class TritonRemoteModel: 10 | """ 11 | Client for interacting with a remote Triton Inference Server model. 12 | 13 | Attributes: 14 | endpoint (str): The name of the model on the Triton server. 15 | url (str): The URL of the Triton server. 16 | triton_client: The Triton client (either HTTP or gRPC). 17 | InferInput: The input class for the Triton client. 18 | InferRequestedOutput: The output request class for the Triton client. 19 | input_formats (List[str]): The data types of the model inputs. 20 | np_input_formats (List[type]): The numpy data types of the model inputs. 21 | input_names (List[str]): The names of the model inputs. 22 | output_names (List[str]): The names of the model outputs. 23 | """ 24 | 25 | def __init__(self, url: str, endpoint: str = "", scheme: str = ""): 26 | """ 27 | Initialize the TritonRemoteModel. 28 | 29 | Arguments may be provided individually or parsed from a collective 'url' argument of the form 30 | ://// 31 | 32 | Args: 33 | url (str): The URL of the Triton server. 34 | endpoint (str): The name of the model on the Triton server. 35 | scheme (str): The communication scheme ('http' or 'grpc'). 36 | """ 37 | if not endpoint and not scheme: # Parse all args from URL string 38 | splits = urlsplit(url) 39 | endpoint = splits.path.strip("/").split("/")[0] 40 | scheme = splits.scheme 41 | url = splits.netloc 42 | 43 | self.endpoint = endpoint 44 | self.url = url 45 | 46 | # Choose the Triton client based on the communication scheme 47 | if scheme == "http": 48 | import tritonclient.http as client # noqa 49 | 50 | self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False) 51 | config = self.triton_client.get_model_config(endpoint) 52 | else: 53 | import tritonclient.grpc as client # noqa 54 | 55 | self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False) 56 | config = self.triton_client.get_model_config(endpoint, as_json=True)["config"] 57 | 58 | # Sort output names alphabetically, i.e. 'output0', 'output1', etc. 
59 | config["output"] = sorted(config["output"], key=lambda x: x.get("name")) 60 | 61 | # Define model attributes 62 | type_map = {"TYPE_FP32": np.float32, "TYPE_FP16": np.float16, "TYPE_UINT8": np.uint8} 63 | self.InferRequestedOutput = client.InferRequestedOutput 64 | self.InferInput = client.InferInput 65 | self.input_formats = [x["data_type"] for x in config["input"]] 66 | self.np_input_formats = [type_map[x] for x in self.input_formats] 67 | self.input_names = [x["name"] for x in config["input"]] 68 | self.output_names = [x["name"] for x in config["output"]] 69 | 70 | def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]: 71 | """ 72 | Call the model with the given inputs. 73 | 74 | Args: 75 | *inputs (List[np.ndarray]): Input data to the model. 76 | 77 | Returns: 78 | List[np.ndarray]: Model outputs. 79 | """ 80 | infer_inputs = [] 81 | input_format = inputs[0].dtype 82 | for i, x in enumerate(inputs): 83 | if x.dtype != self.np_input_formats[i]: 84 | x = x.astype(self.np_input_formats[i]) 85 | infer_input = self.InferInput(self.input_names[i], [*x.shape], self.input_formats[i].replace("TYPE_", "")) 86 | infer_input.set_data_from_numpy(x) 87 | infer_inputs.append(infer_input) 88 | 89 | infer_outputs = [self.InferRequestedOutput(output_name) for output_name in self.output_names] 90 | outputs = self.triton_client.infer(model_name=self.endpoint, inputs=infer_inputs, outputs=infer_outputs) 91 | 92 | return [outputs.as_numpy(output_name).astype(input_format) for output_name in self.output_names] 93 | -------------------------------------------------------------------------------- /weights/plate_rec_color.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/weights/plate_rec_color.pth -------------------------------------------------------------------------------- /weights/yolov8s.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/weights/yolov8s.pt --------------------------------------------------------------------------------