├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── README.md ├── detect_plate.py ├── detect_rec_plate.py ├── export_onnx.py ├── fonts ├── cv_puttext.py └── platech.ttf ├── imgs ├── Quicker_20220930_180919.png ├── Quicker_20220930_180938.png ├── double_yellow.jpg ├── hongkang1.jpg ├── police.jpg ├── shi_lin_guan.jpg ├── single_blue.jpg ├── single_green.jpg ├── single_yellow.jpg ├── tmpA5E3.png └── xue.jpg ├── plate_recognition ├── double_plate_split_merge.py ├── plateNet.py └── plate_rec.py ├── readme ├── 105384078.png └── README.md ├── tests ├── conftest.py ├── test_cli.py ├── test_cuda.py ├── test_engine.py ├── test_explorer.py ├── test_integrations.py └── test_python.py ├── train.py ├── train.sh ├── ultralytics ├── __init__.py ├── assets │ ├── bus.jpg │ └── zidane.jpg ├── cfg │ ├── __init__.py │ ├── datasets │ │ ├── Argoverse.yaml │ │ ├── DOTAv1.5.yaml │ │ ├── DOTAv1.yaml │ │ ├── GlobalWheat2020.yaml │ │ ├── ImageNet.yaml │ │ ├── Objects365.yaml │ │ ├── SKU-110K.yaml │ │ ├── VOC.yaml │ │ ├── VisDrone.yaml │ │ ├── coco-pose.yaml │ │ ├── coco.yaml │ │ ├── coco128-seg.yaml │ │ ├── coco128.yaml │ │ ├── coco8-pose.yaml │ │ ├── coco8-seg.yaml │ │ ├── coco8.yaml │ │ ├── dota8.yaml │ │ ├── open-images-v7.yaml │ │ ├── plate.yaml │ │ ├── tiger-pose.yaml │ │ └── xView.yaml │ ├── default.yaml │ ├── models │ │ ├── README.md │ │ ├── rt-detr │ │ │ ├── rtdetr-l.yaml │ │ │ ├── rtdetr-resnet101.yaml │ │ │ ├── rtdetr-resnet50.yaml │ │ │ └── rtdetr-x.yaml │ │ ├── v3 │ │ │ ├── yolov3-spp.yaml │ │ │ ├── yolov3-tiny.yaml │ │ │ └── yolov3.yaml │ │ ├── v5 │ │ │ ├── yolov5-p6.yaml │ │ │ └── yolov5.yaml │ │ ├── v6 │ │ │ └── yolov6.yaml │ │ └── v8 │ │ │ ├── yolov8-cls.yaml │ │ │ ├── yolov8-ghost-p2.yaml │ │ │ ├── yolov8-ghost-p6.yaml │ │ │ ├── yolov8-ghost.yaml │ │ │ ├── yolov8-obb.yaml │ │ │ ├── yolov8-p2.yaml │ │ │ ├── yolov8-p6.yaml │ │ │ ├── yolov8-pose-p6.yaml │ │ │ ├── yolov8-pose.yaml │ │ │ ├── yolov8-rtdetr.yaml │ │ │ ├── yolov8-seg-p6.yaml │ │ │ ├── yolov8-seg.yaml │ │ │ └── yolov8.yaml │ └── trackers │ │ ├── botsort.yaml │ │ └── bytetrack.yaml ├── data │ ├── __init__.py │ ├── annotator.py │ ├── augment.py │ ├── base.py │ ├── build.py │ ├── converter.py │ ├── dataset.py │ ├── explorer │ │ ├── __init__.py │ │ ├── explorer.py │ │ ├── gui │ │ │ ├── __init__.py │ │ │ └── dash.py │ │ └── utils.py │ ├── loaders.py │ ├── scripts │ │ ├── download_weights.sh │ │ ├── get_coco.sh │ │ ├── get_coco128.sh │ │ └── get_imagenet.sh │ ├── split_dota.py │ └── utils.py ├── engine │ ├── __init__.py │ ├── exporter.py │ ├── model.py │ ├── predictor.py │ ├── results.py │ ├── trainer.py │ ├── tuner.py │ └── validator.py ├── hub │ ├── __init__.py │ ├── auth.py │ ├── session.py │ └── utils.py ├── models │ ├── __init__.py │ ├── fastsam │ │ ├── __init__.py │ │ ├── model.py │ │ ├── predict.py │ │ ├── prompt.py │ │ ├── utils.py │ │ └── val.py │ ├── nas │ │ ├── __init__.py │ │ ├── model.py │ │ ├── predict.py │ │ └── val.py │ ├── rtdetr │ │ ├── __init__.py │ │ ├── model.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ ├── sam │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── build.py │ │ ├── model.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── decoders.py │ │ │ ├── encoders.py │ │ │ ├── sam.py │ │ │ ├── tiny_encoder.py │ │ │ └── transformer.py │ │ └── predict.py │ ├── utils │ │ ├── __init__.py │ │ ├── loss.py │ │ └── ops.py │ └── yolo │ │ ├── __init__.py │ │ ├── classify │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ │ ├── detect │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── 
val.py │ │ ├── model.py │ │ ├── obb │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ │ ├── pose │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py │ │ └── segment │ │ ├── __init__.py │ │ ├── predict.py │ │ ├── train.py │ │ └── val.py ├── nn │ ├── __init__.py │ ├── autobackend.py │ ├── modules │ │ ├── __init__.py │ │ ├── block.py │ │ ├── conv.py │ │ ├── head.py │ │ ├── transformer.py │ │ └── utils.py │ └── tasks.py ├── solutions │ ├── __init__.py │ ├── ai_gym.py │ ├── distance_calculation.py │ ├── heatmap.py │ ├── object_counter.py │ └── speed_estimation.py ├── trackers │ ├── README.md │ ├── __init__.py │ ├── basetrack.py │ ├── bot_sort.py │ ├── byte_tracker.py │ ├── track.py │ └── utils │ │ ├── __init__.py │ │ ├── gmc.py │ │ ├── kalman_filter.py │ │ └── matching.py └── utils │ ├── __init__.py │ ├── autobatch.py │ ├── benchmarks.py │ ├── callbacks │ ├── __init__.py │ ├── base.py │ ├── clearml.py │ ├── comet.py │ ├── dvc.py │ ├── hub.py │ ├── mlflow.py │ ├── neptune.py │ ├── raytune.py │ ├── tensorboard.py │ └── wb.py │ ├── checks.py │ ├── dist.py │ ├── downloads.py │ ├── errors.py │ ├── files.py │ ├── instance.py │ ├── loss.py │ ├── metrics.py │ ├── ops.py │ ├── patches.py │ ├── plotting.py │ ├── tal.py │ ├── torch_utils.py │ ├── triton.py │ └── tuner.py └── weights ├── plate_rec_color.pth └── yolov8s.pt /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore 2 | # 首先忽略所有的文件 3 | * 4 | # 但是不忽略目录 5 | !*/ 6 | # 忽略一些指定的目录名 7 | ut/ 8 | runs/ 9 | .vscode/ 10 | build/ 11 | result/ 12 | onnx/ 13 | *.pyc 14 | pretrained_model/ 15 | # 不忽略下面指定的文件类型 16 | !*.cpp 17 | !*.h 18 | !*.hpp 19 | !*.c 20 | !.gitignore 21 | !*.py 22 | !*.sh 23 | !*.npy 24 | !*.jpg 25 | !*.pt 26 | !*.npy 27 | !*.pth 28 | !*.png 29 | !*.md 30 | !*.txt 31 | !*.yaml 32 | !*.ttf 33 | !*.cu -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Pre-commit hooks. 
For more information see https://github.com/pre-commit/pre-commit-hooks/blob/main/README.md 3 | # Optionally remove from local hooks with 'rm .git/hooks/pre-commit' 4 | 5 | # Define bot property if installed via https://github.com/marketplace/pre-commit-ci 6 | ci: 7 | autofix_prs: true 8 | autoupdate_commit_msg: "[pre-commit.ci] pre-commit suggestions" 9 | autoupdate_schedule: monthly 10 | submodules: true 11 | 12 | # Exclude directories (optional) 13 | # exclude: 'docs/' 14 | 15 | # Define repos to run 16 | repos: 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v4.5.0 19 | hooks: 20 | - id: end-of-file-fixer 21 | - id: trailing-whitespace 22 | - id: check-case-conflict 23 | # - id: check-yaml 24 | - id: check-docstring-first 25 | - id: detect-private-key 26 | 27 | - repo: https://github.com/asottile/pyupgrade 28 | rev: v3.15.0 29 | hooks: 30 | - id: pyupgrade 31 | name: Upgrade code 32 | 33 | - repo: https://github.com/astral-sh/ruff-pre-commit 34 | rev: v0.1.11 35 | hooks: 36 | - id: ruff 37 | args: [--fix] 38 | 39 | - repo: https://github.com/executablebooks/mdformat 40 | rev: 0.7.17 41 | hooks: 42 | - id: mdformat 43 | name: MD formatting 44 | additional_dependencies: 45 | - mdformat-gfm 46 | - mdformat-frontmatter 47 | - mdformat-mkdocs 48 | args: 49 | - --wrap=no 50 | - --number 51 | exclude: 'docs/.*\.md' 52 | # exclude: "README.md|README.zh-CN.md|CONTRIBUTING.md" 53 | 54 | - repo: https://github.com/codespell-project/codespell 55 | rev: v2.2.6 56 | hooks: 57 | - id: codespell 58 | exclude: "docs/de|docs/fr|docs/pt|docs/es|docs/mkdocs_de.yml" 59 | args: 60 | - --ignore-words-list=crate,nd,ned,strack,dota,ane,segway,fo,gool,winn,commend,bloc,nam,afterall 61 | 62 | - repo: https://github.com/hadialqattan/pycln 63 | rev: v2.4.0 64 | hooks: 65 | - id: pycln 66 | args: [--all] 67 | # 68 | # - repo: https://github.com/PyCQA/docformatter 69 | # rev: v1.7.5 70 | # hooks: 71 | # - id: docformatter 72 | 73 | # - repo: https://github.com/asottile/yesqa 74 | # rev: v1.4.0 75 | # hooks: 76 | # - id: yesqa 77 | 78 | # - repo: https://github.com/asottile/dead 79 | # rev: v1.5.0 80 | # hooks: 81 | # - id: dead 82 | 83 | # - repo: https://github.com/ultralytics/pre-commit 84 | # rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23 85 | # hooks: 86 | # - id: capitalize-comments 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## **yolov8车牌识别算法,支持12种中文车牌类型** 2 | 3 | #### **图片测试demo:** 4 | 5 | 直接运行detect_plate.py 或者运行如下命令行: 6 | 7 | ``` 8 | python detect_rec_plate.py --detect_model weights/yolov8s.pt --rec_model weights/plate_rec_color.pth --image_path imgs --output result 9 | ``` 10 | 11 | 测试文件夹imgs,结果保存再 result 文件夹中 12 | 13 | ## **车牌检测训练** 14 | 15 | 车牌检测训练链接如下: 16 | 17 | [车牌检测训练](https://github.com/we0091234/yolov8-plate/tree/master/readme) 18 | 19 | ## **车牌识别训练** 20 | 21 | 车牌识别训练链接如下: 22 | 23 | [车牌识别训练](https://github.com/we0091234/crnn_plate_recognition) 24 | 25 | #### **支持如下:** 26 | 27 | - [X] 1.单行蓝牌 28 | - [X] 2.单行黄牌 29 | - [X] 3.新能源车牌 30 | - [X] 4.白色警用车牌 31 | - [X] 5.教练车牌 32 | - [X] 6.武警车牌 33 | - [X] 7.双层黄牌 34 | - [X] 8.双层白牌 35 | - [X] 9.使馆车牌 36 | - [X] 10.港澳粤Z牌 37 | - [X] 11.双层绿牌 38 | - [X] 12.民航车牌 39 | 40 | ## References 41 | 42 | * [https://github.com/derronqi/yolov8-face](https://github.com/derronqi/yolov8-face) 43 | * [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) 44 | 45 | ## 联系 46 | 47 | **有问题可以提issues 
或者加qq群:769809695(新群) 837982567(已满) 询问** 48 | -------------------------------------------------------------------------------- /detect_plate.py: -------------------------------------------------------------------------------- 1 | from ultralytics import YOLO 2 | 3 | # 加载预训练的YOLOv8n模型 4 | model = YOLO('runs/detect/train2/weights/best.pt') 5 | 6 | # 在'bus.jpg'上运行推理,并附加参数 7 | model.predict('/mnt/mydisk/xiaolei/code/plate/plate_detect/Chinese_license_plate_detection_recognition/imgs/double_yellow.jpg', save=True, imgsz=320, conf=0.5) -------------------------------------------------------------------------------- /export_onnx.py: -------------------------------------------------------------------------------- 1 | from ultralytics import YOLO 2 | 3 | # Load a model 4 | model = YOLO("yolov8n.yaml") # build a new model from scratch 5 | model = YOLO("runs/detect/train2/weights/best.pt") # load a pretrained model (recommended for training) 6 | 7 | # Use the model 8 | # model.train(data="coco128.yaml", epochs=3) # train the model 9 | # metrics = model.val() # evaluate model performance on the validation set 10 | # results = model("https://ultralytics.com/images/bus.jpg") # predict on an image 11 | path = model.export(format="onnx") # export the model to ONNX format -------------------------------------------------------------------------------- /fonts/cv_puttext.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image, ImageDraw, ImageFont 4 | 5 | def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=20): 6 | if (isinstance(img, np.ndarray)): #判断是否OpenCV图片类型 7 | img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 8 | draw = ImageDraw.Draw(img) 9 | fontText = ImageFont.truetype( 10 | "fonts/platech.ttf", textSize, encoding="utf-8") 11 | draw.text((left, top), text, textColor, font=fontText) 12 | return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) 13 | 14 | if __name__ == '__main__': 15 | imgPath = "result.jpg" 16 | img = cv2.imread(imgPath) 17 | 18 | saveImg = cv2ImgAddText(img, '中国加油!', 50, 100, (255, 0, 0), 50) 19 | 20 | # cv2.imshow('display',saveImg) 21 | cv2.imwrite('save.jpg',saveImg) 22 | # cv2.waitKey() -------------------------------------------------------------------------------- /fonts/platech.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/fonts/platech.ttf -------------------------------------------------------------------------------- /imgs/Quicker_20220930_180919.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/Quicker_20220930_180919.png -------------------------------------------------------------------------------- /imgs/Quicker_20220930_180938.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/Quicker_20220930_180938.png -------------------------------------------------------------------------------- /imgs/double_yellow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/double_yellow.jpg 
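
The detection demo above (detect_plate.py) only localizes plates; character recognition lives in plate_recognition/plate_rec.py further down in this repo. Below is a minimal sketch of how the two could be wired together in Python — an assumption-laden illustration, not the project's reference code (detect_rec_plate.py is the actual entry point). It assumes the weight paths from the README (weights/yolov8s.pt and weights/plate_rec_color.pth) and single-layer plates; double-layer plates would additionally need get_split_merge from plate_recognition/double_plate_split_merge.py.

```
# Hypothetical sketch: combine the YOLOv8 plate detector with the CRNN-style recognizer.
# Weight paths and the sample image are assumptions taken from the README / imgs folder.
import cv2
import torch
from ultralytics import YOLO
from plate_recognition.plate_rec import init_model, get_plate_result

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = YOLO('weights/yolov8s.pt')                                  # plate detector (README weights)
rec_model = init_model(device, 'weights/plate_rec_color.pth', is_color=True)  # recognizer + color head

img = cv2.imread('imgs/single_blue.jpg')
results = detector.predict(img, conf=0.5)                              # one image -> one Results object
for x1, y1, x2, y2 in results[0].boxes.xyxy.cpu().numpy().astype(int):
    roi = img[y1:y2, x1:x2]                                            # crop the detected plate region
    plate, prob, plate_color, color_conf = get_plate_result(roi, device, rec_model, is_color=True)
    print(plate, plate_color)                                          # e.g. plate string and its color
```
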
-------------------------------------------------------------------------------- /imgs/hongkang1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/hongkang1.jpg -------------------------------------------------------------------------------- /imgs/police.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/police.jpg -------------------------------------------------------------------------------- /imgs/shi_lin_guan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/shi_lin_guan.jpg -------------------------------------------------------------------------------- /imgs/single_blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/single_blue.jpg -------------------------------------------------------------------------------- /imgs/single_green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/single_green.jpg -------------------------------------------------------------------------------- /imgs/single_yellow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/single_yellow.jpg -------------------------------------------------------------------------------- /imgs/tmpA5E3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/tmpA5E3.png -------------------------------------------------------------------------------- /imgs/xue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/imgs/xue.jpg -------------------------------------------------------------------------------- /plate_recognition/double_plate_split_merge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | def get_split_merge(img): 5 | h,w,c = img.shape 6 | img_upper = img[0:int(5/12*h),:] 7 | img_lower = img[int(1/3*h):,:] 8 | img_upper = cv2.resize(img_upper,(img_lower.shape[1],img_lower.shape[0])) 9 | new_img = np.hstack((img_upper,img_lower)) 10 | return new_img 11 | 12 | if __name__=="__main__": 13 | img = cv2.imread("double_plate/tmp8078.png") 14 | new_img =get_split_merge(img) 15 | cv2.imwrite("double_plate/new.jpg",new_img) 16 | -------------------------------------------------------------------------------- /plate_recognition/plate_rec.py: -------------------------------------------------------------------------------- 1 | from plate_recognition.plateNet import myNet_ocr,myNet_ocr_color 2 | import torch 3 | import torch.nn as nn 4 | import cv2 5 | import numpy as np 6 | import os 7 | import time 8 | import sys 9 | 10 | def cv_imread(path): #可以读取中文路径的图片 11 | 
img=cv2.imdecode(np.fromfile(path,dtype=np.uint8),-1) 12 | return img 13 | 14 | def allFilePath(rootPath,allFIleList): 15 | fileList = os.listdir(rootPath) 16 | for temp in fileList: 17 | if os.path.isfile(os.path.join(rootPath,temp)): 18 | if temp.endswith('.jpg') or temp.endswith('.png') or temp.endswith('.JPG'): 19 | allFIleList.append(os.path.join(rootPath,temp)) 20 | else: 21 | allFilePath(os.path.join(rootPath,temp),allFIleList) 22 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device("cpu") 23 | color=['黑色','蓝色','绿色','白色','黄色'] 24 | plateName=r"#京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新学警港澳挂使领民航危0123456789ABCDEFGHJKLMNPQRSTUVWXYZ险品" 25 | mean_value,std_value=(0.588,0.193) 26 | def decodePlate(preds): 27 | pre=0 28 | newPreds=[] 29 | index=[] 30 | for i in range(len(preds)): 31 | if preds[i]!=0 and preds[i]!=pre: 32 | newPreds.append(preds[i]) 33 | index.append(i) 34 | pre=preds[i] 35 | return newPreds,index 36 | 37 | def image_processing(img,device): 38 | img = cv2.resize(img, (168,48)) 39 | img = np.reshape(img, (48, 168, 3)) 40 | 41 | # normalize 42 | img = img.astype(np.float32) 43 | img = (img / 255. - mean_value) / std_value 44 | img = img.transpose([2, 0, 1]) 45 | img = torch.from_numpy(img) 46 | 47 | img = img.to(device) 48 | img = img.view(1, *img.size()) 49 | return img 50 | 51 | def get_plate_result(img,device,model,is_color=False): 52 | input = image_processing(img,device) 53 | if is_color: #是否识别颜色 54 | preds,color_preds = model(input) 55 | color_preds = torch.softmax(color_preds,dim=-1) 56 | color_conf,color_index = torch.max(color_preds,dim=-1) 57 | color_conf=color_conf.item() 58 | else: 59 | preds = model(input) 60 | preds=torch.softmax(preds,dim=-1) 61 | prob,index=preds.max(dim=-1) 62 | index = index.view(-1).detach().cpu().numpy() 63 | prob=prob.view(-1).detach().cpu().numpy() 64 | 65 | 66 | # preds=preds.view(-1).detach().cpu().numpy() 67 | newPreds,new_index=decodePlate(index) 68 | prob=prob[new_index] 69 | plate="" 70 | for i in newPreds: 71 | plate+=plateName[i] 72 | # if not (plate[0] in plateName[1:44] ): 73 | # return "" 74 | if is_color: 75 | return plate,prob,color[color_index],color_conf #返回车牌号以及每个字符的概率,以及颜色,和颜色的概率 76 | else: 77 | return plate,prob 78 | 79 | def init_model(device,model_path,is_color = False): 80 | # print( print(sys.path)) 81 | # model_path ="plate_recognition/model/checkpoint_61_acc_0.9715.pth" 82 | check_point = torch.load(model_path,map_location=device) 83 | model_state=check_point['state_dict'] 84 | cfg=check_point['cfg'] 85 | color_classes=0 86 | if is_color: 87 | color_classes=5 #颜色类别数 88 | model = myNet_ocr_color(num_classes=len(plateName),export=True,cfg=cfg,color_num=color_classes) 89 | 90 | model.load_state_dict(model_state,strict=False) 91 | model.to(device) 92 | model.eval() 93 | return model 94 | 95 | # model = init_model(device) 96 | if __name__ == '__main__': 97 | model_path = r"weights/plate_rec_color.pth" 98 | image_path ="images/tmp2424.png" 99 | testPath = r"/mnt/Gpan/Mydata/pytorchPorject/CRNN/crnn_plate_recognition/images" 100 | fileList=[] 101 | allFilePath(testPath,fileList) 102 | # result = get_plate_result(image_path,device) 103 | # print(result) 104 | is_color = False 105 | model = init_model(device,model_path,is_color=is_color) 106 | right=0 107 | begin = time.time() 108 | 109 | for imge_path in fileList: 110 | img=cv2.imread(imge_path) 111 | if is_color: 112 | plate,_,plate_color,_=get_plate_result(img,device,model,is_color=is_color) 113 | print(plate) 114 | else: 115 | 
plate,_=get_plate_result(img,device,model,is_color=is_color) 116 | print(plate,imge_path) 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /readme/105384078.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/readme/105384078.png -------------------------------------------------------------------------------- /readme/README.md: -------------------------------------------------------------------------------- 1 | ### **车牌检测训练** 2 | 3 | 1. **下载数据集:** 数据集可以添加vx:we0091234 (注明来意)获取 收费30 介意勿扰 数据从CCPD数据集中选取的一部分,也有自己收集的一部分并转换的 4 | 数据集格式为yolo格式: 5 | 6 | ``` 7 | label x y w h 8 | ``` 9 | 2. **修改ultralytics/datasets/yolov8-plate.yaml train和val路径,换成你的数据路径** 10 | 11 | ``` 12 | train: /mnt/mydisk/xiaolei/plate_detect/new_train_data # train images (relative to 'path') 4 images 13 | val: /mnt/mydisk/xiaolei/plate_detect/new_val_data # val images (relative to 'path') 4 images 14 | 15 | # Classes for DOTA 1.0 16 | names: 17 | 0: single 18 | 1: double 19 | 20 | ``` 21 | 3. **训练** 22 | 23 | ``` 24 | yolo task=detect mode=train model=yolov8s.yaml data=./ultralytics/cfg/datasets/plate.yaml epochs=120 batch=32 imgsz=640 pretrained=False optimizer=SGD 25 | ``` 26 | 27 | 结果存在run文件夹中 28 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import shutil 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | TMP = Path(__file__).resolve().parent / 'tmp' # temp directory for test files 9 | 10 | 11 | def pytest_addoption(parser): 12 | """ 13 | Add custom command-line options to pytest. 14 | 15 | Args: 16 | parser (pytest.config.Parser): The pytest parser object. 17 | """ 18 | parser.addoption('--slow', action='store_true', default=False, help='Run slow tests') 19 | 20 | 21 | def pytest_configure(config): 22 | """ 23 | Register custom markers to avoid pytest warnings. 24 | 25 | Args: 26 | config (pytest.config.Config): The pytest config object. 27 | """ 28 | config.addinivalue_line('markers', 'slow: mark test as slow to run') 29 | 30 | 31 | def pytest_runtest_setup(item): 32 | """ 33 | Setup hook to skip tests marked as slow if the --slow option is not provided. 34 | 35 | Args: 36 | item (pytest.Item): The test item object. 37 | """ 38 | if 'slow' in item.keywords and not item.config.getoption('--slow'): 39 | pytest.skip('skip slow tests unless --slow is set') 40 | 41 | 42 | def pytest_collection_modifyitems(config, items): 43 | """ 44 | Modify the list of test items to remove tests marked as slow if the --slow option is not provided. 45 | 46 | Args: 47 | config (pytest.config.Config): The pytest config object. 48 | items (list): List of test items to be executed. 49 | """ 50 | if not config.getoption('--slow'): 51 | # Remove the item entirely from the list of test items if it's marked as 'slow' 52 | items[:] = [item for item in items if 'slow' not in item.keywords] 53 | 54 | 55 | def pytest_sessionstart(session): 56 | """ 57 | Initialize session configurations for pytest. 58 | 59 | This function is automatically called by pytest after the 'Session' object has been created but before performing 60 | test collection. It sets the initial seeds and prepares the temporary directory for the test session. 
61 | 62 | Args: 63 | session (pytest.Session): The pytest session object. 64 | """ 65 | from ultralytics.utils.torch_utils import init_seeds 66 | 67 | init_seeds() 68 | shutil.rmtree(TMP, ignore_errors=True) # delete any existing tests/tmp directory 69 | TMP.mkdir(parents=True, exist_ok=True) # create a new empty directory 70 | 71 | 72 | def pytest_terminal_summary(terminalreporter, exitstatus, config): 73 | """ 74 | Cleanup operations after pytest session. 75 | 76 | This function is automatically called by pytest at the end of the entire test session. It removes certain files 77 | and directories used during testing. 78 | 79 | Args: 80 | terminalreporter (pytest.terminal.TerminalReporter): The terminal reporter object. 81 | exitstatus (int): The exit status of the test run. 82 | config (pytest.config.Config): The pytest config object. 83 | """ 84 | from ultralytics.utils import WEIGHTS_DIR 85 | 86 | # Remove files 87 | models = [path for x in ['*.onnx', '*.torchscript'] for path in WEIGHTS_DIR.rglob(x)] 88 | for file in ['bus.jpg', 'yolov8n.onnx', 'yolov8n.torchscript'] + models: 89 | Path(file).unlink(missing_ok=True) 90 | 91 | # Remove directories 92 | models = [path for x in ['*.mlpackage', '*_openvino_model'] for path in WEIGHTS_DIR.rglob(x)] 93 | for directory in [TMP.parents[1] / '.pytest_cache', TMP] + models: 94 | shutil.rmtree(directory, ignore_errors=True) 95 | -------------------------------------------------------------------------------- /tests/test_cuda.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import pytest 4 | import torch 5 | 6 | from ultralytics import YOLO 7 | from ultralytics.utils import ASSETS, WEIGHTS_DIR, checks 8 | 9 | CUDA_IS_AVAILABLE = checks.cuda_is_available() 10 | CUDA_DEVICE_COUNT = checks.cuda_device_count() 11 | 12 | MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt' # test spaces in path 13 | DATA = 'coco8.yaml' 14 | BUS = ASSETS / 'bus.jpg' 15 | 16 | 17 | def test_checks(): 18 | """Validate CUDA settings against torch CUDA functions.""" 19 | assert torch.cuda.is_available() == CUDA_IS_AVAILABLE 20 | assert torch.cuda.device_count() == CUDA_DEVICE_COUNT 21 | 22 | 23 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 24 | def test_train(): 25 | """Test model training on a minimal dataset.""" 26 | device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1] 27 | YOLO(MODEL).train(data=DATA, imgsz=64, epochs=1, device=device) # requires imgsz>=64 28 | 29 | 30 | @pytest.mark.slow 31 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 32 | def test_predict_multiple_devices(): 33 | """Validate model prediction on multiple devices.""" 34 | model = YOLO('yolov8n.pt') 35 | model = model.cpu() 36 | assert str(model.device) == 'cpu' 37 | _ = model(BUS) # CPU inference 38 | assert str(model.device) == 'cpu' 39 | 40 | model = model.to('cuda:0') 41 | assert str(model.device) == 'cuda:0' 42 | _ = model(BUS) # CUDA inference 43 | assert str(model.device) == 'cuda:0' 44 | 45 | model = model.cpu() 46 | assert str(model.device) == 'cpu' 47 | _ = model(BUS) # CPU inference 48 | assert str(model.device) == 'cpu' 49 | 50 | model = model.cuda() 51 | assert str(model.device) == 'cuda:0' 52 | _ = model(BUS) # CUDA inference 53 | assert str(model.device) == 'cuda:0' 54 | 55 | 56 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 57 | def test_autobatch(): 58 | """Check batch size for YOLO model using autobatch.""" 59 | 
from ultralytics.utils.autobatch import check_train_batch_size 60 | 61 | check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True) 62 | 63 | 64 | @pytest.mark.slow 65 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 66 | def test_utils_benchmarks(): 67 | """Profile YOLO models for performance benchmarks.""" 68 | from ultralytics.utils.benchmarks import ProfileModels 69 | 70 | # Pre-export a dynamic engine model to use dynamic inference 71 | YOLO(MODEL).export(format='engine', imgsz=32, dynamic=True, batch=1) 72 | ProfileModels([MODEL], imgsz=32, half=False, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile() 73 | 74 | 75 | @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') 76 | def test_predict_sam(): 77 | """Test SAM model prediction with various prompts.""" 78 | from ultralytics import SAM 79 | from ultralytics.models.sam import Predictor as SAMPredictor 80 | 81 | # Load a model 82 | model = SAM(WEIGHTS_DIR / 'sam_b.pt') 83 | 84 | # Display model information (optional) 85 | model.info() 86 | 87 | # Run inference 88 | model(BUS, device=0) 89 | 90 | # Run inference with bboxes prompt 91 | model(BUS, bboxes=[439, 437, 524, 709], device=0) 92 | 93 | # Run inference with points prompt 94 | model(ASSETS / 'zidane.jpg', points=[900, 370], labels=[1], device=0) 95 | 96 | # Create SAMPredictor 97 | overrides = dict(conf=0.25, task='segment', mode='predict', imgsz=1024, model=WEIGHTS_DIR / 'mobile_sam.pt') 98 | predictor = SAMPredictor(overrides=overrides) 99 | 100 | # Set image 101 | predictor.set_image(ASSETS / 'zidane.jpg') # set with image file 102 | # predictor(bboxes=[439, 437, 524, 709]) 103 | # predictor(points=[900, 370], labels=[1]) 104 | 105 | # Reset image 106 | predictor.reset_image() 107 | -------------------------------------------------------------------------------- /tests/test_explorer.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics import Explorer 4 | from ultralytics.utils import ASSETS 5 | 6 | import PIL 7 | 8 | 9 | def test_similarity(): 10 | """Test similarity calculations and SQL queries for correctness and response length.""" 11 | exp = Explorer() 12 | exp.create_embeddings_table() 13 | similar = exp.get_similar(idx=1) 14 | assert len(similar) == 25 15 | similar = exp.get_similar(img=ASSETS / 'zidane.jpg') 16 | assert len(similar) == 25 17 | similar = exp.get_similar(idx=[1, 2], limit=10) 18 | assert len(similar) == 10 19 | sim_idx = exp.similarity_index() 20 | assert len(sim_idx) > 0 21 | sql = exp.sql_query("WHERE labels LIKE '%person%'") 22 | assert len(sql) > 0 23 | 24 | 25 | def test_det(): 26 | """Test detection functionalities and ensure the embedding table has bounding boxes.""" 27 | exp = Explorer(data='coco8.yaml', model='yolov8n.pt') 28 | exp.create_embeddings_table(force=True) 29 | assert len(exp.table.head()['bboxes']) > 0 30 | similar = exp.get_similar(idx=[1, 2], limit=10) 31 | assert len(similar) > 0 32 | # This is a loose test, just checks errors not correctness 33 | similar = exp.plot_similar(idx=[1, 2], limit=10) 34 | assert isinstance(similar, PIL.Image.Image) 35 | 36 | 37 | def test_seg(): 38 | """Test segmentation functionalities and verify the embedding table includes masks.""" 39 | exp = Explorer(data='coco8-seg.yaml', model='yolov8n-seg.pt') 40 | exp.create_embeddings_table(force=True) 41 | assert len(exp.table.head()['masks']) > 0 42 | similar = exp.get_similar(idx=[1, 2], 
limit=10) 43 | assert len(similar) > 0 44 | similar = exp.plot_similar(idx=[1, 2], limit=10) 45 | assert isinstance(similar, PIL.Image.Image) 46 | 47 | 48 | def test_pose(): 49 | """Test pose estimation functionalities and check the embedding table for keypoints.""" 50 | exp = Explorer(data='coco8-pose.yaml', model='yolov8n-pose.pt') 51 | exp.create_embeddings_table(force=True) 52 | assert len(exp.table.head()['keypoints']) > 0 53 | similar = exp.get_similar(idx=[1, 2], limit=10) 54 | assert len(similar) > 0 55 | similar = exp.plot_similar(idx=[1, 2], limit=10) 56 | assert isinstance(similar, PIL.Image.Image) 57 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | # os.environ["OMP_NUM_THREADS"]='2' 3 | 4 | from ultralytics import YOLO 5 | # Load a model 6 | model = YOLO('yolov8n.yaml') # build a new model from YAML 7 | model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) 8 | 9 | # Train the model 10 | model.train(data='/mnt/mydisk/xiaolei/code/plate/plate_detect/ultralytics-main/ultralytics/cfg/datasets/plate.yaml', epochs=120, imgsz=640, batch=32, device=[0]) -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | yolo task=detect mode=train model=yolov8s.yaml data=./ultralytics/cfg/datasets/plate.yaml epochs=120 batch=32 imgsz=640 pretrained=False optimizer=SGD -------------------------------------------------------------------------------- /ultralytics/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | __version__ = "8.1.2" 4 | 5 | from ultralytics.data.explorer.explorer import Explorer 6 | from ultralytics.models import RTDETR, SAM, YOLO 7 | from ultralytics.models.fastsam import FastSAM 8 | from ultralytics.models.nas import NAS 9 | from ultralytics.utils import SETTINGS as settings 10 | from ultralytics.utils.checks import check_yolo as checks 11 | from ultralytics.utils.downloads import download 12 | 13 | __all__ = "__version__", "YOLO", "NAS", "SAM", "FastSAM", "RTDETR", "checks", "download", "settings", "Explorer" 14 | -------------------------------------------------------------------------------- /ultralytics/assets/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/ultralytics/assets/bus.jpg -------------------------------------------------------------------------------- /ultralytics/assets/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/ultralytics/assets/zidane.jpg -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/Argoverse.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/ 4 | # Example usage: yolo train data=Argoverse.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── Argoverse ← 
downloads here (31.5 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/Argoverse # dataset root dir 12 | train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images 13 | val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images 14 | test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: bus 23 | 5: truck 24 | 6: traffic_light 25 | 7: stop_sign 26 | 27 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 28 | download: | 29 | import json 30 | from tqdm import tqdm 31 | from ultralytics.utils.downloads import download 32 | from pathlib import Path 33 | 34 | def argoverse2yolo(set): 35 | labels = {} 36 | a = json.load(open(set, "rb")) 37 | for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."): 38 | img_id = annot['image_id'] 39 | img_name = a['images'][img_id]['name'] 40 | img_label_name = f'{img_name[:-3]}txt' 41 | 42 | cls = annot['category_id'] # instance class id 43 | x_center, y_center, width, height = annot['bbox'] 44 | x_center = (x_center + width / 2) / 1920.0 # offset and scale 45 | y_center = (y_center + height / 2) / 1200.0 # offset and scale 46 | width /= 1920.0 # scale 47 | height /= 1200.0 # scale 48 | 49 | img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']] 50 | if not img_dir.exists(): 51 | img_dir.mkdir(parents=True, exist_ok=True) 52 | 53 | k = str(img_dir / img_label_name) 54 | if k not in labels: 55 | labels[k] = [] 56 | labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n") 57 | 58 | for k in labels: 59 | with open(k, "w") as f: 60 | f.writelines(labels[k]) 61 | 62 | 63 | # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link) 64 | dir = Path(yaml['path']) # dataset root dir 65 | urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link'] 66 | download(urls, dir=dir) 67 | 68 | # Convert 69 | annotations_dir = 'Argoverse-HD/annotations/' 70 | (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images' 71 | for d in "train.json", "val.json": 72 | argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels 73 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/DOTAv1.5.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University 3 | # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/ 4 | # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── dota1.5 ← downloads here (2GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/DOTAv1.5 # dataset root dir 12 | train: images/train # train images (relative to 'path') 1411 images 13 | val: images/val # val images (relative to 'path') 458 images 14 | test: images/test # test images (optional) 937 images 15 | 16 | # Classes for DOTA 1.5 17 | names: 18 | 0: plane 19 | 1: ship 20 | 2: storage tank 21 | 3: baseball diamond 22 | 4: tennis court 23 | 5: basketball court 24 | 6: ground track field 25 | 7: harbor 26 | 8: bridge 27 | 9: large vehicle 28 | 10: small vehicle 29 | 11: helicopter 30 | 12: roundabout 31 | 13: soccer ball field 32 | 14: swimming pool 33 | 15: container crane 34 | 35 | # Download script/URL (optional) 36 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip 37 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/DOTAv1.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University 3 | # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/ 4 | # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── dota1 ← downloads here (2GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/DOTAv1 # dataset root dir 12 | train: images/train # train images (relative to 'path') 1411 images 13 | val: images/val # val images (relative to 'path') 458 images 14 | test: images/test # test images (optional) 937 images 15 | 16 | # Classes for DOTA 1.0 17 | names: 18 | 0: plane 19 | 1: ship 20 | 2: storage tank 21 | 3: baseball diamond 22 | 4: tennis court 23 | 5: basketball court 24 | 6: ground track field 25 | 7: harbor 26 | 8: bridge 27 | 9: large vehicle 28 | 10: small vehicle 29 | 11: helicopter 30 | 12: roundabout 31 | 13: soccer ball field 32 | 14: swimming pool 33 | 34 | # Download script/URL (optional) 35 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip 36 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/GlobalWheat2020.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/ 4 | # Example usage: yolo train data=GlobalWheat2020.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── GlobalWheat2020 ← downloads here (7.0 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/GlobalWheat2020 # dataset root dir 12 | train: # train images (relative to 'path') 3422 images 13 | - images/arvalis_1 14 | - images/arvalis_2 15 | - images/arvalis_3 16 | - images/ethz_1 17 | - images/rres_1 18 | - images/inrae_1 19 | - images/usask_1 20 | val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1) 21 | - images/ethz_1 22 | test: # test images (optional) 1276 images 23 | - images/utokyo_1 24 | - images/utokyo_2 25 | - images/nau_1 26 | - images/uq_1 27 | 28 | # Classes 29 | names: 30 | 0: wheat_head 31 | 32 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 33 | download: | 34 | from ultralytics.utils.downloads import download 35 | from pathlib import Path 36 | 37 | # Download 38 | dir = Path(yaml['path']) # dataset root dir 39 | urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', 40 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] 41 | download(urls, dir=dir) 42 | 43 | # Make Directories 44 | for p in 'annotations', 'images', 'labels': 45 | (dir / p).mkdir(parents=True, exist_ok=True) 46 | 47 | # Move 48 | for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \ 49 | 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1': 50 | (dir / 'global-wheat-codalab-official' / p).rename(dir / 'images' / p) # move to /images 51 | f = (dir / 'global-wheat-codalab-official' / p).with_suffix('.json') # json file 52 | if f.exists(): 53 | f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations 54 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/SKU-110K.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/ 4 | # Example usage: yolo train data=SKU-110K.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── SKU-110K ← downloads here (13.6 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/SKU-110K # dataset root dir 12 | train: train.txt # train images (relative to 'path') 8219 images 13 | val: val.txt # val images (relative to 'path') 588 images 14 | test: test.txt # test images (optional) 2936 images 15 | 16 | # Classes 17 | names: 18 | 0: object 19 | 20 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 21 | download: | 22 | import shutil 23 | from pathlib import Path 24 | 25 | import numpy as np 26 | import pandas as pd 27 | from tqdm import tqdm 28 | 29 | from ultralytics.utils.downloads import download 30 | from ultralytics.utils.ops import xyxy2xywh 31 | 32 | # Download 33 | dir = Path(yaml['path']) # dataset root dir 34 | parent = Path(dir.parent) # download dir 35 | urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'] 36 | download(urls, dir=parent) 37 | 38 | # Rename directories 39 | if dir.exists(): 40 | shutil.rmtree(dir) 41 | (parent / 'SKU110K_fixed').rename(dir) # rename dir 42 | (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir 43 | 44 | # Convert labels 45 | names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names 46 | for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv': 47 | x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations 48 | images, unique_images = x[:, 0], np.unique(x[:, 0]) 49 | with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f: 50 | f.writelines(f'./images/{s}\n' for s in unique_images) 51 | for im in tqdm(unique_images, desc=f'Converting {dir / d}'): 52 | cls = 0 # single-class dataset 53 | with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f: 54 | for r in x[images == im]: 55 | w, h = r[6], r[7] # image width, height 56 | xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance 57 | f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label 58 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/VOC.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford 3 | # Documentation: # Documentation: https://docs.ultralytics.com/datasets/detect/voc/ 4 | # Example usage: yolo train data=VOC.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── VOC ← downloads here (2.8 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/VOC 12 | train: # train images (relative to 'path') 16551 images 13 | - images/train2012 14 | - images/train2007 15 | - images/val2012 16 | - images/val2007 17 | val: # val images (relative to 'path') 4952 images 18 | - images/test2007 19 | test: # test images (optional) 20 | - images/test2007 21 | 22 | # Classes 23 | names: 24 | 0: aeroplane 25 | 1: bicycle 26 | 2: bird 27 | 3: boat 28 | 4: bottle 29 | 5: bus 30 | 6: car 31 | 7: cat 32 | 8: chair 33 | 9: cow 34 | 10: diningtable 35 | 11: dog 36 | 12: horse 37 | 13: motorbike 38 | 14: person 39 | 15: pottedplant 40 | 16: sheep 41 | 17: sofa 42 | 18: train 43 | 19: tvmonitor 44 | 45 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 46 | download: | 47 | import xml.etree.ElementTree as ET 48 | 49 | from tqdm import tqdm 50 | from ultralytics.utils.downloads import download 51 | from pathlib import Path 52 | 53 | def convert_label(path, lb_path, year, image_id): 54 | def convert_box(size, box): 55 | dw, dh = 1. / size[0], 1. / size[1] 56 | x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] 57 | return x * dw, y * dh, w * dw, h * dh 58 | 59 | in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml') 60 | out_file = open(lb_path, 'w') 61 | tree = ET.parse(in_file) 62 | root = tree.getroot() 63 | size = root.find('size') 64 | w = int(size.find('width').text) 65 | h = int(size.find('height').text) 66 | 67 | names = list(yaml['names'].values()) # names list 68 | for obj in root.iter('object'): 69 | cls = obj.find('name').text 70 | if cls in names and int(obj.find('difficult').text) != 1: 71 | xmlbox = obj.find('bndbox') 72 | bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')]) 73 | cls_id = names.index(cls) # class id 74 | out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n') 75 | 76 | 77 | # Download 78 | dir = Path(yaml['path']) # dataset root dir 79 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 80 | urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images 81 | f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images 82 | f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images 83 | download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True) # download and unzip over existing paths (required) 84 | 85 | # Convert 86 | path = dir / 'images/VOCdevkit' 87 | for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'): 88 | imgs_path = dir / 'images' / f'{image_set}{year}' 89 | lbs_path = dir / 'labels' / f'{image_set}{year}' 90 | imgs_path.mkdir(exist_ok=True, parents=True) 91 | lbs_path.mkdir(exist_ok=True, parents=True) 92 | 93 | with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f: 94 | image_ids = f.read().strip().split() 95 | for id in tqdm(image_ids, desc=f'{image_set}{year}'): 96 | f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path 97 | lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path 98 | f.rename(imgs_path / f.name) # move image 99 | convert_label(path, lb_path, year, id) # convert labels to YOLO format 100 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/VisDrone.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # VisDrone2019-DET dataset 
https://github.com/VisDrone/VisDrone-Dataset by Tianjin University 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/ 4 | # Example usage: yolo train data=VisDrone.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── VisDrone ← downloads here (2.3 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/VisDrone # dataset root dir 12 | train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images 13 | val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images 14 | test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images 15 | 16 | # Classes 17 | names: 18 | 0: pedestrian 19 | 1: people 20 | 2: bicycle 21 | 3: car 22 | 4: van 23 | 5: truck 24 | 6: tricycle 25 | 7: awning-tricycle 26 | 8: bus 27 | 9: motor 28 | 29 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 30 | download: | 31 | import os 32 | from pathlib import Path 33 | 34 | from ultralytics.utils.downloads import download 35 | 36 | def visdrone2yolo(dir): 37 | from PIL import Image 38 | from tqdm import tqdm 39 | 40 | def convert_box(size, box): 41 | # Convert VisDrone box to YOLO xywh box 42 | dw = 1. / size[0] 43 | dh = 1. / size[1] 44 | return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh 45 | 46 | (dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory 47 | pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}') 48 | for f in pbar: 49 | img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size 50 | lines = [] 51 | with open(f, 'r') as file: # read annotation.txt 52 | for row in [x.split(',') for x in file.read().strip().splitlines()]: 53 | if row[4] == '0': # VisDrone 'ignored regions' class 0 54 | continue 55 | cls = int(row[5]) - 1 56 | box = convert_box(img_size, tuple(map(int, row[:4]))) 57 | lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n") 58 | with open(str(f).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}'), 'w') as fl: 59 | fl.writelines(lines) # write label.txt 60 | 61 | 62 | # Download 63 | dir = Path(yaml['path']) # dataset root dir 64 | urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip', 65 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip', 66 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip', 67 | 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip'] 68 | download(urls, dir=dir, curl=True, threads=4) 69 | 70 | # Convert 71 | for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev': 72 | visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels 73 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO 2017 dataset https://cocodataset.org by Microsoft 3 | # Documentation: https://docs.ultralytics.com/datasets/pose/coco/ 4 | # Example usage: yolo train data=coco-pose.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco-pose ← downloads here (20.1 GB) 9 | 10 | # Train/val/test sets as 1) dir: 
path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco-pose # dataset root dir 12 | train: train2017.txt # train images (relative to 'path') 118287 images 13 | val: val2017.txt # val images (relative to 'path') 5000 images 14 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 15 | 16 | # Keypoints 17 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 18 | flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 19 | 20 | # Classes 21 | names: 22 | 0: person 23 | 24 | # Download script/URL (optional) 25 | download: | 26 | from ultralytics.utils.downloads import download 27 | from pathlib import Path 28 | 29 | # Download labels 30 | dir = Path(yaml['path']) # dataset root dir 31 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 32 | urls = [url + 'coco2017labels-pose.zip'] # labels 33 | download(urls, dir=dir.parent) 34 | # Download data 35 | urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 36 | 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 37 | 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 38 | download(urls, dir=dir / 'images', threads=3) 39 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO 2017 dataset https://cocodataset.org by Microsoft 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/coco/ 4 | # Example usage: yolo train data=coco.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco ← downloads here (20.1 GB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco # dataset root dir 12 | train: train2017.txt # train images (relative to 'path') 118287 images 13 | val: val2017.txt # val images (relative to 'path') 5000 images 14 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: | 101 | from ultralytics.utils.downloads import download 102 | from pathlib import Path 103 | 104 | # Download labels 105 | segments = True # segment or box labels 106 | dir = Path(yaml['path']) # dataset root dir 107 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 108 | urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels 109 | download(urls, dir=dir.parent) 110 | # Download data 111 | urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 112 | 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 113 | 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 114 | download(urls, dir=dir / 'images', threads=3) 115 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco128-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/segment/coco/ 4 | # Example usage: yolo train data=coco128.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco128-seg ← downloads here (7 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco128-seg # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco128-seg.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco128.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/coco/ 4 | # Example usage: yolo train data=coco128.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco128 ← downloads here (7 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
11 | path: ../datasets/coco128 # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco128.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco8-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/ 4 | # Example usage: yolo train data=coco8-pose.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco8-pose ← downloads here (1 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
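The `download:` entry appears in two flavours above: a plain zip URL (coco128.yaml, coco128-seg.yaml) or an inline Python snippet (coco.yaml, coco-pose.yaml). A rough, hypothetical dispatcher for that convention, not the Ultralytics implementation, could look like this:

```python
from pathlib import Path
from urllib.request import urlretrieve
import zipfile

def fetch_dataset(download_entry: str, root: Path) -> None:
    """Sketch: handle a dataset YAML 'download' entry (URL or inline script)."""
    root.mkdir(parents=True, exist_ok=True)
    entry = download_entry.strip()
    if entry.startswith(("http://", "https://")):       # URL style, e.g. coco128.yaml
        zip_path = root / entry.rsplit("/", 1)[-1]
        urlretrieve(entry, zip_path)                     # download the archive
        zipfile.ZipFile(zip_path).extractall(root)       # unpack next to it
    else:                                                # script style, e.g. coco.yaml
        # The snippet expects a `yaml` dict in scope (it reads yaml['path'] above).
        exec(entry, {"yaml": {"path": str(root)}})
```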
11 | path: ../datasets/coco8-pose # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | test: # test images (optional) 15 | 16 | # Keypoints 17 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 18 | flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 19 | 20 | # Classes 21 | names: 22 | 0: person 23 | 24 | # Download script/URL (optional) 25 | download: https://ultralytics.com/assets/coco8-pose.zip 26 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco8-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/ 4 | # Example usage: yolo train data=coco8-seg.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco8-seg ← downloads here (1 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco8-seg # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco8-seg.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/coco8.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/detect/coco8/ 4 | # Example usage: yolo train data=coco8.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── coco8 ← downloads here (1 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: 
path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco8 # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | # Download script/URL (optional) 100 | download: https://ultralytics.com/assets/coco8.zip 101 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/dota8.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # DOTA8 dataset 8 images from split DOTAv1 dataset by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/obb/dota8/ 4 | # Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── dota8 ← downloads here (1MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
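The "Example usage" headers in these small configs translate directly to the Python API shown later in the models README. A minimal smoke-test on the tiny COCO8 split defined above (4 train / 4 val images), with illustrative hyperparameters rather than this repository's training settings:

```python
from ultralytics import YOLO

# Quick end-to-end check before committing to a full-size dataset.
model = YOLO("yolov8n.yaml")                 # build from config (or "yolov8n.pt" for pretrained weights)
model.train(data="coco8.yaml", epochs=3, imgsz=640)
metrics = model.val()                        # evaluate on the val split
print(metrics.box.map50)                     # mAP@0.5 on the 4 val images
```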
11 | path: ../datasets/dota8 # dataset root dir 12 | train: images/train # train images (relative to 'path') 4 images 13 | val: images/val # val images (relative to 'path') 4 images 14 | 15 | # Classes for DOTA 1.0 16 | names: 17 | 0: plane 18 | 1: ship 19 | 2: storage tank 20 | 3: baseball diamond 21 | 4: tennis court 22 | 5: basketball court 23 | 6: ground track field 24 | 7: harbor 25 | 8: bridge 26 | 9: large vehicle 27 | 10: small vehicle 28 | 11: helicopter 29 | 12: roundabout 30 | 13: soccer ball field 31 | 14: swimming pool 32 | 33 | # Download script/URL (optional) 34 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip 35 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/plate.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # License plate detection dataset (single-row and double-row plates) used by this repository 3 | # Labels follow the standard Ultralytics YOLO detection format 4 | # Example usage: yolo train data=plate.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── plate_detect ← your plate images/labels (or keep the absolute paths below) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | # path: # dataset root dir (optional when train/val are absolute paths, as below) 12 | train: /mnt/mydisk/xiaolei/plate_detect/new_train_data # train images (absolute path) 13 | val: /mnt/mydisk/xiaolei/plate_detect/new_val_data # val images (absolute path) 14 | 15 | # Classes (plate layouts) 16 | names: 17 | 0: single 18 | 1: double 19 | 20 | 21 | # Download script/URL (optional) 22 | # download: # none; the plate dataset is user-provided 23 | -------------------------------------------------------------------------------- /ultralytics/cfg/datasets/tiger-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Tiger Pose dataset by Ultralytics 3 | # Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/ 4 | # Example usage: yolo train data=tiger-pose.yaml 5 | # parent 6 | # ├── ultralytics 7 | # └── datasets 8 | # └── tiger-pose ← downloads here (75.3 MB) 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/tiger-pose # dataset root dir 12 | train: train # train images (relative to 'path') 210 images 13 | val: val # val images (relative to 'path') 53 images 14 | 15 | # Keypoints 16 | kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 17 | flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 18 | 19 | # Classes 20 | names: 21 | 0: tiger 22 | 23 | # Download script/URL (optional) 24 | download: https://ultralytics.com/assets/tiger-pose.zip 25 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/README.md: -------------------------------------------------------------------------------- 1 | ## Models 2 | 3 | Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml` files) that can be used to create custom YOLO models.
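For the repo-specific plate.yaml above (classes 0: single, 1: double), a hypothetical training call through the same API could look like the sketch below. The repository's own train.py and train.sh may set things up differently; the model choice and hyperparameters are assumptions, and the dataset paths must point at your own plate data.

```python
from ultralytics import YOLO

# Two-class plate detector trained on the splits declared in plate.yaml.
model = YOLO("yolov8s.yaml")                 # assumed base config; swap for the repo's choice
model.train(
    data="ultralytics/cfg/datasets/plate.yaml",
    epochs=120,                              # illustrative values, not the repo's settings
    imgsz=640,
    batch=16,
)
model.val()                                  # report per-class results for 'single' and 'double'
```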
The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks. 4 | 5 | These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs. 6 | 7 | To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So don't wait: start creating your custom YOLO model now! 8 | 9 | ### Usage 10 | 11 | Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command: 12 | 13 | ```bash 14 | yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100 15 | ``` 16 | 17 | They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above: 18 | 19 | ```python 20 | from ultralytics import YOLO 21 | 22 | model = YOLO("model.yaml") # build a new model from a YAML config 23 | # model = YOLO("model.pt") # or load a pre-trained model if available 24 | model.info() # display model information 25 | model.train(data="coco128.yaml", epochs=100) # train the model 26 | ``` 27 | 28 | ## Pre-trained Model Architectures 29 | 30 | Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available. 31 | 32 | ## Contribute New Models 33 | 34 | Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository. 35 | 36 | By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile. 37 | 38 | To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited! 39 | 40 | Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏! 41 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-l.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e.
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | l: [1.00, 1.00, 1024] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, HGStem, [32, 48]] # 0-P2/4 13 | - [-1, 6, HGBlock, [48, 128, 3]] # stage 1 14 | 15 | - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8 16 | - [-1, 6, HGBlock, [96, 512, 3]] # stage 2 17 | 18 | - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16 19 | - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut 20 | - [-1, 6, HGBlock, [192, 1024, 5, True, True]] 21 | - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3 22 | 23 | - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32 24 | - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4 25 | 26 | head: 27 | - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2 28 | - [-1, 1, AIFI, [1024, 8]] 29 | - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0 30 | 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1 33 | - [[-2, -1], 1, Concat, [1]] 34 | - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0 35 | - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1 36 | 37 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 38 | - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0 39 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 40 | - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1 41 | 42 | - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0 43 | - [[-1, 17], 1, Concat, [1]] # cat Y4 44 | - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0 45 | 46 | - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1 47 | - [[-1, 12], 1, Concat, [1]] # cat Y5 48 | - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1 49 | 50 | - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 51 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-ResNet101 object detection model with P3-P5 outputs. 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | l: [1.00, 1.00, 1024] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0 13 | - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1 14 | - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2 15 | - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3 16 | - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4 17 | 18 | head: 19 | - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5 20 | - [-1, 1, AIFI, [1024, 8]] 21 | - [-1, 1, Conv, [256, 1, 1]] # 7 22 | 23 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 24 | - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9 25 | - [[-2, -1], 1, Concat, [1]] 26 | - [-1, 3, RepC3, [256]] # 11 27 | - [-1, 1, Conv, [256, 1, 1]] # 12 28 | 29 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 30 | - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 31 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1 33 | 34 | - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0 35 | - [[-1, 12], 1, Concat, [1]] # cat Y4 36 | - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1 39 | - [[-1, 7], 1, Concat, [1]] # cat Y5 40 | - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1 41 | 42 | - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 43 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-ResNet50 object detection model with P3-P5 outputs. 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | l: [1.00, 1.00, 1024] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0 13 | - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1 14 | - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2 15 | - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3 16 | - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4 17 | 18 | head: 19 | - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5 20 | - [-1, 1, AIFI, [1024, 8]] 21 | - [-1, 1, Conv, [256, 1, 1]] # 7 22 | 23 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 24 | - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9 25 | - [[-2, -1], 1, Concat, [1]] 26 | - [-1, 3, RepC3, [256]] # 11 27 | - [-1, 1, Conv, [256, 1, 1]] # 12 28 | 29 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 30 | - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 31 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1 33 | 34 | - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0 35 | - [[-1, 12], 1, Concat, [1]] # cat Y4 36 | - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1 39 | - [[-1, 7], 1, Concat, [1]] # cat Y5 40 | - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1 41 | 42 | - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 43 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/rt-detr/rtdetr-x.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | x: [1.00, 1.00, 2048] 9 | 10 | backbone: 11 | # [from, repeats, module, args] 12 | - [-1, 1, HGStem, [32, 64]] # 0-P2/4 13 | - [-1, 6, HGBlock, [64, 128, 3]] # stage 1 14 | 15 | - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8 16 | - [-1, 6, HGBlock, [128, 512, 3]] 17 | - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2 18 | 19 | - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16 20 | - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut 21 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] 22 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] 23 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] 24 | - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3 25 | 26 | - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32 27 | - [-1, 6, HGBlock, [512, 2048, 5, True, False]] 28 | - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4 29 | 30 | head: 31 | - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2 32 | - [-1, 1, AIFI, [2048, 8]] 33 | - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0 34 | 35 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 36 | - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1 37 | - [[-2, -1], 1, Concat, [1]] 38 | - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0 39 | - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1 40 | 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0 43 | - [[-2, -1], 1, Concat, [1]] # cat backbone P4 44 | - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1 45 | 46 | - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0 47 | - [[-1, 21], 1, Concat, [1]] # cat Y4 48 | - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0 49 | 50 | - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1 51 | - [[-1, 16], 1, Concat, [1]] # cat Y5 52 | - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1 53 | 54 | - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 55 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v3/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv3-SPP object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov3 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | 9 | # darknet53 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | - [-1, 1, Conv, [32, 3, 1]] # 0 13 | - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2 14 | - [-1, 1, Bottleneck, [64]] 15 | - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4 16 | - [-1, 2, Bottleneck, [128]] 17 | - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8 18 | - [-1, 8, Bottleneck, [256]] 19 | - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16 20 | - [-1, 8, Bottleneck, [512]] 21 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32 22 | - [-1, 4, Bottleneck, [1024]] # 10 23 | 24 | # YOLOv3-SPP head 25 | head: 26 | - [-1, 1, Bottleneck, [1024, False]] 27 | - [-1, 1, SPP, [512, [5, 9, 13]]] 28 | - [-1, 1, Conv, [1024, 3, 1]] 29 | - [-1, 1, Conv, [512, 1, 1]] 30 | - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large) 31 | 32 | - [-2, 1, Conv, [256, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P4 35 | - [-1, 1, Bottleneck, [512, False]] 36 | - [-1, 1, Bottleneck, [512, False]] 37 | - [-1, 1, Conv, [256, 1, 1]] 38 | - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium) 39 | 40 | - [-2, 1, Conv, [128, 1, 1]] 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [[-1, 6], 1, Concat, [1]] # cat backbone P3 43 | - [-1, 1, Bottleneck, [256, False]] 44 | - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small) 45 | 46 | - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v3/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | 9 | # YOLOv3-tiny backbone 10 | backbone: 11 | # [from, number, module, args] 12 | - [-1, 1, Conv, [16, 3, 1]] # 0 13 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2 14 | - [-1, 1, Conv, [32, 3, 1]] 15 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4 16 | - [-1, 1, Conv, [64, 3, 1]] 17 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8 18 | - [-1, 1, Conv, [128, 3, 1]] 19 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16 20 | - [-1, 1, Conv, [256, 3, 1]] 21 | - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32 22 | - [-1, 1, Conv, [512, 3, 1]] 23 | - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11 24 | - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12 25 | 26 | # YOLOv3-tiny head 27 | head: 28 | - [-1, 1, Conv, [1024, 3, 1]] 29 | - [-1, 1, Conv, [256, 1, 1]] 30 | - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large) 31 | 32 | - [-2, 1, Conv, [128, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P4 35 | - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium) 36 | 37 | - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5) 38 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v3/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv3 object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov3 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | depth_multiple: 1.0 # model depth multiple 7 | width_multiple: 1.0 # layer channel multiple 8 | 9 | # darknet53 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | - [-1, 1, Conv, [32, 3, 1]] # 0 13 | - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2 14 | - [-1, 1, Bottleneck, [64]] 15 | - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4 16 | - [-1, 2, Bottleneck, [128]] 17 | - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8 18 | - [-1, 8, Bottleneck, [256]] 19 | - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16 20 | - [-1, 8, Bottleneck, [512]] 21 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32 22 | - [-1, 4, Bottleneck, [1024]] # 10 23 | 24 | # YOLOv3 head 25 | head: 26 | - [-1, 1, Bottleneck, [1024, False]] 27 | - [-1, 1, Conv, [512, 1, 1]] 28 | - [-1, 1, Conv, [1024, 3, 1]] 29 | - [-1, 1, Conv, [512, 1, 1]] 30 | - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large) 31 | 32 | - [-2, 1, Conv, [256, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P4 35 | - [-1, 1, Bottleneck, [512, False]] 36 | - [-1, 1, Bottleneck, [512, False]] 37 | - [-1, 1, Conv, [256, 1, 1]] 38 | - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium) 39 | 40 | - [-2, 1, Conv, [128, 1, 1]] 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [[-1, 6], 1, Concat, [1]] # cat backbone P3 43 | - [-1, 1, Bottleneck, [256, False]] 44 | - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small) 45 | 46 | - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v5/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 1024] 11 | l: [1.00, 1.00, 1024] 12 | x: [1.33, 1.25, 1024] 13 | 14 | # YOLOv5 v6.0 backbone 15 | backbone: 16 | # [from, number, module, args] 17 | - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3, [128]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3, [256]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 9, C3, [512]] 24 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3, [768]] 26 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C3, [1024]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | - [-1, 1, Conv, [768, 1, 1]] 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 35 | - [-1, 3, C3, [768, False]] # 15 36 | 37 | - [-1, 1, Conv, [512, 1, 1]] 38 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 39 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 40 | - [-1, 3, C3, [512, False]] # 19 41 | 42 | - [-1, 1, Conv, [256, 1, 1]] 43 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 44 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 45 | - [-1, 3, C3, [256, False]] # 23 (P3/8-small) 46 | 47 | - [-1, 1, Conv, [256, 3, 2]] 48 | - [[-1, 20], 1, Concat, [1]] # cat head P4 49 | - [-1, 3, C3, [512, False]] # 26 (P4/16-medium) 50 | 51 | - [-1, 1, Conv, [512, 3, 2]] 52 | - [[-1, 16], 1, Concat, [1]] # cat head P5 53 | - [-1, 3, C3, [768, False]] # 29 (P5/32-large) 54 | 55 | - [-1, 1, Conv, [768, 3, 2]] 56 | - [[-1, 12], 1, Concat, [1]] # cat head P6 57 | - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge) 58 | 59 | - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 60 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v5/yolov5.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov5n.yaml' will call yolov5.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 1024] 11 | l: [1.00, 1.00, 1024] 12 | x: [1.33, 1.25, 1024] 13 | 14 | # YOLOv5 v6.0 backbone 15 | backbone: 16 | # [from, number, module, args] 17 | - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3, [128]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3, [256]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 9, C3, [512]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3, [1024]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | - [-1, 1, Conv, [512, 1, 1]] 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 33 | - [-1, 3, C3, [512, False]] # 13 34 | 35 | - [-1, 1, Conv, [256, 1, 1]] 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 38 | - [-1, 3, C3, [256, False]] # 17 (P3/8-small) 39 | 40 | - [-1, 1, Conv, [256, 3, 2]] 41 | - [[-1, 14], 1, Concat, [1]] # cat head P4 42 | - [-1, 3, C3, [512, False]] # 20 (P4/16-medium) 43 | 44 | - [-1, 1, Conv, [512, 3, 2]] 45 | - [[-1, 10], 1, Concat, [1]] # cat head P5 46 | - [-1, 3, C3, [1024, False]] # 23 (P5/32-large) 47 | 48 | - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5) 49 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v6/yolov6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | activation: nn.ReLU() # (optional) model default activation function 7 | scales: # model compound scaling constants, i.e. 
'model=yolov6n.yaml' will call yolov8.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] 10 | s: [0.33, 0.50, 1024] 11 | m: [0.67, 0.75, 768] 12 | l: [1.00, 1.00, 512] 13 | x: [1.00, 1.25, 512] 14 | 15 | # YOLOv6-3.0s backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 6, Conv, [128, 3, 1]] 21 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 12, Conv, [256, 3, 1]] 23 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 18, Conv, [512, 3, 1]] 25 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 26 | - [-1, 6, Conv, [1024, 3, 1]] 27 | - [-1, 1, SPPF, [1024, 5]] # 9 28 | 29 | # YOLOv6-3.0s head 30 | head: 31 | - [-1, 1, Conv, [256, 1, 1]] 32 | - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]] 33 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 34 | - [-1, 1, Conv, [256, 3, 1]] 35 | - [-1, 9, Conv, [256, 3, 1]] # 14 36 | 37 | - [-1, 1, Conv, [128, 1, 1]] 38 | - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]] 39 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 40 | - [-1, 1, Conv, [128, 3, 1]] 41 | - [-1, 9, Conv, [128, 3, 1]] # 19 42 | 43 | - [-1, 1, Conv, [128, 3, 2]] 44 | - [[-1, 15], 1, Concat, [1]] # cat head P4 45 | - [-1, 1, Conv, [256, 3, 1]] 46 | - [-1, 9, Conv, [256, 3, 1]] # 23 47 | 48 | - [-1, 1, Conv, [256, 3, 2]] 49 | - [[-1, 10], 1, Concat, [1]] # cat head P5 50 | - [-1, 1, Conv, [512, 3, 1]] 51 | - [-1, 9, Conv, [512, 3, 1]] # 27 52 | 53 | - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5) 54 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify 3 | 4 | # Parameters 5 | nc: 1000 # number of classes 6 | scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 1024] 11 | l: [1.00, 1.00, 1024] 12 | x: [1.00, 1.25, 1024] 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | 27 | # YOLOv8.0n head 28 | head: 29 | - [-1, 1, Classify, [nc]] # Classify 30 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs 13 | 14 | # YOLOv8.0-ghost backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3Ghost, [128, True]] 20 | - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3Ghost, [256, True]] 22 | - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C3Ghost, [512, True]] 24 | - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3Ghost, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0-ghost-p2 head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C3Ghost, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 39 | - [[-1, 2], 1, Concat, [1]] # cat backbone P2 40 | - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall) 41 | 42 | - [-1, 1, GhostConv, [128, 3, 2]] 43 | - [[-1, 15], 1, Concat, [1]] # cat head P3 44 | - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small) 45 | 46 | - [-1, 1, GhostConv, [256, 3, 2]] 47 | - [[-1, 12], 1, Concat, [1]] # cat head P4 48 | - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium) 49 | 50 | - [-1, 1, GhostConv, [512, 3, 2]] 51 | - [[-1, 9], 1, Concat, [1]] # cat head P5 52 | - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large) 53 | 54 | - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5) 55 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs 13 | 14 | # YOLOv8.0-ghost backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C3Ghost, [128, True]] 20 | - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C3Ghost, [256, True]] 22 | - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C3Ghost, [512, True]] 24 | - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C3Ghost, [768, True]] 26 | - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C3Ghost, [1024, True]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv8.0-ghost-p6 head 31 | head: 32 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 33 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 34 | - [-1, 3, C3Ghost, [768]] # 14 35 | 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 38 | - [-1, 3, C3Ghost, [512]] # 17 39 | 40 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 41 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 42 | - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small) 43 | 44 | - [-1, 1, GhostConv, [256, 3, 2]] 45 | - [[-1, 17], 1, Concat, [1]] # cat head P4 46 | - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium) 47 | 48 | - [-1, 1, GhostConv, [512, 3, 2]] 49 | - [[-1, 14], 1, Concat, [1]] # cat head P5 50 | - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large) 51 | 52 | - [-1, 1, GhostConv, [768, 3, 2]] 53 | - [[-1, 11], 1, Concat, [1]] # cat head P6 54 | - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge) 55 | 56 | - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 57 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-ghost.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2 4 | 5 | # Parameters 6 | nc: 80 # number of classes 7 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs 10 | s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs 11 | m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs 12 | l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs 13 | x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs 14 | 15 | # YOLOv8.0n-ghost backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 3, C3Ghost, [128, True]] 21 | - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 6, C3Ghost, [256, True]] 23 | - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 6, C3Ghost, [512, True]] 25 | - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32 26 | - [-1, 3, C3Ghost, [1024, True]] 27 | - [-1, 1, SPPF, [1024, 5]] # 9 28 | 29 | # YOLOv8.0n head 30 | head: 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 33 | - [-1, 3, C3Ghost, [512]] # 12 34 | 35 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 36 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 37 | - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small) 38 | 39 | - [-1, 1, GhostConv, [256, 3, 2]] 40 | - [[-1, 12], 1, Concat, [1]] # cat head P4 41 | - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium) 42 | 43 | - [-1, 1, GhostConv, [512, 3, 2]] 44 | - [[-1, 9], 1, Concat, [1]] # cat head P5 45 | - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large) 46 | 47 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 48 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-obb.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-p2.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0 backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0-p2 head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 39 | - [[-1, 2], 1, Concat, [1]] # cat backbone P2 40 | - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall) 41 | 42 | - [-1, 1, Conv, [128, 3, 2]] 43 | - [[-1, 15], 1, Concat, [1]] # cat head P3 44 | - [-1, 3, C2f, [256]] # 21 (P3/8-small) 45 | 46 | - [-1, 1, Conv, [256, 3, 2]] 47 | - [[-1, 12], 1, Concat, [1]] # cat head P4 48 | - [-1, 3, C2f, [512]] # 24 (P4/16-medium) 49 | 50 | - [-1, 1, Conv, [512, 3, 2]] 51 | - [[-1, 9], 1, Concat, [1]] # cat head P5 52 | - [-1, 3, C2f, [1024]] # 27 (P5/32-large) 53 | 54 | - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5) 55 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0x6 backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [768, True]] 26 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C2f, [1024, True]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv8.0x6 head 31 | head: 32 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 33 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 34 | - [-1, 3, C2, [768, False]] # 14 35 | 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 38 | - [-1, 3, C2, [512, False]] # 17 39 | 40 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 41 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 42 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 43 | 44 | - [-1, 1, Conv, [256, 3, 2]] 45 | - [[-1, 17], 1, Concat, [1]] # cat head P4 46 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 47 | 48 | - [-1, 1, Conv, [512, 3, 2]] 49 | - [[-1, 14], 1, Concat, [1]] # cat head P5 50 | - [-1, 3, C2, [768, False]] # 26 (P5/32-large) 51 | 52 | - [-1, 1, Conv, [768, 3, 2]] 53 | - [[-1, 11], 1, Concat, [1]] # cat head P6 54 | - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) 55 | 56 | - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 57 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-pose-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose 3 | 4 | # Parameters 5 | nc: 1 # number of classes 6 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 7 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] 10 | s: [0.33, 0.50, 1024] 11 | m: [0.67, 0.75, 768] 12 | l: [1.00, 1.00, 512] 13 | x: [1.00, 1.25, 512] 14 | 15 | # YOLOv8.0x6 backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 3, C2f, [128, True]] 21 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 6, C2f, [256, True]] 23 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 6, C2f, [512, True]] 25 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 26 | - [-1, 3, C2f, [768, True]] 27 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 28 | - [-1, 3, C2f, [1024, True]] 29 | - [-1, 1, SPPF, [1024, 5]] # 11 30 | 31 | # YOLOv8.0x6 head 32 | head: 33 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 34 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 35 | - [-1, 3, C2, [768, False]] # 14 36 | 37 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 38 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 39 | - [-1, 3, C2, [512, False]] # 17 40 | 41 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 42 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 43 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 44 | 45 | - [-1, 1, Conv, [256, 3, 2]] 46 | - [[-1, 17], 1, Concat, [1]] # cat head P4 47 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 48 | 49 | - [-1, 1, Conv, [512, 3, 2]] 50 | - [[-1, 14], 1, Concat, [1]] # cat head P5 51 | - [-1, 3, C2, [768, False]] # 26 (P5/32-large) 52 | 53 | - [-1, 1, Conv, [768, 3, 2]] 54 | - [[-1, 11], 1, Concat, [1]] # cat head P6 55 | - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) 56 | 57 | - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6) 58 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-pose.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose 3 | 4 | # Parameters 5 | nc: 1 # number of classes 6 | kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) 7 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n' 8 | # [depth, width, max_channels] 9 | n: [0.33, 0.25, 1024] 10 | s: [0.33, 0.50, 1024] 11 | m: [0.67, 0.75, 768] 12 | l: [1.00, 1.00, 512] 13 | x: [1.00, 1.25, 512] 14 | 15 | # YOLOv8.0n backbone 16 | backbone: 17 | # [from, repeats, module, args] 18 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 19 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 20 | - [-1, 3, C2f, [128, True]] 21 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 22 | - [-1, 6, C2f, [256, True]] 23 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 24 | - [-1, 6, C2f, [512, True]] 25 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 26 | - [-1, 3, C2f, [1024, True]] 27 | - [-1, 1, SPPF, [1024, 5]] # 9 28 | 29 | # YOLOv8.0n head 30 | head: 31 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 32 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 33 | - [-1, 3, C2f, [512]] # 12 34 | 35 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 36 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 37 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 38 | 39 | - [-1, 1, Conv, [256, 3, 2]] 40 | - [[-1, 12], 1, Concat, [1]] # cat head P4 41 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 42 | 43 | - [-1, 1, Conv, [512, 3, 2]] 44 | - [[-1, 9], 1, Concat, [1]] # cat head P5 45 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 46 | 47 | - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5) 48 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-rtdetr.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-seg-p6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0x6 backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [768, True]] 26 | - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64 27 | - [-1, 3, C2f, [1024, True]] 28 | - [-1, 1, SPPF, [1024, 5]] # 11 29 | 30 | # YOLOv8.0x6 head 31 | head: 32 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 33 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 34 | - [-1, 3, C2, [768, False]] # 14 35 | 36 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 37 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 38 | - [-1, 3, C2, [512, False]] # 17 39 | 40 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 41 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 42 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 43 | 44 | - [-1, 1, Conv, [256, 3, 2]] 45 | - [[-1, 17], 1, Concat, [1]] # cat head P4 46 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 47 | 48 | - [-1, 1, Conv, [512, 3, 2]] 49 | - [[-1, 14], 1, Concat, [1]] # cat head P5 50 | - [-1, 3, C2, [768, False]] # 26 (P5/32-large) 51 | 52 | - [-1, 1, Conv, [768, 3, 2]] 53 | - [[-1, 11], 1, Concat, [1]] # cat head P6 54 | - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge) 55 | 56 | - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6) 57 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e.
'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] 9 | s: [0.33, 0.50, 1024] 10 | m: [0.67, 0.75, 768] 11 | l: [1.00, 1.00, 512] 12 | x: [1.00, 1.25, 512] 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/models/v8/yolov8.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect 3 | 4 | # Parameters 5 | nc: 80 # number of classes 6 | scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' 7 | # [depth, width, max_channels] 8 | n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs 9 | s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs 10 | m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs 11 | l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs 12 | x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs 13 | 14 | # YOLOv8.0n backbone 15 | backbone: 16 | # [from, repeats, module, args] 17 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 18 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 19 | - [-1, 3, C2f, [128, True]] 20 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 21 | - [-1, 6, C2f, [256, True]] 22 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 23 | - [-1, 6, C2f, [512, True]] 24 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 25 | - [-1, 3, C2f, [1024, True]] 26 | - [-1, 1, SPPF, [1024, 5]] # 9 27 | 28 | # YOLOv8.0n head 29 | head: 30 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2f, [512]] # 12 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, "nearest"]] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 12], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 9], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 45 | 46 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 47 | -------------------------------------------------------------------------------- /ultralytics/cfg/trackers/botsort.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT 3 | 4 | tracker_type: botsort # tracker type, ['botsort', 'bytetrack'] 5 | track_high_thresh: 0.5 # threshold for the first association 6 | track_low_thresh: 0.1 # threshold for the second association 7 | new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks 8 | track_buffer: 30 # buffer to calculate the time when to remove tracks 9 | match_thresh: 0.8 # threshold for matching tracks 10 | # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) 11 | # mot20: False # for tracker evaluation(not used for now) 12 | 13 | # BoT-SORT settings 14 | gmc_method: sparseOptFlow # method of global motion compensation 15 | # ReID model related thresh (not supported yet) 16 | proximity_thresh: 0.5 17 | appearance_thresh: 0.25 18 | with_reid: False 19 | -------------------------------------------------------------------------------- /ultralytics/cfg/trackers/bytetrack.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack 3 | 4 | tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack'] 5 | track_high_thresh: 0.5 # threshold for the first association 6 | track_low_thresh: 0.1 # threshold for the second association 7 | new_track_thresh: 0.6 # threshold for init new track if the detection 
does not match any tracks 8 | track_buffer: 30 # buffer to calculate the time when to remove tracks 9 | match_thresh: 0.8 # threshold for matching tracks 10 | # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) 11 | # mot20: False # for tracker evaluation(not used for now) 12 | -------------------------------------------------------------------------------- /ultralytics/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .base import BaseDataset 4 | from .build import build_dataloader, build_yolo_dataset, load_inference_source 5 | from .dataset import ClassificationDataset, SemanticDataset, YOLODataset 6 | 7 | __all__ = ( 8 | "BaseDataset", 9 | "ClassificationDataset", 10 | "SemanticDataset", 11 | "YOLODataset", 12 | "build_yolo_dataset", 13 | "build_dataloader", 14 | "load_inference_source", 15 | ) 16 | -------------------------------------------------------------------------------- /ultralytics/data/annotator.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from pathlib import Path 4 | 5 | from ultralytics import SAM, YOLO 6 | 7 | 8 | def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None): 9 | """ 10 | Automatically annotates images using a YOLO object detection model and a SAM segmentation model. 11 | 12 | Args: 13 | data (str): Path to a folder containing images to be annotated. 14 | det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'. 15 | sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'. 16 | device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available). 17 | output_dir (str | None | optional): Directory to save the annotated results. 18 | Defaults to a 'labels' folder in the same directory as 'data'. 
19 | 20 | Example: 21 | ```python 22 | from ultralytics.data.annotator import auto_annotate 23 | 24 | auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt') 25 | ``` 26 | """ 27 | det_model = YOLO(det_model) 28 | sam_model = SAM(sam_model) 29 | 30 | data = Path(data) 31 | if not output_dir: 32 | output_dir = data.parent / f"{data.stem}_auto_annotate_labels" 33 | Path(output_dir).mkdir(exist_ok=True, parents=True) 34 | 35 | det_results = det_model(data, stream=True, device=device) 36 | 37 | for result in det_results: 38 | class_ids = result.boxes.cls.int().tolist() # noqa 39 | if len(class_ids): 40 | boxes = result.boxes.xyxy # Boxes object for bbox outputs 41 | sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device) 42 | segments = sam_results[0].masks.xyn # noqa 43 | 44 | with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f: 45 | for i in range(len(segments)): 46 | s = segments[i] 47 | if len(s) == 0: 48 | continue 49 | segment = map(str, segments[i].reshape(-1).tolist()) 50 | f.write(f"{class_ids[i]} " + " ".join(segment) + "\n") 51 | -------------------------------------------------------------------------------- /ultralytics/data/explorer/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .utils import plot_query_result 4 | 5 | __all__ = ["plot_query_result"] 6 | -------------------------------------------------------------------------------- /ultralytics/data/explorer/gui/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/data/scripts/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ultralytics YOLO 🚀, AGPL-3.0 license 3 | # Download latest models from https://github.com/ultralytics/assets/releases 4 | # Example usage: bash ultralytics/data/scripts/download_weights.sh 5 | # parent 6 | # └── weights 7 | # ├── yolov8n.pt ← downloads here 8 | # ├── yolov8s.pt 9 | # └── ... 10 | 11 | python - < w - threshold, 2] = w # x2 26 | boxes[boxes[:, 3] > h - threshold, 3] = h # y2 27 | return boxes 28 | 29 | 30 | def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False): 31 | """ 32 | Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes. 
33 | 34 | Args: 35 | box1 (torch.Tensor): (4, ) 36 | boxes (torch.Tensor): (n, 4) 37 | iou_thres (float): IoU threshold 38 | image_shape (tuple): (height, width) 39 | raw_output (bool): If True, return the raw IoU values instead of the indices 40 | 41 | Returns: 42 | high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres 43 | """ 44 | boxes = adjust_bboxes_to_image_border(boxes, image_shape) 45 | # Obtain coordinates for intersections 46 | x1 = torch.max(box1[0], boxes[:, 0]) 47 | y1 = torch.max(box1[1], boxes[:, 1]) 48 | x2 = torch.min(box1[2], boxes[:, 2]) 49 | y2 = torch.min(box1[3], boxes[:, 3]) 50 | 51 | # Compute the area of intersection 52 | intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0) 53 | 54 | # Compute the area of both individual boxes 55 | box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) 56 | box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 57 | 58 | # Compute the area of union 59 | union = box1_area + box2_area - intersection 60 | 61 | # Compute the IoU 62 | iou = intersection / union # Should be shape (n, ) 63 | if raw_output: 64 | return 0 if iou.numel() == 0 else iou 65 | 66 | # return indices of boxes with IoU > thres 67 | return torch.nonzero(iou > iou_thres).flatten() 68 | -------------------------------------------------------------------------------- /ultralytics/models/fastsam/val.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.models.yolo.segment import SegmentationValidator 4 | from ultralytics.utils.metrics import SegmentMetrics 5 | 6 | 7 | class FastSAMValidator(SegmentationValidator): 8 | """ 9 | Custom validation class for fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework. 10 | 11 | Extends the SegmentationValidator class, customizing the validation process specifically for fast SAM. This class 12 | sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled 13 | to avoid errors during validation. 14 | 15 | Attributes: 16 | dataloader: The data loader object used for validation. 17 | save_dir (str): The directory where validation results will be saved. 18 | pbar: A progress bar object. 19 | args: Additional arguments for customization. 20 | _callbacks: List of callback functions to be invoked during validation. 21 | """ 22 | 23 | def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None): 24 | """ 25 | Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics. 26 | 27 | Args: 28 | dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation. 29 | save_dir (Path, optional): Directory to save results. 30 | pbar (tqdm.tqdm): Progress bar for displaying progress. 31 | args (SimpleNamespace): Configuration for the validator. 32 | _callbacks (dict): Dictionary to store various callback functions. 33 | 34 | Notes: 35 | Plots for ConfusionMatrix and other related metrics are disabled in this class to avoid errors. 
36 | """ 37 | super().__init__(dataloader, save_dir, pbar, args, _callbacks) 38 | self.args.task = "segment" 39 | self.args.plots = False # disable ConfusionMatrix and other plots to avoid errors 40 | self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot) 41 | -------------------------------------------------------------------------------- /ultralytics/models/nas/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .model import NAS 4 | from .predict import NASPredictor 5 | from .val import NASValidator 6 | 7 | __all__ = "NASPredictor", "NASValidator", "NAS" 8 | -------------------------------------------------------------------------------- /ultralytics/models/nas/model.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """ 3 | YOLO-NAS model interface. 4 | 5 | Example: 6 | ```python 7 | from ultralytics import NAS 8 | 9 | model = NAS('yolo_nas_s') 10 | results = model.predict('ultralytics/assets/bus.jpg') 11 | ``` 12 | """ 13 | 14 | from pathlib import Path 15 | 16 | import torch 17 | 18 | from ultralytics.engine.model import Model 19 | from ultralytics.utils.torch_utils import model_info, smart_inference_mode 20 | from .predict import NASPredictor 21 | from .val import NASValidator 22 | 23 | 24 | class NAS(Model): 25 | """ 26 | YOLO NAS model for object detection. 27 | 28 | This class provides an interface for the YOLO-NAS models and extends the `Model` class from Ultralytics engine. 29 | It is designed to facilitate the task of object detection using pre-trained or custom-trained YOLO-NAS models. 30 | 31 | Example: 32 | ```python 33 | from ultralytics import NAS 34 | 35 | model = NAS('yolo_nas_s') 36 | results = model.predict('ultralytics/assets/bus.jpg') 37 | ``` 38 | 39 | Attributes: 40 | model (str): Path to the pre-trained model or model name. Defaults to 'yolo_nas_s.pt'. 41 | 42 | Note: 43 | YOLO-NAS models only support pre-trained models. Do not provide YAML configuration files. 44 | """ 45 | 46 | def __init__(self, model="yolo_nas_s.pt") -> None: 47 | """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model.""" 48 | assert Path(model).suffix not in (".yaml", ".yml"), "YOLO-NAS models only support pre-trained models." 49 | super().__init__(model, task="detect") 50 | 51 | @smart_inference_mode() 52 | def _load(self, weights: str, task: str): 53 | """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided.""" 54 | import super_gradients 55 | 56 | suffix = Path(weights).suffix 57 | if suffix == ".pt": 58 | self.model = torch.load(weights) 59 | elif suffix == "": 60 | self.model = super_gradients.training.models.get(weights, pretrained_weights="coco") 61 | # Standardize model 62 | self.model.fuse = lambda verbose=True: self.model 63 | self.model.stride = torch.tensor([32]) 64 | self.model.names = dict(enumerate(self.model._class_names)) 65 | self.model.is_fused = lambda: False # for info() 66 | self.model.yaml = {} # for info() 67 | self.model.pt_path = weights # for export() 68 | self.model.task = "detect" # for export() 69 | 70 | def info(self, detailed=False, verbose=True): 71 | """ 72 | Logs model info. 73 | 74 | Args: 75 | detailed (bool): Show detailed information about model. 76 | verbose (bool): Controls verbosity. 
77 | """ 78 | return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640) 79 | 80 | @property 81 | def task_map(self): 82 | """Returns a dictionary mapping tasks to respective predictor and validator classes.""" 83 | return {"detect": {"predictor": NASPredictor, "validator": NASValidator}} 84 | -------------------------------------------------------------------------------- /ultralytics/models/nas/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.engine.predictor import BasePredictor 6 | from ultralytics.engine.results import Results 7 | from ultralytics.utils import ops 8 | 9 | 10 | class NASPredictor(BasePredictor): 11 | """ 12 | Ultralytics YOLO NAS Predictor for object detection. 13 | 14 | This class extends the `BasePredictor` from Ultralytics engine and is responsible for post-processing the 15 | raw predictions generated by the YOLO NAS models. It applies operations like non-maximum suppression and 16 | scaling the bounding boxes to fit the original image dimensions. 17 | 18 | Attributes: 19 | args (Namespace): Namespace containing various configurations for post-processing. 20 | 21 | Example: 22 | ```python 23 | from ultralytics import NAS 24 | 25 | model = NAS('yolo_nas_s') 26 | predictor = model.predictor 27 | # Assumes that raw_preds, img, orig_imgs are available 28 | results = predictor.postprocess(raw_preds, img, orig_imgs) 29 | ``` 30 | 31 | Note: 32 | Typically, this class is not instantiated directly. It is used internally within the `NAS` class. 33 | """ 34 | 35 | def postprocess(self, preds_in, img, orig_imgs): 36 | """Postprocess predictions and returns a list of Results objects.""" 37 | 38 | # Cat boxes and class scores 39 | boxes = ops.xyxy2xywh(preds_in[0][0]) 40 | preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1) 41 | 42 | preds = ops.non_max_suppression( 43 | preds, 44 | self.args.conf, 45 | self.args.iou, 46 | agnostic=self.args.agnostic_nms, 47 | max_det=self.args.max_det, 48 | classes=self.args.classes, 49 | ) 50 | 51 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 52 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 53 | 54 | results = [] 55 | for i, pred in enumerate(preds): 56 | orig_img = orig_imgs[i] 57 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 58 | img_path = self.batch[0][i] 59 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) 60 | return results 61 | -------------------------------------------------------------------------------- /ultralytics/models/nas/val.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.models.yolo.detect import DetectionValidator 6 | from ultralytics.utils import ops 7 | 8 | __all__ = ["NASValidator"] 9 | 10 | 11 | class NASValidator(DetectionValidator): 12 | """ 13 | Ultralytics YOLO NAS Validator for object detection. 14 | 15 | Extends `DetectionValidator` from the Ultralytics models package and is designed to post-process the raw predictions 16 | generated by YOLO NAS models. It performs non-maximum suppression to remove overlapping and low-confidence boxes, 17 | ultimately producing the final detections. 
18 | 19 | Attributes: 20 | args (Namespace): Namespace containing various configurations for post-processing, such as confidence and IoU thresholds. 21 | lb (torch.Tensor): Optional tensor for multilabel NMS. 22 | 23 | Example: 24 | ```python 25 | from ultralytics import NAS 26 | 27 | model = NAS('yolo_nas_s') 28 | validator = model.validator 29 | # Assumes that raw_preds are available 30 | final_preds = validator.postprocess(raw_preds) 31 | ``` 32 | 33 | Note: 34 | This class is generally not instantiated directly but is used internally within the `NAS` class. 35 | """ 36 | 37 | def postprocess(self, preds_in): 38 | """Apply Non-maximum suppression to prediction outputs.""" 39 | boxes = ops.xyxy2xywh(preds_in[0][0]) 40 | preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1) 41 | return ops.non_max_suppression( 42 | preds, 43 | self.args.conf, 44 | self.args.iou, 45 | labels=self.lb, 46 | multi_label=False, 47 | agnostic=self.args.single_cls, 48 | max_det=self.args.max_det, 49 | max_time_img=0.5, 50 | ) 51 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .model import RTDETR 4 | from .predict import RTDETRPredictor 5 | from .val import RTDETRValidator 6 | 7 | __all__ = "RTDETRPredictor", "RTDETRValidator", "RTDETR" 8 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/model.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """ 3 | Interface for Baidu's RT-DETR, a Vision Transformer-based real-time object detector. RT-DETR offers real-time 4 | performance and high accuracy, excelling in accelerated backends like CUDA with TensorRT. It features an efficient 5 | hybrid encoder and IoU-aware query selection for enhanced detection accuracy. 6 | 7 | For more information on RT-DETR, visit: https://arxiv.org/pdf/2304.08069.pdf 8 | """ 9 | 10 | from ultralytics.engine.model import Model 11 | from ultralytics.nn.tasks import RTDETRDetectionModel 12 | 13 | from .predict import RTDETRPredictor 14 | from .train import RTDETRTrainer 15 | from .val import RTDETRValidator 16 | 17 | 18 | class RTDETR(Model): 19 | """ 20 | Interface for Baidu's RT-DETR model. This Vision Transformer-based object detector provides real-time performance 21 | with high accuracy. It supports efficient hybrid encoding, IoU-aware query selection, and adaptable inference speed. 22 | 23 | Attributes: 24 | model (str): Path to the pre-trained model. Defaults to 'rtdetr-l.pt'. 25 | """ 26 | 27 | def __init__(self, model="rtdetr-l.pt") -> None: 28 | """ 29 | Initializes the RT-DETR model with the given pre-trained model file. Supports .pt and .yaml formats. 30 | 31 | Args: 32 | model (str): Path to the pre-trained model. Defaults to 'rtdetr-l.pt'. 33 | 34 | Raises: 35 | NotImplementedError: If the model file extension is not 'pt', 'yaml', or 'yml'. 36 | """ 37 | if model and model.split(".")[-1] not in ("pt", "yaml", "yml"): 38 | raise NotImplementedError("RT-DETR only supports creating from *.pt, *.yaml, or *.yml files.") 39 | super().__init__(model=model, task="detect") 40 | 41 | @property 42 | def task_map(self) -> dict: 43 | """ 44 | Returns a task map for RT-DETR, associating tasks with corresponding Ultralytics classes. 
45 | 46 | Returns: 47 | dict: A dictionary mapping task names to Ultralytics task classes for the RT-DETR model. 48 | """ 49 | return { 50 | "detect": { 51 | "predictor": RTDETRPredictor, 52 | "validator": RTDETRValidator, 53 | "trainer": RTDETRTrainer, 54 | "model": RTDETRDetectionModel, 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.data.augment import LetterBox 6 | from ultralytics.engine.predictor import BasePredictor 7 | from ultralytics.engine.results import Results 8 | from ultralytics.utils import ops 9 | 10 | 11 | class RTDETRPredictor(BasePredictor): 12 | """ 13 | RT-DETR (Real-Time Detection Transformer) Predictor extending the BasePredictor class for making predictions using 14 | Baidu's RT-DETR model. 15 | 16 | This class leverages the power of Vision Transformers to provide real-time object detection while maintaining 17 | high accuracy. It supports key features like efficient hybrid encoding and IoU-aware query selection. 18 | 19 | Example: 20 | ```python 21 | from ultralytics.utils import ASSETS 22 | from ultralytics.models.rtdetr import RTDETRPredictor 23 | 24 | args = dict(model='rtdetr-l.pt', source=ASSETS) 25 | predictor = RTDETRPredictor(overrides=args) 26 | predictor.predict_cli() 27 | ``` 28 | 29 | Attributes: 30 | imgsz (int): Image size for inference (must be square and scale-filled). 31 | args (dict): Argument overrides for the predictor. 32 | """ 33 | 34 | def postprocess(self, preds, img, orig_imgs): 35 | """ 36 | Postprocess the raw predictions from the model to generate bounding boxes and confidence scores. 37 | 38 | The method filters detections based on confidence and class if specified in `self.args`. 39 | 40 | Args: 41 | preds (torch.Tensor): Raw predictions from the model. 42 | img (torch.Tensor): Processed input images. 43 | orig_imgs (list or torch.Tensor): Original, unprocessed images. 44 | 45 | Returns: 46 | (list[Results]): A list of Results objects containing the post-processed bounding boxes, confidence scores, 47 | and class labels. 48 | """ 49 | nd = preds[0].shape[-1] 50 | bboxes, scores = preds[0].split((4, nd - 4), dim=-1) 51 | 52 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 53 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 54 | 55 | results = [] 56 | for i, bbox in enumerate(bboxes): # (300, 4) 57 | bbox = ops.xywh2xyxy(bbox) 58 | score, cls = scores[i].max(-1, keepdim=True) # (300, 1) 59 | idx = score.squeeze(-1) > self.args.conf # (300, ) 60 | if self.args.classes is not None: 61 | idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx 62 | pred = torch.cat([bbox, score, cls], dim=-1)[idx] # filter 63 | orig_img = orig_imgs[i] 64 | oh, ow = orig_img.shape[:2] 65 | pred[..., [0, 2]] *= ow 66 | pred[..., [1, 3]] *= oh 67 | img_path = self.batch[0][i] 68 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) 69 | return results 70 | 71 | def pre_transform(self, im): 72 | """ 73 | Pre-transforms the input images before feeding them into the model for inference. The input images are 74 | letterboxed to ensure a square aspect ratio and scale-filled. The size must be square(640) and scaleFilled. 
75 | 76 | Args: 77 | im (list[np.ndarray] |torch.Tensor): Input images of shape (N,3,h,w) for tensor, [(h,w,3) x N] for list. 78 | 79 | Returns: 80 | (list): List of pre-transformed images ready for model inference. 81 | """ 82 | letterbox = LetterBox(self.imgsz, auto=False, scaleFill=True) 83 | return [letterbox(image=x) for x in im] 84 | -------------------------------------------------------------------------------- /ultralytics/models/rtdetr/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | import torch 6 | 7 | from ultralytics.models.yolo.detect import DetectionTrainer 8 | from ultralytics.nn.tasks import RTDETRDetectionModel 9 | from ultralytics.utils import RANK, colorstr 10 | from .val import RTDETRDataset, RTDETRValidator 11 | 12 | 13 | class RTDETRTrainer(DetectionTrainer): 14 | """ 15 | Trainer class for the RT-DETR model developed by Baidu for real-time object detection. Extends the DetectionTrainer 16 | class for YOLO to adapt to the specific features and architecture of RT-DETR. This model leverages Vision 17 | Transformers and has capabilities like IoU-aware query selection and adaptable inference speed. 18 | 19 | Notes: 20 | - F.grid_sample used in RT-DETR does not support the `deterministic=True` argument. 21 | - AMP training can lead to NaN outputs and may produce errors during bipartite graph matching. 22 | 23 | Example: 24 | ```python 25 | from ultralytics.models.rtdetr.train import RTDETRTrainer 26 | 27 | args = dict(model='rtdetr-l.yaml', data='coco8.yaml', imgsz=640, epochs=3) 28 | trainer = RTDETRTrainer(overrides=args) 29 | trainer.train() 30 | ``` 31 | """ 32 | 33 | def get_model(self, cfg=None, weights=None, verbose=True): 34 | """ 35 | Initialize and return an RT-DETR model for object detection tasks. 36 | 37 | Args: 38 | cfg (dict, optional): Model configuration. Defaults to None. 39 | weights (str, optional): Path to pre-trained model weights. Defaults to None. 40 | verbose (bool): Verbose logging if True. Defaults to True. 41 | 42 | Returns: 43 | (RTDETRDetectionModel): Initialized model. 44 | """ 45 | model = RTDETRDetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1) 46 | if weights: 47 | model.load(weights) 48 | return model 49 | 50 | def build_dataset(self, img_path, mode="val", batch=None): 51 | """ 52 | Build and return an RT-DETR dataset for training or validation. 53 | 54 | Args: 55 | img_path (str): Path to the folder containing images. 56 | mode (str): Dataset mode, either 'train' or 'val'. 57 | batch (int, optional): Batch size for rectangle training. Defaults to None. 58 | 59 | Returns: 60 | (RTDETRDataset): Dataset object for the specific mode. 61 | """ 62 | return RTDETRDataset( 63 | img_path=img_path, 64 | imgsz=self.args.imgsz, 65 | batch_size=batch, 66 | augment=mode == "train", 67 | hyp=self.args, 68 | rect=False, 69 | cache=self.args.cache or None, 70 | prefix=colorstr(f"{mode}: "), 71 | data=self.data, 72 | ) 73 | 74 | def get_validator(self): 75 | """ 76 | Returns a DetectionValidator suitable for RT-DETR model validation. 77 | 78 | Returns: 79 | (RTDETRValidator): Validator object for model validation. 80 | """ 81 | self.loss_names = "giou_loss", "cls_loss", "l1_loss" 82 | return RTDETRValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) 83 | 84 | def preprocess_batch(self, batch): 85 | """ 86 | Preprocess a batch of images. Scales and converts the images to float format. 
87 | 88 | Args: 89 | batch (dict): Dictionary containing a batch of images, bboxes, and labels. 90 | 91 | Returns: 92 | (dict): Preprocessed batch. 93 | """ 94 | batch = super().preprocess_batch(batch) 95 | bs = len(batch["img"]) 96 | batch_idx = batch["batch_idx"] 97 | gt_bbox, gt_class = [], [] 98 | for i in range(bs): 99 | gt_bbox.append(batch["bboxes"][batch_idx == i].to(batch_idx.device)) 100 | gt_class.append(batch["cls"][batch_idx == i].to(device=batch_idx.device, dtype=torch.long)) 101 | return batch 102 | -------------------------------------------------------------------------------- /ultralytics/models/sam/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .model import SAM 4 | from .predict import Predictor 5 | 6 | __all__ = "SAM", "Predictor" # tuple or list 7 | -------------------------------------------------------------------------------- /ultralytics/models/sam/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/models/sam/modules/sam.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | # Copyright (c) Meta Platforms, Inc. and affiliates. 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | from typing import List 10 | 11 | import torch 12 | from torch import nn 13 | 14 | from .decoders import MaskDecoder 15 | from .encoders import ImageEncoderViT, PromptEncoder 16 | 17 | 18 | class Sam(nn.Module): 19 | """ 20 | Sam (Segment Anything Model) is designed for object segmentation tasks. It uses image encoders to generate image 21 | embeddings, and prompt encoders to encode various types of input prompts. These embeddings are then used by the mask 22 | decoder to predict object masks. 23 | 24 | Attributes: 25 | mask_threshold (float): Threshold value for mask prediction. 26 | image_format (str): Format of the input image, default is 'RGB'. 27 | image_encoder (ImageEncoderViT): The backbone used to encode the image into embeddings. 28 | prompt_encoder (PromptEncoder): Encodes various types of input prompts. 29 | mask_decoder (MaskDecoder): Predicts object masks from the image and prompt embeddings. 30 | pixel_mean (List[float]): Mean pixel values for image normalization. 31 | pixel_std (List[float]): Standard deviation values for image normalization. 32 | """ 33 | 34 | mask_threshold: float = 0.0 35 | image_format: str = "RGB" 36 | 37 | def __init__( 38 | self, 39 | image_encoder: ImageEncoderViT, 40 | prompt_encoder: PromptEncoder, 41 | mask_decoder: MaskDecoder, 42 | pixel_mean: List[float] = (123.675, 116.28, 103.53), 43 | pixel_std: List[float] = (58.395, 57.12, 57.375), 44 | ) -> None: 45 | """ 46 | Initialize the Sam class to predict object masks from an image and input prompts. 47 | 48 | Note: 49 | All forward() operations moved to SAMPredictor. 50 | 51 | Args: 52 | image_encoder (ImageEncoderViT): The backbone used to encode the image into image embeddings. 53 | prompt_encoder (PromptEncoder): Encodes various types of input prompts. 54 | mask_decoder (MaskDecoder): Predicts masks from the image embeddings and encoded prompts. 
55 | pixel_mean (List[float], optional): Mean values for normalizing pixels in the input image. Defaults to 56 | (123.675, 116.28, 103.53). 57 | pixel_std (List[float], optional): Std values for normalizing pixels in the input image. Defaults to 58 | (58.395, 57.12, 57.375). 59 | """ 60 | super().__init__() 61 | self.image_encoder = image_encoder 62 | self.prompt_encoder = prompt_encoder 63 | self.mask_decoder = mask_decoder 64 | self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False) 65 | self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False) 66 | -------------------------------------------------------------------------------- /ultralytics/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.models.yolo import classify, detect, obb, pose, segment 4 | 5 | from .model import YOLO 6 | 7 | __all__ = "classify", "segment", "detect", "pose", "obb", "YOLO" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/classify/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.models.yolo.classify.predict import ClassificationPredictor 4 | from ultralytics.models.yolo.classify.train import ClassificationTrainer 5 | from ultralytics.models.yolo.classify.val import ClassificationValidator 6 | 7 | __all__ = "ClassificationPredictor", "ClassificationTrainer", "ClassificationValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/classify/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import cv2 4 | import torch 5 | from PIL import Image 6 | 7 | from ultralytics.engine.predictor import BasePredictor 8 | from ultralytics.engine.results import Results 9 | from ultralytics.utils import DEFAULT_CFG, ops 10 | 11 | 12 | class ClassificationPredictor(BasePredictor): 13 | """ 14 | A class extending the BasePredictor class for prediction based on a classification model. 15 | 16 | Notes: 17 | - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'. 
18 | 19 | Example: 20 | ```python 21 | from ultralytics.utils import ASSETS 22 | from ultralytics.models.yolo.classify import ClassificationPredictor 23 | 24 | args = dict(model='yolov8n-cls.pt', source=ASSETS) 25 | predictor = ClassificationPredictor(overrides=args) 26 | predictor.predict_cli() 27 | ``` 28 | """ 29 | 30 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 31 | """Initializes ClassificationPredictor setting the task to 'classify'.""" 32 | super().__init__(cfg, overrides, _callbacks) 33 | self.args.task = "classify" 34 | self._legacy_transform_name = "ultralytics.yolo.data.augment.ToTensor" 35 | 36 | def preprocess(self, img): 37 | """Converts input image to model-compatible data type.""" 38 | if not isinstance(img, torch.Tensor): 39 | is_legacy_transform = any( 40 | self._legacy_transform_name in str(transform) for transform in self.transforms.transforms 41 | ) 42 | if is_legacy_transform: # to handle legacy transforms 43 | img = torch.stack([self.transforms(im) for im in img], dim=0) 44 | else: 45 | img = torch.stack( 46 | [self.transforms(Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))) for im in img], dim=0 47 | ) 48 | img = (img if isinstance(img, torch.Tensor) else torch.from_numpy(img)).to(self.model.device) 49 | return img.half() if self.model.fp16 else img.float() # uint8 to fp16/32 50 | 51 | def postprocess(self, preds, img, orig_imgs): 52 | """Post-processes predictions to return Results objects.""" 53 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 54 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 55 | 56 | results = [] 57 | for i, pred in enumerate(preds): 58 | orig_img = orig_imgs[i] 59 | img_path = self.batch[0][i] 60 | results.append(Results(orig_img, path=img_path, names=self.model.names, probs=pred)) 61 | return results 62 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/detect/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import DetectionPredictor 4 | from .train import DetectionTrainer 5 | from .val import DetectionValidator 6 | 7 | __all__ = "DetectionPredictor", "DetectionTrainer", "DetectionValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/detect/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.predictor import BasePredictor 4 | from ultralytics.engine.results import Results 5 | from ultralytics.utils import ops 6 | 7 | 8 | class DetectionPredictor(BasePredictor): 9 | """ 10 | A class extending the BasePredictor class for prediction based on a detection model. 
11 | 12 | Example: 13 | ```python 14 | from ultralytics.utils import ASSETS 15 | from ultralytics.models.yolo.detect import DetectionPredictor 16 | 17 | args = dict(model='yolov8n.pt', source=ASSETS) 18 | predictor = DetectionPredictor(overrides=args) 19 | predictor.predict_cli() 20 | ``` 21 | """ 22 | 23 | def postprocess(self, preds, img, orig_imgs): 24 | """Post-processes predictions and returns a list of Results objects.""" 25 | preds = ops.non_max_suppression( 26 | preds, 27 | self.args.conf, 28 | self.args.iou, 29 | agnostic=self.args.agnostic_nms, 30 | max_det=self.args.max_det, 31 | classes=self.args.classes, 32 | ) 33 | 34 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 35 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 36 | 37 | results = [] 38 | for i, pred in enumerate(preds): 39 | orig_img = orig_imgs[i] 40 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 41 | img_path = self.batch[0][i] 42 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) 43 | return results 44 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/model.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.model import Model 4 | from ultralytics.models import yolo 5 | from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel 6 | 7 | 8 | class YOLO(Model): 9 | """YOLO (You Only Look Once) object detection model.""" 10 | 11 | @property 12 | def task_map(self): 13 | """Map head to model, trainer, validator, and predictor classes.""" 14 | return { 15 | "classify": { 16 | "model": ClassificationModel, 17 | "trainer": yolo.classify.ClassificationTrainer, 18 | "validator": yolo.classify.ClassificationValidator, 19 | "predictor": yolo.classify.ClassificationPredictor, 20 | }, 21 | "detect": { 22 | "model": DetectionModel, 23 | "trainer": yolo.detect.DetectionTrainer, 24 | "validator": yolo.detect.DetectionValidator, 25 | "predictor": yolo.detect.DetectionPredictor, 26 | }, 27 | "segment": { 28 | "model": SegmentationModel, 29 | "trainer": yolo.segment.SegmentationTrainer, 30 | "validator": yolo.segment.SegmentationValidator, 31 | "predictor": yolo.segment.SegmentationPredictor, 32 | }, 33 | "pose": { 34 | "model": PoseModel, 35 | "trainer": yolo.pose.PoseTrainer, 36 | "validator": yolo.pose.PoseValidator, 37 | "predictor": yolo.pose.PosePredictor, 38 | }, 39 | "obb": { 40 | "model": OBBModel, 41 | "trainer": yolo.obb.OBBTrainer, 42 | "validator": yolo.obb.OBBValidator, 43 | "predictor": yolo.obb.OBBPredictor, 44 | }, 45 | } 46 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/obb/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import OBBPredictor 4 | from .train import OBBTrainer 5 | from .val import OBBValidator 6 | 7 | __all__ = "OBBPredictor", "OBBTrainer", "OBBValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/obb/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.engine.results import Results 6 | from 
ultralytics.models.yolo.detect.predict import DetectionPredictor 7 | from ultralytics.utils import DEFAULT_CFG, ops 8 | 9 | 10 | class OBBPredictor(DetectionPredictor): 11 | """ 12 | A class extending the DetectionPredictor class for prediction based on an Oriented Bounding Box (OBB) model. 13 | 14 | Example: 15 | ```python 16 | from ultralytics.utils import ASSETS 17 | from ultralytics.models.yolo.obb import OBBPredictor 18 | 19 | args = dict(model='yolov8n-obb.pt', source=ASSETS) 20 | predictor = OBBPredictor(overrides=args) 21 | predictor.predict_cli() 22 | ``` 23 | """ 24 | 25 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 26 | """Initializes OBBPredictor with optional model and data configuration overrides.""" 27 | super().__init__(cfg, overrides, _callbacks) 28 | self.args.task = "obb" 29 | 30 | def postprocess(self, preds, img, orig_imgs): 31 | """Post-processes predictions and returns a list of Results objects.""" 32 | preds = ops.non_max_suppression( 33 | preds, 34 | self.args.conf, 35 | self.args.iou, 36 | agnostic=self.args.agnostic_nms, 37 | max_det=self.args.max_det, 38 | nc=len(self.model.names), 39 | classes=self.args.classes, 40 | rotated=True, 41 | ) 42 | 43 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 44 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 45 | 46 | results = [] 47 | for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]): 48 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape, xywh=True) 49 | # xywh, r, conf, cls 50 | obb = torch.cat([pred[:, :4], pred[:, -1:], pred[:, 4:6]], dim=-1) 51 | results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb)) 52 | return results 53 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/obb/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | from ultralytics.models import yolo 6 | from ultralytics.nn.tasks import OBBModel 7 | from ultralytics.utils import DEFAULT_CFG, RANK 8 | 9 | 10 | class OBBTrainer(yolo.detect.DetectionTrainer): 11 | """ 12 | A class extending the DetectionTrainer class for training based on an Oriented Bounding Box (OBB) model. 
13 | 14 | Example: 15 | ```python 16 | from ultralytics.models.yolo.obb import OBBTrainer 17 | 18 | args = dict(model='yolov8n-obb.pt', data='dota8.yaml', epochs=3) 19 | trainer = OBBTrainer(overrides=args) 20 | trainer.train() 21 | ``` 22 | """ 23 | 24 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 25 | """Initialize an OBBTrainer object with given arguments.""" 26 | if overrides is None: 27 | overrides = {} 28 | overrides["task"] = "obb" 29 | super().__init__(cfg, overrides, _callbacks) 30 | 31 | def get_model(self, cfg=None, weights=None, verbose=True): 32 | """Return OBBModel initialized with specified config and weights.""" 33 | model = OBBModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose and RANK == -1) 34 | if weights: 35 | model.load(weights) 36 | 37 | return model 38 | 39 | def get_validator(self): 40 | """Return an instance of OBBValidator for validation of YOLO model.""" 41 | self.loss_names = "box_loss", "cls_loss", "dfl_loss" 42 | return yolo.obb.OBBValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) 43 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/pose/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import PosePredictor 4 | from .train import PoseTrainer 5 | from .val import PoseValidator 6 | 7 | __all__ = "PoseTrainer", "PoseValidator", "PosePredictor" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/pose/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.results import Results 4 | from ultralytics.models.yolo.detect.predict import DetectionPredictor 5 | from ultralytics.utils import DEFAULT_CFG, LOGGER, ops 6 | 7 | 8 | class PosePredictor(DetectionPredictor): 9 | """ 10 | A class extending the DetectionPredictor class for prediction based on a pose model. 11 | 12 | Example: 13 | ```python 14 | from ultralytics.utils import ASSETS 15 | from ultralytics.models.yolo.pose import PosePredictor 16 | 17 | args = dict(model='yolov8n-pose.pt', source=ASSETS) 18 | predictor = PosePredictor(overrides=args) 19 | predictor.predict_cli() 20 | ``` 21 | """ 22 | 23 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 24 | """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device.""" 25 | super().__init__(cfg, overrides, _callbacks) 26 | self.args.task = "pose" 27 | if isinstance(self.args.device, str) and self.args.device.lower() == "mps": 28 | LOGGER.warning( 29 | "WARNING ⚠️ Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. " 30 | "See https://github.com/ultralytics/ultralytics/issues/4031."
31 | ) 32 | 33 | def postprocess(self, preds, img, orig_imgs): 34 | """Return detection results for a given input image or list of images.""" 35 | preds = ops.non_max_suppression( 36 | preds, 37 | self.args.conf, 38 | self.args.iou, 39 | agnostic=self.args.agnostic_nms, 40 | max_det=self.args.max_det, 41 | classes=self.args.classes, 42 | nc=len(self.model.names), 43 | ) 44 | 45 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 46 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 47 | 48 | results = [] 49 | for i, pred in enumerate(preds): 50 | orig_img = orig_imgs[i] 51 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape).round() 52 | pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:] 53 | pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape) 54 | img_path = self.batch[0][i] 55 | results.append( 56 | Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts) 57 | ) 58 | return results 59 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/pose/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | from ultralytics.models import yolo 6 | from ultralytics.nn.tasks import PoseModel 7 | from ultralytics.utils import DEFAULT_CFG, LOGGER 8 | from ultralytics.utils.plotting import plot_images, plot_results 9 | 10 | 11 | class PoseTrainer(yolo.detect.DetectionTrainer): 12 | """ 13 | A class extending the DetectionTrainer class for training based on a pose model. 14 | 15 | Example: 16 | ```python 17 | from ultralytics.models.yolo.pose import PoseTrainer 18 | 19 | args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml', epochs=3) 20 | trainer = PoseTrainer(overrides=args) 21 | trainer.train() 22 | ``` 23 | """ 24 | 25 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 26 | """Initialize a PoseTrainer object with specified configurations and overrides.""" 27 | if overrides is None: 28 | overrides = {} 29 | overrides["task"] = "pose" 30 | super().__init__(cfg, overrides, _callbacks) 31 | 32 | if isinstance(self.args.device, str) and self.args.device.lower() == "mps": 33 | LOGGER.warning( 34 | "WARNING ⚠️ Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. " 35 | "See https://github.com/ultralytics/ultralytics/issues/4031." 
36 | ) 37 | 38 | def get_model(self, cfg=None, weights=None, verbose=True): 39 | """Get pose estimation model with specified configuration and weights.""" 40 | model = PoseModel(cfg, ch=3, nc=self.data["nc"], data_kpt_shape=self.data["kpt_shape"], verbose=verbose) 41 | if weights: 42 | model.load(weights) 43 | 44 | return model 45 | 46 | def set_model_attributes(self): 47 | """Sets keypoints shape attribute of PoseModel.""" 48 | super().set_model_attributes() 49 | self.model.kpt_shape = self.data["kpt_shape"] 50 | 51 | def get_validator(self): 52 | """Returns an instance of the PoseValidator class for validation.""" 53 | self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss" 54 | return yolo.pose.PoseValidator( 55 | self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks 56 | ) 57 | 58 | def plot_training_samples(self, batch, ni): 59 | """Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints.""" 60 | images = batch["img"] 61 | kpts = batch["keypoints"] 62 | cls = batch["cls"].squeeze(-1) 63 | bboxes = batch["bboxes"] 64 | paths = batch["im_file"] 65 | batch_idx = batch["batch_idx"] 66 | plot_images( 67 | images, 68 | batch_idx, 69 | cls, 70 | bboxes, 71 | kpts=kpts, 72 | paths=paths, 73 | fname=self.save_dir / f"train_batch{ni}.jpg", 74 | on_plot=self.on_plot, 75 | ) 76 | 77 | def plot_metrics(self): 78 | """Plots training/val metrics.""" 79 | plot_results(file=self.csv, pose=True, on_plot=self.on_plot) # save results.png 80 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/segment/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .predict import SegmentationPredictor 4 | from .train import SegmentationTrainer 5 | from .val import SegmentationValidator 6 | 7 | __all__ = "SegmentationPredictor", "SegmentationTrainer", "SegmentationValidator" 8 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/segment/predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.engine.results import Results 4 | from ultralytics.models.yolo.detect.predict import DetectionPredictor 5 | from ultralytics.utils import DEFAULT_CFG, ops 6 | 7 | 8 | class SegmentationPredictor(DetectionPredictor): 9 | """ 10 | A class extending the DetectionPredictor class for prediction based on a segmentation model. 
11 | 12 | Example: 13 | ```python 14 | from ultralytics.utils import ASSETS 15 | from ultralytics.models.yolo.segment import SegmentationPredictor 16 | 17 | args = dict(model='yolov8n-seg.pt', source=ASSETS) 18 | predictor = SegmentationPredictor(overrides=args) 19 | predictor.predict_cli() 20 | ``` 21 | """ 22 | 23 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 24 | """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks.""" 25 | super().__init__(cfg, overrides, _callbacks) 26 | self.args.task = "segment" 27 | 28 | def postprocess(self, preds, img, orig_imgs): 29 | """Applies non-max suppression and processes detections for each image in an input batch.""" 30 | p = ops.non_max_suppression( 31 | preds[0], 32 | self.args.conf, 33 | self.args.iou, 34 | agnostic=self.args.agnostic_nms, 35 | max_det=self.args.max_det, 36 | nc=len(self.model.names), 37 | classes=self.args.classes, 38 | ) 39 | 40 | if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list 41 | orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) 42 | 43 | results = [] 44 | proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported 45 | for i, pred in enumerate(p): 46 | orig_img = orig_imgs[i] 47 | img_path = self.batch[0][i] 48 | if not len(pred): # save empty boxes 49 | masks = None 50 | elif self.args.retina_masks: 51 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 52 | masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC 53 | else: 54 | masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC 55 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) 56 | results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) 57 | return results 58 | -------------------------------------------------------------------------------- /ultralytics/models/yolo/segment/train.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from copy import copy 4 | 5 | from ultralytics.models import yolo 6 | from ultralytics.nn.tasks import SegmentationModel 7 | from ultralytics.utils import DEFAULT_CFG, RANK 8 | from ultralytics.utils.plotting import plot_images, plot_results 9 | 10 | 11 | class SegmentationTrainer(yolo.detect.DetectionTrainer): 12 | """ 13 | A class extending the DetectionTrainer class for training based on a segmentation model. 
14 | 15 | Example: 16 | ```python 17 | from ultralytics.models.yolo.segment import SegmentationTrainer 18 | 19 | args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3) 20 | trainer = SegmentationTrainer(overrides=args) 21 | trainer.train() 22 | ``` 23 | """ 24 | 25 | def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): 26 | """Initialize a SegmentationTrainer object with given arguments.""" 27 | if overrides is None: 28 | overrides = {} 29 | overrides["task"] = "segment" 30 | super().__init__(cfg, overrides, _callbacks) 31 | 32 | def get_model(self, cfg=None, weights=None, verbose=True): 33 | """Return SegmentationModel initialized with specified config and weights.""" 34 | model = SegmentationModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose and RANK == -1) 35 | if weights: 36 | model.load(weights) 37 | 38 | return model 39 | 40 | def get_validator(self): 41 | """Return an instance of SegmentationValidator for validation of YOLO model.""" 42 | self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss" 43 | return yolo.segment.SegmentationValidator( 44 | self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks 45 | ) 46 | 47 | def plot_training_samples(self, batch, ni): 48 | """Creates a plot of training sample images with labels and box coordinates.""" 49 | plot_images( 50 | batch["img"], 51 | batch["batch_idx"], 52 | batch["cls"].squeeze(-1), 53 | batch["bboxes"], 54 | masks=batch["masks"], 55 | paths=batch["im_file"], 56 | fname=self.save_dir / f"train_batch{ni}.jpg", 57 | on_plot=self.on_plot, 58 | ) 59 | 60 | def plot_metrics(self): 61 | """Plots training/val metrics.""" 62 | plot_results(file=self.csv, segment=True, on_plot=self.on_plot) # save results.png 63 | -------------------------------------------------------------------------------- /ultralytics/nn/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .tasks import ( 4 | BaseModel, 5 | ClassificationModel, 6 | DetectionModel, 7 | SegmentationModel, 8 | attempt_load_one_weight, 9 | attempt_load_weights, 10 | guess_model_scale, 11 | guess_model_task, 12 | parse_model, 13 | torch_safe_load, 14 | yaml_model_load, 15 | ) 16 | 17 | __all__ = ( 18 | "attempt_load_one_weight", 19 | "attempt_load_weights", 20 | "parse_model", 21 | "yaml_model_load", 22 | "guess_model_task", 23 | "guess_model_scale", 24 | "torch_safe_load", 25 | "DetectionModel", 26 | "SegmentationModel", 27 | "ClassificationModel", 28 | "BaseModel", 29 | ) 30 | -------------------------------------------------------------------------------- /ultralytics/nn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """ 3 | Ultralytics modules. 4 | 5 | Example: 6 | Visualize a module with Netron. 
7 | ```python 8 | from ultralytics.nn.modules import * 9 | import torch 10 | import os 11 | 12 | x = torch.ones(1, 128, 40, 40) 13 | m = Conv(128, 128) 14 | f = f'{m._get_name()}.onnx' 15 | torch.onnx.export(m, x, f) 16 | os.system(f'onnxsim {f} {f} && open {f}') 17 | ``` 18 | """ 19 | 20 | from .block import ( 21 | C1, 22 | C2, 23 | C3, 24 | C3TR, 25 | DFL, 26 | SPP, 27 | SPPF, 28 | Bottleneck, 29 | BottleneckCSP, 30 | C2f, 31 | C3Ghost, 32 | C3x, 33 | GhostBottleneck, 34 | HGBlock, 35 | HGStem, 36 | Proto, 37 | RepC3, 38 | ResNetLayer, 39 | ) 40 | from .conv import ( 41 | CBAM, 42 | ChannelAttention, 43 | Concat, 44 | Conv, 45 | Conv2, 46 | ConvTranspose, 47 | DWConv, 48 | DWConvTranspose2d, 49 | Focus, 50 | GhostConv, 51 | LightConv, 52 | RepConv, 53 | SpatialAttention, 54 | ) 55 | from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment 56 | from .transformer import ( 57 | AIFI, 58 | MLP, 59 | DeformableTransformerDecoder, 60 | DeformableTransformerDecoderLayer, 61 | LayerNorm2d, 62 | MLPBlock, 63 | MSDeformAttn, 64 | TransformerBlock, 65 | TransformerEncoderLayer, 66 | TransformerLayer, 67 | ) 68 | 69 | __all__ = ( 70 | "Conv", 71 | "Conv2", 72 | "LightConv", 73 | "RepConv", 74 | "DWConv", 75 | "DWConvTranspose2d", 76 | "ConvTranspose", 77 | "Focus", 78 | "GhostConv", 79 | "ChannelAttention", 80 | "SpatialAttention", 81 | "CBAM", 82 | "Concat", 83 | "TransformerLayer", 84 | "TransformerBlock", 85 | "MLPBlock", 86 | "LayerNorm2d", 87 | "DFL", 88 | "HGBlock", 89 | "HGStem", 90 | "SPP", 91 | "SPPF", 92 | "C1", 93 | "C2", 94 | "C3", 95 | "C2f", 96 | "C3x", 97 | "C3TR", 98 | "C3Ghost", 99 | "GhostBottleneck", 100 | "Bottleneck", 101 | "BottleneckCSP", 102 | "Proto", 103 | "Detect", 104 | "Segment", 105 | "Pose", 106 | "Classify", 107 | "TransformerEncoderLayer", 108 | "RepC3", 109 | "RTDETRDecoder", 110 | "AIFI", 111 | "DeformableTransformerDecoder", 112 | "DeformableTransformerDecoderLayer", 113 | "MSDeformAttn", 114 | "MLP", 115 | "ResNetLayer", 116 | "OBB", 117 | ) 118 | -------------------------------------------------------------------------------- /ultralytics/nn/modules/utils.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """Module utils.""" 3 | 4 | import copy 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torch.nn.init import uniform_ 12 | 13 | __all__ = "multi_scale_deformable_attn_pytorch", "inverse_sigmoid" 14 | 15 | 16 | def _get_clones(module, n): 17 | """Create a list of cloned modules from the given module.""" 18 | return nn.ModuleList([copy.deepcopy(module) for _ in range(n)]) 19 | 20 | 21 | def bias_init_with_prob(prior_prob=0.01): 22 | """Initialize conv/fc bias value according to a given probability value.""" 23 | return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init 24 | 25 | 26 | def linear_init(module): 27 | """Initialize the weights and biases of a linear module.""" 28 | bound = 1 / math.sqrt(module.weight.shape[0]) 29 | uniform_(module.weight, -bound, bound) 30 | if hasattr(module, "bias") and module.bias is not None: 31 | uniform_(module.bias, -bound, bound) 32 | 33 | 34 | def inverse_sigmoid(x, eps=1e-5): 35 | """Calculate the inverse sigmoid function for a tensor.""" 36 | x = x.clamp(min=0, max=1) 37 | x1 = x.clamp(min=eps) 38 | x2 = (1 - x).clamp(min=eps) 39 | return torch.log(x1 / x2) 40 | 41 | 42 | def multi_scale_deformable_attn_pytorch( 43 | value: 
torch.Tensor, 44 | value_spatial_shapes: torch.Tensor, 45 | sampling_locations: torch.Tensor, 46 | attention_weights: torch.Tensor, 47 | ) -> torch.Tensor: 48 | """ 49 | Multi-scale deformable attention. 50 | 51 | https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py 52 | """ 53 | 54 | bs, _, num_heads, embed_dims = value.shape 55 | _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape 56 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 57 | sampling_grids = 2 * sampling_locations - 1 58 | sampling_value_list = [] 59 | for level, (H_, W_) in enumerate(value_spatial_shapes): 60 | # bs, H_*W_, num_heads, embed_dims -> 61 | # bs, H_*W_, num_heads*embed_dims -> 62 | # bs, num_heads*embed_dims, H_*W_ -> 63 | # bs*num_heads, embed_dims, H_, W_ 64 | value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(bs * num_heads, embed_dims, H_, W_) 65 | # bs, num_queries, num_heads, num_points, 2 -> 66 | # bs, num_heads, num_queries, num_points, 2 -> 67 | # bs*num_heads, num_queries, num_points, 2 68 | sampling_grid_l_ = sampling_grids[:, :, :, level].transpose(1, 2).flatten(0, 1) 69 | # bs*num_heads, embed_dims, num_queries, num_points 70 | sampling_value_l_ = F.grid_sample( 71 | value_l_, sampling_grid_l_, mode="bilinear", padding_mode="zeros", align_corners=False 72 | ) 73 | sampling_value_list.append(sampling_value_l_) 74 | # (bs, num_queries, num_heads, num_levels, num_points) -> 75 | # (bs, num_heads, num_queries, num_levels, num_points) -> 76 | # (bs, num_heads, 1, num_queries, num_levels*num_points) 77 | attention_weights = attention_weights.transpose(1, 2).reshape( 78 | bs * num_heads, 1, num_queries, num_levels * num_points 79 | ) 80 | output = ( 81 | (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights) 82 | .sum(-1) 83 | .view(bs, num_heads * embed_dims, num_queries) 84 | ) 85 | return output.transpose(1, 2).contiguous() 86 | -------------------------------------------------------------------------------- /ultralytics/solutions/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .bot_sort import BOTSORT 4 | from .byte_tracker import BYTETracker 5 | from .track import register_tracker 6 | 7 | __all__ = "register_tracker", "BOTSORT", "BYTETracker" # allow simpler import 8 | -------------------------------------------------------------------------------- /ultralytics/trackers/basetrack.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """This module defines the base classes and structures for object tracking in YOLO.""" 3 | 4 | from collections import OrderedDict 5 | 6 | import numpy as np 7 | 8 | 9 | class TrackState: 10 | """ 11 | Enumeration class representing the possible states of an object being tracked. 12 | 13 | Attributes: 14 | New (int): State when the object is newly detected. 15 | Tracked (int): State when the object is successfully tracked in subsequent frames. 16 | Lost (int): State when the object is no longer tracked. 17 | Removed (int): State when the object is removed from tracking. 
18 | """ 19 | 20 | New = 0 21 | Tracked = 1 22 | Lost = 2 23 | Removed = 3 24 | 25 | 26 | class BaseTrack: 27 | """ 28 | Base class for object tracking, providing foundational attributes and methods. 29 | 30 | Attributes: 31 | _count (int): Class-level counter for unique track IDs. 32 | track_id (int): Unique identifier for the track. 33 | is_activated (bool): Flag indicating whether the track is currently active. 34 | state (TrackState): Current state of the track. 35 | history (OrderedDict): Ordered history of the track's states. 36 | features (list): List of features extracted from the object for tracking. 37 | curr_feature (any): The current feature of the object being tracked. 38 | score (float): The confidence score of the tracking. 39 | start_frame (int): The frame number where tracking started. 40 | frame_id (int): The most recent frame ID processed by the track. 41 | time_since_update (int): Frames passed since the last update. 42 | location (tuple): The location of the object in the context of multi-camera tracking. 43 | 44 | Methods: 45 | end_frame: Returns the ID of the last frame where the object was tracked. 46 | next_id: Increments and returns the next global track ID. 47 | activate: Abstract method to activate the track. 48 | predict: Abstract method to predict the next state of the track. 49 | update: Abstract method to update the track with new data. 50 | mark_lost: Marks the track as lost. 51 | mark_removed: Marks the track as removed. 52 | reset_id: Resets the global track ID counter. 53 | """ 54 | 55 | _count = 0 56 | 57 | def __init__(self): 58 | """Initializes a new track with unique ID and foundational tracking attributes.""" 59 | self.track_id = 0 60 | self.is_activated = False 61 | self.state = TrackState.New 62 | self.history = OrderedDict() 63 | self.features = [] 64 | self.curr_feature = None 65 | self.score = 0 66 | self.start_frame = 0 67 | self.frame_id = 0 68 | self.time_since_update = 0 69 | self.location = (np.inf, np.inf) 70 | 71 | @property 72 | def end_frame(self): 73 | """Return the last frame ID of the track.""" 74 | return self.frame_id 75 | 76 | @staticmethod 77 | def next_id(): 78 | """Increment and return the global track ID counter.""" 79 | BaseTrack._count += 1 80 | return BaseTrack._count 81 | 82 | def activate(self, *args): 83 | """Abstract method to activate the track with provided arguments.""" 84 | raise NotImplementedError 85 | 86 | def predict(self): 87 | """Abstract method to predict the next state of the track.""" 88 | raise NotImplementedError 89 | 90 | def update(self, *args, **kwargs): 91 | """Abstract method to update the track with new observations.""" 92 | raise NotImplementedError 93 | 94 | def mark_lost(self): 95 | """Mark the track as lost.""" 96 | self.state = TrackState.Lost 97 | 98 | def mark_removed(self): 99 | """Mark the track as removed.""" 100 | self.state = TrackState.Removed 101 | 102 | @staticmethod 103 | def reset_id(): 104 | """Reset the global track ID counter.""" 105 | BaseTrack._count = 0 106 | -------------------------------------------------------------------------------- /ultralytics/trackers/track.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from functools import partial 4 | from pathlib import Path 5 | 6 | import torch 7 | 8 | from ultralytics.utils import IterableSimpleNamespace, yaml_load 9 | from ultralytics.utils.checks import check_yaml 10 | from .bot_sort import BOTSORT 11 | from .byte_tracker import 
BYTETracker 12 | 13 | # A mapping of tracker types to corresponding tracker classes 14 | TRACKER_MAP = {"bytetrack": BYTETracker, "botsort": BOTSORT} 15 | 16 | 17 | def on_predict_start(predictor: object, persist: bool = False) -> None: 18 | """ 19 | Initialize trackers for object tracking during prediction. 20 | 21 | Args: 22 | predictor (object): The predictor object to initialize trackers for. 23 | persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False. 24 | 25 | Raises: 26 | AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'. 27 | """ 28 | if predictor.args.task == "obb": 29 | raise NotImplementedError("ERROR ❌ OBB task does not support track mode!") 30 | if hasattr(predictor, "trackers") and persist: 31 | return 32 | 33 | tracker = check_yaml(predictor.args.tracker) 34 | cfg = IterableSimpleNamespace(**yaml_load(tracker)) 35 | 36 | if cfg.tracker_type not in ["bytetrack", "botsort"]: 37 | raise AssertionError(f"Only 'bytetrack' and 'botsort' are supported for now, but got '{cfg.tracker_type}'") 38 | 39 | trackers = [] 40 | for _ in range(predictor.dataset.bs): 41 | tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30) 42 | trackers.append(tracker) 43 | predictor.trackers = trackers 44 | 45 | 46 | def on_predict_postprocess_end(predictor: object, persist: bool = False) -> None: 47 | """ 48 | Postprocess detected boxes and update with object tracking. 49 | 50 | Args: 51 | predictor (object): The predictor object containing the predictions. 52 | persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False. 53 | """ 54 | bs = predictor.dataset.bs 55 | path, im0s = predictor.batch[:2] 56 | 57 | for i in range(bs): 58 | if not persist and predictor.vid_path[i] != str(predictor.save_dir / Path(path[i]).name): # new video 59 | predictor.trackers[i].reset() 60 | 61 | det = predictor.results[i].boxes.cpu().numpy() 62 | if len(det) == 0: 63 | continue 64 | tracks = predictor.trackers[i].update(det, im0s[i]) 65 | if len(tracks) == 0: 66 | continue 67 | idx = tracks[:, -1].astype(int) 68 | predictor.results[i] = predictor.results[i][idx] 69 | predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1])) 70 | 71 | 72 | def register_tracker(model: object, persist: bool) -> None: 73 | """ 74 | Register tracking callbacks to the model for object tracking during prediction. 75 | 76 | Args: 77 | model (object): The model object to register tracking callbacks for. 78 | persist (bool): Whether to persist the trackers if they already exist. 
79 | """ 80 | model.add_callback("on_predict_start", partial(on_predict_start, persist=persist)) 81 | model.add_callback("on_predict_postprocess_end", partial(on_predict_postprocess_end, persist=persist)) 82 | -------------------------------------------------------------------------------- /ultralytics/trackers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | -------------------------------------------------------------------------------- /ultralytics/utils/autobatch.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.""" 3 | 4 | from copy import deepcopy 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr 10 | from ultralytics.utils.torch_utils import profile 11 | 12 | 13 | def check_train_batch_size(model, imgsz=640, amp=True): 14 | """ 15 | Check YOLO training batch size using the autobatch() function. 16 | 17 | Args: 18 | model (torch.nn.Module): YOLO model to check batch size for. 19 | imgsz (int): Image size used for training. 20 | amp (bool): If True, use automatic mixed precision (AMP) for training. 21 | 22 | Returns: 23 | (int): Optimal batch size computed using the autobatch() function. 24 | """ 25 | 26 | with torch.cuda.amp.autocast(amp): 27 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 28 | 29 | 30 | def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): 31 | """ 32 | Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory. 33 | 34 | Args: 35 | model (torch.nn.module): YOLO model to compute batch size for. 36 | imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640. 37 | fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60. 38 | batch_size (int, optional): The default batch size to use if an error is detected. Defaults to 16. 39 | 40 | Returns: 41 | (int): The optimal batch size. 
42 | """ 43 | 44 | # Check device 45 | prefix = colorstr("AutoBatch: ") 46 | LOGGER.info(f"{prefix}Computing optimal batch size for imgsz={imgsz}") 47 | device = next(model.parameters()).device # get model device 48 | if device.type == "cpu": 49 | LOGGER.info(f"{prefix}CUDA not detected, using default CPU batch-size {batch_size}") 50 | return batch_size 51 | if torch.backends.cudnn.benchmark: 52 | LOGGER.info(f"{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}") 53 | return batch_size 54 | 55 | # Inspect CUDA memory 56 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 57 | d = str(device).upper() # 'CUDA:0' 58 | properties = torch.cuda.get_device_properties(device) # device properties 59 | t = properties.total_memory / gb # GiB total 60 | r = torch.cuda.memory_reserved(device) / gb # GiB reserved 61 | a = torch.cuda.memory_allocated(device) / gb # GiB allocated 62 | f = t - (r + a) # GiB free 63 | LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free") 64 | 65 | # Profile batch sizes 66 | batch_sizes = [1, 2, 4, 8, 16] 67 | try: 68 | img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] 69 | results = profile(img, model, n=3, device=device) 70 | 71 | # Fit a solution 72 | y = [x[2] for x in results if x] # memory [2] 73 | p = np.polyfit(batch_sizes[: len(y)], y, deg=1) # first degree polynomial fit 74 | b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) 75 | if None in results: # some sizes failed 76 | i = results.index(None) # first fail index 77 | if b >= batch_sizes[i]: # y intercept above failure point 78 | b = batch_sizes[max(i - 1, 0)] # select prior safe point 79 | if b < 1 or b > 1024: # b outside of safe range 80 | b = batch_size 81 | LOGGER.info(f"{prefix}WARNING ⚠️ CUDA anomaly detected, using default batch-size {batch_size}.") 82 | 83 | fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted 84 | LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅") 85 | return b 86 | except Exception as e: 87 | LOGGER.warning(f"{prefix}WARNING ⚠️ error detected: {e}, using default batch-size {batch_size}.") 88 | return batch_size 89 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from .base import add_integration_callbacks, default_callbacks, get_default_callbacks 4 | 5 | __all__ = "add_integration_callbacks", "default_callbacks", "get_default_callbacks" 6 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/hub.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import json 4 | from time import time 5 | 6 | from ultralytics.hub.utils import HUB_WEB_ROOT, PREFIX, events 7 | from ultralytics.utils import LOGGER, SETTINGS 8 | 9 | 10 | def on_pretrain_routine_end(trainer): 11 | """Logs info before starting timer for upload rate limit.""" 12 | session = getattr(trainer, "hub_session", None) 13 | if session: 14 | # Start timer for upload rate limit 15 | session.timers = { 16 | "metrics": time(), 17 | "ckpt": time(), 18 | } # start timer on session.rate_limit 19 | 20 | 21 | def on_fit_epoch_end(trainer): 22 | """Uploads training progress metrics at the end of 
each epoch.""" 23 | session = getattr(trainer, "hub_session", None) 24 | if session: 25 | # Upload metrics after val end 26 | all_plots = { 27 | **trainer.label_loss_items(trainer.tloss, prefix="train"), 28 | **trainer.metrics, 29 | } 30 | if trainer.epoch == 0: 31 | from ultralytics.utils.torch_utils import model_info_for_loggers 32 | 33 | all_plots = {**all_plots, **model_info_for_loggers(trainer)} 34 | 35 | session.metrics_queue[trainer.epoch] = json.dumps(all_plots) 36 | if time() - session.timers["metrics"] > session.rate_limits["metrics"]: 37 | session.upload_metrics() 38 | session.timers["metrics"] = time() # reset timer 39 | session.metrics_queue = {} # reset queue 40 | 41 | 42 | def on_model_save(trainer): 43 | """Saves checkpoints to Ultralytics HUB with rate limiting.""" 44 | session = getattr(trainer, "hub_session", None) 45 | if session: 46 | # Upload checkpoints with rate limiting 47 | is_best = trainer.best_fitness == trainer.fitness 48 | if time() - session.timers["ckpt"] > session.rate_limits["ckpt"]: 49 | LOGGER.info(f"{PREFIX}Uploading checkpoint {HUB_WEB_ROOT}/models/{session.model_file}") 50 | session.upload_model(trainer.epoch, trainer.last, is_best) 51 | session.timers["ckpt"] = time() # reset timer 52 | 53 | 54 | def on_train_end(trainer): 55 | """Upload final model and metrics to Ultralytics HUB at the end of training.""" 56 | session = getattr(trainer, "hub_session", None) 57 | if session: 58 | # Upload final model and metrics with exponential standoff 59 | LOGGER.info(f"{PREFIX}Syncing final model...") 60 | session.upload_model( 61 | trainer.epoch, 62 | trainer.best, 63 | map=trainer.metrics.get("metrics/mAP50-95(B)", 0), 64 | final=True, 65 | ) 66 | session.alive = False # stop heartbeats 67 | LOGGER.info(f"{PREFIX}Done ✅\n" f"{PREFIX}View model at {session.model_url} 🚀") 68 | 69 | 70 | def on_train_start(trainer): 71 | """Run events on train start.""" 72 | events(trainer.args) 73 | 74 | 75 | def on_val_start(validator): 76 | """Runs events on validation start.""" 77 | events(validator.args) 78 | 79 | 80 | def on_predict_start(predictor): 81 | """Run events on predict start.""" 82 | events(predictor.args) 83 | 84 | 85 | def on_export_start(exporter): 86 | """Run events on export start.""" 87 | events(exporter.args) 88 | 89 | 90 | callbacks = ( 91 | { 92 | "on_pretrain_routine_end": on_pretrain_routine_end, 93 | "on_fit_epoch_end": on_fit_epoch_end, 94 | "on_model_save": on_model_save, 95 | "on_train_end": on_train_end, 96 | "on_train_start": on_train_start, 97 | "on_val_start": on_val_start, 98 | "on_predict_start": on_predict_start, 99 | "on_export_start": on_export_start, 100 | } 101 | if SETTINGS["hub"] is True 102 | else {} 103 | ) # verify enabled 104 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/neptune.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING 4 | 5 | try: 6 | assert not TESTS_RUNNING # do not log pytest 7 | assert SETTINGS["neptune"] is True # verify integration is enabled 8 | import neptune 9 | from neptune.types import File 10 | 11 | assert hasattr(neptune, "__version__") 12 | 13 | run = None # NeptuneAI experiment logger instance 14 | 15 | except (ImportError, AssertionError): 16 | neptune = None 17 | 18 | 19 | def _log_scalars(scalars, step=0): 20 | """Log scalars to the NeptuneAI experiment logger.""" 21 | if run: 22 | for k, v in 
scalars.items(): 23 | run[k].append(value=v, step=step) 24 | 25 | 26 | def _log_images(imgs_dict, group=""): 27 | """Log scalars to the NeptuneAI experiment logger.""" 28 | if run: 29 | for k, v in imgs_dict.items(): 30 | run[f"{group}/{k}"].upload(File(v)) 31 | 32 | 33 | def _log_plot(title, plot_path): 34 | """ 35 | Log plots to the NeptuneAI experiment logger. 36 | 37 | Args: 38 | title (str): Title of the plot. 39 | plot_path (PosixPath | str): Path to the saved image file. 40 | """ 41 | import matplotlib.image as mpimg 42 | import matplotlib.pyplot as plt 43 | 44 | img = mpimg.imread(plot_path) 45 | fig = plt.figure() 46 | ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect="auto", xticks=[], yticks=[]) # no ticks 47 | ax.imshow(img) 48 | run[f"Plots/{title}"].upload(fig) 49 | 50 | 51 | def on_pretrain_routine_start(trainer): 52 | """Callback function called before the training routine starts.""" 53 | try: 54 | global run 55 | run = neptune.init_run(project=trainer.args.project or "YOLOv8", name=trainer.args.name, tags=["YOLOv8"]) 56 | run["Configuration/Hyperparameters"] = {k: "" if v is None else v for k, v in vars(trainer.args).items()} 57 | except Exception as e: 58 | LOGGER.warning(f"WARNING ⚠️ NeptuneAI installed but not initialized correctly, not logging this run. {e}") 59 | 60 | 61 | def on_train_epoch_end(trainer): 62 | """Callback function called at end of each training epoch.""" 63 | _log_scalars(trainer.label_loss_items(trainer.tloss, prefix="train"), trainer.epoch + 1) 64 | _log_scalars(trainer.lr, trainer.epoch + 1) 65 | if trainer.epoch == 1: 66 | _log_images({f.stem: str(f) for f in trainer.save_dir.glob("train_batch*.jpg")}, "Mosaic") 67 | 68 | 69 | def on_fit_epoch_end(trainer): 70 | """Callback function called at end of each fit (train+val) epoch.""" 71 | if run and trainer.epoch == 0: 72 | from ultralytics.utils.torch_utils import model_info_for_loggers 73 | 74 | run["Configuration/Model"] = model_info_for_loggers(trainer) 75 | _log_scalars(trainer.metrics, trainer.epoch + 1) 76 | 77 | 78 | def on_val_end(validator): 79 | """Callback function called at end of each validation.""" 80 | if run: 81 | # Log val_labels and val_pred 82 | _log_images({f.stem: str(f) for f in validator.save_dir.glob("val*.jpg")}, "Validation") 83 | 84 | 85 | def on_train_end(trainer): 86 | """Callback function called at end of training.""" 87 | if run: 88 | # Log final results, CM matrix + PR plots 89 | files = [ 90 | "results.png", 91 | "confusion_matrix.png", 92 | "confusion_matrix_normalized.png", 93 | *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R")), 94 | ] 95 | files = [(trainer.save_dir / f) for f in files if (trainer.save_dir / f).exists()] # filter 96 | for f in files: 97 | _log_plot(title=f.stem, plot_path=f) 98 | # Log the final model 99 | run[f"weights/{trainer.args.name or trainer.args.task}/{trainer.best.name}"].upload(File(str(trainer.best))) 100 | 101 | 102 | callbacks = ( 103 | { 104 | "on_pretrain_routine_start": on_pretrain_routine_start, 105 | "on_train_epoch_end": on_train_epoch_end, 106 | "on_fit_epoch_end": on_fit_epoch_end, 107 | "on_val_end": on_val_end, 108 | "on_train_end": on_train_end, 109 | } 110 | if neptune 111 | else {} 112 | ) 113 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/raytune.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import SETTINGS 4 | 5 | try: 6 | assert 
SETTINGS["raytune"] is True # verify integration is enabled 7 | import ray 8 | from ray import tune 9 | from ray.air import session 10 | 11 | except (ImportError, AssertionError): 12 | tune = None 13 | 14 | 15 | def on_fit_epoch_end(trainer): 16 | """Sends training metrics to Ray Tune at end of each epoch.""" 17 | if ray.tune.is_session_enabled(): 18 | metrics = trainer.metrics 19 | metrics["epoch"] = trainer.epoch 20 | session.report(metrics) 21 | 22 | 23 | callbacks = ( 24 | { 25 | "on_fit_epoch_end": on_fit_epoch_end, 26 | } 27 | if tune 28 | else {} 29 | ) 30 | -------------------------------------------------------------------------------- /ultralytics/utils/callbacks/tensorboard.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr 4 | 5 | try: 6 | # WARNING: do not move import due to protobuf issue in https://github.com/ultralytics/ultralytics/pull/4674 7 | from torch.utils.tensorboard import SummaryWriter 8 | 9 | assert not TESTS_RUNNING # do not log pytest 10 | assert SETTINGS["tensorboard"] is True # verify integration is enabled 11 | WRITER = None # TensorBoard SummaryWriter instance 12 | 13 | except (ImportError, AssertionError, TypeError, AttributeError): 14 | # TypeError for handling 'Descriptors cannot not be created directly.' protobuf errors in Windows 15 | # AttributeError: module 'tensorflow' has no attribute 'io' if 'tensorflow' not installed 16 | SummaryWriter = None 17 | 18 | 19 | def _log_scalars(scalars, step=0): 20 | """Logs scalar values to TensorBoard.""" 21 | if WRITER: 22 | for k, v in scalars.items(): 23 | WRITER.add_scalar(k, v, step) 24 | 25 | 26 | def _log_tensorboard_graph(trainer): 27 | """Log model graph to TensorBoard.""" 28 | try: 29 | import warnings 30 | 31 | from ultralytics.utils.torch_utils import de_parallel, torch 32 | 33 | imgsz = trainer.args.imgsz 34 | imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz 35 | p = next(trainer.model.parameters()) # for device, type 36 | im = torch.zeros((1, 3, *imgsz), device=p.device, dtype=p.dtype) # input image (must be zeros, not empty) 37 | with warnings.catch_warnings(): 38 | warnings.simplefilter("ignore", category=UserWarning) # suppress jit trace warning 39 | WRITER.add_graph(torch.jit.trace(de_parallel(trainer.model), im, strict=False), []) 40 | except Exception as e: 41 | LOGGER.warning(f"WARNING ⚠️ TensorBoard graph visualization failure {e}") 42 | 43 | 44 | def on_pretrain_routine_start(trainer): 45 | """Initialize TensorBoard logging with SummaryWriter.""" 46 | if SummaryWriter: 47 | try: 48 | global WRITER 49 | WRITER = SummaryWriter(str(trainer.save_dir)) 50 | prefix = colorstr("TensorBoard: ") 51 | LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {trainer.save_dir}', view at http://localhost:6006/") 52 | except Exception as e: 53 | LOGGER.warning(f"WARNING ⚠️ TensorBoard not initialized correctly, not logging this run. 
{e}") 54 | 55 | 56 | def on_train_start(trainer): 57 | """Log TensorBoard graph.""" 58 | if WRITER: 59 | _log_tensorboard_graph(trainer) 60 | 61 | 62 | def on_train_epoch_end(trainer): 63 | """Logs scalar statistics at the end of a training epoch.""" 64 | _log_scalars(trainer.label_loss_items(trainer.tloss, prefix="train"), trainer.epoch + 1) 65 | _log_scalars(trainer.lr, trainer.epoch + 1) 66 | 67 | 68 | def on_fit_epoch_end(trainer): 69 | """Logs epoch metrics at end of training epoch.""" 70 | _log_scalars(trainer.metrics, trainer.epoch + 1) 71 | 72 | 73 | callbacks = ( 74 | { 75 | "on_pretrain_routine_start": on_pretrain_routine_start, 76 | "on_train_start": on_train_start, 77 | "on_fit_epoch_end": on_fit_epoch_end, 78 | "on_train_epoch_end": on_train_epoch_end, 79 | } 80 | if SummaryWriter 81 | else {} 82 | ) 83 | -------------------------------------------------------------------------------- /ultralytics/utils/dist.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | import os 4 | import shutil 5 | import socket 6 | import sys 7 | import tempfile 8 | 9 | from . import USER_CONFIG_DIR 10 | from .torch_utils import TORCH_1_9 11 | 12 | 13 | def find_free_network_port() -> int: 14 | """ 15 | Finds a free port on localhost. 16 | 17 | It is useful in single-node training when we don't want to connect to a real main node but have to set the 18 | `MASTER_PORT` environment variable. 19 | """ 20 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 21 | s.bind(("127.0.0.1", 0)) 22 | return s.getsockname()[1] # port 23 | 24 | 25 | def generate_ddp_file(trainer): 26 | """Generates a DDP file and returns its file name.""" 27 | module, name = f"{trainer.__class__.__module__}.{trainer.__class__.__name__}".rsplit(".", 1) 28 | 29 | content = f""" 30 | # Ultralytics Multi-GPU training temp file (should be automatically deleted after use) 31 | overrides = {vars(trainer.args)} 32 | 33 | if __name__ == "__main__": 34 | from {module} import {name} 35 | from ultralytics.utils import DEFAULT_CFG_DICT 36 | 37 | cfg = DEFAULT_CFG_DICT.copy() 38 | cfg.update(save_dir='') # handle the extra key 'save_dir' 39 | trainer = {name}(cfg=cfg, overrides=overrides) 40 | results = trainer.train() 41 | """ 42 | (USER_CONFIG_DIR / "DDP").mkdir(exist_ok=True) 43 | with tempfile.NamedTemporaryFile( 44 | prefix="_temp_", 45 | suffix=f"{id(trainer)}.py", 46 | mode="w+", 47 | encoding="utf-8", 48 | dir=USER_CONFIG_DIR / "DDP", 49 | delete=False, 50 | ) as file: 51 | file.write(content) 52 | return file.name 53 | 54 | 55 | def generate_ddp_command(world_size, trainer): 56 | """Generates and returns command for distributed training.""" 57 | import __main__ # noqa local import to avoid https://github.com/Lightning-AI/lightning/issues/15218 58 | 59 | if not trainer.resume: 60 | shutil.rmtree(trainer.save_dir) # remove the save_dir 61 | file = generate_ddp_file(trainer) 62 | dist_cmd = "torch.distributed.run" if TORCH_1_9 else "torch.distributed.launch" 63 | port = find_free_network_port() 64 | cmd = [sys.executable, "-m", dist_cmd, "--nproc_per_node", f"{world_size}", "--master_port", f"{port}", file] 65 | return cmd, file 66 | 67 | 68 | def ddp_cleanup(trainer, file): 69 | """Delete temp file if created.""" 70 | if f"{id(trainer)}.py" in file: # if temp_file suffix in file 71 | os.remove(file) 72 | -------------------------------------------------------------------------------- /ultralytics/utils/errors.py: 
-------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from ultralytics.utils import emojis 4 | 5 | 6 | class HUBModelError(Exception): 7 | """ 8 | Custom exception class for handling errors related to model fetching in Ultralytics YOLO. 9 | 10 | This exception is raised when a requested model is not found or cannot be retrieved. 11 | The message is also processed to include emojis for better user experience. 12 | 13 | Attributes: 14 | message (str): The error message displayed when the exception is raised. 15 | 16 | Note: 17 | The message is automatically processed through the 'emojis' function from the 'ultralytics.utils' package. 18 | """ 19 | 20 | def __init__(self, message="Model not found. Please check model URL and try again."): 21 | """Create an exception for when a model is not found.""" 22 | super().__init__(emojis(message)) 23 | -------------------------------------------------------------------------------- /ultralytics/utils/patches.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | """Monkey patches to update/extend functionality of existing functions.""" 3 | 4 | import time 5 | from pathlib import Path 6 | 7 | import cv2 8 | import numpy as np 9 | import torch 10 | 11 | # OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------ 12 | _imshow = cv2.imshow # copy to avoid recursion errors 13 | 14 | 15 | def imread(filename: str, flags: int = cv2.IMREAD_COLOR): 16 | """ 17 | Read an image from a file. 18 | 19 | Args: 20 | filename (str): Path to the file to read. 21 | flags (int, optional): Flag that can take values of cv2.IMREAD_*. Defaults to cv2.IMREAD_COLOR. 22 | 23 | Returns: 24 | (np.ndarray): The read image. 25 | """ 26 | return cv2.imdecode(np.fromfile(filename, np.uint8), flags) 27 | 28 | 29 | def imwrite(filename: str, img: np.ndarray, params=None): 30 | """ 31 | Write an image to a file. 32 | 33 | Args: 34 | filename (str): Path to the file to write. 35 | img (np.ndarray): Image to write. 36 | params (list of ints, optional): Additional parameters. See OpenCV documentation. 37 | 38 | Returns: 39 | (bool): True if the file was written, False otherwise. 40 | """ 41 | try: 42 | cv2.imencode(Path(filename).suffix, img, params)[1].tofile(filename) 43 | return True 44 | except Exception: 45 | return False 46 | 47 | 48 | def imshow(winname: str, mat: np.ndarray): 49 | """ 50 | Displays an image in the specified window. 51 | 52 | Args: 53 | winname (str): Name of the window. 54 | mat (np.ndarray): Image to be shown. 55 | """ 56 | _imshow(winname.encode("unicode_escape").decode(), mat) 57 | 58 | 59 | # PyTorch functions ---------------------------------------------------------------------------------------------------- 60 | _torch_save = torch.save # copy to avoid recursion errors 61 | 62 | 63 | def torch_save(*args, **kwargs): 64 | """ 65 | Use dill (if exists) to serialize the lambda functions where pickle does not do this. Also adds 3 retries with 66 | exponential standoff in case of save failure to improve robustness to transient issues. 67 | 68 | Args: 69 | *args (tuple): Positional arguments to pass to torch.save. 70 | **kwargs (dict): Keyword arguments to pass to torch.save. 
71 | """ 72 | try: 73 | import dill as pickle # noqa 74 | except ImportError: 75 | import pickle 76 | 77 | if "pickle_module" not in kwargs: 78 | kwargs["pickle_module"] = pickle # noqa 79 | 80 | for i in range(4): # 3 retries 81 | try: 82 | return _torch_save(*args, **kwargs) 83 | except RuntimeError: # unable to save, possibly waiting for device to flush or anti-virus to finish scanning 84 | if i == 3: 85 | raise 86 | time.sleep((2**i) / 2) # exponential standoff 0.5s, 1.0s, 2.0s 87 | -------------------------------------------------------------------------------- /ultralytics/utils/triton.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, AGPL-3.0 license 2 | 3 | from typing import List 4 | from urllib.parse import urlsplit 5 | 6 | import numpy as np 7 | 8 | 9 | class TritonRemoteModel: 10 | """ 11 | Client for interacting with a remote Triton Inference Server model. 12 | 13 | Attributes: 14 | endpoint (str): The name of the model on the Triton server. 15 | url (str): The URL of the Triton server. 16 | triton_client: The Triton client (either HTTP or gRPC). 17 | InferInput: The input class for the Triton client. 18 | InferRequestedOutput: The output request class for the Triton client. 19 | input_formats (List[str]): The data types of the model inputs. 20 | np_input_formats (List[type]): The numpy data types of the model inputs. 21 | input_names (List[str]): The names of the model inputs. 22 | output_names (List[str]): The names of the model outputs. 23 | """ 24 | 25 | def __init__(self, url: str, endpoint: str = "", scheme: str = ""): 26 | """ 27 | Initialize the TritonRemoteModel. 28 | 29 | Arguments may be provided individually or parsed from a collective 'url' argument of the form 30 | ://// 31 | 32 | Args: 33 | url (str): The URL of the Triton server. 34 | endpoint (str): The name of the model on the Triton server. 35 | scheme (str): The communication scheme ('http' or 'grpc'). 36 | """ 37 | if not endpoint and not scheme: # Parse all args from URL string 38 | splits = urlsplit(url) 39 | endpoint = splits.path.strip("/").split("/")[0] 40 | scheme = splits.scheme 41 | url = splits.netloc 42 | 43 | self.endpoint = endpoint 44 | self.url = url 45 | 46 | # Choose the Triton client based on the communication scheme 47 | if scheme == "http": 48 | import tritonclient.http as client # noqa 49 | 50 | self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False) 51 | config = self.triton_client.get_model_config(endpoint) 52 | else: 53 | import tritonclient.grpc as client # noqa 54 | 55 | self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False) 56 | config = self.triton_client.get_model_config(endpoint, as_json=True)["config"] 57 | 58 | # Sort output names alphabetically, i.e. 'output0', 'output1', etc. 
59 | config["output"] = sorted(config["output"], key=lambda x: x.get("name")) 60 | 61 | # Define model attributes 62 | type_map = {"TYPE_FP32": np.float32, "TYPE_FP16": np.float16, "TYPE_UINT8": np.uint8} 63 | self.InferRequestedOutput = client.InferRequestedOutput 64 | self.InferInput = client.InferInput 65 | self.input_formats = [x["data_type"] for x in config["input"]] 66 | self.np_input_formats = [type_map[x] for x in self.input_formats] 67 | self.input_names = [x["name"] for x in config["input"]] 68 | self.output_names = [x["name"] for x in config["output"]] 69 | 70 | def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]: 71 | """ 72 | Call the model with the given inputs. 73 | 74 | Args: 75 | *inputs (List[np.ndarray]): Input data to the model. 76 | 77 | Returns: 78 | List[np.ndarray]: Model outputs. 79 | """ 80 | infer_inputs = [] 81 | input_format = inputs[0].dtype 82 | for i, x in enumerate(inputs): 83 | if x.dtype != self.np_input_formats[i]: 84 | x = x.astype(self.np_input_formats[i]) 85 | infer_input = self.InferInput(self.input_names[i], [*x.shape], self.input_formats[i].replace("TYPE_", "")) 86 | infer_input.set_data_from_numpy(x) 87 | infer_inputs.append(infer_input) 88 | 89 | infer_outputs = [self.InferRequestedOutput(output_name) for output_name in self.output_names] 90 | outputs = self.triton_client.infer(model_name=self.endpoint, inputs=infer_inputs, outputs=infer_outputs) 91 | 92 | return [outputs.as_numpy(output_name).astype(input_format) for output_name in self.output_names] 93 | -------------------------------------------------------------------------------- /weights/plate_rec_color.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/weights/plate_rec_color.pth -------------------------------------------------------------------------------- /weights/yolov8s.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/we0091234/yolov8-plate/68ead10ab37e9c7745207145e69d554547ca02d2/weights/yolov8s.pt --------------------------------------------------------------------------------