├── ultralytics
│   ├── nn
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── modules.cpython-37.pyc
│   │   │   ├── modules.cpython-38.pyc
│   │   │   ├── tasks.cpython-37.pyc
│   │   │   ├── tasks.cpython-38.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── autobackend.cpython-37.pyc
│   │   │   └── autobackend.cpython-38.pyc
│   │   ├── autoshape.py
│   │   ├── modules.py
│   │   └── autobackend.py
│   ├── tracker
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── matching.py
│   │   │   ├── gmc.py
│   │   │   └── kalman_filter.py
│   │   ├── __init__.py
│   │   ├── trackers
│   │   │   ├── __init__.py
│   │   │   ├── basetrack.py
│   │   │   ├── bot_sort.py
│   │   │   └── byte_tracker.py
│   │   ├── cfg
│   │   │   ├── bytetrack.yaml
│   │   │   └── botsort.yaml
│   │   ├── README.md
│   │   └── track.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   └── __init__.cpython-38.pyc
│   ├── hub
│   │   ├── __pycache__
│   │   │   ├── auth.cpython-37.pyc
│   │   │   ├── auth.cpython-38.pyc
│   │   │   ├── utils.cpython-37.pyc
│   │   │   ├── utils.cpython-38.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── session.cpython-37.pyc
│   │   │   └── session.cpython-38.pyc
│   │   ├── auth.py
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── session.py
│   ├── __init__.py
│   └── models
│       ├── v8
│       │   ├── cls
│       │   │   ├── yolov8l-cls.yaml
│       │   │   ├── yolov8m-cls.yaml
│       │   │   ├── yolov8n-cls.yaml
│       │   │   ├── yolov8s-cls.yaml
│       │   │   └── yolov8x-cls.yaml
│       │   ├── yolov8l.yaml
│       │   ├── yolov8m.yaml
│       │   ├── yolov8x.yaml
│       │   ├── yolov8n.yaml
│       │   ├── yolov8s.yaml
│       │   ├── seg
│       │   │   ├── yolov8l-seg.yaml
│       │   │   ├── yolov8m-seg.yaml
│       │   │   ├── yolov8x-seg.yaml
│       │   │   ├── yolov8n-seg.yaml
│       │   │   └── yolov8s-seg.yaml
│       │   └── yolov8x6.yaml
│       ├── v3
│       │   ├── yolov3-tinyu.yaml
│       │   ├── yolov3u.yaml
│       │   └── yolov3-sppu.yaml
│       ├── v5
│       │   ├── yolov5lu.yaml
│       │   ├── yolov5mu.yaml
│       │   ├── yolov5nu.yaml
│       │   ├── yolov5xu.yaml
│       │   └── yolov5su.yaml
│       └── README.md
├── screenshot
│   ├── 5.jpg
│   ├── 1.jpeg
│   ├── 2.jpeg
│   ├── 3.jpeg
│   └── qrcode.png
├── images
│   └── zidane.jpg
├── detect_predict.py
└── README.md
/ultralytics/nn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ultralytics/tracker/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ultralytics/tracker/__init__.py:
--------------------------------------------------------------------------------
1 | from .trackers import BOTSORT, BYTETracker
2 |
--------------------------------------------------------------------------------
/screenshot/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/5.jpg
--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/__init__.py:
--------------------------------------------------------------------------------
1 | from .bot_sort import BOTSORT
2 | from .byte_tracker import BYTETracker
3 |
--------------------------------------------------------------------------------
/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/images/zidane.jpg
--------------------------------------------------------------------------------
/screenshot/1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/1.jpeg
--------------------------------------------------------------------------------
/screenshot/2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/2.jpeg
--------------------------------------------------------------------------------
/screenshot/3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/3.jpeg
--------------------------------------------------------------------------------
/screenshot/qrcode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/qrcode.png
--------------------------------------------------------------------------------
/ultralytics/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/auth.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/auth.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/auth.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/auth.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/modules.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/modules.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/modules.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/modules.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/tasks.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/tasks.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/tasks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/tasks.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/session.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/session.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/session.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/session.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/autobackend.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/autobackend.cpython-37.pyc
--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/autobackend.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/autobackend.cpython-38.pyc
--------------------------------------------------------------------------------
/ultralytics/__init__.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | __version__ = '8.0.40'
4 |
5 | from ultralytics.yolo.engine.model import YOLO
6 | from ultralytics.yolo.utils.checks import check_yolo as checks
7 |
8 | __all__ = ['__version__', 'YOLO', 'checks'] # allow simpler import
9 |
--------------------------------------------------------------------------------
/ultralytics/tracker/cfg/bytetrack.yaml:
--------------------------------------------------------------------------------
1 | tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
2 | track_high_thresh: 0.5 # threshold for the first association
3 | track_low_thresh: 0.1 # threshold for the second association
4 | new_track_thresh: 0.6 # threshold to initialize a new track if the detection does not match any existing tracks
5 | track_buffer: 30 # buffer for deciding when to remove lost tracks
6 | match_thresh: 0.8 # threshold for matching tracks
7 | # min_box_area: 10 # threshold for minimum box area (for tracker evaluation, not used for now)
8 | # mot20: False # for tracker evaluation (not used for now)
9 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8l-cls.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 1000 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.00 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 |
21 | # YOLOv8.0n head
22 | head:
23 | - [-1, 1, Classify, [nc]]
24 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8m-cls.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 1000 # number of classes
5 | depth_multiple: 0.67 # scales module repeats
6 | width_multiple: 0.75 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 |
21 | # YOLOv8.0n head
22 | head:
23 | - [-1, 1, Classify, [nc]]
24 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8n-cls.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 1000 # number of classes
5 | depth_multiple: 0.33 # scales module repeats
6 | width_multiple: 0.25 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 |
21 | # YOLOv8.0n head
22 | head:
23 | - [-1, 1, Classify, [nc]]
24 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8s-cls.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 1000 # number of classes
5 | depth_multiple: 0.33 # scales module repeats
6 | width_multiple: 0.50 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 |
21 | # YOLOv8.0n head
22 | head:
23 | - [-1, 1, Classify, [nc]]
24 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8x-cls.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 1000 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.25 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 |
21 | # YOLOv8.0n head
22 | head:
23 | - [-1, 1, Classify, [nc]]
24 |
--------------------------------------------------------------------------------
/ultralytics/tracker/README.md:
--------------------------------------------------------------------------------
1 | ## Tracker
2 |
3 | ### Trackers
4 |
5 | - [x] ByteTracker
6 | - [x] BoT-SORT
7 |
8 | ### Usage
9 |
10 | Python interface:
11 |
12 | ```python
13 | from ultralytics import YOLO
14 |
15 | model = YOLO("yolov8n.pt") # or a segmentation model, e.g. yolov8n-seg.pt
16 | model.track(
17 | source="video/streams",
18 | stream=True,
19 | tracker="botsort.yaml/bytetrack.yaml",
20 | ...,
21 | )
22 | ```
23 |
24 | CLI:
25 |
26 | ```bash
27 | yolo detect track source=... tracker=...
28 | yolo segment track source=... tracker=...
29 | ```
30 |
31 | By default, trackers use the configuration files in `ultralytics/tracker/cfg`.
32 | A modified tracker config file is also supported; refer to the config files in `ultralytics/tracker/cfg` for the available settings, and see the sketch below.
33 |
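34 | For example, a minimal sketch of passing a custom config (here `custom_tracker.yaml` is a hypothetical copy of `bytetrack.yaml` with adjusted thresholds):
35 |
36 | ```python
37 | from ultralytics import YOLO
38 |
39 | model = YOLO("yolov8n.pt")
40 | # custom_tracker.yaml: a copy of ultralytics/tracker/cfg/bytetrack.yaml with, e.g., track_high_thresh raised to 0.6
41 | for result in model.track(source="video.mp4", stream=True, tracker="custom_tracker.yaml"):
42 |     pass  # each result holds the tracked boxes for one frame
43 | ```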
--------------------------------------------------------------------------------
/ultralytics/tracker/cfg/botsort.yaml:
--------------------------------------------------------------------------------
1 | tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
2 | track_high_thresh: 0.5 # threshold for the first association
3 | track_low_thresh: 0.1 # threshold for the second association
4 | new_track_thresh: 0.6 # threshold to initialize a new track if the detection does not match any existing tracks
5 | track_buffer: 30 # buffer for deciding when to remove lost tracks
6 | match_thresh: 0.8 # threshold for matching tracks
7 | # min_box_area: 10 # threshold for minimum box area (for tracker evaluation, not used for now)
8 | # mot20: False # for tracker evaluation (not used for now)
9 |
10 | # BoT-SORT settings
11 | cmc_method: sparseOptFlow # method of global motion compensation
12 | # ReID model related thresholds (not supported yet)
13 | proximity_thresh: 0.5
14 | appearance_thresh: 0.25
15 | with_reid: False
16 |
--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/basetrack.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import numpy as np
4 |
5 |
6 | class TrackState:
7 | New = 0
8 | Tracked = 1
9 | Lost = 2
10 | Removed = 3
11 |
12 |
13 | class BaseTrack:
14 | _count = 0
15 |
16 | track_id = 0
17 | is_activated = False
18 | state = TrackState.New
19 |
20 | history = OrderedDict()
21 | features = []
22 | curr_feature = None
23 | score = 0
24 | start_frame = 0
25 | frame_id = 0
26 | time_since_update = 0
27 |
28 | # multi-camera
29 | location = (np.inf, np.inf)
30 |
31 | @property
32 | def end_frame(self):
33 | return self.frame_id
34 |
35 | @staticmethod
36 | def next_id():
37 | BaseTrack._count += 1
38 | return BaseTrack._count
39 |
40 | def activate(self, *args):
41 | raise NotImplementedError
42 |
43 | def predict(self):
44 | raise NotImplementedError
45 |
46 | def update(self, *args, **kwargs):
47 | raise NotImplementedError
48 |
49 | def mark_lost(self):
50 | self.state = TrackState.Lost
51 |
52 | def mark_removed(self):
53 | self.state = TrackState.Removed
54 |
--------------------------------------------------------------------------------
/ultralytics/models/v3/yolov3-tinyu.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 |
8 | # YOLOv3-tiny backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [16, 3, 1]], # 0
12 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
13 | [-1, 1, Conv, [32, 3, 1]],
14 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
15 | [-1, 1, Conv, [64, 3, 1]],
16 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
17 | [-1, 1, Conv, [128, 3, 1]],
18 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
19 | [-1, 1, Conv, [256, 3, 1]],
20 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
21 | [-1, 1, Conv, [512, 3, 1]],
22 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
23 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
24 | ]
25 |
26 | # YOLOv3-tiny head
27 | head:
28 | [[-1, 1, Conv, [1024, 3, 1]],
29 | [-1, 1, Conv, [256, 1, 1]],
30 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
31 |
32 | [-2, 1, Conv, [128, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
35 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
36 |
37 | [[19, 15], 1, Detect, [nc]], # Detect(P4, P5)
38 | ]
39 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8l.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.00 # scales convolution channels
7 |
8 | # YOLOv8.0l backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [512, True]]
20 | - [-1, 1, SPPF, [512, 5]] # 9
21 |
22 | # YOLOv8.0l head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8m.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # scales module repeats
6 | width_multiple: 0.75 # scales convolution channels
7 |
8 | # YOLOv8.0m backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [768, True]]
20 | - [-1, 1, SPPF, [768, 5]] # 9
21 |
22 | # YOLOv8.0m head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [768]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8x.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.25 # scales convolution channels
7 |
8 | # YOLOv8.0x backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [512, True]]
20 | - [-1, 1, SPPF, [512, 5]] # 9
21 |
22 | # YOLOv8.0x head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8n.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # scales module repeats
6 | width_multiple: 0.25 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 | - [-1, 1, SPPF, [1024, 5]] # 9
21 |
22 | # YOLOv8.0n head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8s.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # scales module repeats
6 | width_multiple: 0.50 # scales convolution channels
7 |
8 | # YOLOv8.0s backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 | - [-1, 1, SPPF, [1024, 5]] # 9
21 |
22 | # YOLOv8.0s head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8l-seg.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.00 # scales convolution channels
7 |
8 | # YOLOv8.0l backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [512, True]]
20 | - [-1, 1, SPPF, [512, 5]] # 9
21 |
22 | # YOLOv8.0l head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8m-seg.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # scales module repeats
6 | width_multiple: 0.75 # scales convolution channels
7 |
8 | # YOLOv8.0m backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [768, True]]
20 | - [-1, 1, SPPF, [768, 5]] # 9
21 |
22 | # YOLOv8.0m head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [768]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8x-seg.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.25 # scales convolution channels
7 |
8 | # YOLOv8.0x backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [512, True]]
20 | - [-1, 1, SPPF, [512, 5]] # 9
21 |
22 | # YOLOv8.0x head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8n-seg.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # scales module repeats
6 | width_multiple: 0.25 # scales convolution channels
7 |
8 | # YOLOv8.0n backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 | - [-1, 1, SPPF, [1024, 5]] # 9
21 |
22 | # YOLOv8.0n head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8s-seg.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # scales module repeats
6 | width_multiple: 0.50 # scales convolution channels
7 |
8 | # YOLOv8.0s backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [1024, True]]
20 | - [-1, 1, SPPF, [1024, 5]] # 9
21 |
22 | # YOLOv8.0s head
23 | head:
24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26 | - [-1, 3, C2f, [512]] # 12
27 |
28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small)
31 |
32 | - [-1, 1, Conv, [256, 3, 2]]
33 | - [[-1, 12], 1, Concat, [1]] # cat head P4
34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
35 |
36 | - [-1, 1, Conv, [512, 3, 2]]
37 | - [[-1, 9], 1, Concat, [1]] # cat head P5
38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
39 |
40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5)
41 |
--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5lu.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 |
8 | # YOLOv5 v6.0 backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
13 | [-1, 3, C3, [128]],
14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
15 | [-1, 6, C3, [256]],
16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
17 | [-1, 9, C3, [512]],
18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
19 | [-1, 3, C3, [1024]],
20 | [-1, 1, SPPF, [1024, 5]], # 9
21 | ]
22 |
23 | # YOLOv5 v6.0 head
24 | head:
25 | [[-1, 1, Conv, [512, 1, 1]],
26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
28 | [-1, 3, C3, [512, False]], # 13
29 |
30 | [-1, 1, Conv, [256, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
34 |
35 | [-1, 1, Conv, [256, 3, 2]],
36 | [[-1, 14], 1, Concat, [1]], # cat head P4
37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
38 |
39 | [-1, 1, Conv, [512, 3, 2]],
40 | [[-1, 10], 1, Concat, [1]], # cat head P5
41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
42 |
43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
44 | ]
45 |
--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5mu.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.67 # model depth multiple
6 | width_multiple: 0.75 # layer channel multiple
7 |
8 | # YOLOv5 v6.0 backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
13 | [-1, 3, C3, [128]],
14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
15 | [-1, 6, C3, [256]],
16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
17 | [-1, 9, C3, [512]],
18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
19 | [-1, 3, C3, [1024]],
20 | [-1, 1, SPPF, [1024, 5]], # 9
21 | ]
22 |
23 | # YOLOv5 v6.0 head
24 | head:
25 | [[-1, 1, Conv, [512, 1, 1]],
26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
28 | [-1, 3, C3, [512, False]], # 13
29 |
30 | [-1, 1, Conv, [256, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
34 |
35 | [-1, 1, Conv, [256, 3, 2]],
36 | [[-1, 14], 1, Concat, [1]], # cat head P4
37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
38 |
39 | [-1, 1, Conv, [512, 3, 2]],
40 | [[-1, 10], 1, Concat, [1]], # cat head P5
41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
42 |
43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
44 | ]
45 |
--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5nu.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.25 # layer channel multiple
7 |
8 | # YOLOv5 v6.0 backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
13 | [-1, 3, C3, [128]],
14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
15 | [-1, 6, C3, [256]],
16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
17 | [-1, 9, C3, [512]],
18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
19 | [-1, 3, C3, [1024]],
20 | [-1, 1, SPPF, [1024, 5]], # 9
21 | ]
22 |
23 | # YOLOv5 v6.0 head
24 | head:
25 | [[-1, 1, Conv, [512, 1, 1]],
26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
28 | [-1, 3, C3, [512, False]], # 13
29 |
30 | [-1, 1, Conv, [256, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
34 |
35 | [-1, 1, Conv, [256, 3, 2]],
36 | [[-1, 14], 1, Concat, [1]], # cat head P4
37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
38 |
39 | [-1, 1, Conv, [512, 3, 2]],
40 | [[-1, 10], 1, Concat, [1]], # cat head P5
41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
42 |
43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
44 | ]
45 |
--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5xu.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.33 # model depth multiple
6 | width_multiple: 1.25 # layer channel multiple
7 |
8 | # YOLOv5 v6.0 backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
13 | [-1, 3, C3, [128]],
14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
15 | [-1, 6, C3, [256]],
16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
17 | [-1, 9, C3, [512]],
18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
19 | [-1, 3, C3, [1024]],
20 | [-1, 1, SPPF, [1024, 5]], # 9
21 | ]
22 |
23 | # YOLOv5 v6.0 head
24 | head:
25 | [[-1, 1, Conv, [512, 1, 1]],
26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
28 | [-1, 3, C3, [512, False]], # 13
29 |
30 | [-1, 1, Conv, [256, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
34 |
35 | [-1, 1, Conv, [256, 3, 2]],
36 | [[-1, 14], 1, Concat, [1]], # cat head P4
37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
38 |
39 | [-1, 1, Conv, [512, 3, 2]],
40 | [[-1, 10], 1, Concat, [1]], # cat head P5
41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
42 |
43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
44 | ]
45 |
--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5su.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 0.33 # model depth multiple
6 | width_multiple: 0.50 # layer channel multiple
7 |
8 |
9 | # YOLOv5 v6.0 backbone
10 | backbone:
11 | # [from, number, module, args]
12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
14 | [-1, 3, C3, [128]],
15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
16 | [-1, 6, C3, [256]],
17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
18 | [-1, 9, C3, [512]],
19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
20 | [-1, 3, C3, [1024]],
21 | [-1, 1, SPPF, [1024, 5]], # 9
22 | ]
23 |
24 | # YOLOv5 v6.0 head
25 | head:
26 | [[-1, 1, Conv, [512, 1, 1]],
27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
29 | [-1, 3, C3, [512, False]], # 13
30 |
31 | [-1, 1, Conv, [256, 1, 1]],
32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
35 |
36 | [-1, 1, Conv, [256, 3, 2]],
37 | [[-1, 14], 1, Concat, [1]], # cat head P4
38 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
39 |
40 | [-1, 1, Conv, [512, 3, 2]],
41 | [[-1, 10], 1, Concat, [1]], # cat head P5
42 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
43 |
44 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
45 | ]
46 |
--------------------------------------------------------------------------------
/ultralytics/models/v3/yolov3u.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 |
8 | # darknet53 backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [32, 3, 1]], # 0
12 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
13 | [-1, 1, Bottleneck, [64]],
14 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
15 | [-1, 2, Bottleneck, [128]],
16 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
17 | [-1, 8, Bottleneck, [256]],
18 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
19 | [-1, 8, Bottleneck, [512]],
20 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
21 | [-1, 4, Bottleneck, [1024]], # 10
22 | ]
23 |
24 | # YOLOv3 head
25 | head:
26 | [[-1, 1, Bottleneck, [1024, False]],
27 | [-1, 1, Conv, [512, 1, 1]],
28 | [-1, 1, Conv, [1024, 3, 1]],
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
31 |
32 | [-2, 1, Conv, [256, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
35 | [-1, 1, Bottleneck, [512, False]],
36 | [-1, 1, Bottleneck, [512, False]],
37 | [-1, 1, Conv, [256, 1, 1]],
38 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
39 |
40 | [-2, 1, Conv, [128, 1, 1]],
41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
43 | [-1, 1, Bottleneck, [256, False]],
44 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
45 |
46 | [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5)
47 | ]
48 |
--------------------------------------------------------------------------------
/ultralytics/models/v3/yolov3-sppu.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.0 # model depth multiple
6 | width_multiple: 1.0 # layer channel multiple
7 |
8 | # darknet53 backbone
9 | backbone:
10 | # [from, number, module, args]
11 | [[-1, 1, Conv, [32, 3, 1]], # 0
12 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
13 | [-1, 1, Bottleneck, [64]],
14 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
15 | [-1, 2, Bottleneck, [128]],
16 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
17 | [-1, 8, Bottleneck, [256]],
18 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
19 | [-1, 8, Bottleneck, [512]],
20 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
21 | [-1, 4, Bottleneck, [1024]], # 10
22 | ]
23 |
24 | # YOLOv3-SPP head
25 | head:
26 | [[-1, 1, Bottleneck, [1024, False]],
27 | [-1, 1, SPP, [512, [5, 9, 13]]],
28 | [-1, 1, Conv, [1024, 3, 1]],
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
31 |
32 | [-2, 1, Conv, [256, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
35 | [-1, 1, Bottleneck, [512, False]],
36 | [-1, 1, Bottleneck, [512, False]],
37 | [-1, 1, Conv, [256, 1, 1]],
38 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
39 |
40 | [-2, 1, Conv, [128, 1, 1]],
41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
43 | [-1, 1, Bottleneck, [256, False]],
44 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
45 |
46 | [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5)
47 | ]
48 |
--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8x6.yaml:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | # Parameters
4 | nc: 80 # number of classes
5 | depth_multiple: 1.00 # scales module repeats
6 | width_multiple: 1.25 # scales convolution channels
7 |
8 | # YOLOv8.0x6 backbone
9 | backbone:
10 | # [from, repeats, module, args]
11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
13 | - [-1, 3, C2f, [128, True]]
14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
15 | - [-1, 6, C2f, [256, True]]
16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
17 | - [-1, 6, C2f, [512, True]]
18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32
19 | - [-1, 3, C2f, [512, True]]
20 | - [-1, 1, Conv, [512, 3, 2]] # 9-P6/64
21 | - [-1, 3, C2f, [512, True]]
22 | - [-1, 1, SPPF, [512, 5]] # 11
23 |
24 | # YOLOv8.0x6 head
25 | head:
26 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
27 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5
28 | - [-1, 3, C2, [512, False]] # 14
29 |
30 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32 | - [-1, 3, C2, [512, False]] # 17
33 |
34 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
37 |
38 | - [-1, 1, Conv, [256, 3, 2]]
39 | - [[-1, 17], 1, Concat, [1]] # cat head P4
40 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
41 |
42 | - [-1, 1, Conv, [512, 3, 2]]
43 | - [[-1, 14], 1, Concat, [1]] # cat head P5
44 | - [-1, 3, C2, [512, False]] # 26 (P5/32-large)
45 |
46 | - [-1, 1, Conv, [512, 3, 2]]
47 | - [[-1, 11], 1, Concat, [1]] # cat head P6
48 | - [-1, 3, C2, [512, False]] # 29 (P6/64-xlarge)
49 |
50 | - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
51 |
--------------------------------------------------------------------------------
/ultralytics/tracker/track.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from ultralytics.tracker import BOTSORT, BYTETracker
4 | from ultralytics.yolo.utils import IterableSimpleNamespace, yaml_load
5 | from ultralytics.yolo.utils.checks import check_requirements, check_yaml
6 |
7 | TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
8 | check_requirements('lap') # for linear_assignment
9 |
10 |
11 | def on_predict_start(predictor):
12 | tracker = check_yaml(predictor.args.tracker)
13 | cfg = IterableSimpleNamespace(**yaml_load(tracker))
14 | assert cfg.tracker_type in ['bytetrack', 'botsort'], \
15 | f"Only support 'bytetrack' and 'botsort' for now, but got '{cfg.tracker_type}'"
16 | trackers = []
17 | for _ in range(predictor.dataset.bs):
18 | tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
19 | trackers.append(tracker)
20 | predictor.trackers = trackers
21 |
22 |
23 | def on_predict_postprocess_end(predictor):
24 | bs = predictor.dataset.bs
25 | im0s = predictor.batch[2]
26 | im0s = im0s if isinstance(im0s, list) else [im0s]
27 | for i in range(bs):
28 | det = predictor.results[i].boxes.cpu().numpy()
29 | if len(det) == 0:
30 | continue
31 | tracks = predictor.trackers[i].update(det, im0s[i])
32 | if len(tracks) == 0:
33 | continue
34 | predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
35 | if predictor.results[i].masks is not None:
36 | idx = tracks[:, -1].tolist()
37 | predictor.results[i].masks = predictor.results[i].masks[idx]
38 |
39 |
40 | def register_tracker(model):
41 | model.add_callback('on_predict_start', on_predict_start)
42 | model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
43 |
--------------------------------------------------------------------------------
/ultralytics/hub/auth.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | import requests
4 |
5 | from ultralytics.hub.utils import HUB_API_ROOT, request_with_credentials
6 | from ultralytics.yolo.utils import is_colab
7 |
8 | API_KEY_PATH = 'https://hub.ultralytics.com/settings?tab=api+keys'
9 |
10 |
11 | class Auth:
12 | id_token = api_key = model_key = False
13 |
14 | def __init__(self, api_key=None):
15 | self.api_key = self._clean_api_key(api_key)
16 | self.authenticate() if self.api_key else self.auth_with_cookies()
17 |
18 | @staticmethod
19 | def _clean_api_key(key: str) -> str:
20 | """Strip model from key if present"""
21 | separator = '_'
22 | return key.split(separator)[0] if separator in key else key
23 |
24 | def authenticate(self) -> bool:
25 | """Attempt to authenticate with server"""
26 | try:
27 | header = self.get_auth_header()
28 | if header:
29 | r = requests.post(f'{HUB_API_ROOT}/v1/auth', headers=header)
30 | if not r.json().get('success', False):
31 | raise ConnectionError('Unable to authenticate.')
32 | return True
33 | raise ConnectionError('User has not authenticated locally.')
34 | except ConnectionError:
35 | self.id_token = self.api_key = False # reset invalid
36 | return False
37 |
38 | def auth_with_cookies(self) -> bool:
39 | """
40 | Attempt to fetch authentication via cookies and set id_token.
41 | User must be logged in to HUB and running in a supported browser.
42 | """
43 | if not is_colab():
44 | return False # Currently only works with Colab
45 | try:
46 | authn = request_with_credentials(f'{HUB_API_ROOT}/v1/auth/auto')
47 | if authn.get('success', False):
48 | self.id_token = authn.get('data', {}).get('idToken', None)
49 | self.authenticate()
50 | return True
51 | raise ConnectionError('Unable to fetch browser authentication details.')
52 | except ConnectionError:
53 | self.id_token = False # reset invalid
54 | return False
55 |
56 | def get_auth_header(self):
57 | if self.id_token:
58 | return {'authorization': f'Bearer {self.id_token}'}
59 | elif self.api_key:
60 | return {'x-api-key': self.api_key}
61 | else:
62 | return None
63 |
64 | def get_state(self) -> bool:
65 | """Get the authentication state"""
66 | return self.id_token or self.api_key
67 |
68 | def set_api_key(self, key: str):
69 |         """Set the API key"""
70 | self.api_key = key
71 |
--------------------------------------------------------------------------------
/ultralytics/hub/__init__.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | import requests
4 |
5 | from ultralytics.hub.auth import Auth
6 | from ultralytics.hub.session import HubTrainingSession
7 | from ultralytics.hub.utils import split_key
8 | from ultralytics.yolo.engine.exporter import EXPORT_FORMATS_LIST
9 | from ultralytics.yolo.engine.model import YOLO
10 | from ultralytics.yolo.utils import LOGGER, PREFIX, emojis
11 |
12 | # Define all export formats
13 | EXPORT_FORMATS_HUB = EXPORT_FORMATS_LIST + ['ultralytics_tflite', 'ultralytics_coreml']
14 |
15 |
16 | def start(key=''):
17 | """
18 |     Start training models with Ultralytics HUB. Usage: from ultralytics.hub import start; start('API_KEY')
19 | """
20 | auth = Auth(key)
21 | try:
22 | if not auth.get_state():
23 | model_id = request_api_key(auth)
24 | else:
25 | _, model_id = split_key(key)
26 |
27 | if not model_id:
28 | raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌'))
29 |
30 | session = HubTrainingSession(model_id=model_id, auth=auth)
31 | session.check_disk_space()
32 |
33 | trainer = YOLO(session.input_file)
34 | session.register_callbacks(trainer)
35 | trainer.train(**session.train_args)
36 | except Exception as e:
37 | LOGGER.warning(f'{PREFIX}{e}')
38 |
39 |
40 | def request_api_key(auth, max_attempts=3):
41 | """
42 | Prompt the user to input their API key. Returns the model ID.
43 | """
44 | import getpass
45 | for attempts in range(max_attempts):
46 | LOGGER.info(f'{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}')
47 | input_key = getpass.getpass('Enter your Ultralytics HUB API key:\n')
48 | auth.api_key, model_id = split_key(input_key)
49 |
50 | if auth.authenticate():
51 | LOGGER.info(f'{PREFIX}Authenticated ✅')
52 | return model_id
53 |
54 | LOGGER.warning(f'{PREFIX}Invalid API key ⚠️\n')
55 |
56 | raise ConnectionError(emojis(f'{PREFIX}Failed to authenticate ❌'))
57 |
58 |
59 | def reset_model(key=''):
60 | # Reset a trained model to an untrained state
61 | api_key, model_id = split_key(key)
62 | r = requests.post('https://api.ultralytics.com/model-reset', json={'apiKey': api_key, 'modelId': model_id})
63 |
64 | if r.status_code == 200:
65 | LOGGER.info(f'{PREFIX}model reset successfully')
66 | return
67 | LOGGER.warning(f'{PREFIX}model reset failure {r.status_code} {r.reason}')
68 |
69 |
70 | def export_model(key='', format='torchscript'):
71 | # Export a model to the specified format
72 | assert format in EXPORT_FORMATS_HUB, f"Unsupported export format '{format}', valid formats are {EXPORT_FORMATS_HUB}"
73 | api_key, model_id = split_key(key)
74 | r = requests.post('https://api.ultralytics.com/export',
75 | json={
76 | 'apiKey': api_key,
77 | 'modelId': model_id,
78 | 'format': format})
79 | assert (r.status_code == 200), f'{PREFIX}{format} export failure {r.status_code} {r.reason}'
80 | LOGGER.info(f'{PREFIX}{format} export started ✅')
81 |
82 |
83 | def get_export(key='', format='torchscript'):
84 | # Get an exported model dictionary with download URL
85 | assert format in EXPORT_FORMATS_HUB, f"Unsupported export format '{format}', valid formats are {EXPORT_FORMATS_HUB}"
86 | api_key, model_id = split_key(key)
87 | r = requests.post('https://api.ultralytics.com/get-export',
88 | json={
89 | 'apiKey': api_key,
90 | 'modelId': model_id,
91 | 'format': format})
92 | assert (r.status_code == 200), f'{PREFIX}{format} get_export failure {r.status_code} {r.reason}'
93 | return r.json()
94 |
95 |
96 | # temp. For checking
97 | if __name__ == '__main__':
98 | start()
99 |
--------------------------------------------------------------------------------
/detect_predict.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | import torch
4 |
5 | from ultralytics.yolo.engine.predictor import BasePredictor
6 | from ultralytics.yolo.engine.results import Results
7 | from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
8 | from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
9 |
10 |
11 | class DetectionPredictor(BasePredictor):
12 |
13 | def get_annotator(self, img):
14 | return Annotator(img, line_width=self.args.line_thickness, example=str(self.model.names))
15 |
16 | def preprocess(self, img):
17 | img = torch.from_numpy(img).to(self.model.device)
18 | img = img.half() if self.model.fp16 else img.float() # uint8 to fp16/32
19 | img /= 255 # 0 - 255 to 0.0 - 1.0
20 | return img
21 |
22 | def postprocess(self, preds, img, orig_img):
23 | preds = ops.non_max_suppression(preds,
24 | self.args.conf,
25 | self.args.iou,
26 | agnostic=self.args.agnostic_nms,
27 | max_det=self.args.max_det,
28 | classes=self.args.classes)
29 |
30 | results = []
31 | for i, pred in enumerate(preds):
32 | orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img
33 | shape = orig_img.shape
34 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
35 | results.append(Results(boxes=pred, orig_img=orig_img, names=self.model.names))
36 | return results
37 |
38 | def write_results(self, idx, results, batch):
39 | p, im, im0 = batch
40 | log_string = ''
41 | if len(im.shape) == 3:
42 | im = im[None] # expand for batch dim
43 | self.seen += 1
44 | imc = im0.copy() if self.args.save_crop else im0
45 | if self.source_type.webcam or self.source_type.from_img: # batch_size >= 1
46 | log_string += f'{idx}: '
47 | frame = self.dataset.count
48 | else:
49 | frame = getattr(self.dataset, 'frame', 0)
50 | self.data_path = p
51 | self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
52 | log_string += '%gx%g ' % im.shape[2:] # print string
53 | self.annotator = self.get_annotator(im0)
54 |
55 | det = results[idx].boxes # TODO: make boxes inherit from tensors
56 | if len(det) == 0:
57 | return log_string
58 | for c in det.cls.unique():
59 | n = (det.cls == c).sum() # detections per class
60 | log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "
61 |
62 | # write
63 | for d in reversed(det):
64 | cls, conf = d.cls.squeeze(), d.conf.squeeze()
65 | if self.args.save_txt: # Write to file
66 | line = (cls, *(d.xywhn.view(-1).tolist()), conf) \
67 | if self.args.save_conf else (cls, *(d.xywhn.view(-1).tolist())) # label format
68 | with open(f'{self.txt_path}.txt', 'a') as f:
69 | f.write(('%g ' * len(line)).rstrip() % line + '\n')
70 | if self.args.save or self.args.save_crop or self.args.show: # Add bbox to image
71 | c = int(cls) # integer class
72 | name = f'id:{int(d.id.item())} {self.model.names[c]}' if d.id is not None else self.model.names[c]
73 | label = None if self.args.hide_labels else (name if self.args.hide_conf else f'{name} {conf:.2f}')
74 | self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
75 | if self.args.save_crop:
76 | save_one_box(d.xyxy,
77 | imc,
78 | file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg',
79 | BGR=True)
80 |
81 | return log_string
82 |
83 |
84 | def predict(cfg=DEFAULT_CFG, use_python=False):
85 | model = cfg.model or 'yolov8n.pt'
86 | source = "imagesVideo/aaa.mp4"
87 |
88 | show = True
89 | conf = 0.3  # confidence threshold
90 | hide_labels = False  # hide class labels on boxes
91 | hide_conf = False  # hide confidence scores
92 | line_thickness = 3  # bounding-box line width (pixels)
93 | visualize = False  # visualize model features
94 | augment = False  # augmented inference
95 | retina_masks = False  # high-resolution segmentation masks
96 | # classes = [0, 2, 3]  # optionally filter predictions by class index
97 | args = dict(model=model, source=source, show=show, conf=conf, hide_labels=hide_labels, hide_conf=hide_conf, line_thickness=line_thickness, visualize=visualize, augment=augment, retina_masks=retina_masks)
98 |
99 | if use_python:
100 | from ultralytics import YOLO
101 | YOLO(model)(**args)
102 | else:
103 | predictor = DetectionPredictor(overrides=args)
104 | predictor.predict_cli()
105 |
106 |
107 | if __name__ == '__main__':
108 | predict()
109 |
--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/bot_sort.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 |
3 | import numpy as np
4 |
5 | from ..utils import matching
6 | from ..utils.gmc import GMC
7 | from ..utils.kalman_filter import KalmanFilterXYWH
8 | from .basetrack import TrackState
9 | from .byte_tracker import BYTETracker, STrack
10 |
11 |
12 | class BOTrack(STrack):
13 | shared_kalman = KalmanFilterXYWH()
14 |
15 | def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
16 | super().__init__(tlwh, score, cls)
17 |
18 | self.smooth_feat = None
19 | self.curr_feat = None
20 | if feat is not None:
21 | self.update_features(feat)
22 | self.features = deque([], maxlen=feat_history)
23 | self.alpha = 0.9
24 |
25 | def update_features(self, feat):
26 | feat /= np.linalg.norm(feat)
27 | self.curr_feat = feat
28 | if self.smooth_feat is None:
29 | self.smooth_feat = feat
30 | else:
31 | self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
32 | self.features.append(feat)
33 | self.smooth_feat /= np.linalg.norm(self.smooth_feat)
34 |
35 | def predict(self):
36 | mean_state = self.mean.copy()
37 | if self.state != TrackState.Tracked:
38 | mean_state[6] = 0
39 | mean_state[7] = 0
40 |
41 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
42 |
43 | def re_activate(self, new_track, frame_id, new_id=False):
44 | if new_track.curr_feat is not None:
45 | self.update_features(new_track.curr_feat)
46 | super().re_activate(new_track, frame_id, new_id)
47 |
48 | def update(self, new_track, frame_id):
49 | if new_track.curr_feat is not None:
50 | self.update_features(new_track.curr_feat)
51 | super().update(new_track, frame_id)
52 |
53 | @property
54 | def tlwh(self):
55 | """Get current position in bounding box format `(top left x, top left y,
56 | width, height)`.
57 | """
58 | if self.mean is None:
59 | return self._tlwh.copy()
60 | ret = self.mean[:4].copy()
61 | ret[:2] -= ret[2:] / 2
62 | return ret
63 |
64 | @staticmethod
65 | def multi_predict(stracks):
66 | if len(stracks) > 0:
67 | multi_mean = np.asarray([st.mean.copy() for st in stracks])
68 | multi_covariance = np.asarray([st.covariance for st in stracks])
69 | for i, st in enumerate(stracks):
70 | if st.state != TrackState.Tracked:
71 | multi_mean[i][6] = 0
72 | multi_mean[i][7] = 0
73 | multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
74 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
75 | stracks[i].mean = mean
76 | stracks[i].covariance = cov
77 |
78 | def convert_coords(self, tlwh):
79 | return self.tlwh_to_xywh(tlwh)
80 |
81 | @staticmethod
82 | def tlwh_to_xywh(tlwh):
83 | """Convert bounding box to format `(center x, center y, width,
84 | height)`.
85 | """
86 | ret = np.asarray(tlwh).copy()
87 | ret[:2] += ret[2:] / 2
88 | return ret
89 |
90 |
91 | class BOTSORT(BYTETracker):
92 |
93 | def __init__(self, args, frame_rate=30):
94 | super().__init__(args, frame_rate)
95 | # ReID module
96 | self.proximity_thresh = args.proximity_thresh
97 | self.appearance_thresh = args.appearance_thresh
98 |
99 | if args.with_reid:
100 | # ReID for BoT-SORT is not supported yet
101 | self.encoder = None
102 | # self.gmc = GMC(method=args.cmc_method, verbose=[args.name, args.ablation])
103 | self.gmc = GMC(method=args.cmc_method)
104 |
105 | def get_kalmanfilter(self):
106 | return KalmanFilterXYWH()
107 |
108 | def init_track(self, dets, scores, cls, img=None):
109 | if len(dets) == 0:
110 | return []
111 | if self.args.with_reid and self.encoder is not None:
112 | features_keep = self.encoder.inference(img, dets)
113 | detections = [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)]
114 | else:
115 | detections = [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)]
116 | return detections
117 |
118 | def get_dists(self, tracks, detections):
119 | dists = matching.iou_distance(tracks, detections)
120 | dists_mask = (dists > self.proximity_thresh)
121 |
122 | # TODO: mot20
123 | # if not self.args.mot20:
124 | dists = matching.fuse_score(dists, detections)
125 |
126 | if self.args.with_reid and self.encoder is not None:
127 | emb_dists = matching.embedding_distance(tracks, detections) / 2.0
128 | emb_dists[emb_dists > self.appearance_thresh] = 1.0
129 | emb_dists[dists_mask] = 1.0
130 | dists = np.minimum(dists, emb_dists)
131 | return dists
132 |
133 | def multi_predict(self, tracks):
134 | BOTrack.multi_predict(tracks)
135 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python-Yolov8-gesture-recognition
2 |
3 | >**Python YOLOv8 gesture recognition (hand gesture feature recognition)**
4 | >**For help installing the runtime environment or with remote debugging, see the contact card at the bottom of this page (QQ: 2945218359) for remote assistance from professional technical staff!**
5 |
6 |
7 | ## Runtime Environment
8 | **Programming language: Python 3**
9 | **Dependencies: Torch, YOLOv8**
10 |
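A minimal inference sketch (assuming the bundled `ultralytics` package and sample image; the `yolov8n.pt` weights path is an assumption and may need adjusting):

```python
from ultralytics import YOLO

# Load a YOLOv8 detection model (assumed local weights file; adjust the path as needed)
model = YOLO("yolov8n.pt")

# Run inference on the bundled sample image and show the annotated result
results = model.predict(source="images/zidane.jpg", conf=0.3, show=True)
for r in results:
    print(r.boxes)  # detected boxes: xyxy coordinates, confidence, class
```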
11 | ## Demo Results
12 |
13 |
14 |
15 |
16 |
17 | ## Online Assistance
18 | **For help installing the runtime environment or with remote debugging, scan the QR code or add QQ: 2945218359 or QQ: 905733049 for remote assistance from professional technical staff!**
19 | **1) Remote installation of the runtime environment and code debugging**
20 | **2) Introductory guidance for Qt, C++ and Python**
21 | **3) UI beautification**
22 | **4) Software development**
23 | **5) Cloud server setup**
24 | **6) Website development**
25 |
26 | **Scan the QR code or** **click here** **(QQ: 2945218359, QQ: 905733049)**
27 |
28 |
29 |
30 |
31 |
32 |
33 | **🏠Recommended by the author:**
34 |
35 | **🌟Python feature detection and recognition projects🌟**
36 |
37 | **Python + YOLOv5 facial expression detection and recognition:**
38 | [https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression)
39 | **Python fingerprint recognition system:**
40 | [https://github.com/alicema-creator/Python-fingerprint-recogn-system](https://github.com/alicema-creator/Python-fingerprint-recogn-system)
41 | **Python face recognition attendance system 2:**
42 | [https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system2](https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system2)
43 | **Python face recognition attendance system:**
44 | [https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system](https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system)
45 | **Python orchard fruit detection and recognition:**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-apple-fruit](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-apple-fruit)
46 | **Python + YOLOv8 + DeepSORT entrance/exit people counting:**[https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit](https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit)
47 | **Python + Qt fingerprint enrollment and recognition attendance system:**[https://blog.csdn.net/alicema1111/article/details/129338432](https://blog.csdn.net/alicema1111/article/details/129338432)
48 | **Python hand gesture recognition:**[https://github.com/alicema-creator/Python-Yolov8-gesture-recognition](https://github.com/alicema-creator/Python-Yolov8-gesture-recognition)
49 | **Python + YOLOv5 crack detection for roads, bridges and walls:**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-cracks-in-road-bridges](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-cracks-in-road-bridges)
50 | **Python + YOLOv8 crack detection for roads, bridges and walls:**[https://github.com/alicema-creator/Python-Yolov8-crack-recognition-for-road-bridge-wall](https://github.com/alicema-creator/Python-Yolov8-crack-recognition-for-road-bridge-wall)
51 | **Python + Qt GUI program for detecting and recognizing sidewalk tactile paving:**[https://github.com/alicema-creator/Python-Qt-Detection-and-recognition-of-sidewalk-tactile-paving](https://github.com/alicema-creator/Python-Qt-Detection-and-recognition-of-sidewalk-tactile-paving)
52 | **Python + YOLOv5 facial emotion expression detection and recognition:**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression)
53 |
54 |
55 |
56 | **🌟Python/Django web projects🌟**
57 | **Python + Django + HTML web front-end/back-end fingerprint information recognition:**[https://github.com/alicema-creator/Python-Django-HTML-web-fingerprint-information-recognition](https://github.com/alicema-creator/Python-Django-HTML-web-fingerprint-information-recognition)
58 | **HTML + three.js web digital-twin scene 3D visualization:**[https://github.com/alicema-creator/html-threejs-twin-scenes-3D-visualization-project](https://github.com/alicema-creator/html-threejs-twin-scenes-3D-visualization-project)
59 | **Python + Django + HTML web drug management system (CRM):**[https://github.com/alicema-creator/python-django-web-html-drug-management-system](https://github.com/alicema-creator/python-django-web-html-drug-management-system)
60 | **Qt + C++ web browser built on the latest Chrome kernel:**[https://github.com/alicema-creator/Qt-and-C-web-browser--Chrome-latest-kernel](https://github.com/alicema-creator/Qt-and-C-web-browser--Chrome-latest-kernel)
61 |
62 |
63 |
64 | **🌟C++/Qt projects🌟**
65 | **OCC OpenCASCADE + Qt + C++ 3D modeling GUI with point/line/surface picking:**[https://github.com/alicema-creator/OCC-Opencascade-Qt-C-3D-model-modeling-point-line-surface-pick-igs-iges-stp-step](https://github.com/alicema-creator/OCC-Opencascade-Qt-C-3D-model-modeling-point-line-surface-pick-igs-iges-stp-step)
66 | **Qt + VTK: generate extruded closed 3D volumes from mouse-picked points:**[https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes](https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes)
67 | **Qt + C++ serial-port communication tool with real-time plotting:**[https://github.com/alicema-creator/SerialPort-Communication](https://github.com/alicema-creator/SerialPort-Communication)
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/ultralytics/tracker/utils/matching.py:
--------------------------------------------------------------------------------
1 | import lap
2 | import numpy as np
3 | import scipy
4 | from scipy.spatial.distance import cdist
5 |
6 | from .kalman_filter import chi2inv95
7 |
8 |
9 | def merge_matches(m1, m2, shape):
10 | O, P, Q = shape
11 | m1 = np.asarray(m1)
12 | m2 = np.asarray(m2)
13 |
14 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
15 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
16 |
17 | mask = M1 * M2
18 | match = mask.nonzero()
19 | match = list(zip(match[0], match[1]))
20 | unmatched_O = tuple(set(range(O)) - {i for i, j in match})
21 | unmatched_Q = tuple(set(range(Q)) - {j for i, j in match})
22 |
23 | return match, unmatched_O, unmatched_Q
24 |
25 |
26 | def _indices_to_matches(cost_matrix, indices, thresh):
27 | matched_cost = cost_matrix[tuple(zip(*indices))]
28 | matched_mask = (matched_cost <= thresh)
29 |
30 | matches = indices[matched_mask]
31 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
32 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
33 |
34 | return matches, unmatched_a, unmatched_b
35 |
36 |
37 | def linear_assignment(cost_matrix, thresh):
38 | if cost_matrix.size == 0:
39 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
40 | matches, unmatched_a, unmatched_b = [], [], []
41 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
42 | matches.extend([ix, mx] for ix, mx in enumerate(x) if mx >= 0)
43 | unmatched_a = np.where(x < 0)[0]
44 | unmatched_b = np.where(y < 0)[0]
45 | matches = np.asarray(matches)
46 | return matches, unmatched_a, unmatched_b
47 |
48 |
49 | def ious(atlbrs, btlbrs):
50 | """
51 | Compute cost based on IoU
52 | :type atlbrs: list[tlbr] | np.ndarray
53 | :type btlbrs: list[tlbr] | np.ndarray
54 |
55 | :rtype ious np.ndarray
56 | """
57 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32)
58 | if ious.size == 0:
59 | return ious
60 |
61 | ious = bbox_ious(np.ascontiguousarray(atlbrs, dtype=np.float32), np.ascontiguousarray(btlbrs, dtype=np.float32))
62 | return ious
63 |
64 |
65 | def iou_distance(atracks, btracks):
66 | """
67 | Compute cost based on IoU
68 | :type atracks: list[STrack]
69 | :type btracks: list[STrack]
70 |
71 | :rtype cost_matrix np.ndarray
72 | """
73 |
74 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \
75 | or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
76 | atlbrs = atracks
77 | btlbrs = btracks
78 | else:
79 | atlbrs = [track.tlbr for track in atracks]
80 | btlbrs = [track.tlbr for track in btracks]
81 | _ious = ious(atlbrs, btlbrs)
82 | return 1 - _ious # cost matrix
83 |
84 |
85 | def v_iou_distance(atracks, btracks):
86 | """
87 | Compute cost based on IoU
88 | :type atracks: list[STrack]
89 | :type btracks: list[STrack]
90 |
91 | :rtype cost_matrix np.ndarray
92 | """
93 |
94 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \
95 | or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
96 | atlbrs = atracks
97 | btlbrs = btracks
98 | else:
99 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
100 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
101 | _ious = ious(atlbrs, btlbrs)
102 | return 1 - _ious # cost matrix
103 |
104 |
105 | def embedding_distance(tracks, detections, metric='cosine'):
106 | """
107 | :param tracks: list[STrack]
108 | :param detections: list[BaseTrack]
109 | :param metric: str, distance metric passed to scipy.spatial.distance.cdist (default 'cosine')
110 | :return: cost_matrix np.ndarray
111 | """
112 |
113 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
114 | if cost_matrix.size == 0:
115 | return cost_matrix
116 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32)
117 | # for i, track in enumerate(tracks):
118 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
119 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32)
120 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # normalized features
121 | return cost_matrix
122 |
123 |
124 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
125 | if cost_matrix.size == 0:
126 | return cost_matrix
127 | gating_dim = 2 if only_position else 4
128 | gating_threshold = chi2inv95[gating_dim]
129 | measurements = np.asarray([det.to_xyah() for det in detections])
130 | for row, track in enumerate(tracks):
131 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position)
132 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
133 | return cost_matrix
134 |
135 |
136 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
137 | if cost_matrix.size == 0:
138 | return cost_matrix
139 | gating_dim = 2 if only_position else 4
140 | gating_threshold = chi2inv95[gating_dim]
141 | measurements = np.asarray([det.to_xyah() for det in detections])
142 | for row, track in enumerate(tracks):
143 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position, metric='maha')
144 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
145 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
146 | return cost_matrix
147 |
148 |
149 | def fuse_iou(cost_matrix, tracks, detections):
150 | if cost_matrix.size == 0:
151 | return cost_matrix
152 | reid_sim = 1 - cost_matrix
153 | iou_dist = iou_distance(tracks, detections)
154 | iou_sim = 1 - iou_dist
155 | fuse_sim = reid_sim * (1 + iou_sim) / 2
156 | # det_scores = np.array([det.score for det in detections])
157 | # det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
158 | return 1 - fuse_sim # fuse cost
159 |
160 |
161 | def fuse_score(cost_matrix, detections):
162 | if cost_matrix.size == 0:
163 | return cost_matrix
164 | iou_sim = 1 - cost_matrix
165 | det_scores = np.array([det.score for det in detections])
166 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
167 | fuse_sim = iou_sim * det_scores
168 | return 1 - fuse_sim # fuse_cost
169 |
170 |
171 | def bbox_ious(box1, box2, eps=1e-7):
172 | """Boxes are x1y1x2y2
173 | box1: np.array of shape(nx4)
174 | box2: np.array of shape(mx4)
175 | returns: np.array of shape(nxm)
176 | """
177 | # Get the coordinates of bounding boxes
178 | b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
179 | b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
180 |
181 | # Intersection area
182 | inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
183 | (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
184 |
185 | # Box areas
186 | box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
187 | box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
188 | return inter_area / (box2_area + box1_area[:, None] - inter_area + eps)
189 |
--------------------------------------------------------------------------------
/ultralytics/hub/utils.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 |
3 | import os
4 | import platform
5 | import shutil
6 | import sys
7 | import threading
8 | import time
9 | from pathlib import Path
10 | from random import random
11 |
12 | import requests
13 |
14 | from ultralytics.yolo.utils import (DEFAULT_CFG_DICT, ENVIRONMENT, LOGGER, RANK, SETTINGS, TryExcept, __version__,
15 | colorstr, emojis, get_git_origin_url, is_colab, is_git_dir, is_github_actions_ci,
16 | is_pip_package, is_pytest_running)
17 | from ultralytics.yolo.utils.checks import check_online
18 |
19 | PREFIX = colorstr('Ultralytics: ')
20 | HELP_MSG = 'If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance.'
21 | HUB_API_ROOT = os.environ.get('ULTRALYTICS_HUB_API', 'https://api.ultralytics.com')
22 |
23 |
24 | def check_dataset_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=2.0):
25 | # Check that url fits on disk with safety factor sf, i.e. require 2GB free if url size is 1GB with sf=2.0
26 | gib = 1 << 30 # bytes per GiB
27 | data = int(requests.head(url).headers['Content-Length']) / gib # dataset size (GiB)
28 | total, used, free = (x / gib for x in shutil.disk_usage('/')) # disk usage (GiB)
29 | LOGGER.info(f'{PREFIX}{data:.3f} GB dataset, {free:.1f}/{total:.1f} GB free disk space')
30 | if data * sf < free:
31 | return True # sufficient space
32 | LOGGER.warning(f'{PREFIX}WARNING: Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, '
33 | f'training cancelled ❌. Please free {data * sf - free:.1f} GB additional disk space and try again.')
34 | return False # insufficient space
35 |
36 |
37 | def request_with_credentials(url: str) -> any:
38 | """ Make an ajax request with cookies attached """
39 | if not is_colab():
40 | raise OSError('request_with_credentials() must run in a Colab environment')
41 | from google.colab import output # noqa
42 | from IPython import display # noqa
43 | display.display(
44 | display.Javascript("""
45 | window._hub_tmp = new Promise((resolve, reject) => {
46 | const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000)
47 | fetch("%s", {
48 | method: 'POST',
49 | credentials: 'include'
50 | })
51 | .then((response) => resolve(response.json()))
52 | .then((json) => {
53 | clearTimeout(timeout);
54 | }).catch((err) => {
55 | clearTimeout(timeout);
56 | reject(err);
57 | });
58 | });
59 | """ % url))
60 | return output.eval_js('_hub_tmp')
61 |
62 |
63 | # Deprecated TODO: eliminate this function?
64 | def split_key(key=''):
65 | """
66 | Verify and split an 'api_key[sep]model_id' string, where sep is either '.' or '_'
67 |
68 | Args:
69 | key (str): The model key to split. If not provided, the user will be prompted to enter it.
70 |
71 | Returns:
72 | Tuple[str, str]: A tuple containing the API key and model ID.
73 | """
74 |
75 | import getpass
76 |
77 | error_string = emojis(f'{PREFIX}Invalid API key ⚠️\n') # error string
78 | if not key:
79 | key = getpass.getpass('Enter model key: ')
80 | sep = '_' if '_' in key else '.' if '.' in key else None # separator
81 | assert sep, error_string
82 | api_key, model_id = key.split(sep)
83 | assert len(api_key) and len(model_id), error_string
84 | return api_key, model_id
85 |
86 |
87 | def smart_request(*args, retry=3, timeout=30, thread=True, code=-1, method='post', verbose=True, **kwargs):
88 | """
89 | Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout.
90 |
91 | Args:
92 | *args: Positional arguments to be passed to the requests function specified in method.
93 | retry (int, optional): Number of retries to attempt before giving up. Default is 3.
94 | timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30.
95 | thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True.
96 | code (int, optional): An identifier for the request, used for logging purposes. Default is -1.
97 | method (str, optional): The HTTP method to use for the request. Choices are 'post' and 'get'. Default is 'post'.
98 | verbose (bool, optional): A flag to determine whether to print out to console or not. Default is True.
99 | **kwargs: Keyword arguments to be passed to the requests function specified in method.
100 |
101 | Returns:
102 | requests.Response: The HTTP response object. If the request is executed in a separate thread, returns None.
103 | """
104 | retry_codes = (408, 500) # retry only these codes
105 |
106 | @TryExcept(verbose=verbose)
107 | def func(*func_args, **func_kwargs):
108 | r = None # response
109 | t0 = time.time() # initial time for timer
110 | for i in range(retry + 1):
111 | if (time.time() - t0) > timeout:
112 | break
113 | if method == 'post':
114 | r = requests.post(*func_args, **func_kwargs) # i.e. post(url, data, json, files)
115 | elif method == 'get':
116 | r = requests.get(*func_args, **func_kwargs) # i.e. get(url, data, json, files)
117 | if r.status_code == 200:
118 | break
119 | try:
120 | m = r.json().get('message', 'No JSON message.')
121 | except AttributeError:
122 | m = 'Unable to read JSON.'
123 | if i == 0:
124 | if r.status_code in retry_codes:
125 | m += f' Retrying {retry}x for {timeout}s.' if retry else ''
126 | elif r.status_code == 429: # rate limit
127 | h = r.headers # response headers
128 | m = f"Rate limit reached ({h['X-RateLimit-Remaining']}/{h['X-RateLimit-Limit']}). " \
129 | f"Please retry after {h['Retry-After']}s."
130 | if verbose:
131 | LOGGER.warning(f'{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})')
132 | if r.status_code not in retry_codes:
133 | return r
134 | time.sleep(2 ** i) # exponential backoff
135 | return r
136 |
137 | if thread:
138 | threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True).start()
139 | else:
140 | return func(*args, **kwargs)
141 |
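# Usage sketch (an illustrative assumption, not part of the original module): a blocking GET with limited retries.
# response = smart_request(f'{HUB_API_ROOT}/v1/models/MODEL_ID', method='get', retry=2, timeout=10, thread=False)
# if response is not None and response.status_code == 200:
#     data = response.json().get('data', {})  # with thread=True the call returns None and runs in the background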
142 |
143 | class Traces:
144 |
145 | def __init__(self):
146 | """
147 | Initialize Traces for error tracking and reporting if tests are not currently running.
148 | """
149 | self.rate_limit = 3.0 # rate limit (seconds)
150 | self.t = 0.0 # rate limit timer (seconds)
151 | self.metadata = {
152 | 'sys_argv_name': Path(sys.argv[0]).name,
153 | 'install': 'git' if is_git_dir() else 'pip' if is_pip_package() else 'other',
154 | 'python': platform.python_version(),
155 | 'release': __version__,
156 | 'environment': ENVIRONMENT}
157 | self.enabled = SETTINGS['sync'] and \
158 | RANK in {-1, 0} and \
159 | check_online() and \
160 | not is_pytest_running() and \
161 | not is_github_actions_ci() and \
162 | (is_pip_package() or get_git_origin_url() == 'https://github.com/ultralytics/ultralytics.git')
163 |
164 | def __call__(self, cfg, all_keys=False, traces_sample_rate=1.0):
165 | """
166 | Sync traces data if enabled in the global settings
167 |
168 | Args:
169 | cfg (IterableSimpleNamespace): Configuration for the task and mode.
170 | all_keys (bool): Sync all items, not just non-default values.
171 | traces_sample_rate (float): Fraction of traces captured from 0.0 to 1.0
172 | """
173 | t = time.time() # current time
174 | if self.enabled and random() < traces_sample_rate and (t - self.t) > self.rate_limit:
175 | self.t = t # reset rate limit timer
176 | cfg = vars(cfg) # convert type from IterableSimpleNamespace to dict
177 | if not all_keys: # filter cfg
178 | include_keys = {'task', 'mode'} # always include
179 | cfg = {
180 | k: (v.split(os.sep)[-1] if isinstance(v, str) and os.sep in v else v)
181 | for k, v in cfg.items() if v != DEFAULT_CFG_DICT.get(k, None) or k in include_keys}
182 | trace = {'uuid': SETTINGS['uuid'], 'cfg': cfg, 'metadata': self.metadata}
183 |
184 | # Send a request to the HUB API to sync analytics
185 | smart_request(f'{HUB_API_ROOT}/v1/usage/anonymous',
186 | json=trace,
187 | headers=None,
188 | code=3,
189 | retry=0,
190 | timeout=1.0,
191 | verbose=False)
192 |
193 |
194 | # Run below code on hub/utils init -------------------------------------------------------------------------------------
195 | traces = Traces()
196 |
--------------------------------------------------------------------------------
/ultralytics/hub/session.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 | import json
3 | import signal
4 | import sys
5 | from pathlib import Path
6 | from time import sleep, time
7 |
8 | import requests
9 |
10 | from ultralytics.hub.utils import HUB_API_ROOT, check_dataset_disk_space, smart_request
11 | from ultralytics.yolo.utils import LOGGER, PREFIX, __version__, emojis, is_colab, threaded
12 | from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params
13 |
14 | AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local'
15 | session = None
16 |
17 |
18 | class HubTrainingSession:
19 |
20 | def __init__(self, model_id, auth):
21 | self.agent_id = None # identifies which instance is communicating with server
22 | self.model_id = model_id
23 | self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}'
24 | self.auth_header = auth.get_auth_header()
25 | self._rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0} # rate limits (seconds)
26 | self._timers = {} # rate limit timers (seconds)
27 | self._metrics_queue = {} # metrics queue
28 | self.model = self._get_model()
29 | self._start_heartbeat() # start heartbeats
30 | self._register_signal_handlers()
31 |
32 | def _register_signal_handlers(self):
33 | signal.signal(signal.SIGTERM, self._handle_signal)
34 | signal.signal(signal.SIGINT, self._handle_signal)
35 |
36 | def _handle_signal(self, signum, frame):
37 | """
38 | Prevent heartbeats from being sent on Colab after a kill signal.
39 | This method does not use frame; it is included because it is
40 | passed by signal.
41 | """
42 | if self.alive is True:
43 | LOGGER.info(f'{PREFIX}Kill signal received! ❌')
44 | self._stop_heartbeat()
45 | sys.exit(signum)
46 |
47 | def _stop_heartbeat(self):
48 | """End the heartbeat loop"""
49 | self.alive = False
50 |
51 | def upload_metrics(self):
52 | payload = {'metrics': self._metrics_queue.copy(), 'type': 'metrics'}
53 | smart_request(f'{self.api_url}', json=payload, headers=self.auth_header, code=2)
54 |
55 | def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
56 | # Upload a model to HUB
57 | file = None
58 | if Path(weights).is_file():
59 | with open(weights, 'rb') as f:
60 | file = f.read()
61 | if final:
62 | smart_request(
63 | f'{self.api_url}/upload',
64 | data={
65 | 'epoch': epoch,
66 | 'type': 'final',
67 | 'map': map},
68 | files={'best.pt': file},
69 | headers=self.auth_header,
70 | retry=10,
71 | timeout=3600,
72 | code=4,
73 | )
74 | else:
75 | smart_request(
76 | f'{self.api_url}/upload',
77 | data={
78 | 'epoch': epoch,
79 | 'type': 'epoch',
80 | 'isBest': bool(is_best)},
81 | headers=self.auth_header,
82 | files={'last.pt': file},
83 | code=3,
84 | )
85 |
86 | def _get_model(self):
87 | # Returns model from database by id
88 | api_url = f'{HUB_API_ROOT}/v1/models/{self.model_id}'
89 | headers = self.auth_header
90 |
91 | try:
92 | response = smart_request(api_url, method='get', headers=headers, thread=False, code=0)
93 | data = response.json().get('data', None)
94 |
95 | if data.get('status', None) == 'trained':
96 | raise ValueError(
97 | emojis(f'Model is already trained and uploaded to '
98 | f'https://hub.ultralytics.com/models/{self.model_id} 🚀'))
99 |
100 | if not data.get('data', None):
101 | raise ValueError('Dataset may still be processing. Please wait a minute and try again.') # RF fix
102 | self.model_id = data['id']
103 |
104 | # TODO: restore server keys when dataset URL and GPU training are working
105 |
106 | self.train_args = {
107 | 'batch': data['batch_size'],
108 | 'epochs': data['epochs'],
109 | 'imgsz': data['imgsz'],
110 | 'patience': data['patience'],
111 | 'device': data['device'],
112 | 'cache': data['cache'],
113 | 'data': data['data']}
114 |
115 | self.input_file = data.get('cfg', data['weights'])
116 |
117 | # Hack for YOLOv5: the cfg name gets a 'u' suffix
118 | if 'cfg' in data and 'yolov5' in data['cfg']:
119 | self.input_file = data['cfg'].replace('.yaml', 'u.yaml')
120 |
121 | return data
122 | except requests.exceptions.ConnectionError as e:
123 | raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e
124 | except Exception:
125 | raise
126 |
127 | def check_disk_space(self):
128 | if not check_dataset_disk_space(self.model['data']):
129 | raise MemoryError('Not enough disk space')
130 |
131 | def register_callbacks(self, trainer):
132 | trainer.add_callback('on_pretrain_routine_end', self.on_pretrain_routine_end)
133 | trainer.add_callback('on_fit_epoch_end', self.on_fit_epoch_end)
134 | trainer.add_callback('on_model_save', self.on_model_save)
135 | trainer.add_callback('on_train_end', self.on_train_end)
136 |
137 | def on_pretrain_routine_end(self, trainer):
138 | """
139 | Start timer for upload rate limit.
140 | This method does not use trainer. It is passed to all callbacks by default.
141 | """
142 | # Start timer for upload rate limit
143 | LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀')
144 | self._timers = {'metrics': time(), 'ckpt': time()} # start timer on self.rate_limit
145 |
146 | def on_fit_epoch_end(self, trainer):
147 | # Upload metrics after val end
148 | all_plots = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics}
149 |
150 | if trainer.epoch == 0:
151 | model_info = {
152 | 'model/parameters': get_num_params(trainer.model),
153 | 'model/GFLOPs': round(get_flops(trainer.model), 3),
154 | 'model/speed(ms)': round(trainer.validator.speed[1], 3)}
155 | all_plots = {**all_plots, **model_info}
156 | self._metrics_queue[trainer.epoch] = json.dumps(all_plots)
157 | if time() - self._timers['metrics'] > self._rate_limits['metrics']:
158 | self.upload_metrics()
159 | self._timers['metrics'] = time() # reset timer
160 | self._metrics_queue = {} # reset queue
161 |
162 | def on_model_save(self, trainer):
163 | # Upload checkpoints with rate limiting
164 | is_best = trainer.best_fitness == trainer.fitness
165 | if time() - self._timers['ckpt'] > self._rate_limits['ckpt']:
166 | LOGGER.info(f'{PREFIX}Uploading checkpoint {self.model_id}')
167 | self._upload_model(trainer.epoch, trainer.last, is_best)
168 | self._timers['ckpt'] = time() # reset timer
169 |
170 | def on_train_end(self, trainer):
171 | # Upload final model and metrics with exponential backoff
172 | LOGGER.info(f'{PREFIX}Training completed successfully ✅')
173 | LOGGER.info(f'{PREFIX}Uploading final {self.model_id}')
174 |
175 | # hack for fetching mAP
176 | mAP = trainer.metrics.get('metrics/mAP50-95(B)', 0)
177 | self._upload_model(trainer.epoch, trainer.best, map=mAP, final=True) # results[3] is mAP0.5:0.95
178 | self.alive = False # stop heartbeats
179 | LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀')
180 |
181 | def _upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
182 | # Upload a model to HUB
183 | file = None
184 | if Path(weights).is_file():
185 | with open(weights, 'rb') as f:
186 | file = f.read()
187 | file_param = {'best.pt' if final else 'last.pt': file}
188 | endpoint = f'{self.api_url}/upload'
189 | data = {'epoch': epoch}
190 | if final:
191 | data.update({'type': 'final', 'map': map})
192 | else:
193 | data.update({'type': 'epoch', 'isBest': bool(is_best)})
194 |
195 | smart_request(
196 | endpoint,
197 | data=data,
198 | files=file_param,
199 | headers=self.auth_header,
200 | retry=10 if final else None,
201 | timeout=3600 if final else None,
202 | code=4 if final else 3,
203 | )
204 |
205 | @threaded
206 | def _start_heartbeat(self):
207 | self.alive = True
208 | while self.alive:
209 | r = smart_request(
210 | f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}',
211 | json={
212 | 'agent': AGENT_NAME,
213 | 'agentId': self.agent_id},
214 | headers=self.auth_header,
215 | retry=0,
216 | code=5,
217 | thread=False,
218 | )
219 | self.agent_id = r.json().get('data', {}).get('agentId', None)
220 | sleep(self._rate_limits['heartbeat'])
221 |
--------------------------------------------------------------------------------
/ultralytics/models/README.md:
--------------------------------------------------------------------------------
1 | ## Models
2 |
3 | Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration
4 | files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted
5 | and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image
6 | segmentation tasks.
7 |
8 | These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like
9 | instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms,
10 | from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this
11 | directory provides a great starting point for your custom model development needs.
12 |
13 | To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've
14 | selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full
15 | details at the Ultralytics [Docs](https://docs.ultralytics.com), and if you need help or have any questions, feel free
16 | to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!
17 |
18 | ### Usage
19 |
20 | Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:
21 |
22 | ```bash
23 | yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
24 | ```
25 |
26 | They may also be used directly in a Python environment, and accept the same
27 | [arguments](https://docs.ultralytics.com/cfg/) as in the CLI example above:
28 |
29 | ```python
30 | from ultralytics import YOLO
31 |
32 | model = YOLO("model.yaml") # build a YOLOv8n model from scratch
33 | # YOLO("model.pt") use pre-trained model if available
34 | model.info() # display model information
35 | model.train(data="coco128.yaml", epochs=100) # train the model
36 | ```
37 |
38 | ## Pre-trained Model Architectures
39 |
40 | Ultralytics supports many model architectures. Visit the [models](#) page to view detailed information and usage.
41 | Any of these models can be used by loading their configs, or pretrained checkpoints if available; a minimal sketch follows.
42 |
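As a minimal sketch (assuming the config and checkpoint names listed in the tables below are available locally or downloadable):

```python
from ultralytics import YOLO

# Build a segmentation model from its config (randomly initialized weights)
model = YOLO("yolov8n-seg.yaml")
model.info()  # print layers, parameter count and GFLOPs

# Or load the corresponding pretrained checkpoint, if available
model = YOLO("yolov8n-seg.pt")
model.predict(source="https://ultralytics.com/images/zidane.jpg", conf=0.25)
```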
43 | Want to add your model architecture? [Here's](#) how you can contribute.
44 |
45 | ### 1. YOLOv8
46 |
47 | **About** - Cutting edge Detection, Segmentation and Classification models developed by Ultralytics.
48 | **Citation** -
49 | Available Models:
50 |
51 | - Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x`
52 | - Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg`
53 | - Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls`
54 |
55 | Performance
56 |
57 | ### Detection
58 |
59 | | Model | size<br>(pixels) | mAP<sup>val</sup><br>50-95 | Speed<br>CPU ONNX<br>(ms) | Speed<br>A100 TensorRT<br>(ms) | params<br>(M) | FLOPs<br>(B) |
60 | | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
61 | | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 |
62 | | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 |
63 | | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 |
64 | | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 |
65 | | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 |
66 |
67 | ### Segmentation
68 |
69 | | Model | size<br>(pixels) | mAP<sup>box</sup><br>50-95 | mAP<sup>mask</sup><br>50-95 | Speed<br>CPU ONNX<br>(ms) | Speed<br>A100 TensorRT<br>(ms) | params<br>(M) | FLOPs<br>(B) |
70 | | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
71 | | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 |
72 | | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 |
73 | | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 |
74 | | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 |
75 | | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 |
76 |
77 | ### Classification
78 |
79 | | Model | size<br>(pixels) | acc<br>top1 | acc<br>top5 | Speed<br>CPU ONNX<br>(ms) | Speed<br>A100 TensorRT<br>(ms) | params<br>(M) | FLOPs<br>(B) at 640 |
80 | | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ |
81 | | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 |
82 | | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 |
83 | | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 |
84 | | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 |
85 | | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 |
86 |
87 |
88 |
89 | ### 2. YOLOv5u
90 |
91 | **About** - Anchor-free YOLOv5 models with new detection head and better speed-accuracy tradeoff
92 | **Citation** -
93 | Available Models:
94 |
95 | - Detection - `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`
96 |
97 | Performance
98 |
99 | ### Detection
100 |
101 | | Model | size<br>(pixels) | mAP<sup>val</sup><br>50-95 | Speed<br>CPU ONNX<br>(ms) | Speed<br>A100 TensorRT<br>(ms) | params<br>(M) | FLOPs<br>(B) |
102 | | -------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
103 | | [YOLOv5nu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 |
104 | | [YOLOv5su](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 |
105 | | [YOLOv5mu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 |
106 | | [YOLOv5lu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 |
107 | | [YOLOv5xu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/ultralytics/nn/autoshape.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 | """
3 | Common modules
4 | """
5 |
6 | from copy import copy
7 | from pathlib import Path
8 |
9 | import cv2
10 | import numpy as np
11 | import pandas as pd
12 | import requests
13 | import torch
14 | import torch.nn as nn
15 | from PIL import Image, ImageOps
16 | from torch.cuda import amp
17 |
18 | from ultralytics.nn.autobackend import AutoBackend
19 | from ultralytics.yolo.data.augment import LetterBox
20 | from ultralytics.yolo.utils import LOGGER, colorstr
21 | from ultralytics.yolo.utils.files import increment_path
22 | from ultralytics.yolo.utils.ops import Profile, make_divisible, non_max_suppression, scale_boxes, xyxy2xywh
23 | from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
24 | from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode
25 |
26 |
27 | class AutoShape(nn.Module):
28 | # YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
29 | conf = 0.25 # NMS confidence threshold
30 | iou = 0.45 # NMS IoU threshold
31 | agnostic = False # NMS class-agnostic
32 | multi_label = False # NMS multiple labels per box
33 | classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
34 | max_det = 1000 # maximum number of detections per image
35 | amp = False # Automatic Mixed Precision (AMP) inference
36 |
37 | def __init__(self, model, verbose=True):
38 | super().__init__()
39 | if verbose:
40 | LOGGER.info('Adding AutoShape... ')
41 | copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
42 | self.dmb = isinstance(model, AutoBackend) # DetectMultiBackend() instance
43 | self.pt = not self.dmb or model.pt # PyTorch model
44 | self.model = model.eval()
45 | if self.pt:
46 | m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
47 | m.inplace = False # Detect.inplace=False for safe multithread inference
48 | m.export = True # do not output loss values
49 |
50 | def _apply(self, fn):
51 | # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
52 | self = super()._apply(fn)
53 | if self.pt:
54 | m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
55 | m.stride = fn(m.stride)
56 | m.grid = list(map(fn, m.grid))
57 | if isinstance(m.anchor_grid, list):
58 | m.anchor_grid = list(map(fn, m.anchor_grid))
59 | return self
60 |
61 | @smart_inference_mode()
62 | def forward(self, ims, size=640, augment=False, profile=False):
63 | # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
64 | # file: ims = 'data/images/zidane.jpg' # str or PosixPath
65 | # URI: = 'https://ultralytics.com/images/zidane.jpg'
66 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
67 | # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
68 | # numpy: = np.zeros((640,1280,3)) # HWC
69 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
70 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
71 |
72 | dt = (Profile(), Profile(), Profile())
73 | with dt[0]:
74 | if isinstance(size, int): # expand
75 | size = (size, size)
76 | p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
77 | autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
78 | if isinstance(ims, torch.Tensor): # torch
79 | with amp.autocast(autocast):
80 | return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
81 |
82 | # Pre-process
83 | n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
84 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames
85 | for i, im in enumerate(ims):
86 | f = f'image{i}' # filename
87 | if isinstance(im, (str, Path)): # filename or uri
88 | im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
89 | im = np.asarray(ImageOps.exif_transpose(im))
90 | elif isinstance(im, Image.Image): # PIL Image
91 | im, f = np.asarray(ImageOps.exif_transpose(im)), getattr(im, 'filename', f) or f
92 | files.append(Path(f).with_suffix('.jpg').name)
93 | if im.shape[0] < 5: # image in CHW
94 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
95 | im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
96 | s = im.shape[:2] # HWC
97 | shape0.append(s) # image shape
98 | g = max(size) / max(s) # gain
99 | shape1.append([y * g for y in s])
100 | ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
101 | shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size # inf shape
102 | x = [LetterBox(shape1, auto=False)(image=im)['img'] for im in ims] # pad
103 | x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
104 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
105 |
106 | with amp.autocast(autocast):
107 | # Inference
108 | with dt[1]:
109 | y = self.model(x, augment=augment) # forward
110 |
111 | # Post-process
112 | with dt[2]:
113 | y = non_max_suppression(y if self.dmb else y[0],
114 | self.conf,
115 | self.iou,
116 | self.classes,
117 | self.agnostic,
118 | self.multi_label,
119 | max_det=self.max_det) # NMS
120 | for i in range(n):
121 | scale_boxes(shape1, y[i][:, :4], shape0[i])
122 |
123 | return Detections(ims, y, files, dt, self.names, x.shape)
124 |
125 |
126 | class Detections:
127 | # YOLOv8 detections class for inference results
128 | def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
129 | super().__init__()
130 | d = pred[0].device # device
131 | gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
132 | self.ims = ims # list of images as numpy arrays
133 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
134 | self.names = names # class names
135 | self.files = files # image filenames
136 | self.times = times # profiling times
137 | self.xyxy = pred # xyxy pixels
138 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
139 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
140 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
141 | self.n = len(self.pred) # number of images (batch size)
142 | self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms)
143 | self.s = tuple(shape) # inference BCHW shape
144 |
145 | def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
146 | s, crops = '', []
147 | for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
148 | s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
149 | if pred.shape[0]:
150 | for c in pred[:, -1].unique():
151 | n = (pred[:, -1] == c).sum() # detections per class
152 | s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
153 | s = s.rstrip(', ')
154 | if show or save or render or crop:
155 | annotator = Annotator(im, example=str(self.names))
156 | for *box, conf, cls in reversed(pred): # xyxy, confidence, class
157 | label = f'{self.names[int(cls)]} {conf:.2f}'
158 | if crop:
159 | file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
160 | crops.append({
161 | 'box': box,
162 | 'conf': conf,
163 | 'cls': cls,
164 | 'label': label,
165 | 'im': save_one_box(box, im, file=file, save=save)})
166 | else: # all others
167 | annotator.box_label(box, label if labels else '', color=colors(cls))
168 | im = annotator.im
169 | else:
170 | s += '(no detections)'
171 |
172 | im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
173 | if show:
174 | im.show(self.files[i]) # show
175 | if save:
176 | f = self.files[i]
177 | im.save(save_dir / f) # save
178 | if i == self.n - 1:
179 | LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
180 | if render:
181 | self.ims[i] = np.asarray(im)
182 | if pprint:
183 | s = s.lstrip('\n')
184 | return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
185 | if crop:
186 | if save:
187 | LOGGER.info(f'Saved results to {save_dir}\n')
188 | return crops
189 |
190 | def show(self, labels=True):
191 | self._run(show=True, labels=labels) # show results
192 |
193 | def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
194 | save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
195 | self._run(save=True, labels=labels, save_dir=save_dir) # save results
196 |
197 | def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
198 | save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
199 | return self._run(crop=True, save=save, save_dir=save_dir) # crop results
200 |
201 | def render(self, labels=True):
202 | self._run(render=True, labels=labels) # render results
203 | return self.ims
204 |
205 | def pandas(self):
206 | # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
207 | new = copy(self) # return copy
208 | ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
209 | cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
210 | for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
211 | a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
212 | setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
213 | return new
214 |
215 | def tolist(self):
216 | # return a list of Detections objects, i.e. 'for result in results.tolist():'
217 | r = range(self.n) # iterable
218 | x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
219 | # for d in x:
220 | # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
221 | # setattr(d, k, getattr(d, k)[0]) # pop out of list
222 | return x
223 |
224 | def print(self):
225 | LOGGER.info(self.__str__())
226 |
227 | def __len__(self): # override len(results)
228 | return self.n
229 |
230 | def __str__(self): # override print(results)
231 | return self._run(pprint=True) # print results
232 |
233 | def __repr__(self):
234 | return f'YOLOv8 {self.__class__} instance\n' + self.__str__()
235 |
236 |
237 | # print('works')  # stray debug output, disabled
238 |
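The Detections class above mirrors the YOLOv5 AutoShape results interface (print/show/save/crop/render/pandas/tolist). A minimal usage sketch, assuming `model` is an AutoShape-wrapped detector whose call returns a Detections instance; the `model` variable and paths are illustrative, not defined in this file:

    results = model('images/zidane.jpg')              # inference returns a Detections object
    results.print()                                   # per-class counts plus pre/inference/NMS timing
    df = results.pandas().xyxy[0]                     # boxes of the first image as a DataFrame
    print(df[['name', 'confidence']].head())          # inspect the top detections
    crops = results.crop(save_dir='runs/detect/exp')  # save and return per-detection crops
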
--------------------------------------------------------------------------------
/ultralytics/tracker/utils/gmc.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import cv2
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 |
7 |
8 | class GMC:
9 |
10 | def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):
11 | super().__init__()
12 |
13 | self.method = method
14 | self.downscale = max(1, int(downscale))
15 |
16 | if self.method == 'orb':
17 | self.detector = cv2.FastFeatureDetector_create(20)
18 | self.extractor = cv2.ORB_create()
19 | self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
20 |
21 | elif self.method == 'sift':
22 | self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
23 | self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
24 | self.matcher = cv2.BFMatcher(cv2.NORM_L2)
25 |
26 | elif self.method == 'ecc':
27 | number_of_iterations = 5000
28 | termination_eps = 1e-6
29 | self.warp_mode = cv2.MOTION_EUCLIDEAN
30 | self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
31 |
32 | elif self.method == 'sparseOptFlow':
33 | self.feature_params = dict(maxCorners=1000,
34 | qualityLevel=0.01,
35 | minDistance=1,
36 | blockSize=3,
37 | useHarrisDetector=False,
38 | k=0.04)
39 | # self.gmc_file = open('GMC_results.txt', 'w')
40 |
41 | elif self.method in ['file', 'files']:
42 | seqName = verbose[0]
43 | ablation = verbose[1]
44 | if ablation:
45 | filePath = r'tracker/GMC_files/MOT17_ablation'
46 | else:
47 | filePath = r'tracker/GMC_files/MOTChallenge'
48 |
49 | if '-FRCNN' in seqName:
50 | seqName = seqName[:-6]
51 | elif '-DPM' in seqName or '-SDP' in seqName:
52 | seqName = seqName[:-4]
53 | self.gmcFile = open(f'{filePath}/GMC-{seqName}.txt')
54 |
55 | if self.gmcFile is None:
56 | raise ValueError(f'Error: Unable to open GMC file in directory:{filePath}')
57 | elif self.method in ['none', 'None']:
58 | self.method = 'none'
59 | else:
60 | raise ValueError(f'Error: Unknown CMC method:{method}')
61 |
62 | self.prevFrame = None
63 | self.prevKeyPoints = None
64 | self.prevDescriptors = None
65 |
66 | self.initializedFirstFrame = False
67 |
68 | def apply(self, raw_frame, detections=None):
69 | if self.method in ['orb', 'sift']:
70 | return self.applyFeatures(raw_frame, detections)
71 | elif self.method == 'ecc':
72 | return self.applyEcc(raw_frame, detections)
73 | elif self.method == 'sparseOptFlow':
74 | return self.applySparseOptFlow(raw_frame, detections)
75 | elif self.method == 'file':
76 | return self.applyFile(raw_frame, detections)
77 | elif self.method == 'none':
78 | return np.eye(2, 3)
79 | else:
80 | return np.eye(2, 3)
81 |
82 | def applyEcc(self, raw_frame, detections=None):
83 |
84 | # Initialize
85 | height, width, _ = raw_frame.shape
86 | frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
87 | H = np.eye(2, 3, dtype=np.float32)
88 |
89 | # Downscale image (TODO: consider using pyramids)
90 | if self.downscale > 1.0:
91 | frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
92 | frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
93 | width = width // self.downscale
94 | height = height // self.downscale
95 |
96 | # Handle first frame
97 | if not self.initializedFirstFrame:
98 | # Initialize data
99 | self.prevFrame = frame.copy()
100 |
101 | # Initialization done
102 | self.initializedFirstFrame = True
103 |
104 | return H
105 |
106 | # Run the ECC algorithm. The results are stored in warp_matrix.
107 | # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
108 | try:
109 | (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
110 | except Exception as e:
111 | print(f'Warning: find transform failed. Setting warp as identity. {e}')
112 |
113 | return H
114 |
115 | def applyFeatures(self, raw_frame, detections=None):
116 |
117 | # Initialize
118 | height, width, _ = raw_frame.shape
119 | frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
120 | H = np.eye(2, 3)
121 |
122 | # Downscale image (TODO: consider using pyramids)
123 | if self.downscale > 1.0:
124 | # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
125 | frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
126 | width = width // self.downscale
127 | height = height // self.downscale
128 |
129 | # find the keypoints
130 | mask = np.zeros_like(frame)
131 | # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
132 | mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255
133 | if detections is not None:
134 | for det in detections:
135 | tlbr = (det[:4] / self.downscale).astype(np.int_)
136 | mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
137 |
138 | keypoints = self.detector.detect(frame, mask)
139 |
140 | # compute the descriptors
141 | keypoints, descriptors = self.extractor.compute(frame, keypoints)
142 |
143 | # Handle first frame
144 | if not self.initializedFirstFrame:
145 | # Initialize data
146 | self.prevFrame = frame.copy()
147 | self.prevKeyPoints = copy.copy(keypoints)
148 | self.prevDescriptors = copy.copy(descriptors)
149 |
150 | # Initialization done
151 | self.initializedFirstFrame = True
152 |
153 | return H
154 |
155 | # Match descriptors.
156 | knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
157 |
158 | # Filtered matches based on smallest spatial distance
159 | matches = []
160 | spatialDistances = []
161 |
162 | maxSpatialDistance = 0.25 * np.array([width, height])
163 |
164 | # Handle empty matches case
165 | if len(knnMatches) == 0:
166 | # Store to next iteration
167 | self.prevFrame = frame.copy()
168 | self.prevKeyPoints = copy.copy(keypoints)
169 | self.prevDescriptors = copy.copy(descriptors)
170 |
171 | return H
172 |
173 | for m, n in knnMatches:
174 | if m.distance < 0.9 * n.distance:
175 | prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
176 | currKeyPointLocation = keypoints[m.trainIdx].pt
177 |
178 | spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
179 | prevKeyPointLocation[1] - currKeyPointLocation[1])
180 |
181 | if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
182 | (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
183 | spatialDistances.append(spatialDistance)
184 | matches.append(m)
185 |
186 | meanSpatialDistances = np.mean(spatialDistances, 0)
187 | stdSpatialDistances = np.std(spatialDistances, 0)
188 |
189 | inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
190 |
191 | goodMatches = []
192 | prevPoints = []
193 | currPoints = []
194 | for i in range(len(matches)):
195 | if inliers[i, 0] and inliers[i, 1]:
196 | goodMatches.append(matches[i])
197 | prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
198 | currPoints.append(keypoints[matches[i].trainIdx].pt)
199 |
200 | prevPoints = np.array(prevPoints)
201 | currPoints = np.array(currPoints)
202 |
203 | # Draw the keypoint matches on the output image
204 | if 0:
205 | matches_img = np.hstack((self.prevFrame, frame))
206 | matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
207 | W = np.size(self.prevFrame, 1)
208 | for m in goodMatches:
209 | prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
210 | curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
211 | curr_pt[0] += W
212 | color = np.random.randint(0, 255, (3,))
213 | color = (int(color[0]), int(color[1]), int(color[2]))
214 |
215 | matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
216 | matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
217 | matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
218 |
219 | plt.figure()
220 | plt.imshow(matches_img)
221 | plt.show()
222 |
223 | # Find rigid matrix
224 | if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
225 | H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
226 |
227 | # Handle downscale
228 | if self.downscale > 1.0:
229 | H[0, 2] *= self.downscale
230 | H[1, 2] *= self.downscale
231 | else:
232 | print('Warning: not enough matching points')
233 |
234 | # Store to next iteration
235 | self.prevFrame = frame.copy()
236 | self.prevKeyPoints = copy.copy(keypoints)
237 | self.prevDescriptors = copy.copy(descriptors)
238 |
239 | return H
240 |
241 | def applySparseOptFlow(self, raw_frame, detections=None):
242 | # Initialize
243 | # t0 = time.time()
244 | height, width, _ = raw_frame.shape
245 | frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
246 | H = np.eye(2, 3)
247 |
248 | # Downscale image
249 | if self.downscale > 1.0:
250 | # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
251 | frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
252 |
253 | # find the keypoints
254 | keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
255 |
256 | # Handle first frame
257 | if not self.initializedFirstFrame:
258 | # Initialize data
259 | self.prevFrame = frame.copy()
260 | self.prevKeyPoints = copy.copy(keypoints)
261 |
262 | # Initialization done
263 | self.initializedFirstFrame = True
264 |
265 | return H
266 |
267 | # find correspondences
268 | matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
269 |
270 | # leave good correspondences only
271 | prevPoints = []
272 | currPoints = []
273 |
274 | for i in range(len(status)):
275 | if status[i]:
276 | prevPoints.append(self.prevKeyPoints[i])
277 | currPoints.append(matchedKeypoints[i])
278 |
279 | prevPoints = np.array(prevPoints)
280 | currPoints = np.array(currPoints)
281 |
282 | # Find rigid matrix
283 | if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
284 | H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
285 |
286 | # Handle downscale
287 | if self.downscale > 1.0:
288 | H[0, 2] *= self.downscale
289 | H[1, 2] *= self.downscale
290 | else:
291 | print('Warning: not enough matching points')
292 |
293 | # Store to next iteration
294 | self.prevFrame = frame.copy()
295 | self.prevKeyPoints = copy.copy(keypoints)
296 |
297 | # gmc_line = str(1000 * (time.time() - t0)) + "\t" + str(H[0, 0]) + "\t" + str(H[0, 1]) + "\t" + str(
298 | # H[0, 2]) + "\t" + str(H[1, 0]) + "\t" + str(H[1, 1]) + "\t" + str(H[1, 2]) + "\n"
299 | # self.gmc_file.write(gmc_line)
300 |
301 | return H
302 |
303 | def applyFile(self, raw_frame, detections=None):
304 | line = self.gmcFile.readline()
305 | tokens = line.split('\t')
306 | H = np.eye(2, 3, dtype=np.float_)
307 | H[0, 0] = float(tokens[1])
308 | H[0, 1] = float(tokens[2])
309 | H[0, 2] = float(tokens[3])
310 | H[1, 0] = float(tokens[4])
311 | H[1, 1] = float(tokens[5])
312 | H[1, 2] = float(tokens[6])
313 |
314 | return H
315 |
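GMC estimates global (camera) motion between consecutive frames and returns a 2x3 affine matrix H; the identity is returned for the first frame or when estimation fails. A rough driving loop (a sketch only): the random images stand in for real video frames, which in practice would come from cv2.VideoCapture.

    import numpy as np

    gmc = GMC(method='sparseOptFlow', downscale=2)
    frames = [np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8) for _ in range(3)]  # stand-in frames
    for frame in frames:
        H = gmc.apply(frame)                # 2x3 warp from the previous frame to this one
        R, t = H[:2, :2], H[:2, 2]          # rotation/scale block and translation
        prev_pt = np.array([100.0, 200.0])  # a point tracked in the previous frame
        curr_pt = R @ prev_pt + t           # where that point lands in the current frame
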
--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/byte_tracker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..utils import matching
4 | from ..utils.kalman_filter import KalmanFilterXYAH
5 | from .basetrack import BaseTrack, TrackState
6 |
7 |
8 | class STrack(BaseTrack):
9 | shared_kalman = KalmanFilterXYAH()
10 |
11 | def __init__(self, tlwh, score, cls):
12 |
13 | # wait activate
14 | self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
15 | self.kalman_filter = None
16 | self.mean, self.covariance = None, None
17 | self.is_activated = False
18 |
19 | self.score = score
20 | self.tracklet_len = 0
21 | self.cls = cls
22 | self.idx = tlwh[-1]
23 |
24 | def predict(self):
25 | mean_state = self.mean.copy()
26 | if self.state != TrackState.Tracked:
27 | mean_state[7] = 0
28 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
29 |
30 | @staticmethod
31 | def multi_predict(stracks):
32 | if len(stracks) <= 0:
33 | return
34 | multi_mean = np.asarray([st.mean.copy() for st in stracks])
35 | multi_covariance = np.asarray([st.covariance for st in stracks])
36 | for i, st in enumerate(stracks):
37 | if st.state != TrackState.Tracked:
38 | multi_mean[i][7] = 0
39 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
40 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
41 | stracks[i].mean = mean
42 | stracks[i].covariance = cov
43 |
44 | @staticmethod
45 | def multi_gmc(stracks, H=np.eye(2, 3)):
46 | if len(stracks) > 0:
47 | multi_mean = np.asarray([st.mean.copy() for st in stracks])
48 | multi_covariance = np.asarray([st.covariance for st in stracks])
49 |
50 | R = H[:2, :2]
51 | R8x8 = np.kron(np.eye(4, dtype=float), R)
52 | t = H[:2, 2]
53 |
54 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
55 | mean = R8x8.dot(mean)
56 | mean[:2] += t
57 | cov = R8x8.dot(cov).dot(R8x8.transpose())
58 |
59 | stracks[i].mean = mean
60 | stracks[i].covariance = cov
61 |
62 | def activate(self, kalman_filter, frame_id):
63 | """Start a new tracklet"""
64 | self.kalman_filter = kalman_filter
65 | self.track_id = self.next_id()
66 | self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
67 |
68 | self.tracklet_len = 0
69 | self.state = TrackState.Tracked
70 | if frame_id == 1:
71 | self.is_activated = True
72 | self.frame_id = frame_id
73 | self.start_frame = frame_id
74 |
75 | def re_activate(self, new_track, frame_id, new_id=False):
76 | self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
77 | self.convert_coords(new_track.tlwh))
78 | self.tracklet_len = 0
79 | self.state = TrackState.Tracked
80 | self.is_activated = True
81 | self.frame_id = frame_id
82 | if new_id:
83 | self.track_id = self.next_id()
84 | self.score = new_track.score
85 | self.cls = new_track.cls
86 | self.idx = new_track.idx
87 |
88 | def update(self, new_track, frame_id):
89 | """
90 | Update a matched track
91 | :type new_track: STrack
92 | :type frame_id: int
93 | :type update_feature: bool
94 | :return:
95 | """
96 | self.frame_id = frame_id
97 | self.tracklet_len += 1
98 |
99 | new_tlwh = new_track.tlwh
100 | self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
101 | self.convert_coords(new_tlwh))
102 | self.state = TrackState.Tracked
103 | self.is_activated = True
104 |
105 | self.score = new_track.score
106 | self.cls = new_track.cls
107 | self.idx = new_track.idx
108 |
109 | def convert_coords(self, tlwh):
110 | return self.tlwh_to_xyah(tlwh)
111 |
112 | @property
113 | def tlwh(self):
114 | """Get current position in bounding box format `(top left x, top left y,
115 | width, height)`.
116 | """
117 | if self.mean is None:
118 | return self._tlwh.copy()
119 | ret = self.mean[:4].copy()
120 | ret[2] *= ret[3]
121 | ret[:2] -= ret[2:] / 2
122 | return ret
123 |
124 | @property
125 | def tlbr(self):
126 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
127 | `(top left, bottom right)`.
128 | """
129 | ret = self.tlwh.copy()
130 | ret[2:] += ret[:2]
131 | return ret
132 |
133 | @staticmethod
134 | def tlwh_to_xyah(tlwh):
135 | """Convert bounding box to format `(center x, center y, aspect ratio,
136 | height)`, where the aspect ratio is `width / height`.
137 | """
138 | ret = np.asarray(tlwh).copy()
139 | ret[:2] += ret[2:] / 2
140 | ret[2] /= ret[3]
141 | return ret
142 |
143 | @staticmethod
144 | def tlbr_to_tlwh(tlbr):
145 | ret = np.asarray(tlbr).copy()
146 | ret[2:] -= ret[:2]
147 | return ret
148 |
149 | @staticmethod
150 | def tlwh_to_tlbr(tlwh):
151 | ret = np.asarray(tlwh).copy()
152 | ret[2:] += ret[:2]
153 | return ret
154 |
155 | def __repr__(self):
156 | return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'
157 |
158 |
159 | class BYTETracker:
160 |
161 | def __init__(self, args, frame_rate=30):
162 | self.tracked_stracks = [] # type: list[STrack]
163 | self.lost_stracks = [] # type: list[STrack]
164 | self.removed_stracks = [] # type: list[STrack]
165 |
166 | self.frame_id = 0
167 | self.args = args
168 | self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
169 | self.kalman_filter = self.get_kalmanfilter()
170 |
171 | def update(self, results, img=None):
172 | self.frame_id += 1
173 | activated_stracks = []
174 | refind_stracks = []
175 | lost_stracks = []
176 | removed_stracks = []
177 |
178 | scores = results.conf
179 | bboxes = results.xyxy
180 | # add index
181 | bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
182 | cls = results.cls
183 |
184 | remain_inds = scores > self.args.track_high_thresh
185 | inds_low = scores > self.args.track_low_thresh
186 | inds_high = scores < self.args.track_high_thresh
187 |
188 | inds_second = np.logical_and(inds_low, inds_high)
189 | dets_second = bboxes[inds_second]
190 | dets = bboxes[remain_inds]
191 | scores_keep = scores[remain_inds]
192 | scores_second = scores[inds_second]
193 | cls_keep = cls[remain_inds]
194 | cls_second = cls[inds_second]
195 |
196 | detections = self.init_track(dets, scores_keep, cls_keep, img)
197 | """ Add newly detected tracklets to tracked_stracks"""
198 | unconfirmed = []
199 | tracked_stracks = [] # type: list[STrack]
200 | for track in self.tracked_stracks:
201 | if not track.is_activated:
202 | unconfirmed.append(track)
203 | else:
204 | tracked_stracks.append(track)
205 | """ Step 2: First association, with high score detection boxes"""
206 | strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
207 | # Predict the current location with KF
208 | self.multi_predict(strack_pool)
209 | if hasattr(self, 'gmc'):
210 | warp = self.gmc.apply(img, dets)
211 | STrack.multi_gmc(strack_pool, warp)
212 | STrack.multi_gmc(unconfirmed, warp)
213 |
214 | dists = self.get_dists(strack_pool, detections)
215 | matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
216 |
217 | for itracked, idet in matches:
218 | track = strack_pool[itracked]
219 | det = detections[idet]
220 | if track.state == TrackState.Tracked:
221 | track.update(det, self.frame_id)
222 | activated_stracks.append(track)
223 | else:
224 | track.re_activate(det, self.frame_id, new_id=False)
225 | refind_stracks.append(track)
226 | """ Step 3: Second association, with low score detection boxes"""
227 | # Associate the remaining unmatched tracks with the low-score detections
228 | detections_second = self.init_track(dets_second, scores_second, cls_second, img)
229 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
230 | # TODO
231 | dists = matching.iou_distance(r_tracked_stracks, detections_second)
232 | matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
233 | for itracked, idet in matches:
234 | track = r_tracked_stracks[itracked]
235 | det = detections_second[idet]
236 | if track.state == TrackState.Tracked:
237 | track.update(det, self.frame_id)
238 | activated_stracks.append(track)
239 | else:
240 | track.re_activate(det, self.frame_id, new_id=False)
241 | refind_stracks.append(track)
242 |
243 | for it in u_track:
244 | track = r_tracked_stracks[it]
245 | if track.state != TrackState.Lost:
246 | track.mark_lost()
247 | lost_stracks.append(track)
248 | """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
249 | detections = [detections[i] for i in u_detection]
250 | dists = self.get_dists(unconfirmed, detections)
251 | matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
252 | for itracked, idet in matches:
253 | unconfirmed[itracked].update(detections[idet], self.frame_id)
254 | activated_stracks.append(unconfirmed[itracked])
255 | for it in u_unconfirmed:
256 | track = unconfirmed[it]
257 | track.mark_removed()
258 | removed_stracks.append(track)
259 | """ Step 4: Init new stracks"""
260 | for inew in u_detection:
261 | track = detections[inew]
262 | if track.score < self.args.new_track_thresh:
263 | continue
264 | track.activate(self.kalman_filter, self.frame_id)
265 | activated_stracks.append(track)
266 | """ Step 5: Update state"""
267 | for track in self.lost_stracks:
268 | if self.frame_id - track.end_frame > self.max_time_lost:
269 | track.mark_removed()
270 | removed_stracks.append(track)
271 |
272 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
273 | self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_stracks)
274 | self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks)
275 | self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks)
276 | self.lost_stracks.extend(lost_stracks)
277 | self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks)
278 | self.removed_stracks.extend(removed_stracks)
279 | self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
280 | output = [
281 | track.tlbr.tolist() + [track.track_id, track.score, track.cls, track.idx] for track in self.tracked_stracks
282 | if track.is_activated]
283 | return np.asarray(output, dtype=np.float32)
284 |
285 | def get_kalmanfilter(self):
286 | return KalmanFilterXYAH()
287 |
288 | def init_track(self, dets, scores, cls, img=None):
289 | return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections
290 |
291 | def get_dists(self, tracks, detections):
292 | dists = matching.iou_distance(tracks, detections)
293 | # TODO: mot20
294 | # if not self.args.mot20:
295 | dists = matching.fuse_score(dists, detections)
296 | return dists
297 |
298 | def multi_predict(self, tracks):
299 | STrack.multi_predict(tracks)
300 |
301 | @staticmethod
302 | def joint_stracks(tlista, tlistb):
303 | exists = {}
304 | res = []
305 | for t in tlista:
306 | exists[t.track_id] = 1
307 | res.append(t)
308 | for t in tlistb:
309 | tid = t.track_id
310 | if not exists.get(tid, 0):
311 | exists[tid] = 1
312 | res.append(t)
313 | return res
314 |
315 | @staticmethod
316 | def sub_stracks(tlista, tlistb):
317 | stracks = {t.track_id: t for t in tlista}
318 | for t in tlistb:
319 | tid = t.track_id
320 | if stracks.get(tid, 0):
321 | del stracks[tid]
322 | return list(stracks.values())
323 |
324 | @staticmethod
325 | def remove_duplicate_stracks(stracksa, stracksb):
326 | pdist = matching.iou_distance(stracksa, stracksb)
327 | pairs = np.where(pdist < 0.15)
328 | dupa, dupb = [], []
329 | for p, q in zip(*pairs):
330 | timep = stracksa[p].frame_id - stracksa[p].start_frame
331 | timeq = stracksb[q].frame_id - stracksb[q].start_frame
332 | if timep > timeq:
333 | dupb.append(q)
334 | else:
335 | dupa.append(p)
336 | resa = [t for i, t in enumerate(stracksa) if i not in dupa]
337 | resb = [t for i, t in enumerate(stracksb) if i not in dupb]
338 | return resa, resb
339 |
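BYTETracker.update() expects a per-frame results object exposing conf, xyxy and cls arrays and returns an Nx8 array of (x1, y1, x2, y2, track_id, score, cls, det_idx) for the currently activated tracks. A hedged single-frame sketch: `args` stands in for the parsed bytetrack.yaml thresholds and the detection values are made up for illustration.

    from types import SimpleNamespace
    import numpy as np

    args = SimpleNamespace(track_high_thresh=0.5, track_low_thresh=0.1,
                           new_track_thresh=0.6, track_buffer=30, match_thresh=0.8)
    tracker = BYTETracker(args, frame_rate=30)

    det = SimpleNamespace(conf=np.array([0.9, 0.3]),
                          xyxy=np.array([[10., 10., 50., 80.], [60., 20., 90., 70.]]),
                          cls=np.array([0., 0.]))
    tracks = tracker.update(det)  # one row per activated track for this frame
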
--------------------------------------------------------------------------------
/ultralytics/tracker/utils/kalman_filter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.linalg
3 |
4 | # Table for the 0.95 quantile of the chi-square distribution with N degrees of freedom (contains values for N=1, ..., 9)
5 | # Taken from MATLAB/Octave's chi2inv function and used as Mahalanobis gating threshold.
6 | chi2inv95 = {1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070, 6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919}
7 |
8 |
9 | class KalmanFilterXYAH:
10 | """
11 | Kalman filter used by ByteTrack.
12 | A simple Kalman filter for tracking bounding boxes in image space.
13 |
14 | The 8-dimensional state space
15 |
16 | x, y, a, h, vx, vy, va, vh
17 |
18 | contains the bounding box center position (x, y), aspect ratio a, height h,
19 | and their respective velocities.
20 |
21 | Object motion follows a constant velocity model. The bounding box location
22 | (x, y, a, h) is taken as direct observation of the state space (linear
23 | observation model).
24 |
25 | """
26 |
27 | def __init__(self):
28 | ndim, dt = 4, 1.
29 |
30 | # Create Kalman filter model matrices.
31 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
32 | for i in range(ndim):
33 | self._motion_mat[i, ndim + i] = dt
34 | self._update_mat = np.eye(ndim, 2 * ndim)
35 |
36 | # Motion and observation uncertainty are chosen relative to the current
37 | # state estimate. These weights control the amount of uncertainty in
38 | # the model. This is a bit hacky.
39 | self._std_weight_position = 1. / 20
40 | self._std_weight_velocity = 1. / 160
41 |
42 | def initiate(self, measurement):
43 | """Create track from unassociated measurement.
44 |
45 | Parameters
46 | ----------
47 | measurement : ndarray
48 | Bounding box coordinates (x, y, a, h) with center position (x, y),
49 | aspect ratio a, and height h.
50 |
51 | Returns
52 | -------
53 | (ndarray, ndarray)
54 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
55 | dimensional) of the new track. Unobserved velocities are initialized
56 | to 0 mean.
57 |
58 | """
59 | mean_pos = measurement
60 | mean_vel = np.zeros_like(mean_pos)
61 | mean = np.r_[mean_pos, mean_vel]
62 |
63 | std = [
64 | 2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2,
65 | 2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3],
66 | 10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3]]
67 | covariance = np.diag(np.square(std))
68 | return mean, covariance
69 |
70 | def predict(self, mean, covariance):
71 | """Run Kalman filter prediction step.
72 |
73 | Parameters
74 | ----------
75 | mean : ndarray
76 | The 8 dimensional mean vector of the object state at the previous
77 | time step.
78 | covariance : ndarray
79 | The 8x8 dimensional covariance matrix of the object state at the
80 | previous time step.
81 |
82 | Returns
83 | -------
84 | (ndarray, ndarray)
85 | Returns the mean vector and covariance matrix of the predicted
86 | state. Unobserved velocities are initialized to 0 mean.
87 |
88 | """
89 | std_pos = [
90 | self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2,
91 | self._std_weight_position * mean[3]]
92 | std_vel = [
93 | self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5,
94 | self._std_weight_velocity * mean[3]]
95 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
96 |
97 | # mean = np.dot(self._motion_mat, mean)
98 | mean = np.dot(mean, self._motion_mat.T)
99 | covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
100 |
101 | return mean, covariance
102 |
103 | def project(self, mean, covariance):
104 | """Project state distribution to measurement space.
105 |
106 | Parameters
107 | ----------
108 | mean : ndarray
109 | The state's mean vector (8 dimensional array).
110 | covariance : ndarray
111 | The state's covariance matrix (8x8 dimensional).
112 |
113 | Returns
114 | -------
115 | (ndarray, ndarray)
116 | Returns the projected mean and covariance matrix of the given state
117 | estimate.
118 |
119 | """
120 | std = [
121 | self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1,
122 | self._std_weight_position * mean[3]]
123 | innovation_cov = np.diag(np.square(std))
124 |
125 | mean = np.dot(self._update_mat, mean)
126 | covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
127 | return mean, covariance + innovation_cov
128 |
129 | def multi_predict(self, mean, covariance):
130 | """Run Kalman filter prediction step (Vectorized version).
131 | Parameters
132 | ----------
133 | mean : ndarray
134 | The Nx8 dimensional mean matrix of the object states at the previous
135 | time step.
136 | covariance : ndarray
137 | The Nx8x8 dimensional covariance matrices of the object states at the
138 | previous time step.
139 | Returns
140 | -------
141 | (ndarray, ndarray)
142 | Returns the mean vector and covariance matrix of the predicted
143 | state. Unobserved velocities are initialized to 0 mean.
144 | """
145 | std_pos = [
146 | self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3],
147 | 1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3]]
148 | std_vel = [
149 | self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3],
150 | 1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3]]
151 | sqr = np.square(np.r_[std_pos, std_vel]).T
152 |
153 | motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
154 | motion_cov = np.asarray(motion_cov)
155 |
156 | mean = np.dot(mean, self._motion_mat.T)
157 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
158 | covariance = np.dot(left, self._motion_mat.T) + motion_cov
159 |
160 | return mean, covariance
161 |
162 | def update(self, mean, covariance, measurement):
163 | """Run Kalman filter correction step.
164 |
165 | Parameters
166 | ----------
167 | mean : ndarray
168 | The predicted state's mean vector (8 dimensional).
169 | covariance : ndarray
170 | The state's covariance matrix (8x8 dimensional).
171 | measurement : ndarray
172 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
173 | is the center position, a the aspect ratio, and h the height of the
174 | bounding box.
175 |
176 | Returns
177 | -------
178 | (ndarray, ndarray)
179 | Returns the measurement-corrected state distribution.
180 |
181 | """
182 | projected_mean, projected_cov = self.project(mean, covariance)
183 |
184 | chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
185 | kalman_gain = scipy.linalg.cho_solve((chol_factor, lower),
186 | np.dot(covariance, self._update_mat.T).T,
187 | check_finite=False).T
188 | innovation = measurement - projected_mean
189 |
190 | new_mean = mean + np.dot(innovation, kalman_gain.T)
191 | new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))
192 | return new_mean, new_covariance
193 |
194 | def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
195 | """Compute gating distance between state distribution and measurements.
196 | A suitable distance threshold can be obtained from `chi2inv95`. If
197 | `only_position` is False, the chi-square distribution has 4 degrees of
198 | freedom, otherwise 2.
199 | Parameters
200 | ----------
201 | mean : ndarray
202 | Mean vector over the state distribution (8 dimensional).
203 | covariance : ndarray
204 | Covariance of the state distribution (8x8 dimensional).
205 | measurements : ndarray
206 | An Nx4 dimensional matrix of N measurements, each in
207 | format (x, y, a, h) where (x, y) is the bounding box center
208 | position, a the aspect ratio, and h the height.
209 | only_position : Optional[bool]
210 | If True, distance computation is done with respect to the bounding
211 | box center position only.
212 | Returns
213 | -------
214 | ndarray
215 | Returns an array of length N, where the i-th element contains the
216 | squared Mahalanobis distance between (mean, covariance) and
217 | `measurements[i]`.
218 | """
219 | mean, covariance = self.project(mean, covariance)
220 | if only_position:
221 | mean, covariance = mean[:2], covariance[:2, :2]
222 | measurements = measurements[:, :2]
223 |
224 | d = measurements - mean
225 | if metric == 'gaussian':
226 | return np.sum(d * d, axis=1)
227 | elif metric == 'maha':
228 | cholesky_factor = np.linalg.cholesky(covariance)
229 | z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True)
230 | return np.sum(z * z, axis=0) # square maha
231 | else:
232 | raise ValueError('invalid distance metric')
233 |
234 |
235 | class KalmanFilterXYWH:
236 | """
237 | Kalman filter used by BoT-SORT.
238 | A simple Kalman filter for tracking bounding boxes in image space.
239 |
240 | The 8-dimensional state space
241 |
242 | x, y, w, h, vx, vy, vw, vh
243 |
244 | contains the bounding box center position (x, y), width w, height h,
245 | and their respective velocities.
246 |
247 | Object motion follows a constant velocity model. The bounding box location
248 | (x, y, w, h) is taken as direct observation of the state space (linear
249 | observation model).
250 |
251 | """
252 |
253 | def __init__(self):
254 | ndim, dt = 4, 1.
255 |
256 | # Create Kalman filter model matrices.
257 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
258 | for i in range(ndim):
259 | self._motion_mat[i, ndim + i] = dt
260 | self._update_mat = np.eye(ndim, 2 * ndim)
261 |
262 | # Motion and observation uncertainty are chosen relative to the current
263 | # state estimate. These weights control the amount of uncertainty in
264 | # the model. This is a bit hacky.
265 | self._std_weight_position = 1. / 20
266 | self._std_weight_velocity = 1. / 160
267 |
268 | def initiate(self, measurement):
269 | """Create track from unassociated measurement.
270 |
271 | Parameters
272 | ----------
273 | measurement : ndarray
274 | Bounding box coordinates (x, y, w, h) with center position (x, y),
275 | width w, and height h.
276 |
277 | Returns
278 | -------
279 | (ndarray, ndarray)
280 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
281 | dimensional) of the new track. Unobserved velocities are initialized
282 | to 0 mean.
283 |
284 | """
285 | mean_pos = measurement
286 | mean_vel = np.zeros_like(mean_pos)
287 | mean = np.r_[mean_pos, mean_vel]
288 |
289 | std = [
290 | 2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3],
291 | 2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3],
292 | 10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3],
293 | 10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3]]
294 | covariance = np.diag(np.square(std))
295 | return mean, covariance
296 |
297 | def predict(self, mean, covariance):
298 | """Run Kalman filter prediction step.
299 |
300 | Parameters
301 | ----------
302 | mean : ndarray
303 | The 8 dimensional mean vector of the object state at the previous
304 | time step.
305 | covariance : ndarray
306 | The 8x8 dimensional covariance matrix of the object state at the
307 | previous time step.
308 |
309 | Returns
310 | -------
311 | (ndarray, ndarray)
312 | Returns the mean vector and covariance matrix of the predicted
313 | state. Unobserved velocities are initialized to 0 mean.
314 |
315 | """
316 | std_pos = [
317 | self._std_weight_position * mean[2], self._std_weight_position * mean[3],
318 | self._std_weight_position * mean[2], self._std_weight_position * mean[3]]
319 | std_vel = [
320 | self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3],
321 | self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3]]
322 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
323 |
324 | mean = np.dot(mean, self._motion_mat.T)
325 | covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
326 |
327 | return mean, covariance
328 |
329 | def project(self, mean, covariance):
330 | """Project state distribution to measurement space.
331 |
332 | Parameters
333 | ----------
334 | mean : ndarray
335 | The state's mean vector (8 dimensional array).
336 | covariance : ndarray
337 | The state's covariance matrix (8x8 dimensional).
338 |
339 | Returns
340 | -------
341 | (ndarray, ndarray)
342 | Returns the projected mean and covariance matrix of the given state
343 | estimate.
344 |
345 | """
346 | std = [
347 | self._std_weight_position * mean[2], self._std_weight_position * mean[3],
348 | self._std_weight_position * mean[2], self._std_weight_position * mean[3]]
349 | innovation_cov = np.diag(np.square(std))
350 |
351 | mean = np.dot(self._update_mat, mean)
352 | covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
353 | return mean, covariance + innovation_cov
354 |
355 | def multi_predict(self, mean, covariance):
356 | """Run Kalman filter prediction step (Vectorized version).
357 | Parameters
358 | ----------
359 | mean : ndarray
360 | The Nx8 dimensional mean matrix of the object states at the previous
361 | time step.
362 | covariance : ndarray
363 | The Nx8x8 dimensional covariance matrices of the object states at the
364 | previous time step.
365 | Returns
366 | -------
367 | (ndarray, ndarray)
368 | Returns the mean vector and covariance matrix of the predicted
369 | state. Unobserved velocities are initialized to 0 mean.
370 | """
371 | std_pos = [
372 | self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3],
373 | self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3]]
374 | std_vel = [
375 | self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3],
376 | self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3]]
377 | sqr = np.square(np.r_[std_pos, std_vel]).T
378 |
379 | motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
380 | motion_cov = np.asarray(motion_cov)
381 |
382 | mean = np.dot(mean, self._motion_mat.T)
383 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
384 | covariance = np.dot(left, self._motion_mat.T) + motion_cov
385 |
386 | return mean, covariance
387 |
388 | def update(self, mean, covariance, measurement):
389 | """Run Kalman filter correction step.
390 |
391 | Parameters
392 | ----------
393 | mean : ndarray
394 | The predicted state's mean vector (8 dimensional).
395 | covariance : ndarray
396 | The state's covariance matrix (8x8 dimensional).
397 | measurement : ndarray
398 | The 4 dimensional measurement vector (x, y, w, h), where (x, y)
399 | is the center position, w the width, and h the height of the
400 | bounding box.
401 |
402 | Returns
403 | -------
404 | (ndarray, ndarray)
405 | Returns the measurement-corrected state distribution.
406 |
407 | """
408 | projected_mean, projected_cov = self.project(mean, covariance)
409 |
410 | chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
411 | kalman_gain = scipy.linalg.cho_solve((chol_factor, lower),
412 | np.dot(covariance, self._update_mat.T).T,
413 | check_finite=False).T
414 | innovation = measurement - projected_mean
415 |
416 | new_mean = mean + np.dot(innovation, kalman_gain.T)
417 | new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))
418 | return new_mean, new_covariance
419 |
420 | def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
421 | """Compute gating distance between state distribution and measurements.
422 | A suitable distance threshold can be obtained from `chi2inv95`. If
423 | `only_position` is False, the chi-square distribution has 4 degrees of
424 | freedom, otherwise 2.
425 | Parameters
426 | ----------
427 | mean : ndarray
428 | Mean vector over the state distribution (8 dimensional).
429 | covariance : ndarray
430 | Covariance of the state distribution (8x8 dimensional).
431 | measurements : ndarray
432 | An Nx4 dimensional matrix of N measurements, each in
433 | format (x, y, w, h) where (x, y) is the bounding box center
434 | position, w the width, and h the height.
435 | only_position : Optional[bool]
436 | If True, distance computation is done with respect to the bounding
437 | box center position only.
438 | Returns
439 | -------
440 | ndarray
441 | Returns an array of length N, where the i-th element contains the
442 | squared Mahalanobis distance between (mean, covariance) and
443 | `measurements[i]`.
444 | """
445 | mean, covariance = self.project(mean, covariance)
446 | if only_position:
447 | mean, covariance = mean[:2], covariance[:2, :2]
448 | measurements = measurements[:, :2]
449 |
450 | d = measurements - mean
451 | if metric == 'gaussian':
452 | return np.sum(d * d, axis=1)
453 | elif metric == 'maha':
454 | cholesky_factor = np.linalg.cholesky(covariance)
455 | z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True)
456 | return np.sum(z * z, axis=0) # square maha
457 | else:
458 | raise ValueError('invalid distance metric')
459 |
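Both filters follow the same constant-velocity predict/update cycle; only the measurement parameterisation differs (x, y, a, h versus x, y, w, h). A small sketch of one cycle with KalmanFilterXYAH, using made-up numbers and the chi2inv95 table above for Mahalanobis gating:

    import numpy as np

    kf = KalmanFilterXYAH()
    z0 = np.array([320., 240., 0.5, 120.])          # first measurement: cx, cy, aspect, height
    mean, cov = kf.initiate(z0)                      # 8-d state, velocities start at zero
    mean, cov = kf.predict(mean, cov)                # constant-velocity time update
    z1 = np.array([324., 238., 0.5, 121.])           # next measurement
    d = kf.gating_distance(mean, cov, z1[None, :])   # squared Mahalanobis distance
    if d[0] <= chi2inv95[4]:                         # gate at the 0.95 quantile, 4 dof
        mean, cov = kf.update(mean, cov, z1)         # measurement correction
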
--------------------------------------------------------------------------------
/ultralytics/nn/modules.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 | """
3 | Common modules
4 | """
5 |
6 | import math
7 |
8 | import torch
9 | import torch.nn as nn
10 |
11 | from ultralytics.yolo.utils.tal import dist2bbox, make_anchors
12 |
13 |
14 | def autopad(k, p=None, d=1): # kernel, padding, dilation
15 | # Pad to 'same' shape outputs
16 | if d > 1:
17 | k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
18 | if p is None:
19 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
20 | return p
21 |
22 |
23 | class Conv(nn.Module):
24 | # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
25 | default_act = nn.SiLU() # default activation
26 |
27 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
28 | super().__init__()
29 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
30 | self.bn = nn.BatchNorm2d(c2)
31 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
32 |
33 | def forward(self, x):
34 | return self.act(self.bn(self.conv(x)))
35 |
36 | def forward_fuse(self, x):
37 | return self.act(self.conv(x))
38 |
39 |
40 | class DWConv(Conv):
41 | # Depth-wise convolution
42 | def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
43 | super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
44 |
45 |
46 | class DWConvTranspose2d(nn.ConvTranspose2d):
47 | # Depth-wise transpose convolution
48 | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
49 | super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
50 |
51 |
52 | class ConvTranspose(nn.Module):
53 | # Convolution transpose 2d layer
54 | default_act = nn.SiLU() # default activation
55 |
56 | def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
57 | super().__init__()
58 | self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
59 | self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
60 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
61 |
62 | def forward(self, x):
63 | return self.act(self.bn(self.conv_transpose(x)))
64 |
65 | def forward_fuse(self, x):
66 | return self.act(self.conv_transpose(x))
67 |
68 |
69 | class DFL(nn.Module):
70 | # Integral module of Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
71 | def __init__(self, c1=16):
72 | super().__init__()
73 | self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
74 | x = torch.arange(c1, dtype=torch.float)
75 | self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
76 | self.c1 = c1
77 |
78 | def forward(self, x):
79 | b, c, a = x.shape # batch, channels, anchors
80 | return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
81 | # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
82 |
83 |
84 | class TransformerLayer(nn.Module):
85 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
86 | def __init__(self, c, num_heads):
87 | super().__init__()
88 | self.q = nn.Linear(c, c, bias=False)
89 | self.k = nn.Linear(c, c, bias=False)
90 | self.v = nn.Linear(c, c, bias=False)
91 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
92 | self.fc1 = nn.Linear(c, c, bias=False)
93 | self.fc2 = nn.Linear(c, c, bias=False)
94 |
95 | def forward(self, x):
96 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
97 | x = self.fc2(self.fc1(x)) + x
98 | return x
99 |
100 |
101 | class TransformerBlock(nn.Module):
102 | # Vision Transformer https://arxiv.org/abs/2010.11929
103 | def __init__(self, c1, c2, num_heads, num_layers):
104 | super().__init__()
105 | self.conv = None
106 | if c1 != c2:
107 | self.conv = Conv(c1, c2)
108 | self.linear = nn.Linear(c2, c2) # learnable position embedding
109 | self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
110 | self.c2 = c2
111 |
112 | def forward(self, x):
113 | if self.conv is not None:
114 | x = self.conv(x)
115 | b, _, w, h = x.shape
116 | p = x.flatten(2).permute(2, 0, 1)
117 | return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
118 |
119 |
120 | class Bottleneck(nn.Module):
121 | # Standard bottleneck
122 | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch_in, ch_out, shortcut, groups, kernels, expand
123 | super().__init__()
124 | c_ = int(c2 * e) # hidden channels
125 | self.cv1 = Conv(c1, c_, k[0], 1)
126 | self.cv2 = Conv(c_, c2, k[1], 1, g=g)
127 | self.add = shortcut and c1 == c2
128 |
129 | def forward(self, x):
130 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
131 |
132 |
133 | class BottleneckCSP(nn.Module):
134 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
135 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
136 | super().__init__()
137 | c_ = int(c2 * e) # hidden channels
138 | self.cv1 = Conv(c1, c_, 1, 1)
139 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
140 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
141 | self.cv4 = Conv(2 * c_, c2, 1, 1)
142 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
143 | self.act = nn.SiLU()
144 | self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
145 |
146 | def forward(self, x):
147 | y1 = self.cv3(self.m(self.cv1(x)))
148 | y2 = self.cv2(x)
149 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
150 |
151 |
152 | class C3(nn.Module):
153 | # CSP Bottleneck with 3 convolutions
154 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
155 | super().__init__()
156 | c_ = int(c2 * e) # hidden channels
157 | self.cv1 = Conv(c1, c_, 1, 1)
158 | self.cv2 = Conv(c1, c_, 1, 1)
159 | self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
160 | self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
161 |
162 | def forward(self, x):
163 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
164 |
165 |
166 | class C2(nn.Module):
167 | # CSP Bottleneck with 2 convolutions
168 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
169 | super().__init__()
170 | self.c = int(c2 * e) # hidden channels
171 | self.cv1 = Conv(c1, 2 * self.c, 1, 1)
172 | self.cv2 = Conv(2 * self.c, c2, 1) # optional act=FReLU(c2)
173 | # self.attention = ChannelAttention(2 * self.c) # or SpatialAttention()
174 | self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
175 |
176 | def forward(self, x):
177 | a, b = self.cv1(x).split((self.c, self.c), 1)
178 | return self.cv2(torch.cat((self.m(a), b), 1))
179 |
180 |
181 | class C2f(nn.Module):
182 | # Faster implementation of CSP Bottleneck with 2 convolutions
183 | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
184 | super().__init__()
185 | self.c = int(c2 * e) # hidden channels
186 | self.cv1 = Conv(c1, 2 * self.c, 1, 1)
187 | self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2)
188 | self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
189 |
190 | def forward(self, x):
191 | y = list(self.cv1(x).split((self.c, self.c), 1))
192 | y.extend(m(y[-1]) for m in self.m)
193 | return self.cv2(torch.cat(y, 1))
194 |
195 |
196 | class ChannelAttention(nn.Module):
197 | # Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet
198 | def __init__(self, channels: int) -> None:
199 | super().__init__()
200 | self.pool = nn.AdaptiveAvgPool2d(1)
201 | self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
202 | self.act = nn.Sigmoid()
203 |
204 | def forward(self, x: torch.Tensor) -> torch.Tensor:
205 | return x * self.act(self.fc(self.pool(x)))
206 |
207 |
208 | class SpatialAttention(nn.Module):
209 | # Spatial-attention module
210 | def __init__(self, kernel_size=7):
211 | super().__init__()
212 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
213 | padding = 3 if kernel_size == 7 else 1
214 | self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
215 | self.act = nn.Sigmoid()
216 |
217 | def forward(self, x):
218 | return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))
219 |
220 |
221 | class CBAM(nn.Module):
222 | # Convolutional Block Attention Module
223 | def __init__(self, c1, kernel_size=7): # ch_in, kernels
224 | super().__init__()
225 | self.channel_attention = ChannelAttention(c1)
226 | self.spatial_attention = SpatialAttention(kernel_size)
227 |
228 | def forward(self, x):
229 | return self.spatial_attention(self.channel_attention(x))
230 |
231 |
232 | class C1(nn.Module):
233 | # CSP Bottleneck with 1 convolution
234 | def __init__(self, c1, c2, n=1): # ch_in, ch_out, number
235 | super().__init__()
236 | self.cv1 = Conv(c1, c2, 1, 1)
237 | self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
238 |
239 | def forward(self, x):
240 | y = self.cv1(x)
241 | return self.m(y) + y
242 |
243 |
244 | class C3x(C3):
245 | # C3 module with cross-convolutions
246 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
247 | super().__init__(c1, c2, n, shortcut, g, e)
248 | self.c_ = int(c2 * e)
249 | self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)))
250 |
251 |
252 | class C3TR(C3):
253 | # C3 module with TransformerBlock()
254 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
255 | super().__init__(c1, c2, n, shortcut, g, e)
256 | c_ = int(c2 * e)
257 | self.m = TransformerBlock(c_, c_, 4, n)
258 |
259 |
260 | class C3Ghost(C3):
261 | # C3 module with GhostBottleneck()
262 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
263 | super().__init__(c1, c2, n, shortcut, g, e)
264 | c_ = int(c2 * e) # hidden channels
265 | self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
266 |
267 |
268 | class SPP(nn.Module):
269 | # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
270 | def __init__(self, c1, c2, k=(5, 9, 13)):
271 | super().__init__()
272 | c_ = c1 // 2 # hidden channels
273 | self.cv1 = Conv(c1, c_, 1, 1)
274 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
275 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
276 |
277 | def forward(self, x):
278 | x = self.cv1(x)
279 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
280 |
281 |
282 | class SPPF(nn.Module):
283 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
284 | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
285 | super().__init__()
286 | c_ = c1 // 2 # hidden channels
287 | self.cv1 = Conv(c1, c_, 1, 1)
288 | self.cv2 = Conv(c_ * 4, c2, 1, 1)
289 | self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
290 |
291 | def forward(self, x):
292 | x = self.cv1(x)
293 | y1 = self.m(x)
294 | y2 = self.m(y1)
295 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
296 |
297 |
298 | class Focus(nn.Module):
299 | # Focus wh information into c-space
300 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
301 | super().__init__()
302 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
303 | # self.contract = Contract(gain=2)
304 |
305 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
306 | return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
307 | # return self.conv(self.contract(x))
308 |
309 |
310 | class GhostConv(nn.Module):
311 | # Ghost Convolution https://github.com/huawei-noah/ghostnet
312 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
313 | super().__init__()
314 | c_ = c2 // 2 # hidden channels
315 | self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
316 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
317 |
318 | def forward(self, x):
319 | y = self.cv1(x)
320 | return torch.cat((y, self.cv2(y)), 1)
321 |
322 |
323 | class GhostBottleneck(nn.Module):
324 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
325 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
326 | super().__init__()
327 | c_ = c2 // 2
328 | self.conv = nn.Sequential(
329 | GhostConv(c1, c_, 1, 1), # pw
330 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
331 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
332 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
333 | act=False)) if s == 2 else nn.Identity()
334 |
335 | def forward(self, x):
336 | return self.conv(x) + self.shortcut(x)
337 |
338 |
339 | class Concat(nn.Module):
340 | # Concatenate a list of tensors along dimension
341 | def __init__(self, dimension=1):
342 | super().__init__()
343 | self.d = dimension
344 |
345 | def forward(self, x):
346 | return torch.cat(x, self.d)
347 |
348 |
349 | class Proto(nn.Module):
350 | # YOLOv8 mask Proto module for segmentation models
351 | def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
352 | super().__init__()
353 | self.cv1 = Conv(c1, c_, k=3)
354 | self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True) # nn.Upsample(scale_factor=2, mode='nearest')
355 | self.cv2 = Conv(c_, c_, k=3)
356 | self.cv3 = Conv(c_, c2)
357 |
358 | def forward(self, x):
359 | return self.cv3(self.cv2(self.upsample(self.cv1(x))))
360 |
361 |
362 | class Ensemble(nn.ModuleList):
363 | # Ensemble of models
364 | def __init__(self):
365 | super().__init__()
366 |
367 | def forward(self, x, augment=False, profile=False, visualize=False):
368 | y = [module(x, augment, profile, visualize)[0] for module in self]
369 | # y = torch.stack(y).max(0)[0] # max ensemble
370 | # y = torch.stack(y).mean(0) # mean ensemble
371 | y = torch.cat(y, 1) # nms ensemble
372 | return y, None # inference, train output
373 |
374 |
375 | # heads
376 | class Detect(nn.Module):
377 | # YOLOv8 Detect head for detection models
378 | dynamic = False # force grid reconstruction
379 | export = False # export mode
380 | shape = None
381 | anchors = torch.empty(0) # init
382 | strides = torch.empty(0) # init
383 |
384 | def __init__(self, nc=80, ch=()): # detection layer
385 | super().__init__()
386 | self.nc = nc # number of classes
387 | self.nl = len(ch) # number of detection layers
388 | self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
389 | self.no = nc + self.reg_max * 4 # number of outputs per anchor
390 | self.stride = torch.zeros(self.nl) # strides computed during build
391 |
392 | c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels
393 | self.cv2 = nn.ModuleList(
394 | nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
395 | self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
396 | self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
397 |
398 | def forward(self, x):
399 | shape = x[0].shape # BCHW
400 | for i in range(self.nl):
401 | x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
402 | if self.training:
403 | return x
404 | elif self.dynamic or self.shape != shape:
405 | self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
406 | self.shape = shape
407 |
408 | box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
409 | dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
410 | y = torch.cat((dbox, cls.sigmoid()), 1)
411 | return y if self.export else (y, x)
412 |
413 | def bias_init(self):
414 | # Initialize Detect() biases, WARNING: requires stride availability
415 | m = self # self.model[-1] # Detect() module
416 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
417 | # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency
418 | for a, b, s in zip(m.cv2, m.cv3, m.stride): # from
419 | a[-1].bias.data[:] = 1.0 # box
420 | b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
421 |
422 |
423 | class Segment(Detect):
424 | # YOLOv8 Segment head for segmentation models
425 | def __init__(self, nc=80, nm=32, npr=256, ch=()):
426 | super().__init__(nc, ch)
427 | self.nm = nm # number of masks
428 | self.npr = npr # number of protos
429 | self.proto = Proto(ch[0], self.npr, self.nm) # protos
430 | self.detect = Detect.forward
431 |
432 | c4 = max(ch[0] // 4, self.nm)
433 | self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
434 |
435 | def forward(self, x):
436 | p = self.proto(x[0]) # mask protos
437 | bs = p.shape[0] # batch size
438 |
439 | mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients
440 | x = self.detect(self, x)
441 | if self.training:
442 | return x, mc, p
443 | return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
444 |
445 |
446 | class Classify(nn.Module):
447 | # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
448 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
449 | super().__init__()
450 | c_ = 1280 # efficientnet_b0 size
451 | self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
452 | self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
453 | self.drop = nn.Dropout(p=0.0, inplace=True)
454 | self.linear = nn.Linear(c_, c2) # to x(b,c2)
455 |
456 | def forward(self, x):
457 | if isinstance(x, list):
458 | x = torch.cat(x, 1)
459 | x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
460 | return x if self.training else x.softmax(1)
461 |
--------------------------------------------------------------------------------
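A minimal shape-check sketch for two of the modules defined above (GhostConv and the Detect head), assuming this repo is importable as the `ultralytics` package so the classes in `ultralytics/nn/modules.py` can be imported directly; the dummy tensors and channel choices are illustrative only, not taken from the repo:

import torch
from ultralytics.nn.modules import Detect, GhostConv  # classes listed above

# GhostConv: half the output channels come from a cheap 5x5 depthwise conv
# applied to the primary 1x1 conv output, then the two halves are concatenated.
ghost = GhostConv(c1=64, c2=128)
x = torch.randn(1, 64, 32, 32)
print(ghost(x).shape)  # torch.Size([1, 128, 32, 32])

# Detect: each of the nl scales predicts 4*reg_max DFL box channels plus nc
# class channels; in training mode (the default for a fresh module) the raw
# per-scale feature maps are returned.
head = Detect(nc=80, ch=(64, 128, 256))
feats = [torch.randn(1, c, s, s) for c, s in zip((64, 128, 256), (80, 40, 20))]
print([o.shape for o in head(feats)])  # 4*16 + 80 = 144 channels per scale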
/ultralytics/nn/autobackend.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 | import ast
3 | import contextlib
4 | import json
5 | import platform
6 | import zipfile
7 | from collections import OrderedDict, namedtuple
8 | from pathlib import Path
9 | from urllib.parse import urlparse
10 |
11 | import cv2
12 | import numpy as np
13 | import torch
14 | import torch.nn as nn
15 | from PIL import Image
16 |
17 | from ultralytics.yolo.utils import LOGGER, ROOT, yaml_load
18 | from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_version, check_yaml
19 | from ultralytics.yolo.utils.downloads import attempt_download_asset, is_url
20 | from ultralytics.yolo.utils.ops import xywh2xyxy
21 |
22 |
23 | def check_class_names(names):
24 | # Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts.
25 | if isinstance(names, list): # names is a list
26 | names = dict(enumerate(names)) # convert to dict
27 | if isinstance(names, dict):
28 | if not all(isinstance(k, int) for k in names.keys()): # convert string keys to int, i.e. '0' to 0
29 | names = {int(k): v for k, v in names.items()}
30 | if isinstance(names[0], str) and names[0].startswith('n0'): # imagenet class codes, i.e. 'n01440764'
31 | map = yaml_load(ROOT / 'yolo/data/datasets/ImageNet.yaml')['map'] # human-readable names
32 | names = {k: map[v] for k, v in names.items()}
33 | return names
34 |
35 |
36 | class AutoBackend(nn.Module):
37 |
38 | def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
39 | """
40 | MultiBackend class for python inference on various platforms using Ultralytics YOLO.
41 |
42 | Args:
43 | weights (str): The path to the weights file. Default: 'yolov8n.pt'
44 | device (torch.device): The device to run the model on.
45 | dnn (bool): Use OpenCV's DNN module for inference if True, defaults to False.
46 |         data (str | Path): Additional data.yaml file for class names, optional
47 | fp16 (bool): If True, use half precision. Default: False
48 | fuse (bool): Whether to fuse the model or not. Default: True
49 |
50 | Supported formats and their naming conventions:
51 | | Format | Suffix |
52 | |-----------------------|------------------|
53 | | PyTorch | *.pt |
54 | | TorchScript | *.torchscript |
55 | | ONNX Runtime | *.onnx |
56 | | ONNX OpenCV DNN | *.onnx --dnn |
57 | | OpenVINO | *.xml |
58 | | CoreML | *.mlmodel |
59 | | TensorRT | *.engine |
60 | | TensorFlow SavedModel | *_saved_model |
61 | | TensorFlow GraphDef | *.pb |
62 | | TensorFlow Lite | *.tflite |
63 | | TensorFlow Edge TPU | *_edgetpu.tflite |
64 | | PaddlePaddle | *_paddle_model |
65 | """
66 | super().__init__()
67 | w = str(weights[0] if isinstance(weights, list) else weights)
68 | nn_module = isinstance(weights, torch.nn.Module)
69 | pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
70 | fp16 &= pt or jit or onnx or engine or nn_module # FP16
71 | nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCHW)
72 | stride = 32 # default stride
73 | model = None # TODO: resolves ONNX inference, verify effect on other backends
74 | cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA
75 | if not (pt or triton or nn_module):
76 | w = attempt_download_asset(w) # download if not local
77 |
78 | # NOTE: special case: in-memory pytorch model
79 | if nn_module:
80 | model = weights.to(device)
81 | model = model.fuse() if fuse else model
82 | names = model.module.names if hasattr(model, 'module') else model.names # get class names
83 | stride = max(int(model.stride.max()), 32) # model stride
84 | model.half() if fp16 else model.float()
85 | self.model = model # explicitly assign for to(), cpu(), cuda(), half()
86 | pt = True
87 | elif pt: # PyTorch
88 | from ultralytics.nn.tasks import attempt_load_weights
89 | model = attempt_load_weights(weights if isinstance(weights, list) else w,
90 | device=device,
91 | inplace=True,
92 | fuse=fuse)
93 | stride = max(int(model.stride.max()), 32) # model stride
94 | names = model.module.names if hasattr(model, 'module') else model.names # get class names
95 | model.half() if fp16 else model.float()
96 | self.model = model # explicitly assign for to(), cpu(), cuda(), half()
97 | elif jit: # TorchScript
98 | LOGGER.info(f'Loading {w} for TorchScript inference...')
99 | extra_files = {'config.txt': ''} # model metadata
100 | model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
101 | model.half() if fp16 else model.float()
102 | if extra_files['config.txt']: # load metadata dict
103 | d = json.loads(extra_files['config.txt'],
104 | object_hook=lambda d: {int(k) if k.isdigit() else k: v
105 | for k, v in d.items()})
106 | stride, names = int(d['stride']), d['names']
107 | elif dnn: # ONNX OpenCV DNN
108 | LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
109 | check_requirements('opencv-python>=4.5.4')
110 | net = cv2.dnn.readNetFromONNX(w)
111 | elif onnx: # ONNX Runtime
112 | LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
113 | check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
114 | import onnxruntime
115 | providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
116 | session = onnxruntime.InferenceSession(w, providers=providers)
117 | output_names = [x.name for x in session.get_outputs()]
118 | meta = session.get_modelmeta().custom_metadata_map # metadata
119 | if 'stride' in meta:
120 | stride, names = int(meta['stride']), eval(meta['names'])
121 | elif xml: # OpenVINO
122 | LOGGER.info(f'Loading {w} for OpenVINO inference...')
123 | check_requirements('openvino') # requires openvino-dev: https://pypi.org/project/openvino-dev/
124 | from openvino.runtime import Core, Layout, get_batch # noqa
125 | ie = Core()
126 | if not Path(w).is_file(): # if not *.xml
127 | w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
128 | network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
129 | if network.get_parameters()[0].get_layout().empty:
130 | network.get_parameters()[0].set_layout(Layout('NCHW'))
131 | batch_dim = get_batch(network)
132 | if batch_dim.is_static:
133 | batch_size = batch_dim.get_length()
134 | executable_network = ie.compile_model(network, device_name='CPU') # device_name="MYRIAD" for Intel NCS2
135 | elif engine: # TensorRT
136 | LOGGER.info(f'Loading {w} for TensorRT inference...')
137 | import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
138 | check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
139 | if device.type == 'cpu':
140 | device = torch.device('cuda:0')
141 | Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
142 | logger = trt.Logger(trt.Logger.INFO)
143 | # Read file
144 | with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
145 | # Read metadata length
146 | meta_len = int.from_bytes(f.read(4), byteorder='little')
147 | # Read metadata
148 | meta = json.loads(f.read(meta_len).decode('utf-8'))
149 | stride, names = int(meta['stride']), meta['names']
150 | # Read engine
151 | model = runtime.deserialize_cuda_engine(f.read())
152 | context = model.create_execution_context()
153 | bindings = OrderedDict()
154 | output_names = []
155 | fp16 = False # default updated below
156 | dynamic = False
157 | for i in range(model.num_bindings):
158 | name = model.get_binding_name(i)
159 | dtype = trt.nptype(model.get_binding_dtype(i))
160 | if model.binding_is_input(i):
161 | if -1 in tuple(model.get_binding_shape(i)): # dynamic
162 | dynamic = True
163 | context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
164 | if dtype == np.float16:
165 | fp16 = True
166 | else: # output
167 | output_names.append(name)
168 | shape = tuple(context.get_binding_shape(i))
169 | im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
170 | bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
171 | binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
172 | batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
173 | elif coreml: # CoreML
174 | LOGGER.info(f'Loading {w} for CoreML inference...')
175 | import coremltools as ct
176 | model = ct.models.MLModel(w)
177 | elif saved_model: # TF SavedModel
178 | LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
179 | import tensorflow as tf
180 | keras = False # assume TF1 saved_model
181 | model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
182 | elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
183 | LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
184 | import tensorflow as tf
185 |
186 | def wrap_frozen_graph(gd, inputs, outputs):
187 | x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped
188 | ge = x.graph.as_graph_element
189 | return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
190 |
191 | def gd_outputs(gd):
192 | name_list, input_list = [], []
193 | for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
194 | name_list.append(node.name)
195 | input_list.extend(node.input)
196 | return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
197 |
198 | gd = tf.Graph().as_graph_def() # TF GraphDef
199 | with open(w, 'rb') as f:
200 | gd.ParseFromString(f.read())
201 | frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
202 | elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
203 | try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
204 | from tflite_runtime.interpreter import Interpreter, load_delegate
205 | except ImportError:
206 | import tensorflow as tf
207 | Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
208 | if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
209 | LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
210 | delegate = {
211 | 'Linux': 'libedgetpu.so.1',
212 | 'Darwin': 'libedgetpu.1.dylib',
213 | 'Windows': 'edgetpu.dll'}[platform.system()]
214 | interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
215 | else: # TFLite
216 | LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
217 | interpreter = Interpreter(model_path=w) # load TFLite model
218 | interpreter.allocate_tensors() # allocate
219 | input_details = interpreter.get_input_details() # inputs
220 | output_details = interpreter.get_output_details() # outputs
221 | # load metadata
222 | with contextlib.suppress(zipfile.BadZipFile):
223 | with zipfile.ZipFile(w, 'r') as model:
224 | meta_file = model.namelist()[0]
225 | meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
226 | stride, names = int(meta['stride']), meta['names']
227 | elif tfjs: # TF.js
228 | raise NotImplementedError('YOLOv8 TF.js inference is not supported')
229 | elif paddle: # PaddlePaddle
230 | LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
231 | check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
232 | import paddle.inference as pdi
233 | if not Path(w).is_file(): # if not *.pdmodel
234 | w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir
235 | weights = Path(w).with_suffix('.pdiparams')
236 | config = pdi.Config(str(w), str(weights))
237 | if cuda:
238 | config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
239 | predictor = pdi.create_predictor(config)
240 | input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
241 | output_names = predictor.get_output_names()
242 | elif triton: # NVIDIA Triton Inference Server
243 | LOGGER.info('Triton Inference Server not supported...')
244 | '''
245 | TODO:
246 | check_requirements('tritonclient[all]')
247 | from utils.triton import TritonRemoteModel
248 | model = TritonRemoteModel(url=w)
249 | nhwc = model.runtime.startswith("tensorflow")
250 | '''
251 | else:
252 | from ultralytics.yolo.engine.exporter import EXPORT_FORMATS_TABLE
253 | raise TypeError(f"model='{w}' is not a supported model format. "
254 | 'See https://docs.ultralytics.com/tasks/detection/#export for help.'
255 | f'\n\n{EXPORT_FORMATS_TABLE}')
256 |
257 | # Load external metadata YAML
258 | if xml or saved_model or paddle:
259 | metadata = Path(w).parent / 'metadata.yaml'
260 | if metadata.exists():
261 | metadata = yaml_load(metadata)
262 | stride, names = int(metadata['stride']), metadata['names'] # load metadata
263 | else:
264 | LOGGER.warning(f"WARNING ⚠️ Metadata not found at '{metadata}'")
265 |
266 | # Check names
267 | if 'names' not in locals(): # names missing
268 | names = yaml_load(check_yaml(data))['names'] if data else {i: f'class{i}' for i in range(999)} # assign
269 | names = check_class_names(names)
270 |
271 | self.__dict__.update(locals()) # assign all variables to self
272 |
273 | def forward(self, im, augment=False, visualize=False):
274 | """
275 | Runs inference on the YOLOv8 MultiBackend model.
276 |
277 | Args:
278 | im (torch.Tensor): The image tensor to perform inference on.
279 | augment (bool): whether to perform data augmentation during inference, defaults to False
280 | visualize (bool): whether to visualize the output predictions, defaults to False
281 |
282 | Returns:
283 | (tuple): Tuple containing the raw output tensor, and the processed output for visualization (if visualize=True)
284 | """
285 | b, ch, h, w = im.shape # batch, channel, height, width
286 | if self.fp16 and im.dtype != torch.float16:
287 | im = im.half() # to FP16
288 | if self.nhwc:
289 | im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
290 |
291 | if self.pt or self.nn_module: # PyTorch
292 | y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
293 | elif self.jit: # TorchScript
294 | y = self.model(im)
295 | elif self.dnn: # ONNX OpenCV DNN
296 | im = im.cpu().numpy() # torch to numpy
297 | self.net.setInput(im)
298 | y = self.net.forward()
299 | elif self.onnx: # ONNX Runtime
300 | im = im.cpu().numpy() # torch to numpy
301 | y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
302 | elif self.xml: # OpenVINO
303 | im = im.cpu().numpy() # FP32
304 | y = list(self.executable_network([im]).values())
305 | elif self.engine: # TensorRT
306 | if self.dynamic and im.shape != self.bindings['images'].shape:
307 | i = self.model.get_binding_index('images')
308 | self.context.set_binding_shape(i, im.shape) # reshape if dynamic
309 | self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
310 | for name in self.output_names:
311 | i = self.model.get_binding_index(name)
312 | self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
313 | s = self.bindings['images'].shape
314 | assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
315 | self.binding_addrs['images'] = int(im.data_ptr())
316 | self.context.execute_v2(list(self.binding_addrs.values()))
317 | y = [self.bindings[x].data for x in sorted(self.output_names)]
318 | elif self.coreml: # CoreML
319 | im = im.cpu().numpy()
320 | im = Image.fromarray((im[0] * 255).astype('uint8'))
321 | # im = im.resize((192, 320), Image.ANTIALIAS)
322 | y = self.model.predict({'image': im}) # coordinates are xywh normalized
323 | if 'confidence' in y:
324 | box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
325 | conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
326 | y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
327 | else:
328 | y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
329 | elif self.paddle: # PaddlePaddle
330 | im = im.cpu().numpy().astype(np.float32)
331 | self.input_handle.copy_from_cpu(im)
332 | self.predictor.run()
333 | y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
334 | elif self.triton: # NVIDIA Triton Inference Server
335 | y = self.model(im)
336 | else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
337 | im = im.cpu().numpy()
338 | if self.saved_model: # SavedModel
339 | y = self.model(im, training=False) if self.keras else self.model(im)
340 | elif self.pb: # GraphDef
341 | y = self.frozen_func(x=self.tf.constant(im))
342 | else: # Lite or Edge TPU
343 | input = self.input_details[0]
344 | int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
345 | if int8:
346 | scale, zero_point = input['quantization']
347 | im = (im / scale + zero_point).astype(np.uint8) # de-scale
348 | self.interpreter.set_tensor(input['index'], im)
349 | self.interpreter.invoke()
350 | y = []
351 | for output in self.output_details:
352 | x = self.interpreter.get_tensor(output['index'])
353 | if int8:
354 | scale, zero_point = output['quantization']
355 | x = (x.astype(np.float32) - zero_point) * scale # re-scale
356 | y.append(x)
357 | # TF segment fixes: export is reversed vs ONNX export and protos are transposed
358 | if len(self.output_details) == 2: # segment
359 | y = [y[1], np.transpose(y[0], (0, 3, 1, 2))]
360 | y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
361 | # y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
362 |
363 | if isinstance(y, (list, tuple)):
364 | return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
365 | else:
366 | return self.from_numpy(y)
367 |
368 | def from_numpy(self, x):
369 | """
370 | Convert a numpy array to a tensor.
371 |
372 | Args:
373 | x (np.ndarray): The array to be converted.
374 |
375 | Returns:
376 | (torch.Tensor): The converted tensor
377 | """
378 | return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
379 |
380 | def warmup(self, imgsz=(1, 3, 640, 640)):
381 | """
382 | Warm up the model by running one forward pass with a dummy input.
383 |
384 | Args:
385 | imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width)
386 |
387 | Returns:
388 | (None): This method runs the forward pass and doesn't return any value
389 | """
390 | warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
391 | if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
392 | im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
393 | for _ in range(2 if self.jit else 1): #
394 | self.forward(im) # warmup
395 |
396 | @staticmethod
397 | def _model_type(p='path/to/model.pt'):
398 | """
399 | This function takes a path to a model file and returns the model type
400 |
401 | Args:
402 | p: path to the model file. Defaults to path/to/model.pt
403 | """
404 | # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
405 | # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
406 | from ultralytics.yolo.engine.exporter import export_formats
407 | sf = list(export_formats().Suffix) # export suffixes
408 | if not is_url(p, check=False) and not isinstance(p, str):
409 | check_suffix(p, sf) # checks
410 | url = urlparse(p) # if url may be Triton inference server
411 | types = [s in Path(p).name for s in sf]
412 | types[8] &= not types[9] # tflite &= not edgetpu
413 | triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
414 | return types + [triton]
415 |
--------------------------------------------------------------------------------
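For reference, a minimal usage sketch of AutoBackend as defined above, assuming a local `yolov8n.pt` checkpoint is available (any other suffix from the format table would be dispatched the same way through `_model_type`):

import torch
from ultralytics.nn.autobackend import AutoBackend

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = AutoBackend('yolov8n.pt', device=device, fp16=False, fuse=True)
model.warmup(imgsz=(1, 3, 640, 640))  # dummy forward pass on GPU, skipped on CPU

im = torch.zeros(1, 3, 640, 640, device=device)  # BCHW float image in [0, 1]
y = model(im)  # outputs are normalized to torch tensors via from_numpy()
print(model.stride, model.names[0])  # stride and class names resolved in __init__
print(y[0].shape if isinstance(y, (list, tuple)) else y.shape)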