├── ultralytics
    ├── nn
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── modules.cpython-37.pyc
    │   │   ├── modules.cpython-38.pyc
    │   │   ├── tasks.cpython-37.pyc
    │   │   ├── tasks.cpython-38.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── __init__.cpython-38.pyc
    │   │   ├── autobackend.cpython-37.pyc
    │   │   └── autobackend.cpython-38.pyc
    │   ├── autoshape.py
    │   ├── modules.py
    │   └── autobackend.py
    ├── tracker
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── matching.py
    │   │   ├── gmc.py
    │   │   └── kalman_filter.py
    │   ├── __init__.py
    │   ├── trackers
    │   │   ├── __init__.py
    │   │   ├── basetrack.py
    │   │   ├── bot_sort.py
    │   │   └── byte_tracker.py
    │   ├── cfg
    │   │   ├── bytetrack.yaml
    │   │   └── botsort.yaml
    │   ├── README.md
    │   └── track.py
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   └── __init__.cpython-38.pyc
    ├── hub
    │   ├── __pycache__
    │   │   ├── auth.cpython-37.pyc
    │   │   ├── auth.cpython-38.pyc
    │   │   ├── utils.cpython-37.pyc
    │   │   ├── utils.cpython-38.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── __init__.cpython-38.pyc
    │   │   ├── session.cpython-37.pyc
    │   │   └── session.cpython-38.pyc
    │   ├── auth.py
    │   ├── __init__.py
    │   ├── utils.py
    │   └── session.py
    ├── __init__.py
    └── models
    │   ├── v8
    │       ├── cls
    │       │   ├── yolov8l-cls.yaml
    │       │   ├── yolov8m-cls.yaml
    │       │   ├── yolov8n-cls.yaml
    │       │   ├── yolov8s-cls.yaml
    │       │   └── yolov8x-cls.yaml
    │       ├── yolov8l.yaml
    │       ├── yolov8m.yaml
    │       ├── yolov8x.yaml
    │       ├── yolov8n.yaml
    │       ├── yolov8s.yaml
    │       ├── seg
    │       │   ├── yolov8l-seg.yaml
    │       │   ├── yolov8m-seg.yaml
    │       │   ├── yolov8x-seg.yaml
    │       │   ├── yolov8n-seg.yaml
    │       │   └── yolov8s-seg.yaml
    │       └── yolov8x6.yaml
    │   ├── v3
    │       ├── yolov3-tinyu.yaml
    │       ├── yolov3u.yaml
    │       └── yolov3-sppu.yaml
    │   ├── v5
    │       ├── yolov5lu.yaml
    │       ├── yolov5mu.yaml
    │       ├── yolov5nu.yaml
    │       ├── yolov5xu.yaml
    │       └── yolov5su.yaml
    │   └── README.md
├── screenshot
    ├── 5.jpg
    ├── 1.jpeg
    ├── 2.jpeg
    ├── 3.jpeg
    └── qrcode.png
├── images
    └── zidane.jpg
├── detect_predict.py
└── README.md


/ultralytics/nn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/__init__.py:
--------------------------------------------------------------------------------
1 | from .trackers import BOTSORT, BYTETracker
2 | 


--------------------------------------------------------------------------------
/screenshot/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/5.jpg


--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/__init__.py:
--------------------------------------------------------------------------------
1 | from .bot_sort import BOTSORT
2 | from .byte_tracker import BYTETracker
3 | 


--------------------------------------------------------------------------------
/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/images/zidane.jpg


--------------------------------------------------------------------------------
/screenshot/1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/1.jpeg


--------------------------------------------------------------------------------
/screenshot/2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/2.jpeg


--------------------------------------------------------------------------------
/screenshot/3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/3.jpeg


--------------------------------------------------------------------------------
/screenshot/qrcode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/qrcode.png


--------------------------------------------------------------------------------
/ultralytics/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/auth.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/auth.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/auth.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/auth.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/utils.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/modules.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/modules.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/modules.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/modules.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/tasks.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/tasks.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/tasks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/tasks.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/session.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/session.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/hub/__pycache__/session.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/session.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/autobackend.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/autobackend.cpython-37.pyc


--------------------------------------------------------------------------------
/ultralytics/nn/__pycache__/autobackend.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/autobackend.cpython-38.pyc


--------------------------------------------------------------------------------
/ultralytics/__init__.py:
--------------------------------------------------------------------------------
1 | # Ultralytics YOLO 🚀, GPL-3.0 license
2 | 
3 | __version__ = '8.0.40'
4 | 
5 | from ultralytics.yolo.engine.model import YOLO
6 | from ultralytics.yolo.utils.checks import check_yolo as checks
7 | 
8 | __all__ = ['__version__', 'YOLO', 'checks']  # allow simpler import
9 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/cfg/bytetrack.yaml:
--------------------------------------------------------------------------------
1 | tracker_type: bytetrack  # tracker type, ['botsort', 'bytetrack']
2 | track_high_thresh: 0.5  # threshold for the first association
3 | track_low_thresh: 0.1  # threshold for the second association
4 | new_track_thresh: 0.6  # threshold for init new track if the detection does not match any tracks
5 | track_buffer: 30  # buffer to calculate the time when to remove tracks
6 | match_thresh: 0.8  # threshold for matching tracks
7 | # min_box_area: 10  # threshold for min box areas(for tracker evaluation, not used for now)
8 | # mot20: False  # for tracker evaluation(not used for now)
9 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8l-cls.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 1000  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.00  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 | 
21 | # YOLOv8.0n head
22 | head:
23 |   - [-1, 1, Classify, [nc]]
24 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8m-cls.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 1000  # number of classes
 5 | depth_multiple: 0.67  # scales module repeats
 6 | width_multiple: 0.75  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 | 
21 | # YOLOv8.0n head
22 | head:
23 |   - [-1, 1, Classify, [nc]]
24 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8n-cls.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 1000  # number of classes
 5 | depth_multiple: 0.33  # scales module repeats
 6 | width_multiple: 0.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 | 
21 | # YOLOv8.0n head
22 | head:
23 |   - [-1, 1, Classify, [nc]]
24 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8s-cls.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 1000  # number of classes
 5 | depth_multiple: 0.33  # scales module repeats
 6 | width_multiple: 0.50  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 | 
21 | # YOLOv8.0n head
22 | head:
23 |   - [-1, 1, Classify, [nc]]
24 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/cls/yolov8x-cls.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 1000  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 | 
21 | # YOLOv8.0n head
22 | head:
23 |   - [-1, 1, Classify, [nc]]
24 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/README.md:
--------------------------------------------------------------------------------
 1 | ## Tracker
 2 | 
 3 | ### Trackers
 4 | 
 5 | - [x] ByteTracker
 6 | - [x] BoT-SORT
 7 | 
 8 | ### Usage
 9 | 
10 | python interface:
11 | 
12 | ```python
13 | from ultralytics import YOLO
14 | 
15 | model = YOLO("yolov8n.pt")  # or a segmentation model .i.e yolov8n-seg.pt
16 | model.track(
17 |     source="video/streams",
18 |     stream=True,
19 |     tracker="botsort.yaml/bytetrack.yaml",
20 |     ...,
21 | )
22 | ```
23 | 
24 | cli:
25 | 
26 | ```bash
27 | yolo detect track source=... tracker=...
28 | yolo segment track source=... tracker=...
29 | ```
30 | 
31 | By default, trackers will use the configuration in `ultralytics/tracker/cfg`.
32 | We also support using a modified tracker config file. Please refer to the tracker config files in `ultralytics/tracker/cfg`.
33 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/cfg/botsort.yaml:
--------------------------------------------------------------------------------
 1 | tracker_type: botsort  # tracker type, ['botsort', 'bytetrack']
 2 | track_high_thresh: 0.5  # threshold for the first association
 3 | track_low_thresh: 0.1  # threshold for the second association
 4 | new_track_thresh: 0.6  # threshold for init new track if the detection does not match any tracks
 5 | track_buffer: 30  # buffer to calculate the time when to remove tracks
 6 | match_thresh: 0.8  # threshold for matching tracks
 7 | # min_box_area: 10  # threshold for min box areas(for tracker evaluation, not used for now)
 8 | # mot20: False  # for tracker evaluation(not used for now)
 9 | 
10 | # Botsort settings
11 | cmc_method: sparseOptFlow  # method of global motion compensation
12 | # ReID model related thresh (not supported yet)
13 | proximity_thresh: 0.5
14 | appearance_thresh: 0.25
15 | with_reid: False
16 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/basetrack.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | class TrackState:
 7 |     New = 0
 8 |     Tracked = 1
 9 |     Lost = 2
10 |     Removed = 3
11 | 
12 | 
13 | class BaseTrack:
14 |     _count = 0
15 | 
16 |     track_id = 0
17 |     is_activated = False
18 |     state = TrackState.New
19 | 
20 |     history = OrderedDict()
21 |     features = []
22 |     curr_feature = None
23 |     score = 0
24 |     start_frame = 0
25 |     frame_id = 0
26 |     time_since_update = 0
27 | 
28 |     # multi-camera
29 |     location = (np.inf, np.inf)
30 | 
31 |     @property
32 |     def end_frame(self):
33 |         return self.frame_id
34 | 
35 |     @staticmethod
36 |     def next_id():
37 |         BaseTrack._count += 1
38 |         return BaseTrack._count
39 | 
40 |     def activate(self, *args):
41 |         raise NotImplementedError
42 | 
43 |     def predict(self):
44 |         raise NotImplementedError
45 | 
46 |     def update(self, *args, **kwargs):
47 |         raise NotImplementedError
48 | 
49 |     def mark_lost(self):
50 |         self.state = TrackState.Lost
51 | 
52 |     def mark_removed(self):
53 |         self.state = TrackState.Removed
54 | 


--------------------------------------------------------------------------------
/ultralytics/models/v3/yolov3-tinyu.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.0  # model depth multiple
 6 | width_multiple: 1.0  # layer channel multiple
 7 | 
 8 | # YOLOv3-tiny backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [16, 3, 1]],  # 0
12 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 1-P1/2
13 |    [-1, 1, Conv, [32, 3, 1]],
14 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 3-P2/4
15 |    [-1, 1, Conv, [64, 3, 1]],
16 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 5-P3/8
17 |    [-1, 1, Conv, [128, 3, 1]],
18 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 7-P4/16
19 |    [-1, 1, Conv, [256, 3, 1]],
20 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 9-P5/32
21 |    [-1, 1, Conv, [512, 3, 1]],
22 |    [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]],  # 11
23 |    [-1, 1, nn.MaxPool2d, [2, 1, 0]],  # 12
24 |   ]
25 | 
26 | # YOLOv3-tiny head
27 | head:
28 |   [[-1, 1, Conv, [1024, 3, 1]],
29 |    [-1, 1, Conv, [256, 1, 1]],
30 |    [-1, 1, Conv, [512, 3, 1]],  # 15 (P5/32-large)
31 | 
32 |    [-2, 1, Conv, [128, 1, 1]],
33 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 |    [[-1, 8], 1, Concat, [1]],  # cat backbone P4
35 |    [-1, 1, Conv, [256, 3, 1]],  # 19 (P4/16-medium)
36 | 
37 |    [[19, 15], 1, Detect, [nc]],  # Detect(P4, P5)
38 |   ]
39 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8l.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.00  # scales convolution channels
 7 | 
 8 | # YOLOv8.0l backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [512, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [512, True]]
20 |   - [-1, 1, SPPF, [512, 5]]  # 9
21 | 
22 | # YOLOv8.0l head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [512]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8m.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.67  # scales module repeats
 6 | width_multiple: 0.75  # scales convolution channels
 7 | 
 8 | # YOLOv8.0m backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [768, True]]
20 |   - [-1, 1, SPPF, [768, 5]]  # 9
21 | 
22 | # YOLOv8.0m head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [768]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8x.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0x backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [512, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [512, True]]
20 |   - [-1, 1, SPPF, [512, 5]]  # 9
21 | 
22 | # YOLOv8.0x head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [512]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8n.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.33  # scales module repeats
 6 | width_multiple: 0.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 |   - [-1, 1, SPPF, [1024, 5]]  # 9
21 | 
22 | # YOLOv8.0n head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8s.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.33  # scales module repeats
 6 | width_multiple: 0.50  # scales convolution channels
 7 | 
 8 | # YOLOv8.0s backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 |   - [-1, 1, SPPF, [1024, 5]]  # 9
21 | 
22 | # YOLOv8.0s head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8l-seg.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.00  # scales convolution channels
 7 | 
 8 | # YOLOv8.0l backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [512, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [512, True]]
20 |   - [-1, 1, SPPF, [512, 5]]  # 9
21 | 
22 | # YOLOv8.0l head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [512]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8m-seg.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.67  # scales module repeats
 6 | width_multiple: 0.75  # scales convolution channels
 7 | 
 8 | # YOLOv8.0m backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [768, True]]
20 |   - [-1, 1, SPPF, [768, 5]]  # 9
21 | 
22 | # YOLOv8.0m head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [768]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8x-seg.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0x backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [512, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [512, True]]
20 |   - [-1, 1, SPPF, [512, 5]]  # 9
21 | 
22 | # YOLOv8.0x head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [512]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8n-seg.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.33  # scales module repeats
 6 | width_multiple: 0.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0n backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 |   - [-1, 1, SPPF, [1024, 5]]  # 9
21 | 
22 | # YOLOv8.0n head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/seg/yolov8s-seg.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.33  # scales module repeats
 6 | width_multiple: 0.50  # scales convolution channels
 7 | 
 8 | # YOLOv8.0s backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [1024, True]]
20 |   - [-1, 1, SPPF, [1024, 5]]  # 9
21 | 
22 | # YOLOv8.0s head
23 | head:
24 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
26 |   - [-1, 3, C2f, [512]]  # 12
27 | 
28 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
29 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
30 |   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
31 | 
32 |   - [-1, 1, Conv, [256, 3, 2]]
33 |   - [[-1, 12], 1, Concat, [1]]  # cat head P4
34 |   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
35 | 
36 |   - [-1, 1, Conv, [512, 3, 2]]
37 |   - [[-1, 9], 1, Concat, [1]]  # cat head P5
38 |   - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
39 | 
40 |   - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Detect(P3, P4, P5)
41 | 


--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5lu.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.0  # model depth multiple
 6 | width_multiple: 1.0  # layer channel multiple
 7 | 
 8 | # YOLOv5 v6.0 backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
12 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
13 |    [-1, 3, C3, [128]],
14 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
15 |    [-1, 6, C3, [256]],
16 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
17 |    [-1, 9, C3, [512]],
18 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
19 |    [-1, 3, C3, [1024]],
20 |    [-1, 1, SPPF, [1024, 5]],  # 9
21 |   ]
22 | 
23 | # YOLOv5 v6.0 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
28 |    [-1, 3, C3, [512, False]],  # 13
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
33 |    [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
37 |    [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
41 |    [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
42 | 
43 |    [[17, 20, 23], 1, Detect, [nc]],  # Detect(P3, P4, P5)
44 |   ]
45 | 


--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5mu.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.67  # model depth multiple
 6 | width_multiple: 0.75  # layer channel multiple
 7 | 
 8 | # YOLOv5 v6.0 backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
12 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
13 |    [-1, 3, C3, [128]],
14 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
15 |    [-1, 6, C3, [256]],
16 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
17 |    [-1, 9, C3, [512]],
18 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
19 |    [-1, 3, C3, [1024]],
20 |    [-1, 1, SPPF, [1024, 5]],  # 9
21 |   ]
22 | 
23 | # YOLOv5 v6.0 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
28 |    [-1, 3, C3, [512, False]],  # 13
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
33 |    [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
37 |    [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
41 |    [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
42 | 
43 |    [[17, 20, 23], 1, Detect, [nc]],  # Detect(P3, P4, P5)
44 |   ]
45 | 


--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5nu.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.33  # model depth multiple
 6 | width_multiple: 0.25  # layer channel multiple
 7 | 
 8 | # YOLOv5 v6.0 backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
12 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
13 |    [-1, 3, C3, [128]],
14 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
15 |    [-1, 6, C3, [256]],
16 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
17 |    [-1, 9, C3, [512]],
18 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
19 |    [-1, 3, C3, [1024]],
20 |    [-1, 1, SPPF, [1024, 5]],  # 9
21 |   ]
22 | 
23 | # YOLOv5 v6.0 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
28 |    [-1, 3, C3, [512, False]],  # 13
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
33 |    [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
37 |    [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
41 |    [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
42 | 
43 |    [[17, 20, 23], 1, Detect, [nc]],  # Detect(P3, P4, P5)
44 |   ]
45 | 


--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5xu.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.33  # model depth multiple
 6 | width_multiple: 1.25  # layer channel multiple
 7 | 
 8 | # YOLOv5 v6.0 backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
12 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
13 |    [-1, 3, C3, [128]],
14 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
15 |    [-1, 6, C3, [256]],
16 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
17 |    [-1, 9, C3, [512]],
18 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
19 |    [-1, 3, C3, [1024]],
20 |    [-1, 1, SPPF, [1024, 5]],  # 9
21 |   ]
22 | 
23 | # YOLOv5 v6.0 head
24 | head:
25 |   [[-1, 1, Conv, [512, 1, 1]],
26 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
27 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
28 |    [-1, 3, C3, [512, False]],  # 13
29 | 
30 |    [-1, 1, Conv, [256, 1, 1]],
31 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
33 |    [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
34 | 
35 |    [-1, 1, Conv, [256, 3, 2]],
36 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
37 |    [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
38 | 
39 |    [-1, 1, Conv, [512, 3, 2]],
40 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
41 |    [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
42 | 
43 |    [[17, 20, 23], 1, Detect, [nc]],  # Detect(P3, P4, P5)
44 |   ]
45 | 


--------------------------------------------------------------------------------
/ultralytics/models/v5/yolov5su.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 0.33  # model depth multiple
 6 | width_multiple: 0.50  # layer channel multiple
 7 | 
 8 | 
 9 | # YOLOv5 v6.0 backbone
10 | backbone:
11 |   # [from, number, module, args]
12 |   [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
13 |    [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
14 |    [-1, 3, C3, [128]],
15 |    [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
16 |    [-1, 6, C3, [256]],
17 |    [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
18 |    [-1, 9, C3, [512]],
19 |    [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
20 |    [-1, 3, C3, [1024]],
21 |    [-1, 1, SPPF, [1024, 5]],  # 9
22 |   ]
23 | 
24 | # YOLOv5 v6.0 head
25 | head:
26 |   [[-1, 1, Conv, [512, 1, 1]],
27 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
28 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P4
29 |    [-1, 3, C3, [512, False]],  # 13
30 | 
31 |    [-1, 1, Conv, [256, 1, 1]],
32 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33 |    [[-1, 4], 1, Concat, [1]],  # cat backbone P3
34 |    [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
35 | 
36 |    [-1, 1, Conv, [256, 3, 2]],
37 |    [[-1, 14], 1, Concat, [1]],  # cat head P4
38 |    [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
39 | 
40 |    [-1, 1, Conv, [512, 3, 2]],
41 |    [[-1, 10], 1, Concat, [1]],  # cat head P5
42 |    [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
43 | 
44 |    [[17, 20, 23], 1, Detect, [nc]],  # Detect(P3, P4, P5)
45 |   ]
46 | 


--------------------------------------------------------------------------------
/ultralytics/models/v3/yolov3u.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.0  # model depth multiple
 6 | width_multiple: 1.0  # layer channel multiple
 7 | 
 8 | # darknet53 backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [32, 3, 1]],  # 0
12 |    [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
13 |    [-1, 1, Bottleneck, [64]],
14 |    [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
15 |    [-1, 2, Bottleneck, [128]],
16 |    [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
17 |    [-1, 8, Bottleneck, [256]],
18 |    [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
19 |    [-1, 8, Bottleneck, [512]],
20 |    [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
21 |    [-1, 4, Bottleneck, [1024]],  # 10
22 |   ]
23 | 
24 | # YOLOv3 head
25 | head:
26 |   [[-1, 1, Bottleneck, [1024, False]],
27 |    [-1, 1, Conv, [512, 1, 1]],
28 |    [-1, 1, Conv, [1024, 3, 1]],
29 |    [-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)
31 | 
32 |    [-2, 1, Conv, [256, 1, 1]],
33 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 |    [[-1, 8], 1, Concat, [1]],  # cat backbone P4
35 |    [-1, 1, Bottleneck, [512, False]],
36 |    [-1, 1, Bottleneck, [512, False]],
37 |    [-1, 1, Conv, [256, 1, 1]],
38 |    [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)
39 | 
40 |    [-2, 1, Conv, [128, 1, 1]],
41 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P3
43 |    [-1, 1, Bottleneck, [256, False]],
44 |    [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
45 | 
46 |    [[27, 22, 15], 1, Detect, [nc]],   # Detect(P3, P4, P5)
47 |   ]
48 | 


--------------------------------------------------------------------------------
/ultralytics/models/v3/yolov3-sppu.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.0  # model depth multiple
 6 | width_multiple: 1.0  # layer channel multiple
 7 | 
 8 | # darknet53 backbone
 9 | backbone:
10 |   # [from, number, module, args]
11 |   [[-1, 1, Conv, [32, 3, 1]],  # 0
12 |    [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
13 |    [-1, 1, Bottleneck, [64]],
14 |    [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
15 |    [-1, 2, Bottleneck, [128]],
16 |    [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
17 |    [-1, 8, Bottleneck, [256]],
18 |    [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
19 |    [-1, 8, Bottleneck, [512]],
20 |    [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
21 |    [-1, 4, Bottleneck, [1024]],  # 10
22 |   ]
23 | 
24 | # YOLOv3-SPP head
25 | head:
26 |   [[-1, 1, Bottleneck, [1024, False]],
27 |    [-1, 1, SPP, [512, [5, 9, 13]]],
28 |    [-1, 1, Conv, [1024, 3, 1]],
29 |    [-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)
31 | 
32 |    [-2, 1, Conv, [256, 1, 1]],
33 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
34 |    [[-1, 8], 1, Concat, [1]],  # cat backbone P4
35 |    [-1, 1, Bottleneck, [512, False]],
36 |    [-1, 1, Bottleneck, [512, False]],
37 |    [-1, 1, Conv, [256, 1, 1]],
38 |    [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)
39 | 
40 |    [-2, 1, Conv, [128, 1, 1]],
41 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
42 |    [[-1, 6], 1, Concat, [1]],  # cat backbone P3
43 |    [-1, 1, Bottleneck, [256, False]],
44 |    [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
45 | 
46 |    [[27, 22, 15], 1, Detect, [nc]],   # Detect(P3, P4, P5)
47 |   ]
48 | 


--------------------------------------------------------------------------------
/ultralytics/models/v8/yolov8x6.yaml:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | # Parameters
 4 | nc: 80  # number of classes
 5 | depth_multiple: 1.00  # scales module repeats
 6 | width_multiple: 1.25  # scales convolution channels
 7 | 
 8 | # YOLOv8.0x6 backbone
 9 | backbone:
10 |   # [from, repeats, module, args]
11 |   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
12 |   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
13 |   - [-1, 3, C2f, [128, True]]
14 |   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
15 |   - [-1, 6, C2f, [256, True]]
16 |   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
17 |   - [-1, 6, C2f, [512, True]]
18 |   - [-1, 1, Conv, [512, 3, 2]]  # 7-P5/32
19 |   - [-1, 3, C2f, [512, True]]
20 |   - [-1, 1, Conv, [512, 3, 2]]  # 9-P6/64
21 |   - [-1, 3, C2f, [512, True]]
22 |   - [-1, 1, SPPF, [512, 5]]  # 11
23 | 
24 | # YOLOv8.0x6 head
25 | head:
26 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
27 |   - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
28 |   - [-1, 3, C2, [512, False]]  # 14
29 | 
30 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31 |   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
32 |   - [-1, 3, C2, [512, False]]  # 17
33 | 
34 |   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35 |   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
36 |   - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
37 | 
38 |   - [-1, 1, Conv, [256, 3, 2]]
39 |   - [[-1, 17], 1, Concat, [1]]  # cat head P4
40 |   - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
41 | 
42 |   - [-1, 1, Conv, [512, 3, 2]]
43 |   - [[-1, 14], 1, Concat, [1]]  # cat head P5
44 |   - [-1, 3, C2, [512, False]]  # 26 (P5/32-large)
45 | 
46 |   - [-1, 1, Conv, [512, 3, 2]]
47 |   - [[-1, 11], 1, Concat, [1]]  # cat head P6
48 |   - [-1, 3, C2, [512, False]]  # 29 (P6/64-xlarge)
49 | 
50 |   - [[20, 23, 26, 29], 1, Detect, [nc]]  # Detect(P3, P4, P5, P6)
51 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/track.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ultralytics.tracker import BOTSORT, BYTETracker
 4 | from ultralytics.yolo.utils import IterableSimpleNamespace, yaml_load
 5 | from ultralytics.yolo.utils.checks import check_requirements, check_yaml
 6 | 
 7 | TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
 8 | check_requirements('lap')  # for linear_assignment
 9 | 
10 | 
11 | def on_predict_start(predictor):
12 |     tracker = check_yaml(predictor.args.tracker)
13 |     cfg = IterableSimpleNamespace(**yaml_load(tracker))
14 |     assert cfg.tracker_type in ['bytetrack', 'botsort'], \
15 |             f"Only support 'bytetrack' and 'botsort' for now, but got '{cfg.tracker_type}'"
16 |     trackers = []
17 |     for _ in range(predictor.dataset.bs):
18 |         tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
19 |         trackers.append(tracker)
20 |     predictor.trackers = trackers
21 | 
22 | 
23 | def on_predict_postprocess_end(predictor):
24 |     bs = predictor.dataset.bs
25 |     im0s = predictor.batch[2]
26 |     im0s = im0s if isinstance(im0s, list) else [im0s]
27 |     for i in range(bs):
28 |         det = predictor.results[i].boxes.cpu().numpy()
29 |         if len(det) == 0:
30 |             continue
31 |         tracks = predictor.trackers[i].update(det, im0s[i])
32 |         if len(tracks) == 0:
33 |             continue
34 |         predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
35 |         if predictor.results[i].masks is not None:
36 |             idx = tracks[:, -1].tolist()
37 |             predictor.results[i].masks = predictor.results[i].masks[idx]
38 | 
39 | 
40 | def register_tracker(model):
41 |     model.add_callback('on_predict_start', on_predict_start)
42 |     model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
43 | 


--------------------------------------------------------------------------------
/ultralytics/hub/auth.py:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | import requests
 4 | 
 5 | from ultralytics.hub.utils import HUB_API_ROOT, request_with_credentials
 6 | from ultralytics.yolo.utils import is_colab
 7 | 
 8 | API_KEY_PATH = 'https://hub.ultralytics.com/settings?tab=api+keys'
 9 | 
10 | 
11 | class Auth:
12 |     id_token = api_key = model_key = False
13 | 
14 |     def __init__(self, api_key=None):
15 |         self.api_key = self._clean_api_key(api_key)
16 |         self.authenticate() if self.api_key else self.auth_with_cookies()
17 | 
18 |     @staticmethod
19 |     def _clean_api_key(key: str) -> str:
20 |         """Strip model from key if present"""
21 |         separator = '_'
22 |         return key.split(separator)[0] if separator in key else key
23 | 
24 |     def authenticate(self) -> bool:
25 |         """Attempt to authenticate with server"""
26 |         try:
27 |             header = self.get_auth_header()
28 |             if header:
29 |                 r = requests.post(f'{HUB_API_ROOT}/v1/auth', headers=header)
30 |                 if not r.json().get('success', False):
31 |                     raise ConnectionError('Unable to authenticate.')
32 |                 return True
33 |             raise ConnectionError('User has not authenticated locally.')
34 |         except ConnectionError:
35 |             self.id_token = self.api_key = False  # reset invalid
36 |             return False
37 | 
38 |     def auth_with_cookies(self) -> bool:
39 |         """
40 |         Attempt to fetch authentication via cookies and set id_token.
41 |         User must be logged in to HUB and running in a supported browser.
42 |         """
43 |         if not is_colab():
44 |             return False  # Currently only works with Colab
45 |         try:
46 |             authn = request_with_credentials(f'{HUB_API_ROOT}/v1/auth/auto')
47 |             if authn.get('success', False):
48 |                 self.id_token = authn.get('data', {}).get('idToken', None)
49 |                 self.authenticate()
50 |                 return True
51 |             raise ConnectionError('Unable to fetch browser authentication details.')
52 |         except ConnectionError:
53 |             self.id_token = False  # reset invalid
54 |             return False
55 | 
56 |     def get_auth_header(self):
57 |         if self.id_token:
58 |             return {'authorization': f'Bearer {self.id_token}'}
59 |         elif self.api_key:
60 |             return {'x-api-key': self.api_key}
61 |         else:
62 |             return None
63 | 
64 |     def get_state(self) -> bool:
65 |         """Get the authentication state"""
66 |         return self.id_token or self.api_key
67 | 
68 |     def set_api_key(self, key: str):
69 |         """Get the authentication state"""
70 |         self.api_key = key
71 | 


--------------------------------------------------------------------------------
/ultralytics/hub/__init__.py:
--------------------------------------------------------------------------------
 1 | # Ultralytics YOLO 🚀, GPL-3.0 license
 2 | 
 3 | import requests
 4 | 
 5 | from ultralytics.hub.auth import Auth
 6 | from ultralytics.hub.session import HubTrainingSession
 7 | from ultralytics.hub.utils import split_key
 8 | from ultralytics.yolo.engine.exporter import EXPORT_FORMATS_LIST
 9 | from ultralytics.yolo.engine.model import YOLO
10 | from ultralytics.yolo.utils import LOGGER, PREFIX, emojis
11 | 
12 | # Define all export formats
13 | EXPORT_FORMATS_HUB = EXPORT_FORMATS_LIST + ['ultralytics_tflite', 'ultralytics_coreml']
14 | 
15 | 
16 | def start(key=''):
17 |     """
18 |     Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY')
19 |     """
20 |     auth = Auth(key)
21 |     try:
22 |         if not auth.get_state():
23 |             model_id = request_api_key(auth)
24 |         else:
25 |             _, model_id = split_key(key)
26 | 
27 |         if not model_id:
28 |             raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌'))
29 | 
30 |         session = HubTrainingSession(model_id=model_id, auth=auth)
31 |         session.check_disk_space()
32 | 
33 |         trainer = YOLO(session.input_file)
34 |         session.register_callbacks(trainer)
35 |         trainer.train(**session.train_args)
36 |     except Exception as e:
37 |         LOGGER.warning(f'{PREFIX}{e}')
38 | 
39 | 
40 | def request_api_key(auth, max_attempts=3):
41 |     """
42 |     Prompt the user to input their API key. Returns the model ID.
43 |     """
44 |     import getpass
45 |     for attempts in range(max_attempts):
46 |         LOGGER.info(f'{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}')
47 |         input_key = getpass.getpass('Enter your Ultralytics HUB API key:\n')
48 |         auth.api_key, model_id = split_key(input_key)
49 | 
50 |         if auth.authenticate():
51 |             LOGGER.info(f'{PREFIX}Authenticated ✅')
52 |             return model_id
53 | 
54 |         LOGGER.warning(f'{PREFIX}Invalid API key ⚠️\n')
55 | 
56 |     raise ConnectionError(emojis(f'{PREFIX}Failed to authenticate ❌'))
57 | 
58 | 
59 | def reset_model(key=''):
60 |     # Reset a trained model to an untrained state
61 |     api_key, model_id = split_key(key)
62 |     r = requests.post('https://api.ultralytics.com/model-reset', json={'apiKey': api_key, 'modelId': model_id})
63 | 
64 |     if r.status_code == 200:
65 |         LOGGER.info(f'{PREFIX}model reset successfully')
66 |         return
67 |     LOGGER.warning(f'{PREFIX}model reset failure {r.status_code} {r.reason}')
68 | 
69 | 
70 | def export_model(key='', format='torchscript'):
71 |     # Export a model to all formats
72 |     assert format in EXPORT_FORMATS_HUB, f"Unsupported export format '{format}', valid formats are {EXPORT_FORMATS_HUB}"
73 |     api_key, model_id = split_key(key)
74 |     r = requests.post('https://api.ultralytics.com/export',
75 |                       json={
76 |                           'apiKey': api_key,
77 |                           'modelId': model_id,
78 |                           'format': format})
79 |     assert (r.status_code == 200), f'{PREFIX}{format} export failure {r.status_code} {r.reason}'
80 |     LOGGER.info(f'{PREFIX}{format} export started ✅')
81 | 
82 | 
83 | def get_export(key='', format='torchscript'):
84 |     # Get an exported model dictionary with download URL
85 |     assert format in EXPORT_FORMATS_HUB, f"Unsupported export format '{format}', valid formats are {EXPORT_FORMATS_HUB}"
86 |     api_key, model_id = split_key(key)
87 |     r = requests.post('https://api.ultralytics.com/get-export',
88 |                       json={
89 |                           'apiKey': api_key,
90 |                           'modelId': model_id,
91 |                           'format': format})
92 |     assert (r.status_code == 200), f'{PREFIX}{format} get_export failure {r.status_code} {r.reason}'
93 |     return r.json()
94 | 
95 | 
96 | # temp. For checking
97 | if __name__ == '__main__':
98 |     start()
99 | 


--------------------------------------------------------------------------------
/detect_predict.py:
--------------------------------------------------------------------------------
  1 | # Ultralytics YOLO 🚀, GPL-3.0 license
  2 | 
  3 | import torch
  4 | 
  5 | from ultralytics.yolo.engine.predictor import BasePredictor
  6 | from ultralytics.yolo.engine.results import Results
  7 | from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
  8 | from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
  9 | 
 10 | 
 11 | class DetectionPredictor(BasePredictor):
 12 | 
 13 |     def get_annotator(self, img):
 14 |         return Annotator(img, line_width=self.args.line_thickness, example=str(self.model.names))
 15 | 
 16 |     def preprocess(self, img):
 17 |         img = torch.from_numpy(img).to(self.model.device)
 18 |         img = img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
 19 |         img /= 255  # 0 - 255 to 0.0 - 1.0
 20 |         return img
 21 | 
 22 |     def postprocess(self, preds, img, orig_img):
 23 |         preds = ops.non_max_suppression(preds,
 24 |                                         self.args.conf,
 25 |                                         self.args.iou,
 26 |                                         agnostic=self.args.agnostic_nms,
 27 |                                         max_det=self.args.max_det,
 28 |                                         classes=self.args.classes)
 29 | 
 30 |         results = []
 31 |         for i, pred in enumerate(preds):
 32 |             orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img
 33 |             shape = orig_img.shape
 34 |             pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
 35 |             results.append(Results(boxes=pred, orig_img=orig_img, names=self.model.names))
 36 |         return results
 37 | 
 38 |     def write_results(self, idx, results, batch):
 39 |         p, im, im0 = batch
 40 |         log_string = ''
 41 |         if len(im.shape) == 3:
 42 |             im = im[None]  # expand for batch dim
 43 |         self.seen += 1
 44 |         imc = im0.copy() if self.args.save_crop else im0
 45 |         if self.source_type.webcam or self.source_type.from_img:  # batch_size >= 1
 46 |             log_string += f'{idx}: '
 47 |             frame = self.dataset.count
 48 |         else:
 49 |             frame = getattr(self.dataset, 'frame', 0)
 50 |         self.data_path = p
 51 |         self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
 52 |         log_string += '%gx%g ' % im.shape[2:]  # print string
 53 |         self.annotator = self.get_annotator(im0)
 54 | 
 55 |         det = results[idx].boxes  # TODO: make boxes inherit from tensors
 56 |         if len(det) == 0:
 57 |             return log_string
 58 |         for c in det.cls.unique():
 59 |             n = (det.cls == c).sum()  # detections per class
 60 |             log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "
 61 | 
 62 |         # write
 63 |         for d in reversed(det):
 64 |             cls, conf = d.cls.squeeze(), d.conf.squeeze()
 65 |             if self.args.save_txt:  # Write to file
 66 |                 line = (cls, *(d.xywhn.view(-1).tolist()), conf) \
 67 |                     if self.args.save_conf else (cls, *(d.xywhn.view(-1).tolist()))  # label format
 68 |                 with open(f'{self.txt_path}.txt', 'a') as f:
 69 |                     f.write(('%g ' * len(line)).rstrip() % line + '\n')
 70 |             if self.args.save or self.args.save_crop or self.args.show:  # Add bbox to image
 71 |                 c = int(cls)  # integer class
 72 |                 name = f'id:{int(d.id.item())} {self.model.names[c]}' if d.id is not None else self.model.names[c]
 73 |                 label = None if self.args.hide_labels else (name if self.args.hide_conf else f'{name} {conf:.2f}')
 74 |                 self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
 75 |             if self.args.save_crop:
 76 |                 save_one_box(d.xyxy,
 77 |                              imc,
 78 |                              file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg',
 79 |                              BGR=True)
 80 | 
 81 |         return log_string
 82 | 
 83 | 
 84 | def predict(cfg=DEFAULT_CFG, use_python=False):
 85 |     model = cfg.model or 'yolov8n.pt'
 86 |     source = "imagesVideo/aaa.mp4"
 87 | 
 88 |     show = True
 89 |     conf=0.3
 90 |     hide_labels=False
 91 |     hide_conf=False
 92 |     line_thickness=3
 93 |     visualize=False
 94 |     augment=False
 95 |     retina_masks=False
 96 |     #classes=[0,2,3]
 97 |     args = dict(model=model, source=source, show=show, conf=conf,hide_labels=hide_labels,hide_conf=hide_conf,line_thickness=line_thickness,visualize=visualize,augment=augment,retina_masks=retina_masks)
 98 | 
 99 |     if use_python:
100 |         from ultralytics import YOLO
101 |         YOLO(model)(**args)
102 |     else:
103 |         predictor = DetectionPredictor(overrides=args)
104 |         predictor.predict_cli()
105 | 
106 | 
107 | if __name__ == '__main__':
108 |     predict()
109 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/bot_sort.py:
--------------------------------------------------------------------------------
  1 | from collections import deque
  2 | 
  3 | import numpy as np
  4 | 
  5 | from ..utils import matching
  6 | from ..utils.gmc import GMC
  7 | from ..utils.kalman_filter import KalmanFilterXYWH
  8 | from .basetrack import TrackState
  9 | from .byte_tracker import BYTETracker, STrack
 10 | 
 11 | 
 12 | class BOTrack(STrack):
 13 |     shared_kalman = KalmanFilterXYWH()
 14 | 
 15 |     def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
 16 |         super().__init__(tlwh, score, cls)
 17 | 
 18 |         self.smooth_feat = None
 19 |         self.curr_feat = None
 20 |         if feat is not None:
 21 |             self.update_features(feat)
 22 |         self.features = deque([], maxlen=feat_history)
 23 |         self.alpha = 0.9
 24 | 
 25 |     def update_features(self, feat):
 26 |         feat /= np.linalg.norm(feat)
 27 |         self.curr_feat = feat
 28 |         if self.smooth_feat is None:
 29 |             self.smooth_feat = feat
 30 |         else:
 31 |             self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
 32 |         self.features.append(feat)
 33 |         self.smooth_feat /= np.linalg.norm(self.smooth_feat)
 34 | 
 35 |     def predict(self):
 36 |         mean_state = self.mean.copy()
 37 |         if self.state != TrackState.Tracked:
 38 |             mean_state[6] = 0
 39 |             mean_state[7] = 0
 40 | 
 41 |         self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
 42 | 
 43 |     def re_activate(self, new_track, frame_id, new_id=False):
 44 |         if new_track.curr_feat is not None:
 45 |             self.update_features(new_track.curr_feat)
 46 |         super().re_activate(new_track, frame_id, new_id)
 47 | 
 48 |     def update(self, new_track, frame_id):
 49 |         if new_track.curr_feat is not None:
 50 |             self.update_features(new_track.curr_feat)
 51 |         super().update(new_track, frame_id)
 52 | 
 53 |     @property
 54 |     def tlwh(self):
 55 |         """Get current position in bounding box format `(top left x, top left y,
 56 |         width, height)`.
 57 |         """
 58 |         if self.mean is None:
 59 |             return self._tlwh.copy()
 60 |         ret = self.mean[:4].copy()
 61 |         ret[:2] -= ret[2:] / 2
 62 |         return ret
 63 | 
 64 |     @staticmethod
 65 |     def multi_predict(stracks):
 66 |         if len(stracks) > 0:
 67 |             multi_mean = np.asarray([st.mean.copy() for st in stracks])
 68 |             multi_covariance = np.asarray([st.covariance for st in stracks])
 69 |             for i, st in enumerate(stracks):
 70 |                 if st.state != TrackState.Tracked:
 71 |                     multi_mean[i][6] = 0
 72 |                     multi_mean[i][7] = 0
 73 |             multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
 74 |             for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
 75 |                 stracks[i].mean = mean
 76 |                 stracks[i].covariance = cov
 77 | 
 78 |     def convert_coords(self, tlwh):
 79 |         return self.tlwh_to_xywh(tlwh)
 80 | 
 81 |     @staticmethod
 82 |     def tlwh_to_xywh(tlwh):
 83 |         """Convert bounding box to format `(center x, center y, width,
 84 |         height)`.
 85 |         """
 86 |         ret = np.asarray(tlwh).copy()
 87 |         ret[:2] += ret[2:] / 2
 88 |         return ret
 89 | 
 90 | 
 91 | class BOTSORT(BYTETracker):
 92 | 
 93 |     def __init__(self, args, frame_rate=30):
 94 |         super().__init__(args, frame_rate)
 95 |         # ReID module
 96 |         self.proximity_thresh = args.proximity_thresh
 97 |         self.appearance_thresh = args.appearance_thresh
 98 | 
 99 |         if args.with_reid:
100 |             # haven't supported bot-sort(reid) yet
101 |             self.encoder = None
102 |         # self.gmc = GMC(method=args.cmc_method, verbose=[args.name, args.ablation])
103 |         self.gmc = GMC(method=args.cmc_method)
104 | 
105 |     def get_kalmanfilter(self):
106 |         return KalmanFilterXYWH()
107 | 
108 |     def init_track(self, dets, scores, cls, img=None):
109 |         if len(dets) == 0:
110 |             return []
111 |         if self.args.with_reid and self.encoder is not None:
112 |             features_keep = self.encoder.inference(img, dets)
113 |             detections = [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)]
114 |         else:
115 |             detections = [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)]
116 |         return detections
117 | 
118 |     def get_dists(self, tracks, detections):
119 |         dists = matching.iou_distance(tracks, detections)
120 |         dists_mask = (dists > self.proximity_thresh)
121 | 
122 |         # TODO: mot20
123 |         # if not self.args.mot20:
124 |         dists = matching.fuse_score(dists, detections)
125 | 
126 |         if self.args.with_reid and self.encoder is not None:
127 |             emb_dists = matching.embedding_distance(tracks, detections) / 2.0
128 |             emb_dists[emb_dists > self.appearance_thresh] = 1.0
129 |             emb_dists[dists_mask] = 1.0
130 |             dists = np.minimum(dists, emb_dists)
131 |         return dists
132 | 
133 |     def multi_predict(self, tracks):
134 |         BOTrack.multi_predict(tracks)
135 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python-Yolov8-gesture-recognition
 2 | 
 3 | >**Python Yolov8 gesture recognition手势特征识别**  
 4 | >**如需安装运行环境或远程调试，见文章底部名片(QQ:2945218359)，由专业技术人员远程协助！**  
 5 | 
 6 | 
 7 | ## 运行环境
 8 | **编程语言：Python3**  
 9 | **依赖库：Torch, Yolov8**
10 | 
11 | ## 运行效果
12 | <img src="https://github.com/alicema-creator/Python-Yolov8-gesture-recognition/blob/main/screenshot/1.jpeg" width="500"></a>
13 | <img src="https://github.com/alicema-creator/Python-Yolov8-gesture-recognition/blob/main/screenshot/2.jpeg" width="500"></a>
14 | <img src="https://github.com/alicema-creator/Python-Yolov8-gesture-recognition/blob/main/screenshot/3.jpeg" width="500"></a>
15 | <img src="https://github.com/alicema-creator/Python-Yolov8-gesture-recognition/blob/main/screenshot/5.jpg" width="500"></a>
16 | 
17 | ## 在线协助
18 | **如需安装运行环境或远程调试，可扫码或直接加QQ:2945218359, QQ:905733049由专业技术人员远程协助！**  
19 | **1）远程安装运行环境，代码调试**  
20 | **2）Qt, C++, Python入门指导**  
21 | **3）界面美化**  
22 | **4）软件制作**  
23 | **5）云服务器申请**  
24 | **6）网站制作**  
25 | 
26 | **扫码或**<a href="https://img-blog.csdnimg.cn/132d32981a6d4d48bdf578f9810bd341.png" target="_blank">**点这里**</a>**(QQ:2945218359, QQ:905733049)**  
27 | <a href="https://img-blog.csdnimg.cn/132d32981a6d4d48bdf578f9810bd341.png" target="_blank">
28 |   <img src="https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit/blob/main/screenshot/qrcode.png" width="200">
29 |   <img src="https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit/blob/main/screenshot/qrcode2.png" width="200">
30 | </a>
31 | 
32 | 
33 | <font color=#000000 size=4>**🏠作者推荐：**</font>  
34 | <font color=#000000 size=4>
35 | <font color=#000000 size=4>**🌟Python特征识别检测项目🌟**</font>  
36 | 
37 | **Python+Yolov5表情检测识别：**
38 | [https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression)  
39 | **Python指纹识别系统：**
40 | [https://github.com/alicema-creator/Python-fingerprint-recogn-system](https://github.com/alicema-creator/Python-fingerprint-recogn-system)  
41 | **Python人脸识别考勤打卡系统2：**
42 | [https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system2](https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system2)  
43 | **Python人脸识别考勤打卡系统：**
44 | [https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system](https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system)  
45 |  **Python果树水果识别**：[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-apple-fruit](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-apple-fruit)  
46 | **Python+Yolov8+Deepsort入口人流量统计：**[https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit](https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit)  
47 | **Python+Qt指纹录入识别考勤系统：**[https://blog.csdn.net/alicema1111/article/details/129338432](https://blog.csdn.net/alicema1111/article/details/129338432)  
48 | **Python手势特征识别：**[https://github.com/alicema-creator/Python-Yolov8-gesture-recognition](https://github.com/alicema-creator/Python-Yolov8-gesture-recognition)  
49 | **Python+Yolov5路面桥梁墙体裂缝识别：**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-cracks-in-road-bridges](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-cracks-in-road-bridges)  
50 | **Python+Yolov8路面桥梁墙体裂缝识别：**[https://github.com/alicema-creator/Python-Yolov8-crack-recognition-for-road-bridge-wall](https://github.com/alicema-creator/Python-Yolov8-crack-recognition-for-road-bridge-wall)  
51 | **Python+Qt人行道盲道特征检测识别窗体程序：**[https://github.com/alicema-creator/Python-Qt-Detection-and-recognition-of-sidewalk-tactile-paving](https://github.com/alicema-creator/Python-Qt-Detection-and-recognition-of-sidewalk-tactile-paving)  
52 | **Python+Yolov5面部情感表情检测识别：**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression)  
53 | 
54 | 
55 | <font color=#000000 size=4></font>  
56 | <font color=#000000 size=4>**🌟Python/Django网页项目🌟**</font>  
57 | **Python+Django+HTMLl网页前后端指纹信息识别：**[https://github.com/alicema-creator/Python-Django-HTML-web-fingerprint-information-recognition](https://github.com/alicema-creator/Python-Django-HTML-web-fingerprint-information-recognition)  
58 | **Html+threejs网页数字孪生场景三维可视化：**[https://github.com/alicema-creator/html-threejs-twin-scenes-3D-visualization-project](https://github.com/alicema-creator/html-threejs-twin-scenes-3D-visualization-project)  
59 | **python+django+html药物管理系统web drug management system(crm)：**[https://github.com/alicema-creator/python-django-web-html-drug-management-system](https://github.com/alicema-creator/python-django-web-html-drug-management-system)  
60 | **Qt+C++ web browser自建网页浏览器-Chrome最新内核基础上搭建：**[https://github.com/alicema-creator/Qt-and-C-web-browser--Chrome-latest-kernel](https://github.com/alicema-creator/Qt-and-C-web-browser--Chrome-latest-kernel)  
61 | 
62 | 
63 | <font color=#000000 size=4></font>  
64 | <font color=#000000 size=4>**🌟C++/Qt项目🌟**</font>  
65 | **OCC Opencascade+Qt+C++三维图像建模窗体点线面拾取：**[https://github.com/alicema-creator/OCC-Opencascade-Qt-C-3D-model-modeling-point-line-surface-pick-igs-iges-stp-step](https://github.com/alicema-creator/OCC-Opencascade-Qt-C-3D-model-modeling-point-line-surface-pick-igs-iges-stp-step)  
66 | **Qt+VTK鼠标拾取点生成拉伸闭合三维体：**[https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes](https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes)  
67 | **Qt+C++实现的串口通信工具带实时曲线图：**[https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes]([https://github.com/alicema-creator/SerialPort-Communication)](https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes](https://github.com/alicema-creator/SerialPort-Communication))  
68 | 
69 | </font>  
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/utils/matching.py:
--------------------------------------------------------------------------------
  1 | import lap
  2 | import numpy as np
  3 | import scipy
  4 | from scipy.spatial.distance import cdist
  5 | 
  6 | from .kalman_filter import chi2inv95
  7 | 
  8 | 
  9 | def merge_matches(m1, m2, shape):
 10 |     O, P, Q = shape
 11 |     m1 = np.asarray(m1)
 12 |     m2 = np.asarray(m2)
 13 | 
 14 |     M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
 15 |     M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
 16 | 
 17 |     mask = M1 * M2
 18 |     match = mask.nonzero()
 19 |     match = list(zip(match[0], match[1]))
 20 |     unmatched_O = tuple(set(range(O)) - {i for i, j in match})
 21 |     unmatched_Q = tuple(set(range(Q)) - {j for i, j in match})
 22 | 
 23 |     return match, unmatched_O, unmatched_Q
 24 | 
 25 | 
 26 | def _indices_to_matches(cost_matrix, indices, thresh):
 27 |     matched_cost = cost_matrix[tuple(zip(*indices))]
 28 |     matched_mask = (matched_cost <= thresh)
 29 | 
 30 |     matches = indices[matched_mask]
 31 |     unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
 32 |     unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
 33 | 
 34 |     return matches, unmatched_a, unmatched_b
 35 | 
 36 | 
 37 | def linear_assignment(cost_matrix, thresh):
 38 |     if cost_matrix.size == 0:
 39 |         return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
 40 |     matches, unmatched_a, unmatched_b = [], [], []
 41 |     cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
 42 |     matches.extend([ix, mx] for ix, mx in enumerate(x) if mx >= 0)
 43 |     unmatched_a = np.where(x < 0)[0]
 44 |     unmatched_b = np.where(y < 0)[0]
 45 |     matches = np.asarray(matches)
 46 |     return matches, unmatched_a, unmatched_b
 47 | 
 48 | 
 49 | def ious(atlbrs, btlbrs):
 50 |     """
 51 |     Compute cost based on IoU
 52 |     :type atlbrs: list[tlbr] | np.ndarray
 53 |     :type atlbrs: list[tlbr] | np.ndarray
 54 | 
 55 |     :rtype ious np.ndarray
 56 |     """
 57 |     ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32)
 58 |     if ious.size == 0:
 59 |         return ious
 60 | 
 61 |     ious = bbox_ious(np.ascontiguousarray(atlbrs, dtype=np.float32), np.ascontiguousarray(btlbrs, dtype=np.float32))
 62 |     return ious
 63 | 
 64 | 
 65 | def iou_distance(atracks, btracks):
 66 |     """
 67 |     Compute cost based on IoU
 68 |     :type atracks: list[STrack]
 69 |     :type btracks: list[STrack]
 70 | 
 71 |     :rtype cost_matrix np.ndarray
 72 |     """
 73 | 
 74 |     if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \
 75 |             or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
 76 |         atlbrs = atracks
 77 |         btlbrs = btracks
 78 |     else:
 79 |         atlbrs = [track.tlbr for track in atracks]
 80 |         btlbrs = [track.tlbr for track in btracks]
 81 |     _ious = ious(atlbrs, btlbrs)
 82 |     return 1 - _ious  # cost matrix
 83 | 
 84 | 
 85 | def v_iou_distance(atracks, btracks):
 86 |     """
 87 |     Compute cost based on IoU
 88 |     :type atracks: list[STrack]
 89 |     :type btracks: list[STrack]
 90 | 
 91 |     :rtype cost_matrix np.ndarray
 92 |     """
 93 | 
 94 |     if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \
 95 |             or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
 96 |         atlbrs = atracks
 97 |         btlbrs = btracks
 98 |     else:
 99 |         atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
100 |         btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
101 |     _ious = ious(atlbrs, btlbrs)
102 |     return 1 - _ious  # cost matrix
103 | 
104 | 
105 | def embedding_distance(tracks, detections, metric='cosine'):
106 |     """
107 |     :param tracks: list[STrack]
108 |     :param detections: list[BaseTrack]
109 |     :param metric:
110 |     :return: cost_matrix np.ndarray
111 |     """
112 | 
113 |     cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
114 |     if cost_matrix.size == 0:
115 |         return cost_matrix
116 |     det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32)
117 |     # for i, track in enumerate(tracks):
118 |     # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
119 |     track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32)
120 |     cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # Nomalized features
121 |     return cost_matrix
122 | 
123 | 
124 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
125 |     if cost_matrix.size == 0:
126 |         return cost_matrix
127 |     gating_dim = 2 if only_position else 4
128 |     gating_threshold = chi2inv95[gating_dim]
129 |     measurements = np.asarray([det.to_xyah() for det in detections])
130 |     for row, track in enumerate(tracks):
131 |         gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position)
132 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
133 |     return cost_matrix
134 | 
135 | 
136 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
137 |     if cost_matrix.size == 0:
138 |         return cost_matrix
139 |     gating_dim = 2 if only_position else 4
140 |     gating_threshold = chi2inv95[gating_dim]
141 |     measurements = np.asarray([det.to_xyah() for det in detections])
142 |     for row, track in enumerate(tracks):
143 |         gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position, metric='maha')
144 |         cost_matrix[row, gating_distance > gating_threshold] = np.inf
145 |         cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
146 |     return cost_matrix
147 | 
148 | 
149 | def fuse_iou(cost_matrix, tracks, detections):
150 |     if cost_matrix.size == 0:
151 |         return cost_matrix
152 |     reid_sim = 1 - cost_matrix
153 |     iou_dist = iou_distance(tracks, detections)
154 |     iou_sim = 1 - iou_dist
155 |     fuse_sim = reid_sim * (1 + iou_sim) / 2
156 |     # det_scores = np.array([det.score for det in detections])
157 |     # det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
158 |     return 1 - fuse_sim  # fuse cost
159 | 
160 | 
161 | def fuse_score(cost_matrix, detections):
162 |     if cost_matrix.size == 0:
163 |         return cost_matrix
164 |     iou_sim = 1 - cost_matrix
165 |     det_scores = np.array([det.score for det in detections])
166 |     det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
167 |     fuse_sim = iou_sim * det_scores
168 |     return 1 - fuse_sim  # fuse_cost
169 | 
170 | 
171 | def bbox_ious(box1, box2, eps=1e-7):
172 |     """Boxes are x1y1x2y2
173 |     box1:       np.array of shape(nx4)
174 |     box2:       np.array of shape(mx4)
175 |     returns:    np.array of shape(nxm)
176 |     """
177 |     # Get the coordinates of bounding boxes
178 |     b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
179 |     b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
180 | 
181 |     # Intersection area
182 |     inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
183 |                  (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
184 | 
185 |     # box2 area
186 |     box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
187 |     box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
188 |     return inter_area / (box2_area + box1_area[:, None] - inter_area + eps)
189 | 


--------------------------------------------------------------------------------
/ultralytics/hub/utils.py:
--------------------------------------------------------------------------------
  1 | # Ultralytics YOLO 🚀, GPL-3.0 license
  2 | 
  3 | import os
  4 | import platform
  5 | import shutil
  6 | import sys
  7 | import threading
  8 | import time
  9 | from pathlib import Path
 10 | from random import random
 11 | 
 12 | import requests
 13 | 
 14 | from ultralytics.yolo.utils import (DEFAULT_CFG_DICT, ENVIRONMENT, LOGGER, RANK, SETTINGS, TryExcept, __version__,
 15 |                                     colorstr, emojis, get_git_origin_url, is_colab, is_git_dir, is_github_actions_ci,
 16 |                                     is_pip_package, is_pytest_running)
 17 | from ultralytics.yolo.utils.checks import check_online
 18 | 
 19 | PREFIX = colorstr('Ultralytics: ')
 20 | HELP_MSG = 'If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance.'
 21 | HUB_API_ROOT = os.environ.get('ULTRALYTICS_HUB_API', 'https://api.ultralytics.com')
 22 | 
 23 | 
 24 | def check_dataset_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=2.0):
 25 |     # Check that url fits on disk with safety factor sf, i.e. require 2GB free if url size is 1GB with sf=2.0
 26 |     gib = 1 << 30  # bytes per GiB
 27 |     data = int(requests.head(url).headers['Content-Length']) / gib  # dataset size (GB)
 28 |     total, used, free = (x / gib for x in shutil.disk_usage('/'))  # bytes
 29 |     LOGGER.info(f'{PREFIX}{data:.3f} GB dataset, {free:.1f}/{total:.1f} GB free disk space')
 30 |     if data * sf < free:
 31 |         return True  # sufficient space
 32 |     LOGGER.warning(f'{PREFIX}WARNING: Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, '
 33 |                    f'training cancelled ❌. Please free {data * sf - free:.1f} GB additional disk space and try again.')
 34 |     return False  # insufficient space
 35 | 
 36 | 
 37 | def request_with_credentials(url: str) -> any:
 38 |     """ Make an ajax request with cookies attached """
 39 |     if not is_colab():
 40 |         raise OSError('request_with_credentials() must run in a Colab environment')
 41 |     from google.colab import output  # noqa
 42 |     from IPython import display  # noqa
 43 |     display.display(
 44 |         display.Javascript("""
 45 |             window._hub_tmp = new Promise((resolve, reject) => {
 46 |                 const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000)
 47 |                 fetch("%s", {
 48 |                     method: 'POST',
 49 |                     credentials: 'include'
 50 |                 })
 51 |                     .then((response) => resolve(response.json()))
 52 |                     .then((json) => {
 53 |                     clearTimeout(timeout);
 54 |                     }).catch((err) => {
 55 |                     clearTimeout(timeout);
 56 |                     reject(err);
 57 |                 });
 58 |             });
 59 |             """ % url))
 60 |     return output.eval_js('_hub_tmp')
 61 | 
 62 | 
 63 | # Deprecated TODO: eliminate this function?
 64 | def split_key(key=''):
 65 |     """
 66 |     Verify and split a 'api_key[sep]model_id' string, sep is one of '.' or '_'
 67 | 
 68 |     Args:
 69 |         key (str): The model key to split. If not provided, the user will be prompted to enter it.
 70 | 
 71 |     Returns:
 72 |         Tuple[str, str]: A tuple containing the API key and model ID.
 73 |     """
 74 | 
 75 |     import getpass
 76 | 
 77 |     error_string = emojis(f'{PREFIX}Invalid API key ⚠️\n')  # error string
 78 |     if not key:
 79 |         key = getpass.getpass('Enter model key: ')
 80 |     sep = '_' if '_' in key else '.' if '.' in key else None  # separator
 81 |     assert sep, error_string
 82 |     api_key, model_id = key.split(sep)
 83 |     assert len(api_key) and len(model_id), error_string
 84 |     return api_key, model_id
 85 | 
 86 | 
 87 | def smart_request(*args, retry=3, timeout=30, thread=True, code=-1, method='post', verbose=True, **kwargs):
 88 |     """
 89 |     Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout.
 90 | 
 91 |     Args:
 92 |         *args: Positional arguments to be passed to the requests function specified in method.
 93 |         retry (int, optional): Number of retries to attempt before giving up. Default is 3.
 94 |         timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30.
 95 |         thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True.
 96 |         code (int, optional): An identifier for the request, used for logging purposes. Default is -1.
 97 |         method (str, optional): The HTTP method to use for the request. Choices are 'post' and 'get'. Default is 'post'.
 98 |         verbose (bool, optional): A flag to determine whether to print out to console or not. Default is True.
 99 |         **kwargs: Keyword arguments to be passed to the requests function specified in method.
100 | 
101 |     Returns:
102 |         requests.Response: The HTTP response object. If the request is executed in a separate thread, returns None.
103 |     """
104 |     retry_codes = (408, 500)  # retry only these codes
105 | 
106 |     @TryExcept(verbose=verbose)
107 |     def func(*func_args, **func_kwargs):
108 |         r = None  # response
109 |         t0 = time.time()  # initial time for timer
110 |         for i in range(retry + 1):
111 |             if (time.time() - t0) > timeout:
112 |                 break
113 |             if method == 'post':
114 |                 r = requests.post(*func_args, **func_kwargs)  # i.e. post(url, data, json, files)
115 |             elif method == 'get':
116 |                 r = requests.get(*func_args, **func_kwargs)  # i.e. get(url, data, json, files)
117 |             if r.status_code == 200:
118 |                 break
119 |             try:
120 |                 m = r.json().get('message', 'No JSON message.')
121 |             except AttributeError:
122 |                 m = 'Unable to read JSON.'
123 |             if i == 0:
124 |                 if r.status_code in retry_codes:
125 |                     m += f' Retrying {retry}x for {timeout}s.' if retry else ''
126 |                 elif r.status_code == 429:  # rate limit
127 |                     h = r.headers  # response headers
128 |                     m = f"Rate limit reached ({h['X-RateLimit-Remaining']}/{h['X-RateLimit-Limit']}). " \
129 |                         f"Please retry after {h['Retry-After']}s."
130 |                 if verbose:
131 |                     LOGGER.warning(f'{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})')
132 |                 if r.status_code not in retry_codes:
133 |                     return r
134 |             time.sleep(2 ** i)  # exponential standoff
135 |         return r
136 | 
137 |     if thread:
138 |         threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True).start()
139 |     else:
140 |         return func(*args, **kwargs)
141 | 
142 | 
143 | class Traces:
144 | 
145 |     def __init__(self):
146 |         """
147 |         Initialize Traces for error tracking and reporting if tests are not currently running.
148 |         """
149 |         self.rate_limit = 3.0  # rate limit (seconds)
150 |         self.t = 0.0  # rate limit timer (seconds)
151 |         self.metadata = {
152 |             'sys_argv_name': Path(sys.argv[0]).name,
153 |             'install': 'git' if is_git_dir() else 'pip' if is_pip_package() else 'other',
154 |             'python': platform.python_version(),
155 |             'release': __version__,
156 |             'environment': ENVIRONMENT}
157 |         self.enabled = SETTINGS['sync'] and \
158 |                        RANK in {-1, 0} and \
159 |                        check_online() and \
160 |                        not is_pytest_running() and \
161 |                        not is_github_actions_ci() and \
162 |                        (is_pip_package() or get_git_origin_url() == 'https://github.com/ultralytics/ultralytics.git')
163 | 
164 |     def __call__(self, cfg, all_keys=False, traces_sample_rate=1.0):
165 |         """
166 |        Sync traces data if enabled in the global settings
167 | 
168 |         Args:
169 |             cfg (IterableSimpleNamespace): Configuration for the task and mode.
170 |             all_keys (bool): Sync all items, not just non-default values.
171 |             traces_sample_rate (float): Fraction of traces captured from 0.0 to 1.0
172 |         """
173 |         t = time.time()  # current time
174 |         if self.enabled and random() < traces_sample_rate and (t - self.t) > self.rate_limit:
175 |             self.t = t  # reset rate limit timer
176 |             cfg = vars(cfg)  # convert type from IterableSimpleNamespace to dict
177 |             if not all_keys:  # filter cfg
178 |                 include_keys = {'task', 'mode'}  # always include
179 |                 cfg = {
180 |                     k: (v.split(os.sep)[-1] if isinstance(v, str) and os.sep in v else v)
181 |                     for k, v in cfg.items() if v != DEFAULT_CFG_DICT.get(k, None) or k in include_keys}
182 |             trace = {'uuid': SETTINGS['uuid'], 'cfg': cfg, 'metadata': self.metadata}
183 | 
184 |             # Send a request to the HUB API to sync analytics
185 |             smart_request(f'{HUB_API_ROOT}/v1/usage/anonymous',
186 |                           json=trace,
187 |                           headers=None,
188 |                           code=3,
189 |                           retry=0,
190 |                           timeout=1.0,
191 |                           verbose=False)
192 | 
193 | 
194 | # Run below code on hub/utils init -------------------------------------------------------------------------------------
195 | traces = Traces()
196 | 


--------------------------------------------------------------------------------
/ultralytics/hub/session.py:
--------------------------------------------------------------------------------
  1 | # Ultralytics YOLO 🚀, GPL-3.0 license
  2 | import json
  3 | import signal
  4 | import sys
  5 | from pathlib import Path
  6 | from time import sleep, time
  7 | 
  8 | import requests
  9 | 
 10 | from ultralytics.hub.utils import HUB_API_ROOT, check_dataset_disk_space, smart_request
 11 | from ultralytics.yolo.utils import LOGGER, PREFIX, __version__, emojis, is_colab, threaded
 12 | from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params
 13 | 
 14 | AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local'
 15 | session = None
 16 | 
 17 | 
 18 | class HubTrainingSession:
 19 | 
 20 |     def __init__(self, model_id, auth):
 21 |         self.agent_id = None  # identifies which instance is communicating with server
 22 |         self.model_id = model_id
 23 |         self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}'
 24 |         self.auth_header = auth.get_auth_header()
 25 |         self._rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0}  # rate limits (seconds)
 26 |         self._timers = {}  # rate limit timers (seconds)
 27 |         self._metrics_queue = {}  # metrics queue
 28 |         self.model = self._get_model()
 29 |         self._start_heartbeat()  # start heartbeats
 30 |         self._register_signal_handlers()
 31 | 
 32 |     def _register_signal_handlers(self):
 33 |         signal.signal(signal.SIGTERM, self._handle_signal)
 34 |         signal.signal(signal.SIGINT, self._handle_signal)
 35 | 
 36 |     def _handle_signal(self, signum, frame):
 37 |         """
 38 |         Prevent heartbeats from being sent on Colab after kill.
 39 |         This method does not use frame, it is included as it is
 40 |         passed by signal.
 41 |         """
 42 |         if self.alive is True:
 43 |             LOGGER.info(f'{PREFIX}Kill signal received! ❌')
 44 |             self._stop_heartbeat()
 45 |             sys.exit(signum)
 46 | 
 47 |     def _stop_heartbeat(self):
 48 |         """End the heartbeat loop"""
 49 |         self.alive = False
 50 | 
 51 |     def upload_metrics(self):
 52 |         payload = {'metrics': self._metrics_queue.copy(), 'type': 'metrics'}
 53 |         smart_request(f'{self.api_url}', json=payload, headers=self.auth_header, code=2)
 54 | 
 55 |     def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
 56 |         # Upload a model to HUB
 57 |         file = None
 58 |         if Path(weights).is_file():
 59 |             with open(weights, 'rb') as f:
 60 |                 file = f.read()
 61 |         if final:
 62 |             smart_request(
 63 |                 f'{self.api_url}/upload',
 64 |                 data={
 65 |                     'epoch': epoch,
 66 |                     'type': 'final',
 67 |                     'map': map},
 68 |                 files={'best.pt': file},
 69 |                 headers=self.auth_header,
 70 |                 retry=10,
 71 |                 timeout=3600,
 72 |                 code=4,
 73 |             )
 74 |         else:
 75 |             smart_request(
 76 |                 f'{self.api_url}/upload',
 77 |                 data={
 78 |                     'epoch': epoch,
 79 |                     'type': 'epoch',
 80 |                     'isBest': bool(is_best)},
 81 |                 headers=self.auth_header,
 82 |                 files={'last.pt': file},
 83 |                 code=3,
 84 |             )
 85 | 
 86 |     def _get_model(self):
 87 |         # Returns model from database by id
 88 |         api_url = f'{HUB_API_ROOT}/v1/models/{self.model_id}'
 89 |         headers = self.auth_header
 90 | 
 91 |         try:
 92 |             response = smart_request(api_url, method='get', headers=headers, thread=False, code=0)
 93 |             data = response.json().get('data', None)
 94 | 
 95 |             if data.get('status', None) == 'trained':
 96 |                 raise ValueError(
 97 |                     emojis(f'Model is already trained and uploaded to '
 98 |                            f'https://hub.ultralytics.com/models/{self.model_id} 🚀'))
 99 | 
100 |             if not data.get('data', None):
101 |                 raise ValueError('Dataset may still be processing. Please wait a minute and try again.')  # RF fix
102 |             self.model_id = data['id']
103 | 
104 |             # TODO: restore when server keys when dataset URL and GPU train is working
105 | 
106 |             self.train_args = {
107 |                 'batch': data['batch_size'],
108 |                 'epochs': data['epochs'],
109 |                 'imgsz': data['imgsz'],
110 |                 'patience': data['patience'],
111 |                 'device': data['device'],
112 |                 'cache': data['cache'],
113 |                 'data': data['data']}
114 | 
115 |             self.input_file = data.get('cfg', data['weights'])
116 | 
117 |             # hack for yolov5 cfg adds u
118 |             if 'cfg' in data and 'yolov5' in data['cfg']:
119 |                 self.input_file = data['cfg'].replace('.yaml', 'u.yaml')
120 | 
121 |             return data
122 |         except requests.exceptions.ConnectionError as e:
123 |             raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e
124 |         except Exception:
125 |             raise
126 | 
127 |     def check_disk_space(self):
128 |         if not check_dataset_disk_space(self.model['data']):
129 |             raise MemoryError('Not enough disk space')
130 | 
131 |     def register_callbacks(self, trainer):
132 |         trainer.add_callback('on_pretrain_routine_end', self.on_pretrain_routine_end)
133 |         trainer.add_callback('on_fit_epoch_end', self.on_fit_epoch_end)
134 |         trainer.add_callback('on_model_save', self.on_model_save)
135 |         trainer.add_callback('on_train_end', self.on_train_end)
136 | 
137 |     def on_pretrain_routine_end(self, trainer):
138 |         """
139 |         Start timer for upload rate limit.
140 |         This method does not use trainer. It is passed to all callbacks by default.
141 |         """
142 |         # Start timer for upload rate limit
143 |         LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀')
144 |         self._timers = {'metrics': time(), 'ckpt': time()}  # start timer on self.rate_limit
145 | 
146 |     def on_fit_epoch_end(self, trainer):
147 |         # Upload metrics after val end
148 |         all_plots = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics}
149 | 
150 |         if trainer.epoch == 0:
151 |             model_info = {
152 |                 'model/parameters': get_num_params(trainer.model),
153 |                 'model/GFLOPs': round(get_flops(trainer.model), 3),
154 |                 'model/speed(ms)': round(trainer.validator.speed[1], 3)}
155 |             all_plots = {**all_plots, **model_info}
156 |         self._metrics_queue[trainer.epoch] = json.dumps(all_plots)
157 |         if time() - self._timers['metrics'] > self._rate_limits['metrics']:
158 |             self.upload_metrics()
159 |             self._timers['metrics'] = time()  # reset timer
160 |             self._metrics_queue = {}  # reset queue
161 | 
162 |     def on_model_save(self, trainer):
163 |         # Upload checkpoints with rate limiting
164 |         is_best = trainer.best_fitness == trainer.fitness
165 |         if time() - self._timers['ckpt'] > self._rate_limits['ckpt']:
166 |             LOGGER.info(f'{PREFIX}Uploading checkpoint {self.model_id}')
167 |             self._upload_model(trainer.epoch, trainer.last, is_best)
168 |             self._timers['ckpt'] = time()  # reset timer
169 | 
170 |     def on_train_end(self, trainer):
171 |         # Upload final model and metrics with exponential standoff
172 |         LOGGER.info(f'{PREFIX}Training completed successfully ✅')
173 |         LOGGER.info(f'{PREFIX}Uploading final {self.model_id}')
174 | 
175 |         # hack for fetching mAP
176 |         mAP = trainer.metrics.get('metrics/mAP50-95(B)', 0)
177 |         self._upload_model(trainer.epoch, trainer.best, map=mAP, final=True)  # results[3] is mAP0.5:0.95
178 |         self.alive = False  # stop heartbeats
179 |         LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀')
180 | 
181 |     def _upload_model(self, epoch, weights, is_best=False, map=0.0, final=False):
182 |         # Upload a model to HUB
183 |         file = None
184 |         if Path(weights).is_file():
185 |             with open(weights, 'rb') as f:
186 |                 file = f.read()
187 |         file_param = {'best.pt' if final else 'last.pt': file}
188 |         endpoint = f'{self.api_url}/upload'
189 |         data = {'epoch': epoch}
190 |         if final:
191 |             data.update({'type': 'final', 'map': map})
192 |         else:
193 |             data.update({'type': 'epoch', 'isBest': bool(is_best)})
194 | 
195 |         smart_request(
196 |             endpoint,
197 |             data=data,
198 |             files=file_param,
199 |             headers=self.auth_header,
200 |             retry=10 if final else None,
201 |             timeout=3600 if final else None,
202 |             code=4 if final else 3,
203 |         )
204 | 
205 |     @threaded
206 |     def _start_heartbeat(self):
207 |         self.alive = True
208 |         while self.alive:
209 |             r = smart_request(
210 |                 f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}',
211 |                 json={
212 |                     'agent': AGENT_NAME,
213 |                     'agentId': self.agent_id},
214 |                 headers=self.auth_header,
215 |                 retry=0,
216 |                 code=5,
217 |                 thread=False,
218 |             )
219 |             self.agent_id = r.json().get('data', {}).get('agentId', None)
220 |             sleep(self._rate_limits['heartbeat'])
221 | 


--------------------------------------------------------------------------------
/ultralytics/models/README.md:
--------------------------------------------------------------------------------
  1 | ## Models
  2 | 
  3 | Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration
  4 | files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted
  5 | and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image
  6 | segmentation tasks.
  7 | 
  8 | These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like
  9 | instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms,
 10 | from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this
 11 | directory provides a great starting point for your custom model development needs.
 12 | 
 13 | To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've
 14 | selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full
 15 | details at the Ultralytics [Docs](https://docs.ultralytics.com), and if you need help or have any questions, feel free
 16 | to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!
 17 | 
 18 | ### Usage
 19 | 
 20 | Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:
 21 | 
 22 | ```bash
 23 | yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
 24 | ```
 25 | 
 26 | They may also be used directly in a Python environment, and accepts the same
 27 | [arguments](https://docs.ultralytics.com/cfg/) as in the CLI example above:
 28 | 
 29 | ```python
 30 | from ultralytics import YOLO
 31 | 
 32 | model = YOLO("model.yaml")  # build a YOLOv8n model from scratch
 33 | # YOLO("model.pt")  use pre-trained model if available
 34 | model.info()  # display model information
 35 | model.train(data="coco128.yaml", epochs=100)  # train the model
 36 | ```
 37 | 
 38 | ## Pre-trained Model Architectures
 39 | 
 40 | Ultralytics supports many model architectures. Visit [models](#) page to view detailed information and usage.
 41 | Any of these models can be used by loading their configs or pretrained checkpoints if available.
 42 | 
 43 | <b>What to add your model architecture?</b> [Here's](#) how you can contribute
 44 | 
 45 | ### 1. YOLOv8
 46 | 
 47 | **About** - Cutting edge Detection, Segmentation and Classification models developed by Ultralytics. </br>
 48 | **Citation** -
 49 | Available Models:
 50 | 
 51 | - Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x`
 52 | - Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg`
 53 | - Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls`
 54 | 
 55 | <details><summary>Performance</summary>
 56 | 
 57 | ### Detection
 58 | 
 59 | | Model                                                                                | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
 60 | | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
 61 | | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640                   | 37.3                 | 80.4                           | 0.99                                | 3.2                | 8.7               |
 62 | | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640                   | 44.9                 | 128.4                          | 1.20                                | 11.2               | 28.6              |
 63 | | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640                   | 50.2                 | 234.7                          | 1.83                                | 25.9               | 78.9              |
 64 | | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640                   | 52.9                 | 375.2                          | 2.39                                | 43.7               | 165.2             |
 65 | | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640                   | 53.9                 | 479.1                          | 3.53                                | 68.2               | 257.8             |
 66 | 
 67 | ### Segmentation
 68 | 
 69 | | Model                                                                                        | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>mask<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
 70 | | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
 71 | | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640                   | 36.7                 | 30.5                  | 96.1                           | 1.21                                | 3.4                | 12.6              |
 72 | | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640                   | 44.6                 | 36.8                  | 155.7                          | 1.47                                | 11.8               | 42.6              |
 73 | | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640                   | 49.9                 | 40.8                  | 317.0                          | 2.18                                | 27.3               | 110.2             |
 74 | | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640                   | 52.3                 | 42.6                  | 572.4                          | 2.79                                | 46.0               | 220.5             |
 75 | | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640                   | 53.4                 | 43.4                  | 712.1                          | 4.02                                | 71.8               | 344.1             |
 76 | 
 77 | ### Classification
 78 | 
 79 | | Model                                                                                        | size<br><sup>(pixels) | acc<br><sup>top1 | acc<br><sup>top5 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) at 640 |
 80 | | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ |
 81 | | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224                   | 66.6             | 87.0             | 12.9                           | 0.31                                | 2.7                | 4.3                      |
 82 | | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224                   | 72.3             | 91.1             | 23.4                           | 0.35                                | 6.4                | 13.5                     |
 83 | | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224                   | 76.4             | 93.2             | 85.4                           | 0.62                                | 17.0               | 42.7                     |
 84 | | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224                   | 78.0             | 94.1             | 163.0                          | 0.87                                | 37.5               | 99.7                     |
 85 | | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224                   | 78.4             | 94.3             | 232.0                          | 1.01                                | 57.4               | 154.8                    |
 86 | 
 87 | </details>
 88 | 
 89 | ### 2. YOLOv5u
 90 | 
 91 | **About** - Anchor-free YOLOv5 models with new detection head and better speed-accuracy tradeoff </br>
 92 | **Citation** -
 93 | Available Models:
 94 | 
 95 | - Detection - `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu`
 96 | 
 97 | <details><summary>Performance</summary>
 98 | 
 99 | ### Detection
100 | 
101 | | Model                                                                                  | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
102 | | -------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
103 | | [YOLOv5nu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | 640                   | 34.3                 | 73.6                           | 1.06                                | 2.6                | 7.7               |
104 | | [YOLOv5su](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | 640                   | 43.0                 | 120.7                          | 1.27                                | 9.1                | 24.0              |
105 | | [YOLOv5mu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | 640                   | 49.0                 | 233.9                          | 1.86                                | 25.1               | 64.2              |
106 | | [YOLOv5lu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | 640                   | 52.2                 | 408.4                          | 2.50                                | 53.2               | 135.0             |
107 | | [YOLOv5xu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | 640                   | 53.2                 | 763.2                          | 3.81                                | 97.2               | 246.4             |
108 | 
109 | </details>
110 | 


--------------------------------------------------------------------------------
/ultralytics/nn/autoshape.py:
--------------------------------------------------------------------------------
  1 | # Ultralytics YOLO 🚀, GPL-3.0 license
  2 | """
  3 | Common modules
  4 | """
  5 | 
  6 | from copy import copy
  7 | from pathlib import Path
  8 | 
  9 | import cv2
 10 | import numpy as np
 11 | import pandas as pd
 12 | import requests
 13 | import torch
 14 | import torch.nn as nn
 15 | from PIL import Image, ImageOps
 16 | from torch.cuda import amp
 17 | 
 18 | from ultralytics.nn.autobackend import AutoBackend
 19 | from ultralytics.yolo.data.augment import LetterBox
 20 | from ultralytics.yolo.utils import LOGGER, colorstr
 21 | from ultralytics.yolo.utils.files import increment_path
 22 | from ultralytics.yolo.utils.ops import Profile, make_divisible, non_max_suppression, scale_boxes, xyxy2xywh
 23 | from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
 24 | from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode
 25 | 
 26 | 
 27 | class AutoShape(nn.Module):
 28 |     # YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
 29 |     conf = 0.25  # NMS confidence threshold
 30 |     iou = 0.45  # NMS IoU threshold
 31 |     agnostic = False  # NMS class-agnostic
 32 |     multi_label = False  # NMS multiple labels per box
 33 |     classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
 34 |     max_det = 1000  # maximum number of detections per image
 35 |     amp = False  # Automatic Mixed Precision (AMP) inference
 36 | 
 37 |     def __init__(self, model, verbose=True):
 38 |         super().__init__()
 39 |         if verbose:
 40 |             LOGGER.info('Adding AutoShape... ')
 41 |         copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
 42 |         self.dmb = isinstance(model, AutoBackend)  # DetectMultiBackend() instance
 43 |         self.pt = not self.dmb or model.pt  # PyTorch model
 44 |         self.model = model.eval()
 45 |         if self.pt:
 46 |             m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
 47 |             m.inplace = False  # Detect.inplace=False for safe multithread inference
 48 |             m.export = True  # do not output loss values
 49 | 
 50 |     def _apply(self, fn):
 51 |         # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
 52 |         self = super()._apply(fn)
 53 |         if self.pt:
 54 |             m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
 55 |             m.stride = fn(m.stride)
 56 |             m.grid = list(map(fn, m.grid))
 57 |             if isinstance(m.anchor_grid, list):
 58 |                 m.anchor_grid = list(map(fn, m.anchor_grid))
 59 |         return self
 60 | 
 61 |     @smart_inference_mode()
 62 |     def forward(self, ims, size=640, augment=False, profile=False):
 63 |         # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
 64 |         #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
 65 |         #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
 66 |         #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
 67 |         #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
 68 |         #   numpy:           = np.zeros((640,1280,3))  # HWC
 69 |         #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
 70 |         #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
 71 | 
 72 |         dt = (Profile(), Profile(), Profile())
 73 |         with dt[0]:
 74 |             if isinstance(size, int):  # expand
 75 |                 size = (size, size)
 76 |             p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
 77 |             autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
 78 |             if isinstance(ims, torch.Tensor):  # torch
 79 |                 with amp.autocast(autocast):
 80 |                     return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference
 81 | 
 82 |             # Pre-process
 83 |             n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
 84 |             shape0, shape1, files = [], [], []  # image and inference shapes, filenames
 85 |             for i, im in enumerate(ims):
 86 |                 f = f'image{i}'  # filename
 87 |                 if isinstance(im, (str, Path)):  # filename or uri
 88 |                     im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
 89 |                     im = np.asarray(ImageOps.exif_transpose(im))
 90 |                 elif isinstance(im, Image.Image):  # PIL Image
 91 |                     im, f = np.asarray(ImageOps.exif_transpose(im)), getattr(im, 'filename', f) or f
 92 |                 files.append(Path(f).with_suffix('.jpg').name)
 93 |                 if im.shape[0] < 5:  # image in CHW
 94 |                     im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
 95 |                 im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
 96 |                 s = im.shape[:2]  # HWC
 97 |                 shape0.append(s)  # image shape
 98 |                 g = max(size) / max(s)  # gain
 99 |                 shape1.append([y * g for y in s])
100 |                 ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
101 |             shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size  # inf shape
102 |             x = [LetterBox(shape1, auto=False)(image=im)['img'] for im in ims]  # pad
103 |             x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
104 |             x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
105 | 
106 |         with amp.autocast(autocast):
107 |             # Inference
108 |             with dt[1]:
109 |                 y = self.model(x, augment=augment)  # forward
110 | 
111 |             # Post-process
112 |             with dt[2]:
113 |                 y = non_max_suppression(y if self.dmb else y[0],
114 |                                         self.conf,
115 |                                         self.iou,
116 |                                         self.classes,
117 |                                         self.agnostic,
118 |                                         self.multi_label,
119 |                                         max_det=self.max_det)  # NMS
120 |                 for i in range(n):
121 |                     scale_boxes(shape1, y[i][:, :4], shape0[i])
122 | 
123 |             return Detections(ims, y, files, dt, self.names, x.shape)
124 | 
125 | 
126 | class Detections:
127 |     # YOLOv8 detections class for inference results
128 |     def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
129 |         super().__init__()
130 |         d = pred[0].device  # device
131 |         gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
132 |         self.ims = ims  # list of images as numpy arrays
133 |         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
134 |         self.names = names  # class names
135 |         self.files = files  # image filenames
136 |         self.times = times  # profiling times
137 |         self.xyxy = pred  # xyxy pixels
138 |         self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
139 |         self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
140 |         self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
141 |         self.n = len(self.pred)  # number of images (batch size)
142 |         self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
143 |         self.s = tuple(shape)  # inference BCHW shape
144 | 
145 |     def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
146 |         s, crops = '', []
147 |         for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
148 |             s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
149 |             if pred.shape[0]:
150 |                 for c in pred[:, -1].unique():
151 |                     n = (pred[:, -1] == c).sum()  # detections per class
152 |                     s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
153 |                 s = s.rstrip(', ')
154 |                 if show or save or render or crop:
155 |                     annotator = Annotator(im, example=str(self.names))
156 |                     for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
157 |                         label = f'{self.names[int(cls)]} {conf:.2f}'
158 |                         if crop:
159 |                             file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
160 |                             crops.append({
161 |                                 'box': box,
162 |                                 'conf': conf,
163 |                                 'cls': cls,
164 |                                 'label': label,
165 |                                 'im': save_one_box(box, im, file=file, save=save)})
166 |                         else:  # all others
167 |                             annotator.box_label(box, label if labels else '', color=colors(cls))
168 |                     im = annotator.im
169 |             else:
170 |                 s += '(no detections)'
171 | 
172 |             im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
173 |             if show:
174 |                 im.show(self.files[i])  # show
175 |             if save:
176 |                 f = self.files[i]
177 |                 im.save(save_dir / f)  # save
178 |                 if i == self.n - 1:
179 |                     LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
180 |             if render:
181 |                 self.ims[i] = np.asarray(im)
182 |         if pprint:
183 |             s = s.lstrip('\n')
184 |             return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
185 |         if crop:
186 |             if save:
187 |                 LOGGER.info(f'Saved results to {save_dir}\n')
188 |             return crops
189 | 
190 |     def show(self, labels=True):
191 |         self._run(show=True, labels=labels)  # show results
192 | 
193 |     def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
194 |         save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
195 |         self._run(save=True, labels=labels, save_dir=save_dir)  # save results
196 | 
197 |     def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
198 |         save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
199 |         return self._run(crop=True, save=save, save_dir=save_dir)  # crop results
200 | 
201 |     def render(self, labels=True):
202 |         self._run(render=True, labels=labels)  # render results
203 |         return self.ims
204 | 
205 |     def pandas(self):
206 |         # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
207 |         new = copy(self)  # return copy
208 |         ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
209 |         cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
210 |         for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
211 |             a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
212 |             setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
213 |         return new
214 | 
215 |     def tolist(self):
216 |         # return a list of Detections objects, i.e. 'for result in results.tolist():'
217 |         r = range(self.n)  # iterable
218 |         x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
219 |         # for d in x:
220 |         #    for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
221 |         #        setattr(d, k, getattr(d, k)[0])  # pop out of list
222 |         return x
223 | 
224 |     def print(self):
225 |         LOGGER.info(self.__str__())
226 | 
227 |     def __len__(self):  # override len(results)
228 |         return self.n
229 | 
230 |     def __str__(self):  # override print(results)
231 |         return self._run(pprint=True)  # print results
232 | 
233 |     def __repr__(self):
234 |         return f'YOLOv8 {self.__class__} instance\n' + self.__str__()
235 | 
236 | 
237 | print('works')
238 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/utils/gmc.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | 
  3 | import cv2
  4 | import matplotlib.pyplot as plt
  5 | import numpy as np
  6 | 
  7 | 
  8 | class GMC:
  9 | 
 10 |     def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):
 11 |         super().__init__()
 12 | 
 13 |         self.method = method
 14 |         self.downscale = max(1, int(downscale))
 15 | 
 16 |         if self.method == 'orb':
 17 |             self.detector = cv2.FastFeatureDetector_create(20)
 18 |             self.extractor = cv2.ORB_create()
 19 |             self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
 20 | 
 21 |         elif self.method == 'sift':
 22 |             self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
 23 |             self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
 24 |             self.matcher = cv2.BFMatcher(cv2.NORM_L2)
 25 | 
 26 |         elif self.method == 'ecc':
 27 |             number_of_iterations = 5000
 28 |             termination_eps = 1e-6
 29 |             self.warp_mode = cv2.MOTION_EUCLIDEAN
 30 |             self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
 31 | 
 32 |         elif self.method == 'sparseOptFlow':
 33 |             self.feature_params = dict(maxCorners=1000,
 34 |                                        qualityLevel=0.01,
 35 |                                        minDistance=1,
 36 |                                        blockSize=3,
 37 |                                        useHarrisDetector=False,
 38 |                                        k=0.04)
 39 |             # self.gmc_file = open('GMC_results.txt', 'w')
 40 | 
 41 |         elif self.method in ['file', 'files']:
 42 |             seqName = verbose[0]
 43 |             ablation = verbose[1]
 44 |             if ablation:
 45 |                 filePath = r'tracker/GMC_files/MOT17_ablation'
 46 |             else:
 47 |                 filePath = r'tracker/GMC_files/MOTChallenge'
 48 | 
 49 |             if '-FRCNN' in seqName:
 50 |                 seqName = seqName[:-6]
 51 |             elif '-DPM' in seqName or '-SDP' in seqName:
 52 |                 seqName = seqName[:-4]
 53 |             self.gmcFile = open(f'{filePath}/GMC-{seqName}.txt')
 54 | 
 55 |             if self.gmcFile is None:
 56 |                 raise ValueError(f'Error: Unable to open GMC file in directory:{filePath}')
 57 |         elif self.method in ['none', 'None']:
 58 |             self.method = 'none'
 59 |         else:
 60 |             raise ValueError(f'Error: Unknown CMC method:{method}')
 61 | 
 62 |         self.prevFrame = None
 63 |         self.prevKeyPoints = None
 64 |         self.prevDescriptors = None
 65 | 
 66 |         self.initializedFirstFrame = False
 67 | 
 68 |     def apply(self, raw_frame, detections=None):
 69 |         if self.method in ['orb', 'sift']:
 70 |             return self.applyFeaures(raw_frame, detections)
 71 |         elif self.method == 'ecc':
 72 |             return self.applyEcc(raw_frame, detections)
 73 |         elif self.method == 'sparseOptFlow':
 74 |             return self.applySparseOptFlow(raw_frame, detections)
 75 |         elif self.method == 'file':
 76 |             return self.applyFile(raw_frame, detections)
 77 |         elif self.method == 'none':
 78 |             return np.eye(2, 3)
 79 |         else:
 80 |             return np.eye(2, 3)
 81 | 
 82 |     def applyEcc(self, raw_frame, detections=None):
 83 | 
 84 |         # Initialize
 85 |         height, width, _ = raw_frame.shape
 86 |         frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
 87 |         H = np.eye(2, 3, dtype=np.float32)
 88 | 
 89 |         # Downscale image (TODO: consider using pyramids)
 90 |         if self.downscale > 1.0:
 91 |             frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
 92 |             frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
 93 |             width = width // self.downscale
 94 |             height = height // self.downscale
 95 | 
 96 |         # Handle first frame
 97 |         if not self.initializedFirstFrame:
 98 |             # Initialize data
 99 |             self.prevFrame = frame.copy()
100 | 
101 |             # Initialization done
102 |             self.initializedFirstFrame = True
103 | 
104 |             return H
105 | 
106 |         # Run the ECC algorithm. The results are stored in warp_matrix.
107 |         # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
108 |         try:
109 |             (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
110 |         except Exception as e:
111 |             print(f'Warning: find transform failed. Set warp as identity {e}')
112 | 
113 |         return H
114 | 
115 |     def applyFeaures(self, raw_frame, detections=None):
116 | 
117 |         # Initialize
118 |         height, width, _ = raw_frame.shape
119 |         frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
120 |         H = np.eye(2, 3)
121 | 
122 |         # Downscale image (TODO: consider using pyramids)
123 |         if self.downscale > 1.0:
124 |             # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
125 |             frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
126 |             width = width // self.downscale
127 |             height = height // self.downscale
128 | 
129 |         # find the keypoints
130 |         mask = np.zeros_like(frame)
131 |         # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
132 |         mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255
133 |         if detections is not None:
134 |             for det in detections:
135 |                 tlbr = (det[:4] / self.downscale).astype(np.int_)
136 |                 mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
137 | 
138 |         keypoints = self.detector.detect(frame, mask)
139 | 
140 |         # compute the descriptors
141 |         keypoints, descriptors = self.extractor.compute(frame, keypoints)
142 | 
143 |         # Handle first frame
144 |         if not self.initializedFirstFrame:
145 |             # Initialize data
146 |             self.prevFrame = frame.copy()
147 |             self.prevKeyPoints = copy.copy(keypoints)
148 |             self.prevDescriptors = copy.copy(descriptors)
149 | 
150 |             # Initialization done
151 |             self.initializedFirstFrame = True
152 | 
153 |             return H
154 | 
155 |         # Match descriptors.
156 |         knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
157 | 
158 |         # Filtered matches based on smallest spatial distance
159 |         matches = []
160 |         spatialDistances = []
161 | 
162 |         maxSpatialDistance = 0.25 * np.array([width, height])
163 | 
164 |         # Handle empty matches case
165 |         if len(knnMatches) == 0:
166 |             # Store to next iteration
167 |             self.prevFrame = frame.copy()
168 |             self.prevKeyPoints = copy.copy(keypoints)
169 |             self.prevDescriptors = copy.copy(descriptors)
170 | 
171 |             return H
172 | 
173 |         for m, n in knnMatches:
174 |             if m.distance < 0.9 * n.distance:
175 |                 prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
176 |                 currKeyPointLocation = keypoints[m.trainIdx].pt
177 | 
178 |                 spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
179 |                                    prevKeyPointLocation[1] - currKeyPointLocation[1])
180 | 
181 |                 if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
182 |                         (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
183 |                     spatialDistances.append(spatialDistance)
184 |                     matches.append(m)
185 | 
186 |         meanSpatialDistances = np.mean(spatialDistances, 0)
187 |         stdSpatialDistances = np.std(spatialDistances, 0)
188 | 
189 |         inliesrs = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
190 | 
191 |         goodMatches = []
192 |         prevPoints = []
193 |         currPoints = []
194 |         for i in range(len(matches)):
195 |             if inliesrs[i, 0] and inliesrs[i, 1]:
196 |                 goodMatches.append(matches[i])
197 |                 prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
198 |                 currPoints.append(keypoints[matches[i].trainIdx].pt)
199 | 
200 |         prevPoints = np.array(prevPoints)
201 |         currPoints = np.array(currPoints)
202 | 
203 |         # Draw the keypoint matches on the output image
204 |         if 0:
205 |             matches_img = np.hstack((self.prevFrame, frame))
206 |             matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
207 |             W = np.size(self.prevFrame, 1)
208 |             for m in goodMatches:
209 |                 prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
210 |                 curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
211 |                 curr_pt[0] += W
212 |                 color = np.random.randint(0, 255, (3,))
213 |                 color = (int(color[0]), int(color[1]), int(color[2]))
214 | 
215 |                 matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
216 |                 matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
217 |                 matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
218 | 
219 |             plt.figure()
220 |             plt.imshow(matches_img)
221 |             plt.show()
222 | 
223 |         # Find rigid matrix
224 |         if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)):
225 |             H, inliesrs = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
226 | 
227 |             # Handle downscale
228 |             if self.downscale > 1.0:
229 |                 H[0, 2] *= self.downscale
230 |                 H[1, 2] *= self.downscale
231 |         else:
232 |             print('Warning: not enough matching points')
233 | 
234 |         # Store to next iteration
235 |         self.prevFrame = frame.copy()
236 |         self.prevKeyPoints = copy.copy(keypoints)
237 |         self.prevDescriptors = copy.copy(descriptors)
238 | 
239 |         return H
240 | 
241 |     def applySparseOptFlow(self, raw_frame, detections=None):
242 |         # Initialize
243 |         # t0 = time.time()
244 |         height, width, _ = raw_frame.shape
245 |         frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
246 |         H = np.eye(2, 3)
247 | 
248 |         # Downscale image
249 |         if self.downscale > 1.0:
250 |             # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
251 |             frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
252 | 
253 |         # find the keypoints
254 |         keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
255 | 
256 |         # Handle first frame
257 |         if not self.initializedFirstFrame:
258 |             # Initialize data
259 |             self.prevFrame = frame.copy()
260 |             self.prevKeyPoints = copy.copy(keypoints)
261 | 
262 |             # Initialization done
263 |             self.initializedFirstFrame = True
264 | 
265 |             return H
266 | 
267 |         # find correspondences
268 |         matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
269 | 
270 |         # leave good correspondences only
271 |         prevPoints = []
272 |         currPoints = []
273 | 
274 |         for i in range(len(status)):
275 |             if status[i]:
276 |                 prevPoints.append(self.prevKeyPoints[i])
277 |                 currPoints.append(matchedKeypoints[i])
278 | 
279 |         prevPoints = np.array(prevPoints)
280 |         currPoints = np.array(currPoints)
281 | 
282 |         # Find rigid matrix
283 |         if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)):
284 |             H, inliesrs = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
285 | 
286 |             # Handle downscale
287 |             if self.downscale > 1.0:
288 |                 H[0, 2] *= self.downscale
289 |                 H[1, 2] *= self.downscale
290 |         else:
291 |             print('Warning: not enough matching points')
292 | 
293 |         # Store to next iteration
294 |         self.prevFrame = frame.copy()
295 |         self.prevKeyPoints = copy.copy(keypoints)
296 | 
297 |         # gmc_line = str(1000 * (time.time() - t0)) + "\t" + str(H[0, 0]) + "\t" + str(H[0, 1]) + "\t" + str(
298 |         #     H[0, 2]) + "\t" + str(H[1, 0]) + "\t" + str(H[1, 1]) + "\t" + str(H[1, 2]) + "\n"
299 |         # self.gmc_file.write(gmc_line)
300 | 
301 |         return H
302 | 
303 |     def applyFile(self, raw_frame, detections=None):
304 |         line = self.gmcFile.readline()
305 |         tokens = line.split('\t')
306 |         H = np.eye(2, 3, dtype=np.float_)
307 |         H[0, 0] = float(tokens[1])
308 |         H[0, 1] = float(tokens[2])
309 |         H[0, 2] = float(tokens[3])
310 |         H[1, 0] = float(tokens[4])
311 |         H[1, 1] = float(tokens[5])
312 |         H[1, 2] = float(tokens[6])
313 | 
314 |         return H
315 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/trackers/byte_tracker.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from ..utils import matching
  4 | from ..utils.kalman_filter import KalmanFilterXYAH
  5 | from .basetrack import BaseTrack, TrackState
  6 | 
  7 | 
  8 | class STrack(BaseTrack):
  9 |     shared_kalman = KalmanFilterXYAH()
 10 | 
 11 |     def __init__(self, tlwh, score, cls):
 12 | 
 13 |         # wait activate
 14 |         self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
 15 |         self.kalman_filter = None
 16 |         self.mean, self.covariance = None, None
 17 |         self.is_activated = False
 18 | 
 19 |         self.score = score
 20 |         self.tracklet_len = 0
 21 |         self.cls = cls
 22 |         self.idx = tlwh[-1]
 23 | 
 24 |     def predict(self):
 25 |         mean_state = self.mean.copy()
 26 |         if self.state != TrackState.Tracked:
 27 |             mean_state[7] = 0
 28 |         self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
 29 | 
 30 |     @staticmethod
 31 |     def multi_predict(stracks):
 32 |         if len(stracks) <= 0:
 33 |             return
 34 |         multi_mean = np.asarray([st.mean.copy() for st in stracks])
 35 |         multi_covariance = np.asarray([st.covariance for st in stracks])
 36 |         for i, st in enumerate(stracks):
 37 |             if st.state != TrackState.Tracked:
 38 |                 multi_mean[i][7] = 0
 39 |         multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
 40 |         for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
 41 |             stracks[i].mean = mean
 42 |             stracks[i].covariance = cov
 43 | 
 44 |     @staticmethod
 45 |     def multi_gmc(stracks, H=np.eye(2, 3)):
 46 |         if len(stracks) > 0:
 47 |             multi_mean = np.asarray([st.mean.copy() for st in stracks])
 48 |             multi_covariance = np.asarray([st.covariance for st in stracks])
 49 | 
 50 |             R = H[:2, :2]
 51 |             R8x8 = np.kron(np.eye(4, dtype=float), R)
 52 |             t = H[:2, 2]
 53 | 
 54 |             for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
 55 |                 mean = R8x8.dot(mean)
 56 |                 mean[:2] += t
 57 |                 cov = R8x8.dot(cov).dot(R8x8.transpose())
 58 | 
 59 |                 stracks[i].mean = mean
 60 |                 stracks[i].covariance = cov
 61 | 
 62 |     def activate(self, kalman_filter, frame_id):
 63 |         """Start a new tracklet"""
 64 |         self.kalman_filter = kalman_filter
 65 |         self.track_id = self.next_id()
 66 |         self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
 67 | 
 68 |         self.tracklet_len = 0
 69 |         self.state = TrackState.Tracked
 70 |         if frame_id == 1:
 71 |             self.is_activated = True
 72 |         self.frame_id = frame_id
 73 |         self.start_frame = frame_id
 74 | 
 75 |     def re_activate(self, new_track, frame_id, new_id=False):
 76 |         self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
 77 |                                                                self.convert_coords(new_track.tlwh))
 78 |         self.tracklet_len = 0
 79 |         self.state = TrackState.Tracked
 80 |         self.is_activated = True
 81 |         self.frame_id = frame_id
 82 |         if new_id:
 83 |             self.track_id = self.next_id()
 84 |         self.score = new_track.score
 85 |         self.cls = new_track.cls
 86 |         self.idx = new_track.idx
 87 | 
 88 |     def update(self, new_track, frame_id):
 89 |         """
 90 |         Update a matched track
 91 |         :type new_track: STrack
 92 |         :type frame_id: int
 93 |         :type update_feature: bool
 94 |         :return:
 95 |         """
 96 |         self.frame_id = frame_id
 97 |         self.tracklet_len += 1
 98 | 
 99 |         new_tlwh = new_track.tlwh
100 |         self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
101 |                                                                self.convert_coords(new_tlwh))
102 |         self.state = TrackState.Tracked
103 |         self.is_activated = True
104 | 
105 |         self.score = new_track.score
106 |         self.cls = new_track.cls
107 |         self.idx = new_track.idx
108 | 
109 |     def convert_coords(self, tlwh):
110 |         return self.tlwh_to_xyah(tlwh)
111 | 
112 |     @property
113 |     def tlwh(self):
114 |         """Get current position in bounding box format `(top left x, top left y,
115 |         width, height)`.
116 |         """
117 |         if self.mean is None:
118 |             return self._tlwh.copy()
119 |         ret = self.mean[:4].copy()
120 |         ret[2] *= ret[3]
121 |         ret[:2] -= ret[2:] / 2
122 |         return ret
123 | 
124 |     @property
125 |     def tlbr(self):
126 |         """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
127 |         `(top left, bottom right)`.
128 |         """
129 |         ret = self.tlwh.copy()
130 |         ret[2:] += ret[:2]
131 |         return ret
132 | 
133 |     @staticmethod
134 |     def tlwh_to_xyah(tlwh):
135 |         """Convert bounding box to format `(center x, center y, aspect ratio,
136 |         height)`, where the aspect ratio is `width / height`.
137 |         """
138 |         ret = np.asarray(tlwh).copy()
139 |         ret[:2] += ret[2:] / 2
140 |         ret[2] /= ret[3]
141 |         return ret
142 | 
143 |     @staticmethod
144 |     def tlbr_to_tlwh(tlbr):
145 |         ret = np.asarray(tlbr).copy()
146 |         ret[2:] -= ret[:2]
147 |         return ret
148 | 
149 |     @staticmethod
150 |     def tlwh_to_tlbr(tlwh):
151 |         ret = np.asarray(tlwh).copy()
152 |         ret[2:] += ret[:2]
153 |         return ret
154 | 
155 |     def __repr__(self):
156 |         return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'
157 | 
158 | 
159 | class BYTETracker:
160 | 
161 |     def __init__(self, args, frame_rate=30):
162 |         self.tracked_stracks = []  # type: list[STrack]
163 |         self.lost_stracks = []  # type: list[STrack]
164 |         self.removed_stracks = []  # type: list[STrack]
165 | 
166 |         self.frame_id = 0
167 |         self.args = args
168 |         self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
169 |         self.kalman_filter = self.get_kalmanfilter()
170 | 
171 |     def update(self, results, img=None):
172 |         self.frame_id += 1
173 |         activated_starcks = []
174 |         refind_stracks = []
175 |         lost_stracks = []
176 |         removed_stracks = []
177 | 
178 |         scores = results.conf
179 |         bboxes = results.xyxy
180 |         # add index
181 |         bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
182 |         cls = results.cls
183 | 
184 |         remain_inds = scores > self.args.track_high_thresh
185 |         inds_low = scores > self.args.track_low_thresh
186 |         inds_high = scores < self.args.track_high_thresh
187 | 
188 |         inds_second = np.logical_and(inds_low, inds_high)
189 |         dets_second = bboxes[inds_second]
190 |         dets = bboxes[remain_inds]
191 |         scores_keep = scores[remain_inds]
192 |         scores_second = scores[inds_second]
193 |         cls_keep = cls[remain_inds]
194 |         cls_second = cls[inds_second]
195 | 
196 |         detections = self.init_track(dets, scores_keep, cls_keep, img)
197 |         """ Add newly detected tracklets to tracked_stracks"""
198 |         unconfirmed = []
199 |         tracked_stracks = []  # type: list[STrack]
200 |         for track in self.tracked_stracks:
201 |             if not track.is_activated:
202 |                 unconfirmed.append(track)
203 |             else:
204 |                 tracked_stracks.append(track)
205 |         """ Step 2: First association, with high score detection boxes"""
206 |         strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
207 |         # Predict the current location with KF
208 |         self.multi_predict(strack_pool)
209 |         if hasattr(self, 'gmc'):
210 |             warp = self.gmc.apply(img, dets)
211 |             STrack.multi_gmc(strack_pool, warp)
212 |             STrack.multi_gmc(unconfirmed, warp)
213 | 
214 |         dists = self.get_dists(strack_pool, detections)
215 |         matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
216 | 
217 |         for itracked, idet in matches:
218 |             track = strack_pool[itracked]
219 |             det = detections[idet]
220 |             if track.state == TrackState.Tracked:
221 |                 track.update(det, self.frame_id)
222 |                 activated_starcks.append(track)
223 |             else:
224 |                 track.re_activate(det, self.frame_id, new_id=False)
225 |                 refind_stracks.append(track)
226 |         """ Step 3: Second association, with low score detection boxes"""
227 |         # association the untrack to the low score detections
228 |         detections_second = self.init_track(dets_second, scores_second, cls_second, img)
229 |         r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
230 |         # TODO
231 |         dists = matching.iou_distance(r_tracked_stracks, detections_second)
232 |         matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
233 |         for itracked, idet in matches:
234 |             track = r_tracked_stracks[itracked]
235 |             det = detections_second[idet]
236 |             if track.state == TrackState.Tracked:
237 |                 track.update(det, self.frame_id)
238 |                 activated_starcks.append(track)
239 |             else:
240 |                 track.re_activate(det, self.frame_id, new_id=False)
241 |                 refind_stracks.append(track)
242 | 
243 |         for it in u_track:
244 |             track = r_tracked_stracks[it]
245 |             if track.state != TrackState.Lost:
246 |                 track.mark_lost()
247 |                 lost_stracks.append(track)
248 |         """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
249 |         detections = [detections[i] for i in u_detection]
250 |         dists = self.get_dists(unconfirmed, detections)
251 |         matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
252 |         for itracked, idet in matches:
253 |             unconfirmed[itracked].update(detections[idet], self.frame_id)
254 |             activated_starcks.append(unconfirmed[itracked])
255 |         for it in u_unconfirmed:
256 |             track = unconfirmed[it]
257 |             track.mark_removed()
258 |             removed_stracks.append(track)
259 |         """ Step 4: Init new stracks"""
260 |         for inew in u_detection:
261 |             track = detections[inew]
262 |             if track.score < self.args.new_track_thresh:
263 |                 continue
264 |             track.activate(self.kalman_filter, self.frame_id)
265 |             activated_starcks.append(track)
266 |         """ Step 5: Update state"""
267 |         for track in self.lost_stracks:
268 |             if self.frame_id - track.end_frame > self.max_time_lost:
269 |                 track.mark_removed()
270 |                 removed_stracks.append(track)
271 | 
272 |         self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
273 |         self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_starcks)
274 |         self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks)
275 |         self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks)
276 |         self.lost_stracks.extend(lost_stracks)
277 |         self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks)
278 |         self.removed_stracks.extend(removed_stracks)
279 |         self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
280 |         output = [
281 |             track.tlbr.tolist() + [track.track_id, track.score, track.cls, track.idx] for track in self.tracked_stracks
282 |             if track.is_activated]
283 |         return np.asarray(output, dtype=np.float32)
284 | 
285 |     def get_kalmanfilter(self):
286 |         return KalmanFilterXYAH()
287 | 
288 |     def init_track(self, dets, scores, cls, img=None):
289 |         return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else []  # detections
290 | 
291 |     def get_dists(self, tracks, detections):
292 |         dists = matching.iou_distance(tracks, detections)
293 |         # TODO: mot20
294 |         # if not self.args.mot20:
295 |         dists = matching.fuse_score(dists, detections)
296 |         return dists
297 | 
298 |     def multi_predict(self, tracks):
299 |         STrack.multi_predict(tracks)
300 | 
301 |     @staticmethod
302 |     def joint_stracks(tlista, tlistb):
303 |         exists = {}
304 |         res = []
305 |         for t in tlista:
306 |             exists[t.track_id] = 1
307 |             res.append(t)
308 |         for t in tlistb:
309 |             tid = t.track_id
310 |             if not exists.get(tid, 0):
311 |                 exists[tid] = 1
312 |                 res.append(t)
313 |         return res
314 | 
315 |     @staticmethod
316 |     def sub_stracks(tlista, tlistb):
317 |         stracks = {t.track_id: t for t in tlista}
318 |         for t in tlistb:
319 |             tid = t.track_id
320 |             if stracks.get(tid, 0):
321 |                 del stracks[tid]
322 |         return list(stracks.values())
323 | 
324 |     @staticmethod
325 |     def remove_duplicate_stracks(stracksa, stracksb):
326 |         pdist = matching.iou_distance(stracksa, stracksb)
327 |         pairs = np.where(pdist < 0.15)
328 |         dupa, dupb = [], []
329 |         for p, q in zip(*pairs):
330 |             timep = stracksa[p].frame_id - stracksa[p].start_frame
331 |             timeq = stracksb[q].frame_id - stracksb[q].start_frame
332 |             if timep > timeq:
333 |                 dupb.append(q)
334 |             else:
335 |                 dupa.append(p)
336 |         resa = [t for i, t in enumerate(stracksa) if i not in dupa]
337 |         resb = [t for i, t in enumerate(stracksb) if i not in dupb]
338 |         return resa, resb
339 | 


--------------------------------------------------------------------------------
/ultralytics/tracker/utils/kalman_filter.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy.linalg
  3 | 
  4 | # Table for the 0.95 quantile of the chi-square distribution with N degrees of freedom (contains values for N=1, ..., 9)
  5 | # Taken from MATLAB/Octave's chi2inv function and used as Mahalanobis gating threshold.
  6 | chi2inv95 = {1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070, 6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919}
  7 | 
  8 | 
  9 | class KalmanFilterXYAH:
 10 |     """
 11 |     For bytetrack
 12 |     A simple Kalman filter for tracking bounding boxes in image space.
 13 | 
 14 |     The 8-dimensional state space
 15 | 
 16 |         x, y, a, h, vx, vy, va, vh
 17 | 
 18 |     contains the bounding box center position (x, y), aspect ratio a, height h,
 19 |     and their respective velocities.
 20 | 
 21 |     Object motion follows a constant velocity model. The bounding box location
 22 |     (x, y, a, h) is taken as direct observation of the state space (linear
 23 |     observation model).
 24 | 
 25 |     """
 26 | 
 27 |     def __init__(self):
 28 |         ndim, dt = 4, 1.
 29 | 
 30 |         # Create Kalman filter model matrices.
 31 |         self._motion_mat = np.eye(2 * ndim, 2 * ndim)
 32 |         for i in range(ndim):
 33 |             self._motion_mat[i, ndim + i] = dt
 34 |         self._update_mat = np.eye(ndim, 2 * ndim)
 35 | 
 36 |         # Motion and observation uncertainty are chosen relative to the current
 37 |         # state estimate. These weights control the amount of uncertainty in
 38 |         # the model. This is a bit hacky.
 39 |         self._std_weight_position = 1. / 20
 40 |         self._std_weight_velocity = 1. / 160
 41 | 
 42 |     def initiate(self, measurement):
 43 |         """Create track from unassociated measurement.
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         measurement : ndarray
 48 |             Bounding box coordinates (x, y, a, h) with center position (x, y),
 49 |             aspect ratio a, and height h.
 50 | 
 51 |         Returns
 52 |         -------
 53 |         (ndarray, ndarray)
 54 |             Returns the mean vector (8 dimensional) and covariance matrix (8x8
 55 |             dimensional) of the new track. Unobserved velocities are initialized
 56 |             to 0 mean.
 57 | 
 58 |         """
 59 |         mean_pos = measurement
 60 |         mean_vel = np.zeros_like(mean_pos)
 61 |         mean = np.r_[mean_pos, mean_vel]
 62 | 
 63 |         std = [
 64 |             2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2,
 65 |             2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3],
 66 |             10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3]]
 67 |         covariance = np.diag(np.square(std))
 68 |         return mean, covariance
 69 | 
 70 |     def predict(self, mean, covariance):
 71 |         """Run Kalman filter prediction step.
 72 | 
 73 |         Parameters
 74 |         ----------
 75 |         mean : ndarray
 76 |             The 8 dimensional mean vector of the object state at the previous
 77 |             time step.
 78 |         covariance : ndarray
 79 |             The 8x8 dimensional covariance matrix of the object state at the
 80 |             previous time step.
 81 | 
 82 |         Returns
 83 |         -------
 84 |         (ndarray, ndarray)
 85 |             Returns the mean vector and covariance matrix of the predicted
 86 |             state. Unobserved velocities are initialized to 0 mean.
 87 | 
 88 |         """
 89 |         std_pos = [
 90 |             self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2,
 91 |             self._std_weight_position * mean[3]]
 92 |         std_vel = [
 93 |             self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5,
 94 |             self._std_weight_velocity * mean[3]]
 95 |         motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
 96 | 
 97 |         # mean = np.dot(self._motion_mat, mean)
 98 |         mean = np.dot(mean, self._motion_mat.T)
 99 |         covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
100 | 
101 |         return mean, covariance
102 | 
103 |     def project(self, mean, covariance):
104 |         """Project state distribution to measurement space.
105 | 
106 |         Parameters
107 |         ----------
108 |         mean : ndarray
109 |             The state's mean vector (8 dimensional array).
110 |         covariance : ndarray
111 |             The state's covariance matrix (8x8 dimensional).
112 | 
113 |         Returns
114 |         -------
115 |         (ndarray, ndarray)
116 |             Returns the projected mean and covariance matrix of the given state
117 |             estimate.
118 | 
119 |         """
120 |         std = [
121 |             self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1,
122 |             self._std_weight_position * mean[3]]
123 |         innovation_cov = np.diag(np.square(std))
124 | 
125 |         mean = np.dot(self._update_mat, mean)
126 |         covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
127 |         return mean, covariance + innovation_cov
128 | 
129 |     def multi_predict(self, mean, covariance):
130 |         """Run Kalman filter prediction step (Vectorized version).
131 |         Parameters
132 |         ----------
133 |         mean : ndarray
134 |             The Nx8 dimensional mean matrix of the object states at the previous
135 |             time step.
136 |         covariance : ndarray
137 |             The Nx8x8 dimensional covariance matrics of the object states at the
138 |             previous time step.
139 |         Returns
140 |         -------
141 |         (ndarray, ndarray)
142 |             Returns the mean vector and covariance matrix of the predicted
143 |             state. Unobserved velocities are initialized to 0 mean.
144 |         """
145 |         std_pos = [
146 |             self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3],
147 |             1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3]]
148 |         std_vel = [
149 |             self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3],
150 |             1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3]]
151 |         sqr = np.square(np.r_[std_pos, std_vel]).T
152 | 
153 |         motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
154 |         motion_cov = np.asarray(motion_cov)
155 | 
156 |         mean = np.dot(mean, self._motion_mat.T)
157 |         left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
158 |         covariance = np.dot(left, self._motion_mat.T) + motion_cov
159 | 
160 |         return mean, covariance
161 | 
162 |     def update(self, mean, covariance, measurement):
163 |         """Run Kalman filter correction step.
164 | 
165 |         Parameters
166 |         ----------
167 |         mean : ndarray
168 |             The predicted state's mean vector (8 dimensional).
169 |         covariance : ndarray
170 |             The state's covariance matrix (8x8 dimensional).
171 |         measurement : ndarray
172 |             The 4 dimensional measurement vector (x, y, a, h), where (x, y)
173 |             is the center position, a the aspect ratio, and h the height of the
174 |             bounding box.
175 | 
176 |         Returns
177 |         -------
178 |         (ndarray, ndarray)
179 |             Returns the measurement-corrected state distribution.
180 | 
181 |         """
182 |         projected_mean, projected_cov = self.project(mean, covariance)
183 | 
184 |         chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
185 |         kalman_gain = scipy.linalg.cho_solve((chol_factor, lower),
186 |                                              np.dot(covariance, self._update_mat.T).T,
187 |                                              check_finite=False).T
188 |         innovation = measurement - projected_mean
189 | 
190 |         new_mean = mean + np.dot(innovation, kalman_gain.T)
191 |         new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))
192 |         return new_mean, new_covariance
193 | 
194 |     def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
195 |         """Compute gating distance between state distribution and measurements.
196 |         A suitable distance threshold can be obtained from `chi2inv95`. If
197 |         `only_position` is False, the chi-square distribution has 4 degrees of
198 |         freedom, otherwise 2.
199 |         Parameters
200 |         ----------
201 |         mean : ndarray
202 |             Mean vector over the state distribution (8 dimensional).
203 |         covariance : ndarray
204 |             Covariance of the state distribution (8x8 dimensional).
205 |         measurements : ndarray
206 |             An Nx4 dimensional matrix of N measurements, each in
207 |             format (x, y, a, h) where (x, y) is the bounding box center
208 |             position, a the aspect ratio, and h the height.
209 |         only_position : Optional[bool]
210 |             If True, distance computation is done with respect to the bounding
211 |             box center position only.
212 |         Returns
213 |         -------
214 |         ndarray
215 |             Returns an array of length N, where the i-th element contains the
216 |             squared Mahalanobis distance between (mean, covariance) and
217 |             `measurements[i]`.
218 |         """
219 |         mean, covariance = self.project(mean, covariance)
220 |         if only_position:
221 |             mean, covariance = mean[:2], covariance[:2, :2]
222 |             measurements = measurements[:, :2]
223 | 
224 |         d = measurements - mean
225 |         if metric == 'gaussian':
226 |             return np.sum(d * d, axis=1)
227 |         elif metric == 'maha':
228 |             cholesky_factor = np.linalg.cholesky(covariance)
229 |             z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True)
230 |             return np.sum(z * z, axis=0)  # square maha
231 |         else:
232 |             raise ValueError('invalid distance metric')
233 | 
234 | 
235 | class KalmanFilterXYWH:
236 |     """
237 |     For bot-sort
238 |     A simple Kalman filter for tracking bounding boxes in image space.
239 | 
240 |     The 8-dimensional state space
241 | 
242 |         x, y, w, h, vx, vy, vw, vh
243 | 
244 |     contains the bounding box center position (x, y), width w, height h,
245 |     and their respective velocities.
246 | 
247 |     Object motion follows a constant velocity model. The bounding box location
248 |     (x, y, w, h) is taken as direct observation of the state space (linear
249 |     observation model).
250 | 
251 |     """
252 | 
253 |     def __init__(self):
254 |         ndim, dt = 4, 1.
255 | 
256 |         # Create Kalman filter model matrices.
257 |         self._motion_mat = np.eye(2 * ndim, 2 * ndim)
258 |         for i in range(ndim):
259 |             self._motion_mat[i, ndim + i] = dt
260 |         self._update_mat = np.eye(ndim, 2 * ndim)
261 | 
262 |         # Motion and observation uncertainty are chosen relative to the current
263 |         # state estimate. These weights control the amount of uncertainty in
264 |         # the model. This is a bit hacky.
265 |         self._std_weight_position = 1. / 20
266 |         self._std_weight_velocity = 1. / 160
267 | 
268 |     def initiate(self, measurement):
269 |         """Create track from unassociated measurement.
270 | 
271 |         Parameters
272 |         ----------
273 |         measurement : ndarray
274 |             Bounding box coordinates (x, y, w, h) with center position (x, y),
275 |             width w, and height h.
276 | 
277 |         Returns
278 |         -------
279 |         (ndarray, ndarray)
280 |             Returns the mean vector (8 dimensional) and covariance matrix (8x8
281 |             dimensional) of the new track. Unobserved velocities are initialized
282 |             to 0 mean.
283 | 
284 |         """
285 |         mean_pos = measurement
286 |         mean_vel = np.zeros_like(mean_pos)
287 |         mean = np.r_[mean_pos, mean_vel]
288 | 
289 |         std = [
290 |             2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3],
291 |             2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3],
292 |             10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3],
293 |             10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3]]
294 |         covariance = np.diag(np.square(std))
295 |         return mean, covariance
296 | 
297 |     def predict(self, mean, covariance):
298 |         """Run Kalman filter prediction step.
299 | 
300 |         Parameters
301 |         ----------
302 |         mean : ndarray
303 |             The 8 dimensional mean vector of the object state at the previous
304 |             time step.
305 |         covariance : ndarray
306 |             The 8x8 dimensional covariance matrix of the object state at the
307 |             previous time step.
308 | 
309 |         Returns
310 |         -------
311 |         (ndarray, ndarray)
312 |             Returns the mean vector and covariance matrix of the predicted
313 |             state. Unobserved velocities are initialized to 0 mean.
314 | 
315 |         """
316 |         std_pos = [
317 |             self._std_weight_position * mean[2], self._std_weight_position * mean[3],
318 |             self._std_weight_position * mean[2], self._std_weight_position * mean[3]]
319 |         std_vel = [
320 |             self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3],
321 |             self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3]]
322 |         motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
323 | 
324 |         mean = np.dot(mean, self._motion_mat.T)
325 |         covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
326 | 
327 |         return mean, covariance
328 | 
329 |     def project(self, mean, covariance):
330 |         """Project state distribution to measurement space.
331 | 
332 |         Parameters
333 |         ----------
334 |         mean : ndarray
335 |             The state's mean vector (8 dimensional array).
336 |         covariance : ndarray
337 |             The state's covariance matrix (8x8 dimensional).
338 | 
339 |         Returns
340 |         -------
341 |         (ndarray, ndarray)
342 |             Returns the projected mean and covariance matrix of the given state
343 |             estimate.
344 | 
345 |         """
346 |         std = [
347 |             self._std_weight_position * mean[2], self._std_weight_position * mean[3],
348 |             self._std_weight_position * mean[2], self._std_weight_position * mean[3]]
349 |         innovation_cov = np.diag(np.square(std))
350 | 
351 |         mean = np.dot(self._update_mat, mean)
352 |         covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
353 |         return mean, covariance + innovation_cov
354 | 
355 |     def multi_predict(self, mean, covariance):
356 |         """Run Kalman filter prediction step (Vectorized version).
357 |         Parameters
358 |         ----------
359 |         mean : ndarray
360 |             The Nx8 dimensional mean matrix of the object states at the previous
361 |             time step.
362 |         covariance : ndarray
363 |             The Nx8x8 dimensional covariance matrics of the object states at the
364 |             previous time step.
365 |         Returns
366 |         -------
367 |         (ndarray, ndarray)
368 |             Returns the mean vector and covariance matrix of the predicted
369 |             state. Unobserved velocities are initialized to 0 mean.
370 |         """
371 |         std_pos = [
372 |             self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3],
373 |             self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3]]
374 |         std_vel = [
375 |             self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3],
376 |             self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3]]
377 |         sqr = np.square(np.r_[std_pos, std_vel]).T
378 | 
379 |         motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
380 |         motion_cov = np.asarray(motion_cov)
381 | 
382 |         mean = np.dot(mean, self._motion_mat.T)
383 |         left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
384 |         covariance = np.dot(left, self._motion_mat.T) + motion_cov
385 | 
386 |         return mean, covariance
387 | 
388 |     def update(self, mean, covariance, measurement):
389 |         """Run Kalman filter correction step.
390 | 
391 |         Parameters
392 |         ----------
393 |         mean : ndarray
394 |             The predicted state's mean vector (8 dimensional).
395 |         covariance : ndarray
396 |             The state's covariance matrix (8x8 dimensional).
397 |         measurement : ndarray
398 |             The 4 dimensional measurement vector (x, y, w, h), where (x, y)
399 |             is the center position, w the width, and h the height of the
400 |             bounding box.
401 | 
402 |         Returns
403 |         -------
404 |         (ndarray, ndarray)
405 |             Returns the measurement-corrected state distribution.
406 | 
407 |         """
408 |         projected_mean, projected_cov = self.project(mean, covariance)
409 | 
410 |         chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
411 |         kalman_gain = scipy.linalg.cho_solve((chol_factor, lower),
412 |                                              np.dot(covariance, self._update_mat.T).T,
413 |                                              check_finite=False).T
414 |         innovation = measurement - projected_mean
415 | 
416 |         new_mean = mean + np.dot(innovation, kalman_gain.T)
417 |         new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))
418 |         return new_mean, new_covariance
419 | 
420 |     def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
421 |         """Compute gating distance between state distribution and measurements.
422 |         A suitable distance threshold can be obtained from `chi2inv95`. If
423 |         `only_position` is False, the chi-square distribution has 4 degrees of
424 |         freedom, otherwise 2.
425 |         Parameters
426 |         ----------
427 |         mean : ndarray
428 |             Mean vector over the state distribution (8 dimensional).
429 |         covariance : ndarray
430 |             Covariance of the state distribution (8x8 dimensional).
431 |         measurements : ndarray
432 |             An Nx4 dimensional matrix of N measurements, each in
433 |             format (x, y, a, h) where (x, y) is the bounding box center
434 |             position, a the aspect ratio, and h the height.
435 |         only_position : Optional[bool]
436 |             If True, distance computation is done with respect to the bounding
437 |             box center position only.
438 |         Returns
439 |         -------
440 |         ndarray
441 |             Returns an array of length N, where the i-th element contains the
442 |             squared Mahalanobis distance between (mean, covariance) and
443 |             `measurements[i]`.
444 |         """
445 |         mean, covariance = self.project(mean, covariance)
446 |         if only_position:
447 |             mean, covariance = mean[:2], covariance[:2, :2]
448 |             measurements = measurements[:, :2]
449 | 
450 |         d = measurements - mean
451 |         if metric == 'gaussian':
452 |             return np.sum(d * d, axis=1)
453 |         elif metric == 'maha':
454 |             cholesky_factor = np.linalg.cholesky(covariance)
455 |             z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True)
456 |             return np.sum(z * z, axis=0)  # square maha
457 |         else:
458 |             raise ValueError('invalid distance metric')
459 | 


--------------------------------------------------------------------------------
/ultralytics/nn/modules.py:
--------------------------------------------------------------------------------
  1 | # Ultralytics YOLO 🚀, GPL-3.0 license
  2 | """
  3 | Common modules
  4 | """
  5 | 
  6 | import math
  7 | 
  8 | import torch
  9 | import torch.nn as nn
 10 | 
 11 | from ultralytics.yolo.utils.tal import dist2bbox, make_anchors
 12 | 
 13 | 
 14 | def autopad(k, p=None, d=1):  # kernel, padding, dilation
 15 |     # Pad to 'same' shape outputs
 16 |     if d > 1:
 17 |         k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
 18 |     if p is None:
 19 |         p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
 20 |     return p
 21 | 
 22 | 
 23 | class Conv(nn.Module):
 24 |     # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
 25 |     default_act = nn.SiLU()  # default activation
 26 | 
 27 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
 28 |         super().__init__()
 29 |         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
 30 |         self.bn = nn.BatchNorm2d(c2)
 31 |         self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
 32 | 
 33 |     def forward(self, x):
 34 |         return self.act(self.bn(self.conv(x)))
 35 | 
 36 |     def forward_fuse(self, x):
 37 |         return self.act(self.conv(x))
 38 | 
 39 | 
 40 | class DWConv(Conv):
 41 |     # Depth-wise convolution
 42 |     def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
 43 |         super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
 44 | 
 45 | 
 46 | class DWConvTranspose2d(nn.ConvTranspose2d):
 47 |     # Depth-wise transpose convolution
 48 |     def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
 49 |         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
 50 | 
 51 | 
 52 | class ConvTranspose(nn.Module):
 53 |     # Convolution transpose 2d layer
 54 |     default_act = nn.SiLU()  # default activation
 55 | 
 56 |     def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
 57 |         super().__init__()
 58 |         self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
 59 |         self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
 60 |         self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
 61 | 
 62 |     def forward(self, x):
 63 |         return self.act(self.bn(self.conv_transpose(x)))
 64 | 
 65 |     def forward_fuse(self, x):
 66 |         return self.act(self.conv_transpose(x))
 67 | 
 68 | 
 69 | class DFL(nn.Module):
 70 |     # Integral module of Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
 71 |     def __init__(self, c1=16):
 72 |         super().__init__()
 73 |         self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
 74 |         x = torch.arange(c1, dtype=torch.float)
 75 |         self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
 76 |         self.c1 = c1
 77 | 
 78 |     def forward(self, x):
 79 |         b, c, a = x.shape  # batch, channels, anchors
 80 |         return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
 81 |         # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
 82 | 
 83 | 
 84 | class TransformerLayer(nn.Module):
 85 |     # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
 86 |     def __init__(self, c, num_heads):
 87 |         super().__init__()
 88 |         self.q = nn.Linear(c, c, bias=False)
 89 |         self.k = nn.Linear(c, c, bias=False)
 90 |         self.v = nn.Linear(c, c, bias=False)
 91 |         self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
 92 |         self.fc1 = nn.Linear(c, c, bias=False)
 93 |         self.fc2 = nn.Linear(c, c, bias=False)
 94 | 
 95 |     def forward(self, x):
 96 |         x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
 97 |         x = self.fc2(self.fc1(x)) + x
 98 |         return x
 99 | 
100 | 
101 | class TransformerBlock(nn.Module):
102 |     # Vision Transformer https://arxiv.org/abs/2010.11929
103 |     def __init__(self, c1, c2, num_heads, num_layers):
104 |         super().__init__()
105 |         self.conv = None
106 |         if c1 != c2:
107 |             self.conv = Conv(c1, c2)
108 |         self.linear = nn.Linear(c2, c2)  # learnable position embedding
109 |         self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
110 |         self.c2 = c2
111 | 
112 |     def forward(self, x):
113 |         if self.conv is not None:
114 |             x = self.conv(x)
115 |         b, _, w, h = x.shape
116 |         p = x.flatten(2).permute(2, 0, 1)
117 |         return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
118 | 
119 | 
120 | class Bottleneck(nn.Module):
121 |     # Standard bottleneck
122 |     def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
123 |         super().__init__()
124 |         c_ = int(c2 * e)  # hidden channels
125 |         self.cv1 = Conv(c1, c_, k[0], 1)
126 |         self.cv2 = Conv(c_, c2, k[1], 1, g=g)
127 |         self.add = shortcut and c1 == c2
128 | 
129 |     def forward(self, x):
130 |         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
131 | 
132 | 
133 | class BottleneckCSP(nn.Module):
134 |     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
135 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
136 |         super().__init__()
137 |         c_ = int(c2 * e)  # hidden channels
138 |         self.cv1 = Conv(c1, c_, 1, 1)
139 |         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
140 |         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
141 |         self.cv4 = Conv(2 * c_, c2, 1, 1)
142 |         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
143 |         self.act = nn.SiLU()
144 |         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
145 | 
146 |     def forward(self, x):
147 |         y1 = self.cv3(self.m(self.cv1(x)))
148 |         y2 = self.cv2(x)
149 |         return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
150 | 
151 | 
152 | class C3(nn.Module):
153 |     # CSP Bottleneck with 3 convolutions
154 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
155 |         super().__init__()
156 |         c_ = int(c2 * e)  # hidden channels
157 |         self.cv1 = Conv(c1, c_, 1, 1)
158 |         self.cv2 = Conv(c1, c_, 1, 1)
159 |         self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
160 |         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
161 | 
162 |     def forward(self, x):
163 |         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
164 | 
165 | 
166 | class C2(nn.Module):
167 |     # CSP Bottleneck with 2 convolutions
168 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
169 |         super().__init__()
170 |         self.c = int(c2 * e)  # hidden channels
171 |         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
172 |         self.cv2 = Conv(2 * self.c, c2, 1)  # optional act=FReLU(c2)
173 |         # self.attention = ChannelAttention(2 * self.c)  # or SpatialAttention()
174 |         self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
175 | 
176 |     def forward(self, x):
177 |         a, b = self.cv1(x).split((self.c, self.c), 1)
178 |         return self.cv2(torch.cat((self.m(a), b), 1))
179 | 
180 | 
181 | class C2f(nn.Module):
182 |     # CSP Bottleneck with 2 convolutions
183 |     def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
184 |         super().__init__()
185 |         self.c = int(c2 * e)  # hidden channels
186 |         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
187 |         self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
188 |         self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
189 | 
190 |     def forward(self, x):
191 |         y = list(self.cv1(x).split((self.c, self.c), 1))
192 |         y.extend(m(y[-1]) for m in self.m)
193 |         return self.cv2(torch.cat(y, 1))
194 | 
195 | 
196 | class ChannelAttention(nn.Module):
197 |     # Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet
198 |     def __init__(self, channels: int) -> None:
199 |         super().__init__()
200 |         self.pool = nn.AdaptiveAvgPool2d(1)
201 |         self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
202 |         self.act = nn.Sigmoid()
203 | 
204 |     def forward(self, x: torch.Tensor) -> torch.Tensor:
205 |         return x * self.act(self.fc(self.pool(x)))
206 | 
207 | 
208 | class SpatialAttention(nn.Module):
209 |     # Spatial-attention module
210 |     def __init__(self, kernel_size=7):
211 |         super().__init__()
212 |         assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
213 |         padding = 3 if kernel_size == 7 else 1
214 |         self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
215 |         self.act = nn.Sigmoid()
216 | 
217 |     def forward(self, x):
218 |         return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))
219 | 
220 | 
221 | class CBAM(nn.Module):
222 |     # Convolutional Block Attention Module
223 |     def __init__(self, c1, kernel_size=7):  # ch_in, kernels
224 |         super().__init__()
225 |         self.channel_attention = ChannelAttention(c1)
226 |         self.spatial_attention = SpatialAttention(kernel_size)
227 | 
228 |     def forward(self, x):
229 |         return self.spatial_attention(self.channel_attention(x))
230 | 
231 | 
232 | class C1(nn.Module):
233 |     # CSP Bottleneck with 1 convolution
234 |     def __init__(self, c1, c2, n=1):  # ch_in, ch_out, number
235 |         super().__init__()
236 |         self.cv1 = Conv(c1, c2, 1, 1)
237 |         self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
238 | 
239 |     def forward(self, x):
240 |         y = self.cv1(x)
241 |         return self.m(y) + y
242 | 
243 | 
244 | class C3x(C3):
245 |     # C3 module with cross-convolutions
246 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
247 |         super().__init__(c1, c2, n, shortcut, g, e)
248 |         self.c_ = int(c2 * e)
249 |         self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)))
250 | 
251 | 
252 | class C3TR(C3):
253 |     # C3 module with TransformerBlock()
254 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
255 |         super().__init__(c1, c2, n, shortcut, g, e)
256 |         c_ = int(c2 * e)
257 |         self.m = TransformerBlock(c_, c_, 4, n)
258 | 
259 | 
260 | class C3Ghost(C3):
261 |     # C3 module with GhostBottleneck()
262 |     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
263 |         super().__init__(c1, c2, n, shortcut, g, e)
264 |         c_ = int(c2 * e)  # hidden channels
265 |         self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
266 | 
267 | 
268 | class SPP(nn.Module):
269 |     # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
270 |     def __init__(self, c1, c2, k=(5, 9, 13)):
271 |         super().__init__()
272 |         c_ = c1 // 2  # hidden channels
273 |         self.cv1 = Conv(c1, c_, 1, 1)
274 |         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
275 |         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
276 | 
277 |     def forward(self, x):
278 |         x = self.cv1(x)
279 |         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
280 | 
281 | 
282 | class SPPF(nn.Module):
283 |     # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
284 |     def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
285 |         super().__init__()
286 |         c_ = c1 // 2  # hidden channels
287 |         self.cv1 = Conv(c1, c_, 1, 1)
288 |         self.cv2 = Conv(c_ * 4, c2, 1, 1)
289 |         self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
290 | 
291 |     def forward(self, x):
292 |         x = self.cv1(x)
293 |         y1 = self.m(x)
294 |         y2 = self.m(y1)
295 |         return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
296 | 
297 | 
298 | class Focus(nn.Module):
299 |     # Focus wh information into c-space
300 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
301 |         super().__init__()
302 |         self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
303 |         # self.contract = Contract(gain=2)
304 | 
305 |     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
306 |         return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
307 |         # return self.conv(self.contract(x))
308 | 
309 | 
310 | class GhostConv(nn.Module):
311 |     # Ghost Convolution https://github.com/huawei-noah/ghostnet
312 |     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
313 |         super().__init__()
314 |         c_ = c2 // 2  # hidden channels
315 |         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
316 |         self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
317 | 
318 |     def forward(self, x):
319 |         y = self.cv1(x)
320 |         return torch.cat((y, self.cv2(y)), 1)
321 | 
322 | 
323 | class GhostBottleneck(nn.Module):
324 |     # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
325 |     def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
326 |         super().__init__()
327 |         c_ = c2 // 2
328 |         self.conv = nn.Sequential(
329 |             GhostConv(c1, c_, 1, 1),  # pw
330 |             DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
331 |             GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
332 |         self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
333 |                                                                             act=False)) if s == 2 else nn.Identity()
334 | 
335 |     def forward(self, x):
336 |         return self.conv(x) + self.shortcut(x)
337 | 
338 | 
339 | class Concat(nn.Module):
340 |     # Concatenate a list of tensors along dimension
341 |     def __init__(self, dimension=1):
342 |         super().__init__()
343 |         self.d = dimension
344 | 
345 |     def forward(self, x):
346 |         return torch.cat(x, self.d)
347 | 
348 | 
349 | class Proto(nn.Module):
350 |     # YOLOv8 mask Proto module for segmentation models
351 |     def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
352 |         super().__init__()
353 |         self.cv1 = Conv(c1, c_, k=3)
354 |         self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True)  # nn.Upsample(scale_factor=2, mode='nearest')
355 |         self.cv2 = Conv(c_, c_, k=3)
356 |         self.cv3 = Conv(c_, c2)
357 | 
358 |     def forward(self, x):
359 |         return self.cv3(self.cv2(self.upsample(self.cv1(x))))
360 | 
361 | 
362 | class Ensemble(nn.ModuleList):
363 |     # Ensemble of models
364 |     def __init__(self):
365 |         super().__init__()
366 | 
367 |     def forward(self, x, augment=False, profile=False, visualize=False):
368 |         y = [module(x, augment, profile, visualize)[0] for module in self]
369 |         # y = torch.stack(y).max(0)[0]  # max ensemble
370 |         # y = torch.stack(y).mean(0)  # mean ensemble
371 |         y = torch.cat(y, 1)  # nms ensemble
372 |         return y, None  # inference, train output
373 | 
374 | 
375 | # heads
376 | class Detect(nn.Module):
377 |     # YOLOv8 Detect head for detection models
378 |     dynamic = False  # force grid reconstruction
379 |     export = False  # export mode
380 |     shape = None
381 |     anchors = torch.empty(0)  # init
382 |     strides = torch.empty(0)  # init
383 | 
384 |     def __init__(self, nc=80, ch=()):  # detection layer
385 |         super().__init__()
386 |         self.nc = nc  # number of classes
387 |         self.nl = len(ch)  # number of detection layers
388 |         self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
389 |         self.no = nc + self.reg_max * 4  # number of outputs per anchor
390 |         self.stride = torch.zeros(self.nl)  # strides computed during build
391 | 
392 |         c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
393 |         self.cv2 = nn.ModuleList(
394 |             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
395 |         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
396 |         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
397 | 
398 |     def forward(self, x):
399 |         shape = x[0].shape  # BCHW
400 |         for i in range(self.nl):
401 |             x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
402 |         if self.training:
403 |             return x
404 |         elif self.dynamic or self.shape != shape:
405 |             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
406 |             self.shape = shape
407 | 
408 |         box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1)
409 |         dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
410 |         y = torch.cat((dbox, cls.sigmoid()), 1)
411 |         return y if self.export else (y, x)
412 | 
413 |     def bias_init(self):
414 |         # Initialize Detect() biases, WARNING: requires stride availability
415 |         m = self  # self.model[-1]  # Detect() module
416 |         # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
417 |         # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
418 |         for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
419 |             a[-1].bias.data[:] = 1.0  # box
420 |             b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
421 | 
422 | 
423 | class Segment(Detect):
424 |     # YOLOv8 Segment head for segmentation models
425 |     def __init__(self, nc=80, nm=32, npr=256, ch=()):
426 |         super().__init__(nc, ch)
427 |         self.nm = nm  # number of masks
428 |         self.npr = npr  # number of protos
429 |         self.proto = Proto(ch[0], self.npr, self.nm)  # protos
430 |         self.detect = Detect.forward
431 | 
432 |         c4 = max(ch[0] // 4, self.nm)
433 |         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
434 | 
435 |     def forward(self, x):
436 |         p = self.proto(x[0])  # mask protos
437 |         bs = p.shape[0]  # batch size
438 | 
439 |         mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
440 |         x = self.detect(self, x)
441 |         if self.training:
442 |             return x, mc, p
443 |         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
444 | 
445 | 
446 | class Classify(nn.Module):
447 |     # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
448 |     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
449 |         super().__init__()
450 |         c_ = 1280  # efficientnet_b0 size
451 |         self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
452 |         self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
453 |         self.drop = nn.Dropout(p=0.0, inplace=True)
454 |         self.linear = nn.Linear(c_, c2)  # to x(b,c2)
455 | 
456 |     def forward(self, x):
457 |         if isinstance(x, list):
458 |             x = torch.cat(x, 1)
459 |         x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
460 |         return x if self.training else x.softmax(1)
461 | 


--------------------------------------------------------------------------------
/ultralytics/nn/autobackend.py:
--------------------------------------------------------------------------------
  1 | # Ultralytics YOLO 🚀, GPL-3.0 license
  2 | import ast
  3 | import contextlib
  4 | import json
  5 | import platform
  6 | import zipfile
  7 | from collections import OrderedDict, namedtuple
  8 | from pathlib import Path
  9 | from urllib.parse import urlparse
 10 | 
 11 | import cv2
 12 | import numpy as np
 13 | import torch
 14 | import torch.nn as nn
 15 | from PIL import Image
 16 | 
 17 | from ultralytics.yolo.utils import LOGGER, ROOT, yaml_load
 18 | from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_version, check_yaml
 19 | from ultralytics.yolo.utils.downloads import attempt_download_asset, is_url
 20 | from ultralytics.yolo.utils.ops import xywh2xyxy
 21 | 
 22 | 
 23 | def check_class_names(names):
 24 |     # Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts.
 25 |     if isinstance(names, list):  # names is a list
 26 |         names = dict(enumerate(names))  # convert to dict
 27 |     if isinstance(names, dict):
 28 |         if not all(isinstance(k, int) for k in names.keys()):  # convert string keys to int, i.e. '0' to 0
 29 |             names = {int(k): v for k, v in names.items()}
 30 |         if isinstance(names[0], str) and names[0].startswith('n0'):  # imagenet class codes, i.e. 'n01440764'
 31 |             map = yaml_load(ROOT / 'yolo/data/datasets/ImageNet.yaml')['map']  # human-readable names
 32 |             names = {k: map[v] for k, v in names.items()}
 33 |     return names
 34 | 
 35 | 
 36 | class AutoBackend(nn.Module):
 37 | 
 38 |     def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
 39 |         """
 40 |         MultiBackend class for python inference on various platforms using Ultralytics YOLO.
 41 | 
 42 |         Args:
 43 |             weights (str): The path to the weights file. Default: 'yolov8n.pt'
 44 |             device (torch.device): The device to run the model on.
 45 |             dnn (bool): Use OpenCV's DNN module for inference if True, defaults to False.
 46 |             data (str), (Path): Additional data.yaml file for class names, optional
 47 |             fp16 (bool): If True, use half precision. Default: False
 48 |             fuse (bool): Whether to fuse the model or not. Default: True
 49 | 
 50 |         Supported formats and their naming conventions:
 51 |             | Format                | Suffix           |
 52 |             |-----------------------|------------------|
 53 |             | PyTorch               | *.pt             |
 54 |             | TorchScript           | *.torchscript    |
 55 |             | ONNX Runtime          | *.onnx           |
 56 |             | ONNX OpenCV DNN       | *.onnx --dnn     |
 57 |             | OpenVINO              | *.xml            |
 58 |             | CoreML                | *.mlmodel        |
 59 |             | TensorRT              | *.engine         |
 60 |             | TensorFlow SavedModel | *_saved_model    |
 61 |             | TensorFlow GraphDef   | *.pb             |
 62 |             | TensorFlow Lite       | *.tflite         |
 63 |             | TensorFlow Edge TPU   | *_edgetpu.tflite |
 64 |             | PaddlePaddle          | *_paddle_model   |
 65 |         """
 66 |         super().__init__()
 67 |         w = str(weights[0] if isinstance(weights, list) else weights)
 68 |         nn_module = isinstance(weights, torch.nn.Module)
 69 |         pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
 70 |         fp16 &= pt or jit or onnx or engine or nn_module  # FP16
 71 |         nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
 72 |         stride = 32  # default stride
 73 |         model = None  # TODO: resolves ONNX inference, verify effect on other backends
 74 |         cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
 75 |         if not (pt or triton or nn_module):
 76 |             w = attempt_download_asset(w)  # download if not local
 77 | 
 78 |         # NOTE: special case: in-memory pytorch model
 79 |         if nn_module:
 80 |             model = weights.to(device)
 81 |             model = model.fuse() if fuse else model
 82 |             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
 83 |             stride = max(int(model.stride.max()), 32)  # model stride
 84 |             model.half() if fp16 else model.float()
 85 |             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
 86 |             pt = True
 87 |         elif pt:  # PyTorch
 88 |             from ultralytics.nn.tasks import attempt_load_weights
 89 |             model = attempt_load_weights(weights if isinstance(weights, list) else w,
 90 |                                          device=device,
 91 |                                          inplace=True,
 92 |                                          fuse=fuse)
 93 |             stride = max(int(model.stride.max()), 32)  # model stride
 94 |             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
 95 |             model.half() if fp16 else model.float()
 96 |             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
 97 |         elif jit:  # TorchScript
 98 |             LOGGER.info(f'Loading {w} for TorchScript inference...')
 99 |             extra_files = {'config.txt': ''}  # model metadata
100 |             model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
101 |             model.half() if fp16 else model.float()
102 |             if extra_files['config.txt']:  # load metadata dict
103 |                 d = json.loads(extra_files['config.txt'],
104 |                                object_hook=lambda d: {int(k) if k.isdigit() else k: v
105 |                                                       for k, v in d.items()})
106 |                 stride, names = int(d['stride']), d['names']
107 |         elif dnn:  # ONNX OpenCV DNN
108 |             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
109 |             check_requirements('opencv-python>=4.5.4')
110 |             net = cv2.dnn.readNetFromONNX(w)
111 |         elif onnx:  # ONNX Runtime
112 |             LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
113 |             check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
114 |             import onnxruntime
115 |             providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
116 |             session = onnxruntime.InferenceSession(w, providers=providers)
117 |             output_names = [x.name for x in session.get_outputs()]
118 |             meta = session.get_modelmeta().custom_metadata_map  # metadata
119 |             if 'stride' in meta:
120 |                 stride, names = int(meta['stride']), eval(meta['names'])
121 |         elif xml:  # OpenVINO
122 |             LOGGER.info(f'Loading {w} for OpenVINO inference...')
123 |             check_requirements('openvino')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
124 |             from openvino.runtime import Core, Layout, get_batch  # noqa
125 |             ie = Core()
126 |             if not Path(w).is_file():  # if not *.xml
127 |                 w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
128 |             network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
129 |             if network.get_parameters()[0].get_layout().empty:
130 |                 network.get_parameters()[0].set_layout(Layout('NCHW'))
131 |             batch_dim = get_batch(network)
132 |             if batch_dim.is_static:
133 |                 batch_size = batch_dim.get_length()
134 |             executable_network = ie.compile_model(network, device_name='CPU')  # device_name="MYRIAD" for Intel NCS2
135 |         elif engine:  # TensorRT
136 |             LOGGER.info(f'Loading {w} for TensorRT inference...')
137 |             import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
138 |             check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
139 |             if device.type == 'cpu':
140 |                 device = torch.device('cuda:0')
141 |             Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
142 |             logger = trt.Logger(trt.Logger.INFO)
143 |             # Read file
144 |             with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
145 |                 # Read metadata length
146 |                 meta_len = int.from_bytes(f.read(4), byteorder='little')
147 |                 # Read metadata
148 |                 meta = json.loads(f.read(meta_len).decode('utf-8'))
149 |                 stride, names = int(meta['stride']), meta['names']
150 |                 # Read engine
151 |                 model = runtime.deserialize_cuda_engine(f.read())
152 |             context = model.create_execution_context()
153 |             bindings = OrderedDict()
154 |             output_names = []
155 |             fp16 = False  # default updated below
156 |             dynamic = False
157 |             for i in range(model.num_bindings):
158 |                 name = model.get_binding_name(i)
159 |                 dtype = trt.nptype(model.get_binding_dtype(i))
160 |                 if model.binding_is_input(i):
161 |                     if -1 in tuple(model.get_binding_shape(i)):  # dynamic
162 |                         dynamic = True
163 |                         context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
164 |                     if dtype == np.float16:
165 |                         fp16 = True
166 |                 else:  # output
167 |                     output_names.append(name)
168 |                 shape = tuple(context.get_binding_shape(i))
169 |                 im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
170 |                 bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
171 |             binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
172 |             batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
173 |         elif coreml:  # CoreML
174 |             LOGGER.info(f'Loading {w} for CoreML inference...')
175 |             import coremltools as ct
176 |             model = ct.models.MLModel(w)
177 |         elif saved_model:  # TF SavedModel
178 |             LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
179 |             import tensorflow as tf
180 |             keras = False  # assume TF1 saved_model
181 |             model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
182 |         elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
183 |             LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
184 |             import tensorflow as tf
185 | 
186 |             def wrap_frozen_graph(gd, inputs, outputs):
187 |                 x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), [])  # wrapped
188 |                 ge = x.graph.as_graph_element
189 |                 return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
190 | 
191 |             def gd_outputs(gd):
192 |                 name_list, input_list = [], []
193 |                 for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
194 |                     name_list.append(node.name)
195 |                     input_list.extend(node.input)
196 |                 return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
197 | 
198 |             gd = tf.Graph().as_graph_def()  # TF GraphDef
199 |             with open(w, 'rb') as f:
200 |                 gd.ParseFromString(f.read())
201 |             frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
202 |         elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
203 |             try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
204 |                 from tflite_runtime.interpreter import Interpreter, load_delegate
205 |             except ImportError:
206 |                 import tensorflow as tf
207 |                 Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
208 |             if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
209 |                 LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
210 |                 delegate = {
211 |                     'Linux': 'libedgetpu.so.1',
212 |                     'Darwin': 'libedgetpu.1.dylib',
213 |                     'Windows': 'edgetpu.dll'}[platform.system()]
214 |                 interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
215 |             else:  # TFLite
216 |                 LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
217 |                 interpreter = Interpreter(model_path=w)  # load TFLite model
218 |             interpreter.allocate_tensors()  # allocate
219 |             input_details = interpreter.get_input_details()  # inputs
220 |             output_details = interpreter.get_output_details()  # outputs
221 |             # load metadata
222 |             with contextlib.suppress(zipfile.BadZipFile):
223 |                 with zipfile.ZipFile(w, 'r') as model:
224 |                     meta_file = model.namelist()[0]
225 |                     meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
226 |                     stride, names = int(meta['stride']), meta['names']
227 |         elif tfjs:  # TF.js
228 |             raise NotImplementedError('YOLOv8 TF.js inference is not supported')
229 |         elif paddle:  # PaddlePaddle
230 |             LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
231 |             check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
232 |             import paddle.inference as pdi
233 |             if not Path(w).is_file():  # if not *.pdmodel
234 |                 w = next(Path(w).rglob('*.pdmodel'))  # get *.pdmodel file from *_paddle_model dir
235 |             weights = Path(w).with_suffix('.pdiparams')
236 |             config = pdi.Config(str(w), str(weights))
237 |             if cuda:
238 |                 config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
239 |             predictor = pdi.create_predictor(config)
240 |             input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
241 |             output_names = predictor.get_output_names()
242 |         elif triton:  # NVIDIA Triton Inference Server
243 |             LOGGER.info('Triton Inference Server not supported...')
244 |             '''
245 |             TODO:
246 |             check_requirements('tritonclient[all]')
247 |             from utils.triton import TritonRemoteModel
248 |             model = TritonRemoteModel(url=w)
249 |             nhwc = model.runtime.startswith("tensorflow")
250 |             '''
251 |         else:
252 |             from ultralytics.yolo.engine.exporter import EXPORT_FORMATS_TABLE
253 |             raise TypeError(f"model='{w}' is not a supported model format. "
254 |                             'See https://docs.ultralytics.com/tasks/detection/#export for help.'
255 |                             f'\n\n{EXPORT_FORMATS_TABLE}')
256 | 
257 |         # Load external metadata YAML
258 |         if xml or saved_model or paddle:
259 |             metadata = Path(w).parent / 'metadata.yaml'
260 |             if metadata.exists():
261 |                 metadata = yaml_load(metadata)
262 |                 stride, names = int(metadata['stride']), metadata['names']  # load metadata
263 |             else:
264 |                 LOGGER.warning(f"WARNING ⚠️ Metadata not found at '{metadata}'")
265 | 
266 |         # Check names
267 |         if 'names' not in locals():  # names missing
268 |             names = yaml_load(check_yaml(data))['names'] if data else {i: f'class{i}' for i in range(999)}  # assign
269 |         names = check_class_names(names)
270 | 
271 |         self.__dict__.update(locals())  # assign all variables to self
272 | 
273 |     def forward(self, im, augment=False, visualize=False):
274 |         """
275 |         Runs inference on the YOLOv8 MultiBackend model.
276 | 
277 |         Args:
278 |             im (torch.Tensor): The image tensor to perform inference on.
279 |             augment (bool): whether to perform data augmentation during inference, defaults to False
280 |             visualize (bool): whether to visualize the output predictions, defaults to False
281 | 
282 |         Returns:
283 |             (tuple): Tuple containing the raw output tensor, and the processed output for visualization (if visualize=True)
284 |         """
285 |         b, ch, h, w = im.shape  # batch, channel, height, width
286 |         if self.fp16 and im.dtype != torch.float16:
287 |             im = im.half()  # to FP16
288 |         if self.nhwc:
289 |             im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)
290 | 
291 |         if self.pt or self.nn_module:  # PyTorch
292 |             y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
293 |         elif self.jit:  # TorchScript
294 |             y = self.model(im)
295 |         elif self.dnn:  # ONNX OpenCV DNN
296 |             im = im.cpu().numpy()  # torch to numpy
297 |             self.net.setInput(im)
298 |             y = self.net.forward()
299 |         elif self.onnx:  # ONNX Runtime
300 |             im = im.cpu().numpy()  # torch to numpy
301 |             y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
302 |         elif self.xml:  # OpenVINO
303 |             im = im.cpu().numpy()  # FP32
304 |             y = list(self.executable_network([im]).values())
305 |         elif self.engine:  # TensorRT
306 |             if self.dynamic and im.shape != self.bindings['images'].shape:
307 |                 i = self.model.get_binding_index('images')
308 |                 self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
309 |                 self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
310 |                 for name in self.output_names:
311 |                     i = self.model.get_binding_index(name)
312 |                     self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
313 |             s = self.bindings['images'].shape
314 |             assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
315 |             self.binding_addrs['images'] = int(im.data_ptr())
316 |             self.context.execute_v2(list(self.binding_addrs.values()))
317 |             y = [self.bindings[x].data for x in sorted(self.output_names)]
318 |         elif self.coreml:  # CoreML
319 |             im = im.cpu().numpy()
320 |             im = Image.fromarray((im[0] * 255).astype('uint8'))
321 |             # im = im.resize((192, 320), Image.ANTIALIAS)
322 |             y = self.model.predict({'image': im})  # coordinates are xywh normalized
323 |             if 'confidence' in y:
324 |                 box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
325 |                 conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
326 |                 y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
327 |             else:
328 |                 y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
329 |         elif self.paddle:  # PaddlePaddle
330 |             im = im.cpu().numpy().astype(np.float32)
331 |             self.input_handle.copy_from_cpu(im)
332 |             self.predictor.run()
333 |             y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
334 |         elif self.triton:  # NVIDIA Triton Inference Server
335 |             y = self.model(im)
336 |         else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
337 |             im = im.cpu().numpy()
338 |             if self.saved_model:  # SavedModel
339 |                 y = self.model(im, training=False) if self.keras else self.model(im)
340 |             elif self.pb:  # GraphDef
341 |                 y = self.frozen_func(x=self.tf.constant(im))
342 |             else:  # Lite or Edge TPU
343 |                 input = self.input_details[0]
344 |                 int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
345 |                 if int8:
346 |                     scale, zero_point = input['quantization']
347 |                     im = (im / scale + zero_point).astype(np.uint8)  # de-scale
348 |                 self.interpreter.set_tensor(input['index'], im)
349 |                 self.interpreter.invoke()
350 |                 y = []
351 |                 for output in self.output_details:
352 |                     x = self.interpreter.get_tensor(output['index'])
353 |                     if int8:
354 |                         scale, zero_point = output['quantization']
355 |                         x = (x.astype(np.float32) - zero_point) * scale  # re-scale
356 |                     y.append(x)
357 |                 # TF segment fixes: export is reversed vs ONNX export and protos are transposed
358 |                 if len(self.output_details) == 2:  # segment
359 |                     y = [y[1], np.transpose(y[0], (0, 3, 1, 2))]
360 |             y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
361 |             # y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels
362 | 
363 |         if isinstance(y, (list, tuple)):
364 |             return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
365 |         else:
366 |             return self.from_numpy(y)
367 | 
368 |     def from_numpy(self, x):
369 |         """
370 |          Convert a numpy array to a tensor.
371 | 
372 |          Args:
373 |              x (np.ndarray): The array to be converted.
374 | 
375 |          Returns:
376 |              (torch.Tensor): The converted tensor
377 |          """
378 |         return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
379 | 
380 |     def warmup(self, imgsz=(1, 3, 640, 640)):
381 |         """
382 |         Warm up the model by running one forward pass with a dummy input.
383 | 
384 |         Args:
385 |             imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width)
386 | 
387 |         Returns:
388 |             (None): This method runs the forward pass and don't return any value
389 |         """
390 |         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
391 |         if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
392 |             im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
393 |             for _ in range(2 if self.jit else 1):  #
394 |                 self.forward(im)  # warmup
395 | 
396 |     @staticmethod
397 |     def _model_type(p='path/to/model.pt'):
398 |         """
399 |         This function takes a path to a model file and returns the model type
400 | 
401 |         Args:
402 |             p: path to the model file. Defaults to path/to/model.pt
403 |         """
404 |         # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
405 |         # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
406 |         from ultralytics.yolo.engine.exporter import export_formats
407 |         sf = list(export_formats().Suffix)  # export suffixes
408 |         if not is_url(p, check=False) and not isinstance(p, str):
409 |             check_suffix(p, sf)  # checks
410 |         url = urlparse(p)  # if url may be Triton inference server
411 |         types = [s in Path(p).name for s in sf]
412 |         types[8] &= not types[9]  # tflite &= not edgetpu
413 |         triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
414 |         return types + [triton]
415 | 


--------------------------------------------------------------------------------