├── ultralytics ├── nn │ ├── __init__.py │ ├── __pycache__ │ │ ├── modules.cpython-37.pyc │ │ ├── modules.cpython-38.pyc │ │ ├── tasks.cpython-37.pyc │ │ ├── tasks.cpython-38.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── autobackend.cpython-37.pyc │ │ └── autobackend.cpython-38.pyc │ ├── autoshape.py │ ├── modules.py │ └── autobackend.py ├── tracker │ ├── utils │ │ ├── __init__.py │ │ ├── matching.py │ │ ├── gmc.py │ │ └── kalman_filter.py │ ├── __init__.py │ ├── trackers │ │ ├── __init__.py │ │ ├── basetrack.py │ │ ├── bot_sort.py │ │ └── byte_tracker.py │ ├── cfg │ │ ├── bytetrack.yaml │ │ └── botsort.yaml │ ├── README.md │ └── track.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── __init__.cpython-38.pyc ├── hub │ ├── __pycache__ │ │ ├── auth.cpython-37.pyc │ │ ├── auth.cpython-38.pyc │ │ ├── utils.cpython-37.pyc │ │ ├── utils.cpython-38.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── session.cpython-37.pyc │ │ └── session.cpython-38.pyc │ ├── auth.py │ ├── __init__.py │ ├── utils.py │ └── session.py ├── __init__.py └── models │ ├── v8 │ ├── cls │ │ ├── yolov8l-cls.yaml │ │ ├── yolov8m-cls.yaml │ │ ├── yolov8n-cls.yaml │ │ ├── yolov8s-cls.yaml │ │ └── yolov8x-cls.yaml │ ├── yolov8l.yaml │ ├── yolov8m.yaml │ ├── yolov8x.yaml │ ├── yolov8n.yaml │ ├── yolov8s.yaml │ ├── seg │ │ ├── yolov8l-seg.yaml │ │ ├── yolov8m-seg.yaml │ │ ├── yolov8x-seg.yaml │ │ ├── yolov8n-seg.yaml │ │ └── yolov8s-seg.yaml │ └── yolov8x6.yaml │ ├── v3 │ ├── yolov3-tinyu.yaml │ ├── yolov3u.yaml │ └── yolov3-sppu.yaml │ ├── v5 │ ├── yolov5lu.yaml │ ├── yolov5mu.yaml │ ├── yolov5nu.yaml │ ├── yolov5xu.yaml │ └── yolov5su.yaml │ └── README.md ├── screenshot ├── 5.jpg ├── 1.jpeg ├── 2.jpeg ├── 3.jpeg └── qrcode.png ├── images └── zidane.jpg ├── detect_predict.py └── README.md /ultralytics/nn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ultralytics/tracker/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ultralytics/tracker/__init__.py: -------------------------------------------------------------------------------- 1 | from .trackers import BOTSORT, BYTETracker 2 | -------------------------------------------------------------------------------- /screenshot/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/5.jpg -------------------------------------------------------------------------------- /ultralytics/tracker/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | from .bot_sort import BOTSORT 2 | from .byte_tracker import BYTETracker 3 | -------------------------------------------------------------------------------- /images/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/images/zidane.jpg -------------------------------------------------------------------------------- /screenshot/1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/1.jpeg -------------------------------------------------------------------------------- /screenshot/2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/2.jpeg -------------------------------------------------------------------------------- /screenshot/3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/3.jpeg -------------------------------------------------------------------------------- /screenshot/qrcode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/screenshot/qrcode.png -------------------------------------------------------------------------------- /ultralytics/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/auth.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/auth.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/auth.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/auth.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/modules.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/modules.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/modules.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/modules.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/tasks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/tasks.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/tasks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/tasks.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/session.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/session.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/hub/__pycache__/session.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/hub/__pycache__/session.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/autobackend.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/autobackend.cpython-37.pyc -------------------------------------------------------------------------------- /ultralytics/nn/__pycache__/autobackend.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alicema-creator/Python-Yolov8-gesture-recognition/HEAD/ultralytics/nn/__pycache__/autobackend.cpython-38.pyc -------------------------------------------------------------------------------- /ultralytics/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | __version__ = '8.0.40' 4 | 5 | from ultralytics.yolo.engine.model import YOLO 6 | from ultralytics.yolo.utils.checks import check_yolo as checks 7 | 8 | __all__ = ['__version__', 'YOLO', 'checks'] # allow simpler import 9 | -------------------------------------------------------------------------------- /ultralytics/tracker/cfg/bytetrack.yaml: -------------------------------------------------------------------------------- 1 | tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack'] 2 | track_high_thresh: 0.5 # threshold for the first association 3 | track_low_thresh: 0.1 # threshold for the second association 4 | new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks 5 | track_buffer: 30 # buffer to calculate the time when to remove tracks 6 | match_thresh: 0.8 # threshold for matching tracks 7 | # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) 8 | # mot20: False # for tracker evaluation(not used for now) 9 | -------------------------------------------------------------------------------- /ultralytics/models/v8/cls/yolov8l-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1000 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.00 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | 21 | # YOLOv8.0n head 22 | head: 23 | - [-1, 1, Classify, [nc]] 24 | -------------------------------------------------------------------------------- /ultralytics/models/v8/cls/yolov8m-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1000 # number of classes 5 | depth_multiple: 0.67 # scales module repeats 6 | width_multiple: 0.75 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | 21 | # YOLOv8.0n head 22 | head: 23 | - [-1, 1, Classify, [nc]] 24 | -------------------------------------------------------------------------------- /ultralytics/models/v8/cls/yolov8n-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1000 # number of classes 5 | depth_multiple: 0.33 # scales module repeats 6 | width_multiple: 0.25 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | 21 | # YOLOv8.0n head 22 | head: 23 | - [-1, 1, Classify, [nc]] 24 | -------------------------------------------------------------------------------- /ultralytics/models/v8/cls/yolov8s-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1000 # number of classes 5 | depth_multiple: 0.33 # scales module repeats 6 | width_multiple: 0.50 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | 21 | # YOLOv8.0n head 22 | head: 23 | - [-1, 1, Classify, [nc]] 24 | -------------------------------------------------------------------------------- /ultralytics/models/v8/cls/yolov8x-cls.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 1000 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.25 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | 21 | # YOLOv8.0n head 22 | head: 23 | - [-1, 1, Classify, [nc]] 24 | -------------------------------------------------------------------------------- /ultralytics/tracker/README.md: -------------------------------------------------------------------------------- 1 | ## Tracker 2 | 3 | ### Trackers 4 | 5 | - [x] ByteTracker 6 | - [x] BoT-SORT 7 | 8 | ### Usage 9 | 10 | python interface: 11 | 12 | ```python 13 | from ultralytics import YOLO 14 | 15 | model = YOLO("yolov8n.pt") # or a segmentation model .i.e yolov8n-seg.pt 16 | model.track( 17 | source="video/streams", 18 | stream=True, 19 | tracker="botsort.yaml/bytetrack.yaml", 20 | ..., 21 | ) 22 | ``` 23 | 24 | cli: 25 | 26 | ```bash 27 | yolo detect track source=... tracker=... 28 | yolo segment track source=... tracker=... 29 | ``` 30 | 31 | By default, trackers will use the configuration in `ultralytics/tracker/cfg`. 32 | We also support using a modified tracker config file. Please refer to the tracker config files in `ultralytics/tracker/cfg`. 33 | -------------------------------------------------------------------------------- /ultralytics/tracker/cfg/botsort.yaml: -------------------------------------------------------------------------------- 1 | tracker_type: botsort # tracker type, ['botsort', 'bytetrack'] 2 | track_high_thresh: 0.5 # threshold for the first association 3 | track_low_thresh: 0.1 # threshold for the second association 4 | new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks 5 | track_buffer: 30 # buffer to calculate the time when to remove tracks 6 | match_thresh: 0.8 # threshold for matching tracks 7 | # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now) 8 | # mot20: False # for tracker evaluation(not used for now) 9 | 10 | # Botsort settings 11 | cmc_method: sparseOptFlow # method of global motion compensation 12 | # ReID model related thresh (not supported yet) 13 | proximity_thresh: 0.5 14 | appearance_thresh: 0.25 15 | with_reid: False 16 | -------------------------------------------------------------------------------- /ultralytics/tracker/trackers/basetrack.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | 5 | 6 | class TrackState: 7 | New = 0 8 | Tracked = 1 9 | Lost = 2 10 | Removed = 3 11 | 12 | 13 | class BaseTrack: 14 | _count = 0 15 | 16 | track_id = 0 17 | is_activated = False 18 | state = TrackState.New 19 | 20 | history = OrderedDict() 21 | features = [] 22 | curr_feature = None 23 | score = 0 24 | start_frame = 0 25 | frame_id = 0 26 | time_since_update = 0 27 | 28 | # multi-camera 29 | location = (np.inf, np.inf) 30 | 31 | @property 32 | def end_frame(self): 33 | return self.frame_id 34 | 35 | @staticmethod 36 | def next_id(): 37 | BaseTrack._count += 1 38 | return BaseTrack._count 39 | 40 | def activate(self, *args): 41 | raise NotImplementedError 42 | 43 | def predict(self): 44 | raise NotImplementedError 45 | 46 | def update(self, *args, **kwargs): 47 | raise NotImplementedError 48 | 49 | def mark_lost(self): 50 | self.state = TrackState.Lost 51 | 52 | def mark_removed(self): 53 | self.state = TrackState.Removed 54 | -------------------------------------------------------------------------------- /ultralytics/models/v3/yolov3-tinyu.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | 8 | # YOLOv3-tiny backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [16, 3, 1]], # 0 12 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 13 | [-1, 1, Conv, [32, 3, 1]], 14 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 15 | [-1, 1, Conv, [64, 3, 1]], 16 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 17 | [-1, 1, Conv, [128, 3, 1]], 18 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 19 | [-1, 1, Conv, [256, 3, 1]], 20 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 21 | [-1, 1, Conv, [512, 3, 1]], 22 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 23 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 24 | ] 25 | 26 | # YOLOv3-tiny head 27 | head: 28 | [[-1, 1, Conv, [1024, 3, 1]], 29 | [-1, 1, Conv, [256, 1, 1]], 30 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 31 | 32 | [-2, 1, Conv, [128, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 35 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 36 | 37 | [[19, 15], 1, Detect, [nc]], # Detect(P4, P5) 38 | ] 39 | -------------------------------------------------------------------------------- /ultralytics/models/v8/yolov8l.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.00 # scales convolution channels 7 | 8 | # YOLOv8.0l backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [512, True]] 20 | - [-1, 1, SPPF, [512, 5]] # 9 21 | 22 | # YOLOv8.0l head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/yolov8m.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # scales module repeats 6 | width_multiple: 0.75 # scales convolution channels 7 | 8 | # YOLOv8.0m backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [768, True]] 20 | - [-1, 1, SPPF, [768, 5]] # 9 21 | 22 | # YOLOv8.0m head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [768]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/yolov8x.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.25 # scales convolution channels 7 | 8 | # YOLOv8.0x backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [512, True]] 20 | - [-1, 1, SPPF, [512, 5]] # 9 21 | 22 | # YOLOv8.0x head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/yolov8n.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # scales module repeats 6 | width_multiple: 0.25 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | - [-1, 1, SPPF, [1024, 5]] # 9 21 | 22 | # YOLOv8.0n head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/yolov8s.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # scales module repeats 6 | width_multiple: 0.50 # scales convolution channels 7 | 8 | # YOLOv8.0s backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | - [-1, 1, SPPF, [1024, 5]] # 9 21 | 22 | # YOLOv8.0s head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/seg/yolov8l-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.00 # scales convolution channels 7 | 8 | # YOLOv8.0l backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [512, True]] 20 | - [-1, 1, SPPF, [512, 5]] # 9 21 | 22 | # YOLOv8.0l head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/seg/yolov8m-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # scales module repeats 6 | width_multiple: 0.75 # scales convolution channels 7 | 8 | # YOLOv8.0m backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [768, True]] 20 | - [-1, 1, SPPF, [768, 5]] # 9 21 | 22 | # YOLOv8.0m head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [768]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/seg/yolov8x-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.25 # scales convolution channels 7 | 8 | # YOLOv8.0x backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [512, True]] 20 | - [-1, 1, SPPF, [512, 5]] # 9 21 | 22 | # YOLOv8.0x head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [512]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/seg/yolov8n-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # scales module repeats 6 | width_multiple: 0.25 # scales convolution channels 7 | 8 | # YOLOv8.0n backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | - [-1, 1, SPPF, [1024, 5]] # 9 21 | 22 | # YOLOv8.0n head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v8/seg/yolov8s-seg.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # scales module repeats 6 | width_multiple: 0.50 # scales convolution channels 7 | 8 | # YOLOv8.0s backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [1024, True]] 20 | - [-1, 1, SPPF, [1024, 5]] # 9 21 | 22 | # YOLOv8.0s head 23 | head: 24 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 25 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 26 | - [-1, 3, C2f, [512]] # 12 27 | 28 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 29 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 30 | - [-1, 3, C2f, [256]] # 15 (P3/8-small) 31 | 32 | - [-1, 1, Conv, [256, 3, 2]] 33 | - [[-1, 12], 1, Concat, [1]] # cat head P4 34 | - [-1, 3, C2f, [512]] # 18 (P4/16-medium) 35 | 36 | - [-1, 1, Conv, [512, 3, 2]] 37 | - [[-1, 9], 1, Concat, [1]] # cat head P5 38 | - [-1, 3, C2f, [1024]] # 21 (P5/32-large) 39 | 40 | - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Detect(P3, P4, P5) 41 | -------------------------------------------------------------------------------- /ultralytics/models/v5/yolov5lu.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | 8 | # YOLOv5 v6.0 backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 13 | [-1, 3, C3, [128]], 14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 15 | [-1, 6, C3, [256]], 16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 17 | [-1, 9, C3, [512]], 18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 19 | [-1, 3, C3, [1024]], 20 | [-1, 1, SPPF, [1024, 5]], # 9 21 | ] 22 | 23 | # YOLOv5 v6.0 head 24 | head: 25 | [[-1, 1, Conv, [512, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 28 | [-1, 3, C3, [512, False]], # 13 29 | 30 | [-1, 1, Conv, [256, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 34 | 35 | [-1, 1, Conv, [256, 3, 2]], 36 | [[-1, 14], 1, Concat, [1]], # cat head P4 37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 38 | 39 | [-1, 1, Conv, [512, 3, 2]], 40 | [[-1, 10], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 42 | 43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5) 44 | ] 45 | -------------------------------------------------------------------------------- /ultralytics/models/v5/yolov5mu.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | 8 | # YOLOv5 v6.0 backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 13 | [-1, 3, C3, [128]], 14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 15 | [-1, 6, C3, [256]], 16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 17 | [-1, 9, C3, [512]], 18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 19 | [-1, 3, C3, [1024]], 20 | [-1, 1, SPPF, [1024, 5]], # 9 21 | ] 22 | 23 | # YOLOv5 v6.0 head 24 | head: 25 | [[-1, 1, Conv, [512, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 28 | [-1, 3, C3, [512, False]], # 13 29 | 30 | [-1, 1, Conv, [256, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 34 | 35 | [-1, 1, Conv, [256, 3, 2]], 36 | [[-1, 14], 1, Concat, [1]], # cat head P4 37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 38 | 39 | [-1, 1, Conv, [512, 3, 2]], 40 | [[-1, 10], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 42 | 43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5) 44 | ] 45 | -------------------------------------------------------------------------------- /ultralytics/models/v5/yolov5nu.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | 8 | # YOLOv5 v6.0 backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 13 | [-1, 3, C3, [128]], 14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 15 | [-1, 6, C3, [256]], 16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 17 | [-1, 9, C3, [512]], 18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 19 | [-1, 3, C3, [1024]], 20 | [-1, 1, SPPF, [1024, 5]], # 9 21 | ] 22 | 23 | # YOLOv5 v6.0 head 24 | head: 25 | [[-1, 1, Conv, [512, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 28 | [-1, 3, C3, [512, False]], # 13 29 | 30 | [-1, 1, Conv, [256, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 34 | 35 | [-1, 1, Conv, [256, 3, 2]], 36 | [[-1, 14], 1, Concat, [1]], # cat head P4 37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 38 | 39 | [-1, 1, Conv, [512, 3, 2]], 40 | [[-1, 10], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 42 | 43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5) 44 | ] 45 | -------------------------------------------------------------------------------- /ultralytics/models/v5/yolov5xu.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | 8 | # YOLOv5 v6.0 backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 12 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 13 | [-1, 3, C3, [128]], 14 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 15 | [-1, 6, C3, [256]], 16 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 17 | [-1, 9, C3, [512]], 18 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 19 | [-1, 3, C3, [1024]], 20 | [-1, 1, SPPF, [1024, 5]], # 9 21 | ] 22 | 23 | # YOLOv5 v6.0 head 24 | head: 25 | [[-1, 1, Conv, [512, 1, 1]], 26 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 27 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 28 | [-1, 3, C3, [512, False]], # 13 29 | 30 | [-1, 1, Conv, [256, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 33 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 34 | 35 | [-1, 1, Conv, [256, 3, 2]], 36 | [[-1, 14], 1, Concat, [1]], # cat head P4 37 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 38 | 39 | [-1, 1, Conv, [512, 3, 2]], 40 | [[-1, 10], 1, Concat, [1]], # cat head P5 41 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 42 | 43 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5) 44 | ] 45 | -------------------------------------------------------------------------------- /ultralytics/models/v5/yolov5su.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [1024]], 21 | [-1, 1, SPPF, [1024, 5]], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head 25 | head: 26 | [[-1, 1, Conv, [512, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 29 | [-1, 3, C3, [512, False]], # 13 30 | 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 35 | 36 | [-1, 1, Conv, [256, 3, 2]], 37 | [[-1, 14], 1, Concat, [1]], # cat head P4 38 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 39 | 40 | [-1, 1, Conv, [512, 3, 2]], 41 | [[-1, 10], 1, Concat, [1]], # cat head P5 42 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 43 | 44 | [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5) 45 | ] 46 | -------------------------------------------------------------------------------- /ultralytics/models/v3/yolov3u.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | 8 | # darknet53 backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [32, 3, 1]], # 0 12 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 13 | [-1, 1, Bottleneck, [64]], 14 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 15 | [-1, 2, Bottleneck, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 17 | [-1, 8, Bottleneck, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 19 | [-1, 8, Bottleneck, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 21 | [-1, 4, Bottleneck, [1024]], # 10 22 | ] 23 | 24 | # YOLOv3 head 25 | head: 26 | [[-1, 1, Bottleneck, [1024, False]], 27 | [-1, 1, Conv, [512, 1, 1]], 28 | [-1, 1, Conv, [1024, 3, 1]], 29 | [-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 31 | 32 | [-2, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 35 | [-1, 1, Bottleneck, [512, False]], 36 | [-1, 1, Bottleneck, [512, False]], 37 | [-1, 1, Conv, [256, 1, 1]], 38 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 39 | 40 | [-2, 1, Conv, [128, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 43 | [-1, 1, Bottleneck, [256, False]], 44 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 45 | 46 | [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /ultralytics/models/v3/yolov3-sppu.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | 8 | # darknet53 backbone 9 | backbone: 10 | # [from, number, module, args] 11 | [[-1, 1, Conv, [32, 3, 1]], # 0 12 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 13 | [-1, 1, Bottleneck, [64]], 14 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 15 | [-1, 2, Bottleneck, [128]], 16 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 17 | [-1, 8, Bottleneck, [256]], 18 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 19 | [-1, 8, Bottleneck, [512]], 20 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 21 | [-1, 4, Bottleneck, [1024]], # 10 22 | ] 23 | 24 | # YOLOv3-SPP head 25 | head: 26 | [[-1, 1, Bottleneck, [1024, False]], 27 | [-1, 1, SPP, [512, [5, 9, 13]]], 28 | [-1, 1, Conv, [1024, 3, 1]], 29 | [-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 31 | 32 | [-2, 1, Conv, [256, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 35 | [-1, 1, Bottleneck, [512, False]], 36 | [-1, 1, Bottleneck, [512, False]], 37 | [-1, 1, Conv, [256, 1, 1]], 38 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 39 | 40 | [-2, 1, Conv, [128, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 43 | [-1, 1, Bottleneck, [256, False]], 44 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 45 | 46 | [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /ultralytics/models/v8/yolov8x6.yaml: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.00 # scales module repeats 6 | width_multiple: 1.25 # scales convolution channels 7 | 8 | # YOLOv8.0x6 backbone 9 | backbone: 10 | # [from, repeats, module, args] 11 | - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 12 | - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 13 | - [-1, 3, C2f, [128, True]] 14 | - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 15 | - [-1, 6, C2f, [256, True]] 16 | - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 17 | - [-1, 6, C2f, [512, True]] 18 | - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 19 | - [-1, 3, C2f, [512, True]] 20 | - [-1, 1, Conv, [512, 3, 2]] # 9-P6/64 21 | - [-1, 3, C2f, [512, True]] 22 | - [-1, 1, SPPF, [512, 5]] # 11 23 | 24 | # YOLOv8.0x6 head 25 | head: 26 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 27 | - [[-1, 8], 1, Concat, [1]] # cat backbone P5 28 | - [-1, 3, C2, [512, False]] # 14 29 | 30 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 31 | - [[-1, 6], 1, Concat, [1]] # cat backbone P4 32 | - [-1, 3, C2, [512, False]] # 17 33 | 34 | - [-1, 1, nn.Upsample, [None, 2, 'nearest']] 35 | - [[-1, 4], 1, Concat, [1]] # cat backbone P3 36 | - [-1, 3, C2, [256, False]] # 20 (P3/8-small) 37 | 38 | - [-1, 1, Conv, [256, 3, 2]] 39 | - [[-1, 17], 1, Concat, [1]] # cat head P4 40 | - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) 41 | 42 | - [-1, 1, Conv, [512, 3, 2]] 43 | - [[-1, 14], 1, Concat, [1]] # cat head P5 44 | - [-1, 3, C2, [512, False]] # 26 (P5/32-large) 45 | 46 | - [-1, 1, Conv, [512, 3, 2]] 47 | - [[-1, 11], 1, Concat, [1]] # cat head P6 48 | - [-1, 3, C2, [512, False]] # 29 (P6/64-xlarge) 49 | 50 | - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) 51 | -------------------------------------------------------------------------------- /ultralytics/tracker/track.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ultralytics.tracker import BOTSORT, BYTETracker 4 | from ultralytics.yolo.utils import IterableSimpleNamespace, yaml_load 5 | from ultralytics.yolo.utils.checks import check_requirements, check_yaml 6 | 7 | TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT} 8 | check_requirements('lap') # for linear_assignment 9 | 10 | 11 | def on_predict_start(predictor): 12 | tracker = check_yaml(predictor.args.tracker) 13 | cfg = IterableSimpleNamespace(**yaml_load(tracker)) 14 | assert cfg.tracker_type in ['bytetrack', 'botsort'], \ 15 | f"Only support 'bytetrack' and 'botsort' for now, but got '{cfg.tracker_type}'" 16 | trackers = [] 17 | for _ in range(predictor.dataset.bs): 18 | tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30) 19 | trackers.append(tracker) 20 | predictor.trackers = trackers 21 | 22 | 23 | def on_predict_postprocess_end(predictor): 24 | bs = predictor.dataset.bs 25 | im0s = predictor.batch[2] 26 | im0s = im0s if isinstance(im0s, list) else [im0s] 27 | for i in range(bs): 28 | det = predictor.results[i].boxes.cpu().numpy() 29 | if len(det) == 0: 30 | continue 31 | tracks = predictor.trackers[i].update(det, im0s[i]) 32 | if len(tracks) == 0: 33 | continue 34 | predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1])) 35 | if predictor.results[i].masks is not None: 36 | idx = tracks[:, -1].tolist() 37 | predictor.results[i].masks = predictor.results[i].masks[idx] 38 | 39 | 40 | def register_tracker(model): 41 | model.add_callback('on_predict_start', on_predict_start) 42 | model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end) 43 | -------------------------------------------------------------------------------- /ultralytics/hub/auth.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | import requests 4 | 5 | from ultralytics.hub.utils import HUB_API_ROOT, request_with_credentials 6 | from ultralytics.yolo.utils import is_colab 7 | 8 | API_KEY_PATH = 'https://hub.ultralytics.com/settings?tab=api+keys' 9 | 10 | 11 | class Auth: 12 | id_token = api_key = model_key = False 13 | 14 | def __init__(self, api_key=None): 15 | self.api_key = self._clean_api_key(api_key) 16 | self.authenticate() if self.api_key else self.auth_with_cookies() 17 | 18 | @staticmethod 19 | def _clean_api_key(key: str) -> str: 20 | """Strip model from key if present""" 21 | separator = '_' 22 | return key.split(separator)[0] if separator in key else key 23 | 24 | def authenticate(self) -> bool: 25 | """Attempt to authenticate with server""" 26 | try: 27 | header = self.get_auth_header() 28 | if header: 29 | r = requests.post(f'{HUB_API_ROOT}/v1/auth', headers=header) 30 | if not r.json().get('success', False): 31 | raise ConnectionError('Unable to authenticate.') 32 | return True 33 | raise ConnectionError('User has not authenticated locally.') 34 | except ConnectionError: 35 | self.id_token = self.api_key = False # reset invalid 36 | return False 37 | 38 | def auth_with_cookies(self) -> bool: 39 | """ 40 | Attempt to fetch authentication via cookies and set id_token. 41 | User must be logged in to HUB and running in a supported browser. 42 | """ 43 | if not is_colab(): 44 | return False # Currently only works with Colab 45 | try: 46 | authn = request_with_credentials(f'{HUB_API_ROOT}/v1/auth/auto') 47 | if authn.get('success', False): 48 | self.id_token = authn.get('data', {}).get('idToken', None) 49 | self.authenticate() 50 | return True 51 | raise ConnectionError('Unable to fetch browser authentication details.') 52 | except ConnectionError: 53 | self.id_token = False # reset invalid 54 | return False 55 | 56 | def get_auth_header(self): 57 | if self.id_token: 58 | return {'authorization': f'Bearer {self.id_token}'} 59 | elif self.api_key: 60 | return {'x-api-key': self.api_key} 61 | else: 62 | return None 63 | 64 | def get_state(self) -> bool: 65 | """Get the authentication state""" 66 | return self.id_token or self.api_key 67 | 68 | def set_api_key(self, key: str): 69 | """Get the authentication state""" 70 | self.api_key = key 71 | -------------------------------------------------------------------------------- /ultralytics/hub/__init__.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | import requests 4 | 5 | from ultralytics.hub.auth import Auth 6 | from ultralytics.hub.session import HubTrainingSession 7 | from ultralytics.hub.utils import split_key 8 | from ultralytics.yolo.engine.exporter import EXPORT_FORMATS_LIST 9 | from ultralytics.yolo.engine.model import YOLO 10 | from ultralytics.yolo.utils import LOGGER, PREFIX, emojis 11 | 12 | # Define all export formats 13 | EXPORT_FORMATS_HUB = EXPORT_FORMATS_LIST + ['ultralytics_tflite', 'ultralytics_coreml'] 14 | 15 | 16 | def start(key=''): 17 | """ 18 | Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY') 19 | """ 20 | auth = Auth(key) 21 | try: 22 | if not auth.get_state(): 23 | model_id = request_api_key(auth) 24 | else: 25 | _, model_id = split_key(key) 26 | 27 | if not model_id: 28 | raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌')) 29 | 30 | session = HubTrainingSession(model_id=model_id, auth=auth) 31 | session.check_disk_space() 32 | 33 | trainer = YOLO(session.input_file) 34 | session.register_callbacks(trainer) 35 | trainer.train(**session.train_args) 36 | except Exception as e: 37 | LOGGER.warning(f'{PREFIX}{e}') 38 | 39 | 40 | def request_api_key(auth, max_attempts=3): 41 | """ 42 | Prompt the user to input their API key. Returns the model ID. 43 | """ 44 | import getpass 45 | for attempts in range(max_attempts): 46 | LOGGER.info(f'{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}') 47 | input_key = getpass.getpass('Enter your Ultralytics HUB API key:\n') 48 | auth.api_key, model_id = split_key(input_key) 49 | 50 | if auth.authenticate(): 51 | LOGGER.info(f'{PREFIX}Authenticated ✅') 52 | return model_id 53 | 54 | LOGGER.warning(f'{PREFIX}Invalid API key ⚠️\n') 55 | 56 | raise ConnectionError(emojis(f'{PREFIX}Failed to authenticate ❌')) 57 | 58 | 59 | def reset_model(key=''): 60 | # Reset a trained model to an untrained state 61 | api_key, model_id = split_key(key) 62 | r = requests.post('https://api.ultralytics.com/model-reset', json={'apiKey': api_key, 'modelId': model_id}) 63 | 64 | if r.status_code == 200: 65 | LOGGER.info(f'{PREFIX}model reset successfully') 66 | return 67 | LOGGER.warning(f'{PREFIX}model reset failure {r.status_code} {r.reason}') 68 | 69 | 70 | def export_model(key='', format='torchscript'): 71 | # Export a model to all formats 72 | assert format in EXPORT_FORMATS_HUB, f"Unsupported export format '{format}', valid formats are {EXPORT_FORMATS_HUB}" 73 | api_key, model_id = split_key(key) 74 | r = requests.post('https://api.ultralytics.com/export', 75 | json={ 76 | 'apiKey': api_key, 77 | 'modelId': model_id, 78 | 'format': format}) 79 | assert (r.status_code == 200), f'{PREFIX}{format} export failure {r.status_code} {r.reason}' 80 | LOGGER.info(f'{PREFIX}{format} export started ✅') 81 | 82 | 83 | def get_export(key='', format='torchscript'): 84 | # Get an exported model dictionary with download URL 85 | assert format in EXPORT_FORMATS_HUB, f"Unsupported export format '{format}', valid formats are {EXPORT_FORMATS_HUB}" 86 | api_key, model_id = split_key(key) 87 | r = requests.post('https://api.ultralytics.com/get-export', 88 | json={ 89 | 'apiKey': api_key, 90 | 'modelId': model_id, 91 | 'format': format}) 92 | assert (r.status_code == 200), f'{PREFIX}{format} get_export failure {r.status_code} {r.reason}' 93 | return r.json() 94 | 95 | 96 | # temp. For checking 97 | if __name__ == '__main__': 98 | start() 99 | -------------------------------------------------------------------------------- /detect_predict.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | import torch 4 | 5 | from ultralytics.yolo.engine.predictor import BasePredictor 6 | from ultralytics.yolo.engine.results import Results 7 | from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops 8 | from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box 9 | 10 | 11 | class DetectionPredictor(BasePredictor): 12 | 13 | def get_annotator(self, img): 14 | return Annotator(img, line_width=self.args.line_thickness, example=str(self.model.names)) 15 | 16 | def preprocess(self, img): 17 | img = torch.from_numpy(img).to(self.model.device) 18 | img = img.half() if self.model.fp16 else img.float() # uint8 to fp16/32 19 | img /= 255 # 0 - 255 to 0.0 - 1.0 20 | return img 21 | 22 | def postprocess(self, preds, img, orig_img): 23 | preds = ops.non_max_suppression(preds, 24 | self.args.conf, 25 | self.args.iou, 26 | agnostic=self.args.agnostic_nms, 27 | max_det=self.args.max_det, 28 | classes=self.args.classes) 29 | 30 | results = [] 31 | for i, pred in enumerate(preds): 32 | orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img 33 | shape = orig_img.shape 34 | pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round() 35 | results.append(Results(boxes=pred, orig_img=orig_img, names=self.model.names)) 36 | return results 37 | 38 | def write_results(self, idx, results, batch): 39 | p, im, im0 = batch 40 | log_string = '' 41 | if len(im.shape) == 3: 42 | im = im[None] # expand for batch dim 43 | self.seen += 1 44 | imc = im0.copy() if self.args.save_crop else im0 45 | if self.source_type.webcam or self.source_type.from_img: # batch_size >= 1 46 | log_string += f'{idx}: ' 47 | frame = self.dataset.count 48 | else: 49 | frame = getattr(self.dataset, 'frame', 0) 50 | self.data_path = p 51 | self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}') 52 | log_string += '%gx%g ' % im.shape[2:] # print string 53 | self.annotator = self.get_annotator(im0) 54 | 55 | det = results[idx].boxes # TODO: make boxes inherit from tensors 56 | if len(det) == 0: 57 | return log_string 58 | for c in det.cls.unique(): 59 | n = (det.cls == c).sum() # detections per class 60 | log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, " 61 | 62 | # write 63 | for d in reversed(det): 64 | cls, conf = d.cls.squeeze(), d.conf.squeeze() 65 | if self.args.save_txt: # Write to file 66 | line = (cls, *(d.xywhn.view(-1).tolist()), conf) \ 67 | if self.args.save_conf else (cls, *(d.xywhn.view(-1).tolist())) # label format 68 | with open(f'{self.txt_path}.txt', 'a') as f: 69 | f.write(('%g ' * len(line)).rstrip() % line + '\n') 70 | if self.args.save or self.args.save_crop or self.args.show: # Add bbox to image 71 | c = int(cls) # integer class 72 | name = f'id:{int(d.id.item())} {self.model.names[c]}' if d.id is not None else self.model.names[c] 73 | label = None if self.args.hide_labels else (name if self.args.hide_conf else f'{name} {conf:.2f}') 74 | self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True)) 75 | if self.args.save_crop: 76 | save_one_box(d.xyxy, 77 | imc, 78 | file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg', 79 | BGR=True) 80 | 81 | return log_string 82 | 83 | 84 | def predict(cfg=DEFAULT_CFG, use_python=False): 85 | model = cfg.model or 'yolov8n.pt' 86 | source = "imagesVideo/aaa.mp4" 87 | 88 | show = True 89 | conf=0.3 90 | hide_labels=False 91 | hide_conf=False 92 | line_thickness=3 93 | visualize=False 94 | augment=False 95 | retina_masks=False 96 | #classes=[0,2,3] 97 | args = dict(model=model, source=source, show=show, conf=conf,hide_labels=hide_labels,hide_conf=hide_conf,line_thickness=line_thickness,visualize=visualize,augment=augment,retina_masks=retina_masks) 98 | 99 | if use_python: 100 | from ultralytics import YOLO 101 | YOLO(model)(**args) 102 | else: 103 | predictor = DetectionPredictor(overrides=args) 104 | predictor.predict_cli() 105 | 106 | 107 | if __name__ == '__main__': 108 | predict() 109 | -------------------------------------------------------------------------------- /ultralytics/tracker/trackers/bot_sort.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import numpy as np 4 | 5 | from ..utils import matching 6 | from ..utils.gmc import GMC 7 | from ..utils.kalman_filter import KalmanFilterXYWH 8 | from .basetrack import TrackState 9 | from .byte_tracker import BYTETracker, STrack 10 | 11 | 12 | class BOTrack(STrack): 13 | shared_kalman = KalmanFilterXYWH() 14 | 15 | def __init__(self, tlwh, score, cls, feat=None, feat_history=50): 16 | super().__init__(tlwh, score, cls) 17 | 18 | self.smooth_feat = None 19 | self.curr_feat = None 20 | if feat is not None: 21 | self.update_features(feat) 22 | self.features = deque([], maxlen=feat_history) 23 | self.alpha = 0.9 24 | 25 | def update_features(self, feat): 26 | feat /= np.linalg.norm(feat) 27 | self.curr_feat = feat 28 | if self.smooth_feat is None: 29 | self.smooth_feat = feat 30 | else: 31 | self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat 32 | self.features.append(feat) 33 | self.smooth_feat /= np.linalg.norm(self.smooth_feat) 34 | 35 | def predict(self): 36 | mean_state = self.mean.copy() 37 | if self.state != TrackState.Tracked: 38 | mean_state[6] = 0 39 | mean_state[7] = 0 40 | 41 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) 42 | 43 | def re_activate(self, new_track, frame_id, new_id=False): 44 | if new_track.curr_feat is not None: 45 | self.update_features(new_track.curr_feat) 46 | super().re_activate(new_track, frame_id, new_id) 47 | 48 | def update(self, new_track, frame_id): 49 | if new_track.curr_feat is not None: 50 | self.update_features(new_track.curr_feat) 51 | super().update(new_track, frame_id) 52 | 53 | @property 54 | def tlwh(self): 55 | """Get current position in bounding box format `(top left x, top left y, 56 | width, height)`. 57 | """ 58 | if self.mean is None: 59 | return self._tlwh.copy() 60 | ret = self.mean[:4].copy() 61 | ret[:2] -= ret[2:] / 2 62 | return ret 63 | 64 | @staticmethod 65 | def multi_predict(stracks): 66 | if len(stracks) > 0: 67 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 68 | multi_covariance = np.asarray([st.covariance for st in stracks]) 69 | for i, st in enumerate(stracks): 70 | if st.state != TrackState.Tracked: 71 | multi_mean[i][6] = 0 72 | multi_mean[i][7] = 0 73 | multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance) 74 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 75 | stracks[i].mean = mean 76 | stracks[i].covariance = cov 77 | 78 | def convert_coords(self, tlwh): 79 | return self.tlwh_to_xywh(tlwh) 80 | 81 | @staticmethod 82 | def tlwh_to_xywh(tlwh): 83 | """Convert bounding box to format `(center x, center y, width, 84 | height)`. 85 | """ 86 | ret = np.asarray(tlwh).copy() 87 | ret[:2] += ret[2:] / 2 88 | return ret 89 | 90 | 91 | class BOTSORT(BYTETracker): 92 | 93 | def __init__(self, args, frame_rate=30): 94 | super().__init__(args, frame_rate) 95 | # ReID module 96 | self.proximity_thresh = args.proximity_thresh 97 | self.appearance_thresh = args.appearance_thresh 98 | 99 | if args.with_reid: 100 | # haven't supported bot-sort(reid) yet 101 | self.encoder = None 102 | # self.gmc = GMC(method=args.cmc_method, verbose=[args.name, args.ablation]) 103 | self.gmc = GMC(method=args.cmc_method) 104 | 105 | def get_kalmanfilter(self): 106 | return KalmanFilterXYWH() 107 | 108 | def init_track(self, dets, scores, cls, img=None): 109 | if len(dets) == 0: 110 | return [] 111 | if self.args.with_reid and self.encoder is not None: 112 | features_keep = self.encoder.inference(img, dets) 113 | detections = [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)] 114 | else: 115 | detections = [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] 116 | return detections 117 | 118 | def get_dists(self, tracks, detections): 119 | dists = matching.iou_distance(tracks, detections) 120 | dists_mask = (dists > self.proximity_thresh) 121 | 122 | # TODO: mot20 123 | # if not self.args.mot20: 124 | dists = matching.fuse_score(dists, detections) 125 | 126 | if self.args.with_reid and self.encoder is not None: 127 | emb_dists = matching.embedding_distance(tracks, detections) / 2.0 128 | emb_dists[emb_dists > self.appearance_thresh] = 1.0 129 | emb_dists[dists_mask] = 1.0 130 | dists = np.minimum(dists, emb_dists) 131 | return dists 132 | 133 | def multi_predict(self, tracks): 134 | BOTrack.multi_predict(tracks) 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-Yolov8-gesture-recognition 2 | 3 | >**Python Yolov8 gesture recognition手势特征识别** 4 | >**如需安装运行环境或远程调试,见文章底部名片(QQ:2945218359),由专业技术人员远程协助!** 5 | 6 | 7 | ## 运行环境 8 | **编程语言:Python3** 9 | **依赖库:Torch, Yolov8** 10 | 11 | ## 运行效果 12 | 13 | 14 | 15 | 16 | 17 | ## 在线协助 18 | **如需安装运行环境或远程调试,可扫码或直接加QQ:2945218359, QQ:905733049由专业技术人员远程协助!** 19 | **1)远程安装运行环境,代码调试** 20 | **2)Qt, C++, Python入门指导** 21 | **3)界面美化** 22 | **4)软件制作** 23 | **5)云服务器申请** 24 | **6)网站制作** 25 | 26 | **扫码或****点这里****(QQ:2945218359, QQ:905733049)** 27 | 28 | 29 | 30 | 31 | 32 | 33 | **🏠作者推荐:** 34 | 35 | **🌟Python特征识别检测项目🌟** 36 | 37 | **Python+Yolov5表情检测识别:** 38 | [https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression) 39 | **Python指纹识别系统:** 40 | [https://github.com/alicema-creator/Python-fingerprint-recogn-system](https://github.com/alicema-creator/Python-fingerprint-recogn-system) 41 | **Python人脸识别考勤打卡系统2:** 42 | [https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system2](https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system2) 43 | **Python人脸识别考勤打卡系统:** 44 | [https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system](https://github.com/alicema-creator/Python-OpenCV-Face-recognition-attendance-management-system) 45 | **Python果树水果识别**:[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-apple-fruit](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-apple-fruit) 46 | **Python+Yolov8+Deepsort入口人流量统计:**[https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit](https://github.com/alicema-creator/Python-Yolov8-Statistics-of-the-number-of-people-at-the-entrance-and-exit) 47 | **Python+Qt指纹录入识别考勤系统:**[https://blog.csdn.net/alicema1111/article/details/129338432](https://blog.csdn.net/alicema1111/article/details/129338432) 48 | **Python手势特征识别:**[https://github.com/alicema-creator/Python-Yolov8-gesture-recognition](https://github.com/alicema-creator/Python-Yolov8-gesture-recognition) 49 | **Python+Yolov5路面桥梁墙体裂缝识别:**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-cracks-in-road-bridges](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-cracks-in-road-bridges) 50 | **Python+Yolov8路面桥梁墙体裂缝识别:**[https://github.com/alicema-creator/Python-Yolov8-crack-recognition-for-road-bridge-wall](https://github.com/alicema-creator/Python-Yolov8-crack-recognition-for-road-bridge-wall) 51 | **Python+Qt人行道盲道特征检测识别窗体程序:**[https://github.com/alicema-creator/Python-Qt-Detection-and-recognition-of-sidewalk-tactile-paving](https://github.com/alicema-creator/Python-Qt-Detection-and-recognition-of-sidewalk-tactile-paving) 52 | **Python+Yolov5面部情感表情检测识别:**[https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression](https://github.com/alicema-creator/Python-Yolov5-Detection-and-recognition-of-emotion-expression) 53 | 54 | 55 | 56 | **🌟Python/Django网页项目🌟** 57 | **Python+Django+HTMLl网页前后端指纹信息识别:**[https://github.com/alicema-creator/Python-Django-HTML-web-fingerprint-information-recognition](https://github.com/alicema-creator/Python-Django-HTML-web-fingerprint-information-recognition) 58 | **Html+threejs网页数字孪生场景三维可视化:**[https://github.com/alicema-creator/html-threejs-twin-scenes-3D-visualization-project](https://github.com/alicema-creator/html-threejs-twin-scenes-3D-visualization-project) 59 | **python+django+html药物管理系统web drug management system(crm):**[https://github.com/alicema-creator/python-django-web-html-drug-management-system](https://github.com/alicema-creator/python-django-web-html-drug-management-system) 60 | **Qt+C++ web browser自建网页浏览器-Chrome最新内核基础上搭建:**[https://github.com/alicema-creator/Qt-and-C-web-browser--Chrome-latest-kernel](https://github.com/alicema-creator/Qt-and-C-web-browser--Chrome-latest-kernel) 61 | 62 | 63 | 64 | **🌟C++/Qt项目🌟** 65 | **OCC Opencascade+Qt+C++三维图像建模窗体点线面拾取:**[https://github.com/alicema-creator/OCC-Opencascade-Qt-C-3D-model-modeling-point-line-surface-pick-igs-iges-stp-step](https://github.com/alicema-creator/OCC-Opencascade-Qt-C-3D-model-modeling-point-line-surface-pick-igs-iges-stp-step) 66 | **Qt+VTK鼠标拾取点生成拉伸闭合三维体:**[https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes](https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes) 67 | **Qt+C++实现的串口通信工具带实时曲线图:**[https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes]([https://github.com/alicema-creator/SerialPort-Communication)](https://github.com/alicema-creator/Qt-VTK-mouse-picking-points-to-generate-extruded-closed-3D-volumes](https://github.com/alicema-creator/SerialPort-Communication)) 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /ultralytics/tracker/utils/matching.py: -------------------------------------------------------------------------------- 1 | import lap 2 | import numpy as np 3 | import scipy 4 | from scipy.spatial.distance import cdist 5 | 6 | from .kalman_filter import chi2inv95 7 | 8 | 9 | def merge_matches(m1, m2, shape): 10 | O, P, Q = shape 11 | m1 = np.asarray(m1) 12 | m2 = np.asarray(m2) 13 | 14 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 15 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 16 | 17 | mask = M1 * M2 18 | match = mask.nonzero() 19 | match = list(zip(match[0], match[1])) 20 | unmatched_O = tuple(set(range(O)) - {i for i, j in match}) 21 | unmatched_Q = tuple(set(range(Q)) - {j for i, j in match}) 22 | 23 | return match, unmatched_O, unmatched_Q 24 | 25 | 26 | def _indices_to_matches(cost_matrix, indices, thresh): 27 | matched_cost = cost_matrix[tuple(zip(*indices))] 28 | matched_mask = (matched_cost <= thresh) 29 | 30 | matches = indices[matched_mask] 31 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 32 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 33 | 34 | return matches, unmatched_a, unmatched_b 35 | 36 | 37 | def linear_assignment(cost_matrix, thresh): 38 | if cost_matrix.size == 0: 39 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 40 | matches, unmatched_a, unmatched_b = [], [], [] 41 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 42 | matches.extend([ix, mx] for ix, mx in enumerate(x) if mx >= 0) 43 | unmatched_a = np.where(x < 0)[0] 44 | unmatched_b = np.where(y < 0)[0] 45 | matches = np.asarray(matches) 46 | return matches, unmatched_a, unmatched_b 47 | 48 | 49 | def ious(atlbrs, btlbrs): 50 | """ 51 | Compute cost based on IoU 52 | :type atlbrs: list[tlbr] | np.ndarray 53 | :type atlbrs: list[tlbr] | np.ndarray 54 | 55 | :rtype ious np.ndarray 56 | """ 57 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32) 58 | if ious.size == 0: 59 | return ious 60 | 61 | ious = bbox_ious(np.ascontiguousarray(atlbrs, dtype=np.float32), np.ascontiguousarray(btlbrs, dtype=np.float32)) 62 | return ious 63 | 64 | 65 | def iou_distance(atracks, btracks): 66 | """ 67 | Compute cost based on IoU 68 | :type atracks: list[STrack] 69 | :type btracks: list[STrack] 70 | 71 | :rtype cost_matrix np.ndarray 72 | """ 73 | 74 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \ 75 | or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 76 | atlbrs = atracks 77 | btlbrs = btracks 78 | else: 79 | atlbrs = [track.tlbr for track in atracks] 80 | btlbrs = [track.tlbr for track in btracks] 81 | _ious = ious(atlbrs, btlbrs) 82 | return 1 - _ious # cost matrix 83 | 84 | 85 | def v_iou_distance(atracks, btracks): 86 | """ 87 | Compute cost based on IoU 88 | :type atracks: list[STrack] 89 | :type btracks: list[STrack] 90 | 91 | :rtype cost_matrix np.ndarray 92 | """ 93 | 94 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \ 95 | or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 96 | atlbrs = atracks 97 | btlbrs = btracks 98 | else: 99 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] 100 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] 101 | _ious = ious(atlbrs, btlbrs) 102 | return 1 - _ious # cost matrix 103 | 104 | 105 | def embedding_distance(tracks, detections, metric='cosine'): 106 | """ 107 | :param tracks: list[STrack] 108 | :param detections: list[BaseTrack] 109 | :param metric: 110 | :return: cost_matrix np.ndarray 111 | """ 112 | 113 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32) 114 | if cost_matrix.size == 0: 115 | return cost_matrix 116 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32) 117 | # for i, track in enumerate(tracks): 118 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 119 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32) 120 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features 121 | return cost_matrix 122 | 123 | 124 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 125 | if cost_matrix.size == 0: 126 | return cost_matrix 127 | gating_dim = 2 if only_position else 4 128 | gating_threshold = chi2inv95[gating_dim] 129 | measurements = np.asarray([det.to_xyah() for det in detections]) 130 | for row, track in enumerate(tracks): 131 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position) 132 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 133 | return cost_matrix 134 | 135 | 136 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): 137 | if cost_matrix.size == 0: 138 | return cost_matrix 139 | gating_dim = 2 if only_position else 4 140 | gating_threshold = chi2inv95[gating_dim] 141 | measurements = np.asarray([det.to_xyah() for det in detections]) 142 | for row, track in enumerate(tracks): 143 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position, metric='maha') 144 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 145 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance 146 | return cost_matrix 147 | 148 | 149 | def fuse_iou(cost_matrix, tracks, detections): 150 | if cost_matrix.size == 0: 151 | return cost_matrix 152 | reid_sim = 1 - cost_matrix 153 | iou_dist = iou_distance(tracks, detections) 154 | iou_sim = 1 - iou_dist 155 | fuse_sim = reid_sim * (1 + iou_sim) / 2 156 | # det_scores = np.array([det.score for det in detections]) 157 | # det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 158 | return 1 - fuse_sim # fuse cost 159 | 160 | 161 | def fuse_score(cost_matrix, detections): 162 | if cost_matrix.size == 0: 163 | return cost_matrix 164 | iou_sim = 1 - cost_matrix 165 | det_scores = np.array([det.score for det in detections]) 166 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 167 | fuse_sim = iou_sim * det_scores 168 | return 1 - fuse_sim # fuse_cost 169 | 170 | 171 | def bbox_ious(box1, box2, eps=1e-7): 172 | """Boxes are x1y1x2y2 173 | box1: np.array of shape(nx4) 174 | box2: np.array of shape(mx4) 175 | returns: np.array of shape(nxm) 176 | """ 177 | # Get the coordinates of bounding boxes 178 | b1_x1, b1_y1, b1_x2, b1_y2 = box1.T 179 | b2_x1, b2_y1, b2_x2, b2_y2 = box2.T 180 | 181 | # Intersection area 182 | inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \ 183 | (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0) 184 | 185 | # box2 area 186 | box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) 187 | box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) 188 | return inter_area / (box2_area + box1_area[:, None] - inter_area + eps) 189 | -------------------------------------------------------------------------------- /ultralytics/hub/utils.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | 3 | import os 4 | import platform 5 | import shutil 6 | import sys 7 | import threading 8 | import time 9 | from pathlib import Path 10 | from random import random 11 | 12 | import requests 13 | 14 | from ultralytics.yolo.utils import (DEFAULT_CFG_DICT, ENVIRONMENT, LOGGER, RANK, SETTINGS, TryExcept, __version__, 15 | colorstr, emojis, get_git_origin_url, is_colab, is_git_dir, is_github_actions_ci, 16 | is_pip_package, is_pytest_running) 17 | from ultralytics.yolo.utils.checks import check_online 18 | 19 | PREFIX = colorstr('Ultralytics: ') 20 | HELP_MSG = 'If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance.' 21 | HUB_API_ROOT = os.environ.get('ULTRALYTICS_HUB_API', 'https://api.ultralytics.com') 22 | 23 | 24 | def check_dataset_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=2.0): 25 | # Check that url fits on disk with safety factor sf, i.e. require 2GB free if url size is 1GB with sf=2.0 26 | gib = 1 << 30 # bytes per GiB 27 | data = int(requests.head(url).headers['Content-Length']) / gib # dataset size (GB) 28 | total, used, free = (x / gib for x in shutil.disk_usage('/')) # bytes 29 | LOGGER.info(f'{PREFIX}{data:.3f} GB dataset, {free:.1f}/{total:.1f} GB free disk space') 30 | if data * sf < free: 31 | return True # sufficient space 32 | LOGGER.warning(f'{PREFIX}WARNING: Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, ' 33 | f'training cancelled ❌. Please free {data * sf - free:.1f} GB additional disk space and try again.') 34 | return False # insufficient space 35 | 36 | 37 | def request_with_credentials(url: str) -> any: 38 | """ Make an ajax request with cookies attached """ 39 | if not is_colab(): 40 | raise OSError('request_with_credentials() must run in a Colab environment') 41 | from google.colab import output # noqa 42 | from IPython import display # noqa 43 | display.display( 44 | display.Javascript(""" 45 | window._hub_tmp = new Promise((resolve, reject) => { 46 | const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000) 47 | fetch("%s", { 48 | method: 'POST', 49 | credentials: 'include' 50 | }) 51 | .then((response) => resolve(response.json())) 52 | .then((json) => { 53 | clearTimeout(timeout); 54 | }).catch((err) => { 55 | clearTimeout(timeout); 56 | reject(err); 57 | }); 58 | }); 59 | """ % url)) 60 | return output.eval_js('_hub_tmp') 61 | 62 | 63 | # Deprecated TODO: eliminate this function? 64 | def split_key(key=''): 65 | """ 66 | Verify and split a 'api_key[sep]model_id' string, sep is one of '.' or '_' 67 | 68 | Args: 69 | key (str): The model key to split. If not provided, the user will be prompted to enter it. 70 | 71 | Returns: 72 | Tuple[str, str]: A tuple containing the API key and model ID. 73 | """ 74 | 75 | import getpass 76 | 77 | error_string = emojis(f'{PREFIX}Invalid API key ⚠️\n') # error string 78 | if not key: 79 | key = getpass.getpass('Enter model key: ') 80 | sep = '_' if '_' in key else '.' if '.' in key else None # separator 81 | assert sep, error_string 82 | api_key, model_id = key.split(sep) 83 | assert len(api_key) and len(model_id), error_string 84 | return api_key, model_id 85 | 86 | 87 | def smart_request(*args, retry=3, timeout=30, thread=True, code=-1, method='post', verbose=True, **kwargs): 88 | """ 89 | Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout. 90 | 91 | Args: 92 | *args: Positional arguments to be passed to the requests function specified in method. 93 | retry (int, optional): Number of retries to attempt before giving up. Default is 3. 94 | timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30. 95 | thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True. 96 | code (int, optional): An identifier for the request, used for logging purposes. Default is -1. 97 | method (str, optional): The HTTP method to use for the request. Choices are 'post' and 'get'. Default is 'post'. 98 | verbose (bool, optional): A flag to determine whether to print out to console or not. Default is True. 99 | **kwargs: Keyword arguments to be passed to the requests function specified in method. 100 | 101 | Returns: 102 | requests.Response: The HTTP response object. If the request is executed in a separate thread, returns None. 103 | """ 104 | retry_codes = (408, 500) # retry only these codes 105 | 106 | @TryExcept(verbose=verbose) 107 | def func(*func_args, **func_kwargs): 108 | r = None # response 109 | t0 = time.time() # initial time for timer 110 | for i in range(retry + 1): 111 | if (time.time() - t0) > timeout: 112 | break 113 | if method == 'post': 114 | r = requests.post(*func_args, **func_kwargs) # i.e. post(url, data, json, files) 115 | elif method == 'get': 116 | r = requests.get(*func_args, **func_kwargs) # i.e. get(url, data, json, files) 117 | if r.status_code == 200: 118 | break 119 | try: 120 | m = r.json().get('message', 'No JSON message.') 121 | except AttributeError: 122 | m = 'Unable to read JSON.' 123 | if i == 0: 124 | if r.status_code in retry_codes: 125 | m += f' Retrying {retry}x for {timeout}s.' if retry else '' 126 | elif r.status_code == 429: # rate limit 127 | h = r.headers # response headers 128 | m = f"Rate limit reached ({h['X-RateLimit-Remaining']}/{h['X-RateLimit-Limit']}). " \ 129 | f"Please retry after {h['Retry-After']}s." 130 | if verbose: 131 | LOGGER.warning(f'{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})') 132 | if r.status_code not in retry_codes: 133 | return r 134 | time.sleep(2 ** i) # exponential standoff 135 | return r 136 | 137 | if thread: 138 | threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True).start() 139 | else: 140 | return func(*args, **kwargs) 141 | 142 | 143 | class Traces: 144 | 145 | def __init__(self): 146 | """ 147 | Initialize Traces for error tracking and reporting if tests are not currently running. 148 | """ 149 | self.rate_limit = 3.0 # rate limit (seconds) 150 | self.t = 0.0 # rate limit timer (seconds) 151 | self.metadata = { 152 | 'sys_argv_name': Path(sys.argv[0]).name, 153 | 'install': 'git' if is_git_dir() else 'pip' if is_pip_package() else 'other', 154 | 'python': platform.python_version(), 155 | 'release': __version__, 156 | 'environment': ENVIRONMENT} 157 | self.enabled = SETTINGS['sync'] and \ 158 | RANK in {-1, 0} and \ 159 | check_online() and \ 160 | not is_pytest_running() and \ 161 | not is_github_actions_ci() and \ 162 | (is_pip_package() or get_git_origin_url() == 'https://github.com/ultralytics/ultralytics.git') 163 | 164 | def __call__(self, cfg, all_keys=False, traces_sample_rate=1.0): 165 | """ 166 | Sync traces data if enabled in the global settings 167 | 168 | Args: 169 | cfg (IterableSimpleNamespace): Configuration for the task and mode. 170 | all_keys (bool): Sync all items, not just non-default values. 171 | traces_sample_rate (float): Fraction of traces captured from 0.0 to 1.0 172 | """ 173 | t = time.time() # current time 174 | if self.enabled and random() < traces_sample_rate and (t - self.t) > self.rate_limit: 175 | self.t = t # reset rate limit timer 176 | cfg = vars(cfg) # convert type from IterableSimpleNamespace to dict 177 | if not all_keys: # filter cfg 178 | include_keys = {'task', 'mode'} # always include 179 | cfg = { 180 | k: (v.split(os.sep)[-1] if isinstance(v, str) and os.sep in v else v) 181 | for k, v in cfg.items() if v != DEFAULT_CFG_DICT.get(k, None) or k in include_keys} 182 | trace = {'uuid': SETTINGS['uuid'], 'cfg': cfg, 'metadata': self.metadata} 183 | 184 | # Send a request to the HUB API to sync analytics 185 | smart_request(f'{HUB_API_ROOT}/v1/usage/anonymous', 186 | json=trace, 187 | headers=None, 188 | code=3, 189 | retry=0, 190 | timeout=1.0, 191 | verbose=False) 192 | 193 | 194 | # Run below code on hub/utils init ------------------------------------------------------------------------------------- 195 | traces = Traces() 196 | -------------------------------------------------------------------------------- /ultralytics/hub/session.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | import json 3 | import signal 4 | import sys 5 | from pathlib import Path 6 | from time import sleep, time 7 | 8 | import requests 9 | 10 | from ultralytics.hub.utils import HUB_API_ROOT, check_dataset_disk_space, smart_request 11 | from ultralytics.yolo.utils import LOGGER, PREFIX, __version__, emojis, is_colab, threaded 12 | from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params 13 | 14 | AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local' 15 | session = None 16 | 17 | 18 | class HubTrainingSession: 19 | 20 | def __init__(self, model_id, auth): 21 | self.agent_id = None # identifies which instance is communicating with server 22 | self.model_id = model_id 23 | self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}' 24 | self.auth_header = auth.get_auth_header() 25 | self._rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0} # rate limits (seconds) 26 | self._timers = {} # rate limit timers (seconds) 27 | self._metrics_queue = {} # metrics queue 28 | self.model = self._get_model() 29 | self._start_heartbeat() # start heartbeats 30 | self._register_signal_handlers() 31 | 32 | def _register_signal_handlers(self): 33 | signal.signal(signal.SIGTERM, self._handle_signal) 34 | signal.signal(signal.SIGINT, self._handle_signal) 35 | 36 | def _handle_signal(self, signum, frame): 37 | """ 38 | Prevent heartbeats from being sent on Colab after kill. 39 | This method does not use frame, it is included as it is 40 | passed by signal. 41 | """ 42 | if self.alive is True: 43 | LOGGER.info(f'{PREFIX}Kill signal received! ❌') 44 | self._stop_heartbeat() 45 | sys.exit(signum) 46 | 47 | def _stop_heartbeat(self): 48 | """End the heartbeat loop""" 49 | self.alive = False 50 | 51 | def upload_metrics(self): 52 | payload = {'metrics': self._metrics_queue.copy(), 'type': 'metrics'} 53 | smart_request(f'{self.api_url}', json=payload, headers=self.auth_header, code=2) 54 | 55 | def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False): 56 | # Upload a model to HUB 57 | file = None 58 | if Path(weights).is_file(): 59 | with open(weights, 'rb') as f: 60 | file = f.read() 61 | if final: 62 | smart_request( 63 | f'{self.api_url}/upload', 64 | data={ 65 | 'epoch': epoch, 66 | 'type': 'final', 67 | 'map': map}, 68 | files={'best.pt': file}, 69 | headers=self.auth_header, 70 | retry=10, 71 | timeout=3600, 72 | code=4, 73 | ) 74 | else: 75 | smart_request( 76 | f'{self.api_url}/upload', 77 | data={ 78 | 'epoch': epoch, 79 | 'type': 'epoch', 80 | 'isBest': bool(is_best)}, 81 | headers=self.auth_header, 82 | files={'last.pt': file}, 83 | code=3, 84 | ) 85 | 86 | def _get_model(self): 87 | # Returns model from database by id 88 | api_url = f'{HUB_API_ROOT}/v1/models/{self.model_id}' 89 | headers = self.auth_header 90 | 91 | try: 92 | response = smart_request(api_url, method='get', headers=headers, thread=False, code=0) 93 | data = response.json().get('data', None) 94 | 95 | if data.get('status', None) == 'trained': 96 | raise ValueError( 97 | emojis(f'Model is already trained and uploaded to ' 98 | f'https://hub.ultralytics.com/models/{self.model_id} 🚀')) 99 | 100 | if not data.get('data', None): 101 | raise ValueError('Dataset may still be processing. Please wait a minute and try again.') # RF fix 102 | self.model_id = data['id'] 103 | 104 | # TODO: restore when server keys when dataset URL and GPU train is working 105 | 106 | self.train_args = { 107 | 'batch': data['batch_size'], 108 | 'epochs': data['epochs'], 109 | 'imgsz': data['imgsz'], 110 | 'patience': data['patience'], 111 | 'device': data['device'], 112 | 'cache': data['cache'], 113 | 'data': data['data']} 114 | 115 | self.input_file = data.get('cfg', data['weights']) 116 | 117 | # hack for yolov5 cfg adds u 118 | if 'cfg' in data and 'yolov5' in data['cfg']: 119 | self.input_file = data['cfg'].replace('.yaml', 'u.yaml') 120 | 121 | return data 122 | except requests.exceptions.ConnectionError as e: 123 | raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e 124 | except Exception: 125 | raise 126 | 127 | def check_disk_space(self): 128 | if not check_dataset_disk_space(self.model['data']): 129 | raise MemoryError('Not enough disk space') 130 | 131 | def register_callbacks(self, trainer): 132 | trainer.add_callback('on_pretrain_routine_end', self.on_pretrain_routine_end) 133 | trainer.add_callback('on_fit_epoch_end', self.on_fit_epoch_end) 134 | trainer.add_callback('on_model_save', self.on_model_save) 135 | trainer.add_callback('on_train_end', self.on_train_end) 136 | 137 | def on_pretrain_routine_end(self, trainer): 138 | """ 139 | Start timer for upload rate limit. 140 | This method does not use trainer. It is passed to all callbacks by default. 141 | """ 142 | # Start timer for upload rate limit 143 | LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀') 144 | self._timers = {'metrics': time(), 'ckpt': time()} # start timer on self.rate_limit 145 | 146 | def on_fit_epoch_end(self, trainer): 147 | # Upload metrics after val end 148 | all_plots = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics} 149 | 150 | if trainer.epoch == 0: 151 | model_info = { 152 | 'model/parameters': get_num_params(trainer.model), 153 | 'model/GFLOPs': round(get_flops(trainer.model), 3), 154 | 'model/speed(ms)': round(trainer.validator.speed[1], 3)} 155 | all_plots = {**all_plots, **model_info} 156 | self._metrics_queue[trainer.epoch] = json.dumps(all_plots) 157 | if time() - self._timers['metrics'] > self._rate_limits['metrics']: 158 | self.upload_metrics() 159 | self._timers['metrics'] = time() # reset timer 160 | self._metrics_queue = {} # reset queue 161 | 162 | def on_model_save(self, trainer): 163 | # Upload checkpoints with rate limiting 164 | is_best = trainer.best_fitness == trainer.fitness 165 | if time() - self._timers['ckpt'] > self._rate_limits['ckpt']: 166 | LOGGER.info(f'{PREFIX}Uploading checkpoint {self.model_id}') 167 | self._upload_model(trainer.epoch, trainer.last, is_best) 168 | self._timers['ckpt'] = time() # reset timer 169 | 170 | def on_train_end(self, trainer): 171 | # Upload final model and metrics with exponential standoff 172 | LOGGER.info(f'{PREFIX}Training completed successfully ✅') 173 | LOGGER.info(f'{PREFIX}Uploading final {self.model_id}') 174 | 175 | # hack for fetching mAP 176 | mAP = trainer.metrics.get('metrics/mAP50-95(B)', 0) 177 | self._upload_model(trainer.epoch, trainer.best, map=mAP, final=True) # results[3] is mAP0.5:0.95 178 | self.alive = False # stop heartbeats 179 | LOGGER.info(f'{PREFIX}View model at https://hub.ultralytics.com/models/{self.model_id} 🚀') 180 | 181 | def _upload_model(self, epoch, weights, is_best=False, map=0.0, final=False): 182 | # Upload a model to HUB 183 | file = None 184 | if Path(weights).is_file(): 185 | with open(weights, 'rb') as f: 186 | file = f.read() 187 | file_param = {'best.pt' if final else 'last.pt': file} 188 | endpoint = f'{self.api_url}/upload' 189 | data = {'epoch': epoch} 190 | if final: 191 | data.update({'type': 'final', 'map': map}) 192 | else: 193 | data.update({'type': 'epoch', 'isBest': bool(is_best)}) 194 | 195 | smart_request( 196 | endpoint, 197 | data=data, 198 | files=file_param, 199 | headers=self.auth_header, 200 | retry=10 if final else None, 201 | timeout=3600 if final else None, 202 | code=4 if final else 3, 203 | ) 204 | 205 | @threaded 206 | def _start_heartbeat(self): 207 | self.alive = True 208 | while self.alive: 209 | r = smart_request( 210 | f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}', 211 | json={ 212 | 'agent': AGENT_NAME, 213 | 'agentId': self.agent_id}, 214 | headers=self.auth_header, 215 | retry=0, 216 | code=5, 217 | thread=False, 218 | ) 219 | self.agent_id = r.json().get('data', {}).get('agentId', None) 220 | sleep(self._rate_limits['heartbeat']) 221 | -------------------------------------------------------------------------------- /ultralytics/models/README.md: -------------------------------------------------------------------------------- 1 | ## Models 2 | 3 | Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration 4 | files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted 5 | and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image 6 | segmentation tasks. 7 | 8 | These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like 9 | instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, 10 | from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this 11 | directory provides a great starting point for your custom model development needs. 12 | 13 | To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've 14 | selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full 15 | details at the Ultralytics [Docs](https://docs.ultralytics.com), and if you need help or have any questions, feel free 16 | to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now! 17 | 18 | ### Usage 19 | 20 | Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command: 21 | 22 | ```bash 23 | yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100 24 | ``` 25 | 26 | They may also be used directly in a Python environment, and accepts the same 27 | [arguments](https://docs.ultralytics.com/cfg/) as in the CLI example above: 28 | 29 | ```python 30 | from ultralytics import YOLO 31 | 32 | model = YOLO("model.yaml") # build a YOLOv8n model from scratch 33 | # YOLO("model.pt") use pre-trained model if available 34 | model.info() # display model information 35 | model.train(data="coco128.yaml", epochs=100) # train the model 36 | ``` 37 | 38 | ## Pre-trained Model Architectures 39 | 40 | Ultralytics supports many model architectures. Visit [models](#) page to view detailed information and usage. 41 | Any of these models can be used by loading their configs or pretrained checkpoints if available. 42 | 43 | What to add your model architecture? [Here's](#) how you can contribute 44 | 45 | ### 1. YOLOv8 46 | 47 | **About** - Cutting edge Detection, Segmentation and Classification models developed by Ultralytics.
48 | **Citation** - 49 | Available Models: 50 | 51 | - Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x` 52 | - Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg` 53 | - Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls` 54 | 55 |
Performance 56 | 57 | ### Detection 58 | 59 | | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | 60 | | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | 61 | | [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | 62 | | [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | 63 | | [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | 64 | | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | 65 | | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | 66 | 67 | ### Segmentation 68 | 69 | | Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | 70 | | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | 71 | | [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | 72 | | [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | 73 | | [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | 74 | | [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | 75 | | [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | 76 | 77 | ### Classification 78 | 79 | | Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | 80 | | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | 81 | | [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt) | 224 | 66.6 | 87.0 | 12.9 | 0.31 | 2.7 | 4.3 | 82 | | [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-cls.pt) | 224 | 72.3 | 91.1 | 23.4 | 0.35 | 6.4 | 13.5 | 83 | | [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-cls.pt) | 224 | 76.4 | 93.2 | 85.4 | 0.62 | 17.0 | 42.7 | 84 | | [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 | 85 | | [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 | 86 | 87 |
88 | 89 | ### 2. YOLOv5u 90 | 91 | **About** - Anchor-free YOLOv5 models with new detection head and better speed-accuracy tradeoff
92 | **Citation** - 93 | Available Models: 94 | 95 | - Detection - `yolov5nu`, `yolov5su`, `yolov5mu`, `yolov5lu`, `yolov5xu` 96 | 97 |
Performance 98 | 99 | ### Detection 100 | 101 | | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | 102 | | -------------------------------------------------------------------------------------- | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | 103 | | [YOLOv5nu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5nu.pt) | 640 | 34.3 | 73.6 | 1.06 | 2.6 | 7.7 | 104 | | [YOLOv5su](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5su.pt) | 640 | 43.0 | 120.7 | 1.27 | 9.1 | 24.0 | 105 | | [YOLOv5mu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5mu.pt) | 640 | 49.0 | 233.9 | 1.86 | 25.1 | 64.2 | 106 | | [YOLOv5lu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5lu.pt) | 640 | 52.2 | 408.4 | 2.50 | 53.2 | 135.0 | 107 | | [YOLOv5xu](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5xu.pt) | 640 | 53.2 | 763.2 | 3.81 | 97.2 | 246.4 | 108 | 109 |
110 | -------------------------------------------------------------------------------- /ultralytics/nn/autoshape.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | """ 3 | Common modules 4 | """ 5 | 6 | from copy import copy 7 | from pathlib import Path 8 | 9 | import cv2 10 | import numpy as np 11 | import pandas as pd 12 | import requests 13 | import torch 14 | import torch.nn as nn 15 | from PIL import Image, ImageOps 16 | from torch.cuda import amp 17 | 18 | from ultralytics.nn.autobackend import AutoBackend 19 | from ultralytics.yolo.data.augment import LetterBox 20 | from ultralytics.yolo.utils import LOGGER, colorstr 21 | from ultralytics.yolo.utils.files import increment_path 22 | from ultralytics.yolo.utils.ops import Profile, make_divisible, non_max_suppression, scale_boxes, xyxy2xywh 23 | from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box 24 | from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode 25 | 26 | 27 | class AutoShape(nn.Module): 28 | # YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 29 | conf = 0.25 # NMS confidence threshold 30 | iou = 0.45 # NMS IoU threshold 31 | agnostic = False # NMS class-agnostic 32 | multi_label = False # NMS multiple labels per box 33 | classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs 34 | max_det = 1000 # maximum number of detections per image 35 | amp = False # Automatic Mixed Precision (AMP) inference 36 | 37 | def __init__(self, model, verbose=True): 38 | super().__init__() 39 | if verbose: 40 | LOGGER.info('Adding AutoShape... ') 41 | copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes 42 | self.dmb = isinstance(model, AutoBackend) # DetectMultiBackend() instance 43 | self.pt = not self.dmb or model.pt # PyTorch model 44 | self.model = model.eval() 45 | if self.pt: 46 | m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() 47 | m.inplace = False # Detect.inplace=False for safe multithread inference 48 | m.export = True # do not output loss values 49 | 50 | def _apply(self, fn): 51 | # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers 52 | self = super()._apply(fn) 53 | if self.pt: 54 | m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() 55 | m.stride = fn(m.stride) 56 | m.grid = list(map(fn, m.grid)) 57 | if isinstance(m.anchor_grid, list): 58 | m.anchor_grid = list(map(fn, m.anchor_grid)) 59 | return self 60 | 61 | @smart_inference_mode() 62 | def forward(self, ims, size=640, augment=False, profile=False): 63 | # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are: 64 | # file: ims = 'data/images/zidane.jpg' # str or PosixPath 65 | # URI: = 'https://ultralytics.com/images/zidane.jpg' 66 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) 67 | # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) 68 | # numpy: = np.zeros((640,1280,3)) # HWC 69 | # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) 70 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images 71 | 72 | dt = (Profile(), Profile(), Profile()) 73 | with dt[0]: 74 | if isinstance(size, int): # expand 75 | size = (size, size) 76 | p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param 77 | autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference 78 | if isinstance(ims, torch.Tensor): # torch 79 | with amp.autocast(autocast): 80 | return self.model(ims.to(p.device).type_as(p), augment=augment) # inference 81 | 82 | # Pre-process 83 | n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images 84 | shape0, shape1, files = [], [], [] # image and inference shapes, filenames 85 | for i, im in enumerate(ims): 86 | f = f'image{i}' # filename 87 | if isinstance(im, (str, Path)): # filename or uri 88 | im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im 89 | im = np.asarray(ImageOps.exif_transpose(im)) 90 | elif isinstance(im, Image.Image): # PIL Image 91 | im, f = np.asarray(ImageOps.exif_transpose(im)), getattr(im, 'filename', f) or f 92 | files.append(Path(f).with_suffix('.jpg').name) 93 | if im.shape[0] < 5: # image in CHW 94 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 95 | im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input 96 | s = im.shape[:2] # HWC 97 | shape0.append(s) # image shape 98 | g = max(size) / max(s) # gain 99 | shape1.append([y * g for y in s]) 100 | ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update 101 | shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size # inf shape 102 | x = [LetterBox(shape1, auto=False)(image=im)['img'] for im in ims] # pad 103 | x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW 104 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 105 | 106 | with amp.autocast(autocast): 107 | # Inference 108 | with dt[1]: 109 | y = self.model(x, augment=augment) # forward 110 | 111 | # Post-process 112 | with dt[2]: 113 | y = non_max_suppression(y if self.dmb else y[0], 114 | self.conf, 115 | self.iou, 116 | self.classes, 117 | self.agnostic, 118 | self.multi_label, 119 | max_det=self.max_det) # NMS 120 | for i in range(n): 121 | scale_boxes(shape1, y[i][:, :4], shape0[i]) 122 | 123 | return Detections(ims, y, files, dt, self.names, x.shape) 124 | 125 | 126 | class Detections: 127 | # YOLOv8 detections class for inference results 128 | def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None): 129 | super().__init__() 130 | d = pred[0].device # device 131 | gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations 132 | self.ims = ims # list of images as numpy arrays 133 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 134 | self.names = names # class names 135 | self.files = files # image filenames 136 | self.times = times # profiling times 137 | self.xyxy = pred # xyxy pixels 138 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 139 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 140 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 141 | self.n = len(self.pred) # number of images (batch size) 142 | self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms) 143 | self.s = tuple(shape) # inference BCHW shape 144 | 145 | def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')): 146 | s, crops = '', [] 147 | for i, (im, pred) in enumerate(zip(self.ims, self.pred)): 148 | s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string 149 | if pred.shape[0]: 150 | for c in pred[:, -1].unique(): 151 | n = (pred[:, -1] == c).sum() # detections per class 152 | s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string 153 | s = s.rstrip(', ') 154 | if show or save or render or crop: 155 | annotator = Annotator(im, example=str(self.names)) 156 | for *box, conf, cls in reversed(pred): # xyxy, confidence, class 157 | label = f'{self.names[int(cls)]} {conf:.2f}' 158 | if crop: 159 | file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None 160 | crops.append({ 161 | 'box': box, 162 | 'conf': conf, 163 | 'cls': cls, 164 | 'label': label, 165 | 'im': save_one_box(box, im, file=file, save=save)}) 166 | else: # all others 167 | annotator.box_label(box, label if labels else '', color=colors(cls)) 168 | im = annotator.im 169 | else: 170 | s += '(no detections)' 171 | 172 | im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np 173 | if show: 174 | im.show(self.files[i]) # show 175 | if save: 176 | f = self.files[i] 177 | im.save(save_dir / f) # save 178 | if i == self.n - 1: 179 | LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") 180 | if render: 181 | self.ims[i] = np.asarray(im) 182 | if pprint: 183 | s = s.lstrip('\n') 184 | return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t 185 | if crop: 186 | if save: 187 | LOGGER.info(f'Saved results to {save_dir}\n') 188 | return crops 189 | 190 | def show(self, labels=True): 191 | self._run(show=True, labels=labels) # show results 192 | 193 | def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False): 194 | save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir 195 | self._run(save=True, labels=labels, save_dir=save_dir) # save results 196 | 197 | def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False): 198 | save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None 199 | return self._run(crop=True, save=save, save_dir=save_dir) # crop results 200 | 201 | def render(self, labels=True): 202 | self._run(render=True, labels=labels) # render results 203 | return self.ims 204 | 205 | def pandas(self): 206 | # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) 207 | new = copy(self) # return copy 208 | ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns 209 | cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns 210 | for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): 211 | a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update 212 | setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) 213 | return new 214 | 215 | def tolist(self): 216 | # return a list of Detections objects, i.e. 'for result in results.tolist():' 217 | r = range(self.n) # iterable 218 | x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r] 219 | # for d in x: 220 | # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: 221 | # setattr(d, k, getattr(d, k)[0]) # pop out of list 222 | return x 223 | 224 | def print(self): 225 | LOGGER.info(self.__str__()) 226 | 227 | def __len__(self): # override len(results) 228 | return self.n 229 | 230 | def __str__(self): # override print(results) 231 | return self._run(pprint=True) # print results 232 | 233 | def __repr__(self): 234 | return f'YOLOv8 {self.__class__} instance\n' + self.__str__() 235 | 236 | 237 | print('works') 238 | -------------------------------------------------------------------------------- /ultralytics/tracker/utils/gmc.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | 8 | class GMC: 9 | 10 | def __init__(self, method='sparseOptFlow', downscale=2, verbose=None): 11 | super().__init__() 12 | 13 | self.method = method 14 | self.downscale = max(1, int(downscale)) 15 | 16 | if self.method == 'orb': 17 | self.detector = cv2.FastFeatureDetector_create(20) 18 | self.extractor = cv2.ORB_create() 19 | self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING) 20 | 21 | elif self.method == 'sift': 22 | self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20) 23 | self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20) 24 | self.matcher = cv2.BFMatcher(cv2.NORM_L2) 25 | 26 | elif self.method == 'ecc': 27 | number_of_iterations = 5000 28 | termination_eps = 1e-6 29 | self.warp_mode = cv2.MOTION_EUCLIDEAN 30 | self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps) 31 | 32 | elif self.method == 'sparseOptFlow': 33 | self.feature_params = dict(maxCorners=1000, 34 | qualityLevel=0.01, 35 | minDistance=1, 36 | blockSize=3, 37 | useHarrisDetector=False, 38 | k=0.04) 39 | # self.gmc_file = open('GMC_results.txt', 'w') 40 | 41 | elif self.method in ['file', 'files']: 42 | seqName = verbose[0] 43 | ablation = verbose[1] 44 | if ablation: 45 | filePath = r'tracker/GMC_files/MOT17_ablation' 46 | else: 47 | filePath = r'tracker/GMC_files/MOTChallenge' 48 | 49 | if '-FRCNN' in seqName: 50 | seqName = seqName[:-6] 51 | elif '-DPM' in seqName or '-SDP' in seqName: 52 | seqName = seqName[:-4] 53 | self.gmcFile = open(f'{filePath}/GMC-{seqName}.txt') 54 | 55 | if self.gmcFile is None: 56 | raise ValueError(f'Error: Unable to open GMC file in directory:{filePath}') 57 | elif self.method in ['none', 'None']: 58 | self.method = 'none' 59 | else: 60 | raise ValueError(f'Error: Unknown CMC method:{method}') 61 | 62 | self.prevFrame = None 63 | self.prevKeyPoints = None 64 | self.prevDescriptors = None 65 | 66 | self.initializedFirstFrame = False 67 | 68 | def apply(self, raw_frame, detections=None): 69 | if self.method in ['orb', 'sift']: 70 | return self.applyFeaures(raw_frame, detections) 71 | elif self.method == 'ecc': 72 | return self.applyEcc(raw_frame, detections) 73 | elif self.method == 'sparseOptFlow': 74 | return self.applySparseOptFlow(raw_frame, detections) 75 | elif self.method == 'file': 76 | return self.applyFile(raw_frame, detections) 77 | elif self.method == 'none': 78 | return np.eye(2, 3) 79 | else: 80 | return np.eye(2, 3) 81 | 82 | def applyEcc(self, raw_frame, detections=None): 83 | 84 | # Initialize 85 | height, width, _ = raw_frame.shape 86 | frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY) 87 | H = np.eye(2, 3, dtype=np.float32) 88 | 89 | # Downscale image (TODO: consider using pyramids) 90 | if self.downscale > 1.0: 91 | frame = cv2.GaussianBlur(frame, (3, 3), 1.5) 92 | frame = cv2.resize(frame, (width // self.downscale, height // self.downscale)) 93 | width = width // self.downscale 94 | height = height // self.downscale 95 | 96 | # Handle first frame 97 | if not self.initializedFirstFrame: 98 | # Initialize data 99 | self.prevFrame = frame.copy() 100 | 101 | # Initialization done 102 | self.initializedFirstFrame = True 103 | 104 | return H 105 | 106 | # Run the ECC algorithm. The results are stored in warp_matrix. 107 | # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria) 108 | try: 109 | (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1) 110 | except Exception as e: 111 | print(f'Warning: find transform failed. Set warp as identity {e}') 112 | 113 | return H 114 | 115 | def applyFeaures(self, raw_frame, detections=None): 116 | 117 | # Initialize 118 | height, width, _ = raw_frame.shape 119 | frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY) 120 | H = np.eye(2, 3) 121 | 122 | # Downscale image (TODO: consider using pyramids) 123 | if self.downscale > 1.0: 124 | # frame = cv2.GaussianBlur(frame, (3, 3), 1.5) 125 | frame = cv2.resize(frame, (width // self.downscale, height // self.downscale)) 126 | width = width // self.downscale 127 | height = height // self.downscale 128 | 129 | # find the keypoints 130 | mask = np.zeros_like(frame) 131 | # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255 132 | mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255 133 | if detections is not None: 134 | for det in detections: 135 | tlbr = (det[:4] / self.downscale).astype(np.int_) 136 | mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0 137 | 138 | keypoints = self.detector.detect(frame, mask) 139 | 140 | # compute the descriptors 141 | keypoints, descriptors = self.extractor.compute(frame, keypoints) 142 | 143 | # Handle first frame 144 | if not self.initializedFirstFrame: 145 | # Initialize data 146 | self.prevFrame = frame.copy() 147 | self.prevKeyPoints = copy.copy(keypoints) 148 | self.prevDescriptors = copy.copy(descriptors) 149 | 150 | # Initialization done 151 | self.initializedFirstFrame = True 152 | 153 | return H 154 | 155 | # Match descriptors. 156 | knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2) 157 | 158 | # Filtered matches based on smallest spatial distance 159 | matches = [] 160 | spatialDistances = [] 161 | 162 | maxSpatialDistance = 0.25 * np.array([width, height]) 163 | 164 | # Handle empty matches case 165 | if len(knnMatches) == 0: 166 | # Store to next iteration 167 | self.prevFrame = frame.copy() 168 | self.prevKeyPoints = copy.copy(keypoints) 169 | self.prevDescriptors = copy.copy(descriptors) 170 | 171 | return H 172 | 173 | for m, n in knnMatches: 174 | if m.distance < 0.9 * n.distance: 175 | prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt 176 | currKeyPointLocation = keypoints[m.trainIdx].pt 177 | 178 | spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0], 179 | prevKeyPointLocation[1] - currKeyPointLocation[1]) 180 | 181 | if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \ 182 | (np.abs(spatialDistance[1]) < maxSpatialDistance[1]): 183 | spatialDistances.append(spatialDistance) 184 | matches.append(m) 185 | 186 | meanSpatialDistances = np.mean(spatialDistances, 0) 187 | stdSpatialDistances = np.std(spatialDistances, 0) 188 | 189 | inliesrs = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances 190 | 191 | goodMatches = [] 192 | prevPoints = [] 193 | currPoints = [] 194 | for i in range(len(matches)): 195 | if inliesrs[i, 0] and inliesrs[i, 1]: 196 | goodMatches.append(matches[i]) 197 | prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt) 198 | currPoints.append(keypoints[matches[i].trainIdx].pt) 199 | 200 | prevPoints = np.array(prevPoints) 201 | currPoints = np.array(currPoints) 202 | 203 | # Draw the keypoint matches on the output image 204 | if 0: 205 | matches_img = np.hstack((self.prevFrame, frame)) 206 | matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR) 207 | W = np.size(self.prevFrame, 1) 208 | for m in goodMatches: 209 | prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_) 210 | curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_) 211 | curr_pt[0] += W 212 | color = np.random.randint(0, 255, (3,)) 213 | color = (int(color[0]), int(color[1]), int(color[2])) 214 | 215 | matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA) 216 | matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1) 217 | matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1) 218 | 219 | plt.figure() 220 | plt.imshow(matches_img) 221 | plt.show() 222 | 223 | # Find rigid matrix 224 | if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)): 225 | H, inliesrs = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC) 226 | 227 | # Handle downscale 228 | if self.downscale > 1.0: 229 | H[0, 2] *= self.downscale 230 | H[1, 2] *= self.downscale 231 | else: 232 | print('Warning: not enough matching points') 233 | 234 | # Store to next iteration 235 | self.prevFrame = frame.copy() 236 | self.prevKeyPoints = copy.copy(keypoints) 237 | self.prevDescriptors = copy.copy(descriptors) 238 | 239 | return H 240 | 241 | def applySparseOptFlow(self, raw_frame, detections=None): 242 | # Initialize 243 | # t0 = time.time() 244 | height, width, _ = raw_frame.shape 245 | frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY) 246 | H = np.eye(2, 3) 247 | 248 | # Downscale image 249 | if self.downscale > 1.0: 250 | # frame = cv2.GaussianBlur(frame, (3, 3), 1.5) 251 | frame = cv2.resize(frame, (width // self.downscale, height // self.downscale)) 252 | 253 | # find the keypoints 254 | keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params) 255 | 256 | # Handle first frame 257 | if not self.initializedFirstFrame: 258 | # Initialize data 259 | self.prevFrame = frame.copy() 260 | self.prevKeyPoints = copy.copy(keypoints) 261 | 262 | # Initialization done 263 | self.initializedFirstFrame = True 264 | 265 | return H 266 | 267 | # find correspondences 268 | matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None) 269 | 270 | # leave good correspondences only 271 | prevPoints = [] 272 | currPoints = [] 273 | 274 | for i in range(len(status)): 275 | if status[i]: 276 | prevPoints.append(self.prevKeyPoints[i]) 277 | currPoints.append(matchedKeypoints[i]) 278 | 279 | prevPoints = np.array(prevPoints) 280 | currPoints = np.array(currPoints) 281 | 282 | # Find rigid matrix 283 | if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)): 284 | H, inliesrs = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC) 285 | 286 | # Handle downscale 287 | if self.downscale > 1.0: 288 | H[0, 2] *= self.downscale 289 | H[1, 2] *= self.downscale 290 | else: 291 | print('Warning: not enough matching points') 292 | 293 | # Store to next iteration 294 | self.prevFrame = frame.copy() 295 | self.prevKeyPoints = copy.copy(keypoints) 296 | 297 | # gmc_line = str(1000 * (time.time() - t0)) + "\t" + str(H[0, 0]) + "\t" + str(H[0, 1]) + "\t" + str( 298 | # H[0, 2]) + "\t" + str(H[1, 0]) + "\t" + str(H[1, 1]) + "\t" + str(H[1, 2]) + "\n" 299 | # self.gmc_file.write(gmc_line) 300 | 301 | return H 302 | 303 | def applyFile(self, raw_frame, detections=None): 304 | line = self.gmcFile.readline() 305 | tokens = line.split('\t') 306 | H = np.eye(2, 3, dtype=np.float_) 307 | H[0, 0] = float(tokens[1]) 308 | H[0, 1] = float(tokens[2]) 309 | H[0, 2] = float(tokens[3]) 310 | H[1, 0] = float(tokens[4]) 311 | H[1, 1] = float(tokens[5]) 312 | H[1, 2] = float(tokens[6]) 313 | 314 | return H 315 | -------------------------------------------------------------------------------- /ultralytics/tracker/trackers/byte_tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..utils import matching 4 | from ..utils.kalman_filter import KalmanFilterXYAH 5 | from .basetrack import BaseTrack, TrackState 6 | 7 | 8 | class STrack(BaseTrack): 9 | shared_kalman = KalmanFilterXYAH() 10 | 11 | def __init__(self, tlwh, score, cls): 12 | 13 | # wait activate 14 | self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32) 15 | self.kalman_filter = None 16 | self.mean, self.covariance = None, None 17 | self.is_activated = False 18 | 19 | self.score = score 20 | self.tracklet_len = 0 21 | self.cls = cls 22 | self.idx = tlwh[-1] 23 | 24 | def predict(self): 25 | mean_state = self.mean.copy() 26 | if self.state != TrackState.Tracked: 27 | mean_state[7] = 0 28 | self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) 29 | 30 | @staticmethod 31 | def multi_predict(stracks): 32 | if len(stracks) <= 0: 33 | return 34 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 35 | multi_covariance = np.asarray([st.covariance for st in stracks]) 36 | for i, st in enumerate(stracks): 37 | if st.state != TrackState.Tracked: 38 | multi_mean[i][7] = 0 39 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) 40 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 41 | stracks[i].mean = mean 42 | stracks[i].covariance = cov 43 | 44 | @staticmethod 45 | def multi_gmc(stracks, H=np.eye(2, 3)): 46 | if len(stracks) > 0: 47 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 48 | multi_covariance = np.asarray([st.covariance for st in stracks]) 49 | 50 | R = H[:2, :2] 51 | R8x8 = np.kron(np.eye(4, dtype=float), R) 52 | t = H[:2, 2] 53 | 54 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 55 | mean = R8x8.dot(mean) 56 | mean[:2] += t 57 | cov = R8x8.dot(cov).dot(R8x8.transpose()) 58 | 59 | stracks[i].mean = mean 60 | stracks[i].covariance = cov 61 | 62 | def activate(self, kalman_filter, frame_id): 63 | """Start a new tracklet""" 64 | self.kalman_filter = kalman_filter 65 | self.track_id = self.next_id() 66 | self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh)) 67 | 68 | self.tracklet_len = 0 69 | self.state = TrackState.Tracked 70 | if frame_id == 1: 71 | self.is_activated = True 72 | self.frame_id = frame_id 73 | self.start_frame = frame_id 74 | 75 | def re_activate(self, new_track, frame_id, new_id=False): 76 | self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, 77 | self.convert_coords(new_track.tlwh)) 78 | self.tracklet_len = 0 79 | self.state = TrackState.Tracked 80 | self.is_activated = True 81 | self.frame_id = frame_id 82 | if new_id: 83 | self.track_id = self.next_id() 84 | self.score = new_track.score 85 | self.cls = new_track.cls 86 | self.idx = new_track.idx 87 | 88 | def update(self, new_track, frame_id): 89 | """ 90 | Update a matched track 91 | :type new_track: STrack 92 | :type frame_id: int 93 | :type update_feature: bool 94 | :return: 95 | """ 96 | self.frame_id = frame_id 97 | self.tracklet_len += 1 98 | 99 | new_tlwh = new_track.tlwh 100 | self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, 101 | self.convert_coords(new_tlwh)) 102 | self.state = TrackState.Tracked 103 | self.is_activated = True 104 | 105 | self.score = new_track.score 106 | self.cls = new_track.cls 107 | self.idx = new_track.idx 108 | 109 | def convert_coords(self, tlwh): 110 | return self.tlwh_to_xyah(tlwh) 111 | 112 | @property 113 | def tlwh(self): 114 | """Get current position in bounding box format `(top left x, top left y, 115 | width, height)`. 116 | """ 117 | if self.mean is None: 118 | return self._tlwh.copy() 119 | ret = self.mean[:4].copy() 120 | ret[2] *= ret[3] 121 | ret[:2] -= ret[2:] / 2 122 | return ret 123 | 124 | @property 125 | def tlbr(self): 126 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 127 | `(top left, bottom right)`. 128 | """ 129 | ret = self.tlwh.copy() 130 | ret[2:] += ret[:2] 131 | return ret 132 | 133 | @staticmethod 134 | def tlwh_to_xyah(tlwh): 135 | """Convert bounding box to format `(center x, center y, aspect ratio, 136 | height)`, where the aspect ratio is `width / height`. 137 | """ 138 | ret = np.asarray(tlwh).copy() 139 | ret[:2] += ret[2:] / 2 140 | ret[2] /= ret[3] 141 | return ret 142 | 143 | @staticmethod 144 | def tlbr_to_tlwh(tlbr): 145 | ret = np.asarray(tlbr).copy() 146 | ret[2:] -= ret[:2] 147 | return ret 148 | 149 | @staticmethod 150 | def tlwh_to_tlbr(tlwh): 151 | ret = np.asarray(tlwh).copy() 152 | ret[2:] += ret[:2] 153 | return ret 154 | 155 | def __repr__(self): 156 | return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})' 157 | 158 | 159 | class BYTETracker: 160 | 161 | def __init__(self, args, frame_rate=30): 162 | self.tracked_stracks = [] # type: list[STrack] 163 | self.lost_stracks = [] # type: list[STrack] 164 | self.removed_stracks = [] # type: list[STrack] 165 | 166 | self.frame_id = 0 167 | self.args = args 168 | self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer) 169 | self.kalman_filter = self.get_kalmanfilter() 170 | 171 | def update(self, results, img=None): 172 | self.frame_id += 1 173 | activated_starcks = [] 174 | refind_stracks = [] 175 | lost_stracks = [] 176 | removed_stracks = [] 177 | 178 | scores = results.conf 179 | bboxes = results.xyxy 180 | # add index 181 | bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1) 182 | cls = results.cls 183 | 184 | remain_inds = scores > self.args.track_high_thresh 185 | inds_low = scores > self.args.track_low_thresh 186 | inds_high = scores < self.args.track_high_thresh 187 | 188 | inds_second = np.logical_and(inds_low, inds_high) 189 | dets_second = bboxes[inds_second] 190 | dets = bboxes[remain_inds] 191 | scores_keep = scores[remain_inds] 192 | scores_second = scores[inds_second] 193 | cls_keep = cls[remain_inds] 194 | cls_second = cls[inds_second] 195 | 196 | detections = self.init_track(dets, scores_keep, cls_keep, img) 197 | """ Add newly detected tracklets to tracked_stracks""" 198 | unconfirmed = [] 199 | tracked_stracks = [] # type: list[STrack] 200 | for track in self.tracked_stracks: 201 | if not track.is_activated: 202 | unconfirmed.append(track) 203 | else: 204 | tracked_stracks.append(track) 205 | """ Step 2: First association, with high score detection boxes""" 206 | strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks) 207 | # Predict the current location with KF 208 | self.multi_predict(strack_pool) 209 | if hasattr(self, 'gmc'): 210 | warp = self.gmc.apply(img, dets) 211 | STrack.multi_gmc(strack_pool, warp) 212 | STrack.multi_gmc(unconfirmed, warp) 213 | 214 | dists = self.get_dists(strack_pool, detections) 215 | matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) 216 | 217 | for itracked, idet in matches: 218 | track = strack_pool[itracked] 219 | det = detections[idet] 220 | if track.state == TrackState.Tracked: 221 | track.update(det, self.frame_id) 222 | activated_starcks.append(track) 223 | else: 224 | track.re_activate(det, self.frame_id, new_id=False) 225 | refind_stracks.append(track) 226 | """ Step 3: Second association, with low score detection boxes""" 227 | # association the untrack to the low score detections 228 | detections_second = self.init_track(dets_second, scores_second, cls_second, img) 229 | r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] 230 | # TODO 231 | dists = matching.iou_distance(r_tracked_stracks, detections_second) 232 | matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) 233 | for itracked, idet in matches: 234 | track = r_tracked_stracks[itracked] 235 | det = detections_second[idet] 236 | if track.state == TrackState.Tracked: 237 | track.update(det, self.frame_id) 238 | activated_starcks.append(track) 239 | else: 240 | track.re_activate(det, self.frame_id, new_id=False) 241 | refind_stracks.append(track) 242 | 243 | for it in u_track: 244 | track = r_tracked_stracks[it] 245 | if track.state != TrackState.Lost: 246 | track.mark_lost() 247 | lost_stracks.append(track) 248 | """Deal with unconfirmed tracks, usually tracks with only one beginning frame""" 249 | detections = [detections[i] for i in u_detection] 250 | dists = self.get_dists(unconfirmed, detections) 251 | matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) 252 | for itracked, idet in matches: 253 | unconfirmed[itracked].update(detections[idet], self.frame_id) 254 | activated_starcks.append(unconfirmed[itracked]) 255 | for it in u_unconfirmed: 256 | track = unconfirmed[it] 257 | track.mark_removed() 258 | removed_stracks.append(track) 259 | """ Step 4: Init new stracks""" 260 | for inew in u_detection: 261 | track = detections[inew] 262 | if track.score < self.args.new_track_thresh: 263 | continue 264 | track.activate(self.kalman_filter, self.frame_id) 265 | activated_starcks.append(track) 266 | """ Step 5: Update state""" 267 | for track in self.lost_stracks: 268 | if self.frame_id - track.end_frame > self.max_time_lost: 269 | track.mark_removed() 270 | removed_stracks.append(track) 271 | 272 | self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] 273 | self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_starcks) 274 | self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks) 275 | self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks) 276 | self.lost_stracks.extend(lost_stracks) 277 | self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks) 278 | self.removed_stracks.extend(removed_stracks) 279 | self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) 280 | output = [ 281 | track.tlbr.tolist() + [track.track_id, track.score, track.cls, track.idx] for track in self.tracked_stracks 282 | if track.is_activated] 283 | return np.asarray(output, dtype=np.float32) 284 | 285 | def get_kalmanfilter(self): 286 | return KalmanFilterXYAH() 287 | 288 | def init_track(self, dets, scores, cls, img=None): 289 | return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections 290 | 291 | def get_dists(self, tracks, detections): 292 | dists = matching.iou_distance(tracks, detections) 293 | # TODO: mot20 294 | # if not self.args.mot20: 295 | dists = matching.fuse_score(dists, detections) 296 | return dists 297 | 298 | def multi_predict(self, tracks): 299 | STrack.multi_predict(tracks) 300 | 301 | @staticmethod 302 | def joint_stracks(tlista, tlistb): 303 | exists = {} 304 | res = [] 305 | for t in tlista: 306 | exists[t.track_id] = 1 307 | res.append(t) 308 | for t in tlistb: 309 | tid = t.track_id 310 | if not exists.get(tid, 0): 311 | exists[tid] = 1 312 | res.append(t) 313 | return res 314 | 315 | @staticmethod 316 | def sub_stracks(tlista, tlistb): 317 | stracks = {t.track_id: t for t in tlista} 318 | for t in tlistb: 319 | tid = t.track_id 320 | if stracks.get(tid, 0): 321 | del stracks[tid] 322 | return list(stracks.values()) 323 | 324 | @staticmethod 325 | def remove_duplicate_stracks(stracksa, stracksb): 326 | pdist = matching.iou_distance(stracksa, stracksb) 327 | pairs = np.where(pdist < 0.15) 328 | dupa, dupb = [], [] 329 | for p, q in zip(*pairs): 330 | timep = stracksa[p].frame_id - stracksa[p].start_frame 331 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 332 | if timep > timeq: 333 | dupb.append(q) 334 | else: 335 | dupa.append(p) 336 | resa = [t for i, t in enumerate(stracksa) if i not in dupa] 337 | resb = [t for i, t in enumerate(stracksb) if i not in dupb] 338 | return resa, resb 339 | -------------------------------------------------------------------------------- /ultralytics/tracker/utils/kalman_filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.linalg 3 | 4 | # Table for the 0.95 quantile of the chi-square distribution with N degrees of freedom (contains values for N=1, ..., 9) 5 | # Taken from MATLAB/Octave's chi2inv function and used as Mahalanobis gating threshold. 6 | chi2inv95 = {1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070, 6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919} 7 | 8 | 9 | class KalmanFilterXYAH: 10 | """ 11 | For bytetrack 12 | A simple Kalman filter for tracking bounding boxes in image space. 13 | 14 | The 8-dimensional state space 15 | 16 | x, y, a, h, vx, vy, va, vh 17 | 18 | contains the bounding box center position (x, y), aspect ratio a, height h, 19 | and their respective velocities. 20 | 21 | Object motion follows a constant velocity model. The bounding box location 22 | (x, y, a, h) is taken as direct observation of the state space (linear 23 | observation model). 24 | 25 | """ 26 | 27 | def __init__(self): 28 | ndim, dt = 4, 1. 29 | 30 | # Create Kalman filter model matrices. 31 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 32 | for i in range(ndim): 33 | self._motion_mat[i, ndim + i] = dt 34 | self._update_mat = np.eye(ndim, 2 * ndim) 35 | 36 | # Motion and observation uncertainty are chosen relative to the current 37 | # state estimate. These weights control the amount of uncertainty in 38 | # the model. This is a bit hacky. 39 | self._std_weight_position = 1. / 20 40 | self._std_weight_velocity = 1. / 160 41 | 42 | def initiate(self, measurement): 43 | """Create track from unassociated measurement. 44 | 45 | Parameters 46 | ---------- 47 | measurement : ndarray 48 | Bounding box coordinates (x, y, a, h) with center position (x, y), 49 | aspect ratio a, and height h. 50 | 51 | Returns 52 | ------- 53 | (ndarray, ndarray) 54 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 55 | dimensional) of the new track. Unobserved velocities are initialized 56 | to 0 mean. 57 | 58 | """ 59 | mean_pos = measurement 60 | mean_vel = np.zeros_like(mean_pos) 61 | mean = np.r_[mean_pos, mean_vel] 62 | 63 | std = [ 64 | 2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2, 65 | 2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3], 66 | 10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3]] 67 | covariance = np.diag(np.square(std)) 68 | return mean, covariance 69 | 70 | def predict(self, mean, covariance): 71 | """Run Kalman filter prediction step. 72 | 73 | Parameters 74 | ---------- 75 | mean : ndarray 76 | The 8 dimensional mean vector of the object state at the previous 77 | time step. 78 | covariance : ndarray 79 | The 8x8 dimensional covariance matrix of the object state at the 80 | previous time step. 81 | 82 | Returns 83 | ------- 84 | (ndarray, ndarray) 85 | Returns the mean vector and covariance matrix of the predicted 86 | state. Unobserved velocities are initialized to 0 mean. 87 | 88 | """ 89 | std_pos = [ 90 | self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2, 91 | self._std_weight_position * mean[3]] 92 | std_vel = [ 93 | self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5, 94 | self._std_weight_velocity * mean[3]] 95 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 96 | 97 | # mean = np.dot(self._motion_mat, mean) 98 | mean = np.dot(mean, self._motion_mat.T) 99 | covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 100 | 101 | return mean, covariance 102 | 103 | def project(self, mean, covariance): 104 | """Project state distribution to measurement space. 105 | 106 | Parameters 107 | ---------- 108 | mean : ndarray 109 | The state's mean vector (8 dimensional array). 110 | covariance : ndarray 111 | The state's covariance matrix (8x8 dimensional). 112 | 113 | Returns 114 | ------- 115 | (ndarray, ndarray) 116 | Returns the projected mean and covariance matrix of the given state 117 | estimate. 118 | 119 | """ 120 | std = [ 121 | self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1, 122 | self._std_weight_position * mean[3]] 123 | innovation_cov = np.diag(np.square(std)) 124 | 125 | mean = np.dot(self._update_mat, mean) 126 | covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T)) 127 | return mean, covariance + innovation_cov 128 | 129 | def multi_predict(self, mean, covariance): 130 | """Run Kalman filter prediction step (Vectorized version). 131 | Parameters 132 | ---------- 133 | mean : ndarray 134 | The Nx8 dimensional mean matrix of the object states at the previous 135 | time step. 136 | covariance : ndarray 137 | The Nx8x8 dimensional covariance matrics of the object states at the 138 | previous time step. 139 | Returns 140 | ------- 141 | (ndarray, ndarray) 142 | Returns the mean vector and covariance matrix of the predicted 143 | state. Unobserved velocities are initialized to 0 mean. 144 | """ 145 | std_pos = [ 146 | self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3], 147 | 1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3]] 148 | std_vel = [ 149 | self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3], 150 | 1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3]] 151 | sqr = np.square(np.r_[std_pos, std_vel]).T 152 | 153 | motion_cov = [np.diag(sqr[i]) for i in range(len(mean))] 154 | motion_cov = np.asarray(motion_cov) 155 | 156 | mean = np.dot(mean, self._motion_mat.T) 157 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 158 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 159 | 160 | return mean, covariance 161 | 162 | def update(self, mean, covariance, measurement): 163 | """Run Kalman filter correction step. 164 | 165 | Parameters 166 | ---------- 167 | mean : ndarray 168 | The predicted state's mean vector (8 dimensional). 169 | covariance : ndarray 170 | The state's covariance matrix (8x8 dimensional). 171 | measurement : ndarray 172 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 173 | is the center position, a the aspect ratio, and h the height of the 174 | bounding box. 175 | 176 | Returns 177 | ------- 178 | (ndarray, ndarray) 179 | Returns the measurement-corrected state distribution. 180 | 181 | """ 182 | projected_mean, projected_cov = self.project(mean, covariance) 183 | 184 | chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) 185 | kalman_gain = scipy.linalg.cho_solve((chol_factor, lower), 186 | np.dot(covariance, self._update_mat.T).T, 187 | check_finite=False).T 188 | innovation = measurement - projected_mean 189 | 190 | new_mean = mean + np.dot(innovation, kalman_gain.T) 191 | new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T)) 192 | return new_mean, new_covariance 193 | 194 | def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): 195 | """Compute gating distance between state distribution and measurements. 196 | A suitable distance threshold can be obtained from `chi2inv95`. If 197 | `only_position` is False, the chi-square distribution has 4 degrees of 198 | freedom, otherwise 2. 199 | Parameters 200 | ---------- 201 | mean : ndarray 202 | Mean vector over the state distribution (8 dimensional). 203 | covariance : ndarray 204 | Covariance of the state distribution (8x8 dimensional). 205 | measurements : ndarray 206 | An Nx4 dimensional matrix of N measurements, each in 207 | format (x, y, a, h) where (x, y) is the bounding box center 208 | position, a the aspect ratio, and h the height. 209 | only_position : Optional[bool] 210 | If True, distance computation is done with respect to the bounding 211 | box center position only. 212 | Returns 213 | ------- 214 | ndarray 215 | Returns an array of length N, where the i-th element contains the 216 | squared Mahalanobis distance between (mean, covariance) and 217 | `measurements[i]`. 218 | """ 219 | mean, covariance = self.project(mean, covariance) 220 | if only_position: 221 | mean, covariance = mean[:2], covariance[:2, :2] 222 | measurements = measurements[:, :2] 223 | 224 | d = measurements - mean 225 | if metric == 'gaussian': 226 | return np.sum(d * d, axis=1) 227 | elif metric == 'maha': 228 | cholesky_factor = np.linalg.cholesky(covariance) 229 | z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) 230 | return np.sum(z * z, axis=0) # square maha 231 | else: 232 | raise ValueError('invalid distance metric') 233 | 234 | 235 | class KalmanFilterXYWH: 236 | """ 237 | For bot-sort 238 | A simple Kalman filter for tracking bounding boxes in image space. 239 | 240 | The 8-dimensional state space 241 | 242 | x, y, w, h, vx, vy, vw, vh 243 | 244 | contains the bounding box center position (x, y), width w, height h, 245 | and their respective velocities. 246 | 247 | Object motion follows a constant velocity model. The bounding box location 248 | (x, y, w, h) is taken as direct observation of the state space (linear 249 | observation model). 250 | 251 | """ 252 | 253 | def __init__(self): 254 | ndim, dt = 4, 1. 255 | 256 | # Create Kalman filter model matrices. 257 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 258 | for i in range(ndim): 259 | self._motion_mat[i, ndim + i] = dt 260 | self._update_mat = np.eye(ndim, 2 * ndim) 261 | 262 | # Motion and observation uncertainty are chosen relative to the current 263 | # state estimate. These weights control the amount of uncertainty in 264 | # the model. This is a bit hacky. 265 | self._std_weight_position = 1. / 20 266 | self._std_weight_velocity = 1. / 160 267 | 268 | def initiate(self, measurement): 269 | """Create track from unassociated measurement. 270 | 271 | Parameters 272 | ---------- 273 | measurement : ndarray 274 | Bounding box coordinates (x, y, w, h) with center position (x, y), 275 | width w, and height h. 276 | 277 | Returns 278 | ------- 279 | (ndarray, ndarray) 280 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 281 | dimensional) of the new track. Unobserved velocities are initialized 282 | to 0 mean. 283 | 284 | """ 285 | mean_pos = measurement 286 | mean_vel = np.zeros_like(mean_pos) 287 | mean = np.r_[mean_pos, mean_vel] 288 | 289 | std = [ 290 | 2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3], 291 | 2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3], 292 | 10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3], 293 | 10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3]] 294 | covariance = np.diag(np.square(std)) 295 | return mean, covariance 296 | 297 | def predict(self, mean, covariance): 298 | """Run Kalman filter prediction step. 299 | 300 | Parameters 301 | ---------- 302 | mean : ndarray 303 | The 8 dimensional mean vector of the object state at the previous 304 | time step. 305 | covariance : ndarray 306 | The 8x8 dimensional covariance matrix of the object state at the 307 | previous time step. 308 | 309 | Returns 310 | ------- 311 | (ndarray, ndarray) 312 | Returns the mean vector and covariance matrix of the predicted 313 | state. Unobserved velocities are initialized to 0 mean. 314 | 315 | """ 316 | std_pos = [ 317 | self._std_weight_position * mean[2], self._std_weight_position * mean[3], 318 | self._std_weight_position * mean[2], self._std_weight_position * mean[3]] 319 | std_vel = [ 320 | self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3], 321 | self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3]] 322 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 323 | 324 | mean = np.dot(mean, self._motion_mat.T) 325 | covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 326 | 327 | return mean, covariance 328 | 329 | def project(self, mean, covariance): 330 | """Project state distribution to measurement space. 331 | 332 | Parameters 333 | ---------- 334 | mean : ndarray 335 | The state's mean vector (8 dimensional array). 336 | covariance : ndarray 337 | The state's covariance matrix (8x8 dimensional). 338 | 339 | Returns 340 | ------- 341 | (ndarray, ndarray) 342 | Returns the projected mean and covariance matrix of the given state 343 | estimate. 344 | 345 | """ 346 | std = [ 347 | self._std_weight_position * mean[2], self._std_weight_position * mean[3], 348 | self._std_weight_position * mean[2], self._std_weight_position * mean[3]] 349 | innovation_cov = np.diag(np.square(std)) 350 | 351 | mean = np.dot(self._update_mat, mean) 352 | covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T)) 353 | return mean, covariance + innovation_cov 354 | 355 | def multi_predict(self, mean, covariance): 356 | """Run Kalman filter prediction step (Vectorized version). 357 | Parameters 358 | ---------- 359 | mean : ndarray 360 | The Nx8 dimensional mean matrix of the object states at the previous 361 | time step. 362 | covariance : ndarray 363 | The Nx8x8 dimensional covariance matrics of the object states at the 364 | previous time step. 365 | Returns 366 | ------- 367 | (ndarray, ndarray) 368 | Returns the mean vector and covariance matrix of the predicted 369 | state. Unobserved velocities are initialized to 0 mean. 370 | """ 371 | std_pos = [ 372 | self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3], 373 | self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3]] 374 | std_vel = [ 375 | self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3], 376 | self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3]] 377 | sqr = np.square(np.r_[std_pos, std_vel]).T 378 | 379 | motion_cov = [np.diag(sqr[i]) for i in range(len(mean))] 380 | motion_cov = np.asarray(motion_cov) 381 | 382 | mean = np.dot(mean, self._motion_mat.T) 383 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 384 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 385 | 386 | return mean, covariance 387 | 388 | def update(self, mean, covariance, measurement): 389 | """Run Kalman filter correction step. 390 | 391 | Parameters 392 | ---------- 393 | mean : ndarray 394 | The predicted state's mean vector (8 dimensional). 395 | covariance : ndarray 396 | The state's covariance matrix (8x8 dimensional). 397 | measurement : ndarray 398 | The 4 dimensional measurement vector (x, y, w, h), where (x, y) 399 | is the center position, w the width, and h the height of the 400 | bounding box. 401 | 402 | Returns 403 | ------- 404 | (ndarray, ndarray) 405 | Returns the measurement-corrected state distribution. 406 | 407 | """ 408 | projected_mean, projected_cov = self.project(mean, covariance) 409 | 410 | chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) 411 | kalman_gain = scipy.linalg.cho_solve((chol_factor, lower), 412 | np.dot(covariance, self._update_mat.T).T, 413 | check_finite=False).T 414 | innovation = measurement - projected_mean 415 | 416 | new_mean = mean + np.dot(innovation, kalman_gain.T) 417 | new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T)) 418 | return new_mean, new_covariance 419 | 420 | def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): 421 | """Compute gating distance between state distribution and measurements. 422 | A suitable distance threshold can be obtained from `chi2inv95`. If 423 | `only_position` is False, the chi-square distribution has 4 degrees of 424 | freedom, otherwise 2. 425 | Parameters 426 | ---------- 427 | mean : ndarray 428 | Mean vector over the state distribution (8 dimensional). 429 | covariance : ndarray 430 | Covariance of the state distribution (8x8 dimensional). 431 | measurements : ndarray 432 | An Nx4 dimensional matrix of N measurements, each in 433 | format (x, y, a, h) where (x, y) is the bounding box center 434 | position, a the aspect ratio, and h the height. 435 | only_position : Optional[bool] 436 | If True, distance computation is done with respect to the bounding 437 | box center position only. 438 | Returns 439 | ------- 440 | ndarray 441 | Returns an array of length N, where the i-th element contains the 442 | squared Mahalanobis distance between (mean, covariance) and 443 | `measurements[i]`. 444 | """ 445 | mean, covariance = self.project(mean, covariance) 446 | if only_position: 447 | mean, covariance = mean[:2], covariance[:2, :2] 448 | measurements = measurements[:, :2] 449 | 450 | d = measurements - mean 451 | if metric == 'gaussian': 452 | return np.sum(d * d, axis=1) 453 | elif metric == 'maha': 454 | cholesky_factor = np.linalg.cholesky(covariance) 455 | z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) 456 | return np.sum(z * z, axis=0) # square maha 457 | else: 458 | raise ValueError('invalid distance metric') 459 | -------------------------------------------------------------------------------- /ultralytics/nn/modules.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | """ 3 | Common modules 4 | """ 5 | 6 | import math 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from ultralytics.yolo.utils.tal import dist2bbox, make_anchors 12 | 13 | 14 | def autopad(k, p=None, d=1): # kernel, padding, dilation 15 | # Pad to 'same' shape outputs 16 | if d > 1: 17 | k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size 18 | if p is None: 19 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 20 | return p 21 | 22 | 23 | class Conv(nn.Module): 24 | # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation) 25 | default_act = nn.SiLU() # default activation 26 | 27 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): 28 | super().__init__() 29 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) 30 | self.bn = nn.BatchNorm2d(c2) 31 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() 32 | 33 | def forward(self, x): 34 | return self.act(self.bn(self.conv(x))) 35 | 36 | def forward_fuse(self, x): 37 | return self.act(self.conv(x)) 38 | 39 | 40 | class DWConv(Conv): 41 | # Depth-wise convolution 42 | def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation 43 | super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act) 44 | 45 | 46 | class DWConvTranspose2d(nn.ConvTranspose2d): 47 | # Depth-wise transpose convolution 48 | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out 49 | super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2)) 50 | 51 | 52 | class ConvTranspose(nn.Module): 53 | # Convolution transpose 2d layer 54 | default_act = nn.SiLU() # default activation 55 | 56 | def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True): 57 | super().__init__() 58 | self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn) 59 | self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity() 60 | self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() 61 | 62 | def forward(self, x): 63 | return self.act(self.bn(self.conv_transpose(x))) 64 | 65 | def forward_fuse(self, x): 66 | return self.act(self.conv_transpose(x)) 67 | 68 | 69 | class DFL(nn.Module): 70 | # Integral module of Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 71 | def __init__(self, c1=16): 72 | super().__init__() 73 | self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False) 74 | x = torch.arange(c1, dtype=torch.float) 75 | self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1)) 76 | self.c1 = c1 77 | 78 | def forward(self, x): 79 | b, c, a = x.shape # batch, channels, anchors 80 | return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a) 81 | # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a) 82 | 83 | 84 | class TransformerLayer(nn.Module): 85 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 86 | def __init__(self, c, num_heads): 87 | super().__init__() 88 | self.q = nn.Linear(c, c, bias=False) 89 | self.k = nn.Linear(c, c, bias=False) 90 | self.v = nn.Linear(c, c, bias=False) 91 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 92 | self.fc1 = nn.Linear(c, c, bias=False) 93 | self.fc2 = nn.Linear(c, c, bias=False) 94 | 95 | def forward(self, x): 96 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 97 | x = self.fc2(self.fc1(x)) + x 98 | return x 99 | 100 | 101 | class TransformerBlock(nn.Module): 102 | # Vision Transformer https://arxiv.org/abs/2010.11929 103 | def __init__(self, c1, c2, num_heads, num_layers): 104 | super().__init__() 105 | self.conv = None 106 | if c1 != c2: 107 | self.conv = Conv(c1, c2) 108 | self.linear = nn.Linear(c2, c2) # learnable position embedding 109 | self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers))) 110 | self.c2 = c2 111 | 112 | def forward(self, x): 113 | if self.conv is not None: 114 | x = self.conv(x) 115 | b, _, w, h = x.shape 116 | p = x.flatten(2).permute(2, 0, 1) 117 | return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h) 118 | 119 | 120 | class Bottleneck(nn.Module): 121 | # Standard bottleneck 122 | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch_in, ch_out, shortcut, groups, kernels, expand 123 | super().__init__() 124 | c_ = int(c2 * e) # hidden channels 125 | self.cv1 = Conv(c1, c_, k[0], 1) 126 | self.cv2 = Conv(c_, c2, k[1], 1, g=g) 127 | self.add = shortcut and c1 == c2 128 | 129 | def forward(self, x): 130 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 131 | 132 | 133 | class BottleneckCSP(nn.Module): 134 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 135 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 136 | super().__init__() 137 | c_ = int(c2 * e) # hidden channels 138 | self.cv1 = Conv(c1, c_, 1, 1) 139 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 140 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 141 | self.cv4 = Conv(2 * c_, c2, 1, 1) 142 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 143 | self.act = nn.SiLU() 144 | self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) 145 | 146 | def forward(self, x): 147 | y1 = self.cv3(self.m(self.cv1(x))) 148 | y2 = self.cv2(x) 149 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1)))) 150 | 151 | 152 | class C3(nn.Module): 153 | # CSP Bottleneck with 3 convolutions 154 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 155 | super().__init__() 156 | c_ = int(c2 * e) # hidden channels 157 | self.cv1 = Conv(c1, c_, 1, 1) 158 | self.cv2 = Conv(c1, c_, 1, 1) 159 | self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) 160 | self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n))) 161 | 162 | def forward(self, x): 163 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) 164 | 165 | 166 | class C2(nn.Module): 167 | # CSP Bottleneck with 2 convolutions 168 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 169 | super().__init__() 170 | self.c = int(c2 * e) # hidden channels 171 | self.cv1 = Conv(c1, 2 * self.c, 1, 1) 172 | self.cv2 = Conv(2 * self.c, c2, 1) # optional act=FReLU(c2) 173 | # self.attention = ChannelAttention(2 * self.c) # or SpatialAttention() 174 | self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))) 175 | 176 | def forward(self, x): 177 | a, b = self.cv1(x).split((self.c, self.c), 1) 178 | return self.cv2(torch.cat((self.m(a), b), 1)) 179 | 180 | 181 | class C2f(nn.Module): 182 | # CSP Bottleneck with 2 convolutions 183 | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 184 | super().__init__() 185 | self.c = int(c2 * e) # hidden channels 186 | self.cv1 = Conv(c1, 2 * self.c, 1, 1) 187 | self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2) 188 | self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)) 189 | 190 | def forward(self, x): 191 | y = list(self.cv1(x).split((self.c, self.c), 1)) 192 | y.extend(m(y[-1]) for m in self.m) 193 | return self.cv2(torch.cat(y, 1)) 194 | 195 | 196 | class ChannelAttention(nn.Module): 197 | # Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet 198 | def __init__(self, channels: int) -> None: 199 | super().__init__() 200 | self.pool = nn.AdaptiveAvgPool2d(1) 201 | self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True) 202 | self.act = nn.Sigmoid() 203 | 204 | def forward(self, x: torch.Tensor) -> torch.Tensor: 205 | return x * self.act(self.fc(self.pool(x))) 206 | 207 | 208 | class SpatialAttention(nn.Module): 209 | # Spatial-attention module 210 | def __init__(self, kernel_size=7): 211 | super().__init__() 212 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 213 | padding = 3 if kernel_size == 7 else 1 214 | self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 215 | self.act = nn.Sigmoid() 216 | 217 | def forward(self, x): 218 | return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1))) 219 | 220 | 221 | class CBAM(nn.Module): 222 | # Convolutional Block Attention Module 223 | def __init__(self, c1, kernel_size=7): # ch_in, kernels 224 | super().__init__() 225 | self.channel_attention = ChannelAttention(c1) 226 | self.spatial_attention = SpatialAttention(kernel_size) 227 | 228 | def forward(self, x): 229 | return self.spatial_attention(self.channel_attention(x)) 230 | 231 | 232 | class C1(nn.Module): 233 | # CSP Bottleneck with 1 convolution 234 | def __init__(self, c1, c2, n=1): # ch_in, ch_out, number 235 | super().__init__() 236 | self.cv1 = Conv(c1, c2, 1, 1) 237 | self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n))) 238 | 239 | def forward(self, x): 240 | y = self.cv1(x) 241 | return self.m(y) + y 242 | 243 | 244 | class C3x(C3): 245 | # C3 module with cross-convolutions 246 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 247 | super().__init__(c1, c2, n, shortcut, g, e) 248 | self.c_ = int(c2 * e) 249 | self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n))) 250 | 251 | 252 | class C3TR(C3): 253 | # C3 module with TransformerBlock() 254 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 255 | super().__init__(c1, c2, n, shortcut, g, e) 256 | c_ = int(c2 * e) 257 | self.m = TransformerBlock(c_, c_, 4, n) 258 | 259 | 260 | class C3Ghost(C3): 261 | # C3 module with GhostBottleneck() 262 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 263 | super().__init__(c1, c2, n, shortcut, g, e) 264 | c_ = int(c2 * e) # hidden channels 265 | self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n))) 266 | 267 | 268 | class SPP(nn.Module): 269 | # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 270 | def __init__(self, c1, c2, k=(5, 9, 13)): 271 | super().__init__() 272 | c_ = c1 // 2 # hidden channels 273 | self.cv1 = Conv(c1, c_, 1, 1) 274 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 275 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 276 | 277 | def forward(self, x): 278 | x = self.cv1(x) 279 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 280 | 281 | 282 | class SPPF(nn.Module): 283 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 284 | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) 285 | super().__init__() 286 | c_ = c1 // 2 # hidden channels 287 | self.cv1 = Conv(c1, c_, 1, 1) 288 | self.cv2 = Conv(c_ * 4, c2, 1, 1) 289 | self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) 290 | 291 | def forward(self, x): 292 | x = self.cv1(x) 293 | y1 = self.m(x) 294 | y2 = self.m(y1) 295 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 296 | 297 | 298 | class Focus(nn.Module): 299 | # Focus wh information into c-space 300 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 301 | super().__init__() 302 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act) 303 | # self.contract = Contract(gain=2) 304 | 305 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 306 | return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)) 307 | # return self.conv(self.contract(x)) 308 | 309 | 310 | class GhostConv(nn.Module): 311 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 312 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 313 | super().__init__() 314 | c_ = c2 // 2 # hidden channels 315 | self.cv1 = Conv(c1, c_, k, s, None, g, act=act) 316 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act) 317 | 318 | def forward(self, x): 319 | y = self.cv1(x) 320 | return torch.cat((y, self.cv2(y)), 1) 321 | 322 | 323 | class GhostBottleneck(nn.Module): 324 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 325 | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride 326 | super().__init__() 327 | c_ = c2 // 2 328 | self.conv = nn.Sequential( 329 | GhostConv(c1, c_, 1, 1), # pw 330 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 331 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 332 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, 333 | act=False)) if s == 2 else nn.Identity() 334 | 335 | def forward(self, x): 336 | return self.conv(x) + self.shortcut(x) 337 | 338 | 339 | class Concat(nn.Module): 340 | # Concatenate a list of tensors along dimension 341 | def __init__(self, dimension=1): 342 | super().__init__() 343 | self.d = dimension 344 | 345 | def forward(self, x): 346 | return torch.cat(x, self.d) 347 | 348 | 349 | class Proto(nn.Module): 350 | # YOLOv8 mask Proto module for segmentation models 351 | def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks 352 | super().__init__() 353 | self.cv1 = Conv(c1, c_, k=3) 354 | self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True) # nn.Upsample(scale_factor=2, mode='nearest') 355 | self.cv2 = Conv(c_, c_, k=3) 356 | self.cv3 = Conv(c_, c2) 357 | 358 | def forward(self, x): 359 | return self.cv3(self.cv2(self.upsample(self.cv1(x)))) 360 | 361 | 362 | class Ensemble(nn.ModuleList): 363 | # Ensemble of models 364 | def __init__(self): 365 | super().__init__() 366 | 367 | def forward(self, x, augment=False, profile=False, visualize=False): 368 | y = [module(x, augment, profile, visualize)[0] for module in self] 369 | # y = torch.stack(y).max(0)[0] # max ensemble 370 | # y = torch.stack(y).mean(0) # mean ensemble 371 | y = torch.cat(y, 1) # nms ensemble 372 | return y, None # inference, train output 373 | 374 | 375 | # heads 376 | class Detect(nn.Module): 377 | # YOLOv8 Detect head for detection models 378 | dynamic = False # force grid reconstruction 379 | export = False # export mode 380 | shape = None 381 | anchors = torch.empty(0) # init 382 | strides = torch.empty(0) # init 383 | 384 | def __init__(self, nc=80, ch=()): # detection layer 385 | super().__init__() 386 | self.nc = nc # number of classes 387 | self.nl = len(ch) # number of detection layers 388 | self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) 389 | self.no = nc + self.reg_max * 4 # number of outputs per anchor 390 | self.stride = torch.zeros(self.nl) # strides computed during build 391 | 392 | c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels 393 | self.cv2 = nn.ModuleList( 394 | nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) 395 | self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) 396 | self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() 397 | 398 | def forward(self, x): 399 | shape = x[0].shape # BCHW 400 | for i in range(self.nl): 401 | x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) 402 | if self.training: 403 | return x 404 | elif self.dynamic or self.shape != shape: 405 | self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) 406 | self.shape = shape 407 | 408 | box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) 409 | dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides 410 | y = torch.cat((dbox, cls.sigmoid()), 1) 411 | return y if self.export else (y, x) 412 | 413 | def bias_init(self): 414 | # Initialize Detect() biases, WARNING: requires stride availability 415 | m = self # self.model[-1] # Detect() module 416 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 417 | # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency 418 | for a, b, s in zip(m.cv2, m.cv3, m.stride): # from 419 | a[-1].bias.data[:] = 1.0 # box 420 | b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img) 421 | 422 | 423 | class Segment(Detect): 424 | # YOLOv8 Segment head for segmentation models 425 | def __init__(self, nc=80, nm=32, npr=256, ch=()): 426 | super().__init__(nc, ch) 427 | self.nm = nm # number of masks 428 | self.npr = npr # number of protos 429 | self.proto = Proto(ch[0], self.npr, self.nm) # protos 430 | self.detect = Detect.forward 431 | 432 | c4 = max(ch[0] // 4, self.nm) 433 | self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) 434 | 435 | def forward(self, x): 436 | p = self.proto(x[0]) # mask protos 437 | bs = p.shape[0] # batch size 438 | 439 | mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients 440 | x = self.detect(self, x) 441 | if self.training: 442 | return x, mc, p 443 | return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p)) 444 | 445 | 446 | class Classify(nn.Module): 447 | # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2) 448 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 449 | super().__init__() 450 | c_ = 1280 # efficientnet_b0 size 451 | self.conv = Conv(c1, c_, k, s, autopad(k, p), g) 452 | self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1) 453 | self.drop = nn.Dropout(p=0.0, inplace=True) 454 | self.linear = nn.Linear(c_, c2) # to x(b,c2) 455 | 456 | def forward(self, x): 457 | if isinstance(x, list): 458 | x = torch.cat(x, 1) 459 | x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) 460 | return x if self.training else x.softmax(1) 461 | -------------------------------------------------------------------------------- /ultralytics/nn/autobackend.py: -------------------------------------------------------------------------------- 1 | # Ultralytics YOLO 🚀, GPL-3.0 license 2 | import ast 3 | import contextlib 4 | import json 5 | import platform 6 | import zipfile 7 | from collections import OrderedDict, namedtuple 8 | from pathlib import Path 9 | from urllib.parse import urlparse 10 | 11 | import cv2 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | from PIL import Image 16 | 17 | from ultralytics.yolo.utils import LOGGER, ROOT, yaml_load 18 | from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_version, check_yaml 19 | from ultralytics.yolo.utils.downloads import attempt_download_asset, is_url 20 | from ultralytics.yolo.utils.ops import xywh2xyxy 21 | 22 | 23 | def check_class_names(names): 24 | # Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts. 25 | if isinstance(names, list): # names is a list 26 | names = dict(enumerate(names)) # convert to dict 27 | if isinstance(names, dict): 28 | if not all(isinstance(k, int) for k in names.keys()): # convert string keys to int, i.e. '0' to 0 29 | names = {int(k): v for k, v in names.items()} 30 | if isinstance(names[0], str) and names[0].startswith('n0'): # imagenet class codes, i.e. 'n01440764' 31 | map = yaml_load(ROOT / 'yolo/data/datasets/ImageNet.yaml')['map'] # human-readable names 32 | names = {k: map[v] for k, v in names.items()} 33 | return names 34 | 35 | 36 | class AutoBackend(nn.Module): 37 | 38 | def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): 39 | """ 40 | MultiBackend class for python inference on various platforms using Ultralytics YOLO. 41 | 42 | Args: 43 | weights (str): The path to the weights file. Default: 'yolov8n.pt' 44 | device (torch.device): The device to run the model on. 45 | dnn (bool): Use OpenCV's DNN module for inference if True, defaults to False. 46 | data (str), (Path): Additional data.yaml file for class names, optional 47 | fp16 (bool): If True, use half precision. Default: False 48 | fuse (bool): Whether to fuse the model or not. Default: True 49 | 50 | Supported formats and their naming conventions: 51 | | Format | Suffix | 52 | |-----------------------|------------------| 53 | | PyTorch | *.pt | 54 | | TorchScript | *.torchscript | 55 | | ONNX Runtime | *.onnx | 56 | | ONNX OpenCV DNN | *.onnx --dnn | 57 | | OpenVINO | *.xml | 58 | | CoreML | *.mlmodel | 59 | | TensorRT | *.engine | 60 | | TensorFlow SavedModel | *_saved_model | 61 | | TensorFlow GraphDef | *.pb | 62 | | TensorFlow Lite | *.tflite | 63 | | TensorFlow Edge TPU | *_edgetpu.tflite | 64 | | PaddlePaddle | *_paddle_model | 65 | """ 66 | super().__init__() 67 | w = str(weights[0] if isinstance(weights, list) else weights) 68 | nn_module = isinstance(weights, torch.nn.Module) 69 | pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w) 70 | fp16 &= pt or jit or onnx or engine or nn_module # FP16 71 | nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) 72 | stride = 32 # default stride 73 | model = None # TODO: resolves ONNX inference, verify effect on other backends 74 | cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA 75 | if not (pt or triton or nn_module): 76 | w = attempt_download_asset(w) # download if not local 77 | 78 | # NOTE: special case: in-memory pytorch model 79 | if nn_module: 80 | model = weights.to(device) 81 | model = model.fuse() if fuse else model 82 | names = model.module.names if hasattr(model, 'module') else model.names # get class names 83 | stride = max(int(model.stride.max()), 32) # model stride 84 | model.half() if fp16 else model.float() 85 | self.model = model # explicitly assign for to(), cpu(), cuda(), half() 86 | pt = True 87 | elif pt: # PyTorch 88 | from ultralytics.nn.tasks import attempt_load_weights 89 | model = attempt_load_weights(weights if isinstance(weights, list) else w, 90 | device=device, 91 | inplace=True, 92 | fuse=fuse) 93 | stride = max(int(model.stride.max()), 32) # model stride 94 | names = model.module.names if hasattr(model, 'module') else model.names # get class names 95 | model.half() if fp16 else model.float() 96 | self.model = model # explicitly assign for to(), cpu(), cuda(), half() 97 | elif jit: # TorchScript 98 | LOGGER.info(f'Loading {w} for TorchScript inference...') 99 | extra_files = {'config.txt': ''} # model metadata 100 | model = torch.jit.load(w, _extra_files=extra_files, map_location=device) 101 | model.half() if fp16 else model.float() 102 | if extra_files['config.txt']: # load metadata dict 103 | d = json.loads(extra_files['config.txt'], 104 | object_hook=lambda d: {int(k) if k.isdigit() else k: v 105 | for k, v in d.items()}) 106 | stride, names = int(d['stride']), d['names'] 107 | elif dnn: # ONNX OpenCV DNN 108 | LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') 109 | check_requirements('opencv-python>=4.5.4') 110 | net = cv2.dnn.readNetFromONNX(w) 111 | elif onnx: # ONNX Runtime 112 | LOGGER.info(f'Loading {w} for ONNX Runtime inference...') 113 | check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) 114 | import onnxruntime 115 | providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] 116 | session = onnxruntime.InferenceSession(w, providers=providers) 117 | output_names = [x.name for x in session.get_outputs()] 118 | meta = session.get_modelmeta().custom_metadata_map # metadata 119 | if 'stride' in meta: 120 | stride, names = int(meta['stride']), eval(meta['names']) 121 | elif xml: # OpenVINO 122 | LOGGER.info(f'Loading {w} for OpenVINO inference...') 123 | check_requirements('openvino') # requires openvino-dev: https://pypi.org/project/openvino-dev/ 124 | from openvino.runtime import Core, Layout, get_batch # noqa 125 | ie = Core() 126 | if not Path(w).is_file(): # if not *.xml 127 | w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir 128 | network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin')) 129 | if network.get_parameters()[0].get_layout().empty: 130 | network.get_parameters()[0].set_layout(Layout('NCHW')) 131 | batch_dim = get_batch(network) 132 | if batch_dim.is_static: 133 | batch_size = batch_dim.get_length() 134 | executable_network = ie.compile_model(network, device_name='CPU') # device_name="MYRIAD" for Intel NCS2 135 | elif engine: # TensorRT 136 | LOGGER.info(f'Loading {w} for TensorRT inference...') 137 | import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download 138 | check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 139 | if device.type == 'cpu': 140 | device = torch.device('cuda:0') 141 | Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) 142 | logger = trt.Logger(trt.Logger.INFO) 143 | # Read file 144 | with open(w, 'rb') as f, trt.Runtime(logger) as runtime: 145 | # Read metadata length 146 | meta_len = int.from_bytes(f.read(4), byteorder='little') 147 | # Read metadata 148 | meta = json.loads(f.read(meta_len).decode('utf-8')) 149 | stride, names = int(meta['stride']), meta['names'] 150 | # Read engine 151 | model = runtime.deserialize_cuda_engine(f.read()) 152 | context = model.create_execution_context() 153 | bindings = OrderedDict() 154 | output_names = [] 155 | fp16 = False # default updated below 156 | dynamic = False 157 | for i in range(model.num_bindings): 158 | name = model.get_binding_name(i) 159 | dtype = trt.nptype(model.get_binding_dtype(i)) 160 | if model.binding_is_input(i): 161 | if -1 in tuple(model.get_binding_shape(i)): # dynamic 162 | dynamic = True 163 | context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2])) 164 | if dtype == np.float16: 165 | fp16 = True 166 | else: # output 167 | output_names.append(name) 168 | shape = tuple(context.get_binding_shape(i)) 169 | im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) 170 | bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) 171 | binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) 172 | batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size 173 | elif coreml: # CoreML 174 | LOGGER.info(f'Loading {w} for CoreML inference...') 175 | import coremltools as ct 176 | model = ct.models.MLModel(w) 177 | elif saved_model: # TF SavedModel 178 | LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') 179 | import tensorflow as tf 180 | keras = False # assume TF1 saved_model 181 | model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) 182 | elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt 183 | LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') 184 | import tensorflow as tf 185 | 186 | def wrap_frozen_graph(gd, inputs, outputs): 187 | x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped 188 | ge = x.graph.as_graph_element 189 | return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) 190 | 191 | def gd_outputs(gd): 192 | name_list, input_list = [], [] 193 | for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef 194 | name_list.append(node.name) 195 | input_list.extend(node.input) 196 | return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp')) 197 | 198 | gd = tf.Graph().as_graph_def() # TF GraphDef 199 | with open(w, 'rb') as f: 200 | gd.ParseFromString(f.read()) 201 | frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd)) 202 | elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python 203 | try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu 204 | from tflite_runtime.interpreter import Interpreter, load_delegate 205 | except ImportError: 206 | import tensorflow as tf 207 | Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate 208 | if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime 209 | LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') 210 | delegate = { 211 | 'Linux': 'libedgetpu.so.1', 212 | 'Darwin': 'libedgetpu.1.dylib', 213 | 'Windows': 'edgetpu.dll'}[platform.system()] 214 | interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) 215 | else: # TFLite 216 | LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') 217 | interpreter = Interpreter(model_path=w) # load TFLite model 218 | interpreter.allocate_tensors() # allocate 219 | input_details = interpreter.get_input_details() # inputs 220 | output_details = interpreter.get_output_details() # outputs 221 | # load metadata 222 | with contextlib.suppress(zipfile.BadZipFile): 223 | with zipfile.ZipFile(w, 'r') as model: 224 | meta_file = model.namelist()[0] 225 | meta = ast.literal_eval(model.read(meta_file).decode('utf-8')) 226 | stride, names = int(meta['stride']), meta['names'] 227 | elif tfjs: # TF.js 228 | raise NotImplementedError('YOLOv8 TF.js inference is not supported') 229 | elif paddle: # PaddlePaddle 230 | LOGGER.info(f'Loading {w} for PaddlePaddle inference...') 231 | check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle') 232 | import paddle.inference as pdi 233 | if not Path(w).is_file(): # if not *.pdmodel 234 | w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir 235 | weights = Path(w).with_suffix('.pdiparams') 236 | config = pdi.Config(str(w), str(weights)) 237 | if cuda: 238 | config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0) 239 | predictor = pdi.create_predictor(config) 240 | input_handle = predictor.get_input_handle(predictor.get_input_names()[0]) 241 | output_names = predictor.get_output_names() 242 | elif triton: # NVIDIA Triton Inference Server 243 | LOGGER.info('Triton Inference Server not supported...') 244 | ''' 245 | TODO: 246 | check_requirements('tritonclient[all]') 247 | from utils.triton import TritonRemoteModel 248 | model = TritonRemoteModel(url=w) 249 | nhwc = model.runtime.startswith("tensorflow") 250 | ''' 251 | else: 252 | from ultralytics.yolo.engine.exporter import EXPORT_FORMATS_TABLE 253 | raise TypeError(f"model='{w}' is not a supported model format. " 254 | 'See https://docs.ultralytics.com/tasks/detection/#export for help.' 255 | f'\n\n{EXPORT_FORMATS_TABLE}') 256 | 257 | # Load external metadata YAML 258 | if xml or saved_model or paddle: 259 | metadata = Path(w).parent / 'metadata.yaml' 260 | if metadata.exists(): 261 | metadata = yaml_load(metadata) 262 | stride, names = int(metadata['stride']), metadata['names'] # load metadata 263 | else: 264 | LOGGER.warning(f"WARNING ⚠️ Metadata not found at '{metadata}'") 265 | 266 | # Check names 267 | if 'names' not in locals(): # names missing 268 | names = yaml_load(check_yaml(data))['names'] if data else {i: f'class{i}' for i in range(999)} # assign 269 | names = check_class_names(names) 270 | 271 | self.__dict__.update(locals()) # assign all variables to self 272 | 273 | def forward(self, im, augment=False, visualize=False): 274 | """ 275 | Runs inference on the YOLOv8 MultiBackend model. 276 | 277 | Args: 278 | im (torch.Tensor): The image tensor to perform inference on. 279 | augment (bool): whether to perform data augmentation during inference, defaults to False 280 | visualize (bool): whether to visualize the output predictions, defaults to False 281 | 282 | Returns: 283 | (tuple): Tuple containing the raw output tensor, and the processed output for visualization (if visualize=True) 284 | """ 285 | b, ch, h, w = im.shape # batch, channel, height, width 286 | if self.fp16 and im.dtype != torch.float16: 287 | im = im.half() # to FP16 288 | if self.nhwc: 289 | im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3) 290 | 291 | if self.pt or self.nn_module: # PyTorch 292 | y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) 293 | elif self.jit: # TorchScript 294 | y = self.model(im) 295 | elif self.dnn: # ONNX OpenCV DNN 296 | im = im.cpu().numpy() # torch to numpy 297 | self.net.setInput(im) 298 | y = self.net.forward() 299 | elif self.onnx: # ONNX Runtime 300 | im = im.cpu().numpy() # torch to numpy 301 | y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) 302 | elif self.xml: # OpenVINO 303 | im = im.cpu().numpy() # FP32 304 | y = list(self.executable_network([im]).values()) 305 | elif self.engine: # TensorRT 306 | if self.dynamic and im.shape != self.bindings['images'].shape: 307 | i = self.model.get_binding_index('images') 308 | self.context.set_binding_shape(i, im.shape) # reshape if dynamic 309 | self.bindings['images'] = self.bindings['images']._replace(shape=im.shape) 310 | for name in self.output_names: 311 | i = self.model.get_binding_index(name) 312 | self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i))) 313 | s = self.bindings['images'].shape 314 | assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" 315 | self.binding_addrs['images'] = int(im.data_ptr()) 316 | self.context.execute_v2(list(self.binding_addrs.values())) 317 | y = [self.bindings[x].data for x in sorted(self.output_names)] 318 | elif self.coreml: # CoreML 319 | im = im.cpu().numpy() 320 | im = Image.fromarray((im[0] * 255).astype('uint8')) 321 | # im = im.resize((192, 320), Image.ANTIALIAS) 322 | y = self.model.predict({'image': im}) # coordinates are xywh normalized 323 | if 'confidence' in y: 324 | box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels 325 | conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float) 326 | y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) 327 | else: 328 | y = list(reversed(y.values())) # reversed for segmentation models (pred, proto) 329 | elif self.paddle: # PaddlePaddle 330 | im = im.cpu().numpy().astype(np.float32) 331 | self.input_handle.copy_from_cpu(im) 332 | self.predictor.run() 333 | y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names] 334 | elif self.triton: # NVIDIA Triton Inference Server 335 | y = self.model(im) 336 | else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) 337 | im = im.cpu().numpy() 338 | if self.saved_model: # SavedModel 339 | y = self.model(im, training=False) if self.keras else self.model(im) 340 | elif self.pb: # GraphDef 341 | y = self.frozen_func(x=self.tf.constant(im)) 342 | else: # Lite or Edge TPU 343 | input = self.input_details[0] 344 | int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model 345 | if int8: 346 | scale, zero_point = input['quantization'] 347 | im = (im / scale + zero_point).astype(np.uint8) # de-scale 348 | self.interpreter.set_tensor(input['index'], im) 349 | self.interpreter.invoke() 350 | y = [] 351 | for output in self.output_details: 352 | x = self.interpreter.get_tensor(output['index']) 353 | if int8: 354 | scale, zero_point = output['quantization'] 355 | x = (x.astype(np.float32) - zero_point) * scale # re-scale 356 | y.append(x) 357 | # TF segment fixes: export is reversed vs ONNX export and protos are transposed 358 | if len(self.output_details) == 2: # segment 359 | y = [y[1], np.transpose(y[0], (0, 3, 1, 2))] 360 | y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y] 361 | # y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels 362 | 363 | if isinstance(y, (list, tuple)): 364 | return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] 365 | else: 366 | return self.from_numpy(y) 367 | 368 | def from_numpy(self, x): 369 | """ 370 | Convert a numpy array to a tensor. 371 | 372 | Args: 373 | x (np.ndarray): The array to be converted. 374 | 375 | Returns: 376 | (torch.Tensor): The converted tensor 377 | """ 378 | return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x 379 | 380 | def warmup(self, imgsz=(1, 3, 640, 640)): 381 | """ 382 | Warm up the model by running one forward pass with a dummy input. 383 | 384 | Args: 385 | imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width) 386 | 387 | Returns: 388 | (None): This method runs the forward pass and don't return any value 389 | """ 390 | warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module 391 | if any(warmup_types) and (self.device.type != 'cpu' or self.triton): 392 | im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input 393 | for _ in range(2 if self.jit else 1): # 394 | self.forward(im) # warmup 395 | 396 | @staticmethod 397 | def _model_type(p='path/to/model.pt'): 398 | """ 399 | This function takes a path to a model file and returns the model type 400 | 401 | Args: 402 | p: path to the model file. Defaults to path/to/model.pt 403 | """ 404 | # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx 405 | # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle] 406 | from ultralytics.yolo.engine.exporter import export_formats 407 | sf = list(export_formats().Suffix) # export suffixes 408 | if not is_url(p, check=False) and not isinstance(p, str): 409 | check_suffix(p, sf) # checks 410 | url = urlparse(p) # if url may be Triton inference server 411 | types = [s in Path(p).name for s in sf] 412 | types[8] &= not types[9] # tflite &= not edgetpu 413 | triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc]) 414 | return types + [triton] 415 | --------------------------------------------------------------------------------