├── applications └── handpose_local_app.py ├── components ├── classify_imagenet │ ├── imagenet_c.py │ ├── imagenet_msg.json │ ├── models │ │ ├── __pycache__ │ │ │ └── resnet.cpython-37.pyc │ │ └── resnet.py │ ├── utils │ │ ├── common_utils.py │ │ └── model_utils.py │ └── weights │ │ └── 分类权重放这里.txt ├── hand_detect │ └── weights │ │ └── yolov5权重.txt └── hand_keypoints │ ├── handpose_x.py │ ├── models │ ├── mobilenetv2.py │ ├── my_model.py │ ├── resnet.py │ ├── resnet_50.py │ ├── rexnetv1.py │ ├── shufflenet.py │ ├── shufflenetv2.py │ └── squeezenet.py │ ├── utils │ ├── __pycache__ │ │ └── common_utils.cpython-37.pyc │ ├── common_utils.py │ └── model_utils.py │ └── weights │ └── 关键点权重放这里.txt ├── data ├── VOC.yaml ├── coco.yaml └── coco128.yaml ├── export.py ├── hand_data_iter ├── data_agu.py ├── datasets.py └── handpose_agu.py ├── lib └── hand_lib │ ├── cores │ ├── __pycache__ │ │ ├── handpose_fuction.cpython-37.pyc │ │ └── tracking_utils.cpython-37.pyc │ ├── hand_pnp.py │ ├── handpose_fuction.py │ └── tracking_utils.py │ └── utils │ └── utils.py ├── loss └── loss.py ├── materials └── audio │ ├── imagenet_2012 │ ├── n02930766.mp3 │ ├── n03085013.mp3 │ ├── n03290653.mp3 │ ├── n03642806.mp3 │ ├── n03782006.mp3 │ ├── n03791053.mp3 │ ├── n03793489.mp3 │ ├── n03832673.mp3 │ ├── n03876231.mp3 │ ├── n03887697.mp3 │ └── n04285008.mp3 │ └── sentences │ ├── IdentifyingObjectsWait.mp3 │ └── ObjectMayBeIdentified.mp3 ├── models ├── __init__.py ├── common.py ├── experimental.py ├── hub │ ├── anchors.yaml │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-bifpn.yaml │ ├── yolov5-fpn.yaml │ ├── yolov5-p2.yaml │ ├── yolov5-p34.yaml │ ├── yolov5-p6.yaml │ ├── yolov5-p7.yaml │ ├── yolov5-panet.yaml │ ├── yolov5l6.yaml │ ├── yolov5m6.yaml │ ├── yolov5n6.yaml │ ├── yolov5s-LeakyReLU.yaml │ ├── yolov5s-ghost.yaml │ ├── yolov5s-transformer.yaml │ ├── yolov5s6.yaml │ └── yolov5x6.yaml ├── segment │ ├── yolov5l-seg.yaml │ ├── yolov5m-seg.yaml │ ├── yolov5n-seg.yaml │ ├── yolov5s-seg.yaml │ └── yolov5x-seg.yaml ├── tf.py ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5n.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── predict.py ├── readme.md ├── train.py └── utils ├── __init__.py ├── __pycache__ ├── __init__.cpython-36.pyc ├── __init__.cpython-37.pyc ├── augmentations.cpython-36.pyc ├── augmentations.cpython-37.pyc ├── autoanchor.cpython-37.pyc ├── dataloaders.cpython-36.pyc ├── dataloaders.cpython-37.pyc ├── downloads.cpython-36.pyc ├── downloads.cpython-37.pyc ├── general.cpython-36.pyc ├── general.cpython-37.pyc ├── metrics.cpython-36.pyc ├── metrics.cpython-37.pyc ├── plots.cpython-36.pyc ├── plots.cpython-37.pyc ├── torch_utils.cpython-36.pyc └── torch_utils.cpython-37.pyc ├── activations.py ├── augmentations.py ├── autoanchor.py ├── autobatch.py ├── aws ├── __init__.py ├── mime.sh ├── resume.py └── userdata.sh ├── callbacks.py ├── common_utils.py ├── dataloaders.py ├── docker ├── Dockerfile ├── Dockerfile-arm64 └── Dockerfile-cpu ├── downloads.py ├── flask_rest_api ├── README.md ├── example_request.py └── restapi.py ├── general.py ├── google_app_engine ├── Dockerfile ├── additional_requirements.txt └── app.yaml ├── loggers ├── __init__.py ├── clearml │ ├── README.md │ ├── __init__.py │ ├── clearml_utils.py │ └── hpo.py ├── comet │ ├── README.md │ ├── __init__.py │ ├── comet_utils.py │ ├── hpo.py │ └── optimizer_config.json └── wandb │ ├── README.md │ ├── __init__.py │ ├── log_dataset.py │ ├── sweep.py │ ├── sweep.yaml │ └── wandb_utils.py ├── loss.py ├── metrics.py ├── 
model_utils.py ├── plots.py ├── segment ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── general.cpython-36.pyc │ └── general.cpython-37.pyc ├── augmentations.py ├── dataloaders.py ├── general.py ├── loss.py ├── metrics.py └── plots.py ├── torch_utils.py └── triton.py /applications/handpose_local_app.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /components/classify_imagenet/imagenet_c.py: --------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | # date:2023-12-07
3 | # Author: yinyipeng
4 | # function : classify
5 |
6 | import os
7 | import json
8 |
9 | import cv2
10 | import numpy as np
11 |
12 | import torch
13 | import torch.nn as nn
14 | import torch.nn.functional as F
15 | from components.classify_imagenet.models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152
16 | #
17 | class classify_imagenet_model(object):
18 | def __init__(self,
19 | model_path = './components/classify_imagenet/weights/imagenet_size-256_20210409.pth',
20 | model_arch = "resnet_50",
21 | img_size= 256,
22 | num_classes = 1000,
23 | ):
24 |
25 | f = open("./components/classify_imagenet/imagenet_msg.json", encoding='utf-8')# load the json label map
26 | dict_ = json.load(f)
27 | f.close()
28 | self.classify_dict = dict_
29 | # print("-------------->>\n dict_ : \n",dict_)
30 | #
31 | print("classify model loading : ",model_path)
32 | # print('use model : %s'%(model_arch))
33 |
34 | if model_arch == 'resnet_18':
35 | model_=resnet18(num_classes=num_classes, img_size=img_size)
36 | elif model_arch == 'resnet_34':
37 | model_=resnet34(num_classes=num_classes, img_size=img_size)
38 | elif model_arch == 'resnet_50':
39 | model_=resnet50(num_classes=num_classes, img_size=img_size)
40 | elif model_arch == 'resnet_101':
41 | model_=resnet101(num_classes=num_classes, img_size=img_size)
42 | elif model_arch == 'resnet_152':
43 | model_=resnet152(num_classes=num_classes, img_size=img_size)
44 | else:
45 | raise ValueError('unknown model_arch : {}'.format(model_arch))
46 |
47 | use_cuda = torch.cuda.is_available()
48 |
49 | device = torch.device("cuda:0" if use_cuda else "cpu")
50 | model_ = model_.to(device)
51 | model_.eval() # set to inference mode
52 |
53 | # print(model_)# print the model structure
54 |
55 | # load trained weights when the checkpoint file exists
56 | if os.access(model_path,os.F_OK):# checkpoint
57 | chkpt = torch.load(model_path, map_location=device)
58 | model_.load_state_dict(chkpt)
59 | # print('load classify model : {}'.format(model_path))
60 | self.model_ = model_
61 | self.use_cuda = use_cuda
62 | self.img_size = img_size
63 |
64 | def predict(self, img, vis = False):# img is an aligned crop
65 | with torch.no_grad():
66 |
67 | img_ = cv2.resize(img, (self.img_size,self.img_size), interpolation = cv2.INTER_CUBIC)
68 |
69 | img_ = img_.astype(np.float32)
70 | img_ = (img_-128.)/256.
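# (editor's note) The statements above and below form the model's entire
# preprocessing contract: resize to img_size, shift/scale pixels with
# (x - 128) / 256 into roughly [-0.5, 0.5), reorder HWC -> CHW, and add a
# batch dimension. A minimal standalone sketch of the same transform,
# assuming a BGR uint8 `img` as returned by cv2.imread:
#
#   x = cv2.resize(img, (256, 256), interpolation=cv2.INTER_CUBIC)
#   x = (x.astype(np.float32) - 128.) / 256.            # normalize
#   x = torch.from_numpy(x.transpose(2, 0, 1))[None]    # (1, 3, 256, 256)
#
# Any weights loaded in __init__ must have been trained with this same
# normalization, or the predicted scores will be unreliable.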
71 |
72 | img_ = img_.transpose(2, 0, 1)
73 | img_ = torch.from_numpy(img_)
74 | img_ = img_.unsqueeze_(0)
75 |
76 | if self.use_cuda:
77 | img_ = img_.cuda() # (bs, 3, h, w)
78 |
79 | pre_ = self.model_(img_.float())
80 |
81 | outputs = F.softmax(pre_,dim = 1)
82 | outputs = outputs[0]
83 |
84 | output = outputs.cpu().detach().numpy()
85 | output = np.array(output)
86 |
87 | max_index = np.argmax(output)
88 |
89 | score_ = output[max_index]
90 | # print("max_index:",max_index)
91 | # print("name:",self.classify_dict[str(max_index)])
92 | return max_index,self.classify_dict[str(max_index)],score_
93 |
-------------------------------------------------------------------------------- /components/classify_imagenet/models/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/components/classify_imagenet/models/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /components/classify_imagenet/utils/common_utils.py: --------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | # date:2023-12-07
3 | # Author: yinyipeng
4 | # function: utils
5 |
6 | import os
7 | import shutil
8 | import cv2
9 | import numpy as np
10 | import json
11 | import random
12 | def mkdir_(path, flag_rm=False):
13 | if os.path.exists(path):
14 | if flag_rm:
15 | shutil.rmtree(path)
16 | os.mkdir(path)
17 | print('remove {} done ~ '.format(path))
18 | else:
19 | os.mkdir(path)
20 |
21 | def plot_box(bbox, img, color=None, label=None, line_thickness=None):
22 | tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
23 | color = color or [random.randint(0, 255) for _ in range(3)]
24 | c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
25 | cv2.rectangle(img, c1, c2, color, thickness=tl)# target bbox
26 | if label:
27 | tf = max(tl - 2, 1)
28 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
29 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # text bbox
30 | cv2.rectangle(img, c1, c2, color, -1) # filled label background
31 | # draw the label text
32 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
33 |
34 | class JSON_Encoder(json.JSONEncoder):
35 | def default(self, obj):
36 | if isinstance(obj, np.integer):
37 | return int(obj)
38 | elif isinstance(obj, np.floating):
39 | return float(obj)
40 | elif isinstance(obj, np.ndarray):
41 | return obj.tolist()
42 | else:
43 | return super(JSON_Encoder, self).default(obj)
44 |
-------------------------------------------------------------------------------- /components/classify_imagenet/utils/model_utils.py: --------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | # date:2023-12-07
3 | # Author: yinyipeng
4 | # function : utils
5 |
6 | import os
7 | import numpy as np
8 | import torch
9 | import torch.backends.cudnn as cudnn
10 | import random
11 |
12 | def get_acc(output, label):
13 | total = output.shape[0]
14 | _, pred_label = output.max(1)
15 | num_correct = (pred_label == label).sum().item()
16 | return num_correct / float(total)
17 |
18 | def set_learning_rate(optimizer, lr):
19 | for param_group in optimizer.param_groups:
20 | param_group['lr'] = lr
21 |
22 | def set_seed(seed = 666):
23 | np.random.seed(seed)
24 | random.seed(seed)
25 | torch.manual_seed(seed)
26 | if 
torch.cuda.is_available(): 27 | torch.cuda.manual_seed(seed) 28 | torch.cuda.manual_seed_all(seed) 29 | cudnn.deterministic = True 30 | 31 | def split_trainval_datasets(ops): 32 | print(' --------------->>> split_trainval_datasets ') 33 | train_split_datasets = [] 34 | train_split_datasets_label = [] 35 | 36 | val_split_datasets = [] 37 | val_split_datasets_label = [] 38 | for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('-')[0]), reverse=False)): 39 | # print(' %s label is %s \n'%(doc,idx)) 40 | 41 | data_list = os.listdir(ops.train_path+doc) 42 | random.shuffle(data_list) 43 | 44 | cal_split_num = int(len(data_list)*ops.val_factor) 45 | 46 | for i,file in enumerate(data_list): 47 | if '.jpg' in file: 48 | if i < cal_split_num: 49 | val_split_datasets.append(ops.train_path+doc + '/' + file) 50 | val_split_datasets_label.append(idx) 51 | else: 52 | train_split_datasets.append(ops.train_path+doc + '/' + file) 53 | train_split_datasets_label.append(idx) 54 | 55 | print(ops.train_path+doc + '/' + file,idx) 56 | 57 | print('\n') 58 | print('train_split_datasets len {}'.format(len(train_split_datasets))) 59 | print('val_split_datasets len {}'.format(len(val_split_datasets))) 60 | 61 | return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label 62 | -------------------------------------------------------------------------------- /components/classify_imagenet/weights/分类权重放这里.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/components/classify_imagenet/weights/分类权重放这里.txt -------------------------------------------------------------------------------- /components/hand_detect/weights/yolov5权重.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/components/hand_detect/weights/yolov5权重.txt -------------------------------------------------------------------------------- /components/hand_keypoints/models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | """mobilenetv2 in pytorch 2 | 3 | 4 | 5 | [1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen 6 | 7 | MobileNetV2: Inverted Residuals and Linear Bottlenecks 8 | https://arxiv.org/abs/1801.04381 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | class LinearBottleNeck(nn.Module): 17 | 18 | def __init__(self, in_channels, out_channels, stride, t=6, class_num=100): 19 | super().__init__() 20 | 21 | self.residual = nn.Sequential( 22 | nn.Conv2d(in_channels, in_channels * t, 1), 23 | nn.BatchNorm2d(in_channels * t), 24 | nn.ReLU6(inplace=True), 25 | 26 | nn.Conv2d(in_channels * t, in_channels * t, 3, stride=stride, padding=1, groups=in_channels * t), 27 | nn.BatchNorm2d(in_channels * t), 28 | nn.ReLU6(inplace=True), 29 | 30 | nn.Conv2d(in_channels * t, out_channels, 1), 31 | nn.BatchNorm2d(out_channels) 32 | ) 33 | 34 | self.stride = stride 35 | self.in_channels = in_channels 36 | self.out_channels = out_channels 37 | 38 | def forward(self, x): 39 | 40 | residual = self.residual(x) 41 | 42 | if self.stride == 1 and self.in_channels == self.out_channels: 43 | residual += x 44 | 45 | return residual 46 | 47 | class MobileNetV2(nn.Module): 48 | 49 | def __init__(self, 
num_classes=100,dropout_factor = 1.0): 50 | super().__init__() 51 | 52 | self.pre = nn.Sequential( 53 | nn.Conv2d(3, 32, 1, padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ReLU6(inplace=True) 56 | ) 57 | 58 | self.stage1 = LinearBottleNeck(32, 16, 1, 1) 59 | self.stage2 = self._make_stage(2, 16, 24, 2, 6) 60 | self.stage3 = self._make_stage(3, 24, 32, 2, 6) 61 | self.stage4 = self._make_stage(4, 32, 64, 2, 6) 62 | self.stage5 = self._make_stage(3, 64, 96, 1, 6) 63 | self.stage6 = self._make_stage(3, 96, 160, 1, 6) 64 | self.stage7 = LinearBottleNeck(160, 320, 1, 6) 65 | 66 | self.conv1 = nn.Sequential( 67 | nn.Conv2d(320, 1280, 1), 68 | nn.BatchNorm2d(1280), 69 | nn.ReLU6(inplace=True) 70 | ) 71 | 72 | self.conv2 = nn.Conv2d(1280, num_classes, 1) 73 | 74 | self.dropout = nn.Dropout(dropout_factor) 75 | 76 | def forward(self, x): 77 | x = self.pre(x) 78 | x = self.stage1(x) 79 | x = self.stage2(x) 80 | x = self.stage3(x) 81 | x = self.stage4(x) 82 | x = self.stage5(x) 83 | x = self.stage6(x) 84 | x = self.stage7(x) 85 | x = self.conv1(x) 86 | x = F.adaptive_avg_pool2d(x, 1) 87 | x = self.dropout(x) 88 | x = self.conv2(x) 89 | x = x.view(x.size(0), -1) 90 | 91 | return x 92 | 93 | def _make_stage(self, repeat, in_channels, out_channels, stride, t): 94 | 95 | layers = [] 96 | layers.append(LinearBottleNeck(in_channels, out_channels, stride, t)) 97 | 98 | while repeat - 1: 99 | layers.append(LinearBottleNeck(out_channels, out_channels, 1, t)) 100 | repeat -= 1 101 | 102 | return nn.Sequential(*layers) 103 | 104 | def mobilenetv2(): 105 | return MobileNetV2() 106 | -------------------------------------------------------------------------------- /components/hand_keypoints/models/my_model.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | # date:2020-08-08 3 | # Author: X.L.Eric 4 | # function: my model 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | class MY_Net(nn.Module): 10 | def __init__(self,num_classes):# op 初始化 11 | super(MY_Net, self).__init__() 12 | self.cov = nn.Conv2d(3, 32, 3) 13 | self.relu = nn.ReLU(inplace=True) 14 | layers1 = [] 15 | # Conv2d : in_channels, out_channels, kernel_size, stride, padding 16 | layers1.append(nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3,stride=1,padding = 0)) 17 | layers1.append(nn.BatchNorm2d(64,affine=True)) 18 | layers1.append(nn.ReLU(inplace=True)) 19 | layers1.append(nn.AvgPool2d(kernel_size=3, stride=2, padding=1)) 20 | self.layers1 = nn.Sequential(*layers1) 21 | layers2 = [] 22 | layers2.append(nn.Conv2d(64, 128, 3)) 23 | layers2.append(nn.BatchNorm2d(128,affine=True)) 24 | layers2.append(nn.ReLU(inplace=True)) 25 | layers2.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 26 | self.layers2 = nn.Sequential(*layers2) 27 | layers3 = [] 28 | layers3.append(nn.Conv2d(128, 256, 3,stride=2)) 29 | layers3.append(nn.BatchNorm2d(256,affine=True)) 30 | layers3.append(nn.ReLU(inplace=True)) 31 | layers3.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 32 | self.layers3 = nn.Sequential(*layers3) 33 | layers4 = [] 34 | layers4.append(nn.Conv2d(256, 512, 3,stride=2)) 35 | layers4.append(nn.BatchNorm2d(512,affine=True)) 36 | layers4.append(nn.ReLU(inplace=True)) 37 | layers4.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 38 | layers4.append(nn.Conv2d(512, 512, 1,stride=1)) 39 | self.layers4 = nn.Sequential(*layers4) 40 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))# 自适应均值池化 41 | self.fc = nn.Linear(in_features = 512 , 
out_features = num_classes)# 全连接 fc 42 | 43 | def forward(self, x):# 模型前向推断 44 | x = self.cov(x) 45 | x = self.relu(x) 46 | x = self.layers1(x) 47 | x = self.layers2(x) 48 | x = self.layers3(x) 49 | x = self.layers4(x) 50 | x = self.avgpool(x) 51 | x = x.reshape(x.size(0), -1) 52 | x = self.fc(x) 53 | return x 54 | 55 | if __name__ == "__main__": 56 | #输入批次图片(batchsize,channel,height,width):8 ,3*256*256 57 | dummy_input = torch.randn([8, 3, 256,256]) 58 | model = MY_Net(num_classes = 100)# 分类数为 100 类 59 | print('model:\n',model)# 打印模型op 60 | output = model(dummy_input)# 模型前向推断 61 | # 模型前向推断输出特征尺寸 62 | print('model inference feature size: ',output.size()) 63 | print(output) 64 | 65 | output_ = F.softmax(output,dim = 1) 66 | # 67 | print(output_) 68 | -------------------------------------------------------------------------------- /components/hand_keypoints/models/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | """shufflenetv2 in pytorch 2 | 3 | 4 | 5 | [1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun 6 | 7 | ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design 8 | https://arxiv.org/abs/1807.11164 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | def channel_split(x, split): 17 | """split a tensor into two pieces along channel dimension 18 | Args: 19 | x: input tensor 20 | split:(int) channel size for each pieces 21 | """ 22 | assert x.size(1) == split * 2 23 | return torch.split(x, split, dim=1) 24 | 25 | def channel_shuffle(x, groups): 26 | """channel shuffle operation 27 | Args: 28 | x: input tensor 29 | groups: input branch number 30 | """ 31 | 32 | batch_size, channels, height, width = x.size() 33 | channels_per_group = int(channels // groups) 34 | 35 | x = x.view(batch_size, groups, channels_per_group, height, width) 36 | x = x.transpose(1, 2).contiguous() 37 | x = x.view(batch_size, -1, height, width) 38 | 39 | return x 40 | 41 | class ShuffleUnit(nn.Module): 42 | 43 | def __init__(self, in_channels, out_channels, stride): 44 | super().__init__() 45 | 46 | self.stride = stride 47 | self.in_channels = in_channels 48 | self.out_channels = out_channels 49 | 50 | if stride != 1 or in_channels != out_channels: 51 | self.residual = nn.Sequential( 52 | nn.Conv2d(in_channels, in_channels, 1), 53 | nn.BatchNorm2d(in_channels), 54 | nn.ReLU(inplace=True), 55 | nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), 56 | nn.BatchNorm2d(in_channels), 57 | nn.Conv2d(in_channels, int(out_channels / 2), 1), 58 | nn.BatchNorm2d(int(out_channels / 2)), 59 | nn.ReLU(inplace=True) 60 | ) 61 | 62 | self.shortcut = nn.Sequential( 63 | nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), 64 | nn.BatchNorm2d(in_channels), 65 | nn.Conv2d(in_channels, int(out_channels / 2), 1), 66 | nn.BatchNorm2d(int(out_channels / 2)), 67 | nn.ReLU(inplace=True) 68 | ) 69 | else: 70 | self.shortcut = nn.Sequential() 71 | 72 | in_channels = int(in_channels / 2) 73 | self.residual = nn.Sequential( 74 | nn.Conv2d(in_channels, in_channels, 1), 75 | nn.BatchNorm2d(in_channels), 76 | nn.ReLU(inplace=True), 77 | nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels), 78 | nn.BatchNorm2d(in_channels), 79 | nn.Conv2d(in_channels, in_channels, 1), 80 | nn.BatchNorm2d(in_channels), 81 | nn.ReLU(inplace=True) 82 | ) 83 | 84 | 85 | def forward(self, x): 86 | 87 | if self.stride == 1 and self.out_channels 
== self.in_channels:
88 | shortcut, residual = channel_split(x, int(self.in_channels / 2))
89 | else:
90 | shortcut = x
91 | residual = x
92 |
93 | shortcut = self.shortcut(shortcut)
94 | residual = self.residual(residual)
95 | x = torch.cat([shortcut, residual], dim=1)
96 | x = channel_shuffle(x, 2)
97 |
98 | return x
99 |
100 | class ShuffleNetV2(nn.Module):
101 |
102 | def __init__(self, ratio=1., num_classes=100, dropout_factor = 1.0):
103 | super().__init__()
104 | if ratio == 0.5:
105 | out_channels = [48, 96, 192, 1024]
106 | elif ratio == 1:
107 | out_channels = [116, 232, 464, 1024]
108 | elif ratio == 1.5:
109 | out_channels = [176, 352, 704, 1024]
110 | elif ratio == 2:
111 | out_channels = [244, 488, 976, 2048]
112 | else:
113 | raise ValueError('unsupported ratio number')
114 |
115 | self.pre = nn.Sequential(
116 | nn.Conv2d(3, 24, 3, padding=1),
117 | nn.BatchNorm2d(24)
118 | )
119 |
120 | self.stage2 = self._make_stage(24, out_channels[0], 3)
121 | self.stage3 = self._make_stage(out_channels[0], out_channels[1], 7)
122 | self.stage4 = self._make_stage(out_channels[1], out_channels[2], 3)
123 | self.conv5 = nn.Sequential(
124 | nn.Conv2d(out_channels[2], out_channels[3], 1),
125 | nn.BatchNorm2d(out_channels[3]),
126 | nn.ReLU(inplace=True)
127 | )
128 |
129 | self.fc = nn.Linear(out_channels[3], num_classes)
130 |
131 | self.dropout = nn.Dropout(dropout_factor)
132 |
133 | def forward(self, x):
134 | x = self.pre(x)
135 | x = self.stage2(x)
136 | x = self.stage3(x)
137 | x = self.stage4(x)
138 | x = self.conv5(x)
139 | x = F.adaptive_avg_pool2d(x, 1)
140 | x = x.view(x.size(0), -1)
141 | x = self.dropout(x)
142 | x = self.fc(x)
143 |
144 | return x
145 |
146 | def _make_stage(self, in_channels, out_channels, repeat):
147 | layers = []
148 | layers.append(ShuffleUnit(in_channels, out_channels, 2))
149 |
150 | while repeat:
151 | layers.append(ShuffleUnit(out_channels, out_channels, 1))
152 | repeat -= 1
153 |
154 | return nn.Sequential(*layers)
155 |
156 | def shufflenetv2():
157 | return ShuffleNetV2()
158 |
-------------------------------------------------------------------------------- /components/hand_keypoints/models/squeezenet.py: --------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.init as init
6 | import torch.utils.model_zoo as model_zoo
7 |
8 |
9 | __all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']
10 |
11 |
12 | model_urls = {
13 | 'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
14 | 'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
15 | }
16 |
17 |
18 | class Fire(nn.Module):
19 |
20 | def __init__(self, inplanes, squeeze_planes,
21 | expand1x1_planes, expand3x3_planes):
22 | super(Fire, self).__init__()
23 | self.inplanes = inplanes
24 | self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
25 | self.squeeze_activation = nn.ReLU(inplace=True)
26 | self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
27 | kernel_size=1)
28 | self.expand1x1_activation = nn.ReLU(inplace=True)
29 | self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
30 | kernel_size=3, padding=1)
31 | self.expand3x3_activation = nn.ReLU(inplace=True)
32 |
33 | def forward(self, x):
34 | x = self.squeeze_activation(self.squeeze(x))
35 | return torch.cat([
36 | self.expand1x1_activation(self.expand1x1(x)),
37 | self.expand3x3_activation(self.expand3x3(x))
38 | ], 1)
39 |
40 |
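# (editor's note) A quick shape check for the Fire module above -- a sketch,
# not part of the original file. `squeeze` reduces the input to
# squeeze_planes channels, and the concatenated 1x1/3x3 expand branches
# restore expand1x1_planes + expand3x3_planes output channels at the input
# resolution:
#
#   fire = Fire(inplanes=96, squeeze_planes=16,
#               expand1x1_planes=64, expand3x3_planes=64)
#   y = fire(torch.randn(1, 96, 55, 55))
#   assert y.shape == (1, 128, 55, 55)   # 64 + 64 expand channels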
41 | class SqueezeNet(nn.Module):
42 |
43 | def __init__(self, version=1.0, num_classes=1000,dropout_factor = 1.):
44 | super(SqueezeNet, self).__init__()
45 | if version not in [1.0, 1.1]:
46 | raise ValueError("Unsupported SqueezeNet version {version}:"
47 | "1.0 or 1.1 expected".format(version=version))
48 | self.num_classes = num_classes
49 | if version == 1.0:
50 | self.features = nn.Sequential(
51 | nn.Conv2d(3, 96, kernel_size=7, stride=2),
52 | nn.ReLU(inplace=True),
53 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
54 | Fire(96, 16, 64, 64),
55 | Fire(128, 16, 64, 64),
56 | Fire(128, 32, 128, 128),
57 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
58 | Fire(256, 32, 128, 128),
59 | Fire(256, 48, 192, 192),
60 | Fire(384, 48, 192, 192),
61 | Fire(384, 64, 256, 256),
62 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
63 | Fire(512, 64, 256, 256),
64 | )
65 | else:
66 | self.features = nn.Sequential(
67 | nn.Conv2d(3, 64, kernel_size=3, stride=2),
68 | nn.ReLU(inplace=True),
69 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
70 | Fire(64, 16, 64, 64),
71 | Fire(128, 16, 64, 64),
72 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
73 | Fire(128, 32, 128, 128),
74 | Fire(256, 32, 128, 128),
75 | nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
76 | Fire(256, 48, 192, 192),
77 | Fire(384, 48, 192, 192),
78 | Fire(384, 64, 256, 256),
79 | Fire(512, 64, 256, 256),
80 | )
81 | # Final convolution is initialized differently from the rest
82 | final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
83 | self.classifier = nn.Sequential(
84 | nn.Dropout(p=dropout_factor),
85 | final_conv,
86 | nn.ReLU(inplace=True),
87 | nn.AdaptiveAvgPool2d(1)
88 | )
89 |
90 | for m in self.modules():
91 | if isinstance(m, nn.Conv2d):
92 | if m is final_conv:
93 | init.normal_(m.weight.data, mean=0.0, std=0.01)
94 | else:
95 | init.kaiming_uniform_(m.weight.data)
96 | if m.bias is not None:
97 | m.bias.data.zero_()
98 |
99 | def forward(self, x):
100 | x = self.features(x)
101 | # print("features(x):", x.size())
102 | x = self.classifier(x)
103 | # print("classifier(x):", x.size())
104 | return x.view(x.size(0), self.num_classes)
105 |
106 |
107 | def squeezenet1_0(pretrained=False, **kwargs):
108 | r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
109 | accuracy with 50x fewer parameters and <0.5MB model size"
110 | <https://arxiv.org/abs/1602.07360>`_ paper.
111 |
112 | Args:
113 | pretrained (bool): If True, returns a model pre-trained on ImageNet
114 | """
115 | model = SqueezeNet(version=1.0, **kwargs)
116 | model_dict = model.state_dict()
117 | if pretrained:
118 | pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_0'])
119 | pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
120 | k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
121 | model.load_state_dict(pretrained_dict,strict=False)
122 | return model
123 |
124 |
125 | def squeezenet1_1(pretrained=False, **kwargs):
126 | r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
127 | <https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
128 | SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
129 | than SqueezeNet 1.0, without sacrificing accuracy. 
130 |
131 | Args:
132 | pretrained (bool): If True, returns a model pre-trained on ImageNet
133 | """
134 | model = SqueezeNet(version=1.1, **kwargs)
135 | model_dict = model.state_dict()
136 | if pretrained:
137 | pretrained_state_dict = model_zoo.load_url(model_urls['squeezenet1_1'])
138 | pretrained_dict = {k: v for k, v in pretrained_state_dict.items() if
139 | k in model_dict and model_dict[k].size() == pretrained_state_dict[k].size()}
140 | model.load_state_dict(pretrained_dict,strict=False)
141 | return model
142 |
143 |
144 | if __name__ == "__main__":
145 | from thop import profile
146 | dummy = torch.from_numpy(np.random.random([16, 3, 256, 256]).astype(np.float32))
147 | model = squeezenet1_0(pretrained=True, num_classes=42,dropout_factor = 0.5)
148 | print(model)
149 | flops, params = profile(model, inputs=(dummy, ))
150 | model.eval()
151 | output = model(dummy)
152 | print(output.size())
153 | print("flops: {}, params: {}".format(flops, params))
154 |
-------------------------------------------------------------------------------- /components/hand_keypoints/utils/__pycache__/common_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/components/hand_keypoints/utils/__pycache__/common_utils.cpython-37.pyc -------------------------------------------------------------------------------- /components/hand_keypoints/utils/common_utils.py: --------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | # date:2020-04-11
3 | # Author: Eric.Lee
4 | # function: common utils
5 |
6 | import os
7 | import shutil
8 | import cv2
9 | import numpy as np
10 | import json
11 | import random
12 | def mkdir_(path, flag_rm=False):
13 | if os.path.exists(path):
14 | if flag_rm:
15 | shutil.rmtree(path)
16 | os.mkdir(path)
17 | print('remove {} done ~ '.format(path))
18 | else:
19 | os.mkdir(path)
20 |
21 | def plot_box(bbox, img, color=None, label=None, line_thickness=None):
22 | tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
23 | color = color or [random.randint(0, 255) for _ in range(3)]
24 | c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
25 | cv2.rectangle(img, c1, c2, color, thickness=tl)# target bbox
26 | if label:
27 | tf = max(tl - 2, 1)
28 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size
29 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # text bbox
30 | cv2.rectangle(img, c1, c2, color, -1) # filled label background
31 | # draw the label text
32 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA)
33 |
34 | class JSON_Encoder(json.JSONEncoder):
35 | def default(self, obj):
36 | if isinstance(obj, np.integer):
37 | return int(obj)
38 | elif isinstance(obj, np.floating):
39 | return float(obj)
40 | elif isinstance(obj, np.ndarray):
41 | return obj.tolist()
42 | else:
43 | return super(JSON_Encoder, self).default(obj)
44 |
45 | def draw_landmarks(img,output,draw_circle):
46 | img_width = img.shape[1]
47 | img_height = img.shape[0]
48 | dict_landmarks = {}
49 | for i in range(int(output.shape[0]/2)):
50 | x = output[i*2+0]*float(img_width)
51 | y = output[i*2+1]*float(img_height)
52 | if 41>= i >=33:
53 | if 'left_eyebrow' not in dict_landmarks.keys():
54 | dict_landmarks['left_eyebrow'] = []
55 | dict_landmarks['left_eyebrow'].append([int(x),int(y),(0,255,0)])
56 | if draw_circle:
57 | cv2.circle(img, 
(int(x),int(y)), 2, (0,255,0),-1) 58 | elif 50>= i >=42: 59 | if 'right_eyebrow' not in dict_landmarks.keys(): 60 | dict_landmarks['right_eyebrow'] = [] 61 | dict_landmarks['right_eyebrow'].append([int(x),int(y),(0,255,0)]) 62 | if draw_circle: 63 | cv2.circle(img, (int(x),int(y)), 2, (0,255,0),-1) 64 | elif 67>= i >=60: 65 | if 'left_eye' not in dict_landmarks.keys(): 66 | dict_landmarks['left_eye'] = [] 67 | dict_landmarks['left_eye'].append([int(x),int(y),(255,0,255)]) 68 | if draw_circle: 69 | cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) 70 | elif 75>= i >=68: 71 | if 'right_eye' not in dict_landmarks.keys(): 72 | dict_landmarks['right_eye'] = [] 73 | dict_landmarks['right_eye'].append([int(x),int(y),(255,0,255)]) 74 | if draw_circle: 75 | cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) 76 | elif 97>= i >=96: 77 | cv2.circle(img, (int(x),int(y)), 2, (0,0,255),-1) 78 | elif 54>= i >=51: 79 | if 'bridge_nose' not in dict_landmarks.keys(): 80 | dict_landmarks['bridge_nose'] = [] 81 | dict_landmarks['bridge_nose'].append([int(x),int(y),(0,170,255)]) 82 | if draw_circle: 83 | cv2.circle(img, (int(x),int(y)), 2, (0,170,255),-1) 84 | elif 32>= i >=0: 85 | if 'basin' not in dict_landmarks.keys(): 86 | dict_landmarks['basin'] = [] 87 | dict_landmarks['basin'].append([int(x),int(y),(255,30,30)]) 88 | if draw_circle: 89 | cv2.circle(img, (int(x),int(y)), 2, (255,30,30),-1) 90 | elif 59>= i >=55: 91 | if 'wing_nose' not in dict_landmarks.keys(): 92 | dict_landmarks['wing_nose'] = [] 93 | dict_landmarks['wing_nose'].append([int(x),int(y),(0,255,255)]) 94 | if draw_circle: 95 | cv2.circle(img, (int(x),int(y)), 2, (0,255,255),-1) 96 | elif 87>= i >=76: 97 | if 'out_lip' not in dict_landmarks.keys(): 98 | dict_landmarks['out_lip'] = [] 99 | dict_landmarks['out_lip'].append([int(x),int(y),(255,255,0)]) 100 | if draw_circle: 101 | cv2.circle(img, (int(x),int(y)), 2, (255,255,0),-1) 102 | elif 95>= i >=88: 103 | if 'in_lip' not in dict_landmarks.keys(): 104 | dict_landmarks['in_lip'] = [] 105 | dict_landmarks['in_lip'].append([int(x),int(y),(50,220,255)]) 106 | if draw_circle: 107 | cv2.circle(img, (int(x),int(y)), 2, (50,220,255),-1) 108 | else: 109 | if draw_circle: 110 | cv2.circle(img, (int(x),int(y)), 2, (255,0,255),-1) 111 | 112 | return dict_landmarks 113 | 114 | def draw_contour(image,dict): 115 | for key in dict.keys(): 116 | # print(key) 117 | _,_,color = dict[key][0] 118 | 119 | if 'basin' == key or 'wing_nose' == key: 120 | pts = np.array([[dict[key][i][0],dict[key][i][1]] for i in range(len(dict[key]))],np.int32) 121 | # print(pts) 122 | cv2.polylines(image,[pts],False,color) 123 | 124 | else: 125 | points_array = np.zeros((1,len(dict[key]),2),dtype = np.int32) 126 | for i in range(len(dict[key])): 127 | x,y,_ = dict[key][i] 128 | points_array[0,i,0] = x 129 | points_array[0,i,1] = y 130 | 131 | # cv2.fillPoly(image, points_array, color) 132 | cv2.drawContours(image,points_array,-1,color,thickness=1) 133 | -------------------------------------------------------------------------------- /components/hand_keypoints/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | # date:2020-04-11 3 | # Author: Eric.Lee 4 | # function: model utils 5 | 6 | import os 7 | import numpy as np 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import random 11 | 12 | def get_acc(output, label): 13 | total = output.shape[0] 14 | _, pred_label = output.max(1) 15 | num_correct = (pred_label == label).sum().item() 16 
| return num_correct / float(total) 17 | 18 | def set_learning_rate(optimizer, lr): 19 | for param_group in optimizer.param_groups: 20 | param_group['lr'] = lr 21 | 22 | def set_seed(seed = 666): 23 | np.random.seed(seed) 24 | random.seed(seed) 25 | torch.manual_seed(seed) 26 | if torch.cuda.is_available(): 27 | torch.cuda.manual_seed(seed) 28 | torch.cuda.manual_seed_all(seed) 29 | cudnn.deterministic = True 30 | 31 | def split_trainval_datasets(ops): 32 | print(' --------------->>> split_trainval_datasets ') 33 | train_split_datasets = [] 34 | train_split_datasets_label = [] 35 | 36 | val_split_datasets = [] 37 | val_split_datasets_label = [] 38 | for idx,doc in enumerate(sorted(os.listdir(ops.train_path), key=lambda x:int(x.split('.')[0]), reverse=False)): 39 | # print(' %s label is %s \n'%(doc,idx)) 40 | 41 | data_list = os.listdir(ops.train_path+doc) 42 | random.shuffle(data_list) 43 | 44 | cal_split_num = int(len(data_list)*ops.val_factor) 45 | 46 | for i,file in enumerate(data_list): 47 | if '.jpg' in file: 48 | if i < cal_split_num: 49 | val_split_datasets.append(ops.train_path+doc + '/' + file) 50 | val_split_datasets_label.append(idx) 51 | else: 52 | train_split_datasets.append(ops.train_path+doc + '/' + file) 53 | train_split_datasets_label.append(idx) 54 | 55 | print(ops.train_path+doc + '/' + file,idx) 56 | 57 | print('\n') 58 | print('train_split_datasets len {}'.format(len(train_split_datasets))) 59 | print('val_split_datasets len {}'.format(len(val_split_datasets))) 60 | 61 | return train_split_datasets,train_split_datasets_label,val_split_datasets,val_split_datasets_label 62 | -------------------------------------------------------------------------------- /components/hand_keypoints/weights/关键点权重放这里.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/components/hand_keypoints/weights/关键点权重放这里.txt -------------------------------------------------------------------------------- /data/VOC.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford 3 | # Example usage: python train.py --data VOC.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── VOC ← downloads here (2.8 GB) 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
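# (editor's note) As an illustration of the three accepted forms named in
# the comment above -- the paths here are hypothetical examples, not part
# of this file:
#   train: images/train2012                    # 1) a directory of images
#   train: train2012.txt                       # 2) a txt file of image paths
#   train: [images/train2012, images/val2012]  # 3) a list of dirs/files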
11 | path: ../datasets/VOC 12 | train: # train images (relative to 'path') 16551 images 13 | - images/train2012 14 | - images/train2007 15 | - images/val2012 16 | - images/val2007 17 | val: # val images (relative to 'path') 4952 images 18 | - images/test2007 19 | test: # test images (optional) 20 | - images/test2007 21 | 22 | # Classes 23 | names: 24 | 0: aeroplane 25 | 1: bicycle 26 | 2: bird 27 | 3: boat 28 | 4: bottle 29 | 5: bus 30 | 6: car 31 | 7: cat 32 | 8: chair 33 | 9: cow 34 | 10: diningtable 35 | 11: dog 36 | 12: horse 37 | 13: motorbike 38 | 14: person 39 | 15: pottedplant 40 | 16: sheep 41 | 17: sofa 42 | 18: train 43 | 19: tvmonitor 44 | 45 | 46 | # Download script/URL (optional) --------------------------------------------------------------------------------------- 47 | download: | 48 | import xml.etree.ElementTree as ET 49 | 50 | from tqdm import tqdm 51 | from utils.general import download, Path 52 | 53 | 54 | def convert_label(path, lb_path, year, image_id): 55 | def convert_box(size, box): 56 | dw, dh = 1. / size[0], 1. / size[1] 57 | x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] 58 | return x * dw, y * dh, w * dw, h * dh 59 | 60 | in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml') 61 | out_file = open(lb_path, 'w') 62 | tree = ET.parse(in_file) 63 | root = tree.getroot() 64 | size = root.find('size') 65 | w = int(size.find('width').text) 66 | h = int(size.find('height').text) 67 | 68 | names = list(yaml['names'].values()) # names list 69 | for obj in root.iter('object'): 70 | cls = obj.find('name').text 71 | if cls in names and int(obj.find('difficult').text) != 1: 72 | xmlbox = obj.find('bndbox') 73 | bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')]) 74 | cls_id = names.index(cls) # class id 75 | out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n') 76 | 77 | 78 | # Download 79 | dir = Path(yaml['path']) # dataset root dir 80 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 81 | urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images 82 | f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images 83 | f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images 84 | download(urls, dir=dir / 'images', delete=False, curl=True, threads=3) 85 | 86 | # Convert 87 | path = dir / 'images/VOCdevkit' 88 | for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'): 89 | imgs_path = dir / 'images' / f'{image_set}{year}' 90 | lbs_path = dir / 'labels' / f'{image_set}{year}' 91 | imgs_path.mkdir(exist_ok=True, parents=True) 92 | lbs_path.mkdir(exist_ok=True, parents=True) 93 | 94 | with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f: 95 | image_ids = f.read().strip().split() 96 | for id in tqdm(image_ids, desc=f'{image_set}{year}'): 97 | f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path 98 | lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path 99 | f.rename(imgs_path / f.name) # move image 100 | convert_label(path, lb_path, year, id) # convert labels to YOLO format 101 | -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # COCO 2017 dataset http://cocodataset.org by Microsoft 3 | # Example usage: python train.py --data coco.yaml 4 | # parent 5 | # ├── yolov5 6 | # 
└── datasets 7 | # └── coco ← downloads here (20.1 GB) 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 11 | path: ../datasets/coco # dataset root dir 12 | train: train2017.txt # train images (relative to 'path') 118287 images 13 | val: val2017.txt # val images (relative to 'path') 5000 images 14 | test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | 100 | # Download script/URL (optional) 101 | download: | 102 | from utils.general import download, Path 103 | 104 | 105 | # Download labels 106 | segments = False # segment or box labels 107 | dir = Path(yaml['path']) # dataset root dir 108 | url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' 109 | urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels 110 | download(urls, dir=dir.parent) 111 | 112 | # Download data 113 | urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 114 | 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 115 | 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) 116 | download(urls, dir=dir / 'images', threads=3) 117 | -------------------------------------------------------------------------------- /data/coco128.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics 3 | # Example usage: python train.py --data coco128.yaml 4 | # parent 5 | # ├── yolov5 6 | # └── datasets 7 | # └── coco128 ← downloads here (7 MB) 8 | 9 | 10 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
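# (editor's note) coco128 points `train` and `val` at the same 128 images
# below: it is a smoke-test dataset -- a correctly wired training run should
# overfit it quickly -- so its metrics are for sanity checks, not for
# reporting real validation results.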
11 | path: ../datasets/coco128 # dataset root dir 12 | train: images/train2017 # train images (relative to 'path') 128 images 13 | val: images/train2017 # val images (relative to 'path') 128 images 14 | test: # test images (optional) 15 | 16 | # Classes 17 | names: 18 | 0: person 19 | 1: bicycle 20 | 2: car 21 | 3: motorcycle 22 | 4: airplane 23 | 5: bus 24 | 6: train 25 | 7: truck 26 | 8: boat 27 | 9: traffic light 28 | 10: fire hydrant 29 | 11: stop sign 30 | 12: parking meter 31 | 13: bench 32 | 14: bird 33 | 15: cat 34 | 16: dog 35 | 17: horse 36 | 18: sheep 37 | 19: cow 38 | 20: elephant 39 | 21: bear 40 | 22: zebra 41 | 23: giraffe 42 | 24: backpack 43 | 25: umbrella 44 | 26: handbag 45 | 27: tie 46 | 28: suitcase 47 | 29: frisbee 48 | 30: skis 49 | 31: snowboard 50 | 32: sports ball 51 | 33: kite 52 | 34: baseball bat 53 | 35: baseball glove 54 | 36: skateboard 55 | 37: surfboard 56 | 38: tennis racket 57 | 39: bottle 58 | 40: wine glass 59 | 41: cup 60 | 42: fork 61 | 43: knife 62 | 44: spoon 63 | 45: bowl 64 | 46: banana 65 | 47: apple 66 | 48: sandwich 67 | 49: orange 68 | 50: broccoli 69 | 51: carrot 70 | 52: hot dog 71 | 53: pizza 72 | 54: donut 73 | 55: cake 74 | 56: chair 75 | 57: couch 76 | 58: potted plant 77 | 59: bed 78 | 60: dining table 79 | 61: toilet 80 | 62: tv 81 | 63: laptop 82 | 64: mouse 83 | 65: remote 84 | 66: keyboard 85 | 67: cell phone 86 | 68: microwave 87 | 69: oven 88 | 70: toaster 89 | 71: sink 90 | 72: refrigerator 91 | 73: book 92 | 74: clock 93 | 75: vase 94 | 76: scissors 95 | 77: teddy bear 96 | 78: hair drier 97 | 79: toothbrush 98 | 99 | 100 | # Download script/URL (optional) 101 | download: https://ultralytics.com/assets/coco128.zip 102 | -------------------------------------------------------------------------------- /hand_data_iter/data_agu.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | # date:2023-12-07 3 | # Author: yinyipeng 4 | ## function: data agu 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | #------------------------------------------------------------------------------- 10 | # eye_left_n,eye_right_n:为扰动后的参考点坐标 11 | 12 | def hand_alignment_aug_fun(imgn,eye_left_n, eye_right_n, facial_landmarks_n = None, angle = None,desiredLeftEye=(0.34, 0.42),desiredFaceWidth=160, desiredFaceHeight=None,draw_flag = False): 13 | 14 | if desiredFaceHeight is None: 15 | desiredFaceHeight = desiredFaceWidth 16 | 17 | leftEyeCenter = eye_left_n 18 | rightEyeCenter = eye_right_n 19 | # compute the angle between the eye centroids 20 | dY = rightEyeCenter[1] - leftEyeCenter[1] 21 | dX = rightEyeCenter[0] - leftEyeCenter[0] 22 | if angle == None: 23 | angle = np.degrees(np.arctan2(dY, dX)) 24 | else: 25 | # print(' a) disturb angle : ',angle) 26 | angle += np.degrees(np.arctan2(dY, dX))#基于正对角度的扰动 27 | # print(' b) disturb angle : ',angle) 28 | 29 | # compute the desired right eye x-coordinate based on the 30 | # desired x-coordinate of the left eye 31 | desiredRightEyeX = 1.0 - desiredLeftEye[0] 32 | # determine the scale of the new resulting image by taking 33 | # the ratio of the distance between eyes in the *current* 34 | # image to the ratio of distance between eyes in the 35 | # *desired* image 36 | dist = np.sqrt((dX ** 2) + (dY ** 2)) 37 | desiredDist = (desiredRightEyeX - desiredLeftEye[0]) 38 | desiredDist *= desiredFaceWidth 39 | scale = desiredDist / dist 40 | # compute center (x, y)-coordinates (i.e., the median point) 41 | # between the two eyes in the input image 42 | eyesCenter 
= ((leftEyeCenter[0] + rightEyeCenter[0]) / 2,(leftEyeCenter[1] + rightEyeCenter[1]) / 2) 43 | # grab the rotation matrix for rotating and scaling the face 44 | M = cv2.getRotationMatrix2D(eyesCenter, angle, scale) 45 | # update the translation component of the matrix 46 | tX = desiredFaceWidth * 0.5 47 | tY = desiredFaceHeight * desiredLeftEye[1] 48 | M[0, 2] += (tX - eyesCenter[0]) 49 | M[1, 2] += (tY - eyesCenter[1]) 50 | 51 | M_reg = np.zeros((3,3),dtype = np.float32) 52 | M_reg[0,:] = M[0,:] 53 | M_reg[1,:] = M[1,:] 54 | M_reg[2,:] = (0,0,1.) 55 | # print(M_reg) 56 | M_I = np.linalg.inv(M_reg)#矩阵求逆,从而获得,目标图到原图的关系 57 | # print(M_I) 58 | # apply the affine transformation 59 | (w, h) = (desiredFaceWidth, desiredFaceHeight) 60 | output = cv2.warpAffine(imgn, M, (w, h),flags=cv2.INTER_LINEAR,borderMode=cv2.BORDER_CONSTANT)# INTER_LINEAR INTER_CUBIC INTER_NEAREST 61 | #BORDER_REFLECT BORDER_TRANSPARENT BORDER_REPLICATE CV_BORDER_WRAP BORDER_CONSTANT 62 | 63 | pts_landmarks = [] 64 | 65 | for k in range(len(facial_landmarks_n)): 66 | x = facial_landmarks_n[k][0] 67 | y = facial_landmarks_n[k][1] 68 | 69 | x_r = (x*M[0][0] + y*M[0][1] + M[0][2]) 70 | y_r = (x*M[1][0] + y*M[1][1] + M[1][2]) 71 | pts_landmarks.append([x_r,y_r]) 72 | # if draw_flag: 73 | # cv2.circle(output, (int(x_r),int(y_r)), np.int(1),(0,0,255), 1) 74 | 75 | 76 | # 77 | # cv2.circle(output, (ptx2,pty2), np.int(1),(0,0,255), 1) 78 | # cv2.circle(output, (ptx3,pty3), np.int(1),(0,255,0), 1) 79 | 80 | 81 | 82 | 83 | return output,pts_landmarks,M_I 84 | -------------------------------------------------------------------------------- /lib/hand_lib/cores/__pycache__/handpose_fuction.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/lib/hand_lib/cores/__pycache__/handpose_fuction.cpython-37.pyc -------------------------------------------------------------------------------- /lib/hand_lib/cores/__pycache__/tracking_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/lib/hand_lib/cores/__pycache__/tracking_utils.cpython-37.pyc -------------------------------------------------------------------------------- /lib/hand_lib/cores/hand_pnp.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | ''' 3 | DpCas-Light 4 | |||| ||||| |||| || ||||||| 5 | || || || || || || |||| || || 6 | || || || || || || || || || 7 | || || || || || ||====|| |||||| 8 | || || ||||| || || ||======|| || 9 | || || || || || || || || || 10 | |||| || |||| || || ||||||| 11 | 12 | /--------------------- HandPose_X ---------------------/ 13 | ''' 14 | # date:2019-12-10 15 | # Author: Eric.Lee 16 | # function: handpose :rotation & translation 17 | 18 | import cv2 19 | import numpy as np 20 | # 人脸外轮廓 21 | def get_face_outline(img_crop,face_crop_region,obj_crop_points,face_w,face_h): 22 | face_mask = np.zeros((1,27,2),dtype = np.int32) 23 | for m in range(obj_crop_points.shape[0]): 24 | if m <=16: 25 | x = int(face_crop_region[0]+obj_crop_points[m][0]*face_w) 26 | y = int(face_crop_region[1]+obj_crop_points[m][1]*face_h) 27 | # face_mask.append((x,y)) 28 | face_mask[0,m,0]=x 29 | face_mask[0,m,1]=y 30 | 31 | for k in range(16,26): 32 | m = 42-k 33 | x = int(face_crop_region[0]+obj_crop_points[m][0]*face_w) 34 | y = 
int(face_crop_region[1]+obj_crop_points[m][1]*face_h) 35 | # face_mask.append((x,y)) 36 | face_mask[0,k+1,0]=x 37 | face_mask[0,k+1,1]=y 38 | # print(x,y) 39 | return face_mask 40 | 41 | # 人脸公共模型三维坐标 42 | object_pts = np.float32([ 43 | [0., 0.4,0.],#掌心 44 | [0., 5.,0.],#hand 根部 45 | # [-2, 2.5,0.],#thumb 第一指节 46 | # [-4, 0.5,0.],#thumb 第二指节 47 | [-2.7, -4.5, 0.],# index 根部 48 | [0., -5., 0.],# middle 根部 49 | [2.6, -4., 0.], # ring 根部 50 | [5.2, -3., 0.],# pink 根部 51 | ] 52 | ) 53 | 54 | # object_pts = np.float32([[-2.5, -7.45, 0.5],# pink 根部 55 | # 56 | # [-1.2, -7.45, 0.5], # ring 根部 57 | # 58 | # 59 | # [1.2, -7.5, 0.5],# middle 根部 60 | # 61 | # [2.5, -7.45, 0.5],# index 根部 62 | # [4.2, -3.45, 0.5],# thumb 第二指节 63 | # [2.5, -2.0, 0.5],# thumb 根部 64 | # [0.00, -0.0,0.5],#hand 根部 65 | # ] 66 | # ) 67 | 68 | # xyz 立体矩形框 69 | # reprojectsrc = np.float32([[3.0, 11.0, 2.0], 70 | # [3.0, 11.0, -4.0], 71 | # [3.0, -7.0, -4.0], 72 | # [3.0, -7.0, 2.0], 73 | # [-3.0, 11.0, 2.0], 74 | # [-3.0, 11.0, -4.0], 75 | # [-3.0, -7.0, -4.0], 76 | # [-3.0, -7.0, 2.0]]) 77 | 78 | reprojectsrc = np.float32([[5.0, 8.0, 2.0], 79 | [5.0, 8.0, -2.0], 80 | [5.0, -8.0, -2.0], 81 | [5.0, -8.0, 2.0], 82 | [-5.0, 8.0, 2.0], 83 | [-5.0, 8.0, -2.0], 84 | [-5.0, -8.0, -2.0], 85 | [-5.0, -8.0, 2.0]]) 86 | 87 | # reprojectsrc = np.float32([[6.0, 4.0, 2.0], 88 | # [6.0, 4.0, -4.0], 89 | # [6.0, -3.0, -4.0], 90 | # [6.0, -3.0, 2.0], 91 | # [-6.0, 4.0, 2.0], 92 | # [-6.0, 4.0, -4.0], 93 | # [-6.0, -3.0, -4.0], 94 | # [-6.0, -3.0, 2.0]]) 95 | 96 | # reprojectsrc = np.float32([[6.0, 6.0, 6.0], 97 | # [6.0, 6.0, -6.0], 98 | # [6.0, -6.0, -6.0], 99 | # [6.0, -6.0, 6.0], 100 | # [-6.0, 6.0, 6.0], 101 | # [-6.0, 6.0, -6.0], 102 | # [-6.0, -6.0, -6.0], 103 | # [-6.0, -6.0, 6.0]]) 104 | 105 | # 立体矩形框连线,连接组合 106 | line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0], 107 | [4, 5], [5, 6], [6, 7], [7, 4], 108 | [0, 4], [1, 5], [2, 6], [3, 7]] 109 | 110 | 111 | def get_hand_pose(shape,img,vis = True): 112 | h,w,_=img.shape 113 | K = [w, 0.0, w//2, 114 | 0.0, w, h//2, 115 | 0.0, 0.0, 1.0] 116 | # Assuming no lens distortion 117 | D = [0, 0, 0.0, 0.0, 0] 118 | 119 | cam_matrix = np.array(K).reshape(3, 3).astype(np.float32)# 相机矩阵 120 | # dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32)#相机畸变矩阵,默认无畸变 121 | dist_coeffs = np.float32([0.0, 0.0, 0.0, 0.0, 0.0]) 122 | # 选取的人脸关键点的二维图像坐标 123 | # image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36], 124 | # shape[39], shape[42], shape[45], 125 | # shape[27],shape[31], shape[35],shape[30],shape[33]]) 126 | 127 | image_pts = np.float32([shape[0], shape[1], shape[2], shape[3], shape[4], shape[5] 128 | ] 129 | ) 130 | 131 | # PNP 计算图像二维和三维实际关系,获得旋转和偏移矩阵 132 | _, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs) 133 | # _, rotation_vec, translation_vec = cv2.solvePnPRansac(object_pts, image_pts, cam_matrix, dist_coeffs) 134 | 135 | 136 | # print("translation_vec:",translation_vec) 137 | #print('translation_vec : {}'.format(translation_vec)) 138 | 139 | # 映射矩形框 140 | reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix,dist_coeffs) 141 | 142 | reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2))) 143 | 144 | # calc euler angle 145 | rotation_mat, _ = cv2.Rodrigues(rotation_vec)#旋转向量转为旋转矩阵 146 | pose_mat = cv2.hconcat((rotation_mat, translation_vec))# 拼接操作 旋转 + 偏移 147 | _, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat)#欧拉角估计 148 | 149 | if vis: 150 | for i,line_pair in 
enumerate(line_pairs):# 显示立体矩形框 151 | x1 = int(reprojectdst[line_pair[0]][0]) 152 | y1 = int(reprojectdst[line_pair[0]][1]) 153 | 154 | x2 = int(reprojectdst[line_pair[1]][0]) 155 | y2 = int(reprojectdst[line_pair[1]][1]) 156 | 157 | if line_pair[0] in [0,3,4,7] and line_pair[1] in [0,3,4,7]: 158 | cv2.line(img,(x1,y1),(x2,y2),(255,0,0),2) 159 | elif line_pair[0] in [1,2,5,6] and line_pair[1] in [1,2,5,6]: 160 | cv2.line(img,(x1,y1),(x2,y2),(250,150,0),2) 161 | else: 162 | cv2.line(img,(x1,y1),(x2,y2),(0,90,255),2) 163 | 164 | return reprojectdst, euler_angle,translation_vec 165 | -------------------------------------------------------------------------------- /lib/hand_lib/cores/tracking_utils.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | ''' 3 | DpCas-Light 4 | |||| ||||| |||| || ||||||| 5 | || || || || || || |||| || || 6 | || || || || || || || || || 7 | || || || || || ||====|| |||||| 8 | || || ||||| || || ||======|| || 9 | || || || || || || || || || 10 | |||| || |||| || || ||||||| 11 | 12 | /--------------------- HandPose_X ---------------------/ 13 | ''' 14 | import copy 15 | def compute_iou_tk(rec1, rec2): 16 | """ 17 | computing IoU 18 | :param rec1: (y0, x0, y1, x1), which reflects 19 | (top, left, bottom, right) 20 | :param rec2: (y0, x0, y1, x1) 21 | :return: scala value of IoU 22 | """ 23 | # computing area of each rectangles 24 | 25 | S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1]) 26 | S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1]) 27 | 28 | # computing the sum_area 29 | sum_area = S_rec1 + S_rec2 30 | 31 | # find the each edge of intersect rectangle 32 | left_line = max(rec1[1], rec2[1]) 33 | right_line = min(rec1[3], rec2[3]) 34 | top_line = max(rec1[0], rec2[0]) 35 | bottom_line = min(rec1[2], rec2[2]) 36 | 37 | # judge if there is an intersect 38 | if left_line >= right_line or top_line >= bottom_line: 39 | return 0. 40 | else: 41 | intersect = (right_line - left_line) * (bottom_line - top_line) 42 | return (intersect / (sum_area - intersect)) * 1.0 43 | 44 | def tracking_bbox(data,hand_dict,index,iou_thr = 0.5): 45 | 46 | track_index = index 47 | reg_dict = {} 48 | Flag_ = True if hand_dict else False 49 | if Flag_ == False: 50 | # print("------------------->>. False") 51 | for bbox in data: 52 | x_min,y_min,x_max,y_max,score = bbox 53 | reg_dict[track_index] = (x_min,y_min,x_max,y_max,score,0.,1,1) 54 | track_index += 1 55 | 56 | if track_index >= 65535: 57 | track_index = 0 58 | else: 59 | # print("------------------->>. 
44 | def tracking_bbox(data,hand_dict,index,iou_thr = 0.5): 45 | 46 | track_index = index 47 | reg_dict = {} 48 | Flag_ = True if hand_dict else False 49 | if Flag_ == False: 50 | # print("------------------->>. False") 51 | for bbox in data: 52 | x_min,y_min,x_max,y_max,score = bbox 53 | reg_dict[track_index] = (x_min,y_min,x_max,y_max,score,0.,1,1) 54 | track_index += 1 55 | 56 | if track_index >= 65535: 57 | track_index = 0 58 | else: 59 | # print("------------------->>. True ") 60 | for bbox in data: 61 | xa0,ya0,xa1,ya1,score = bbox 62 | is_track = False 63 | for k_ in hand_dict.keys(): 64 | xb0,yb0,xb1,yb1,_,_,cnt_,bbox_stanbel_cnt = hand_dict[k_] 65 | 66 | iou_ = compute_iou_tk((ya0,xa0,ya1,xa1),(yb0,xb0,yb1,xb1)) 67 | # print((ya0,xa0,ya1,xa1),(yb0,xb0,yb1,xb1)) 68 | # print("iou : ",iou_) 69 | if iou_ > iou_thr: # successfully matched an existing tracked target 70 | UI_CNT = 1 71 | if iou_ > 0.888: 72 | UI_CNT = bbox_stanbel_cnt + 1 73 | reg_dict[k_] = (xa0,ya0,xa1,ya1,score,iou_,cnt_ + 1,UI_CNT) 74 | is_track = True 75 | # print("is_track : " ,cnt_ + 1) 76 | if is_track == False: # new target 77 | reg_dict[track_index] = (xa0,ya0,xa1,ya1,score,0.,1,1) 78 | track_index += 1 79 | if track_index >=65535: # reset the index when it overflows 80 | track_index = 0 81 | 82 | if track_index>=100: # keep indices small; this cap makes the 65535 check above redundant 83 | track_index = 0 84 | 85 | hand_dict = copy.deepcopy(reg_dict) 86 | 87 | # print("a:",hand_dict) 88 | 89 | return hand_dict,track_index 90 | -------------------------------------------------------------------------------- /lib/hand_lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | """Parses the data configuration file""" 4 | def parse_data_cfg(path): 5 | print('data_cfg : ',path) 6 | options = dict() 7 | with open(path, 'r',encoding='UTF-8') as fp: 8 | lines = fp.readlines() 9 | for line in lines: 10 | line = line.strip() 11 | if line == '' or line.startswith('#'): 12 | continue 13 | key, value = line.split('=') 14 | options[key.strip()] = value.strip() 15 | return options 16 | -------------------------------------------------------------------------------- /loss/loss.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | # date:2019-05-20 3 | # function: wing loss 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | import os 8 | import math 9 | 10 | def wing_loss(landmarks, labels, w=0.06, epsilon=0.01): 11 | """ 12 | Arguments: 13 | landmarks, labels: float tensors with shape [batch_size, landmarks]; each sample is laid out 1-D as x1,x2,x3,x4...y1,y2,y3,y4 14 | w, epsilon: float numbers. 15 | Returns: 16 | a float tensor with shape []. 
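Note: elementwise, wing(x) = w * ln(1 + |x|/epsilon) when |x| < w, and |x| - C otherwise, with C = w * (1 - ln(1 + w/epsilon)); this matches the implementation below.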
17 | """ 18 | 19 | x = landmarks - labels 20 | c = w * (1.0 - math.log(1.0 + w / epsilon)) 21 | absolute_x = torch.abs(x) 22 | 23 | losses = torch.where(\ 24 | (w>absolute_x),\ 25 | w * torch.log(1.0 + absolute_x / epsilon),\ 26 | absolute_x - c) 27 | 28 | 29 | # loss = tf.reduce_mean(tf.reduce_mean(losses, axis=[1]), axis=0) 30 | losses = torch.mean(losses,dim=1,keepdim=True) 31 | loss = torch.mean(losses) 32 | return loss 33 | 34 | def got_total_wing_loss(output,crop_landmarks): 35 | loss = wing_loss(output, crop_landmarks) 36 | return loss -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n02930766.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n02930766.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03085013.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03085013.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03290653.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03290653.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03642806.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03642806.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03782006.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03782006.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03791053.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03791053.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03793489.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03793489.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03832673.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03832673.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03876231.mp3: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03876231.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n03887697.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n03887697.mp3 -------------------------------------------------------------------------------- /materials/audio/imagenet_2012/n04285008.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/imagenet_2012/n04285008.mp3 -------------------------------------------------------------------------------- /materials/audio/sentences/IdentifyingObjectsWait.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/sentences/IdentifyingObjectsWait.mp3 -------------------------------------------------------------------------------- /materials/audio/sentences/ObjectMayBeIdentified.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/materials/audio/sentences/ObjectMayBeIdentified.mp3 -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/models/__init__.py -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Experimental modules 4 | """ 5 | import math 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from utils.downloads import attempt_download 12 | 13 | 14 | class Sum(nn.Module): 15 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 16 | def __init__(self, n, weight=False): # n: number of inputs 17 | super().__init__() 18 | self.weight = weight # apply weights boolean 19 | self.iter = range(n - 1) # iter object 20 | if weight: 21 | self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights 22 | 23 | def forward(self, x): 24 | y = x[0] # no weight 25 | if self.weight: 26 | w = torch.sigmoid(self.w) * 2 27 | for i in self.iter: 28 | y = y + x[i + 1] * w[i] 29 | else: 30 | for i in self.iter: 31 | y = y + x[i + 1] 32 | return y 33 | 34 | 35 | class MixConv2d(nn.Module): 36 | # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 37 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy 38 | super().__init__() 39 | n = len(k) # number of convolutions 40 | if equal_ch: # equal c_ per group 41 | i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices 42 | c_ = [(i == g).sum() for g in range(n)] # intermediate channels 43 | else: # equal weight.numel() per group 44 | b = 
[c2] + [0] * n 45 | a = np.eye(n + 1, n, k=-1) 46 | a -= np.roll(a, 1, axis=1) 47 | a *= np.array(k) ** 2 48 | a[0] = 1 49 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 50 | 51 | self.m = nn.ModuleList([ 52 | nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) 53 | self.bn = nn.BatchNorm2d(c2) 54 | self.act = nn.SiLU() 55 | 56 | def forward(self, x): 57 | return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 58 | 59 | 60 | class Ensemble(nn.ModuleList): 61 | # Ensemble of models 62 | def __init__(self): 63 | super().__init__() 64 | 65 | def forward(self, x, augment=False, profile=False, visualize=False): 66 | y = [module(x, augment, profile, visualize)[0] for module in self] 67 | # y = torch.stack(y).max(0)[0] # max ensemble 68 | # y = torch.stack(y).mean(0) # mean ensemble 69 | y = torch.cat(y, 1) # nms ensemble 70 | return y, None # inference, train output 71 | 72 | 73 | def attempt_load(weights, device=None, inplace=True, fuse=True): 74 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 75 | from models.yolo import Detect, Model 76 | 77 | model = Ensemble() 78 | for w in weights if isinstance(weights, list) else [weights]: 79 | ckpt = torch.load(attempt_download(w), map_location='cpu') # load 80 | ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model 81 | 82 | # Model compatibility updates 83 | if not hasattr(ckpt, 'stride'): 84 | ckpt.stride = torch.tensor([32.]) 85 | if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)): 86 | ckpt.names = dict(enumerate(ckpt.names)) # convert to dict 87 | 88 | model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode 89 | 90 | # Module compatibility updates 91 | for m in model.modules(): 92 | t = type(m) 93 | if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model): 94 | m.inplace = inplace # torch 1.7.0 compatibility 95 | if t is Detect and not isinstance(m.anchor_grid, list): 96 | delattr(m, 'anchor_grid') 97 | setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) 98 | elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): 99 | m.recompute_scale_factor = None # torch 1.11.0 compatibility 100 | 101 | # Return model 102 | if len(model) == 1: 103 | return model[-1] 104 | 105 | # Return detection ensemble 106 | print(f'Ensemble created with {weights}\n') 107 | for k in 'names', 'nc', 'yaml': 108 | setattr(model, k, getattr(model[0], k)) 109 | model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride 110 | assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' 111 | return model 112 | -------------------------------------------------------------------------------- /models/hub/anchors.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Default anchors for COCO data 3 | 4 | 5 | # P5 ------------------------------------------------------------------------------------------------------------------- 6 | # P5-640: 7 | anchors_p5_640: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | 13 | # P6 ------------------------------------------------------------------------------------------------------------------- 14 | # P6-640: thr=0.25: 0.9964 
BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 15 | anchors_p6_640: 16 | - [9,11, 21,19, 17,41] # P3/8 17 | - [43,32, 39,70, 86,64] # P4/16 18 | - [65,131, 134,130, 120,265] # P5/32 19 | - [282,180, 247,354, 512,387] # P6/64 20 | 21 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 22 | anchors_p6_1280: 23 | - [19,27, 44,40, 38,94] # P3/8 24 | - [96,68, 86,152, 180,137] # P4/16 25 | - [140,301, 303,264, 238,542] # P5/32 26 | - [436,615, 739,380, 925,792] # P6/64 27 | 28 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 29 | anchors_p6_1920: 30 | - [28,41, 67,59, 57,141] # P3/8 31 | - [144,103, 129,227, 270,205] # P4/16 32 | - [209,452, 455,396, 358,812] # P5/32 33 | - [653,922, 1109,570, 1387,1187] # P6/64 34 | 35 | 36 | # P7 ------------------------------------------------------------------------------------------------------------------- 37 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 38 | anchors_p7_640: 39 | - [11,11, 13,30, 29,20] # P3/8 40 | - [30,46, 61,38, 39,92] # P4/16 41 | - [78,80, 146,66, 79,163] # P5/32 42 | - [149,150, 321,143, 157,303] # P6/64 43 | - [257,402, 359,290, 524,372] # P7/128 44 | 45 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 46 | anchors_p7_1280: 47 | - [19,22, 54,36, 32,77] # P3/8 48 | - [70,83, 138,71, 75,173] # P4/16 49 | - [165,159, 148,334, 375,151] # P5/32 50 | - [334,317, 251,626, 499,474] # P6/64 51 | - [750,326, 534,814, 1079,818] # P7/128 52 | 53 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 54 | anchors_p7_1920: 55 | - [29,34, 81,55, 47,115] # P3/8 56 | - [105,124, 207,107, 113,259] # P4/16 57 | - [247,238, 222,500, 563,227] # P5/32 58 | - [501,476, 376,939, 749,711] # P6/64 59 | - [1126,489, 801,1222, 1618,1227] # P7/128 60 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, 
[64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, 
Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, 1, 1]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-bifpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 BiFPN head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 
| [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 FPN head 28 | head: 29 | [[-1, 3, C3, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, C3, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, C3, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /models/hub/yolov5-p2.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [1024]], 21 | [-1, 1, SPPF, [1024, 5]], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs 25 | head: 26 | [[-1, 1, Conv, [512, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 29 | [-1, 3, C3, [512, False]], # 13 30 | 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 34 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 2], 1, Concat, [1]], # cat backbone P2 39 | [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall) 40 | 41 | [-1, 1, Conv, [128, 3, 2]], 42 | [[-1, 18], 1, Concat, [1]], # cat head P3 43 | [-1, 3, C3, [256, False]], # 24 (P3/8-small) 44 | 45 | [-1, 1, Conv, [256, 3, 2]], 46 | [[-1, 14], 1, Concat, [1]], # cat head P4 47 | [-1, 3, C3, [512, False]], # 27 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [512, 3, 2]], 50 | [[-1, 10], 1, Concat, [1]], # cat head P5 51 | [-1, 3, C3, [1024, False]], # 30 (P5/32-large) 52 | 53 | [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5) 54 | ] 55 | -------------------------------------------------------------------------------- /models/hub/yolov5-p34.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2 13 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 14 | [ -1, 3, C3, [ 128 ] ], 15 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 16 | [ -1, 6, C3, [ 256 ] ], 17 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 18 | [ -1, 9, C3, [ 512 ] ], 
19 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32 20 | [ -1, 3, C3, [ 1024 ] ], 21 | [ -1, 1, SPPF, [ 1024, 5 ] ], # 9 22 | ] 23 | 24 | # YOLOv5 v6.0 head with (P3, P4) outputs 25 | head: 26 | [ [ -1, 1, Conv, [ 512, 1, 1 ] ], 27 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 28 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 29 | [ -1, 3, C3, [ 512, False ] ], # 13 30 | 31 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 34 | [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small) 35 | 36 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 37 | [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4 38 | [ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium) 39 | 40 | [ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, SPPF, [1024, 5]], # 11 24 | ] 25 | 26 | # YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs 27 | head: 28 | [[-1, 1, Conv, [768, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 31 | [-1, 3, C3, [768, False]], # 15 32 | 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 36 | [-1, 3, C3, [512, False]], # 19 37 | 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 41 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 42 | 43 | [-1, 1, Conv, [256, 3, 2]], 44 | [[-1, 20], 1, Concat, [1]], # cat head P4 45 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 46 | 47 | [-1, 1, Conv, [512, 3, 2]], 48 | [[-1, 16], 1, Concat, [1]], # cat head P5 49 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 50 | 51 | [-1, 1, Conv, [768, 3, 2]], 52 | [[-1, 12], 1, Concat, [1]], # cat head P6 53 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 54 | 55 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 56 | ] 57 | -------------------------------------------------------------------------------- /models/hub/yolov5-p7.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 3 # AutoAnchor evolves 3 anchors per P output layer 8 | 9 | # YOLOv5 v6.0 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 13 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 14 | [-1, 3, C3, [128]], 15 | [-1, 1, Conv, [256, 3, 
2]], # 3-P3/8 16 | [-1, 6, C3, [256]], 17 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 18 | [-1, 9, C3, [512]], 19 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 20 | [-1, 3, C3, [768]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 22 | [-1, 3, C3, [1024]], 23 | [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128 24 | [-1, 3, C3, [1280]], 25 | [-1, 1, SPPF, [1280, 5]], # 13 26 | ] 27 | 28 | # YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs 29 | head: 30 | [[-1, 1, Conv, [1024, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 10], 1, Concat, [1]], # cat backbone P6 33 | [-1, 3, C3, [1024, False]], # 17 34 | 35 | [-1, 1, Conv, [768, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 38 | [-1, 3, C3, [768, False]], # 21 39 | 40 | [-1, 1, Conv, [512, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 42 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 43 | [-1, 3, C3, [512, False]], # 25 44 | 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 47 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 48 | [-1, 3, C3, [256, False]], # 29 (P3/8-small) 49 | 50 | [-1, 1, Conv, [256, 3, 2]], 51 | [[-1, 26], 1, Concat, [1]], # cat head P4 52 | [-1, 3, C3, [512, False]], # 32 (P4/16-medium) 53 | 54 | [-1, 1, Conv, [512, 3, 2]], 55 | [[-1, 22], 1, Concat, [1]], # cat head P5 56 | [-1, 3, C3, [768, False]], # 35 (P5/32-large) 57 | 58 | [-1, 1, Conv, [768, 3, 2]], 59 | [[-1, 18], 1, Concat, [1]], # cat head P6 60 | [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge) 61 | 62 | [-1, 1, Conv, [1024, 3, 2]], 63 | [[-1, 14], 1, Concat, [1]], # cat head P7 64 | [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge) 65 | 66 | [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7) 67 | ] 68 | -------------------------------------------------------------------------------- /models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | 
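Each hub config above is a declarative module list that models/yolo.py parses into an nn.Module. A minimal loading sketch, assuming the stock YOLOv5 Model class exported by models/yolo.py (the input size and class count here are illustrative):

    import torch
    from models.yolo import Model

    model = Model('models/hub/yolov5-panet.yaml', ch=3, nc=80)  # build the network from the YAML
    y = model(torch.zeros(1, 3, 640, 640))  # dry-run forward pass on a blank 640x640 frame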
-------------------------------------------------------------------------------- /models/hub/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat 
backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5s-LeakyReLU.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model 6 | depth_multiple: 0.33 # model depth multiple 7 | width_multiple: 0.50 # layer channel multiple 8 | anchors: 9 | - [10,13, 16,30, 33,23] # P3/8 10 | - [30,61, 62,45, 59,119] # P4/16 11 | - [116,90, 156,198, 373,326] # P5/32 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 
15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [1024]], 25 | [-1, 1, SPPF, [1024, 5]], # 9 26 | ] 27 | 28 | # YOLOv5 v6.0 head 29 | head: 30 | [[-1, 1, Conv, [512, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3, [512, False]], # 13 34 | 35 | [-1, 1, Conv, [256, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 39 | 40 | [-1, 1, Conv, [256, 3, 2]], 41 | [[-1, 14], 1, Concat, [1]], # cat head P4 42 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 43 | 44 | [-1, 1, Conv, [512, 3, 2]], 45 | [[-1, 10], 1, Concat, [1]], # cat head P5 46 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 47 | 48 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /models/hub/yolov5s-ghost.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3Ghost, [128]], 18 | [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3Ghost, [256]], 20 | [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3Ghost, [512]], 22 | [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3Ghost, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, GhostConv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3Ghost, [512, False]], # 13 33 | 34 | [-1, 1, GhostConv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, GhostConv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, GhostConv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/hub/yolov5s-transformer.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, 
args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3TR, [1024]], # 9 <--- C3TR() Transformer module 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/hub/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/yolov5x6.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # 
Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [19,27, 44,40, 38,94] # P3/8 9 | - [96,68, 86,152, 180,137] # P4/16 10 | - [140,301, 303,264, 238,542] # P5/32 11 | - [436,615, 739,380, 925,792] # P6/64 12 | 13 | # YOLOv5 v6.0 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 17 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 20 | [-1, 6, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 7-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64 26 | [-1, 3, C3, [1024]], 27 | [-1, 1, SPPF, [1024, 5]], # 11 28 | ] 29 | 30 | # YOLOv5 v6.0 head 31 | head: 32 | [[-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 34 | [[-1, 8], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 15 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 39 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 19 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 44 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 23 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 20], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 26 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 16], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 29 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 12], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge) 58 | 59 | [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /models/segment/yolov5l-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 
23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/segment/yolov5m-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] -------------------------------------------------------------------------------- /models/segment/yolov5n-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | 
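In the Segment entries of these -seg configs, [nc, anchors, 32, 256] passes 32 mask prototypes and a 256-channel prototype head to the YOLOv5 v7.0 Segment layer; the trailing "# Detect(P3, P4, P5)" comments are carried over from the detection configs. A minimal sketch, assuming the stock SegmentationModel class in models/yolo.py (class count illustrative):

    from models.yolo import SegmentationModel

    model = SegmentationModel('models/segment/yolov5n-seg.yaml', ch=3, nc=80)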
-------------------------------------------------------------------------------- /models/segment/yolov5s-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.5 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] -------------------------------------------------------------------------------- /models/segment/yolov5x-seg.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: 
-------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.0 # model depth multiple 6 | width_multiple: 1.0 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.67 # model depth multiple 6 | width_multiple: 0.75 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # 
Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.25 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | 3 | # Parameters 4 | nc: 80 # number of classes 5 | depth_multiple: 1.33 # model depth multiple 6 | width_multiple: 1.25 # layer channel multiple 
7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 v6.0 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 6, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 3, C3, [1024]], 24 | [-1, 1, SPPF, [1024, 5]], # 9 25 | ] 26 | 27 | # YOLOv5 v6.0 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | 3 | import torch 4 | import torchvision 5 | import argparse 6 | from applications.handpose_local_app import main_handpose_x 7 | def demo_logo(): 8 | print("\n/*********************************/") 9 | print("/---------------------------------/\n") 10 | print(" WELCOME ") 11 | print(" yinyipeng ") 12 | print(" wechat:y24065939s ") 13 | print("\n/---------------------------------/") 14 | print("/*********************************/\n") 15 | if __name__ == '__main__': 16 | multiprocessing.freeze_support() 17 | demo_logo() 18 | parse = argparse.ArgumentParser("yolov5 hand pose") 19 | parse.add_argument('--hand_weight', type=str, default='best.pt', help='hand detect model path') 20 | parse.add_argument('--input_shape', type=int, default=640, help='yolov5 input shape') 21 | parse.add_argument('--conf', type=float, default=0.5, help='detect conf') 22 | parse.add_argument('--iou', type=float, default=0.45) 23 | parse.add_argument('--data', type=str, default='components/hand_detect/data/coco128.yaml') 24 | parse.add_argument('--handpose_model_path', type=str, default='components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.122.pth', help='hand 21 keys model path') 25 | parse.add_argument('--handpose_name', type=str, default='rexnetv1', help='handpose arch name') 26 | 27 | parse.add_argument('--classify_model_path', type=str, default='components/classify_imagenet/weights/imagenet_size-256_20210409.pth', help='classify model path') 28 | parse.add_argument('--classify_model_name', type=str, default='resnet_50', help='classify model name') 29 | parse.add_argument('--num_class', type=int, default=1000) 30 | 31 | parse.add_argument('--video_path', default='0', help='video path') 32 | parse.add_argument('--vis_gesture_lines', action='store_false') 33 | parse.add_argument('--charge_cycle_step', type=int, default=18) 34 | parse.add_argument('--device', type=str, default='cuda', help='cpu or cuda') 35 | # args = parse.parse_args() 36 | args, unparsed = parse.parse_known_args() 37 | print(args) 38 | 
39 | main_handpose_x(args) 40 | 41 | # Export to exe: pyinstaller --onefile --hidden-import torch._C --hidden-import torch.nn.functional predict.py 42 | 43 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # YOLOV5_HANDPOSE 2 | 3 | I previously built **YOLOv3 gesture-based object recognition**; over the last few days I reorganized and upgraded that project into **YOLOv5 gesture-based object recognition**. 4 | 5 | To make it easier for more people to use out of the box, I also built a small Windows application: you can run it even if your machine has **no PyTorch, no CUDA and no Python installed**~! [provided for a fee] 6 | 7 | **Related material:** 8 | 9 | [YOLOv3 gesture-based object recognition](https://blog.csdn.net/z240626191s/article/details/123289979?spm=1001.2014.3001.5502) 10 | 11 | **The application in action:** 12 | 13 | 14 | 15 | YOLOv5 gesture [object recognition](https://so.csdn.net/so/search?q=物体识别&spm=1001.2101.3001.7020) 16 | 17 | 18 | 19 | # Environment 20 | 21 | ``` 22 | torch 1.7.0 23 | 24 | tensorboard 1.15.0 25 | 26 | protobuf 3.20.0 27 | 28 | Pillow 9.5.0 29 | 30 | opencv-python 4.4.0.44 31 | ``` 32 | 33 | 34 | 35 | # Technical overview 36 | 37 | This project is built from three models: YOLOv5 for hand detection, ReXNet (ResNet variants are also supported) for regressing the 21 hand keypoints, and ResNet-50 for object classification. In effect it is a cascade of the three algorithms (see the sketch below). 38 |
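To make the cascade concrete, here is a minimal sketch of how the three stages fit together. It is illustrative only: `keypoint_model` stands in for the project's ReXNet wrapper, the crop and normalization details are simplified, and the real pipeline lives in applications/handpose_local_app.py and lib/hand_lib/cores/handpose_fuction.py.

```python
# Minimal sketch of the three-stage cascade (illustrative, not the project's exact API).
import cv2
import numpy as np
import torch

# Stage 1: YOLOv5 hand detector, loaded from the custom weights via torch.hub.
detector = torch.hub.load('ultralytics/yolov5', 'custom', path='best.pt')

def detect_hands(frame, keypoint_model, img_size=256):
    dets = detector(frame).xyxy[0].cpu().numpy()        # rows: x1, y1, x2, y2, conf, cls
    for x1, y1, x2, y2, conf, _ in dets:
        hand = frame[int(y1):int(y2), int(x1):int(x2)]  # crop the detected hand
        blob = cv2.resize(hand, (img_size, img_size)).astype(np.float32)
        blob = torch.from_numpy(blob.transpose(2, 0, 1)).unsqueeze(0) / 255.  # HWC -> NCHW
        pts = keypoint_model(blob).view(21, 2)          # Stage 2: 42 outputs -> 21 (x, y) keypoints
        yield (x1, y1, x2, y2, conf), pts

# Stage 3 (ResNet-50 classification) only runs once the pinch gesture fires;
# see the sketch in the classification section below.
```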
39 | ## YOLOv5 hand detection 40 | 41 | Training used yolov5s on a dataset of 30k+ images. Because my local training environment is limited, I only trained to around 64% mAP, so the accuracy is not especially high; feel free to train it further yourself~ 42 | 43 | ### Dataset 44 | 45 | **Dataset link:** 46 | 47 | (Note: this is a public dataset and has not been cleaned.) 48 | 49 | Link: https://pan.baidu.com/s/1jnXH3yBuGJ8_DRXu-gKtNg 50 | Extraction code: yypn 51 | 52 | **Dataset layout:** 53 | 54 | images: all of the images 55 | 56 | labels: the normalized label files 57 | 58 | train.txt: training split, 25934 images 59 | 60 | val.txt: validation split, 3241 images 61 | 62 | test.txt: test split, 3242 images 63 | 64 | 65 | 66 | **Training notes** 67 | Mosaic data augmentation was used. 68 | 69 | 70 | **Metrics:** 71 | 72 | (I only trained for about a dozen epochs; without a server, training is too slow ~(T^T)~, so the accuracy is fairly low. Train it yourself if you need better numbers.) 73 | 74 | (The trained weights are linked at the end of this readme.) 75 | 76 | | P | R | mAP 0.5 | mAP 0.5:0.95 | 77 | | ------- | ------- | ------- | ------------ | 78 | | 0.75396 | 0.59075 | 0.64671 | 0.27652 | 79 | 80 | ## 21 hand-keypoint detection 81 | 82 | Keypoint detection uses ReXNet (ResNet variants are also supported). Note that it does not use an OpenPose-style network: it regresses the coordinates directly. I want to stress this, because people keep asking about it, and even doubting it~ In this task YOLO acts as a front-end filter that separates the hand from the background; in the cropped image the foreground and background are fairly balanced and the hand dominates, so the problem is comparatively simple and coordinate regression works well. 83 | 84 | The networks are defined in yolov5_hand_pose/components/hand_keypoints/models/. 85 | 86 | 21 keypoints means 42 coordinates (an x and a y for each), which is why num_classes=42 in the code. 87 | 88 | ### Dataset 89 | 90 | (Note: this is a public dataset and has not been cleaned.) 91 | 92 | **Dataset link:** 93 | 94 | Link: https://pan.baidu.com/s/129aFPmhHq3lWmAFkuBI3BA 95 | Extraction code: yypn 96 | The full dataset contains 49062 images. 97 | 98 | ### Training 99 | 100 | The training code is in train.py. 101 | 102 | (The trained weights are linked at the end of this readme.) 103 | 104 | You can fine-tune from the provided pretrained weights. 105 | 106 | Start training with: 107 | 108 | ``` 109 | python train.py --model resnet_50 --train_path [dataset path] --fintune_model [fine-tune model path] --batch_size 16 110 | ``` 111 | 112 | For fine-tune training an initial learning rate (init_lr) of 5e-4 is recommended; otherwise use 1e-3. 113 | 114 | The loss function used here is MSE; wing loss is also supported. 115 | 116 | Trained weights are saved in model_exp, and the corresponding tensorboard logs in logs. 117 | 118 | ## Classification network 119 | 120 | The classifier is a ResNet-50 with ImageNet weights (1000 classes); you can retrain it for your own task. (Weights are linked at the end of this readme.) 121 | 122 | The network is defined in yolov5_hand_pose/components/classify_imagenet/models. 123 | 124 | So how exactly is classification triggered? 125 | 126 | The trigger gesture is a pinch between the index finger and the thumb. The Euclidean distance between the two fingertip keypoints is computed; once it falls below a threshold the trigger state becomes click_state=True, meaning the gesture has fired. 127 | 128 | Only when both hands perform the pinch is the action considered valid; **the region framed by the left and right hands is then cropped out (much like the YOLO crop) and sent to the classification network for recognition** (a minimal sketch of the trigger follows).
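Here is a minimal sketch of the pinch test, assuming `pts` holds the 21 regressed keypoints as a (21, 2) array in the usual hand-keypoint ordering (4 = thumb tip, 8 = index fingertip). The indices and threshold are illustrative assumptions; the project's actual trigger logic lives in lib/hand_lib/cores/handpose_fuction.py.

```python
# Pinch-trigger sketch; keypoint indices and threshold are assumptions, not the project's exact values.
import numpy as np

THUMB_TIP, INDEX_TIP = 4, 8  # usual 21-keypoint hand ordering

def is_pinching(pts, crop_size, ratio=0.1):
    """pts: (21, 2) array of (x, y) keypoints in crop-pixel coordinates."""
    dist = np.linalg.norm(pts[THUMB_TIP] - pts[INDEX_TIP])  # Euclidean distance
    return dist < ratio * crop_size  # click_state=True once below the threshold

# Classification is only triggered when both hands pinch at the same time:
# if is_pinching(left_pts, 256) and is_pinching(right_pts, 256):
#     crop the framed region and send it to the ResNet-50 classifier
```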
129 | 130 | ## Voice broadcast 131 | 132 | Once the gesture fires, a voice-broadcast function is called: it automatically plays "identifying the object, please wait", and if recognition succeeds and a voice clip for that object exists (you have to record these yourself), it says "the object you identified is ...". 133 | 134 | To record your own clips (mp3 format), place them in materials/audio/imagenet_2012/ 135 | 136 | # How to use this project 137 | 138 | Usage is simple: clone the project and run predict.py with the appropriate arguments. 139 | 140 | (Download the weights first~) 141 | 142 | You may need the following arguments: 143 | 144 | ``` 145 | --hand_weight [yolov5 weights path], default best.pt 146 | --handpose_model_path [keypoint weights], default components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.122.pth 147 | --handpose_name [keypoint model], default rexnetv1 148 | --classify_model_path [classifier weights], default components/classify_imagenet/weights/imagenet_size-256_20210409.pth 149 | --classify_model_name [classifier model name], default resnet_50 150 | --conf [yolo confidence threshold], default 0.5 151 | --video_path [video path], default the local webcam 152 | --device [inference device], default GPU 153 | 154 | ``` 155 | 156 | For example: 157 | 158 | ``` 159 | python predict.py --conf 0.3 --video_path 0 --hand_weight best.pt --device cuda 160 | ``` 161 | 162 | # Gesture object-recognition application 163 | 164 | To let more people use it, I spent two days exporting an exe application; it runs even on machines without PyTorch or CUDA (Windows only for now; on Linux it would presumably need wine). 165 | 166 | ps: I have only tested it on a few machines, where it ran successfully~ 167 | 168 | Application link: 169 | 170 | Link: https://pan.baidu.com/s/1wPpg2v4h2Zlkr5SgzCGgVw 171 | Extraction code: yypn 172 | 173 | How to run: 174 | 175 | 1. Double-click predict.exe to launch the program directly. 176 | 177 | 2. Run predict.exe from cmd. This is the recommended way, since you can pass arguments and see any error output. 178 | 179 | The supported arguments are: 180 | 181 | ``` 182 | --hand_weight [yolov5 weights path], default best.pt 183 | --handpose_model_path [keypoint weights], default components/hand_keypoints/weights/ReXNetV1-size-256-wingloss102-0.122.pth 184 | --handpose_name [keypoint model], default rexnetv1 185 | --classify_model_path [classifier weights], default components/classify_imagenet/weights/imagenet_size-256_20210409.pth 186 | --classify_model_name [classifier model name], default resnet_50 187 | --conf [yolo confidence threshold], default 0.5 188 | --video_path [video path], default the local webcam 189 | --device [inference device], default GPU 190 | ``` 191 | 192 | Example command: 193 | 194 | ```bash 195 | predict.exe --conf 0.3 --video 0 196 | ``` 197 | 198 | # Weights 199 | 200 | Link: https://pan.baidu.com/s/1WS3Nb5MkqMGhCKjM7DYsgg 201 | Extraction code: yypn 202 | 203 | The cloud drive holds three weight files: 204 | 205 | best.pt: the trained yolov5 detector weights 206 | 207 | ReXNetV1-size-256-wingloss102-0.122.pth: the 21-keypoint weights 208 | 209 | imagenet_size-256_20210409.pth: the classification network weights 210 | 211 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | utils/initialization 4 | """ 5 | 6 | import contextlib 7 | import platform 8 | import threading 9 | 10 | 11 | def emojis(str=''): 12 | # Return platform-dependent emoji-safe version of string 13 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 14 | 15 | 16 | class TryExcept(contextlib.ContextDecorator): 17 | # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager 18 | def __init__(self, msg=''): 19 | self.msg = msg 20 | 21 | def __enter__(self): 22 | pass 23 | 24 | def __exit__(self, exc_type, value, traceback): 25 | if value: 26 | print(emojis(f'{self.msg}{value}')) 27 | return True 28 | 29 | 30 | def threaded(func): 31 | # Multi-threads a target function and returns thread. Usage: @threaded decorator 32 | def wrapper(*args, **kwargs): 33 | thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) 34 | thread.start() 35 | return thread 36 | 37 | return wrapper 38 | 39 | 40 | def notebook_init(verbose=True): 41 | # Check system software and hardware 42 | print('Checking setup...') 43 | 44 | import os 45 | import shutil 46 | 47 | from utils.general import check_font, check_requirements, is_colab 48 | from utils.torch_utils import select_device # imports 49 | 50 | check_requirements(('psutil', 'IPython')) 51 | check_font() 52 | 53 | import psutil 54 | from IPython import display # to display images and clear console output 55 | 56 | if is_colab(): 57 | shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory 58 | 59 | # System info 60 | if verbose: 61 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 62 | ram = psutil.virtual_memory().total 63 | total, used, free = shutil.disk_usage("/") 64 | display.clear_output() 65 | s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)' 66 | else: 67 | s = '' 68 | 69 | select_device(newline=False) 70 | print(emojis(f'Setup complete ✅ {s}')) 71 | return display 72 | -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/augmentations.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/augmentations.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/augmentations.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/augmentations.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/autoanchor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/autoanchor.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/dataloaders.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/dataloaders.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/dataloaders.cpython-37.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/dataloaders.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/downloads.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/downloads.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/downloads.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/downloads.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/general.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/metrics.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/plots.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/plots.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/plots.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/plots.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/torch_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/__pycache__/torch_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Activation functions 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class SiLU(nn.Module): 12 | # SiLU activation https://arxiv.org/pdf/1606.08415.pdf 13 | @staticmethod 14 | def forward(x): 15 | return x * torch.sigmoid(x) 16 | 17 | 18 | class Hardswish(nn.Module): 19 | # Hard-SiLU activation 20 | @staticmethod 21 | def forward(x): 22 | # return x * F.hardsigmoid(x) # for TorchScript and CoreML 23 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX 24 | 25 | 26 | class Mish(nn.Module): 27 | # Mish activation https://github.com/digantamisra98/Mish 28 | @staticmethod 29 | def forward(x): 30 | return x * F.softplus(x).tanh() 31 | 32 | 33 | class MemoryEfficientMish(nn.Module): 34 | # Mish activation memory-efficient 35 | class F(torch.autograd.Function): 36 | 37 | @staticmethod 38 | def forward(ctx, x): 39 | ctx.save_for_backward(x) 40 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 41 | 42 | @staticmethod 43 | def backward(ctx, grad_output): 44 | x = ctx.saved_tensors[0] 45 | sx = torch.sigmoid(x) 46 | fx = F.softplus(x).tanh() 47 | return grad_output * (fx + x * sx * (1 - fx * fx)) 48 | 49 | def forward(self, x): 50 | return self.F.apply(x) 51 | 52 | 53 | class FReLU(nn.Module): 54 | # FReLU activation https://arxiv.org/abs/2007.11824 55 | def __init__(self, c1, k=3): # ch_in, kernel 56 | super().__init__() 57 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 58 | self.bn = nn.BatchNorm2d(c1) 59 | 60 | def forward(self, x): 61 | return torch.max(x, self.bn(self.conv(x))) 62 | 63 | 64 | class AconC(nn.Module): 65 | r""" ACON activation (activate or not) 66 | AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter 67 | according to "Activate or Not: Learning Customized Activation" . 68 | """ 69 | 70 | def __init__(self, c1): 71 | super().__init__() 72 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 73 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 74 | self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) 75 | 76 | def forward(self, x): 77 | dpx = (self.p1 - self.p2) * x 78 | return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x 79 | 80 | 81 | class MetaAconC(nn.Module): 82 | r""" ACON activation (activate or not) 83 | MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network 84 | according to "Activate or Not: Learning Customized Activation" . 
85 | """ 86 | 87 | def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r 88 | super().__init__() 89 | c2 = max(r, c1 // r) 90 | self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) 91 | self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) 92 | self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) 93 | self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) 94 | # self.bn1 = nn.BatchNorm2d(c2) 95 | # self.bn2 = nn.BatchNorm2d(c1) 96 | 97 | def forward(self, x): 98 | y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) 99 | # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 100 | # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable 101 | beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed 102 | dpx = (self.p1 - self.p2) * x 103 | return dpx * torch.sigmoid(beta * dpx) + self.p2 * x 104 | -------------------------------------------------------------------------------- /utils/autobatch.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Auto-batch utils 4 | """ 5 | 6 | from copy import deepcopy 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from utils.general import LOGGER, colorstr 12 | from utils.torch_utils import profile 13 | 14 | 15 | def check_train_batch_size(model, imgsz=640, amp=True): 16 | # Check YOLOv5 training batch size 17 | with torch.cuda.amp.autocast(amp): 18 | return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size 19 | 20 | 21 | def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): 22 | # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory 23 | # Usage: 24 | # import torch 25 | # from utils.autobatch import autobatch 26 | # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) 27 | # print(autobatch(model)) 28 | 29 | # Check device 30 | prefix = colorstr('AutoBatch: ') 31 | LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') 32 | device = next(model.parameters()).device # get model device 33 | if device.type == 'cpu': 34 | LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') 35 | return batch_size 36 | if torch.backends.cudnn.benchmark: 37 | LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}') 38 | return batch_size 39 | 40 | # Inspect CUDA memory 41 | gb = 1 << 30 # bytes to GiB (1024 ** 3) 42 | d = str(device).upper() # 'CUDA:0' 43 | properties = torch.cuda.get_device_properties(device) # device properties 44 | t = properties.total_memory / gb # GiB total 45 | r = torch.cuda.memory_reserved(device) / gb # GiB reserved 46 | a = torch.cuda.memory_allocated(device) / gb # GiB allocated 47 | f = t - (r + a) # GiB free 48 | LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free') 49 | 50 | # Profile batch sizes 51 | batch_sizes = [1, 2, 4, 8, 16] 52 | try: 53 | img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] 54 | results = profile(img, model, n=3, device=device) 55 | except Exception as e: 56 | LOGGER.warning(f'{prefix}{e}') 57 | return batch_size # profiling failed; fall back to the default (otherwise `results` below would be undefined) 58 | # Fit a solution 59 | y = [x[2] for x in results if x] # memory [2] 60 | p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit 61 | b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) 62 | if None in results: # some sizes failed 63 | i = results.index(None) # first
fail index 64 | if b >= batch_sizes[i]: # y intercept above failure point 65 | b = batch_sizes[max(i - 1, 0)] # select prior safe point 66 | if b < 1 or b > 1024: # b outside of safe range 67 | b = batch_size 68 | LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') 69 | 70 | fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted 71 | LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅') 72 | return b 73 | -------------------------------------------------------------------------------- /utils/aws/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/aws/__init__.py -------------------------------------------------------------------------------- /utils/aws/mime.sh: -------------------------------------------------------------------------------- 1 | # AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/ 2 | # This script will run on every instance restart, not only on first start 3 | # --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA --- 4 | 5 | Content-Type: multipart/mixed; boundary="//" 6 | MIME-Version: 1.0 7 | 8 | --// 9 | Content-Type: text/cloud-config; charset="us-ascii" 10 | MIME-Version: 1.0 11 | Content-Transfer-Encoding: 7bit 12 | Content-Disposition: attachment; filename="cloud-config.txt" 13 | 14 | #cloud-config 15 | cloud_final_modules: 16 | - [scripts-user, always] 17 | 18 | --// 19 | Content-Type: text/x-shellscript; charset="us-ascii" 20 | MIME-Version: 1.0 21 | Content-Transfer-Encoding: 7bit 22 | Content-Disposition: attachment; filename="userdata.txt" 23 | 24 | #!/bin/bash 25 | # --- paste contents of userdata.sh here --- 26 | --// 27 | -------------------------------------------------------------------------------- /utils/aws/resume.py: -------------------------------------------------------------------------------- 1 | # Resume all interrupted trainings in yolov5/ dir including DDP trainings 2 | # Usage: $ python utils/aws/resume.py 3 | 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import torch 9 | import yaml 10 | 11 | FILE = Path(__file__).resolve() 12 | ROOT = FILE.parents[2] # YOLOv5 root directory 13 | if str(ROOT) not in sys.path: 14 | sys.path.append(str(ROOT)) # add ROOT to PATH 15 | 16 | port = 0 # --master_port 17 | path = Path('').resolve() 18 | for last in path.rglob('*/**/last.pt'): 19 | ckpt = torch.load(last) 20 | if ckpt['optimizer'] is None: 21 | continue 22 | 23 | # Load opt.yaml 24 | with open(last.parent.parent / 'opt.yaml', errors='ignore') as f: 25 | opt = yaml.safe_load(f) 26 | 27 | # Get device count 28 | d = opt['device'].split(',') # devices 29 | nd = len(d) # number of devices 30 | ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel 31 | 32 | if ddp: # multi-GPU 33 | port += 1 34 | cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}' 35 | else: # single-GPU 36 | cmd = f'python train.py --resume {last}' 37 | 38 | cmd += ' > /dev/null 2>&1 &' # redirect output to dev/null and run in daemon thread 39 | print(cmd) 40 | os.system(cmd) 41 | -------------------------------------------------------------------------------- /utils/aws/userdata.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html 3 | # This script will run only once on first instance start (for a re-start script see mime.sh) 4 | # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir 5 | # Use >300 GB SSD 6 | 7 | cd home/ubuntu 8 | if [ ! -d yolov5 ]; then 9 | echo "Running first-time script." # install dependencies, download COCO, pull Docker 10 | git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5 11 | cd yolov5 12 | bash data/scripts/get_coco.sh && echo "COCO done." & 13 | sudo docker pull ultralytics/yolov5:latest && echo "Docker done." & 14 | python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." & 15 | wait && echo "All tasks done." # finish background tasks 16 | else 17 | echo "Running re-start script." # resume interrupted runs 18 | i=0 19 | list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour' 20 | while IFS= read -r id; do 21 | ((i++)) 22 | echo "restarting container $i: $id" 23 | sudo docker start $id 24 | # sudo docker exec -it $id python train.py --resume # single-GPU 25 | sudo docker exec -d $id python utils/aws/resume.py # multi-scenario 26 | done <<<"$list" 27 | fi 28 | -------------------------------------------------------------------------------- /utils/callbacks.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Callback utils 4 | """ 5 | 6 | import threading 7 | 8 | 9 | class Callbacks: 10 | """" 11 | Handles all registered callbacks for YOLOv5 Hooks 12 | """ 13 | 14 | def __init__(self): 15 | # Define the available callbacks 16 | self._callbacks = { 17 | 'on_pretrain_routine_start': [], 18 | 'on_pretrain_routine_end': [], 19 | 'on_train_start': [], 20 | 'on_train_epoch_start': [], 21 | 'on_train_batch_start': [], 22 | 'optimizer_step': [], 23 | 'on_before_zero_grad': [], 24 | 'on_train_batch_end': [], 25 | 'on_train_epoch_end': [], 26 | 'on_val_start': [], 27 | 'on_val_batch_start': [], 28 | 'on_val_image_end': [], 29 | 'on_val_batch_end': [], 30 | 'on_val_end': [], 31 | 'on_fit_epoch_end': [], # fit = train + val 32 | 'on_model_save': [], 33 | 'on_train_end': [], 34 | 'on_params_update': [], 35 | 'teardown': [],} 36 | self.stop_training = False # set True to interrupt training 37 | 38 | def register_action(self, hook, name='', callback=None): 39 | """ 40 | Register a new action to a callback hook 41 | 42 | Args: 43 | hook: The callback hook name to register the action to 44 | name: The name of the action for later reference 45 | callback: The callback to fire 46 | """ 47 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 48 | assert callable(callback), f"callback '{callback}' is not callable" 49 | self._callbacks[hook].append({'name': name, 'callback': callback}) 50 | 51 | def get_registered_actions(self, hook=None): 52 | """" 53 | Returns all the registered actions by callback hook 54 | 55 | Args: 56 | hook: The name of the hook to check, defaults to all 57 | """ 58 | return self._callbacks[hook] if hook else self._callbacks 59 | 60 | def run(self, hook, *args, thread=False, **kwargs): 61 | """ 62 | Loop through the registered actions and fire all callbacks on main thread 63 | 64 | Args: 65 | hook: The name of the hook to check, 
must be a registered hook 66 | args: Arguments to receive from YOLOv5 67 | thread: (boolean) Run callbacks in daemon thread 68 | kwargs: Keyword Arguments to receive from YOLOv5 69 | """ 70 | 71 | assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" 72 | for logger in self._callbacks[hook]: 73 | if thread: 74 | threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start() 75 | else: 76 | logger['callback'](*args, **kwargs) 77 | -------------------------------------------------------------------------------- /utils/common_utils.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | # date:2023-12-07 3 | # Author: yinyipeng 4 | # function: common utils 5 | 6 | import os 7 | import shutil 8 | import cv2 9 | import numpy as np 10 | import json 11 | import random # needed by plot_box for random box colors 12 | def mkdir_(path, flag_rm=False): 13 | if os.path.exists(path): 14 | if flag_rm: 15 | shutil.rmtree(path) 16 | os.mkdir(path) 17 | print('remove {} done ~ '.format(path)) 18 | else: 19 | os.mkdir(path) 20 | 21 | def plot_box(bbox, img, color=None, label=None, line_thickness=None): 22 | tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1 23 | color = color or [random.randint(0, 255) for _ in range(3)] 24 | c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])) 25 | cv2.rectangle(img, c1, c2, color, thickness=tl) # draw the object bbox 26 | if label: 27 | tf = max(tl - 2, 1) 28 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] # label size 29 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 # bbox of the label text 30 | cv2.rectangle(img, c1, c2, color, -1) # filled rectangle behind the label 31 | # draw the label text 32 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],thickness=tf, lineType=cv2.LINE_AA) 33 | 34 | class JSON_Encoder(json.JSONEncoder): 35 | def default(self, obj): 36 | if isinstance(obj, np.integer): 37 | return int(obj) 38 | elif isinstance(obj, np.floating): 39 | return float(obj) 40 | elif isinstance(obj, np.ndarray): 41 | return obj.tolist() 42 | else: 43 | return super(JSON_Encoder, self).default(obj) 44 | -------------------------------------------------------------------------------- /utils/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Builds ultralytics/yolov5:latest image on DockerHub https://hub.docker.com/r/ultralytics/yolov5 3 | # Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference 4 | 5 | # Start FROM NVIDIA PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 6 | FROM nvcr.io/nvidia/pytorch:22.09-py3 7 | RUN rm -rf /opt/pytorch # remove 1.2GB dir 8 | 9 | # Downloads to user config dir 10 | ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ 11 | 12 | # Install linux packages 13 | RUN apt update && apt install --no-install-recommends -y zip htop screen libgl1-mesa-glx 14 | 15 | # Install pip packages 16 | COPY requirements.txt . 17 | RUN python -m pip install --upgrade pip wheel 18 | RUN pip uninstall -y Pillow torchtext torch torchvision 19 | RUN pip install --no-cache -r requirements.txt albumentations wandb gsutil notebook 'Pillow>=9.1.0' \ 20 | 'opencv-python<4.6.0.66' \ 21 | --extra-index-url https://download.pytorch.org/whl/cu113 22 | 23 | # Create working directory 24 | RUN mkdir -p /usr/src/app 25 | WORKDIR /usr/src/app 26 | 27 | # Copy contents 28 | # COPY .
/usr/src/app (issues as not a .git directory) 29 | RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app 30 | 31 | # Set environment variables 32 | ENV OMP_NUM_THREADS=8 33 | 34 | 35 | # Usage Examples ------------------------------------------------------------------------------------------------------- 36 | 37 | # Build and Push 38 | # t=ultralytics/yolov5:latest && sudo docker build -f utils/docker/Dockerfile -t $t . && sudo docker push $t 39 | 40 | # Pull and Run 41 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t 42 | 43 | # Pull and Run with local directory access 44 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t 45 | 46 | # Kill all 47 | # sudo docker kill $(sudo docker ps -q) 48 | 49 | # Kill all image-based 50 | # sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest) 51 | 52 | # DockerHub tag update 53 | # t=ultralytics/yolov5:latest tnew=ultralytics/yolov5:v6.2 && sudo docker pull $t && sudo docker tag $t $tnew && sudo docker push $tnew 54 | 55 | # Clean up 56 | # docker system prune -a --volumes 57 | 58 | # Update Ubuntu drivers 59 | # https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/ 60 | 61 | # DDP test 62 | # python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3 63 | 64 | # GCP VM from Image 65 | # docker.io/ultralytics/yolov5:latest 66 | -------------------------------------------------------------------------------- /utils/docker/Dockerfile-arm64: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Builds ultralytics/yolov5:latest-arm64 image on DockerHub https://hub.docker.com/r/ultralytics/yolov5 3 | # Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. Jetson Nano and Raspberry Pi 4 | 5 | # Start FROM Ubuntu image https://hub.docker.com/_/ubuntu 6 | FROM arm64v8/ubuntu:20.04 7 | 8 | # Downloads to user config dir 9 | ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ 10 | 11 | # Install linux packages 12 | RUN apt update 13 | RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata 14 | RUN apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1-mesa-glx libglib2.0-0 libpython3-dev 15 | # RUN alias python=python3 16 | 17 | # Install pip packages 18 | COPY requirements.txt . 19 | RUN python3 -m pip install --upgrade pip wheel 20 | RUN pip install --no-cache -r requirements.txt gsutil notebook \ 21 | tensorflow-aarch64 22 | # tensorflowjs \ 23 | # onnx onnx-simplifier onnxruntime \ 24 | # coremltools openvino-dev \ 25 | 26 | # Create working directory 27 | RUN mkdir -p /usr/src/app 28 | WORKDIR /usr/src/app 29 | 30 | # Copy contents 31 | # COPY . /usr/src/app (issues as not a .git directory) 32 | RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app 33 | 34 | 35 | # Usage Examples ------------------------------------------------------------------------------------------------------- 36 | 37 | # Build and Push 38 | # t=ultralytics/yolov5:latest-M1 && sudo docker build --platform linux/arm64 -f utils/docker/Dockerfile-arm64 -t $t . 
&& sudo docker push $t 39 | 40 | # Pull and Run 41 | # t=ultralytics/yolov5:latest-M1 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t 42 | -------------------------------------------------------------------------------- /utils/docker/Dockerfile-cpu: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | # Builds ultralytics/yolov5:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/yolov5 3 | # Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv5 deployments 4 | 5 | # Start FROM Ubuntu image https://hub.docker.com/_/ubuntu 6 | FROM ubuntu:20.04 7 | 8 | # Downloads to user config dir 9 | ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/ 10 | 11 | # Install linux packages 12 | RUN apt update 13 | RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata 14 | RUN apt install --no-install-recommends -y python3-pip git zip curl htop libgl1-mesa-glx libglib2.0-0 libpython3-dev 15 | # RUN alias python=python3 16 | 17 | # Install pip packages 18 | COPY requirements.txt . 19 | RUN python3 -m pip install --upgrade pip wheel 20 | RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \ 21 | coremltools onnx onnx-simplifier onnxruntime tensorflow-cpu tensorflowjs \ 22 | # openvino-dev \ 23 | --extra-index-url https://download.pytorch.org/whl/cpu 24 | 25 | # Create working directory 26 | RUN mkdir -p /usr/src/app 27 | WORKDIR /usr/src/app 28 | 29 | # Copy contents 30 | # COPY . /usr/src/app (issues as not a .git directory) 31 | RUN git clone https://github.com/ultralytics/yolov5 /usr/src/app 32 | 33 | 34 | # Usage Examples ------------------------------------------------------------------------------------------------------- 35 | 36 | # Build and Push 37 | # t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t 38 | 39 | # Pull and Run 40 | # t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t 41 | -------------------------------------------------------------------------------- /utils/flask_rest_api/README.md: -------------------------------------------------------------------------------- 1 | # Flask REST API 2 | 3 | [REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are 4 | commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API 5 | created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/). 6 | 7 | ## Requirements 8 | 9 | [Flask](https://palletsprojects.com/p/flask/) is required. 
Install with: 10 | 11 | ```shell 12 | $ pip install Flask 13 | ``` 14 | 15 | ## Run 16 | 17 | After Flask installation run: 18 | 19 | ```shell 20 | $ python3 restapi.py --port 5000 21 | ``` 22 | 23 | Then use [curl](https://curl.se/) to perform a request: 24 | 25 | ```shell 26 | $ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s' 27 | ``` 28 | 29 | The model inference results are returned as a JSON response: 30 | 31 | ```json 32 | [ 33 | { 34 | "class": 0, 35 | "confidence": 0.8900438547, 36 | "height": 0.9318675399, 37 | "name": "person", 38 | "width": 0.3264600933, 39 | "xcenter": 0.7438579798, 40 | "ycenter": 0.5207948685 41 | }, 42 | { 43 | "class": 0, 44 | "confidence": 0.8440024257, 45 | "height": 0.7155083418, 46 | "name": "person", 47 | "width": 0.6546785235, 48 | "xcenter": 0.427829951, 49 | "ycenter": 0.6334488392 50 | }, 51 | { 52 | "class": 27, 53 | "confidence": 0.3771208823, 54 | "height": 0.3902671337, 55 | "name": "tie", 56 | "width": 0.0696444362, 57 | "xcenter": 0.3675483763, 58 | "ycenter": 0.7991207838 59 | }, 60 | { 61 | "class": 27, 62 | "confidence": 0.3527112305, 63 | "height": 0.1540903747, 64 | "name": "tie", 65 | "width": 0.0336618312, 66 | "xcenter": 0.7814827561, 67 | "ycenter": 0.5065554976 68 | } 69 | ] 70 | ``` 71 | 72 | An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given 73 | in `example_request.py` 74 | -------------------------------------------------------------------------------- /utils/flask_rest_api/example_request.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Perform test request 4 | """ 5 | 6 | import pprint 7 | 8 | import requests 9 | 10 | DETECTION_URL = "http://localhost:5000/v1/object-detection/yolov5s" 11 | IMAGE = "zidane.jpg" 12 | 13 | # Read image 14 | with open(IMAGE, "rb") as f: 15 | image_data = f.read() 16 | 17 | response = requests.post(DETECTION_URL, files={"image": image_data}).json() 18 | 19 | pprint.pprint(response) 20 | -------------------------------------------------------------------------------- /utils/flask_rest_api/restapi.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Run a Flask REST API exposing one or more YOLOv5s models 4 | """ 5 | 6 | import argparse 7 | import io 8 | 9 | import torch 10 | from flask import Flask, request 11 | from PIL import Image 12 | 13 | app = Flask(__name__) 14 | models = {} 15 | 16 | DETECTION_URL = "/v1/object-detection/<model>" 17 | 18 | 19 | @app.route(DETECTION_URL, methods=["POST"]) 20 | def predict(model): 21 | if request.method != "POST": 22 | return 23 | 24 | if request.files.get("image"): 25 | # Method 1 26 | # with request.files["image"] as f: 27 | # im = Image.open(io.BytesIO(f.read())) 28 | 29 | # Method 2 30 | im_file = request.files["image"] 31 | im_bytes = im_file.read() 32 | im = Image.open(io.BytesIO(im_bytes)) 33 | 34 | if model in models: 35 | results = models[model](im, size=640) # reduce size=320 for faster inference 36 | return results.pandas().xyxy[0].to_json(orient="records") 37 | 38 | 39 | if __name__ == "__main__": 40 | parser = argparse.ArgumentParser(description="Flask API exposing YOLOv5 model") 41 | parser.add_argument("--port", default=5000, type=int, help="port number") 42 | parser.add_argument('--model', nargs='+', default=['yolov5s'], help='model(s) to run, i.e.
--model yolov5n yolov5s') 43 | opt = parser.parse_args() 44 | 45 | for m in opt.model: 46 | models[m] = torch.hub.load("ultralytics/yolov5", m, force_reload=True, skip_validation=True) 47 | 48 | app.run(host="0.0.0.0", port=opt.port) # debug=True causes Restarting with stat 49 | -------------------------------------------------------------------------------- /utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==21.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 15 | -------------------------------------------------------------------------------- /utils/loggers/clearml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/loggers/clearml/__init__.py -------------------------------------------------------------------------------- /utils/loggers/clearml/hpo.py: -------------------------------------------------------------------------------- 1 | from clearml import Task 2 | # Connecting ClearML with the current process, 3 | # from here on everything is logged automatically 4 | from clearml.automation import HyperParameterOptimizer, UniformParameterRange 5 | from clearml.automation.optuna import OptimizerOptuna 6 | 7 | task = Task.init(project_name='Hyper-Parameter Optimization', 8 | task_name='YOLOv5', 9 | task_type=Task.TaskTypes.optimizer, 10 | reuse_last_task_id=False) 11 | 12 | # Example use case: 13 | optimizer = HyperParameterOptimizer( 14 | # This is the experiment we want to optimize 15 | base_task_id='', 16 | # here we define the hyper-parameters to optimize 17 | # Notice: The parameter name should exactly match what you see in the UI: / 18 | # For Example, here we see in the base experiment a section Named: "General" 19 | # under it a parameter named 
"batch_size", this becomes "General/batch_size" 20 | # If you have `argparse` for example, then arguments will appear under the "Args" section, 21 | # and you should instead pass "Args/batch_size" 22 | hyper_parameters=[ 23 | UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), 24 | UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), 25 | UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), 26 | UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), 27 | UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), 28 | UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), 29 | UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), 30 | UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), 31 | UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), 32 | UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), 33 | UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), 34 | UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0), 35 | UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7), 36 | UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0), 37 | UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0), 38 | UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1), 39 | UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9), 40 | UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9), 41 | UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0), 42 | UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9), 43 | UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9), 44 | UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0), 45 | UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001), 46 | UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0), 47 | UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0), 48 | UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0), 49 | UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0), 50 | UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)], 51 | # this is the objective metric we want to maximize/minimize 52 | objective_metric_title='metrics', 53 | objective_metric_series='mAP_0.5', 54 | # now we decide if we want to maximize it or minimize it (accuracy we maximize) 55 | objective_metric_sign='max', 56 | # let us limit the number of concurrent experiments, 57 | # this in turn will make sure we do dont bombard the scheduler with experiments. 
58 | # if we have an auto-scaler connected, this, by proxy, will limit the number of machines 59 | max_number_of_concurrent_tasks=1, 60 | # this is the optimizer class (actually doing the optimization) 61 | # Currently, we can choose from GridSearch, RandomSearch, OptimizerBOHB (Bayesian optimization Hyper-Band) or OptimizerOptuna 62 | optimizer_class=OptimizerOptuna, 63 | # If specified only the top K performing Tasks will be kept, the others will be automatically archived 64 | save_top_k_tasks_only=5, # 5, 65 | compute_time_limit=None, 66 | total_max_jobs=20, 67 | min_iteration_per_job=None, 68 | max_iteration_per_job=None, 69 | ) 70 | 71 | # report every 10 seconds, this is way too often, but we are testing here 72 | optimizer.set_report_period(10 / 60) 73 | # You can also use the line below instead to run all the optimizer tasks locally, without using queues or an agent 74 | # optimizer.start_locally(job_complete_callback=job_complete_callback) 75 | # set the time limit for the optimization process (2 hours) 76 | optimizer.set_time_limit(in_minutes=120.0) 77 | # Start the optimization process in the local environment 78 | optimizer.start_locally() 79 | # wait until process is done (notice we are controlling the optimization process in the background) 80 | optimizer.wait() 81 | # make sure the background optimization has stopped 82 | optimizer.stop() 83 | 84 | print('We are done, good bye') 85 | -------------------------------------------------------------------------------- /utils/loggers/comet/comet_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from urllib.parse import urlparse 4 | 5 | try: 6 | import comet_ml 7 | except (ModuleNotFoundError, ImportError): 8 | comet_ml = None 9 | 10 | import yaml 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | COMET_PREFIX = "comet://" 15 | COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") 16 | COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt") 17 | 18 | 19 | def download_model_checkpoint(opt, experiment): 20 | model_dir = f"{opt.project}/{experiment.name}" 21 | os.makedirs(model_dir, exist_ok=True) 22 | 23 | model_name = COMET_MODEL_NAME 24 | model_asset_list = experiment.get_model_asset_list(model_name) 25 | 26 | if len(model_asset_list) == 0: 27 | logger.error(f"COMET ERROR: No checkpoints found for model name: {model_name}") 28 | return 29 | 30 | model_asset_list = sorted( 31 | model_asset_list, 32 | key=lambda x: x["step"], 33 | reverse=True, 34 | ) 35 | logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list} 36 | 37 | resource_url = urlparse(opt.weights) 38 | checkpoint_filename = resource_url.query 39 | 40 | if checkpoint_filename: 41 | asset_id = logged_checkpoint_map.get(checkpoint_filename) 42 | else: 43 | asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME) 44 | checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME 45 | 46 | if asset_id is None: 47 | logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment") 48 | return 49 | 50 | try: 51 | logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}") 52 | asset_filename = checkpoint_filename 53 | 54 | model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 55 | model_download_path = f"{model_dir}/{asset_filename}" 56 | with open(model_download_path, "wb") as f: 57 | f.write(model_binary) 58 | 59 | opt.weights = model_download_path 60 |
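# NOTE: the checkpoint download above is wrapped in try/except so that a failed fetch (network, auth, or missing asset) is logged as a warning below instead of aborting the run.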
61 | except Exception as e: 62 | logger.warning("COMET WARNING: Unable to download checkpoint from Comet") 63 | logger.exception(e) 64 | 65 | 66 | def set_opt_parameters(opt, experiment): 67 | """Update the opt Namespace with parameters 68 | from Comet's ExistingExperiment when resuming a run 69 | 70 | Args: 71 | opt (argparse.Namespace): Namespace of command line options 72 | experiment (comet_ml.APIExperiment): Comet API Experiment object 73 | """ 74 | asset_list = experiment.get_asset_list() 75 | resume_string = opt.resume 76 | 77 | for asset in asset_list: 78 | if asset["fileName"] == "opt.yaml": 79 | asset_id = asset["assetId"] 80 | asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) 81 | opt_dict = yaml.safe_load(asset_binary) 82 | for key, value in opt_dict.items(): 83 | setattr(opt, key, value) 84 | opt.resume = resume_string 85 | 86 | # Save hyperparameters to YAML file 87 | # Necessary to pass checks in training script 88 | save_dir = f"{opt.project}/{experiment.name}" 89 | os.makedirs(save_dir, exist_ok=True) 90 | 91 | hyp_yaml_path = f"{save_dir}/hyp.yaml" 92 | with open(hyp_yaml_path, "w") as f: 93 | yaml.dump(opt.hyp, f) 94 | opt.hyp = hyp_yaml_path 95 | 96 | 97 | def check_comet_weights(opt): 98 | """Downloads model weights from Comet and updates the 99 | weights path to point to the saved weights location 100 | 101 | Args: 102 | opt (argparse.Namespace): Command Line arguments passed 103 | to YOLOv5 training script 104 | 105 | Returns: 106 | None/bool: Return True if weights are successfully downloaded 107 | else return None 108 | """ 109 | if comet_ml is None: 110 | return 111 | 112 | if isinstance(opt.weights, str): 113 | if opt.weights.startswith(COMET_PREFIX): 114 | api = comet_ml.API() 115 | resource = urlparse(opt.weights) 116 | experiment_path = f"{resource.netloc}{resource.path}" 117 | experiment = api.get(experiment_path) 118 | download_model_checkpoint(opt, experiment) 119 | return True 120 | 121 | return None 122 | 123 | 124 | def check_comet_resume(opt): 125 | """Restores run parameters to their original state based on the model checkpoint 126 | and logged Experiment parameters.
127 | 128 | Args: 129 | opt (argparse.Namespace): Command Line arguments passed 130 | to YOLOv5 training script 131 | 132 | Returns: 133 | None/bool: Return True if the run is restored successfully 134 | else return None 135 | """ 136 | if comet_ml is None: 137 | return 138 | 139 | if isinstance(opt.resume, str): 140 | if opt.resume.startswith(COMET_PREFIX): 141 | api = comet_ml.API() 142 | resource = urlparse(opt.resume) 143 | experiment_path = f"{resource.netloc}{resource.path}" 144 | experiment = api.get(experiment_path) 145 | set_opt_parameters(opt, experiment) 146 | download_model_checkpoint(opt, experiment) 147 | 148 | return True 149 | 150 | return None 151 | -------------------------------------------------------------------------------- /utils/loggers/comet/hpo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import comet_ml 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | FILE = Path(__file__).resolve() 13 | ROOT = FILE.parents[3] # YOLOv5 root directory 14 | if str(ROOT) not in sys.path: 15 | sys.path.append(str(ROOT)) # add ROOT to PATH 16 | 17 | from train import train 18 | from utils.callbacks import Callbacks 19 | from utils.general import increment_path 20 | from utils.torch_utils import select_device 21 | 22 | # Project Configuration 23 | config = comet_ml.config.get_config() 24 | COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") 25 | 26 | 27 | def get_args(known=False): 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') 30 | parser.add_argument('--cfg', type=str, default='', help='model.yaml path') 31 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') 32 | parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') 33 | parser.add_argument('--epochs', type=int, default=300, help='total training epochs') 34 | parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') 35 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') 36 | parser.add_argument('--rect', action='store_true', help='rectangular training') 37 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') 38 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 39 | parser.add_argument('--noval', action='store_true', help='only validate final epoch') 40 | parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') 41 | parser.add_argument('--noplots', action='store_true', help='save no plot files') 42 | parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') 43 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 44 | parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') 45 | parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') 46 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 47 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') 48 | parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') 49 | parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') 50 | parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') 51 | parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') 52 | parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') 53 | parser.add_argument('--name', default='exp', help='save to project/name') 54 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 55 | parser.add_argument('--quad', action='store_true', help='quad dataloader') 56 | parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') 57 | parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') 58 | parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') 59 | parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') 60 | parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') 61 | parser.add_argument('--seed', type=int, default=0, help='Global training seed') 62 | parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') 63 | 64 | # Weights & Biases arguments 65 | parser.add_argument('--entity', default=None, help='W&B: Entity') 66 | parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') 67 | parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') 68 | parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') 69 | 70 | # Comet Arguments 71 | parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.") 72 | parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.") 73 | parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.") 74 | parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.") 75 | parser.add_argument("--comet_optimizer_workers", 76 | type=int, 77 | default=1, 78 | help="Comet: Number of Parallel Workers to use with the Comet Optimizer.") 79 | 80 | return parser.parse_known_args()[0] if known else parser.parse_args() 81 | 82 | 83 | def run(parameters, opt): 84 | hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]} 85 | 86 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 87 | opt.batch_size = parameters.get("batch_size") 88 | opt.epochs = parameters.get("epochs") 89 | 90 | device = select_device(opt.device, batch_size=opt.batch_size) 91 | train(hyp_dict, opt, device, callbacks=Callbacks()) 92 | 93 | 94 | if __name__ == "__main__": 95 | opt = get_args(known=True) 96 | 97 | opt.weights = str(opt.weights) 98 | opt.cfg = str(opt.cfg) 99 | opt.data = str(opt.data) 100 | opt.project = str(opt.project) 101 | 
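# NOTE: two entry modes follow. If COMET_OPTIMIZER_ID is unset, a new comet_ml.Optimizer
# sweep is created from the JSON file passed via --comet_optimizer_config; otherwise this
# worker attaches to the existing sweep identified by that environment variable (as when
# workers are launched through Comet's optimizer CLI).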
102 | optimizer_id = os.getenv("COMET_OPTIMIZER_ID") 103 | if optimizer_id is None: 104 | with open(opt.comet_optimizer_config) as f: 105 | optimizer_config = json.load(f) 106 | optimizer = comet_ml.Optimizer(optimizer_config) 107 | else: 108 | optimizer = comet_ml.Optimizer(optimizer_id) 109 | 110 | opt.comet_optimizer_id = optimizer.id 111 | status = optimizer.status() 112 | 113 | opt.comet_optimizer_objective = status["spec"]["objective"] 114 | opt.comet_optimizer_metric = status["spec"]["metric"] 115 | 116 | logger.info("COMET INFO: Starting Hyperparameter Sweep") 117 | for parameter in optimizer.get_parameters(): 118 | run(parameter["parameters"], opt) 119 | -------------------------------------------------------------------------------- /utils/loggers/comet/optimizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": "random", 3 | "parameters": { 4 | "anchor_t": { 5 | "type": "discrete", 6 | "values": [ 7 | 2, 8 | 8 9 | ] 10 | }, 11 | "batch_size": { 12 | "type": "discrete", 13 | "values": [ 14 | 16, 15 | 32, 16 | 64 17 | ] 18 | }, 19 | "box": { 20 | "type": "discrete", 21 | "values": [ 22 | 0.02, 23 | 0.2 24 | ] 25 | }, 26 | "cls": { 27 | "type": "discrete", 28 | "values": [ 29 | 0.2 30 | ] 31 | }, 32 | "cls_pw": { 33 | "type": "discrete", 34 | "values": [ 35 | 0.5 36 | ] 37 | }, 38 | "copy_paste": { 39 | "type": "discrete", 40 | "values": [ 41 | 1 42 | ] 43 | }, 44 | "degrees": { 45 | "type": "discrete", 46 | "values": [ 47 | 0, 48 | 45 49 | ] 50 | }, 51 | "epochs": { 52 | "type": "discrete", 53 | "values": [ 54 | 5 55 | ] 56 | }, 57 | "fl_gamma": { 58 | "type": "discrete", 59 | "values": [ 60 | 0 61 | ] 62 | }, 63 | "fliplr": { 64 | "type": "discrete", 65 | "values": [ 66 | 0 67 | ] 68 | }, 69 | "flipud": { 70 | "type": "discrete", 71 | "values": [ 72 | 0 73 | ] 74 | }, 75 | "hsv_h": { 76 | "type": "discrete", 77 | "values": [ 78 | 0 79 | ] 80 | }, 81 | "hsv_s": { 82 | "type": "discrete", 83 | "values": [ 84 | 0 85 | ] 86 | }, 87 | "hsv_v": { 88 | "type": "discrete", 89 | "values": [ 90 | 0 91 | ] 92 | }, 93 | "iou_t": { 94 | "type": "discrete", 95 | "values": [ 96 | 0.7 97 | ] 98 | }, 99 | "lr0": { 100 | "type": "discrete", 101 | "values": [ 102 | 1e-05, 103 | 0.1 104 | ] 105 | }, 106 | "lrf": { 107 | "type": "discrete", 108 | "values": [ 109 | 0.01, 110 | 1 111 | ] 112 | }, 113 | "mixup": { 114 | "type": "discrete", 115 | "values": [ 116 | 1 117 | ] 118 | }, 119 | "momentum": { 120 | "type": "discrete", 121 | "values": [ 122 | 0.6 123 | ] 124 | }, 125 | "mosaic": { 126 | "type": "discrete", 127 | "values": [ 128 | 0 129 | ] 130 | }, 131 | "obj": { 132 | "type": "discrete", 133 | "values": [ 134 | 0.2 135 | ] 136 | }, 137 | "obj_pw": { 138 | "type": "discrete", 139 | "values": [ 140 | 0.5 141 | ] 142 | }, 143 | "optimizer": { 144 | "type": "categorical", 145 | "values": [ 146 | "SGD", 147 | "Adam", 148 | "AdamW" 149 | ] 150 | }, 151 | "perspective": { 152 | "type": "discrete", 153 | "values": [ 154 | 0 155 | ] 156 | }, 157 | "scale": { 158 | "type": "discrete", 159 | "values": [ 160 | 0 161 | ] 162 | }, 163 | "shear": { 164 | "type": "discrete", 165 | "values": [ 166 | 0 167 | ] 168 | }, 169 | "translate": { 170 | "type": "discrete", 171 | "values": [ 172 | 0 173 | ] 174 | }, 175 | "warmup_bias_lr": { 176 | "type": "discrete", 177 | "values": [ 178 | 0, 179 | 0.2 180 | ] 181 | }, 182 | "warmup_epochs": { 183 | "type": "discrete", 184 | "values": [ 185 | 5 186 | ] 187 | }, 188 | "warmup_momentum": { 189 | "type": 
"discrete", 190 | "values": [ 191 | 0, 192 | 0.95 193 | ] 194 | }, 195 | "weight_decay": { 196 | "type": "discrete", 197 | "values": [ 198 | 0, 199 | 0.001 200 | ] 201 | } 202 | }, 203 | "spec": { 204 | "maxCombo": 0, 205 | "metric": "metrics/mAP_0.5", 206 | "objective": "maximize" 207 | }, 208 | "trials": 1 209 | } 210 | -------------------------------------------------------------------------------- /utils/loggers/wandb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/loggers/wandb/__init__.py -------------------------------------------------------------------------------- /utils/loggers/wandb/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from wandb_utils import WandbLogger 4 | 5 | from utils.general import LOGGER 6 | 7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' 8 | 9 | 10 | def create_dataset_artifact(opt): 11 | logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused 12 | if not logger.wandb: 13 | LOGGER.info("install wandb using `pip install wandb` to log the dataset") 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 19 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 20 | parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') 21 | parser.add_argument('--entity', default=None, help='W&B entity') 22 | parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') 23 | 24 | opt = parser.parse_args() 25 | opt.resume = False # Explicitly disallow resume check for dataset upload job 26 | 27 | create_dataset_artifact(opt) 28 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import wandb 5 | 6 | FILE = Path(__file__).resolve() 7 | ROOT = FILE.parents[3] # YOLOv5 root directory 8 | if str(ROOT) not in sys.path: 9 | sys.path.append(str(ROOT)) # add ROOT to PATH 10 | 11 | from train import parse_opt, train 12 | from utils.callbacks import Callbacks 13 | from utils.general import increment_path 14 | from utils.torch_utils import select_device 15 | 16 | 17 | def sweep(): 18 | wandb.init() 19 | # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb. 
20 | hyp_dict = vars(wandb.config).get("_items").copy() 21 | 22 | # Workaround: get necessary opt args 23 | opt = parse_opt(known=True) 24 | opt.batch_size = hyp_dict.get("batch_size") 25 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) 26 | opt.epochs = hyp_dict.get("epochs") 27 | opt.nosave = True 28 | opt.data = hyp_dict.get("data") 29 | opt.weights = str(opt.weights) 30 | opt.cfg = str(opt.cfg) 31 | opt.data = str(opt.data) 32 | opt.hyp = str(opt.hyp) 33 | opt.project = str(opt.project) 34 | device = select_device(opt.device, batch_size=opt.batch_size) 35 | 36 | # train 37 | train(hyp_dict, opt, device, callbacks=Callbacks()) 38 | 39 | 40 | if __name__ == "__main__": 41 | sweep() 42 | -------------------------------------------------------------------------------- /utils/loggers/wandb/sweep.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for training 2 | # To set range- 3 | # Provide min and max values as: 4 | # parameter: 5 | # 6 | # min: scalar 7 | # max: scalar 8 | # OR 9 | # 10 | # Set a specific list of search space- 11 | # parameter: 12 | # values: [scalar1, scalar2, scalar3...] 13 | # 14 | # You can use grid, bayesian and hyperopt search strategy 15 | # For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration 16 | 17 | program: utils/loggers/wandb/sweep.py 18 | method: random 19 | metric: 20 | name: metrics/mAP_0.5 21 | goal: maximize 22 | 23 | parameters: 24 | # hyperparameters: set either min, max range or values list 25 | data: 26 | value: "data/coco128.yaml" 27 | batch_size: 28 | values: [64] 29 | epochs: 30 | values: [10] 31 | 32 | lr0: 33 | distribution: uniform 34 | min: 1e-5 35 | max: 1e-1 36 | lrf: 37 | distribution: uniform 38 | min: 0.01 39 | max: 1.0 40 | momentum: 41 | distribution: uniform 42 | min: 0.6 43 | max: 0.98 44 | weight_decay: 45 | distribution: uniform 46 | min: 0.0 47 | max: 0.001 48 | warmup_epochs: 49 | distribution: uniform 50 | min: 0.0 51 | max: 5.0 52 | warmup_momentum: 53 | distribution: uniform 54 | min: 0.0 55 | max: 0.95 56 | warmup_bias_lr: 57 | distribution: uniform 58 | min: 0.0 59 | max: 0.2 60 | box: 61 | distribution: uniform 62 | min: 0.02 63 | max: 0.2 64 | cls: 65 | distribution: uniform 66 | min: 0.2 67 | max: 4.0 68 | cls_pw: 69 | distribution: uniform 70 | min: 0.5 71 | max: 2.0 72 | obj: 73 | distribution: uniform 74 | min: 0.2 75 | max: 4.0 76 | obj_pw: 77 | distribution: uniform 78 | min: 0.5 79 | max: 2.0 80 | iou_t: 81 | distribution: uniform 82 | min: 0.1 83 | max: 0.7 84 | anchor_t: 85 | distribution: uniform 86 | min: 2.0 87 | max: 8.0 88 | fl_gamma: 89 | distribution: uniform 90 | min: 0.0 91 | max: 4.0 92 | hsv_h: 93 | distribution: uniform 94 | min: 0.0 95 | max: 0.1 96 | hsv_s: 97 | distribution: uniform 98 | min: 0.0 99 | max: 0.9 100 | hsv_v: 101 | distribution: uniform 102 | min: 0.0 103 | max: 0.9 104 | degrees: 105 | distribution: uniform 106 | min: 0.0 107 | max: 45.0 108 | translate: 109 | distribution: uniform 110 | min: 0.0 111 | max: 0.9 112 | scale: 113 | distribution: uniform 114 | min: 0.0 115 | max: 0.9 116 | shear: 117 | distribution: uniform 118 | min: 0.0 119 | max: 10.0 120 | perspective: 121 | distribution: uniform 122 | min: 0.0 123 | max: 0.001 124 | flipud: 125 | distribution: uniform 126 | min: 0.0 127 | max: 1.0 128 | fliplr: 129 | distribution: uniform 130 | min: 0.0 131 | max: 1.0 132 | mosaic: 133 | distribution: uniform 134 | min: 0.0 135 | 
max: 1.0 136 | mixup: 137 | distribution: uniform 138 | min: 0.0 139 | max: 1.0 140 | copy_paste: 141 | distribution: uniform 142 | min: 0.0 143 | max: 1.0 144 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | # date:2023-12-07 3 | # Author: yinyipeng 4 | # function: model utils 5 | 6 | import os 7 | import numpy as np 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import random 11 | 12 | def get_acc(output, label): 13 | total = output.shape[0] 14 | _, pred_label = output.max(1) 15 | num_correct = (pred_label == label).sum().item() 16 | return num_correct / float(total) 17 | 18 | def set_learning_rate(optimizer, lr): 19 | for param_group in optimizer.param_groups: 20 | param_group['lr'] = lr 21 | 22 | def set_seed(seed = 666): 23 | np.random.seed(seed) 24 | random.seed(seed) 25 | torch.manual_seed(seed) 26 | if torch.cuda.is_available(): 27 | torch.cuda.manual_seed(seed) 28 | torch.cuda.manual_seed_all(seed) 29 | cudnn.deterministic = True 30 | -------------------------------------------------------------------------------- /utils/segment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/segment/__init__.py -------------------------------------------------------------------------------- /utils/segment/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/segment/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/segment/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/segment/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/segment/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/segment/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- /utils/segment/__pycache__/general.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YINYIPENG-EN/yolov5_hand_pose/6f33a43c64de3a9ae96c6518f2b3ffdc8db93019/utils/segment/__pycache__/general.cpython-37.pyc -------------------------------------------------------------------------------- /utils/segment/augmentations.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Image augmentation functions 4 | """ 5 | 6 | import math 7 | import random 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | from ..augmentations import box_candidates 13 | from ..general import resample_segments, segment2box 14 | 15 | 16 | def mixup(im, labels, segments, im2, labels2, segments2): 17 | # Applies MixUp augmentation 
https://arxiv.org/pdf/1710.09412.pdf 18 | r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 19 | im = (im * r + im2 * (1 - r)).astype(np.uint8) 20 | labels = np.concatenate((labels, labels2), 0) 21 | segments = np.concatenate((segments, segments2), 0) 22 | return im, labels, segments 23 | 24 | 25 | def random_perspective(im, 26 | targets=(), 27 | segments=(), 28 | degrees=10, 29 | translate=.1, 30 | scale=.1, 31 | shear=10, 32 | perspective=0.0, 33 | border=(0, 0)): 34 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 35 | # targets = [cls, xyxy] 36 | 37 | height = im.shape[0] + border[0] * 2 # shape(h,w,c) 38 | width = im.shape[1] + border[1] * 2 39 | 40 | # Center 41 | C = np.eye(3) 42 | C[0, 2] = -im.shape[1] / 2 # x translation (pixels) 43 | C[1, 2] = -im.shape[0] / 2 # y translation (pixels) 44 | 45 | # Perspective 46 | P = np.eye(3) 47 | P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) 48 | P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) 49 | 50 | # Rotation and Scale 51 | R = np.eye(3) 52 | a = random.uniform(-degrees, degrees) 53 | # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations 54 | s = random.uniform(1 - scale, 1 + scale) 55 | # s = 2 ** random.uniform(-scale, scale) 56 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) 57 | 58 | # Shear 59 | S = np.eye(3) 60 | S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) 61 | S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) 62 | 63 | # Translation 64 | T = np.eye(3) 65 | T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) 66 | T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) 67 | 68 | # Combined rotation matrix 69 | M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT 70 | if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed 71 | if perspective: 72 | im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) 73 | else: # affine 74 | im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) 75 | 76 | # Visualize 77 | # import matplotlib.pyplot as plt 78 | # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() 79 | # ax[0].imshow(im[:, :, ::-1]) # base 80 | # ax[1].imshow(im2[:, :, ::-1]) # warped 81 | 82 | # Transform label coordinates 83 | n = len(targets) 84 | new_segments = [] 85 | if n: 86 | new = np.zeros((n, 4)) 87 | segments = resample_segments(segments) # upsample 88 | for i, segment in enumerate(segments): 89 | xy = np.ones((len(segment), 3)) 90 | xy[:, :2] = segment 91 | xy = xy @ M.T # transform 92 | xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine 93 | 94 | # clip 95 | new[i] = segment2box(xy, width, height) 96 | new_segments.append(xy) 97 | 98 | # filter candidates 99 | i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) 100 | targets = targets[i] 101 | targets[:, 1:5] = new[i] 102 | new_segments = np.array(new_segments)[i] 103 | 104 | return im, targets, new_segments 105 | -------------------------------------------------------------------------------- /utils/segment/general.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import 
torch.nn.functional as F 5 | 6 | 7 | def crop_mask(masks, boxes): 8 | """ 9 | "Crop" predicted masks by zeroing out everything not in the predicted bbox. 10 | Vectorized by Chong (thanks Chong). 11 | 12 | Args: 13 | - masks should be a size [n, h, w] tensor of masks 14 | - boxes should be a size [n, 4] tensor of bbox coords in relative point form 15 | """ 16 | 17 | n, h, w = masks.shape 18 | x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1) 19 | r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # column indices, shape(1,1,w) 20 | c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # row indices, shape(1,h,1) 21 | 22 | return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) 23 | 24 | 25 | def process_mask_upsample(protos, masks_in, bboxes, shape): 26 | """ 27 | Crop after upsample. 28 | proto_out: [mask_dim, mask_h, mask_w] 29 | out_masks: [n, mask_dim], n is number of masks after nms 30 | bboxes: [n, 4], n is number of masks after nms 31 | shape: input_image_size, (h, w) 32 | 33 | return: masks, [n, h, w] 34 | """ 35 | 36 | c, mh, mw = protos.shape # CHW 37 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) 38 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 39 | masks = crop_mask(masks, bboxes) # CHW 40 | return masks.gt_(0.5) 41 | 42 | 43 | def process_mask(protos, masks_in, bboxes, shape, upsample=False): 44 | """ 45 | Crop before upsample. 46 | proto_out: [mask_dim, mask_h, mask_w] 47 | out_masks: [n, mask_dim], n is number of masks after nms 48 | bboxes: [n, 4], n is number of masks after nms 49 | shape: input_image_size, (h, w) 50 | 51 | return: masks, [n, h, w] 52 | """ 53 | 54 | c, mh, mw = protos.shape # CHW 55 | ih, iw = shape 56 | masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW 57 | 58 | downsampled_bboxes = bboxes.clone() 59 | downsampled_bboxes[:, 0] *= mw / iw 60 | downsampled_bboxes[:, 2] *= mw / iw 61 | downsampled_bboxes[:, 3] *= mh / ih 62 | downsampled_bboxes[:, 1] *= mh / ih 63 | 64 | masks = crop_mask(masks, downsampled_bboxes) # CHW 65 | if upsample: 66 | masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW 67 | return masks.gt_(0.5) 68 | 69 | 70 | def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): 71 | """ 72 | im1_shape: model input shape, [h, w] 73 | im0_shape: origin pic shape, [h, w, 3] 74 | masks: [h, w, num] 75 | """ 76 | # Rescale coordinates (xyxy) from im1_shape to im0_shape 77 | if ratio_pad is None: # calculate from im0_shape 78 | gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new 79 | pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding 80 | else: 81 | pad = ratio_pad[1] 82 | top, left = int(pad[1]), int(pad[0]) # y, x 83 | bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) 84 | 85 | if len(masks.shape) < 2: 86 | raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') 87 | masks = masks[top:bottom, left:right] 88 | # masks = masks.permute(2, 0, 1).contiguous() 89 | # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] 90 | # masks = masks.permute(1, 2, 0).contiguous() 91 | masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) 92 | 93 | if len(masks.shape) == 2: 94 | masks = masks[:, :, None] 95 | return masks 96 | 97 | 98 | def mask_iou(mask1, mask2, eps=1e-7): 99 | """ 100 | mask1: [N, n] m1 means
number of predicted objects 101 | mask2: [M, n] m2 means number of gt objects 102 | Note: n means image_w x image_h 103 | 104 | return: masks iou, [N, M] 105 | """ 106 | intersection = torch.matmul(mask1, mask2.t()).clamp(0) 107 | union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection 108 | return intersection / (union + eps) 109 | 110 | 111 | def masks_iou(mask1, mask2, eps=1e-7): 112 | """ 113 | mask1: [N, n] m1 means number of predicted objects 114 | mask2: [N, n] m2 means number of gt objects 115 | Note: n means image_w x image_h 116 | 117 | return: masks iou, (N, ) 118 | """ 119 | intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) 120 | union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection 121 | return intersection / (union + eps) 122 | 123 | 124 | def masks2segments(masks, strategy='largest'): 125 | # Convert masks(n,160,160) into segments(n,xy) 126 | segments = [] 127 | for x in masks.int().cpu().numpy().astype('uint8'): 128 | c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] 129 | if strategy == 'concat': # concatenate all segments 130 | c = np.concatenate([x.reshape(-1, 2) for x in c]) 131 | elif strategy == 'largest': # select largest segment 132 | c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) 133 | segments.append(c.astype('float32')) 134 | return segments 135 | -------------------------------------------------------------------------------- /utils/segment/metrics.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ 3 | Model validation metrics 4 | """ 5 | 6 | import numpy as np 7 | 8 | from ..metrics import ap_per_class 9 | 10 | 11 | def fitness(x): 12 | # Model fitness as a weighted combination of metrics 13 | w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] 14 | return (x[:, :8] * w).sum(1) 15 | 16 | 17 | def ap_per_class_box_and_mask( 18 | tp_m, 19 | tp_b, 20 | conf, 21 | pred_cls, 22 | target_cls, 23 | plot=False, 24 | save_dir=".", 25 | names=(), 26 | ): 27 | """ 28 | Args: 29 | tp_b: tp of boxes. 30 | tp_m: tp of masks. 31 | other arguments see `func: ap_per_class`. 32 | """ 33 | results_boxes = ap_per_class(tp_b, 34 | conf, 35 | pred_cls, 36 | target_cls, 37 | plot=plot, 38 | save_dir=save_dir, 39 | names=names, 40 | prefix="Box")[2:] 41 | results_masks = ap_per_class(tp_m, 42 | conf, 43 | pred_cls, 44 | target_cls, 45 | plot=plot, 46 | save_dir=save_dir, 47 | names=names, 48 | prefix="Mask")[2:] 49 | 50 | results = { 51 | "boxes": { 52 | "p": results_boxes[0], 53 | "r": results_boxes[1], 54 | "ap": results_boxes[3], 55 | "f1": results_boxes[2], 56 | "ap_class": results_boxes[4]}, 57 | "masks": { 58 | "p": results_masks[0], 59 | "r": results_masks[1], 60 | "ap": results_masks[3], 61 | "f1": results_masks[2], 62 | "ap_class": results_masks[4]}} 63 | return results 64 | 65 | 66 | class Metric: 67 | 68 | def __init__(self) -> None: 69 | self.p = [] # (nc, ) 70 | self.r = [] # (nc, ) 71 | self.f1 = [] # (nc, ) 72 | self.all_ap = [] # (nc, 10) 73 | self.ap_class_index = [] # (nc, ) 74 | 75 | @property 76 | def ap50(self): 77 | """AP@0.5 of all classes. 78 | Return: 79 | (nc, ) or []. 80 | """ 81 | return self.all_ap[:, 0] if len(self.all_ap) else [] 82 | 83 | @property 84 | def ap(self): 85 | """AP@0.5:0.95 86 | Return: 87 | (nc, ) or []. 
88 | """ 89 | return self.all_ap.mean(1) if len(self.all_ap) else [] 90 | 91 | @property 92 | def mp(self): 93 | """mean precision of all classes. 94 | Return: 95 | float. 96 | """ 97 | return self.p.mean() if len(self.p) else 0.0 98 | 99 | @property 100 | def mr(self): 101 | """mean recall of all classes. 102 | Return: 103 | float. 104 | """ 105 | return self.r.mean() if len(self.r) else 0.0 106 | 107 | @property 108 | def map50(self): 109 | """Mean AP@0.5 of all classes. 110 | Return: 111 | float. 112 | """ 113 | return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 114 | 115 | @property 116 | def map(self): 117 | """Mean AP@0.5:0.95 of all classes. 118 | Return: 119 | float. 120 | """ 121 | return self.all_ap.mean() if len(self.all_ap) else 0.0 122 | 123 | def mean_results(self): 124 | """Mean of results, return mp, mr, map50, map""" 125 | return (self.mp, self.mr, self.map50, self.map) 126 | 127 | def class_result(self, i): 128 | """class-aware result, return p[i], r[i], ap50[i], ap[i]""" 129 | return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) 130 | 131 | def get_maps(self, nc): 132 | maps = np.zeros(nc) + self.map 133 | for i, c in enumerate(self.ap_class_index): 134 | maps[c] = self.ap[i] 135 | return maps 136 | 137 | def update(self, results): 138 | """ 139 | Args: 140 | results: tuple(p, r, ap, f1, ap_class) 141 | """ 142 | p, r, all_ap, f1, ap_class_index = results 143 | self.p = p 144 | self.r = r 145 | self.all_ap = all_ap 146 | self.f1 = f1 147 | self.ap_class_index = ap_class_index 148 | 149 | 150 | class Metrics: 151 | """Metric for boxes and masks.""" 152 | 153 | def __init__(self) -> None: 154 | self.metric_box = Metric() 155 | self.metric_mask = Metric() 156 | 157 | def update(self, results): 158 | """ 159 | Args: 160 | results: Dict{'boxes': Dict{}, 'masks': Dict{}} 161 | """ 162 | self.metric_box.update(list(results["boxes"].values())) 163 | self.metric_mask.update(list(results["masks"].values())) 164 | 165 | def mean_results(self): 166 | return self.metric_box.mean_results() + self.metric_mask.mean_results() 167 | 168 | def class_result(self, i): 169 | return self.metric_box.class_result(i) + self.metric_mask.class_result(i) 170 | 171 | def get_maps(self, nc): 172 | return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) 173 | 174 | @property 175 | def ap_class_index(self): 176 | # boxes and masks have the same ap_class_index 177 | return self.metric_box.ap_class_index 178 | 179 | 180 | KEYS = [ 181 | "train/box_loss", 182 | "train/seg_loss", # train loss 183 | "train/obj_loss", 184 | "train/cls_loss", 185 | "metrics/precision(B)", 186 | "metrics/recall(B)", 187 | "metrics/mAP_0.5(B)", 188 | "metrics/mAP_0.5:0.95(B)", # metrics 189 | "metrics/precision(M)", 190 | "metrics/recall(M)", 191 | "metrics/mAP_0.5(M)", 192 | "metrics/mAP_0.5:0.95(M)", # metrics 193 | "val/box_loss", 194 | "val/seg_loss", # val loss 195 | "val/obj_loss", 196 | "val/cls_loss", 197 | "x/lr0", 198 | "x/lr1", 199 | "x/lr2",] 200 | 201 | BEST_KEYS = [ 202 | "best/epoch", 203 | "best/precision(B)", 204 | "best/recall(B)", 205 | "best/mAP_0.5(B)", 206 | "best/mAP_0.5:0.95(B)", 207 | "best/precision(M)", 208 | "best/recall(M)", 209 | "best/mAP_0.5(M)", 210 | "best/mAP_0.5:0.95(M)",] 211 | -------------------------------------------------------------------------------- /utils/segment/plots.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import math 3 | from pathlib import Path 4 | 5 | import cv2 6 | import 
matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | import torch 10 | 11 | from .. import threaded 12 | from ..general import xywh2xyxy 13 | from ..plots import Annotator, colors 14 | 15 | 16 | @threaded 17 | def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): 18 | # Plot image grid with labels 19 | if isinstance(images, torch.Tensor): 20 | images = images.cpu().float().numpy() 21 | if isinstance(targets, torch.Tensor): 22 | targets = targets.cpu().numpy() 23 | if isinstance(masks, torch.Tensor): 24 | masks = masks.cpu().numpy().astype(int) 25 | 26 | max_size = 1920 # max image size 27 | max_subplots = 16 # max image subplots, i.e. 4x4 28 | bs, _, h, w = images.shape # batch size, _, height, width 29 | bs = min(bs, max_subplots) # limit plot images 30 | ns = np.ceil(bs ** 0.5) # number of subplots (square) 31 | if np.max(images[0]) <= 1: 32 | images *= 255 # de-normalise (optional) 33 | 34 | # Build Image 35 | mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init 36 | for i, im in enumerate(images): 37 | if i == max_subplots: # if last batch has fewer images than we expect 38 | break 39 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 40 | im = im.transpose(1, 2, 0) 41 | mosaic[y:y + h, x:x + w, :] = im 42 | 43 | # Resize (optional) 44 | scale = max_size / ns / max(h, w) 45 | if scale < 1: 46 | h = math.ceil(scale * h) 47 | w = math.ceil(scale * w) 48 | mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) 49 | 50 | # Annotate 51 | fs = int((h + w) * ns * 0.01) # font size 52 | annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) 53 | for i in range(i + 1): 54 | x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin 55 | annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders 56 | if paths: 57 | annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames 58 | if len(targets) > 0: 59 | idx = targets[:, 0] == i 60 | ti = targets[idx] # image targets 61 | 62 | boxes = xywh2xyxy(ti[:, 2:6]).T 63 | classes = ti[:, 1].astype('int') 64 | labels = ti.shape[1] == 6 # labels if no conf column 65 | conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) 66 | 67 | if boxes.shape[1]: 68 | if boxes.max() <= 1.01: # if normalized with tolerance 0.01 69 | boxes[[0, 2]] *= w # scale to pixels 70 | boxes[[1, 3]] *= h 71 | elif scale < 1: # absolute coords need scale if image scales 72 | boxes *= scale 73 | boxes[[0, 2]] += x 74 | boxes[[1, 3]] += y 75 | for j, box in enumerate(boxes.T.tolist()): 76 | cls = classes[j] 77 | color = colors(cls) 78 | cls = names[cls] if names else cls 79 | if labels or conf[j] > 0.25: # 0.25 conf thresh 80 | label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' 81 | annotator.box_label(box, label, color=color) 82 | 83 | # Plot masks 84 | if len(masks): 85 | if masks.max() > 1.0: # max > 1.0 means masks are index-encoded (overlapping instances in one mask per image) 86 | image_masks = masks[[i]] # (1, 640, 640) 87 | nl = len(ti) 88 | index = np.arange(nl).reshape(nl, 1, 1) + 1 89 | image_masks = np.repeat(image_masks, nl, axis=0) 90 | image_masks = np.where(image_masks == index, 1.0, 0.0) 91 | else: 92 | image_masks = masks[idx] 93 | 94 | im = np.asarray(annotator.im).copy() 95 | for j, box in enumerate(boxes.T.tolist()): 96 | if labels or conf[j] > 0.25: # 0.25 conf thresh 97 | color = colors(classes[j]) 98 | mh, mw = image_masks[j].shape 99 | if mh != h or mw != w: 100 | mask
= image_masks[j].astype(np.uint8) 101 | mask = cv2.resize(mask, (w, h)) 102 | mask = mask.astype(bool) 103 | else: 104 | mask = image_masks[j].astype(bool) 105 | with contextlib.suppress(Exception): 106 | im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 107 | annotator.fromarray(im) 108 | annotator.im.save(fname) # save 109 | 110 | 111 | def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): 112 | # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') 113 | save_dir = Path(file).parent if file else Path(dir) 114 | fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) 115 | ax = ax.ravel() 116 | files = list(save_dir.glob("results*.csv")) 117 | assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." 118 | for f in files: 119 | try: 120 | data = pd.read_csv(f) 121 | index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + 122 | 0.1 * data.values[:, 11]) 123 | s = [x.strip() for x in data.columns] 124 | x = data.values[:, 0] 125 | for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): 126 | y = data.values[:, j] 127 | # y[y == 0] = np.nan # don't show zero values 128 | ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) 129 | if best: 130 | # best 131 | ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) 132 | ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") 133 | else: 134 | # last 135 | ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) 136 | ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") 137 | # if j in [8, 9, 10]: # share train and val loss y axes 138 | # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) 139 | except Exception as e: 140 | print(f"Warning: Plotting error for {f}: {e}") 141 | ax[1].legend() 142 | fig.savefig(save_dir / "results.png", dpi=200) 143 | plt.close() 144 | -------------------------------------------------------------------------------- /utils/triton.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 🚀 by Ultralytics, GPL-3.0 license 2 | """ Utils to interact with the Triton Inference Server 3 | """ 4 | 5 | import typing 6 | from urllib.parse import urlparse 7 | 8 | import torch 9 | 10 | 11 | class TritonRemoteModel: 12 | """ A wrapper over a model served by the Triton Inference Server. It can 13 | be configured to communicate over GRPC or HTTP. It accepts Torch Tensors 14 | as input and returns them as outputs. 15 | """ 16 | 17 | def __init__(self, url: str): 18 | """ 19 | Keyword arguments: 20 | url: Fully qualified address of the Triton server - for e.g. 
grpc://localhost:8000 21 | """ 22 | 23 | parsed_url = urlparse(url) 24 | if parsed_url.scheme == "grpc": 25 | from tritonclient.grpc import InferenceServerClient, InferInput 26 | 27 | self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client 28 | model_repository = self.client.get_model_repository_index() 29 | self.model_name = model_repository.models[0].name 30 | self.metadata = self.client.get_model_metadata(self.model_name, as_json=True) 31 | 32 | def create_input_placeholders() -> typing.List[InferInput]: 33 | return [ 34 | InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] 35 | 36 | else: 37 | from tritonclient.http import InferenceServerClient, InferInput 38 | 39 | self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client 40 | model_repository = self.client.get_model_repository_index() 41 | self.model_name = model_repository[0]['name'] 42 | self.metadata = self.client.get_model_metadata(self.model_name) 43 | 44 | def create_input_placeholders() -> typing.List[InferInput]: 45 | return [ 46 | InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] 47 | 48 | self._create_input_placeholders_fn = create_input_placeholders 49 | 50 | @property 51 | def runtime(self): 52 | """Returns the model runtime""" 53 | return self.metadata.get("backend", self.metadata.get("platform")) 54 | 55 | def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]: 56 | """ Invokes the model. Parameters can be provided via args or kwargs. 57 | args, if provided, are assumed to match the order of inputs of the model. 58 | kwargs are matched with the model input names. 59 | """ 60 | inputs = self._create_inputs(*args, **kwargs) 61 | response = self.client.infer(model_name=self.model_name, inputs=inputs) 62 | result = [] 63 | for output in self.metadata['outputs']: 64 | tensor = torch.as_tensor(response.as_numpy(output['name'])) 65 | result.append(tensor) 66 | return result[0] if len(result) == 1 else result 67 | 68 | def _create_inputs(self, *args, **kwargs): 69 | args_len, kwargs_len = len(args), len(kwargs) 70 | if not args_len and not kwargs_len: 71 | raise RuntimeError("No inputs provided.") 72 | if args_len and kwargs_len: 73 | raise RuntimeError("Cannot specify args and kwargs at the same time") 74 | 75 | placeholders = self._create_input_placeholders_fn() 76 | if args_len: 77 | if args_len != len(placeholders): 78 | raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") 79 | for input, value in zip(placeholders, args): 80 | input.set_data_from_numpy(value.cpu().numpy()) 81 | else: 82 | for input in placeholders: 83 | value = kwargs[input.name] 84 | input.set_data_from_numpy(value.cpu().numpy()) 85 | return placeholders 86 | --------------------------------------------------------------------------------
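TritonRemoteModel resolves its input and output signature from the server's model metadata, so calling it requires no local model files. A minimal usage sketch follows; the server URL, input shape, and dtype are illustrative assumptions, not part of the repository:

import torch

from utils.triton import TritonRemoteModel

# The URL scheme selects the client: "grpc://" uses the Triton GRPC client, anything else HTTP.
model = TritonRemoteModel("http://localhost:8000")  # hypothetical server address
print(model.runtime)  # backend/platform reported by the model metadata

# Positional tensors are matched to the model's inputs in order; kwargs match by input name.
dummy = torch.zeros(1, 3, 640, 640)  # illustrative NCHW batch; the real shape comes from the served model
out = model(dummy)
print(out.shape if isinstance(out, torch.Tensor) else [o.shape for o in out])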