├── models
│   ├── __init__.py
│   ├── yolov4-tiny.yaml
│   ├── hub
│   │   ├── yolov3-tiny.yaml
│   │   ├── yolov5-fpn.yaml
│   │   ├── yolov5-panet.yaml
│   │   ├── yolov3.yaml
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov5-p2.yaml
│   │   ├── yolov5-p6.yaml
│   │   ├── yolov5-p7.yaml
│   │   └── anchors.yaml
│   ├── yolov5l.yaml
│   ├── yolov5m.yaml
│   ├── yolov5s.yaml
│   ├── yolov5x.yaml
│   ├── yolov5s_conv.yaml
│   ├── yolov5s_fangweisui.yaml
│   ├── yolov5s_xinglixiang.yaml
│   ├── yolov5s_ghostnet.yaml
│   ├── yolov5s_noUpsample.yaml
│   ├── yolov5s_mobilenetv2_1280.yaml
│   ├── yolov5s_mobilenetv2_1024.yaml
│   ├── export_onnx.py
│   ├── export_prune_onnx.py
│   ├── export.py
│   ├── experimental.py
│   ├── export_plugin_onnx.py
│   └── yolo.py
├── utils
│   ├── __init__.py
│   ├── google_app_engine
│   │   ├── additional_requirements.txt
│   │   ├── app.yaml
│   │   └── Dockerfile
│   ├── activations.py
│   ├── google_utils.py
│   ├── prune_utils.py
│   ├── distill_utils.py
│   ├── autoanchor.py
│   ├── metrics.py
│   └── torch_utils.py
├── train.sh
├── data
│   ├── coco_hand.yaml
│   ├── hyp.scratch.yaml
│   └── converter.py
├── script
│   ├── train.sh
│   ├── export_prune_onnx.sh
│   ├── prune_detect.sh
│   ├── train_sparsity.sh
│   ├── prune_finetune.sh
│   ├── prune_finetune_distill.sh
│   └── plugin_onnx_example.py
├── train_sparsity.sh
├── weights
│   ├── simple_model.sh
│   └── download_weights.sh
├── requirements.txt
├── Dockerfile
├── test.txt
├── cfg
│   ├── yolov4-tiny.cfg
│   ├── prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg
│   ├── yolov5s_v4_hand.cfg
│   └── yolov5s_v4.cfg
├── .gitignore
├── hubconf.py
├── detect.py
├── prune_detect.py
└── README.md

/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | python train.py --img 416 --batch 16 --epochs 50 --weights weights/yolov5s_v4.pt --data data/coco_hand.yaml --cfg models/yolov5s.yaml --name s_hand --cache
--------------------------------------------------------------------------------
/utils/google_app_engine/additional_requirements.txt:
--------------------------------------------------------------------------------
1 | # add these requirements in your app on top of the existing ones
2 | pip==18.1
3 | Flask==1.0.2
4 | gunicorn==19.9.0
5 | 
--------------------------------------------------------------------------------
/data/coco_hand.yaml:
--------------------------------------------------------------------------------
1 | train: data/train.txt # 128 images
2 | val: data/valid.txt # 128 images
3 | 
4 | # number of classes
5 | nc: 1
6 | 
7 | # class names
8 | names: [ 'hand' ]
9 | 
--------------------------------------------------------------------------------
/script/train.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | python train.py --img 640 --batch 8 --epochs 50 --weights weights/yolov5s_v4.pt --data data/coco_hand.yaml --cfg models/yolov5s.yaml --name s_hand
3 | cd script
--------------------------------------------------------------------------------
/script/export_prune_onnx.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | export PYTHONPATH="$PWD" && python models/export_prune_onnx.py --weights runs/train/s_hand_finetune_distill/weights/last.pt --img 640 --batch 1
3 | cd script
--------------------------------------------------------------------------------
/train_sparsity.sh:
--------------------------------------------------------------------------------
1 | python train_sparsity.py --img 416 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg models/yolov5s.yaml --weights runs/train/s_hand/weights/last.pt --name s_hand_sparsity -sr --s 0.001 --prune 1
--------------------------------------------------------------------------------
/script/prune_detect.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | python prune_detect.py --weights weights/last_s_hand_finetune.pt --img 640 --conf 0.7 --save-txt --source /home/lishuang/Disk/gitlab/traincode/yolov5/data/hand_dataset/images/test
3 | cd script
--------------------------------------------------------------------------------
/weights/simple_model.sh:
--------------------------------------------------------------------------------
1 | #refer to (https://github.com/onnx/onnx-tensorrt/blob/master/docs/faq.md#inputsat0-must-be-an-initializer-or-inputsat0is_weights)
2 | polygraphy surgeon sanitize yolov5s.onnx --fold-constants --output model_folded.onnx
3 | 
--------------------------------------------------------------------------------
/script/train_sparsity.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | python train_sparsity.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg models/yolov5s.yaml --weights runs/train/s_hand/weights/last.pt --name s_hand_sparsity -sr --s 0.001 --prune 1
3 | cd script
4 | 
5 | 
--------------------------------------------------------------------------------
/script/prune_finetune.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | python prune_finetune.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg ./cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg --weights ./weights/prune_0.5_keep_0.01_8x_last_v4s.pt --name s_hand_finetune
3 | cd script
--------------------------------------------------------------------------------
/utils/google_app_engine/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: custom
2 | env: flex
3 | 
4 | service: yolov5app
5 | 
6 | liveness_check:
7 |   initial_delay_sec: 600
8 | 
9 | manual_scaling:
10 |   instances: 1
11 | resources:
12 |   cpu: 1
13 |   memory_gb: 4
14 |   disk_size_gb: 20
--------------------------------------------------------------------------------
/script/prune_finetune_distill.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | python prune_finetune.py --img 640 --batch 6 --epochs 50 --data data/coco_hand.yaml --cfg ./cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg --weights ./weights/prune_0.5_keep_0.01_8x_last_v4s.pt --name s_hand_finetune_distill --distill
3 | cd script
--------------------------------------------------------------------------------
/weights/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download latest models from https://github.com/ultralytics/yolov5/releases
3 | # Usage:
4 | #    $ bash weights/download_weights.sh
5 | 
6 | python - <<EOF
7 | from utils.google_utils import attempt_download
8 | 
9 | for x in ['s', 'm', 'l', 'x']:
10 |     attempt_download(f'yolov5{x}.pt')
11 | 
12 | EOF
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # pip install -r requirements.txt
2 | 
3 | # base ----------------------------------------
4 | Cython
5 | matplotlib>=3.2.2
6 | numpy>=1.18.5
7 | opencv-python>=4.1.2
8 | Pillow
9 | PyYAML>=5.3
10 | scipy>=1.4.1
11 | tensorboard>=2.2
12 | torch>=1.7.0
13 | torchvision>=0.8.1
14 | tqdm>=4.41.0
15 | 
16 | # logging -------------------------------------
17 | # wandb
18 | 
19 | # plotting ------------------------------------
20 | seaborn>=0.11.0
21 | pandas
22 | 
23 | # export --------------------------------------
24 | # coremltools==4.0
25 | # onnx>=1.8.0
26 | # scikit-learn==0.19.2 # for coreml quantization
27 | 
28 | # extras --------------------------------------
29 | thop # FLOPS computation
30 | pycocotools>=2.0 # COCO mAP
--------------------------------------------------------------------------------
/utils/google_app_engine/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gcr.io/google-appengine/python
2 | 
3 | # Create a virtualenv for dependencies. This isolates these packages from
4 | # system-level packages.
5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2.
6 | RUN virtualenv /env -p python3
7 | 
8 | # Setting these environment variables are the same as running
9 | # source /env/bin/activate.
10 | ENV VIRTUAL_ENV /env
11 | ENV PATH /env/bin:$PATH
12 | 
13 | RUN apt-get update && apt-get install -y python-opencv
14 | 
15 | # Copy the application's requirements.txt and run pip to install all
16 | # dependencies into the virtualenv.
17 | ADD requirements.txt /app/requirements.txt
18 | RUN pip install -r /app/requirements.txt
19 | 
20 | # Add the application source code.
21 | ADD . /app
22 | 
23 | # Run a WSGI server to serve the application. gunicorn must be declared as
24 | # a dependency in requirements.txt.
25 | CMD gunicorn -b :$PORT main:app
26 | 
--------------------------------------------------------------------------------
/models/yolov4-tiny.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [10,14, 23,27, 37,58] # P4/16
9 |   - [81,82, 135,169, 344,319] # P5/32
10 | 
11 | backbone:
12 |   [[-1,1,Conv,[32,3,2,None,1,nn.LeakyReLU]], # 0
13 |    [-1,1,Conv,[64, 3, 2,None,1,nn.LeakyReLU]],
14 |    [-1,1,Conv,[64,3,1,None,1,nn.LeakyReLU]], # 2
15 |    [-1,1,Ctiny,[64]],
16 |    [[2, -1], 1, Concat, [1]],
17 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],
18 |    [-1,1,Conv,[128,3,1,None,1,nn.LeakyReLU]], # 6
19 |    [-1,1,Ctiny,[128]],
20 |    [[6, -1], 1, Concat, [1]],
21 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],
22 |    [-1,1,Conv,[256,3,1,None,1,nn.LeakyReLU]], # 10
23 |    [-1,1,Ctiny,[256]], # 11
24 |    [[10, -1], 1, Concat, [1]],
25 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]],
26 |   ]
27 | 
28 | head:
29 |   [[-1,1,Conv,[512,3,1,None,1,nn.LeakyReLU]],
30 |    [-1,1,Conv,[256,1,1,None,1,nn.LeakyReLU]],
31 |    [-1,1,Conv,[512,3,1,None,1,nn.LeakyReLU]], #16
32 | 
33 |    [-2,1,Conv,[128,1,1,None,1,nn.LeakyReLU]],
34 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 |    [[-1, 11], 1, Concat, [1]],
36 |    [-1,1,Conv,[256,3,1,None,1,nn.LeakyReLU]], #20
37 | 
38 |    [[20, 16], 1, Detect, [nc, anchors]],
39 |   ]
--------------------------------------------------------------------------------
/script/plugin_onnx_example.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import torch.nn as nn
4 | 
5 | class SiLUImplementation(torch.autograd.Function):
6 |     # This is the key part: for a custom op implemented as an autograd.Function, adding a static symbolic() method is all that is needed; apart from g, its parameters must exactly match the forward() parameters apart from ctx
7 |     # "SiLU" is used as the plugin (custom op) name
8 |     @staticmethod
9 |     def symbolic(g, input):
10 |         return g.op("SiLU", input)
11 | 
12 |     @staticmethod
13 |     def forward(ctx, x):
14 |         return x * torch.sigmoid(x)
15 | 
16 |     # backward is omitted here
17 | 
18 | class customSiLU(nn.Module):
19 |     def forward(self, x):
20 |         return SiLUImplementation.apply(x)
21 | 
22 | 
23 | class FooModel(torch.nn.Module):
24 |     def __init__(self):
25 |         super(FooModel, self).__init__()
26 |         self.SiLU = customSiLU()
27 | 
28 |     def forward(self, input1, input2):
29 |         return input2 + self.SiLU(input1)
30 | 
31 | 
32 | dummy_input1 = torch.zeros((1, 3, 3, 3))
33 | dummy_input2 = torch.zeros((1, 1, 3, 3))
34 | model = FooModel()
35 | 
36 | # This demonstrates the two-input case; in practice you can define as many inputs as you need
37 | # Newer torch versions need operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK to export custom layers; see the official torch.onnx documentation
38 | torch.onnx.export(model, (dummy_input1, dummy_input2), 'test.onnx', verbose=True, opset_version=12,
39 |                   operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK)
--------------------------------------------------------------------------------
/models/hub/yolov3-tiny.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [10,14, 23,27, 37,58] # P4/16
9 |   - [81,82, 135,169, 344,319] # P5/32
10 | 
11 | # YOLOv3-tiny backbone
12 | backbone:
13 |   # [from, number, module, args]
14 |   [[-1, 1, Conv, [16, 3, 1]], # 0
15 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
16 |    [-1, 1, Conv, [32, 3, 1]],
17 |    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 
3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, 
[None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head 
P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_conv.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 
| [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_fangweisui.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 2 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [40, 39, 51, 50, 61, 59] # P3/8 9 | - [75, 69, 62, 92, 88, 98] # P4/16 10 | - [115, 77, 93, 129, 128, 115] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_xinglixiang.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 2 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [24,21, 49,28, 68,41] # P3/8 9 | - [86,56, 103,74, 124,94] # P4/16 10 | - [185,85, 158,121, 214,137] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- 
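The two custom configs above (yolov5s_fangweisui.yaml and yolov5s_xinglixiang.yaml) differ from the stock yolov5s.yaml only in nc and in the anchor rows, which were re-estimated for their datasets. To do the same for another dataset, the anchors can be regenerated with the bundled autoanchor utility. A minimal sketch, assuming utils/autoanchor.py keeps the upstream kmean_anchors(path, n, img_size, thr, gen, verbose) signature (check the local file before relying on it):

    from utils.autoanchor import kmean_anchors

    # 9 anchors -> three rows of three (w,h) pairs for the P3/P4/P5 Detect layers
    k = kmean_anchors(path='data/coco_hand.yaml', n=9, img_size=416, thr=4.0, gen=1000, verbose=False)
    # round and paste these rows into the 'anchors:' section of the model YAML
    print(k.round().astype(int).reshape(3, 3, 2))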
/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /data/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 10.0 # image rotation (+/- deg) 26 | translate: 0.0 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.5 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.5 # image 
mixup (probability) -------------------------------------------------------------------------------- /models/yolov5s_ghostnet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, GhostBottleneck, [ 128, 3, 1]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, GhostBottleneck, [ 256, 3, 1]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, GhostBottleneck, [ 512, 3, 1]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, GhostBottleneck, [ 1024, 3, 1]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, GhostBottleneck, [ 512, 3, 1]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, GhostBottleneck, [ 256, 3, 1]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, GhostBottleneck, [ 512, 3, 1]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, GhostBottleneck, [ 1024, 3, 1]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s_noUpsample.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | # [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [-1, 1, nn.ConvTranspose2d, [512, 4, 2, 1, 0, 1, False]], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 3, C3, [512, False]], # 13 34 | 35 | [-1, 1, Conv, [256, 1, 1]], 36 | # [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [-1, 1, nn.ConvTranspose2d, [512, 4, 2, 1, 0, 1, False]], 38 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 39 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 40 | 41 | [-1, 1, Conv, [256, 3, 2]], 42 | [[-1, 14], 1, Concat, [1]], # cat head P4 43 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 44 | 45 | [-1, 1, Conv, [512, 3, 2]], 46 | [[-1, 10], 1, Concat, [1]], # cat head P5 47 | [-1, 3, C3, [1024, False]], # 23 
(P5/32-large) 48 | 49 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 50 | ] 51 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-p2.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 3 8 | 9 | # YOLOv5 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 13 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 14 | [ -1, 3, C3, [ 128 ] ], 15 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 16 | [ -1, 9, C3, [ 256 ] ], 17 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 18 | [ -1, 9, C3, [ 512 ] ], 19 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32 20 | [ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ], 21 | [ -1, 3, C3, [ 1024, False ] ], # 9 22 | ] 23 | 24 | # YOLOv5 head 25 | head: 26 | [ [ -1, 1, Conv, [ 512, 1, 1 ] ], 27 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 28 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 29 | [ -1, 3, C3, [ 512, False ] ], # 13 30 | 31 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 34 | [ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small) 35 | 36 | [ -1, 1, Conv, [ 128, 1, 1 ] ], 37 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2 39 | [ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall) 40 | 41 | [ -1, 1, Conv, [ 128, 3, 2 ] ], 42 | [ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3 43 | [ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small) 44 | 45 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 46 | [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4 47 | [ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium) 48 | 49 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 50 | [ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5 51 | [ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large) 52 | 53 | [ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5) 54 | ] 55 | -------------------------------------------------------------------------------- /models/hub/yolov5-p6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 3 8 | 9 | # YOLOv5 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 13 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 14 | [ -1, 3, C3, [ 128 ] ], 15 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 16 | [ -1, 9, C3, [ 256 ] ], 17 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 18 | [ -1, 9, C3, [ 512 ] ], 19 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 20 | [ -1, 3, C3, [ 768 ] ], 21 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 22 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 23 | [ -1, 3, C3, [ 1024, False ] ], # 11 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 29 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 30 | [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 31 | [ -1, 3, C3, [ 768, False ] ], # 15 32 | 33 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 34 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 35 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], 
# cat backbone P4 36 | [ -1, 3, C3, [ 512, False ] ], # 19 37 | 38 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 39 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 40 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 41 | [ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small) 42 | 43 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 44 | [ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4 45 | [ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium) 46 | 47 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 48 | [ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5 49 | [ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large) 50 | 51 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 52 | [ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6 53 | [ -1, 3, C3, [ 1024, False ] ], # 32 (P5/64-xlarge) 54 | 55 | [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 56 | ] 57 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:20.12-py3 3 | 4 | # Install linux packages 5 | RUN apt update && apt install -y screen libgl1-mesa-glx 6 | 7 | # Install python dependencies 8 | RUN pip install --upgrade pip 9 | COPY requirements.txt . 10 | RUN pip install -r requirements.txt 11 | RUN pip install gsutil 12 | 13 | # Create working directory 14 | RUN mkdir -p /usr/src/app 15 | WORKDIR /usr/src/app 16 | 17 | # Copy contents 18 | COPY . /usr/src/app 19 | 20 | # Copy weights 21 | #RUN python3 -c "from models import *; \ 22 | #attempt_download('weights/yolov5s.pt'); \ 23 | #attempt_download('weights/yolov5m.pt'); \ 24 | #attempt_download('weights/yolov5l.pt')" 25 | 26 | 27 | # --------------------------------------------------- Extras Below --------------------------------------------------- 28 | 29 | # Build and Push 30 | # t=ultralytics/yolov5:latest && sudo docker build -t $t . && sudo docker push $t 31 | # for v in {300..303}; do t=ultralytics/coco:v$v && sudo docker build -t $t . 
&& sudo docker push $t; done 32 | 33 | # Pull and Run 34 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t 35 | 36 | # Pull and Run with local directory access 37 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/coco:/usr/src/coco $t 38 | 39 | # Kill all 40 | # sudo docker kill $(sudo docker ps -q) 41 | 42 | # Kill all image-based 43 | # sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov5:latest) 44 | 45 | # Bash into running container 46 | # sudo docker container exec -it ba65811811ab bash 47 | 48 | # Bash into stopped container 49 | # sudo docker commit 092b16b25c5b usr/resume && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh usr/resume 50 | 51 | # Send weights to GCP 52 | # python -c "from utils.general import *; strip_optimizer('runs/train/exp0_*/weights/best.pt', 'tmp.pt')" && gsutil cp tmp.pt gs://*.pt 53 | 54 | # Clean up 55 | # docker system prune -a --volumes 56 | -------------------------------------------------------------------------------- /test.txt: -------------------------------------------------------------------------------- 1 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 2 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.1 3 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.15 4 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.2 5 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.25 6 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.3 7 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.35 8 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.4 9 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.45 10 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.5 11 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.55 12 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.6 13 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.65 14 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.7 15 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.75 16 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.8 17 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.85 18 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data 
data/coco_xinglixiang.yaml --img 416 --conf-thres 0.9 19 | python test.py --weights runs/train/s_xinglixiang/weights/last.pt --data data/coco_xinglixiang.yaml --img 416 --conf-thres 0.95 20 | -------------------------------------------------------------------------------- /models/hub/yolov5-p7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 3 8 | 9 | # YOLOv5 backbone 10 | backbone: 11 | # [from, number, module, args] 12 | [ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2 13 | [ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4 14 | [ -1, 3, C3, [ 128 ] ], 15 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8 16 | [ -1, 9, C3, [ 256 ] ], 17 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16 18 | [ -1, 9, C3, [ 512 ] ], 19 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32 20 | [ -1, 3, C3, [ 768 ] ], 21 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64 22 | [ -1, 3, C3, [ 1024 ] ], 23 | [ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128 24 | [ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ], 25 | [ -1, 3, C3, [ 1280, False ] ], # 13 26 | ] 27 | 28 | # YOLOv5 head 29 | head: 30 | [ [ -1, 1, Conv, [ 1024, 1, 1 ] ], 31 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 32 | [ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6 33 | [ -1, 3, C3, [ 1024, False ] ], # 17 34 | 35 | [ -1, 1, Conv, [ 768, 1, 1 ] ], 36 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 37 | [ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5 38 | [ -1, 3, C3, [ 768, False ] ], # 21 39 | 40 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 41 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 42 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 43 | [ -1, 3, C3, [ 512, False ] ], # 25 44 | 45 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 46 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 47 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 48 | [ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small) 49 | 50 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 51 | [ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4 52 | [ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium) 53 | 54 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 55 | [ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5 56 | [ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large) 57 | 58 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 59 | [ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6 60 | [ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge) 61 | 62 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], 63 | [ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7 64 | [ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge) 65 | 66 | [ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7) 67 | ] 68 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /models/yolov5s_mobilenetv2_1280.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | #depth_multiple: 1 # model depth multiple 4 | #width_multiple: 1 # layer channel multiple 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | 8 | # anchors 9 | anchors: 10 | - [10,13, 16,30, 33,23] # P3/8 11 | - [30,61, 62,45, 59,119] # P4/16 12 | - [116,90, 156,198, 373,326] # P5/32 13 | 14 | # YOLOv5 backbone 15 | backbone: 16 | # [from, number, module, args] 17 | [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2 32x320x320 18 | [-1, 1, InvertedResidual, [16, 1, 1]], # 1 16x320x320 19 | [-1, 1, InvertedResidual, [24, 2, 6]], # 2-P2/4 24x160x160 20 | [-1, 1, InvertedResidual, [24, 1, 6]], # 3-P2/4 24x160x160 21 | [-1, 1, InvertedResidual, [32, 2, 6]], # 4-P3/8 32x80x80 22 | [-1, 1, InvertedResidual, [32, 1, 6]], # 5-P3/8 32x80x80 23 | [-1, 1, InvertedResidual, [32, 1, 6]], # 6-P3/8 32x80x80 24 | [-1, 1, InvertedResidual, [64, 2, 6]], # 7-P4/16 64x40x40 25 | [-1, 1, InvertedResidual, [64, 1, 6]], # 8-P4/16 64x40x40 26 | [-1, 1, InvertedResidual, [64, 1, 6]], # 9-P4/16 64x40x40 27 | [-1, 1, InvertedResidual, [64, 1, 6]], # 10-P4/16 64x40x40 28 | [-1, 1, InvertedResidual, [96, 1, 6]], # 11 96X40X40 29 | [-1, 1, InvertedResidual, [96, 1, 6]], # 12 96X40X40 30 | [-1, 1, InvertedResidual, [96, 1, 6]], # 13 96X40X40 31 | [-1, 1, InvertedResidual, [160, 2, 6]], # 14-P5/32 160X20X20 32 | [-1, 1, InvertedResidual, [160, 1, 6]], # 15-P5/32 160X20X20 33 | [-1, 1, InvertedResidual, [160, 1, 6]], # 16-P5/32 160X20X20 34 | [-1, 1, InvertedResidual, [320, 1, 6]], # 17 320X20X20 35 | [-1, 1, Conv, [1280, 1, 1]], # 7-P5/32 36 | [-1, 1, SPP, [1024, [5, 9, 13]]], #19 37 | [-1, 3, C3, [1024, False]], # 29 38 | ] 39 | # 
YOLOv5 head 40 | head: 41 | [[-1, 1, Conv, [512, 1, 1]], 42 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 43 | [[-1, 13], 1, Concat, [1]], # cat backbone P4 44 | [-1, 3, C3, [512, False]], # 24 45 | 46 | [-1, 1, Conv, [256, 1, 1]], 47 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 48 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 49 | [-1, 3, C3, [256, False]], # 28 (P3/8-small) 50 | 51 | [-1, 1, Conv, [256, 3, 2]], 52 | [[-1, 25], 1, Concat, [1]], # cat head P4 53 | [-1, 3, C3, [512, False]], # 31 (P4/16-medium) 54 | 55 | [-1, 1, Conv, [512, 3, 2]], 56 | [[-1, 21], 1, Concat, [1]], # cat head P5 57 | [-1, 3, C3, [1024, False]], # 34 (P5/32-large) 58 | 59 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 60 | ] 61 | -------------------------------------------------------------------------------- /models/yolov5s_mobilenetv2_1024.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | #depth_multiple: 1 # model depth multiple 4 | #width_multiple: 1 # layer channel multiple 5 | depth_multiple: 0.33 # model depth multiple 6 | width_multiple: 0.50 # layer channel multiple 7 | 8 | # anchors 9 | anchors: 10 | - [10,13, 16,30, 33,23] # P3/8 11 | - [30,61, 62,45, 59,119] # P4/16 12 | - [116,90, 156,198, 373,326] # P5/32 13 | 14 | # YOLOv5 backbone 15 | backbone: 16 | # [from, number, module, args] 17 | [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2 32x320x320 18 | [-1, 1, InvertedResidual, [16, 1, 1]], # 1 16x320x320 19 | [-1, 1, InvertedResidual, [24, 2, 6]], # 2-P2/4 24x160x160 20 | [-1, 1, InvertedResidual, [24, 1, 6]], # 3-P2/4 24x160x160 21 | [-1, 1, InvertedResidual, [32, 2, 6]], # 4-P3/8 32x80x80 22 | [-1, 1, InvertedResidual, [32, 1, 6]], # 5-P3/8 32x80x80 23 | [-1, 1, InvertedResidual, [32, 1, 6]], # 6-P3/8 32x80x80 24 | [-1, 1, InvertedResidual, [64, 2, 6]], # 7-P4/16 64x40x40 25 | [-1, 1, InvertedResidual, [64, 1, 6]], # 8-P4/16 64x40x40 26 | [-1, 1, InvertedResidual, [64, 1, 6]], # 9-P4/16 64x40x40 27 | [-1, 1, InvertedResidual, [64, 1, 6]], # 10-P4/16 64x40x40 28 | [-1, 1, InvertedResidual, [96, 1, 6]], # 11 96X40X40 29 | [-1, 1, InvertedResidual, [96, 1, 6]], # 12 96X40X40 30 | [-1, 1, InvertedResidual, [96, 1, 6]], # 13 96X40X40 31 | [-1, 1, InvertedResidual, [160, 2, 6]], # 14-P5/32 160X20X20 32 | [-1, 1, InvertedResidual, [160, 1, 6]], # 15-P5/32 160X20X20 33 | [-1, 1, InvertedResidual, [160, 1, 6]], # 16-P5/32 160X20X20 34 | [-1, 1, InvertedResidual, [320, 1, 6]], # 17 320X20X20 35 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 36 | [-1, 1, SPP, [1024, [5, 9, 13]]], #19 37 | [-1, 3, C3, [1024, False]], # 29 38 | ] 39 | # YOLOv5 head 40 | head: 41 | [[-1, 1, Conv, [512, 1, 1]], 42 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 43 | [[-1, 17], 1, Concat, [1]], # cat backbone P4 44 | [-1, 3, C3, [512, False]], # 24 45 | 46 | [-1, 1, Conv, [256, 1, 1]], 47 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 48 | [[-1, 13], 1, Concat, [1]], # cat backbone P3 49 | [-1, 3, C3, [256, False]], # 28 (P3/8-small) 50 | 51 | [-1, 1, Conv, [256, 3, 2]], 52 | [[-1, 25], 1, Concat, [1]], # cat head P4 53 | [-1, 3, C3, [512, False]], # 31 (P4/16-medium) 54 | 55 | [-1, 1, Conv, [512, 3, 2]], 56 | [[-1, 21], 1, Concat, [1]], # cat head P5 57 | [-1, 3, C3, [1024, False]], # 34 (P5/32-large) 58 | 59 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 60 | ] 61 | -------------------------------------------------------------------------------- /models/hub/anchors.yaml: 
-------------------------------------------------------------------------------- 1 | # Default YOLOv5 anchors for COCO data 2 | 3 | 4 | # P5 ------------------------------------------------------------------------------------------------------------------- 5 | # P5-640: 6 | anchors_p5_640: 7 | - [ 10,13, 16,30, 33,23 ] # P3/8 8 | - [ 30,61, 62,45, 59,119 ] # P4/16 9 | - [ 116,90, 156,198, 373,326 ] # P5/32 10 | 11 | 12 | # P6 ------------------------------------------------------------------------------------------------------------------- 13 | # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 14 | anchors_p6_640: 15 | - [ 9,11, 21,19, 17,41 ] # P3/8 16 | - [ 43,32, 39,70, 86,64 ] # P4/16 17 | - [ 65,131, 134,130, 120,265 ] # P5/32 18 | - [ 282,180, 247,354, 512,387 ] # P6/64 19 | 20 | # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 21 | anchors_p6_1280: 22 | - [ 19,27, 44,40, 38,94 ] # P3/8 23 | - [ 96,68, 86,152, 180,137 ] # P4/16 24 | - [ 140,301, 303,264, 238,542 ] # P5/32 25 | - [ 436,615, 739,380, 925,792 ] # P6/64 26 | 27 | # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 28 | anchors_p6_1920: 29 | - [ 28,41, 67,59, 57,141 ] # P3/8 30 | - [ 144,103, 129,227, 270,205 ] # P4/16 31 | - [ 209,452, 455,396, 358,812 ] # P5/32 32 | - [ 653,922, 1109,570, 1387,1187 ] # P6/64 33 | 34 | 35 | # P7 ------------------------------------------------------------------------------------------------------------------- 36 | # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 37 | anchors_p7_640: 38 | - [ 11,11, 13,30, 29,20 ] # P3/8 39 | - [ 30,46, 61,38, 39,92 ] # P4/16 40 | - [ 78,80, 146,66, 79,163 ] # P5/32 41 | - [ 149,150, 321,143, 157,303 ] # P6/64 42 | - [ 257,402, 359,290, 524,372 ] # P7/128 43 | 44 | # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 45 | anchors_p7_1280: 46 | - [ 19,22, 54,36, 32,77 ] # P3/8 47 | - [ 70,83, 138,71, 75,173 ] # P4/16 48 | - [ 165,159, 148,334, 375,151 ] # P5/32 49 | - [ 334,317, 251,626, 499,474 ] # P6/64 50 | - [ 750,326, 534,814, 1079,818 ] # P7/128 51 | 52 | # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 53 | anchors_p7_1920: 54 | - [ 29,34, 81,55, 47,115 ] # P3/8 55 | - [ 105,124, 207,107, 113,259 ] # P4/16 56 | - [ 247,238, 222,500, 563,227 ] # P5/32 57 | - [ 501,476, 376,939, 749,711 ] # P6/64 58 | - [ 1126,489, 801,1222, 1618,1227 ] # P7/128 59 | 
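The export scripts that follow (models/export_onnx.py and models/export_prune_onnx.py) write both a raw .onnx and a simplified .sim.onnx file next to the .pt weights. A quick way to smoke-test an exported graph, assuming onnxruntime is installed (it is not listed in requirements.txt) and using the example weights path from the usage string:

    import numpy as np
    import onnxruntime as ort

    # simplified file produced by models/export_onnx.py for ./weights/yolov5s.pt
    sess = ort.InferenceSession('weights/yolov5s.sim.onnx')
    # dummy input with the same layout the export traces: (batch, 3, height, width), float32
    img = np.zeros((1, 3, 640, 640), dtype=np.float32)
    # 'images' matches input_names=['images'] in the torch.onnx.export call
    outputs = sess.run(None, {'images': img})
    print([o.shape for o in outputs])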
-------------------------------------------------------------------------------- /models/export_onnx.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export_onnx.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # ONNX export 57 | try: 58 | import onnx 59 | from onnxsim import simplify 60 | 61 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 62 | f = opt.weights.replace('.pt', '.onnx') # filename 63 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 64 | output_names=['classes', 'boxes'] if y is None else ['output']) 65 | 66 | # Checks 67 | onnx_model = onnx.load(f) # load onnx model 68 | onnx.checker.check_model(onnx_model) # check onnx model 69 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 70 | 71 | sim_f = opt.weights.replace('.pt', '.sim.onnx') # filename 72 | model_simp, check = simplify(onnx_model) 73 | assert check, "Simplified ONNX model could not be validated" 74 | onnx.save(model_simp, sim_f) 75 | 76 | # print(onnx.helper.printable_graph(model_simp.graph)) # print a human readable model 77 | print('ONNX export success, saved as %s' % f) 78 | except Exception as e: 79 | print('ONNX export failure: %s' % e) 80 | 81 | # Finish 82 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 83 | -------------------------------------------------------------------------------- /models/export_prune_onnx.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export_prune_onnx.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | # gs = int(max(model.stride)) # grid size (max stride) 38 | # opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | elif isinstance(m, nn.Sequential): 52 | if len(m)>1 and isinstance(m[1], nn.SiLU): 53 | m[1] = SiLU() 54 | # m=SiLU() 55 | # elif isinstance(m, models.yolo.Detect): 56 | # m.forward = m.forward_export # assign forward (optional) 57 | # model.model[-1].export = True # set Detect() layer export=True 58 | y = model(img) # dry run 59 | 60 | # ONNX export 61 | try: 62 | import onnx 63 | from onnxsim import simplify 64 | 65 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 66 | f = opt.weights.replace('.pt', '.onnx') # filename 67 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 68 | output_names=['classes', 'boxes'] if y is None else ['output']) 69 | 70 | # Checks 71 | onnx_model = onnx.load(f) # load onnx model 72 | onnx.checker.check_model(onnx_model) # check onnx model 73 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 74 | 75 | sim_f = opt.weights.replace('.pt', '.sim.onnx') # filename 76 | model_simp, check = simplify(onnx_model) 77 | assert check, "Simplified ONNX model could not be validated" 78 | onnx.save(model_simp, sim_f) 79 | 80 | # print(onnx.helper.printable_graph(model_simp.graph)) # print a human readable model 81 | print('ONNX export success, saved as %s' % f) 82 | except Exception as e: 83 | print('ONNX export failure: %s' % e) 84 | 85 | # Finish 86 | print('\nExport complete (%.2fs). 
Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 87 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 98 | -------------------------------------------------------------------------------- /data/converter.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from PIL import Image 3 | import os, glob 4 | import datetime 5 | import shutil 6 | 7 | running_from_path = os.getcwd() 8 | created_images_dir = 'images' 9 | created_labels_dir = 'labels' 10 | data_dir = 'data' # data_dir is the folder where this script lives 11 | 12 | def hms_string(sec_elapsed): # format the elapsed time for display 13 | h = int(sec_elapsed / (60 * 60)) 14 | m = int((sec_elapsed % (60 * 60)) / 60) 15 | s = sec_elapsed % 60.
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s) 17 | 18 | def generate_dir(set_name, root_path): # create the matching sub-folders under images and labels 19 | images_dir = os.path.join(root_path, 'images') 20 | annotation_dir = os.path.join(root_path, 'annotations') 21 | 22 | new_images_dir = os.path.join(created_images_dir, set_name) # images are copied from the source folder into this one 23 | new_annotation_dir = os.path.join(created_labels_dir, set_name) 24 | 25 | if not os.path.exists(new_images_dir): 26 | os.makedirs(new_images_dir) 27 | 28 | if not os.path.exists(new_annotation_dir): 29 | os.makedirs(new_annotation_dir) 30 | 31 | for img in glob.glob(os.path.join(images_dir, "*.jpg")): # copy images from the source folder to the new folder 32 | shutil.copy(img, new_images_dir) 33 | 34 | os.chdir(annotation_dir) # switch to the annotation directory 35 | matlab_annotations = glob.glob("*.mat") # file names only, without paths 36 | os.chdir(running_from_path) # switch back to the original directory 37 | 38 | for matfile in matlab_annotations: 39 | filename = matfile.split(".")[0] 40 | 41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg")) 42 | 43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False) 44 | 45 | boxes = content["boxes"] 46 | 47 | width, height = pil_image.size 48 | 49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs: 50 | for box_idx, box in enumerate(boxes.T): 51 | a = box[0][0][0][0] 52 | b = box[0][0][0][1] 53 | c = box[0][0][0][2] 54 | d = box[0][0][0][3] 55 | 56 | aXY = (a[0][1], a[0][0]) 57 | bXY = (b[0][1], b[0][0]) 58 | cXY = (c[0][1], c[0][0]) 59 | dXY = (d[0][1], d[0][0]) 60 | 61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0]) 62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0]) 63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1]) 64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1]) 65 | 66 | # clip, to keep coordinates inside the image 67 | maxX = min(maxX, width-1) 68 | minX = max(minX, 0) 69 | maxY = min(maxY, height-1) 70 | minY = max(minY, 0) 71 | 72 | # (box width / image width) 73 | norm_width = (maxX - minX) / width 74 | 75 | # (box height / image height) 76 | norm_height = (maxY - minY) / height 77 | 78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2 79 | 80 | norm_center_x = center_x / width 81 | norm_center_y = center_y / height 82 | 83 | if box_idx != 0: 84 | hs.write("\n") 85 | 86 | hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # the leading 0 is the class index 87 | 88 | def create_txt(dirlist, filename): 89 | with open(filename, "w") as txtfile: # generate the txt file under the data folder 90 | imglist = [] 91 | 92 | for dir in dirlist: # dir='images/test' 93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg' 94 | 95 | for idx, img in enumerate(imglist): 96 | if idx != 0: 97 | txtfile.write("\n") 98 | txtfile.write(os.path.join(data_dir, img)) # prepend the data/ prefix 99 | 100 | if __name__ == '__main__': 101 | start_time = datetime.datetime.now() 102 | 103 | ori_root="../../../data/" 104 | 105 | generate_dir("train", f"{ori_root}hand_dataset/training_dataset/training_data") # the first argument is the name of the generated sub-folder 106 | generate_dir("test", f"{ori_root}hand_dataset/test_dataset/test_data") 107 | generate_dir("validation", f"{ori_root}hand_dataset/validation_dataset/validation_data") 108 | 109 | create_txt((os.path.join(created_images_dir, 'train'), # merge the images under train and validation into train 110 | os.path.join(created_images_dir, 'validation')), 111 | 'train.txt') 112 | create_txt((os.path.join(created_images_dir, 'test'), ), 113 | 'valid.txt') 114 | 115 | end_time = datetime.datetime.now() 116 | seconds_elapsed = (end_time - start_time).total_seconds() 117 | print("It took {} to execute this".format(hms_string(seconds_elapsed)))
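# ----------------------------------------------------------------------------------
# Added note (not part of the original converter): each label file written above uses the
# standard YOLO text format, one object per line:
#     <class> <x_center> <y_center> <width> <height>
# with all values normalized by the image width/height; class 0 is the only class (hand).
# The helper below is a minimal, optional sanity check for a generated label file. It is
# illustrative only -- its name and behaviour are assumptions; nothing in this repo calls it.
def _check_yolo_label_file(label_path):
    """Return True if every line of a YOLO label file parses and is within [0, 1]."""
    with open(label_path) as f:
        for line in f:
            parts = line.split()
            if len(parts) != 5 or not parts[0].isdigit():
                return False  # malformed line or non-integer class id
            if not all(0.0 <= float(v) <= 1.0 for v in parts[1:]):
                return False  # coordinates must be normalized
    return True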
118 | -------------------------------------------------------------------------------- /cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | 21 | max_batches = 2000200 22 | policy=steps 23 | steps=1600000,1800000 24 | scales=.1,.1 25 | 26 | 27 | #weights_reject_freq=1001 28 | #ema_alpha=0.9998 29 | #equidistant_point=1000 30 | #num_sigmas_reject_badlabels=3 31 | #badlabels_rejection_percentage=0.2 32 | 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=32 37 | size=3 38 | stride=2 39 | pad=1 40 | activation=leaky 41 | 42 | [convolutional] 43 | batch_normalize=1 44 | filters=64 45 | size=3 46 | stride=2 47 | pad=1 48 | activation=leaky 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [route] 59 | layers=-1 60 | groups=2 61 | group_id=1 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=32 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=32 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [route] 80 | layers = -1,-2 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=64 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [route] 91 | layers = -6,-1 92 | 93 | [maxpool] 94 | size=2 95 | stride=2 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=128 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [route] 106 | layers=-1 107 | groups=2 108 | group_id=1 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [route] 127 | layers = -1,-2 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=128 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [route] 138 | layers = -6,-1 139 | 140 | [maxpool] 141 | size=2 142 | stride=2 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=256 147 | size=3 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [route] 153 | layers=-1 154 | groups=2 155 | group_id=1 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=128 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [route] 174 | layers = -1,-2 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=256 179 | size=1 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | [route] 185 | layers = -6,-1 186 | 187 | [maxpool] 188 | size=2 189 | stride=2 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=512 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | ################################## 200 | 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=1 205 | stride=1 206 | pad=1 207 | activation=leaky 208 | 209 | [convolutional] 210 | batch_normalize=1 211 | filters=512 212 | size=3 213 | 
stride=1 214 | pad=1 215 | activation=leaky 216 | 217 | [convolutional] 218 | size=1 219 | stride=1 220 | pad=1 221 | filters=255 222 | activation=linear 223 | 224 | 225 | 226 | [yolo] 227 | mask = 3,4,5 228 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 229 | classes=80 230 | num=6 231 | jitter=.3 232 | scale_x_y = 1.05 233 | cls_normalizer=1.0 234 | iou_normalizer=0.07 235 | iou_loss=ciou 236 | ignore_thresh = .7 237 | truth_thresh = 1 238 | random=0 239 | resize=1.5 240 | nms_kind=greedynms 241 | beta_nms=0.6 242 | #new_coords=1 243 | #scale_x_y = 2.0 244 | 245 | [route] 246 | layers = -4 247 | 248 | [convolutional] 249 | batch_normalize=1 250 | filters=128 251 | size=1 252 | stride=1 253 | pad=1 254 | activation=leaky 255 | 256 | [upsample] 257 | stride=2 258 | 259 | [route] 260 | layers = -1, 23 261 | 262 | [convolutional] 263 | batch_normalize=1 264 | filters=256 265 | size=3 266 | stride=1 267 | pad=1 268 | activation=leaky 269 | 270 | [convolutional] 271 | size=1 272 | stride=1 273 | pad=1 274 | filters=255 275 | activation=linear 276 | 277 | [yolo] 278 | mask = 1,2,3 279 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 280 | classes=80 281 | num=6 282 | jitter=.3 283 | scale_x_y = 1.05 284 | cls_normalizer=1.0 285 | iou_normalizer=0.07 286 | iou_loss=ciou 287 | ignore_thresh = .7 288 | truth_thresh = 1 289 | random=0 290 | resize=1.5 291 | nms_kind=greedynms 292 | beta_nms=0.6 293 | #new_coords=1 294 | #scale_x_y = 2.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.jpeg 4 | *.png 5 | *.bmp 6 | *.tif 7 | *.tiff 8 | *.heic 9 | *.JPG 10 | *.JPEG 11 | *.PNG 12 | *.BMP 13 | *.TIF 14 | *.TIFF 15 | *.HEIC 16 | *.mp4 17 | *.mov 18 | *.MOV 19 | *.avi 20 | *.data 21 | *.json 22 | *.whl 23 | 24 | 25 | #fangweisui 26 | fangweisui_test.py 27 | fangweisui_test.sh 28 | detect_videofile.py 29 | detect_fangweisui.py 30 | fangweisui_test.txt 31 | prune_finetune.txt 32 | train.txt 33 | 34 | #*.cfg 35 | !cfg/yolov3*.cfg 36 | 37 | storage.googleapis.com 38 | runs/* 39 | data/images 40 | data/labels 41 | data/*.cache 42 | 43 | pycocotools/* 44 | results*.txt 45 | gcp_test*.sh 46 | 47 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 48 | *.m~ 49 | *.mat 50 | !targets*.mat 51 | 52 | # Neural Network weights ----------------------------------------------------------------------------------------------- 53 | *.weights 54 | *.pt 55 | *.onnx 56 | *.mlmodel 57 | *.torchscript 58 | darknet53.conv.74 59 | yolov3-tiny.conv.15 60 | 61 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 62 | # Byte-compiled / optimized / DLL files 63 | __pycache__/ 64 | *.py[cod] 65 | *$py.class 66 | 67 | # C extensions 68 | *.so 69 | 70 | # Distribution / packaging 71 | .Python 72 | env/ 73 | build/ 74 | develop-eggs/ 75 | dist/ 76 | downloads/ 77 | eggs/ 78 | .eggs/ 79 | lib/ 80 | lib64/ 81 | parts/ 82 | sdist/ 83 | var/ 84 | wheels/ 85 | *.egg-info/ 86 | .installed.cfg 87 | *.egg 88 | 89 | # PyInstaller 90 | # Usually these files are written by a python script from a template 91 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
92 | *.manifest 93 | *.spec 94 | 95 | # Installer logs 96 | pip-log.txt 97 | pip-delete-this-directory.txt 98 | 99 | # Unit test / coverage reports 100 | htmlcov/ 101 | .tox/ 102 | .coverage 103 | .coverage.* 104 | .cache 105 | nosetests.xml 106 | coverage.xml 107 | *.cover 108 | .hypothesis/ 109 | 110 | # Translations 111 | *.mo 112 | *.pot 113 | 114 | # Django stuff: 115 | *.log 116 | local_settings.py 117 | 118 | # Flask stuff: 119 | instance/ 120 | .webassets-cache 121 | 122 | # Scrapy stuff: 123 | .scrapy 124 | 125 | # Sphinx documentation 126 | docs/_build/ 127 | 128 | # PyBuilder 129 | target/ 130 | 131 | # Jupyter Notebook 132 | .ipynb_checkpoints 133 | 134 | # pyenv 135 | .python-version 136 | 137 | # celery beat schedule file 138 | celerybeat-schedule 139 | 140 | # SageMath parsed files 141 | *.sage.py 142 | 143 | # dotenv 144 | .env 145 | 146 | # virtualenv 147 | .venv* 148 | venv*/ 149 | ENV*/ 150 | 151 | # Spyder project settings 152 | .spyderproject 153 | .spyproject 154 | 155 | # Rope project settings 156 | .ropeproject 157 | 158 | # mkdocs documentation 159 | /site 160 | 161 | # mypy 162 | .mypy_cache/ 163 | 164 | 165 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 166 | 167 | # General 168 | .DS_Store 169 | .AppleDouble 170 | .LSOverride 171 | 172 | # Icon must end with two \r 173 | Icon 174 | Icon? 175 | 176 | # Thumbnails 177 | ._* 178 | 179 | # Files that might appear in the root of a volume 180 | .DocumentRevisions-V100 181 | .fseventsd 182 | .Spotlight-V100 183 | .TemporaryItems 184 | .Trashes 185 | .VolumeIcon.icns 186 | .com.apple.timemachine.donotpresent 187 | 188 | # Directories potentially created on remote AFP share 189 | .AppleDB 190 | .AppleDesktop 191 | Network Trash Folder 192 | Temporary Items 193 | .apdisk 194 | 195 | 196 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 197 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 198 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 199 | 200 | # User-specific stuff: 201 | .idea/* 202 | .idea/**/workspace.xml 203 | .idea/**/tasks.xml 204 | .idea/dictionaries 205 | .html # Bokeh Plots 206 | .pg # TensorFlow Frozen Graphs 207 | .avi # videos 208 | 209 | # Sensitive or high-churn files: 210 | .idea/**/dataSources/ 211 | .idea/**/dataSources.ids 212 | .idea/**/dataSources.local.xml 213 | .idea/**/sqlDataSources.xml 214 | .idea/**/dynamic.xml 215 | .idea/**/uiDesigner.xml 216 | 217 | # Gradle: 218 | .idea/**/gradle.xml 219 | .idea/**/libraries 220 | 221 | # CMake 222 | cmake-build-debug/ 223 | cmake-build-release/ 224 | 225 | # Mongo Explorer plugin: 226 | .idea/**/mongoSettings.xml 227 | 228 | ## File-based project format: 229 | *.iws 230 | 231 | ## Plugin-specific files: 232 | 233 | # IntelliJ 234 | out/ 235 | 236 | # mpeltonen/sbt-idea plugin 237 | .idea_modules/ 238 | 239 | # JIRA plugin 240 | atlassian-ide-plugin.xml 241 | 242 | # Cursive Clojure plugin 243 | .idea/replstate.xml 244 | 245 | # Crashlytics plugin (for Android Studio and IntelliJ) 246 | com_crashlytics_export_strings.xml 247 | crashlytics.properties 248 | crashlytics-build.properties 249 | fabric.properties 250 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: 
https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(weights): 20 | # Attempt to download pretrained weights if not found locally 21 | weights = str(weights).strip().replace("'", '') 22 | file = Path(weights).name.lower() 23 | 24 | msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/' 25 | response = requests.get('https://api.github.com/repos/ultralytics/yolov5/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 27 | redundant = False # second download option 28 | 29 | if file in assets and not os.path.isfile(weights): 30 | try: # GitHub 31 | tag = response['tag_name'] # i.e. 'v1.0' 32 | url = f'https://github.com/ultralytics/yolov5/releases/download/{tag}/{file}' 33 | print('Downloading %s to %s...' % (url, weights)) 34 | torch.hub.download_url_to_file(url, weights) 35 | assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check 36 | except Exception as e: # GCP 37 | print('Download error: %s' % e) 38 | assert redundant, 'No secondary mirror' 39 | url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file 40 | print('Downloading %s to %s...' % (url, weights)) 41 | r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights) 42 | finally: 43 | if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check 44 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 45 | print('ERROR: Download failure: %s' % msg) 46 | print('') 47 | return 48 | 49 | 50 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', name='tmp.zip'): 51 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download() 52 | t = time.time() 53 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 54 | os.remove(name) if os.path.exists(name) else None # remove existing 55 | os.remove('cookie') if os.path.exists('cookie') else None 56 | 57 | # Attempt file download 58 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 59 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) 60 | if os.path.exists('cookie'): # large file 61 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) 62 | else: # small file 63 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) 64 | r = os.system(s) # execute, capture return 65 | os.remove('cookie') if os.path.exists('cookie') else None 66 | 67 | # Error check 68 | if r != 0: 69 | os.remove(name) if os.path.exists(name) else None # remove partial 70 | print('Download error ') # raise Exception('Download error') 71 | return r 72 | 73 | # Unzip if archive 74 | if name.endswith('.zip'): 75 | print('unzipping... 
', end='') 76 | os.system('unzip -q %s' % name) # unzip 77 | os.remove(name) # remove zip to free space 78 | 79 | print('Done (%.1fs)' % (time.time() - t)) 80 | return r 81 | 82 | 83 | def get_token(cookie="./cookie"): 84 | with open(cookie) as f: 85 | for line in f: 86 | if "download" in line: 87 | return line.split()[-1] 88 | return "" 89 | 90 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 91 | # # Uploads a file to a bucket 92 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 93 | # 94 | # storage_client = storage.Client() 95 | # bucket = storage_client.get_bucket(bucket_name) 96 | # blob = bucket.blob(destination_blob_name) 97 | # 98 | # blob.upload_from_filename(source_file_name) 99 | # 100 | # print('File {} uploaded to {}.'.format( 101 | # source_file_name, 102 | # destination_blob_name)) 103 | # 104 | # 105 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 106 | # # Uploads a blob from a bucket 107 | # storage_client = storage.Client() 108 | # bucket = storage_client.get_bucket(bucket_name) 109 | # blob = bucket.blob(source_blob_name) 110 | # 111 | # blob.download_to_filename(destination_file_name) 112 | # 113 | # print('Blob {} downloaded to {}.'.format( 114 | # source_blob_name, 115 | # destination_file_name)) 116 | -------------------------------------------------------------------------------- /utils/prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def get_sr_flag(epoch, sr): 4 | # return epoch >= 5 and sr 5 | return sr 6 | 7 | class BNOptimizer(): 8 | 9 | @staticmethod 10 | def updateBN(sr_flag, module_list, s, prune_idx, epoch, idx2mask=None, opt=None): 11 | if sr_flag: 12 | # s = s if epoch <= opt.epochs * 0.5 else s * 0.01 13 | for idx in prune_idx: 14 | # Sequential(Conv, BN, LeakyReLU) 15 | # bn_module = module_list[idx][1] 16 | bn_module = module_list[idx][1] if type( 17 | module_list[idx][1]).__name__ == 'BatchNorm2d' else module_list[idx][0] 18 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 19 | if idx2mask: 20 | for idx in idx2mask: 21 | # bn_module = module_list[idx][1] 22 | bn_module = module_list[idx][1] if type( 23 | module_list[idx][1]).__name__ == 'BatchNorm2d' else module_list[idx][0] 24 | #bn_module.weight.grad.data.add_(0.5 * s * torch.sign(bn_module.weight.data) * (1 - idx2mask[idx].cuda())) 25 | bn_module.weight.grad.data.sub_(0.99 * s * torch.sign(bn_module.weight.data) * idx2mask[idx].cuda()) 26 | 27 | def parse_module_defs(module_defs): 28 | 29 | CBL_idx = [] 30 | Conv_idx = [] 31 | ignore_idx = set() 32 | for i, module_def in enumerate(module_defs): 33 | if module_def['type'] == 'convolutional': 34 | if module_def['batch_normalize'] == '1': 35 | CBL_idx.append(i) 36 | else: 37 | Conv_idx.append(i) 38 | if module_defs[i+1]['type'] == 'maxpool' and module_defs[i+2]['type'] == 'route': 39 | # do not prune the CBL right before SPP (distinguishes SPP models from tiny) 40 | ignore_idx.add(i) 41 | if module_defs[i+1]['type'] == 'route' and 'groups' in module_defs[i+1]: 42 | ignore_idx.add(i) 43 | if module_defs[i+1]['type'] == 'convolutional_nobias': 44 | ignore_idx.add(i) 45 | elif module_def['type'] == 'convolutional_noconv': 46 | CBL_idx.append(i) 47 | ignore_idx.add(i) 48 | elif module_def['type'] == 'shortcut': 49 | ignore_idx.add(i-1) 50 | identity_idx = (i + int(module_def['from'])) 51 | if module_defs[identity_idx]['type'] == 'convolutional': 52 | ignore_idx.add(identity_idx) 53 | elif
module_defs[identity_idx]['type'] == 'shortcut': 54 | ignore_idx.add(identity_idx - 1) 55 | 56 | elif module_def['type'] == 'upsample': 57 | # do not prune the conv layer right before an upsample layer 58 | ignore_idx.add(i - 1) 59 | 60 | 61 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 62 | 63 | return CBL_idx, Conv_idx, prune_idx 64 | 65 | 66 | def parse_module_defs2(module_defs): 67 | CBL_idx = [] 68 | Conv_idx = [] 69 | shortcut_idx = dict() 70 | shortcut_all = set() 71 | ignore_idx = set() 72 | for i, module_def in enumerate(module_defs): 73 | if module_def['type'] == 'convolutional': 74 | if module_def['batch_normalize'] == '1': 75 | CBL_idx.append(i) 76 | else: 77 | Conv_idx.append(i) 78 | if module_defs[i + 1]['type'] == 'maxpool' and module_defs[i + 2]['type'] == 'route': 79 | # do not prune the CBL right before SPP (distinguishes SPP models from tiny) 80 | ignore_idx.add(i) 81 | if module_defs[i + 1]['type'] == 'route' and 'groups' in module_defs[i + 1]: 82 | ignore_idx.add(i) 83 | 84 | elif module_def['type'] == 'convolutional_noconv': 85 | CBL_idx.append(i) 86 | 87 | elif module_def['type'] == 'upsample': 88 | # do not prune the conv layer right before an upsample layer 89 | ignore_idx.add(i - 1) 90 | 91 | elif module_def['type'] == 'shortcut': 92 | identity_idx = (i + int(module_def['from'])) 93 | if module_defs[identity_idx]['type'] == 'convolutional': 94 | 95 | # ignore_idx.add(identity_idx) 96 | shortcut_idx[i - 1] = identity_idx 97 | shortcut_all.add(identity_idx) 98 | elif module_defs[identity_idx]['type'] == 'shortcut': 99 | 100 | # ignore_idx.add(identity_idx - 1) 101 | shortcut_idx[i - 1] = identity_idx - 1 102 | shortcut_all.add(identity_idx - 1) 103 | shortcut_all.add(i - 1) 104 | 105 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 106 | 107 | return CBL_idx, Conv_idx, prune_idx, shortcut_idx, shortcut_all 108 | 109 | 110 | def gather_bn_weights(module_list, prune_idx): 111 | 112 | size_list = [module_list[idx][1].weight.data.shape[0] if type(module_list[idx][1]).__name__ == 'BatchNorm2d' else module_list[idx][0].weight.data.shape[0] for idx in prune_idx] 113 | 114 | bn_weights = torch.zeros(sum(size_list)) 115 | index = 0 116 | for idx, size in zip(prune_idx, size_list): 117 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() if type(module_list[idx][1]).__name__ == 'BatchNorm2d' else module_list[idx][0].weight.data.abs().clone() 118 | index += size 119 | 120 | return bn_weights -------------------------------------------------------------------------------- /utils/distill_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # from utils.loss import build_targets 4 | 5 | def wh_iou_cfg(box1, box2): 6 | # Returns the IoU of wh1 to wh2.
wh1 is 2, wh2 is nx2 7 | box2 = box2.t() 8 | 9 | # w, h = box1 10 | w1, h1 = box1[0], box1[1] 11 | w2, h2 = box2[0], box2[1] 12 | 13 | # Intersection area 14 | inter_area = torch.min(w1, w2) * torch.min(h1, h2) 15 | 16 | # Union Area 17 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area 18 | 19 | return inter_area / union_area # iou 20 | 21 | def build_targets_cfg(model, targets): 22 | # targets = [image, class, x, y, w, h] 23 | 24 | nt = len(targets) 25 | tcls, tbox, indices, av = [], [], [], [] 26 | multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 27 | for i in model.yolo_layers: 28 | # get number of grid points and anchor vec for this yolo layer 29 | if multi_gpu: 30 | ng, anchor_vec = model.module.module_list[i].ng, model.module.module_list[i].anchor_vec 31 | else: 32 | ng, anchor_vec = model.module_list[i].ng, model.module_list[i].anchor_vec 33 | 34 | # iou of targets-anchors 35 | t, a = targets, [] 36 | gwh = t[:, 4:6] * ng 37 | if nt: 38 | iou = torch.stack([wh_iou_cfg(x, gwh) for x in anchor_vec], 0) 39 | 40 | use_best_anchor = False 41 | if use_best_anchor: 42 | iou, a = iou.max(0) # best iou and anchor 43 | else: # use all anchors 44 | na = len(anchor_vec) # number of anchors 45 | a = torch.arange(na).view((-1, 1)).repeat([1, nt]).view(-1) 46 | t = targets.repeat([na, 1]) 47 | gwh = gwh.repeat([na, 1]) 48 | iou = iou.view(-1) # use all ious 49 | 50 | # reject anchors below iou_thres (OPTIONAL, increases P, lowers R) 51 | reject = True 52 | if reject: 53 | j = iou > model.hyp['iou_t'] # iou threshold hyperparameter 54 | t, a, gwh = t[j], a[j], gwh[j] 55 | 56 | # Indices 57 | b, c = t[:, :2].long().t() # target image, class 58 | gxy = t[:, 2:4] * ng # grid x, y 59 | gi, gj = gxy.long().t() # grid x, y indices 60 | indices.append((b, a, gj, gi)) 61 | 62 | # GIoU 63 | gxy -= gxy.floor() # xy 64 | tbox.append(torch.cat((gxy, gwh), 1)) # xywh (grids) 65 | av.append(anchor_vec[a]) # anchor vec 66 | 67 | # Class 68 | tcls.append(c) 69 | if c.shape[0]: # if any targets 70 | assert c.max() <= model.nc, 'Target classes exceed model classes' 71 | 72 | return tcls, tbox, indices, av 73 | 74 | def distillation_loss1(output_s, output_t, num_classes, batch_size): 75 | T = 3.0 76 | Lambda_ST = 0.001 77 | criterion_st = torch.nn.KLDivLoss(reduction='sum') 78 | output_s = torch.cat([i.view(-1, num_classes + 5) for i in output_s]) 79 | output_t = torch.cat([i.view(-1, num_classes + 5) for i in output_t]) 80 | loss_st = criterion_st(nn.functional.log_softmax(output_s/T, dim=1), nn.functional.softmax(output_t/T,dim=1))* (T*T) / batch_size 81 | return loss_st * Lambda_ST 82 | 83 | 84 | 85 | def distillation_loss2(model, targets, output_s, output_t): 86 | reg_m = 0.0 87 | T = 3.0 88 | Lambda_cls, Lambda_box = 0.0001, 0.001 89 | 90 | criterion_st = torch.nn.KLDivLoss(reduction='sum') 91 | ft = torch.cuda.FloatTensor if output_s[0].is_cuda else torch.Tensor 92 | lcls, lbox = ft([0]), ft([0]) 93 | 94 | tcls, tbox, indices, anchor_vec = build_targets_cfg(model,targets) 95 | reg_ratio, reg_num, reg_nb = 0, 0, 0 96 | for i, (ps, pt) in enumerate(zip(output_s, output_t)): # layer index, layer predictions 97 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 98 | ps=ps.float() 99 | nb = len(b) 100 | if nb: # number of targets 101 | pss = ps[b, a, gj, gi] # prediction subset corresponding to targets 102 | pts = pt[b, a, gj, gi] 103 | 104 | psxy = torch.sigmoid(pss[:, 0:2]) # pxy = pxy * s - (s - 1) / 2, s = 1.5 (scale_xy) 105 | psbox = torch.cat((psxy, 
torch.exp(pss[:, 2:4]) * anchor_vec[i]), 1).view(-1, 4) # predicted box 106 | 107 | ptxy = torch.sigmoid(pts[:, 0:2]) # pxy = pxy * s - (s - 1) / 2, s = 1.5 (scale_xy) 108 | ptbox = torch.cat((ptxy, torch.exp(pts[:, 2:4]) * anchor_vec[i]), 1).view(-1, 4) # predicted box 109 | 110 | 111 | l2_dis_s = (psbox - tbox[i]).pow(2).sum(1) 112 | l2_dis_s_m = l2_dis_s + reg_m 113 | l2_dis_t = (ptbox - tbox[i]).pow(2).sum(1) 114 | l2_num = l2_dis_s_m > l2_dis_t 115 | lbox += l2_dis_s[l2_num].sum() 116 | reg_num += l2_num.sum().item() 117 | reg_nb += nb 118 | 119 | output_s_i = ps[..., 4:].view(-1, model.nc + 1) 120 | output_t_i = pt[..., 4:].view(-1, model.nc + 1) 121 | lcls += criterion_st(nn.functional.log_softmax(output_s_i/T, dim=1), nn.functional.softmax(output_t_i/T,dim=1))* (T*T) / ps.size(0) 122 | 123 | if reg_nb: 124 | reg_ratio = reg_num / reg_nb 125 | 126 | return lcls * Lambda_cls + lbox * Lambda_box, reg_ratio -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80) 6 | """ 7 | 8 | from pathlib import Path 9 | 10 | import torch 11 | 12 | from models.yolo import Model 13 | from utils.general import set_logging 14 | from utils.google_utils import attempt_download 15 | 16 | dependencies = ['torch', 'yaml'] 17 | set_logging() 18 | 19 | 20 | def create(name, pretrained, channels, classes, autoshape): 21 | """Creates a specified YOLOv5 model 22 | 23 | Arguments: 24 | name (str): name of model, i.e. 'yolov5s' 25 | pretrained (bool): load pretrained weights into the model 26 | channels (int): number of input channels 27 | classes (int): number of model classes 28 | 29 | Returns: 30 | pytorch model 31 | """ 32 | config = Path(__file__).parent / 'models' / f'{name}.yaml' # model.yaml path 33 | try: 34 | model = Model(config, channels, classes) 35 | if pretrained: 36 | fname = f'{name}.pt' # checkpoint filename 37 | attempt_download(fname) # download if not found locally 38 | ckpt = torch.load(fname, map_location=torch.device('cpu')) # load 39 | state_dict = ckpt['model'].float().state_dict() # to FP32 40 | state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter 41 | model.load_state_dict(state_dict, strict=False) # load 42 | if len(ckpt['model'].names) == classes: 43 | model.names = ckpt['model'].names # set class names attribute 44 | if autoshape: 45 | model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 46 | return model 47 | 48 | except Exception as e: 49 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 50 | s = 'Cache maybe be out of date, try force_reload=True. See %s for help.' 
% help_url 51 | raise Exception(s) from e 52 | 53 | 54 | def yolov5s(pretrained=False, channels=3, classes=80, autoshape=True): 55 | """YOLOv5-small model from https://github.com/ultralytics/yolov5 56 | 57 | Arguments: 58 | pretrained (bool): load pretrained weights into the model, default=False 59 | channels (int): number of input channels, default=3 60 | classes (int): number of model classes, default=80 61 | 62 | Returns: 63 | pytorch model 64 | """ 65 | return create('yolov5s', pretrained, channels, classes, autoshape) 66 | 67 | 68 | def yolov5m(pretrained=False, channels=3, classes=80, autoshape=True): 69 | """YOLOv5-medium model from https://github.com/ultralytics/yolov5 70 | 71 | Arguments: 72 | pretrained (bool): load pretrained weights into the model, default=False 73 | channels (int): number of input channels, default=3 74 | classes (int): number of model classes, default=80 75 | 76 | Returns: 77 | pytorch model 78 | """ 79 | return create('yolov5m', pretrained, channels, classes, autoshape) 80 | 81 | 82 | def yolov5l(pretrained=False, channels=3, classes=80, autoshape=True): 83 | """YOLOv5-large model from https://github.com/ultralytics/yolov5 84 | 85 | Arguments: 86 | pretrained (bool): load pretrained weights into the model, default=False 87 | channels (int): number of input channels, default=3 88 | classes (int): number of model classes, default=80 89 | 90 | Returns: 91 | pytorch model 92 | """ 93 | return create('yolov5l', pretrained, channels, classes, autoshape) 94 | 95 | 96 | def yolov5x(pretrained=False, channels=3, classes=80, autoshape=True): 97 | """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5 98 | 99 | Arguments: 100 | pretrained (bool): load pretrained weights into the model, default=False 101 | channels (int): number of input channels, default=3 102 | classes (int): number of model classes, default=80 103 | 104 | Returns: 105 | pytorch model 106 | """ 107 | return create('yolov5x', pretrained, channels, classes, autoshape) 108 | 109 | 110 | def custom(path_or_model='path/to/model.pt', autoshape=True): 111 | """YOLOv5-custom model from https://github.com/ultralytics/yolov5 112 | 113 | Arguments (3 options): 114 | path_or_model (str): 'path/to/model.pt' 115 | path_or_model (dict): torch.load('path/to/model.pt') 116 | path_or_model (nn.Module): torch.load('path/to/model.pt')['model'] 117 | 118 | Returns: 119 | pytorch model 120 | """ 121 | model = torch.load(path_or_model) if isinstance(path_or_model, str) else path_or_model # load checkpoint 122 | if isinstance(model, dict): 123 | model = model['model'] # load model 124 | 125 | hub_model = Model(model.yaml).to(next(model.parameters()).device) # create 126 | hub_model.load_state_dict(model.float().state_dict()) # load state_dict 127 | hub_model.names = model.names # class names 128 | return hub_model.autoshape() if autoshape else hub_model 129 | 130 | 131 | if __name__ == '__main__': 132 | model = create(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example 133 | # model = custom(path_or_model='path/to/model.pt') # custom example 134 | 135 | # Verify inference 136 | from PIL import Image 137 | 138 | imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')] 139 | results = model(imgs) 140 | results.show() 141 | results.print() 142 | -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental 
modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | # self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) #5 ? 3 53 | self.cv2 = Conv(c_, c_, 3, 1, None, c_, act) # 5 ? 
3 54 | 55 | def forward(self, x): 56 | y = self.cv1(x) 57 | return torch.cat([y, self.cv2(y)], 1) 58 | 59 | 60 | class GhostBottleneck(nn.Module): 61 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 62 | def __init__(self, c1, c2, n,k, s): 63 | super(GhostBottleneck, self).__init__() 64 | c_ = c2 // 2 65 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 66 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 67 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 68 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 69 | Conv(c1, c2, 1, 1, act=False)) if (s == 2 or c1 != c2) else nn.Identity() 70 | 71 | def forward(self, x): 72 | return self.conv(x) + self.shortcut(x) 73 | 74 | 75 | class MixConv2d(nn.Module): 76 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 77 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 78 | super(MixConv2d, self).__init__() 79 | groups = len(k) 80 | if equal_ch: # equal c_ per group 81 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 82 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 83 | else: # equal weight.numel() per group 84 | b = [c2] + [0] * groups 85 | a = np.eye(groups + 1, groups, k=-1) 86 | a -= np.roll(a, 1, axis=1) 87 | a *= np.array(k) ** 2 88 | a[0] = 1 89 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 90 | 91 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 92 | self.bn = nn.BatchNorm2d(c2) 93 | self.act = nn.LeakyReLU(0.1, inplace=True) 94 | 95 | def forward(self, x): 96 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 97 | 98 | 99 | class Ensemble(nn.ModuleList): 100 | # Ensemble of models 101 | def __init__(self): 102 | super(Ensemble, self).__init__() 103 | 104 | def forward(self, x, augment=False): 105 | y = [] 106 | for module in self: 107 | y.append(module(x, augment)[0]) 108 | # y = torch.stack(y).max(0)[0] # max ensemble 109 | # y = torch.stack(y).mean(0) # mean ensemble 110 | y = torch.cat(y, 1) # nms ensemble 111 | return y, None # inference, train output 112 | 113 | 114 | def attempt_load(weights, map_location=None): 115 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 116 | model = Ensemble() 117 | for w in weights if isinstance(weights, list) else [weights]: 118 | attempt_download(w) 119 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 120 | 121 | # Compatibility updates 122 | for m in model.modules(): 123 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 124 | m.inplace = True # pytorch 1.7.0 compatibility 125 | elif type(m) is Conv: 126 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 127 | 128 | if len(model) == 1: 129 | return model[-1] # return model 130 | else: 131 | print('Ensemble created with %s\n' % weights) 132 | for k in ['names', 'stride']: 133 | setattr(model, k, getattr(model[-1], k)) 134 | return model # return ensemble 135 | -------------------------------------------------------------------------------- /models/export_plugin_onnx.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export_plugin_onnx.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import 
sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish 19 | from utils.general import set_logging, check_img_size 20 | 21 | class SiLUImplementtation(torch.autograd.Function): 22 | @staticmethod 23 | def symbolic(g,input): 24 | return g.op("SiLU",input) 25 | 26 | def forward(self,x): 27 | return x * torch.sigmoid(x) 28 | 29 | class customSiLU(nn.Module): 30 | def forward(self,x): 31 | return SiLUImplementtation.apply(x) 32 | 33 | if __name__ == '__main__': 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 36 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 37 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 38 | opt = parser.parse_args() 39 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 40 | print(opt) 41 | set_logging() 42 | t = time.time() 43 | 44 | # Load PyTorch model 45 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 46 | labels = model.names 47 | 48 | # Checks 49 | gs = int(max(model.stride)) # grid size (max stride) 50 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 51 | 52 | # Input 53 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 54 | 55 | # Update model 56 | for k, m in model.named_modules(): 57 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 58 | if isinstance(m, models.common.Conv): # assign export-friendly activations 59 | if isinstance(m.act, nn.Hardswish): 60 | m.act = Hardswish() 61 | elif isinstance(m.act, nn.SiLU): 62 | m.act = customSiLU() 63 | # elif isinstance(m, models.yolo.Detect): 64 | # m.forward = m.forward_export # assign forward (optional) 65 | if False: 66 | import cv2 67 | from utils.general import non_max_suppression,scale_coords 68 | from pathlib import Path 69 | from numpy import random 70 | from utils.plots import plot_one_box 71 | from utils.datasets import letterbox 72 | import numpy as np 73 | path="data/images/coco_1.jpg" 74 | img0 = cv2.imread(path) # BGR 75 | assert img0 is not None, 'Image Not Found ' + path 76 | 77 | # Get names and colors 78 | names = model.module.names if hasattr(model, 'module') else model.names 79 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] 80 | 81 | # Padded resize 82 | img = letterbox(img0, new_shape=640)[0] 83 | 84 | # Convert 85 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 86 | img = np.ascontiguousarray(img) 87 | img = torch.from_numpy(img).to('cpu') 88 | img = img.float() # uint8 to fp16/32 89 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 90 | if img.ndimension() == 3: 91 | img = img.unsqueeze(0) 92 | pred = model(img)[0] 93 | # Apply NMS 94 | pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False) 95 | # Process detections 96 | for i, det in enumerate(pred): # detections per image 97 | p, s,im0 = path, '',img0 98 | 99 | p = Path(p) # to Path 100 | s += '%gx%g ' % img.shape[2:] # print string 101 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh 102 | if len(det): 103 | # Rescale boxes from img_size to im0 size 104 | det[:, :4] = scale_coords(img.shape[2:], det[:, 
:4], im0.shape).round() 105 | 106 | # Print results 107 | for c in det[:, -1].unique(): 108 | n = (det[:, -1] == c).sum() # detections per class 109 | s += f'{n} {names[int(c)]}s, ' # add to string 110 | 111 | # Write results 112 | for *xyxy, conf, cls in reversed(det): 113 | label = f'{names[int(cls)]} {conf:.2f}' 114 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) 115 | save_path="result.jpg" 116 | cv2.imwrite(save_path, im0) 117 | 118 | 119 | model.model[-1].export = True # set Detect() layer export=True 120 | y = model(img) # dry run 121 | 122 | # ONNX export 123 | try: 124 | import onnx 125 | 126 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 127 | f = opt.weights.replace('.pt', '.onnx') # filename 128 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK, 129 | input_names=['images'],output_names=['classes', 'boxes'] if y is None else ['output']) 130 | 131 | # Checks 132 | onnx_model = onnx.load(f) # load onnx model 133 | # onnx.checker.check_model(onnx_model) # check onnx model 134 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 135 | 136 | print('ONNX export success, saved as %s' % f) 137 | except Exception as e: 138 | print('ONNX export failure: %s' % e) 139 | 140 | # Finish 141 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 142 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | 10 | def check_anchor_order(m): 11 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 12 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 13 | da = a[-1] - a[0] # delta a 14 | ds = m.stride[-1] - m.stride[0] # delta s 15 | if da.sign() != ds.sign(): # same order 16 | print('Reversing anchor order') 17 | m.anchors[:] = m.anchors.flip(0) 18 | m.anchor_grid[:] = m.anchor_grid.flip(0) 19 | 20 | 21 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 22 | # Check anchor fit to data, recompute if necessary 23 | print('\nAnalyzing anchors... ', end='') 24 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 25 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 26 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 27 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 28 | 29 | def metric(k): # compute metric 30 | r = wh[:, None] / k[None] 31 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 32 | best = x.max(1)[0] # best_x 33 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 34 | bpr = (best > 1. / thr).float().mean() # best possible recall 35 | return bpr, aat 36 | 37 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 38 | print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') 39 | if bpr < 0.98: # threshold to recompute 40 | print('. 
Attempting to improve anchors, please wait...') 41 | na = m.anchor_grid.numel() // 2 # number of anchors 42 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 43 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 44 | if new_bpr > bpr: # replace anchors 45 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 46 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 47 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 48 | check_anchor_order(m) 49 | print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') 50 | else: 51 | print('Original anchors better than new anchors. Proceeding with original anchors.') 52 | print('') # newline 53 | 54 | 55 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 56 | """ Creates kmeans-evolved anchors from training dataset 57 | 58 | Arguments: 59 | path: path to dataset *.yaml, or a loaded dataset 60 | n: number of anchors 61 | img_size: image size used for training 62 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 63 | gen: generations to evolve anchors using genetic algorithm 64 | verbose: print all results 65 | 66 | Return: 67 | k: kmeans evolved anchors 68 | 69 | Usage: 70 | from utils.autoanchor import *; _ = kmean_anchors() 71 | """ 72 | thr = 1. / thr 73 | 74 | def metric(k, wh): # compute metrics 75 | r = wh[:, None] / k[None] 76 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 77 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 78 | return x, x.max(1)[0] # x, best_x 79 | 80 | def anchor_fitness(k): # mutation fitness 81 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 82 | return (best * (best > thr).float()).mean() # fitness 83 | 84 | def print_results(k): 85 | k = k[np.argsort(k.prod(1))] # sort small to large 86 | x, best = metric(k, wh0) 87 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 88 | print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) 89 | print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % 90 | (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') 91 | for i, x in enumerate(k): 92 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 93 | return k 94 | 95 | if isinstance(path, str): # *.yaml file 96 | with open(path) as f: 97 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict 98 | from utils.datasets import LoadImagesAndLabels 99 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 100 | else: 101 | dataset = path # dataset 102 | 103 | # Get label wh 104 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 105 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 106 | 107 | # Filter 108 | i = (wh0 < 3.0).any(1).sum() 109 | if i: 110 | print('WARNING: Extremely small objects found. ' 111 | '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) 112 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 113 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 114 | 115 | # Kmeans calculation 116 | print('Running kmeans for %g anchors on %g points...' 
% (n, len(wh))) 117 | s = wh.std(0) # sigmas for whitening 118 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 119 | k *= s 120 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 121 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 122 | k = print_results(k) 123 | 124 | # Plot 125 | # k, d = [None] * 20, [None] * 20 126 | # for i in tqdm(range(1, 21)): 127 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 128 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 129 | # ax = ax.ravel() 130 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 131 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 132 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 133 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 134 | # fig.savefig('wh.png', dpi=200) 135 | 136 | # Evolve 137 | npr = np.random 138 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 139 | pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar 140 | for _ in pbar: 141 | v = np.ones(sh) 142 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 143 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 144 | kg = (k.copy() * v).clip(min=2.0) 145 | fg = anchor_fitness(kg) 146 | if fg > f: 147 | f, k = fg, kg.copy() 148 | pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f 149 | if verbose: 150 | print_results(k) 151 | 152 | return print_results(k) 153 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from pathlib import Path 4 | 5 | import cv2 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | from numpy import random 9 | 10 | from models.experimental import attempt_load 11 | from utils.datasets import LoadStreams, LoadImages 12 | from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \ 13 | strip_optimizer, set_logging, increment_path 14 | from utils.plots import plot_one_box 15 | from utils.torch_utils import select_device, load_classifier, time_synchronized 16 | 17 | 18 | def detect(save_img=False): 19 | source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size 20 | webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( 21 | ('rtsp://', 'rtmp://', 'http://')) 22 | 23 | # Directories 24 | save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run 25 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 26 | 27 | # Initialize 28 | set_logging() 29 | device = select_device(opt.device) 30 | half = device.type != 'cpu' # half precision only supported on CUDA 31 | 32 | # Load model 33 | model = attempt_load(weights, map_location=device) # load FP32 model 34 | imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size 35 | if half: 36 | model.half() # to FP16 37 | 38 | # Second-stage classifier 39 | classify = False 40 | if classify: 41 | modelc = load_classifier(name='resnet101', n=2) # initialize 42 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() 43 | 44 | # Set Dataloader 45 | vid_path, vid_writer = None, None 46 | if webcam: 47 | view_img = True 48 | cudnn.benchmark = True # 
set True to speed up constant image size inference 49 | dataset = LoadStreams(source, img_size=imgsz) 50 | else: 51 | save_img = True 52 | dataset = LoadImages(source, img_size=imgsz) 53 | 54 | # Get names and colors 55 | names = model.module.names if hasattr(model, 'module') else model.names 56 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] 57 | 58 | # Run inference 59 | t0 = time.time() 60 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 61 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once 62 | for path, img, im0s, vid_cap in dataset: 63 | img = torch.from_numpy(img).to(device) 64 | img = img.half() if half else img.float() # uint8 to fp16/32 65 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 66 | if img.ndimension() == 3: 67 | img = img.unsqueeze(0) 68 | 69 | # Inference 70 | t1 = time_synchronized() 71 | pred = model(img, augment=opt.augment)[0] 72 | 73 | # Apply NMS 74 | pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) 75 | t2 = time_synchronized() 76 | 77 | # Apply Classifier 78 | if classify: 79 | pred = apply_classifier(pred, modelc, img, im0s) 80 | 81 | # Process detections 82 | for i, det in enumerate(pred): # detections per image 83 | if webcam: # batch_size >= 1 84 | p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count 85 | else: 86 | p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) 87 | 88 | p = Path(p) # to Path 89 | save_path = str(save_dir / p.name) # img.jpg 90 | txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt 91 | s += '%gx%g ' % img.shape[2:] # print string 92 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh 93 | if len(det): 94 | # Rescale boxes from img_size to im0 size 95 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 96 | 97 | # Print results 98 | for c in det[:, -1].unique(): 99 | n = (det[:, -1] == c).sum() # detections per class 100 | s += f'{n} {names[int(c)]}s, ' # add to string 101 | 102 | # Write results 103 | for *xyxy, conf, cls in reversed(det): 104 | if save_txt: # Write to file 105 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 106 | line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format 107 | with open(txt_path + '.txt', 'a') as f: 108 | f.write(('%g ' * len(line)).rstrip() % line + '\n') 109 | 110 | if save_img or view_img: # Add bbox to image 111 | label = f'{names[int(cls)]} {conf:.2f}' 112 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) 113 | 114 | # Print time (inference + NMS) 115 | print(f'{s}Done. 
({t2 - t1:.3f}s)') 116 | 117 | # Stream results 118 | if view_img: 119 | cv2.imshow(str(p), im0) 120 | 121 | # Save results (image with detections) 122 | if save_img: 123 | if dataset.mode == 'image': 124 | cv2.imwrite(save_path, im0) 125 | else: # 'video' 126 | if vid_path != save_path: # new video 127 | vid_path = save_path 128 | if isinstance(vid_writer, cv2.VideoWriter): 129 | vid_writer.release() # release previous video writer 130 | 131 | fourcc = 'mp4v' # output video codec 132 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 133 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 134 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 135 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) 136 | vid_writer.write(im0) 137 | 138 | if save_txt or save_img: 139 | s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' 140 | print(f"Results saved to {save_dir}{s}") 141 | 142 | print(f'Done. ({time.time() - t0:.3f}s)') 143 | 144 | 145 | if __name__ == '__main__': 146 | parser = argparse.ArgumentParser() 147 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') 148 | parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam 149 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 150 | parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') 151 | parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') 152 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 153 | parser.add_argument('--view-img', action='store_true', help='display results') 154 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 155 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 156 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') 157 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 158 | parser.add_argument('--augment', action='store_true', help='augmented inference') 159 | parser.add_argument('--update', action='store_true', help='update all models') 160 | parser.add_argument('--project', default='runs/detect', help='save results to project/name') 161 | parser.add_argument('--name', default='exp', help='save results to project/name') 162 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 163 | opt = parser.parse_args() 164 | print(opt) 165 | 166 | with torch.no_grad(): 167 | if opt.update: # update all models (to fix SourceChangeWarning) 168 | for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: 169 | detect() 170 | strip_optimizer(opt.weights) 171 | else: 172 | detect() 173 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | 39 | # Create Precision-Recall curve and compute AP for each class 40 | px, py = np.linspace(0, 1, 1000), [] # for plotting 41 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 42 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 43 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 44 | for ci, c in enumerate(unique_classes): 45 | i = pred_cls == c 46 | n_l = (target_cls == c).sum() # number of labels 47 | n_p = i.sum() # number of predictions 48 | 49 | if n_p == 0 or n_l == 0: 50 | continue 51 | else: 52 | # Accumulate FPs and TPs 53 | fpc = (1 - tp[i]).cumsum(0) 54 | tpc = tp[i].cumsum(0) 55 | 56 | # Recall 57 | recall = tpc / (n_l + 1e-16) # recall curve 58 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases 59 | 60 | # Precision 61 | precision = tpc / (tpc + fpc) # precision curve 62 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 63 | 64 | # AP from recall-precision curve 65 | for j in range(tp.shape[1]): 66 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 67 | if plot and (j == 0): 68 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 69 | 70 | # Compute F1 score (harmonic mean of precision and recall) 71 | f1 = 2 * p * r / (p + r + 1e-16) 72 | 73 | if plot: 74 | plot_pr_curve(px, py, ap, save_dir, names) 75 | 76 | return p, r, ap, f1, unique_classes.astype('int32') 77 | 78 | 79 | def compute_ap(recall, precision): 80 | """ Compute the average precision, given the recall and precision curves 81 | # Arguments 82 | recall: The recall curve (list) 83 | precision: The precision curve (list) 84 | # Returns 85 | Average precision, precision curve, recall curve 86 | """ 87 | 88 | # Append sentinel values to beginning and end 89 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 90 | mpre = np.concatenate(([1.], precision, [0.])) 91 | 92 | # Compute the precision envelope 93 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 94 | 95 | # Integrate area under curve 96 | method = 'interp' # methods: 'continuous', 'interp' 97 | if method == 'interp': 98 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 99 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 100 | else: # 'continuous' 101 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 102 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 
1]) # area under curve 103 | 104 | return ap, mpre, mrec 105 | 106 | 107 | class ConfusionMatrix: 108 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 109 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 110 | self.matrix = np.zeros((nc + 1, nc + 1)) 111 | self.nc = nc # number of classes 112 | self.conf = conf 113 | self.iou_thres = iou_thres 114 | 115 | def process_batch(self, detections, labels): 116 | """ 117 | Return intersection-over-union (Jaccard index) of boxes. 118 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 119 | Arguments: 120 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 121 | labels (Array[M, 5]), class, x1, y1, x2, y2 122 | Returns: 123 | None, updates confusion matrix accordingly 124 | """ 125 | detections = detections[detections[:, 4] > self.conf] 126 | gt_classes = labels[:, 0].int() 127 | detection_classes = detections[:, 5].int() 128 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 129 | 130 | x = torch.where(iou > self.iou_thres) 131 | if x[0].shape[0]: 132 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 133 | if x[0].shape[0] > 1: 134 | matches = matches[matches[:, 2].argsort()[::-1]] 135 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 138 | else: 139 | matches = np.zeros((0, 3)) 140 | 141 | n = matches.shape[0] > 0 142 | m0, m1, _ = matches.transpose().astype(np.int16) 143 | for i, gc in enumerate(gt_classes): 144 | j = m0 == i 145 | if n and sum(j) == 1: 146 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 147 | else: 148 | self.matrix[gc, self.nc] += 1 # background FP 149 | 150 | if n: 151 | for i, dc in enumerate(detection_classes): 152 | if not any(m1 == i): 153 | self.matrix[self.nc, dc] += 1 # background FN 154 | 155 | def matrix(self): 156 | return self.matrix 157 | 158 | def plot(self, save_dir='', names=()): 159 | try: 160 | import seaborn as sn 161 | 162 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 163 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 164 | 165 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 166 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 167 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 168 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 169 | xticklabels=names + ['background FN'] if labels else "auto", 170 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 171 | fig.axes[0].set_xlabel('True') 172 | fig.axes[0].set_ylabel('Predicted') 173 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 174 | except Exception as e: 175 | pass 176 | 177 | def print(self): 178 | for i in range(self.nc + 1): 179 | print(' '.join(map(str, self.matrix[i]))) 180 | 181 | 182 | # Plots ---------------------------------------------------------------------------------------------------------------- 183 | 184 | def plot_pr_curve(px, py, ap, save_dir='.', names=()): 185 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 186 | py = np.stack(py, axis=1) 187 | 188 | if 0 < len(names) < 21: # show mAP in legend if < 10 classes 189 | for i, y in enumerate(py.T): 190 | ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, 
precision) 191 | else: 192 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 193 | 194 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 195 | ax.set_xlabel('Recall') 196 | ax.set_ylabel('Precision') 197 | ax.set_xlim(0, 1) 198 | ax.set_ylim(0, 1) 199 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 200 | fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250) 201 | -------------------------------------------------------------------------------- /prune_detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from pathlib import Path 4 | 5 | import cv2 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | from numpy import random 9 | 10 | from models.experimental import attempt_load 11 | from utils.datasets import LoadStreams, LoadImages 12 | from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \ 13 | strip_optimizer, set_logging, increment_path 14 | from utils.plots import plot_one_box 15 | from utils.torch_utils import select_device, load_classifier, time_synchronized 16 | 17 | from modelsori import * 18 | 19 | 20 | def detect(save_img=False): 21 | source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size 22 | webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( 23 | ('rtsp://', 'rtmp://', 'http://')) 24 | 25 | # Directories 26 | save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run 27 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 28 | 29 | # Initialize 30 | set_logging() 31 | device = select_device(opt.device) 32 | half = device.type != 'cpu' # half precision only supported on CUDA 33 | 34 | # Load model 35 | if isinstance(weights, list): 36 | model=torch.load(weights[-1], map_location=device)['model'].float().eval() 37 | else: 38 | model=torch.load(weights, map_location=device)['model'].float().eval() 39 | stride = [8, 16, 32] 40 | imgsz = check_img_size(imgsz, s=max(stride)) # check img_size 41 | if half: 42 | model.half() # to FP16 43 | 44 | # Second-stage classifier 45 | classify = False 46 | if classify: 47 | modelc = load_classifier(name='resnet101', n=2) # initialize 48 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() 49 | 50 | # Set Dataloader 51 | vid_path, vid_writer = None, None 52 | if webcam: 53 | view_img = True 54 | cudnn.benchmark = True # set True to speed up constant image size inference 55 | dataset = LoadStreams(source, img_size=imgsz) 56 | else: 57 | save_img = True 58 | dataset = LoadImages(source, img_size=imgsz) 59 | 60 | # Get names and colors 61 | names = model.module.names if hasattr(model, 'module') else model.names 62 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 63 | 64 | # Run inference 65 | t0 = time.time() 66 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 67 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once 68 | for path, img, im0s, vid_cap in dataset: 69 | img = torch.from_numpy(img).to(device) 70 | img = img.half() if half else img.float() # uint8 to fp16/32 71 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 72 | if img.ndimension() == 3: 73 | img = img.unsqueeze(0) 74 | 75 | # Inference 76 | t1 = 
time_synchronized() 77 | pred = model(img, augment=opt.augment)[0] 78 | 79 | # Apply NMS 80 | pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) 81 | t2 = time_synchronized() 82 | 83 | # Apply Classifier 84 | if classify: 85 | pred = apply_classifier(pred, modelc, img, im0s) 86 | 87 | # Process detections 88 | for i, det in enumerate(pred): # detections per image 89 | if webcam: # batch_size >= 1 90 | p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count 91 | else: 92 | p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) 93 | 94 | p = Path(p) # to Path 95 | save_path = str(save_dir / p.name) # img.jpg 96 | txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt 97 | s += '%gx%g ' % img.shape[2:] # print string 98 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh 99 | if len(det): 100 | # Rescale boxes from img_size to im0 size 101 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 102 | 103 | # Print results 104 | for c in det[:, -1].unique(): 105 | n = (det[:, -1] == c).sum() # detections per class 106 | s += f'{n} {names[int(c)]}s, ' # add to string 107 | 108 | # Write results 109 | for *xyxy, conf, cls in reversed(det): 110 | if save_txt: # Write to file 111 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 112 | line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format 113 | with open(txt_path + '.txt', 'a') as f: 114 | f.write(('%g ' * len(line)).rstrip() % line + '\n') 115 | 116 | if save_img or view_img: # Add bbox to image 117 | label = f'{names[int(cls)]} {conf:.2f}' 118 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) 119 | 120 | # Print time (inference + NMS) 121 | print(f'{s}Done. ({t2 - t1:.3f}s)') 122 | 123 | # Stream results 124 | if view_img: 125 | cv2.imshow(str(p), im0) 126 | 127 | # Save results (image with detections) 128 | if save_img: 129 | if dataset.mode == 'image': 130 | cv2.imwrite(save_path, im0) 131 | else: # 'video' 132 | if vid_path != save_path: # new video 133 | vid_path = save_path 134 | if isinstance(vid_writer, cv2.VideoWriter): 135 | vid_writer.release() # release previous video writer 136 | 137 | fourcc = 'mp4v' # output video codec 138 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 139 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 140 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 141 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) 142 | vid_writer.write(im0) 143 | 144 | if save_txt or save_img: 145 | s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' 146 | print(f"Results saved to {save_dir}{s}") 147 | 148 | print(f'Done. 
({time.time() - t0:.3f}s)') 149 | 150 | 151 | if __name__ == '__main__': 152 | parser = argparse.ArgumentParser() 153 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') 154 | parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam 155 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 156 | parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') 157 | parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') 158 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 159 | parser.add_argument('--view-img', action='store_true', help='display results') 160 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 161 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 162 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') 163 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 164 | parser.add_argument('--augment', action='store_true', help='augmented inference') 165 | parser.add_argument('--update', action='store_true', help='update all models') 166 | parser.add_argument('--project', default='runs/detect', help='save results to project/name') 167 | parser.add_argument('--name', default='exp', help='save results to project/name') 168 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 169 | opt = parser.parse_args() 170 | print(opt) 171 | 172 | with torch.no_grad(): 173 | if opt.update: # update all models (to fix SourceChangeWarning) 174 | for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: 175 | detect() 176 | strip_optimizer(opt.weights) 177 | else: 178 | detect() 179 | -------------------------------------------------------------------------------- /cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=608 5 | height=608 6 | channels=3 7 | momentum=0.949 8 | decay=0.0005 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | learning_rate=0.00261 14 | burn_in=1000 15 | max_batches=500500 16 | policy=steps 17 | steps=400000,450000 18 | scales=.1,.1 19 | mosaic=1 20 | 21 | [focus] 22 | filters=12 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=32 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=SiLU 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=64 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=SiLU 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=SiLU 47 | 48 | [route] 49 | layers=-2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=32 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=SiLU 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=32 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=SiLU 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=32 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=SiLU 74 | 75 | [shortcut] 76 | from=-3 77 | activation=linear 78 | 79 | [route] 80 | layers=-1,-6 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=64 85 | size=1 86 | stride=1 87 | pad=1 88 | 
activation=SiLU 89 | 90 | [convolutional] 91 | batch_normalize=1 92 | filters=128 93 | size=3 94 | stride=2 95 | pad=1 96 | activation=SiLU 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=64 101 | size=1 102 | stride=1 103 | pad=1 104 | activation=SiLU 105 | 106 | [route] 107 | layers=-2 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=64 112 | size=1 113 | stride=1 114 | pad=1 115 | activation=SiLU 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=64 120 | size=1 121 | stride=1 122 | pad=1 123 | activation=SiLU 124 | 125 | [convolutional] 126 | batch_normalize=1 127 | filters=64 128 | size=3 129 | stride=1 130 | pad=1 131 | activation=SiLU 132 | 133 | [shortcut] 134 | from=-3 135 | activation=linear 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 | size=1 141 | stride=1 142 | pad=1 143 | activation=SiLU 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=64 148 | size=3 149 | stride=1 150 | pad=1 151 | activation=SiLU 152 | 153 | [shortcut] 154 | from=-3 155 | activation=linear 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=SiLU 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=64 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=SiLU 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [route] 178 | layers=-1,-12 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=128 183 | size=1 184 | stride=1 185 | pad=1 186 | activation=SiLU 187 | 188 | [convolutional] 189 | batch_normalize=1 190 | filters=200 191 | size=3 192 | stride=2 193 | pad=1 194 | activation=SiLU 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=88 199 | size=1 200 | stride=1 201 | pad=1 202 | activation=SiLU 203 | 204 | [route] 205 | layers=-2 206 | 207 | [convolutional] 208 | batch_normalize=1 209 | filters=128 210 | size=1 211 | stride=1 212 | pad=1 213 | activation=SiLU 214 | 215 | [convolutional] 216 | batch_normalize=1 217 | filters=48 218 | size=1 219 | stride=1 220 | pad=1 221 | activation=SiLU 222 | 223 | [convolutional] 224 | batch_normalize=1 225 | filters=128 226 | size=3 227 | stride=1 228 | pad=1 229 | activation=SiLU 230 | 231 | [shortcut] 232 | from=-3 233 | activation=linear 234 | 235 | [convolutional] 236 | batch_normalize=1 237 | filters=80 238 | size=1 239 | stride=1 240 | pad=1 241 | activation=SiLU 242 | 243 | [convolutional] 244 | batch_normalize=1 245 | filters=128 246 | size=3 247 | stride=1 248 | pad=1 249 | activation=SiLU 250 | 251 | [shortcut] 252 | from=-3 253 | activation=linear 254 | 255 | [convolutional] 256 | batch_normalize=1 257 | filters=72 258 | size=1 259 | stride=1 260 | pad=1 261 | activation=SiLU 262 | 263 | [convolutional] 264 | batch_normalize=1 265 | filters=128 266 | size=3 267 | stride=1 268 | pad=1 269 | activation=SiLU 270 | 271 | [shortcut] 272 | from=-3 273 | activation=linear 274 | 275 | [route] 276 | layers=-1,-12 277 | 278 | [convolutional] 279 | batch_normalize=1 280 | filters=160 281 | size=1 282 | stride=1 283 | pad=1 284 | activation=SiLU 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=496 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=SiLU 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=SiLU 301 | 302 | [maxpool] 303 | stride=1 304 | size=5 305 | 306 | [route] 307 | layers=-2 308 | 309 | [maxpool] 310 | stride=1 311 | size=9 312 
| 313 | [route] 314 | layers=-4 315 | 316 | [maxpool] 317 | stride=1 318 | size=13 319 | 320 | [route] 321 | layers=-6,-5,-3,-1 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=184 326 | size=1 327 | stride=1 328 | pad=1 329 | activation=SiLU 330 | 331 | [convolutional] 332 | batch_normalize=1 333 | filters=16 334 | size=1 335 | stride=1 336 | pad=1 337 | activation=SiLU 338 | 339 | [route] 340 | layers=-2 341 | 342 | [convolutional] 343 | batch_normalize=1 344 | filters=8 345 | size=1 346 | stride=1 347 | pad=1 348 | activation=SiLU 349 | 350 | [convolutional] 351 | batch_normalize=1 352 | filters=8 353 | size=1 354 | stride=1 355 | pad=1 356 | activation=SiLU 357 | 358 | [convolutional] 359 | batch_normalize=1 360 | filters=160 361 | size=3 362 | stride=1 363 | pad=1 364 | activation=SiLU 365 | 366 | [route] 367 | layers=-1,-5 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=80 372 | size=1 373 | stride=1 374 | pad=1 375 | activation=SiLU 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=SiLU 384 | 385 | [upsample] 386 | stride=2 387 | 388 | [route] 389 | layers=-1,-19 390 | 391 | [convolutional] 392 | batch_normalize=1 393 | filters=80 394 | size=1 395 | stride=1 396 | pad=1 397 | activation=SiLU 398 | 399 | [route] 400 | layers=-2 401 | 402 | [convolutional] 403 | batch_normalize=1 404 | filters=56 405 | size=1 406 | stride=1 407 | pad=1 408 | activation=SiLU 409 | 410 | [convolutional] 411 | batch_normalize=1 412 | filters=56 413 | size=1 414 | stride=1 415 | pad=1 416 | activation=SiLU 417 | 418 | [convolutional] 419 | batch_normalize=1 420 | filters=120 421 | size=3 422 | stride=1 423 | pad=1 424 | activation=SiLU 425 | 426 | [route] 427 | layers=-1,-5 428 | 429 | [convolutional] 430 | batch_normalize=1 431 | filters=152 432 | size=1 433 | stride=1 434 | pad=1 435 | activation=SiLU 436 | 437 | [convolutional] 438 | batch_normalize=1 439 | filters=128 440 | size=1 441 | stride=1 442 | pad=1 443 | activation=SiLU 444 | 445 | [upsample] 446 | stride=2 447 | 448 | [route] 449 | layers=-1,-44 450 | 451 | [convolutional] 452 | batch_normalize=1 453 | filters=48 454 | size=1 455 | stride=1 456 | pad=1 457 | activation=SiLU 458 | 459 | [route] 460 | layers=-2 461 | 462 | [convolutional] 463 | batch_normalize=1 464 | filters=56 465 | size=1 466 | stride=1 467 | pad=1 468 | activation=SiLU 469 | 470 | [convolutional] 471 | batch_normalize=1 472 | filters=56 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=SiLU 477 | 478 | [convolutional] 479 | batch_normalize=1 480 | filters=64 481 | size=3 482 | stride=1 483 | pad=1 484 | activation=SiLU 485 | 486 | [route] 487 | layers=-1,-5 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=120 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=SiLU 496 | 497 | [convolutional] 498 | size=1 499 | stride=1 500 | pad=1 501 | filters=18 502 | activation=linear 503 | 504 | [yolo] 505 | mask=0,1,2 506 | anchors=10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 507 | classes=1 508 | num=9 509 | jitter=.3 510 | ignore_thresh=.7 511 | truth_thresh=1 512 | scale_x_y=1.2 513 | iou_thresh=0.213 514 | cls_normalizer=1.0 515 | iou_normalizer=0.07 516 | iou_loss=ciou 517 | nms_kind=greedynms 518 | beta_nms=0.6 519 | 520 | [route] 521 | layers=-3 522 | 523 | [convolutional] 524 | batch_normalize=1 525 | filters=80 526 | size=3 527 | stride=2 528 | pad=1 529 | activation=SiLU 530 | 531 | [route] 532 | layers=-1,-14 
533 | 534 | [convolutional] 535 | batch_normalize=1 536 | filters=40 537 | size=1 538 | stride=1 539 | pad=1 540 | activation=SiLU 541 | 542 | [route] 543 | layers=-2 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=72 548 | size=1 549 | stride=1 550 | pad=1 551 | activation=SiLU 552 | 553 | [convolutional] 554 | batch_normalize=1 555 | filters=48 556 | size=1 557 | stride=1 558 | pad=1 559 | activation=SiLU 560 | 561 | [convolutional] 562 | batch_normalize=1 563 | filters=72 564 | size=3 565 | stride=1 566 | pad=1 567 | activation=SiLU 568 | 569 | [route] 570 | layers=-1,-5 571 | 572 | [convolutional] 573 | batch_normalize=1 574 | filters=152 575 | size=1 576 | stride=1 577 | pad=1 578 | activation=SiLU 579 | 580 | [convolutional] 581 | size=1 582 | stride=1 583 | pad=1 584 | filters=18 585 | activation=linear 586 | 587 | [yolo] 588 | mask=3,4,5 589 | anchors=10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 590 | classes=1 591 | num=9 592 | jitter=.3 593 | ignore_thresh=.7 594 | truth_thresh=1 595 | scale_x_y=1.2 596 | iou_thresh=0.213 597 | cls_normalizer=1.0 598 | iou_normalizer=0.07 599 | iou_loss=ciou 600 | nms_kind=greedynms 601 | beta_nms=0.6 602 | 603 | [route] 604 | layers=-3 605 | 606 | [convolutional] 607 | batch_normalize=1 608 | filters=88 609 | size=3 610 | stride=2 611 | pad=1 612 | activation=SiLU 613 | 614 | [route] 615 | layers=-1,-36 616 | 617 | [convolutional] 618 | batch_normalize=1 619 | filters=16 620 | size=1 621 | stride=1 622 | pad=1 623 | activation=SiLU 624 | 625 | [route] 626 | layers=-2 627 | 628 | [convolutional] 629 | batch_normalize=1 630 | filters=32 631 | size=1 632 | stride=1 633 | pad=1 634 | activation=SiLU 635 | 636 | [convolutional] 637 | batch_normalize=1 638 | filters=32 639 | size=1 640 | stride=1 641 | pad=1 642 | activation=SiLU 643 | 644 | [convolutional] 645 | batch_normalize=1 646 | filters=40 647 | size=3 648 | stride=1 649 | pad=1 650 | activation=SiLU 651 | 652 | [route] 653 | layers=-1,-5 654 | 655 | [convolutional] 656 | batch_normalize=1 657 | filters=80 658 | size=1 659 | stride=1 660 | pad=1 661 | activation=SiLU 662 | 663 | [convolutional] 664 | size=1 665 | stride=1 666 | pad=1 667 | filters=18 668 | activation=linear 669 | 670 | [yolo] 671 | mask=6,7,8 672 | anchors=10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 673 | classes=1 674 | num=9 675 | jitter=.3 676 | ignore_thresh=.7 677 | truth_thresh=1 678 | scale_x_y=1.2 679 | iou_thresh=0.213 680 | cls_normalizer=1.0 681 | iou_normalizer=0.07 682 | iou_loss=ciou 683 | nms_kind=greedynms 684 | beta_nms=0.6 685 | 686 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |   4 | 5 | ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg) 6 | 7 | This repository represents Ultralytics open-source research into future object detection methods, and incorporates lessons learned and best practices evolved over thousands of hours of training and evolution on anonymized client datasets. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk. 8 | 9 | ** GPU Speed measures end-to-end time per image averaged over 5000 COCO val2017 images using a V100 GPU with batch size 32, and includes image preprocessing, PyTorch FP16 inference, postprocessing and NMS. 
EfficientDet data from [google/automl](https://github.com/google/automl) at batch size 8. 10 | 11 | - **January 5, 2021**: [v4.0 release](https://github.com/ultralytics/yolov5/releases/tag/v4.0): nn.SiLU() activations, [Weights & Biases](https://wandb.ai/) logging, [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/) integration. 12 | - **August 13, 2020**: [v3.0 release](https://github.com/ultralytics/yolov5/releases/tag/v3.0): nn.Hardswish() activations, data autodownload, native AMP. 13 | - **July 23, 2020**: [v2.0 release](https://github.com/ultralytics/yolov5/releases/tag/v2.0): improved model definition, training and mAP. 14 | - **June 22, 2020**: [PANet](https://arxiv.org/abs/1803.01534) updates: new heads, reduced parameters, improved speed and mAP [364fcfd](https://github.com/ultralytics/yolov5/commit/364fcfd7dba53f46edd4f04c037a039c0a287972). 15 | - **June 19, 2020**: [FP16](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.half) as new default for smaller checkpoints and faster inference [d4c6674](https://github.com/ultralytics/yolov5/commit/d4c6674c98e19df4c40e33a777610a18d1961145). 16 | 17 | 18 | ## Pretrained Checkpoints 19 | 20 | | Model | size | APval | APtest | AP50 | SpeedV100 | FPSV100 || params | GFLOPS | 21 | |---------- |------ |------ |------ |------ | -------- | ------| ------ |------ | :------: | 22 | | [YOLOv5s](https://github.com/ultralytics/yolov5/releases) |640 |36.8 |36.8 |55.6 |**2.2ms** |**455** ||7.3M |17.0 23 | | [YOLOv5m](https://github.com/ultralytics/yolov5/releases) |640 |44.5 |44.5 |63.1 |2.9ms |345 ||21.4M |51.3 24 | | [YOLOv5l](https://github.com/ultralytics/yolov5/releases) |640 |48.1 |48.1 |66.4 |3.8ms |264 ||47.0M |115.4 25 | | [YOLOv5x](https://github.com/ultralytics/yolov5/releases) |640 |**50.1** |**50.1** |**68.7** |6.0ms |167 ||87.7M |218.8 26 | | | | | | | | || | 27 | | [YOLOv5x](https://github.com/ultralytics/yolov5/releases) + TTA |832 |**51.9** |**51.9** |**69.6** |24.9ms |40 ||87.7M |1005.3 28 | 29 | 33 | 34 | ** APtest denotes COCO [test-dev2017](http://cocodataset.org/#upload) server results, all other AP results denote val2017 accuracy. 35 | ** All AP numbers are for single-model single-scale without ensemble or TTA. **Reproduce mAP** by `python test.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65` 36 | ** SpeedGPU averaged over 5000 COCO val2017 images using a GCP [n1-standard-16](https://cloud.google.com/compute/docs/machine-types#n1_standard_machine_types) V100 instance, and includes image preprocessing, FP16 inference, postprocessing and NMS. NMS is 1-2ms/img. **Reproduce speed** by `python test.py --data coco.yaml --img 640 --conf 0.25 --iou 0.45` 37 | ** All checkpoints are trained to 300 epochs with default settings and hyperparameters (no autoaugmentation). 38 | ** Test Time Augmentation ([TTA](https://github.com/ultralytics/yolov5/issues/303)) runs at 3 image sizes. **Reproduce TTA** by `python test.py --data coco.yaml --img 832 --iou 0.65 --augment` 39 | 40 | 41 | ## Requirements 42 | 43 | Python 3.8 or later with all [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) dependencies installed, including `torch>=1.7`. 
To install run: 44 | ```bash 45 | $ pip install -r requirements.txt 46 | ``` 47 | 48 | 49 | ## Tutorials 50 | 51 | * [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)  🚀 RECOMMENDED 52 | * [Weights & Biases Logging](https://github.com/ultralytics/yolov5/issues/1289)  🌟 NEW 53 | * [Multi-GPU Training](https://github.com/ultralytics/yolov5/issues/475) 54 | * [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)  ⭐ NEW 55 | * [ONNX and TorchScript Export](https://github.com/ultralytics/yolov5/issues/251) 56 | * [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303) 57 | * [Model Ensembling](https://github.com/ultralytics/yolov5/issues/318) 58 | * [Model Pruning/Sparsity](https://github.com/ultralytics/yolov5/issues/304) 59 | * [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607) 60 | * [Transfer Learning with Frozen Layers](https://github.com/ultralytics/yolov5/issues/1314)  ⭐ NEW 61 | * [TensorRT Deployment](https://github.com/wang-xinyu/tensorrtx) 62 | 63 | 64 | ## Environments 65 | 66 | YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): 67 | 68 | - **Google Colab Notebook** with free GPU: Open In Colab 69 | - **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5) 70 | - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) 71 | - **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker) 72 | 73 | 74 | ## Inference 75 | 76 | detect.py runs inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. 77 | ```bash 78 | $ python detect.py --source 0 # webcam 79 | file.jpg # image 80 | file.mp4 # video 81 | path/ # directory 82 | path/*.jpg # glob 83 | rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa # rtsp stream 84 | rtmp://192.168.1.105/live/test # rtmp stream 85 | http://112.50.243.8/PLTV/88888888/224/3221225900/1.m3u8 # http stream 86 | ``` 87 | 88 | To run inference on example images in `data/images`: 89 | ```bash 90 | $ python detect.py --source data/images --weights yolov5s.pt --conf 0.25 91 | 92 | Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', img_size=640, iou_thres=0.45, save_conf=False, save_dir='runs/detect', save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt']) 93 | Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB) 94 | 95 | Downloading https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5s.pt to yolov5s.pt... 100%|██████████████| 14.5M/14.5M [00:00<00:00, 21.3MB/s] 96 | 97 | Fusing layers... 98 | Model Summary: 232 layers, 7459581 parameters, 0 gradients 99 | image 1/2 data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s) 100 | image 2/2 data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s) 101 | Results saved to runs/detect/exp 102 | Done. 
(0.113s) 103 | ``` 104 | 105 | 106 | ### PyTorch Hub 107 | 108 | To run **batched inference** with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36): 109 | ```python 110 | import torch 111 | from PIL import Image 112 | 113 | # Model 114 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) 115 | 116 | # Images 117 | img1 = Image.open('zidane.jpg') 118 | img2 = Image.open('bus.jpg') 119 | imgs = [img1, img2] # batched list of images 120 | 121 | # Inference 122 | result = model(imgs) 123 | ``` 124 | 125 | 126 | ## Training 127 | 128 | Run commands below to reproduce results on [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) dataset (dataset auto-downloads on first use). Training times for YOLOv5s/m/l/x are 2/4/6/8 days on a single V100 (multi-GPU times faster). Use the largest `--batch-size` your GPU allows (batch sizes shown for 16 GB devices). 129 | ```bash 130 | $ python train.py --data coco.yaml --cfg yolov5s.yaml --weights '' --batch-size 64 131 | yolov5m 40 132 | yolov5l 24 133 | yolov5x 16 134 | ``` 135 | 136 | 137 | 138 | ## Citation 139 | 140 | [![DOI](https://zenodo.org/badge/264818686.svg)](https://zenodo.org/badge/latestdoi/264818686) 141 | 142 | 143 | ## About Us 144 | 145 | Ultralytics is a U.S.-based particle physics and AI startup with over 6 years of expertise supporting government, academic and business clients. We offer a wide range of vision AI services, spanning from simple expert advice up to delivery of fully customized, end-to-end production solutions, including: 146 | - **Cloud-based AI** systems operating on **hundreds of HD video streams in realtime.** 147 | - **Edge AI** integrated into custom iOS and Android apps for realtime **30 FPS video inference.** 148 | - **Custom data training**, hyperparameter evolution, and model exportation to any destination. 149 | 150 | For business inquiries and professional support requests please visit us at https://www.ultralytics.com. 151 | 152 | 153 | ## Contact 154 | 155 | **Issues should be raised directly in the repository.** For business inquiries or professional support requests please visit https://www.ultralytics.com or email Glenn Jocher at glenn.jocher@ultralytics.com. 
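## PyTorch Hub: Reading Results

The batched inference snippet in the PyTorch Hub section above stops at `result = model(imgs)`. The sketch below shows one way to inspect those results; it assumes the hub model is the autoshaped YOLOv5 wrapper whose output exposes `print()`, `save()` and per-image `xyxy` tensors (true for the v4.0-era `Detections` class, but not verified against this fork), so treat these method names as assumptions rather than a guaranteed API.
```python
import torch
from PIL import Image

# Load the hub model and run batched inference, as in the example above
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
imgs = [Image.open('zidane.jpg'), Image.open('bus.jpg')]  # batched list of images
results = model(imgs)  # preprocessing, inference and NMS handled by the hub wrapper

results.print()  # per-image summary (classes and counts) to stdout
results.save()   # save annotated copies of the input images

# Per-image detections as an Nx6 tensor: x1, y1, x2, y2, confidence, class
for det in results.xyxy:
    print(det)
```
If these helpers are missing in a given release, the fallback is the raw model path used in `detect.py`: `attempt_load` followed by `non_max_suppression`.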
156 | -------------------------------------------------------------------------------- /cfg/yolov5s_v4_hand.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | [focus] 30 | filters=12 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=SiLU 39 | 40 | # Downsample 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=2 46 | pad=1 47 | activation=SiLU 48 | 49 | #C3 50 | [convolutional] 51 | batch_normalize=1 52 | filters=32 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=SiLU 57 | 58 | [route] 59 | layers = -2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=32 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=SiLU 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=32 72 | size=1 73 | stride=1 74 | pad=1 75 | activation=SiLU 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=32 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=SiLU 84 | 85 | [shortcut] 86 | from=-3 87 | activation=linear 88 | 89 | [route] 90 | layers = -1,-6 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=64 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=SiLU 99 | 100 | # Downsample 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=2 106 | pad=1 107 | activation=SiLU 108 | 109 | #C3 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=SiLU 117 | 118 | [route] 119 | layers = -2 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=64 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=SiLU 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=64 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=SiLU 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=SiLU 144 | 145 | [shortcut] 146 | from=-3 147 | activation=linear 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=64 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=SiLU 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=SiLU 164 | 165 | [shortcut] 166 | from=-3 167 | activation=linear 168 | 169 | [convolutional] 170 | batch_normalize=1 171 | filters=64 172 | size=1 173 | stride=1 174 | pad=1 175 | activation=SiLU 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=3 181 | stride=1 182 | pad=1 183 | activation=SiLU 184 | 185 | [shortcut] 186 | from=-3 187 | activation=linear 188 | 189 | [route] 190 | layers = -1,-12 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=128 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=SiLU 199 | 200 | # Downsample 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=3 205 | stride=2 206 | pad=1 207 | activation=SiLU 208 | 209 | #C3 210 | [convolutional] 211 | batch_normalize=1 212 | 
filters=128 213 | size=1 214 | stride=1 215 | pad=1 216 | activation=SiLU 217 | 218 | [route] 219 | layers = -2 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | filters=128 224 | size=1 225 | stride=1 226 | pad=1 227 | activation=SiLU 228 | 229 | [convolutional] 230 | batch_normalize=1 231 | filters=128 232 | size=1 233 | stride=1 234 | pad=1 235 | activation=SiLU 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=3 241 | stride=1 242 | pad=1 243 | activation=SiLU 244 | 245 | [shortcut] 246 | from=-3 247 | activation=linear 248 | 249 | [convolutional] 250 | batch_normalize=1 251 | filters=128 252 | size=1 253 | stride=1 254 | pad=1 255 | activation=SiLU 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=128 260 | size=3 261 | stride=1 262 | pad=1 263 | activation=SiLU 264 | 265 | [shortcut] 266 | from=-3 267 | activation=linear 268 | 269 | [convolutional] 270 | batch_normalize=1 271 | filters=128 272 | size=1 273 | stride=1 274 | pad=1 275 | activation=SiLU 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=128 280 | size=3 281 | stride=1 282 | pad=1 283 | activation=SiLU 284 | 285 | [shortcut] 286 | from=-3 287 | activation=linear 288 | 289 | [route] 290 | layers = -1,-12 291 | 292 | [convolutional] 293 | batch_normalize=1 294 | filters=256 295 | size=1 296 | stride=1 297 | pad=1 298 | activation=SiLU 299 | 300 | # Downsample 301 | [convolutional] 302 | batch_normalize=1 303 | filters=512 304 | size=3 305 | stride=2 306 | pad=1 307 | activation=SiLU 308 | 309 | [convolutional] 310 | batch_normalize=1 311 | filters=256 312 | size=1 313 | stride=1 314 | pad=1 315 | activation=SiLU 316 | 317 | ### SPP ### 318 | [maxpool] 319 | stride=1 320 | size=5 321 | 322 | [route] 323 | layers=-2 324 | 325 | [maxpool] 326 | stride=1 327 | size=9 328 | 329 | [route] 330 | layers=-4 331 | 332 | [maxpool] 333 | stride=1 334 | size=13 335 | 336 | [route] 337 | ###layers=-1,-3,-5,-6 338 | layers=-6,-5,-3,-1 339 | ### End SPP ### 340 | 341 | [convolutional] 342 | batch_normalize=1 343 | filters=512 344 | size=1 345 | stride=1 346 | pad=1 347 | activation=SiLU 348 | 349 | #C3 350 | [convolutional] 351 | batch_normalize=1 352 | filters=256 353 | size=1 354 | stride=1 355 | pad=1 356 | activation=SiLU 357 | 358 | [route] 359 | layers = -2 360 | 361 | [convolutional] 362 | batch_normalize=1 363 | filters=256 364 | size=1 365 | stride=1 366 | pad=1 367 | activation=SiLU 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=256 372 | size=1 373 | stride=1 374 | pad=1 375 | activation=SiLU 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=3 381 | stride=1 382 | pad=1 383 | activation=SiLU 384 | 385 | [route] 386 | layers = -1,-5 387 | 388 | [convolutional] 389 | batch_normalize=1 390 | filters=512 391 | size=1 392 | stride=1 393 | pad=1 394 | activation=SiLU 395 | 396 | [convolutional] 397 | batch_normalize=1 398 | filters=256 399 | size=1 400 | stride=1 401 | pad=1 402 | activation=SiLU 403 | 404 | [upsample] 405 | stride=2 406 | 407 | [route] 408 | layers = -1,-19 409 | 410 | #C3 411 | [convolutional] 412 | batch_normalize=1 413 | filters=128 414 | size=1 415 | stride=1 416 | pad=1 417 | activation=SiLU 418 | 419 | [route] 420 | layers = -2 421 | 422 | [convolutional] 423 | batch_normalize=1 424 | filters=128 425 | size=1 426 | stride=1 427 | pad=1 428 | activation=SiLU 429 | 430 | [convolutional] 431 | batch_normalize=1 432 | filters=128 433 | size=1 434 | stride=1 435 | pad=1 436 | activation=SiLU 437 | 
438 | [convolutional] 439 | batch_normalize=1 440 | filters=128 441 | size=3 442 | stride=1 443 | pad=1 444 | activation=SiLU 445 | 446 | [route] 447 | layers = -1,-5 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=256 452 | size=1 453 | stride=1 454 | pad=1 455 | activation=SiLU 456 | 457 | [convolutional] 458 | batch_normalize=1 459 | filters=128 460 | size=1 461 | stride=1 462 | pad=1 463 | activation=SiLU 464 | 465 | [upsample] 466 | stride=2 467 | 468 | [route] 469 | layers = -1,-44 470 | 471 | #C3 472 | [convolutional] 473 | batch_normalize=1 474 | filters=64 475 | size=1 476 | stride=1 477 | pad=1 478 | activation=SiLU 479 | 480 | [route] 481 | layers = -2 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=64 486 | size=1 487 | stride=1 488 | pad=1 489 | activation=SiLU 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=64 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=SiLU 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=64 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=SiLU 506 | 507 | [route] 508 | layers = -1,-5 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=128 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=SiLU 517 | 518 | ###################### 519 | [convolutional] 520 | size=1 521 | stride=1 522 | pad=1 523 | filters=18 524 | activation=linear 525 | 526 | [yolo] 527 | mask = 0,1,2 528 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 529 | classes=1 530 | num=9 531 | jitter=.3 532 | ignore_thresh = .7 533 | truth_thresh = 1 534 | scale_x_y = 1.2 535 | iou_thresh=0.213 536 | cls_normalizer=1.0 537 | iou_normalizer=0.07 538 | iou_loss=ciou 539 | nms_kind=greedynms 540 | beta_nms=0.6 541 | 542 | [route] 543 | layers = -3 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=128 548 | size=3 549 | stride=2 550 | pad=1 551 | activation=SiLU 552 | 553 | [route] 554 | layers = -1,-14 555 | 556 | #C3 557 | [convolutional] 558 | batch_normalize=1 559 | filters=128 560 | size=1 561 | stride=1 562 | pad=1 563 | activation=SiLU 564 | 565 | [route] 566 | layers = -2 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=128 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=SiLU 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | filters=128 579 | size=1 580 | stride=1 581 | pad=1 582 | activation=SiLU 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=128 587 | size=3 588 | stride=1 589 | pad=1 590 | activation=SiLU 591 | 592 | [route] 593 | layers = -1,-5 594 | 595 | [convolutional] 596 | batch_normalize=1 597 | filters=256 598 | size=1 599 | stride=1 600 | pad=1 601 | activation=SiLU 602 | 603 | ###################### 604 | [convolutional] 605 | size=1 606 | stride=1 607 | pad=1 608 | filters=18 609 | activation=linear 610 | 611 | [yolo] 612 | mask = 3,4,5 613 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 614 | classes=1 615 | num=9 616 | jitter=.3 617 | ignore_thresh = .7 618 | truth_thresh = 1 619 | scale_x_y = 1.2 620 | iou_thresh=0.213 621 | cls_normalizer=1.0 622 | iou_normalizer=0.07 623 | iou_loss=ciou 624 | nms_kind=greedynms 625 | beta_nms=0.6 626 | 627 | [route] 628 | layers = -3 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=256 633 | size=3 634 | stride=2 635 | pad=1 636 | activation=SiLU 637 | 638 | [route] 639 | layers = -1,-36 640 | 641 | #C3 642 | [convolutional] 643 | batch_normalize=1 644 | filters=256 645 | size=1 646 | 
stride=1 647 | pad=1 648 | activation=SiLU 649 | 650 | [route] 651 | layers = -2 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=SiLU 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | filters=256 664 | size=1 665 | stride=1 666 | pad=1 667 | activation=SiLU 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=3 673 | stride=1 674 | pad=1 675 | activation=SiLU 676 | 677 | [route] 678 | layers = -1,-5 679 | 680 | [convolutional] 681 | batch_normalize=1 682 | filters=512 683 | size=1 684 | stride=1 685 | pad=1 686 | activation=SiLU 687 | 688 | ###################### 689 | [convolutional] 690 | size=1 691 | stride=1 692 | pad=1 693 | filters=18 694 | activation=linear 695 | 696 | [yolo] 697 | mask = 6,7,8 698 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 699 | classes=1 700 | num=9 701 | jitter=.3 702 | ignore_thresh = .7 703 | truth_thresh = 1 704 | scale_x_y = 1.2 705 | iou_thresh=0.213 706 | cls_normalizer=1.0 707 | iou_normalizer=0.07 708 | iou_loss=ciou 709 | nms_kind=greedynms 710 | beta_nms=0.6 711 | 712 | 713 | -------------------------------------------------------------------------------- /cfg/yolov5s_v4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | [focus] 30 | filters=12 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=SiLU 39 | 40 | # Downsample 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=2 46 | pad=1 47 | activation=SiLU 48 | 49 | #C3 50 | [convolutional] 51 | batch_normalize=1 52 | filters=32 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=SiLU 57 | 58 | [route] 59 | layers = -2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=32 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=SiLU 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=32 72 | size=1 73 | stride=1 74 | pad=1 75 | activation=SiLU 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=32 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=SiLU 84 | 85 | [shortcut] 86 | from=-3 87 | activation=linear 88 | 89 | [route] 90 | layers = -1,-6 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=64 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=SiLU 99 | 100 | # Downsample 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=2 106 | pad=1 107 | activation=SiLU 108 | 109 | #C3 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=SiLU 117 | 118 | [route] 119 | layers = -2 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=64 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=SiLU 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=64 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=SiLU 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 
| size=3 141 | stride=1 142 | pad=1 143 | activation=SiLU 144 | 145 | [shortcut] 146 | from=-3 147 | activation=linear 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=64 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=SiLU 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=SiLU 164 | 165 | [shortcut] 166 | from=-3 167 | activation=linear 168 | 169 | [convolutional] 170 | batch_normalize=1 171 | filters=64 172 | size=1 173 | stride=1 174 | pad=1 175 | activation=SiLU 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=3 181 | stride=1 182 | pad=1 183 | activation=SiLU 184 | 185 | [shortcut] 186 | from=-3 187 | activation=linear 188 | 189 | [route] 190 | layers = -1,-12 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=128 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=SiLU 199 | 200 | # Downsample 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=3 205 | stride=2 206 | pad=1 207 | activation=SiLU 208 | 209 | #C3 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=1 214 | stride=1 215 | pad=1 216 | activation=SiLU 217 | 218 | [route] 219 | layers = -2 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | filters=128 224 | size=1 225 | stride=1 226 | pad=1 227 | activation=SiLU 228 | 229 | [convolutional] 230 | batch_normalize=1 231 | filters=128 232 | size=1 233 | stride=1 234 | pad=1 235 | activation=SiLU 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=3 241 | stride=1 242 | pad=1 243 | activation=SiLU 244 | 245 | [shortcut] 246 | from=-3 247 | activation=linear 248 | 249 | [convolutional] 250 | batch_normalize=1 251 | filters=128 252 | size=1 253 | stride=1 254 | pad=1 255 | activation=SiLU 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=128 260 | size=3 261 | stride=1 262 | pad=1 263 | activation=SiLU 264 | 265 | [shortcut] 266 | from=-3 267 | activation=linear 268 | 269 | [convolutional] 270 | batch_normalize=1 271 | filters=128 272 | size=1 273 | stride=1 274 | pad=1 275 | activation=SiLU 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=128 280 | size=3 281 | stride=1 282 | pad=1 283 | activation=SiLU 284 | 285 | [shortcut] 286 | from=-3 287 | activation=linear 288 | 289 | [route] 290 | layers = -1,-12 291 | 292 | [convolutional] 293 | batch_normalize=1 294 | filters=256 295 | size=1 296 | stride=1 297 | pad=1 298 | activation=SiLU 299 | 300 | # Downsample 301 | [convolutional] 302 | batch_normalize=1 303 | filters=512 304 | size=3 305 | stride=2 306 | pad=1 307 | activation=SiLU 308 | 309 | [convolutional] 310 | batch_normalize=1 311 | filters=256 312 | size=1 313 | stride=1 314 | pad=1 315 | activation=SiLU 316 | 317 | ### SPP ### 318 | [maxpool] 319 | stride=1 320 | size=5 321 | 322 | [route] 323 | layers=-2 324 | 325 | [maxpool] 326 | stride=1 327 | size=9 328 | 329 | [route] 330 | layers=-4 331 | 332 | [maxpool] 333 | stride=1 334 | size=13 335 | 336 | [route] 337 | ###layers=-1,-3,-5,-6 338 | layers=-6,-5,-3,-1 339 | ### End SPP ### 340 | 341 | [convolutional] 342 | batch_normalize=1 343 | filters=512 344 | size=1 345 | stride=1 346 | pad=1 347 | activation=SiLU 348 | 349 | #C3 350 | [convolutional] 351 | batch_normalize=1 352 | filters=256 353 | size=1 354 | stride=1 355 | pad=1 356 | activation=SiLU 357 | 358 | [route] 359 | layers = -2 360 | 361 | [convolutional] 362 | batch_normalize=1 363 | filters=256 
364 | size=1 365 | stride=1 366 | pad=1 367 | activation=SiLU 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=256 372 | size=1 373 | stride=1 374 | pad=1 375 | activation=SiLU 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=3 381 | stride=1 382 | pad=1 383 | activation=SiLU 384 | 385 | [route] 386 | layers = -1,-5 387 | 388 | [convolutional] 389 | batch_normalize=1 390 | filters=512 391 | size=1 392 | stride=1 393 | pad=1 394 | activation=SiLU 395 | 396 | [convolutional] 397 | batch_normalize=1 398 | filters=256 399 | size=1 400 | stride=1 401 | pad=1 402 | activation=SiLU 403 | 404 | [upsample] 405 | stride=2 406 | 407 | [route] 408 | layers = -1,-19 409 | 410 | #C3 411 | [convolutional] 412 | batch_normalize=1 413 | filters=128 414 | size=1 415 | stride=1 416 | pad=1 417 | activation=SiLU 418 | 419 | [route] 420 | layers = -2 421 | 422 | [convolutional] 423 | batch_normalize=1 424 | filters=128 425 | size=1 426 | stride=1 427 | pad=1 428 | activation=SiLU 429 | 430 | [convolutional] 431 | batch_normalize=1 432 | filters=128 433 | size=1 434 | stride=1 435 | pad=1 436 | activation=SiLU 437 | 438 | [convolutional] 439 | batch_normalize=1 440 | filters=128 441 | size=3 442 | stride=1 443 | pad=1 444 | activation=SiLU 445 | 446 | [route] 447 | layers = -1,-5 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=256 452 | size=1 453 | stride=1 454 | pad=1 455 | activation=SiLU 456 | 457 | [convolutional] 458 | batch_normalize=1 459 | filters=128 460 | size=1 461 | stride=1 462 | pad=1 463 | activation=SiLU 464 | 465 | [upsample] 466 | stride=2 467 | 468 | [route] 469 | layers = -1,-44 470 | 471 | #C3 472 | [convolutional] 473 | batch_normalize=1 474 | filters=64 475 | size=1 476 | stride=1 477 | pad=1 478 | activation=SiLU 479 | 480 | [route] 481 | layers = -2 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=64 486 | size=1 487 | stride=1 488 | pad=1 489 | activation=SiLU 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=64 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=SiLU 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=64 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=SiLU 506 | 507 | [route] 508 | layers = -1,-5 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=128 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=SiLU 517 | 518 | ###################### 519 | [convolutional] 520 | size=1 521 | stride=1 522 | pad=1 523 | filters=21 524 | activation=linear 525 | 526 | [yolo] 527 | mask = 0,1,2 528 | anchors = 40, 39, 51, 50, 61, 59, 75, 69, 62, 92, 88, 98, 115, 77, 93, 129, 128, 115 529 | classes=2 530 | num=9 531 | jitter=.3 532 | ignore_thresh = .7 533 | truth_thresh = 1 534 | scale_x_y = 1.2 535 | iou_thresh=0.213 536 | cls_normalizer=1.0 537 | iou_normalizer=0.07 538 | iou_loss=ciou 539 | nms_kind=greedynms 540 | beta_nms=0.6 541 | 542 | [route] 543 | layers = -3 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=128 548 | size=3 549 | stride=2 550 | pad=1 551 | activation=SiLU 552 | 553 | [route] 554 | layers = -1,-14 555 | 556 | #C3 557 | [convolutional] 558 | batch_normalize=1 559 | filters=128 560 | size=1 561 | stride=1 562 | pad=1 563 | activation=SiLU 564 | 565 | [route] 566 | layers = -2 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=128 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=SiLU 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | filters=128 579 | size=1 580 | 
stride=1 581 | pad=1 582 | activation=SiLU 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=128 587 | size=3 588 | stride=1 589 | pad=1 590 | activation=SiLU 591 | 592 | [route] 593 | layers = -1,-5 594 | 595 | [convolutional] 596 | batch_normalize=1 597 | filters=256 598 | size=1 599 | stride=1 600 | pad=1 601 | activation=SiLU 602 | 603 | ###################### 604 | [convolutional] 605 | size=1 606 | stride=1 607 | pad=1 608 | filters=21 609 | activation=linear 610 | 611 | [yolo] 612 | mask = 3,4,5 613 | anchors = 40, 39, 51, 50, 61, 59, 75, 69, 62, 92, 88, 98, 115, 77, 93, 129, 128, 115 614 | classes=2 615 | num=9 616 | jitter=.3 617 | ignore_thresh = .7 618 | truth_thresh = 1 619 | scale_x_y = 1.2 620 | iou_thresh=0.213 621 | cls_normalizer=1.0 622 | iou_normalizer=0.07 623 | iou_loss=ciou 624 | nms_kind=greedynms 625 | beta_nms=0.6 626 | 627 | [route] 628 | layers = -3 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=256 633 | size=3 634 | stride=2 635 | pad=1 636 | activation=SiLU 637 | 638 | [route] 639 | layers = -1,-36 640 | 641 | #C3 642 | [convolutional] 643 | batch_normalize=1 644 | filters=256 645 | size=1 646 | stride=1 647 | pad=1 648 | activation=SiLU 649 | 650 | [route] 651 | layers = -2 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=SiLU 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | filters=256 664 | size=1 665 | stride=1 666 | pad=1 667 | activation=SiLU 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=3 673 | stride=1 674 | pad=1 675 | activation=SiLU 676 | 677 | [route] 678 | layers = -1,-5 679 | 680 | [convolutional] 681 | batch_normalize=1 682 | filters=512 683 | size=1 684 | stride=1 685 | pad=1 686 | activation=SiLU 687 | 688 | ###################### 689 | [convolutional] 690 | size=1 691 | stride=1 692 | pad=1 693 | filters=21 694 | activation=linear 695 | 696 | [yolo] 697 | mask = 6,7,8 698 | anchors = 40, 39, 51, 50, 61, 59, 75, 69, 62, 92, 88, 98, 115, 77, 93, 129, 128, 115 699 | classes=2 700 | num=9 701 | jitter=.3 702 | ignore_thresh = .7 703 | truth_thresh = 1 704 | scale_x_y = 1.2 705 | iou_thresh=0.213 706 | cls_normalizer=1.0 707 | iou_normalizer=0.07 708 | iou_loss=ciou 709 | nms_kind=greedynms 710 | beta_nms=0.6 711 | 712 | 713 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # PyTorch utils 2 | 3 | import logging 4 | import math 5 | import os 6 | import time 7 | from contextlib import contextmanager 8 | from copy import deepcopy 9 | 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torchvision 15 | 16 | try: 17 | import thop # for FLOPS computation 18 | except ImportError: 19 | thop = None 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | @contextmanager 24 | def torch_distributed_zero_first(local_rank: int): 25 | """ 26 | Decorator to make all processes in distributed training wait for each local_master to do something. 
27 | """ 28 | if local_rank not in [-1, 0]: 29 | torch.distributed.barrier() 30 | yield 31 | if local_rank == 0: 32 | torch.distributed.barrier() 33 | 34 | 35 | def init_torch_seeds(seed=0): 36 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 37 | torch.manual_seed(seed) 38 | if seed == 0: # slower, more reproducible 39 | cudnn.benchmark, cudnn.deterministic = False, True 40 | else: # faster, less reproducible 41 | cudnn.benchmark, cudnn.deterministic = True, False 42 | 43 | 44 | def select_device(device='', batch_size=None): 45 | # device = 'cpu' or '0' or '0,1,2,3' 46 | s = f'Using torch {torch.__version__} ' # string 47 | cpu = device.lower() == 'cpu' 48 | if cpu: 49 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 50 | elif device: # non-cpu device requested 51 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 52 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 53 | 54 | cuda = torch.cuda.is_available() and not cpu 55 | if cuda: 56 | n = torch.cuda.device_count() 57 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 58 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 59 | space = ' ' * len(s) 60 | for i, d in enumerate(device.split(',') if device else range(n)): 61 | p = torch.cuda.get_device_properties(i) 62 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 63 | else: 64 | s += 'CPU' 65 | 66 | logger.info(f'{s}\n') # skip a line 67 | return torch.device('cuda:0' if cuda else 'cpu') 68 | 69 | 70 | def time_synchronized(): 71 | # pytorch-accurate time 72 | if torch.cuda.is_available(): 73 | torch.cuda.synchronize() 74 | return time.time() 75 | 76 | 77 | def profile(x, ops, n=100, device=None): 78 | # profile a pytorch module or list of modules. Example usage: 79 | # x = torch.randn(16, 3, 640, 640) # input 80 | # m1 = lambda x: x * torch.sigmoid(x) 81 | # m2 = nn.SiLU() 82 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 83 | 84 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 85 | x = x.to(device) 86 | x.requires_grad = True 87 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 88 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 89 | for m in ops if isinstance(ops, list) else [ops]: 90 | m = m.to(device) if hasattr(m, 'to') else m # device 91 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 92 | dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward 93 | try: 94 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 95 | except: 96 | flops = 0 97 | 98 | for _ in range(n): 99 | t[0] = time_synchronized() 100 | y = m(x) 101 | t[1] = time_synchronized() 102 | try: 103 | _ = y.sum().backward() 104 | t[2] = time_synchronized() 105 | except: # no backward method 106 | t[2] = float('nan') 107 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 108 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 109 | 110 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 111 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 112 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 113 | print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 114 | 115 | 116 | def is_parallel(model): 117 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 118 | 119 | 120 | def intersect_dicts(da, db, exclude=()): 121 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 122 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 123 | 124 | 125 | def initialize_weights(model): 126 | for m in model.modules(): 127 | t = type(m) 128 | if t is nn.Conv2d: 129 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 130 | elif t is nn.BatchNorm2d: 131 | m.eps = 1e-3 132 | m.momentum = 0.03 133 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 134 | m.inplace = True 135 | 136 | 137 | def find_modules(model, mclass=nn.Conv2d): 138 | # Finds layer indices matching module class 'mclass' 139 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 140 | 141 | 142 | def sparsity(model): 143 | # Return global model sparsity 144 | a, b = 0., 0. 145 | for p in model.parameters(): 146 | a += p.numel() 147 | b += (p == 0).sum() 148 | return b / a 149 | 150 | 151 | def prune(model, amount=0.3): 152 | # Prune model to requested global sparsity 153 | import torch.nn.utils.prune as prune 154 | print('Pruning model... 
', end='') 155 | for name, m in model.named_modules(): 156 | if isinstance(m, nn.Conv2d): 157 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 158 | prune.remove(m, 'weight') # make permanent 159 | print(' %.3g global sparsity' % sparsity(model)) 160 | 161 | 162 | def fuse_conv_and_bn(conv, bn): 163 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 164 | fusedconv = nn.Conv2d(conv.in_channels, 165 | conv.out_channels, 166 | kernel_size=conv.kernel_size, 167 | stride=conv.stride, 168 | padding=conv.padding, 169 | groups=conv.groups, 170 | bias=True).requires_grad_(False).to(conv.weight.device) 171 | 172 | # prepare filters 173 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 174 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 175 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 176 | 177 | # prepare spatial bias 178 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 179 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 180 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 181 | 182 | return fusedconv 183 | 184 | 185 | def model_info(model, verbose=False, img_size=640): 186 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 187 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 188 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 189 | if verbose: 190 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 191 | for i, (name, p) in enumerate(model.named_parameters()): 192 | name = name.replace('module_list.', '') 193 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 194 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 195 | 196 | try: # FLOPS 197 | from thop import profile 198 | stride = int(model.stride.max()) if hasattr(model, 'stride') else 32 199 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 200 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 201 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 202 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 203 | except (ImportError, Exception): 204 | fs = '' 205 | 206 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 207 | 208 | 209 | def load_classifier(name='resnet101', n=2): 210 | # Loads a pretrained model reshaped to n-class output 211 | model = torchvision.models.__dict__[name](pretrained=True) 212 | 213 | # ResNet model properties 214 | # input_size = [3, 224, 224] 215 | # input_space = 'RGB' 216 | # input_range = [0, 1] 217 | # mean = [0.485, 0.456, 0.406] 218 | # std = [0.229, 0.224, 0.225] 219 | 220 | # Reshape output to n classes 221 | filters = model.fc.weight.shape[1] 222 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 223 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 224 | model.fc.out_features = n 225 | return model 226 | 227 | 228 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 229 | # scales img(bs,3,y,x) by ratio constrained to gs-multiple 230 | if ratio 
== 1.0: 231 | return img 232 | else: 233 | h, w = img.shape[2:] 234 | s = (int(h * ratio), int(w * ratio)) # new size 235 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 236 | if not same_shape: # pad/crop img 237 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 238 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 239 | 240 | 241 | def copy_attr(a, b, include=(), exclude=()): 242 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 243 | for k, v in b.__dict__.items(): 244 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 245 | continue 246 | else: 247 | setattr(a, k, v) 248 | 249 | 250 | class ModelEMA: 251 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 252 | Keep a moving average of everything in the model state_dict (parameters and buffers). 253 | This is intended to allow functionality like 254 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 255 | A smoothed version of the weights is necessary for some training schemes to perform well. 256 | This class is sensitive where it is initialized in the sequence of model init, 257 | GPU assignment and distributed training wrappers. 258 | """ 259 | 260 | def __init__(self, model, decay=0.9999, updates=0): 261 | # Create EMA 262 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 263 | # if next(model.parameters()).device.type != 'cpu': 264 | # self.ema.half() # FP16 EMA 265 | self.updates = updates # number of EMA updates 266 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 267 | for p in self.ema.parameters(): 268 | p.requires_grad_(False) 269 | 270 | def update(self, model): 271 | # Update EMA parameters 272 | with torch.no_grad(): 273 | self.updates += 1 274 | d = self.decay(self.updates) 275 | 276 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 277 | for k, v in self.ema.state_dict().items(): 278 | if v.dtype.is_floating_point: 279 | v *= d 280 | v += (1. 
- d) * msd[k].detach() 281 | 282 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 283 | # Update EMA attributes 284 | copy_attr(self.ema, model, include, exclude) 285 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | from copy import deepcopy 5 | from pathlib import Path 6 | 7 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 8 | logger = logging.getLogger(__name__) 9 | 10 | from models.common import * 11 | from models.experimental import MixConv2d, CrossConv, GhostBottleneck 12 | from utils.autoanchor import check_anchor_order 13 | from utils.general import make_divisible, check_file, set_logging 14 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 15 | select_device, copy_attr 16 | 17 | try: 18 | import thop # for FLOPS computation 19 | except ImportError: 20 | thop = None 21 | 22 | 23 | class Detect(nn.Module): 24 | stride = None # strides computed during build 25 | export = False # onnx export 26 | 27 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 28 | super(Detect, self).__init__() 29 | self.nc = nc # number of classes 30 | self.no = nc + 5 # number of outputs per anchor 31 | self.nl = len(anchors) # number of detection layers 32 | self.na = len(anchors[0]) // 2 # number of anchors 33 | self.grid = [torch.zeros(1)] * self.nl # init grid 34 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 35 | self.register_buffer('anchors', a) # shape(nl,na,2) 36 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 37 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 38 | 39 | def forward(self, x): 40 | # x = x.copy() # for profiling 41 | z = [] # inference output 42 | self.training |= self.export 43 | for i in range(self.nl): 44 | x[i] = self.m[i](x[i]) # conv 45 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 46 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 47 | 48 | if not self.training: # inference 49 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 50 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 51 | 52 | y = x[i].sigmoid() 53 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 54 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 55 | z.append(y.view(bs, -1, self.no)) 56 | 57 | return x if self.training else (torch.cat(z, 1), x) 58 | 59 | @staticmethod 60 | def _make_grid(nx=20, ny=20): 61 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 62 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 63 | 64 | 65 | class Model(nn.Module): 66 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 67 | super(Model, self).__init__() 68 | if isinstance(cfg, dict): 69 | self.yaml = cfg # model dict 70 | else: # is *.yaml 71 | import yaml # for torch hub 72 | self.yaml_file = Path(cfg).name 73 | with open(cfg) as f: 74 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 75 | 76 | # Define model 77 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 78 | if nc and nc != self.yaml['nc']: 79 | logger.info('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 80 | self.yaml['nc'] = nc # override yaml value 81 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 82 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 83 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 84 | 85 | # Build strides, anchors 86 | m = self.model[-1] # Detect() 87 | if isinstance(m, Detect): 88 | s = 256 # 2x min stride 89 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 90 | m.anchors /= m.stride.view(-1, 1, 1) 91 | check_anchor_order(m) 92 | self.stride = m.stride 93 | self._initialize_biases() # only run once 94 | # print('Strides: %s' % m.stride.tolist()) 95 | 96 | # Init weights, biases 97 | initialize_weights(self) 98 | self.info() 99 | logger.info('') 100 | 101 | def forward(self, x, augment=False, profile=False): 102 | if augment: 103 | img_size = x.shape[-2:] # height, width 104 | s = [1, 0.83, 0.67] # scales 105 | f = [None, 3, None] # flips (2-ud, 3-lr) 106 | y = [] # outputs 107 | for si, fi in zip(s, f): 108 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 109 | yi = self.forward_once(xi)[0] # forward 110 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 111 | yi[..., :4] /= si # de-scale 112 | if fi == 2: 113 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 114 | elif fi == 3: 115 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 116 | y.append(yi) 117 | return torch.cat(y, 1), None # augmented inference, train 118 | else: 119 | return self.forward_once(x, profile) # single-scale inference, train 120 | 121 | def forward_once(self, x, profile=False): 122 | y, dt = [], [] # outputs 123 | for m in self.model: 124 | # show network 125 | # print(x.shape) 126 | # print(type(m)) 127 | if m.f != -1: # if not from previous layer 128 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 129 | 130 | if profile: 131 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 132 | t = time_synchronized() 133 | for _ in range(10): 134 | _ = m(x) 135 | dt.append((time_synchronized() - t) * 100) 136 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 137 | 138 | x = m(x) # run 139 | y.append(x if m.i in self.save else None) # save output 140 | 141 | if profile: 142 | print('%.1fms total' % sum(dt)) 143 | return x 144 | 145 | def 
_initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 146 | # https://arxiv.org/abs/1708.02002 section 3.3 147 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 148 | m = self.model[-1] # Detect() module 149 | for mi, s in zip(m.m, m.stride): # from 150 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 151 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 152 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 153 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 154 | 155 | def _print_biases(self): 156 | m = self.model[-1] # Detect() module 157 | for mi in m.m: # from 158 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 159 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 160 | 161 | # def _print_weights(self): 162 | # for m in self.model.modules(): 163 | # if type(m) is Bottleneck: 164 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 165 | 166 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 167 | print('Fusing layers... ') 168 | for m in self.model.modules(): 169 | if type(m) is Conv and hasattr(m, 'bn'): 170 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 171 | delattr(m, 'bn') # remove batchnorm 172 | m.forward = m.fuseforward # update forward 173 | self.info() 174 | return self 175 | 176 | def nms(self, mode=True): # add or remove NMS module 177 | present = type(self.model[-1]) is NMS # last layer is NMS 178 | if mode and not present: 179 | print('Adding NMS... ') 180 | m = NMS() # module 181 | m.f = -1 # from 182 | m.i = self.model[-1].i + 1 # index 183 | self.model.add_module(name='%s' % m.i, module=m) # add 184 | self.eval() 185 | elif not mode and present: 186 | print('Removing NMS... ') 187 | self.model = self.model[:-1] # remove 188 | return self 189 | 190 | def autoshape(self): # add autoShape module 191 | print('Adding autoShape... 
') 192 | m = autoShape(self) # wrap model 193 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 194 | return m 195 | 196 | def info(self, verbose=False, img_size=640): # print model information 197 | model_info(self, verbose, img_size) 198 | 199 | 200 | def parse_model(d, ch): # model_dict, input_channels(3) 201 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 202 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 203 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 204 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 205 | 206 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 207 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 208 | m = eval(m) if isinstance(m, str) else m # eval strings 209 | for j, a in enumerate(args): 210 | try: 211 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 212 | except: 213 | pass 214 | 215 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 216 | if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3,Ctiny,GhostBottleneck,InvertedResidual,nn.ConvTranspose2d]: 217 | c1, c2 = ch[f], args[0] 218 | 219 | if m in [Conv] and args[-1]==nn.LeakyReLU: 220 | args[-1]=nn.LeakyReLU(0.1, inplace=True) 221 | 222 | # Normal 223 | # if i > 0 and args[0] != no: # channel expansion factor 224 | # ex = 1.75 # exponential (default 2.0) 225 | # e = math.log(c2 / ch[1]) / math.log(2) 226 | # c2 = int(ch[1] * ex ** e) 227 | # if m != Focus: 228 | 229 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 230 | 231 | # Experimental 232 | # if i > 0 and args[0] != no: # channel expansion factor 233 | # ex = 1 + gw # exponential (default 2.0) 234 | # ch1 = 32 # ch[1] 235 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 236 | # c2 = int(ch1 * ex ** e) 237 | # if m != Focus: 238 | # c2 = make_divisible(c2, 8) if c2 != no else c2 239 | 240 | args = [c1, c2, *args[1:]] 241 | if m in [BottleneckCSP, C3,Ctiny,GhostBottleneck]: 242 | args.insert(2, n) 243 | n = 1 244 | elif m is nn.BatchNorm2d: 245 | args = [ch[f]] 246 | elif m is Concat: 247 | c2 = sum([ch[x if x < 0 else x + 1] for x in f]) 248 | elif m is Detect: 249 | args.append([ch[x + 1] for x in f]) 250 | if isinstance(args[1], int): # number of anchors 251 | args[1] = [list(range(args[1] * 2))] * len(f) 252 | elif m is Contract: 253 | c2 = ch[f if f < 0 else f + 1] * args[0] ** 2 254 | elif m is Expand: 255 | c2 = ch[f if f < 0 else f + 1] // args[0] ** 2 256 | else: 257 | c2 = ch[f if f < 0 else f + 1] 258 | 259 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 260 | t = str(m)[8:-2].replace('__main__.', '') # module type 261 | np = sum([x.numel() for x in m_.parameters()]) # number params 262 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 263 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 264 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 265 | layers.append(m_) 266 | ch.append(c2) 267 | return nn.Sequential(*layers), sorted(save) 268 | 269 | 270 | if __name__ == '__main__': 271 | parser = argparse.ArgumentParser() 272 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 273 | parser.add_argument('--device', default='', help='cuda 
device, i.e. 0 or 0,1,2,3 or cpu') 274 | opt = parser.parse_args() 275 | opt.cfg = check_file(opt.cfg) # check file 276 | set_logging() 277 | device = select_device(opt.device) 278 | 279 | # Create model 280 | model = Model(opt.cfg).to(device) 281 | model.train() 282 | 283 | # Profile 284 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 285 | # y = model(img, profile=True) 286 | 287 | # Tensorboard 288 | # from torch.utils.tensorboard import SummaryWriter 289 | # tb_writer = SummaryWriter() 290 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 291 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 292 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 293 | --------------------------------------------------------------------------------