├── images
│   ├── res.jpg
│   ├── call3.jpg
│   └── zidane.jpg
├── get_small_script
│   ├── to_jit_cpu.py
│   ├── to_jit_gpu.py
│   ├── vis_pruned_model.py
│   ├── cal_model_flops.py
│   └── get_small_model.py
├── data
│   ├── voc.yaml
│   ├── hyp.finetune.yaml
│   └── hyp.scratch.yaml
├── requirements.txt
├── weights
│   └── copy_weight.py
├── VOC2012
│   ├── step1_split_data.py
│   └── step2_voc_label.py
├── models
│   ├── yolov5l.yaml
│   ├── experimental.py
│   ├── common.py
│   └── yolo.py
├── readme.md
├── utils
│   └── torch_utils.py
├── model.py
├── loss.py
├── small_model_mod.py
├── test.py
├── detector_cpu.py
├── detector_gpu.py
├── train.py
└── train_prune_sfp.py
/images/res.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhwNobody/yolov5_prune_sfp/HEAD/images/res.jpg
--------------------------------------------------------------------------------
/images/call3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhwNobody/yolov5_prune_sfp/HEAD/images/call3.jpg
--------------------------------------------------------------------------------
/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xhwNobody/yolov5_prune_sfp/HEAD/images/zidane.jpg
--------------------------------------------------------------------------------
/get_small_script/to_jit_cpu.py:
--------------------------------------------------------------------------------
1 | import sys,os
2 | sys.path.append('../')
3 | import torch
4 | model = torch.load('best.pt', map_location='cpu')['model'].float()
5 | #print(model.keys())
6 | model.eval()
7 | input_shape = torch.rand([1, 3, 640, 640])
8 | input_shape = input_shape#.cuda()
9 | model(input_shape)
10 | torch.jit.trace(model, input_shape).save('best_cpu.torchscript.pt')
11 | 
12 | 
--------------------------------------------------------------------------------
/data/voc.yaml:
--------------------------------------------------------------------------------
1 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
2 | train: /root/xhw/yolov5-3.1-self/paper_voc2007_2012/train.txt # 118287 images
3 | val: /root/xhw/yolov5-3.1-self/paper_voc2007_2012/val.txt # 5000 images
4 | #test: /root/xhw/yolov5-3.1-self/paper_voc2007/val.txt # 5000 images
5 | 
6 | # number of classes
7 | nc: 20
8 | 
9 | # class names
10 | names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
11 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
12 | 
13 | 
14 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # pip install -r requirements.txt
2 | 
3 | # base ----------------------------------------
4 | Cython
5 | matplotlib>=3.2.2
6 | numpy>=1.18.5
7 | opencv-python>=4.1.2
8 | pillow
9 | PyYAML>=5.3
10 | scipy>=1.4.1
11 | tensorboard>=2.2
12 | torch>=1.6.0
13 | torchvision>=0.7.0
14 | tqdm>=4.41.0
15 | 
16 | # coco ----------------------------------------
17 | # pycocotools>=2.0
18 | 
19 | # export --------------------------------------
20 | # packaging # for coremltools
21 | # coremltools==4.0
22 | # onnx>=1.7.0
23 | # scikit-learn==0.19.2 # for coreml quantization
24 | 
25 | # extras --------------------------------------
26 | # thop # FLOPS computation
27 | #
seaborn # plotting 28 | -------------------------------------------------------------------------------- /get_small_script/to_jit_gpu.py: -------------------------------------------------------------------------------- 1 | import sys,os 2 | sys.path.append('../') 3 | import torch 4 | 5 | model = torch.load('best.pt', map_location='cuda')['model'].float() 6 | #print(model.keys()) 7 | model.eval() 8 | input_shape = torch.rand([1, 3, 640, 640]) 9 | input_shape = input_shape.cuda() 10 | model(input_shape) 11 | torch.jit.trace(model, input_shape).save('best.torchscript.pt') 12 | 13 | model = torch.load('small_model_all.pt', map_location='cuda')['model'].float() 14 | #print(model.keys()) 15 | model.eval() 16 | input_shape = torch.rand([1, 3, 640, 640]) 17 | input_shape = input_shape.cuda() 18 | model(input_shape) 19 | torch.jit.trace(model, input_shape).save('small_model_all.torchscript.pt') 20 | 21 | 22 | -------------------------------------------------------------------------------- /weights/copy_weight.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys,os 3 | sys.path.append('../') 4 | from model import Model 5 | 6 | model_src = torch.load('yolov5l.pt')#['model'] 7 | model_src = model_src['model'] 8 | model_src_dict = model_src.state_dict() 9 | 10 | model_dst = Model(nc=80) 11 | model_dst_dict = model_dst.state_dict() 12 | model_dst.load_state_dict(model_dst_dict) 13 | 14 | model_dst_dict_list = list(model_dst_dict) 15 | 16 | for ind, (key, value) in enumerate(model_src_dict.items()): 17 | 18 | model_dst_dict[model_dst_dict_list[ind]] = value 19 | 20 | model_dst.load_state_dict(model_dst_dict) 21 | ckpt = {'epoch': -1, 22 | 'best_fitness': None, 23 | 'training_results': None, 24 | 'model': model_dst, 25 | 'optimizer': None } 26 | torch.save(ckpt,'pretrained.pt') -------------------------------------------------------------------------------- /data/hyp.finetune.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for VOC finetuning 2 | # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | # Hyperparameter Evolution Results 7 | # Generations: 306 8 | # P R mAP.5 mAP.5:.95 box obj cls 9 | # Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146 10 | 11 | lr0: 0.0032 12 | lrf: 0.12 13 | momentum: 0.843 14 | weight_decay: 0.00036 15 | warmup_epochs: 2.0 16 | warmup_momentum: 0.5 17 | warmup_bias_lr: 0.05 18 | box: 0.0296 19 | cls: 0.243 20 | cls_pw: 0.631 21 | obj: 0.301 22 | obj_pw: 0.911 23 | iou_t: 0.2 24 | anchor_t: 2.91 25 | # anchors: 3.63 26 | fl_gamma: 0.0 27 | hsv_h: 0.0138 28 | hsv_s: 0.664 29 | hsv_v: 0.464 30 | degrees: 0.373 31 | translate: 0.245 32 | scale: 0.898 33 | shear: 0.602 34 | perspective: 0.0 35 | flipud: 0.00856 36 | fliplr: 0.5 37 | mosaic: 1.0 38 | mixup: 0.243 39 | -------------------------------------------------------------------------------- /get_small_script/vis_pruned_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import sys,os 4 | sys.path.append('../') 5 | # print(model['model'].parameters()) 6 | # for index, item in enumerate(model['model'].named_modules()): 7 | # print(index, item) 8 | 9 | # i = 0 10 | # for index, item in enumerate(model['model'].parameters()): 11 | # 12 | # i+=1 13 | # print(i) 14 | 15 | # 
model_state_dict = model['model'].state_dict() 16 | # model_state_dict = model.state_dict() 17 | 18 | # model_state_dict = model['model'].state_dict() 19 | # for index, [key, value] in enumerate(model_state_dict.items()): 20 | # print(index, key, value.shape) 21 | 22 | big_model = torch.load('big_model.pt') 23 | # print(big_model.keys()) 24 | small_model = torch.load('small_model.pt') 25 | big_model_state_dict = big_model.state_dict() 26 | small_model_state_dict = small_model.state_dict() 27 | ind = 0 28 | for index, [key, value] in enumerate(big_model_state_dict.items()): 29 | if 'bn.running_mean' not in key and 'bn.running_var' not in key and 'bn.num_batches_tracked' not in key: 30 | print(ind, index, key, value.shape, small_model_state_dict[key].shape) 31 | ind += 1 32 | 33 | # print(big_model_state_dict['backbone_self.csp2.cv4.bn.weight']) 34 | # print(small_model_state_dict['backbone_self.csp2.cv4.bn.weight']) 35 | # 36 | # print(big_model_state_dict['backbone_self.csp2.cv4.bn.bias']) 37 | # print(small_model_state_dict['backbone_self.csp2.cv4.bn.bias']) 38 | -------------------------------------------------------------------------------- /VOC2012/step1_split_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import random 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | #xml文件的地址,根据自己的数据进行修改 xml一般存放在Annotations下 8 | parser.add_argument('--xml_path', default='Annotations', type=str, help='input xml label path') 9 | #数据集的划分,地址选择自己数据下的ImageSets/Main 10 | parser.add_argument('--txt_path', default='ImageSets/Main', type=str, help='output txt label path') 11 | opt = parser.parse_args() 12 | 13 | trainval_percent = 0.95 14 | train_percent = 0.8 15 | xmlfilepath = opt.xml_path 16 | txtsavepath = opt.txt_path 17 | total_xml = os.listdir(xmlfilepath) 18 | if not os.path.exists(txtsavepath): 19 | os.makedirs(txtsavepath) 20 | 21 | num = len(total_xml) 22 | list_index = range(num) 23 | tv = int(num * trainval_percent) 24 | tr = int(tv * train_percent) 25 | trainval = random.sample(list_index, tv) 26 | train = random.sample(trainval, tr) 27 | 28 | file_trainval = open(txtsavepath + '/trainval.txt', 'w') 29 | file_test = open(txtsavepath + '/test.txt', 'w') 30 | file_train = open(txtsavepath + '/train.txt', 'w') 31 | file_val = open(txtsavepath + '/val.txt', 'w') 32 | 33 | for i in list_index: 34 | name = total_xml[i][:-4] + '\n' 35 | if i in trainval: 36 | file_trainval.write(name) 37 | if i in train: 38 | file_train.write(name) 39 | else: 40 | file_val.write(name) 41 | else: 42 | file_test.write(name) 43 | 44 | file_trainval.close() 45 | file_train.close() 46 | file_val.close() 47 | file_test.close() -------------------------------------------------------------------------------- /get_small_script/cal_model_flops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import sys 4 | sys.path.append('../') 5 | from thop import profile 6 | from model import Model 7 | from small_model_mod import Small_Model 8 | 9 | #big model 10 | net1 = torch.load('yolov5_weights/yolov5l.pt', map_location='cuda:0')['model'].type(torch.FloatTensor) 11 | net1.to('cuda') 12 | input1 = torch.randn(1, 3, 640, 640).to('cuda') 13 | 14 | flops1, params1 = profile(net1, (input1,)) 15 | print('big model flops: ', flops1, 'big model params: ', params1) 16 | 17 | #small model 18 | net2 = torch.load('yolov5_weights/yolov5m.pt', 
map_location='cuda:0')['model'].type(torch.FloatTensor) 19 | net2.to('cuda') 20 | input2 = torch.randn(1, 3, 640, 640).to('cuda') 21 | 22 | flops2, params2 = profile(net2, (input2,)) 23 | print('flops: ', flops2, 'params: ', params2) 24 | 25 | 26 | #big model 27 | net1 = torch.load('best.pt', map_location='cuda:0')['model'].type(torch.FloatTensor) 28 | net1.to('cuda') 29 | input1 = torch.randn(1, 3, 640, 640).to('cuda') 30 | 31 | flops1, params1 = profile(net1, (input1,)) 32 | print('big model flops: ', flops1, 'big model params: ', params1) 33 | 34 | #small model 35 | net2 = torch.load('small_model_all.pt', map_location='cuda:0')['model'].type(torch.FloatTensor) 36 | net2.to('cuda') 37 | input2 = torch.randn(1, 3, 640, 640).to('cuda') 38 | 39 | flops2, params2 = profile(net2, (input2,)) 40 | print('flops: ', flops2, 'params: ', params2) 41 | 42 | 43 | 44 | 45 | #flops: 55897084800.0 params: 42157128.0 46 | #flops: 27991932800.0 params: 20636701.0 47 | 48 | -------------------------------------------------------------------------------- /data/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 0 # anchors per output grid (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.0 # image mixup (probability) 34 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], ######################### 18 | 
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 |    [-1, 9, BottleneckCSP, [256]], #########################
20 |    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 |    [-1, 9, BottleneckCSP, [512]], #########################
22 |    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 |    [-1, 1, SPP, [1024, [5, 9, 13]]],
24 |    [-1, 3, BottleneckCSP, [1024, False]], # 9
25 |   ]
26 | 
27 | # YOLOv5 head
28 | head:
29 |   [[-1, 1, Conv, [512, 1, 1]],
30 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 |    [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 |    [-1, 3, BottleneckCSP, [512, False]], # 13
33 | 
34 |    [-1, 1, Conv, [256, 1, 1]],
35 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 |    [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 |    [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 | 
39 |    [-1, 1, Conv, [256, 3, 2]],
40 |    [[-1, 14], 1, Concat, [1]], # cat head P4
41 |    [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 | 
43 |    [-1, 1, Conv, [512, 3, 2]],
44 |    [[-1, 10], 1, Concat, [1]], # cat head P5
45 |    [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 | 
47 |    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 |   ]
49 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # A soft-pruning implementation for YOLOv5 based on SFP and FPGM
2 | 
3 | ### Installation
4 | 
5 | ```sh
6 | $ git clone https://github.com/xhwNobody/yolov5_prune_sfp.git
7 | $ cd yolov5_prune_sfp && pip3 install -r requirements.txt
8 | ```
9 | 
10 | ### Inference
11 | 
12 | 1. Download the [model weights](https://pan.baidu.com/s/1_4gLnNwG5RaJBggKRpXoxQ), extraction code: 4b6p
13 | 
14 | 
15 | Note: yolov5l is the official pretrained model; best.pt is the weight file trained with SFP pruning.
16 | 
17 | ```sh
18 | $ mv weights yolov5_prune_sfp
19 | ```
20 | 
21 | 2. Prune the weights and convert the model
22 | 
23 | ```shell
24 | $ cp weights/best.pt get_small_script
25 | $ python3 get_small_script/get_small_model.py
26 | $ python3 to_jit_gpu.py (or python3 to_jit_cpu.py)
27 | ```
28 | 
29 | 3. Run inference on GPU or CPU
30 | 
31 | ```sh
32 | $ python3 detector_gpu.py (or python3 detector_cpu.py)
33 | ```
34 | 
35 | ### Training
36 | 
37 | 1. Download the [VOC dataset](https://pan.baidu.com/s/12ncD6qfj8WsGotmB8vlm7g), extraction code: 7jnf
38 | 
39 | 2. Generate the labels
40 | 
41 | ```shell
42 | $ python3 VOC2012/step1_split_data.py
43 | $ python3 VOC2012/step2_voc_label.py
44 | ```
45 | 
46 | 3. Convert the model
47 | 
48 | ```shell
49 | $ python3 weights/copy_weight.py
50 | ```
51 | 
52 | 4. Start training
53 | 
54 | ① Normal training (no pruning)
55 | 
56 | ```sh
57 | $ python3 train.py --data data/voc.yaml --weights weights/pretrained.pt --epoch 50 --device 0 --hyp data/hyp.finetune.yaml
58 | ```
59 | 
60 | ② Pruned training with SFP
61 | 
62 | ```sh
63 | $ python3 train_prune_sfp.py --data data/voc.yaml --device 1 --weights weights/pretrained.pt --hyp data/hyp.finetune.yaml
64 | ```
65 | 
66 | ③ Pruned training with FPGM
67 | 
68 | ```sh
69 | $ python3 train_prune_fpgm.py --data data/voc.yaml --device 1 --weights weights/pretrained.pt --hyp data/hyp.finetune.yaml
70 | ```
71 | 
72 | ### Article
73 | 
74 | For background and details, see the Zhihu article: https://zhuanlan.zhihu.com/p/391045703
75 | 
76 | ### References
77 | 
78 | [1] Official YOLOv5 repository: https://github.com/ultralytics/yolov5.git
79 | 
80 | [2] SFP code: https://github.com/he-y/soft-filter-pruning.git
81 | 
82 | [3] FPGM code: https://github.com/he-y/filter-pruning-geometric-median.git
83 | 
84 | \*\***If this article and the code are helpful to you, please give them a like and a star. Feedback and discussion are welcome. Thank you!**\*\*
85 | 
86 | 
--------------------------------------------------------------------------------
/VOC2012/step2_voc_label.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import xml.etree.ElementTree as ET
3 | import os
4 | from
os import getcwd 5 | 6 | sets = ['train', 'val', 'test'] 7 | classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 8 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] 9 | abs_path = os.getcwd() 10 | print(abs_path) 11 | 12 | def convert(size, box): 13 | dw = 1. / (size[0]) 14 | dh = 1. / (size[1]) 15 | x = (box[0] + box[1]) / 2.0 - 1 16 | y = (box[2] + box[3]) / 2.0 - 1 17 | w = box[1] - box[0] 18 | h = box[3] - box[2] 19 | x = x * dw 20 | w = w * dw 21 | y = y * dh 22 | h = h * dh 23 | return x, y, w, h 24 | 25 | def convert_annotation(image_id): 26 | in_file = open('Annotations/%s.xml' % (image_id), encoding='UTF-8') 27 | out_file = open('labels/%s.txt' % (image_id), 'w') 28 | tree = ET.parse(in_file) 29 | root = tree.getroot() 30 | size = root.find('size') 31 | w = int(size.find('width').text) 32 | h = int(size.find('height').text) 33 | for obj in root.iter('object'): 34 | # difficult = obj.find('difficult').text 35 | #difficult = obj.find('Difficult').text 36 | cls = obj.find('name').text 37 | if cls not in classes: #or int(difficult) == 1: 38 | continue 39 | cls_id = classes.index(cls) 40 | xmlbox = obj.find('bndbox') 41 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), 42 | float(xmlbox.find('ymax').text)) 43 | b1, b2, b3, b4 = b 44 | # 标注越界修正 45 | if b2 > w: 46 | b2 = w 47 | if b4 > h: 48 | b4 = h 49 | b = (b1, b2, b3, b4) 50 | bb = convert((w, h), b) 51 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 52 | 53 | wd = getcwd() 54 | for image_set in sets: 55 | if not os.path.exists('labels/'): 56 | os.makedirs('labels/') 57 | image_ids = open('ImageSets/Main/%s.txt' % (image_set)).read().strip().split() 58 | list_file = open('%s.txt' % (image_set), 'w') 59 | for image_id in image_ids: 60 | list_file.write(abs_path + '/images/%s.jpg\n' % (image_id)) 61 | convert_annotation(image_id) 62 | list_file.close() 63 | -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This file contains modules common to various models 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | 9 | class CrossConv(nn.Module): 10 | # Cross Convolution Downsample 11 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 12 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 13 | super(CrossConv, self).__init__() 14 | c_ = int(c2 * e) # hidden channels 15 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 16 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 17 | self.add = shortcut and c1 == c2 18 | 19 | def forward(self, x): 20 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 21 | 22 | 23 | class C3(nn.Module): 24 | # Cross Convolution CSP 25 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 26 | super(C3, self).__init__() 27 | c_ = int(c2 * e) # hidden channels 28 | self.cv1 = Conv(c1, c_, 1, 1) 29 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 30 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 31 | self.cv4 = Conv(2 * c_, c2, 1, 1) 32 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 33 | self.act = nn.LeakyReLU(0.1, inplace=True) 34 | self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 
1.0, shortcut) for _ in range(n)]) 35 | 36 | def forward(self, x): 37 | y1 = self.cv3(self.m(self.cv1(x))) 38 | y2 = self.cv2(x) 39 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 40 | 41 | 42 | class Sum(nn.Module): 43 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 44 | def __init__(self, n, weight=False): # n: number of inputs 45 | super(Sum, self).__init__() 46 | self.weight = weight # apply weights boolean 47 | self.iter = range(n - 1) # iter object 48 | if weight: 49 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 50 | 51 | def forward(self, x): 52 | y = x[0] # no weight 53 | if self.weight: 54 | w = torch.sigmoid(self.w) * 2 55 | for i in self.iter: 56 | y = y + x[i + 1] * w[i] 57 | else: 58 | for i in self.iter: 59 | y = y + x[i + 1] 60 | return y 61 | 62 | 63 | class GhostConv(nn.Module): 64 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 65 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 66 | super(GhostConv, self).__init__() 67 | c_ = c2 // 2 # hidden channels 68 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 69 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 70 | 71 | def forward(self, x): 72 | y = self.cv1(x) 73 | return torch.cat([y, self.cv2(y)], 1) 74 | 75 | 76 | class GhostBottleneck(nn.Module): 77 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 78 | def __init__(self, c1, c2, k, s): 79 | super(GhostBottleneck, self).__init__() 80 | c_ = c2 // 2 81 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 82 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 83 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 84 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 85 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 86 | 87 | def forward(self, x): 88 | return self.conv(x) + self.shortcut(x) 89 | 90 | 91 | class MixConv2d(nn.Module): 92 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 93 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 94 | super(MixConv2d, self).__init__() 95 | groups = len(k) 96 | if equal_ch: # equal c_ per group 97 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 98 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 99 | else: # equal weight.numel() per group 100 | b = [c2] + [0] * groups 101 | a = np.eye(groups + 1, groups, k=-1) 102 | a -= np.roll(a, 1, axis=1) 103 | a *= np.array(k) ** 2 104 | a[0] = 1 105 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 106 | 107 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 108 | self.bn = nn.BatchNorm2d(c2) 109 | self.act = nn.LeakyReLU(0.1, inplace=True) 110 | 111 | def forward(self, x): 112 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 113 | 114 | 115 | class Ensemble(nn.ModuleList): 116 | # Ensemble of models 117 | def __init__(self): 118 | super(Ensemble, self).__init__() 119 | 120 | def forward(self, x, augment=False): 121 | y = [] 122 | for module in self: 123 | y.append(module(x, augment)[0]) 124 | # y = torch.stack(y).max(0)[0] # max ensemble 125 | # y = torch.cat(y, 1) # nms ensemble 126 | y = torch.stack(y).mean(0) # mean ensemble 127 | return y, None # inference, train output 128 | 129 | 130 | def attempt_load(weights, map_location=None): 131 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 
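# Illustrative call patterns for attempt_load (paths are placeholders, not files guaranteed by this repo):
#   model = attempt_load('weights/best.pt', map_location=torch.device('cpu'))        # single checkpoint
#   ensemble = attempt_load(['weights/best.pt', 'weights/pretrained.pt'])             # ensemble of checkpoints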
132 | model = Ensemble() 133 | for w in weights if isinstance(weights, list) else [weights]: 134 | model.append(torch.load(w, map_location=map_location)['model'].float().eval()) # load FP32 model 135 | 136 | # Compatibility updates 137 | for m in model.modules(): 138 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 139 | m.inplace = True # pytorch 1.7.0 compatibility 140 | elif type(m) is Conv: 141 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 142 | 143 | if len(model) == 1: 144 | return model[-1] # return model 145 | else: 146 | print('Ensemble created with %s\n' % weights) 147 | for k in ['names', 'stride']: 148 | setattr(model, k, getattr(model[-1], k)) 149 | return model # return ensemble -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import os 4 | import time 5 | from copy import deepcopy 6 | 7 | import math 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import torchvision 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | def is_parallel(model): 17 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 18 | 19 | def init_torch_seeds(seed=0): 20 | torch.manual_seed(seed) 21 | 22 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 23 | if seed == 0: # slower, more reproducible 24 | cudnn.deterministic = True 25 | cudnn.benchmark = False 26 | else: # faster, less reproducible 27 | cudnn.deterministic = False 28 | cudnn.benchmark = True 29 | 30 | class ModelEMA: 31 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 32 | Keep a moving average of everything in the model state_dict (parameters and buffers). 33 | This is intended to allow functionality like 34 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 35 | A smoothed version of the weights is necessary for some training schemes to perform well. 36 | This class is sensitive where it is initialized in the sequence of model init, 37 | GPU assignment and distributed training wrappers. 38 | """ 39 | 40 | def __init__(self, model, decay=0.9999, updates=0): 41 | # Create EMA 42 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 43 | # if next(model.parameters()).device.type != 'cpu': 44 | # self.ema.half() # FP16 EMA 45 | self.updates = updates # number of EMA updates 46 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 47 | for p in self.ema.parameters(): 48 | p.requires_grad_(False) 49 | 50 | def update(self, model): 51 | # Update EMA parameters 52 | with torch.no_grad(): 53 | self.updates += 1 54 | d = self.decay(self.updates) 55 | 56 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 57 | for k, v in self.ema.state_dict().items(): 58 | if v.dtype.is_floating_point: 59 | v *= d 60 | v += (1. 
- d) * msd[k].detach() 61 | 62 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 63 | # Update EMA attributes 64 | copy_attr(self.ema, model, include, exclude) 65 | 66 | def select_device(device='', batch_size=None): 67 | # device = 'cpu' or '0' or '0,1,2,3' 68 | cpu_request = device.lower() == 'cpu' 69 | if device and not cpu_request: # if device requested other than 'cpu' 70 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 71 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 72 | 73 | cuda = False if cpu_request else torch.cuda.is_available() 74 | if cuda: 75 | c = 1024 ** 2 # bytes to MB 76 | ng = torch.cuda.device_count() 77 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 78 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 79 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 80 | s = 'Using CUDA ' 81 | for i in range(0, ng): 82 | if i == 1: 83 | s = ' ' * len(s) 84 | logger.info("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 85 | (s, i, x[i].name, x[i].total_memory / c)) 86 | else: 87 | logger.info('Using CPU') 88 | 89 | logger.info('') # skip a line 90 | return torch.device('cuda:0' if cuda else 'cpu') 91 | 92 | def copy_attr(a, b, include=(), exclude=()): 93 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 94 | for k, v in b.__dict__.items(): 95 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 96 | continue 97 | else: 98 | setattr(a, k, v) 99 | 100 | def intersect_dicts(da, db, exclude=()): 101 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 102 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 103 | 104 | def time_synchronized(): 105 | torch.cuda.synchronize() if torch.cuda.is_available() else None 106 | return time.time() 107 | 108 | def fuse_conv_and_bn(conv, bn): 109 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 110 | 111 | # init 112 | fusedconv = nn.Conv2d(conv.in_channels, 113 | conv.out_channels, 114 | kernel_size=conv.kernel_size, 115 | stride=conv.stride, 116 | padding=conv.padding, 117 | groups=conv.groups, 118 | bias=True).requires_grad_(False).to(conv.weight.device) 119 | 120 | # prepare filters 121 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 122 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 123 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 124 | 125 | # prepare spatial bias 126 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 127 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 128 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 129 | 130 | return fusedconv 131 | 132 | def model_info(model, verbose=False): 133 | # Plots a line-by-line description of a PyTorch model 134 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 135 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 136 | if verbose: 137 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 138 | for i, (name, p) in 
enumerate(model.named_parameters()): 139 | name = name.replace('module_list.', '') 140 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 141 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 142 | 143 | try: # FLOPS 144 | from thop import profile 145 | flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2 146 | fs = ', %.1f GFLOPS' % (flops * 100) # 640x640 FLOPS 147 | except: 148 | fs = '' 149 | 150 | logger.info( 151 | 'Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs)) 152 | 153 | def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio 154 | # scales img(bs,3,y,x) by ratio 155 | if ratio == 1.0: 156 | return img 157 | else: 158 | h, w = img.shape[2:] 159 | s = (int(h * ratio), int(w * ratio)) # new size 160 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 161 | if not same_shape: # pad/crop img 162 | gs = 32 # (pixels) grid size 163 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 164 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 165 | 166 | def initialize_weights(model): 167 | for m in model.modules(): 168 | t = type(m) 169 | if t is nn.Conv2d: 170 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 171 | elif t is nn.BatchNorm2d: 172 | m.eps = 1e-3 173 | m.momentum = 0.03 174 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 175 | m.inplace = True 176 | 177 | def load_classifier(name='resnet101', n=2): 178 | # Loads a pretrained model reshaped to n-class output 179 | model = torchvision.models.__dict__[name](pretrained=True) 180 | 181 | # ResNet model properties 182 | # input_size = [3, 224, 224] 183 | # input_space = 'RGB' 184 | # input_range = [0, 1] 185 | # mean = [0.485, 0.456, 0.406] 186 | # std = [0.229, 0.224, 0.225] 187 | 188 | # Reshape output to n classes 189 | filters = model.fc.weight.shape[1] 190 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 191 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 192 | model.fc.out_features = n 193 | return model 194 | -------------------------------------------------------------------------------- /models/common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This file contains modules common to various models 3 | 4 | import math 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | 9 | from datasets import letterbox 10 | from utils.general import non_max_suppression, make_divisible, scale_coords 11 | 12 | def autopad(k, p=None): # kernel, padding 13 | # Pad to 'same' 14 | if p is None: 15 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 16 | return p 17 | 18 | def DWConv(c1, c2, k=1, s=1, act=True): 19 | # Depthwise convolution 20 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 21 | 22 | class Conv(nn.Module): 23 | # Standard convolution 24 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 25 | super(Conv, self).__init__() 26 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 27 | self.bn = nn.BatchNorm2d(c2) 28 | self.act = nn.Hardswish() if act else nn.Identity() 29 | 30 | def forward(self, x): 31 | return self.act(self.bn(self.conv(x))) 32 | 33 | def fuseforward(self, x): 34 | return self.act(self.conv(x)) 35 | 36 | class 
Bottleneck(nn.Module): 37 | # Standard bottleneck 38 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 39 | super(Bottleneck, self).__init__() 40 | c_ = int(c2 * e) # hidden channels 41 | self.cv1 = Conv(c1, c_, 1, 1) 42 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 43 | self.add = shortcut and c1 == c2 44 | 45 | def forward(self, x): 46 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 47 | 48 | class BottleneckCSP(nn.Module): 49 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 50 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 51 | super(BottleneckCSP, self).__init__() 52 | c_ = int(c2 * e) # hidden channels 53 | self.cv1 = Conv(c1, c_, 1, 1) 54 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 55 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 56 | self.cv4 = Conv(2 * c_, c2, 1, 1) 57 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 58 | self.act = nn.LeakyReLU(0.1, inplace=True) 59 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 60 | 61 | def forward(self, x): 62 | y1 = self.cv3(self.m(self.cv1(x))) 63 | y2 = self.cv2(x) 64 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 65 | 66 | class Bottleneck_index(nn.Module): 67 | # Standard bottleneck 68 | def __init__(self, c1, c2, pr, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 69 | super(Bottleneck_index, self).__init__() 70 | c_ = int(c2 * e) # hidden channels 71 | self.cv1 = Conv(c1, int(c_*pr), 1, 1) 72 | self.cv2 = Conv(int(c_*pr), c2, 3, 1, g=g) 73 | self.add = shortcut and c1 == c2 74 | 75 | def forward(self, x): 76 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 77 | 78 | class BottleneckCSP_index(nn.Module): 79 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 80 | def __init__(self, c1, c2, pr, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion c1 128 c2 128 81 | super(BottleneckCSP_index, self).__init__() 82 | c_ = int(c2 * e) # hidden channels 128 83 | self.cv1 = Conv(int(c1 * pr), c_, 1, 1) 84 | self.cv2 = nn.Conv2d(int(c1*pr), int(c_*pr), 1, 1, bias=False) 85 | self.cv3 = nn.Conv2d(c_, int(c_*pr), 1, 1, bias=False) 86 | self.cv4 = Conv(2 * int(c_*pr), int(c2*pr), 1, 1) 87 | self.bn = nn.BatchNorm2d(2 * int(c_*pr)) # applied to cat(cv2, cv3) 88 | self.act = nn.LeakyReLU(0.1, inplace=True) 89 | self.m = nn.Sequential(*[Bottleneck_index(c_, c_, pr, shortcut, g, e=1.0) for _ in range(n)]) 90 | 91 | def forward(self, x): 92 | y1 = self.cv3(self.m(self.cv1(x))) 93 | y2 = self.cv2(x) 94 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 95 | 96 | 97 | 98 | class SPP(nn.Module): 99 | # Spatial pyramid pooling layer used in YOLOv3-SPP 100 | def __init__(self, c1, c2, k=(5, 9, 13)): 101 | super(SPP, self).__init__() 102 | c_ = c1 // 2 # hidden channels 103 | self.cv1 = Conv(c1, c_, 1, 1) 104 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 105 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 106 | 107 | def forward(self, x): 108 | x = self.cv1(x) 109 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 110 | 111 | 112 | class Focus(nn.Module): 113 | # Focus wh information into c-space 114 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 115 | super(Focus, self).__init__() 116 | 
self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 117 | 118 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 119 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 120 | 121 | 122 | class Concat(nn.Module): 123 | # Concatenate a list of tensors along dimension 124 | def __init__(self, dimension=1): 125 | super(Concat, self).__init__() 126 | self.d = dimension 127 | 128 | def forward(self, x): 129 | return torch.cat(x, self.d) 130 | 131 | 132 | class NMS(nn.Module): 133 | # Non-Maximum Suppression (NMS) module 134 | conf = 0.25 # confidence threshold 135 | iou = 0.45 # IoU threshold 136 | classes = None # (optional list) filter by class 137 | 138 | def __init__(self): 139 | super(NMS, self).__init__() 140 | 141 | def forward(self, x): 142 | return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) 143 | 144 | 145 | class autoShape(nn.Module): 146 | # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 147 | img_size = 640 # inference size (pixels) 148 | conf = 0.25 # NMS confidence threshold 149 | iou = 0.45 # NMS IoU threshold 150 | classes = None # (optional list) filter by class 151 | 152 | def __init__(self, model): 153 | super(autoShape, self).__init__() 154 | self.model = model 155 | 156 | def forward(self, x, size=640, augment=False, profile=False): 157 | # supports inference from various sources. For height=720, width=1280, RGB images example inputs are: 158 | # opencv: x = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) 159 | # PIL: x = Image.open('image.jpg') # HWC x(720,1280,3) 160 | # numpy: x = np.zeros((720,1280,3)) # HWC 161 | # torch: x = torch.zeros(16,3,720,1280) # BCHW 162 | # multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images 163 | 164 | p = next(self.model.parameters()) # for device and type 165 | if isinstance(x, torch.Tensor): # torch 166 | return self.model(x.to(p.device).type_as(p), augment, profile) # inference 167 | 168 | # Pre-process 169 | if not isinstance(x, list): 170 | x = [x] 171 | shape0, shape1 = [], [] # image and inference shapes 172 | batch = range(len(x)) # batch size 173 | for i in batch: 174 | x[i] = np.array(x[i])[:, :, :3] # up to 3 channels if png 175 | s = x[i].shape[:2] # HWC 176 | shape0.append(s) # image shape 177 | g = (size / max(s)) # gain 178 | shape1.append([y * g for y in s]) 179 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 180 | x = [letterbox(x[i], new_shape=shape1, auto=False)[0] for i in batch] # pad 181 | x = np.stack(x, 0) if batch[-1] else x[0][None] # stack 182 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 183 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 184 | 185 | # Inference 186 | x = self.model(x, augment, profile) # forward 187 | x = non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS 188 | 189 | # Post-process 190 | for i in batch: 191 | if x[i] is not None: 192 | x[i][:, :4] = scale_coords(shape1, x[i][:, :4], shape0[i]) 193 | return x 194 | 195 | 196 | class Flatten(nn.Module): 197 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 198 | @staticmethod 199 | def forward(x): 200 | return x.view(x.size(0), -1) 201 | 202 | 203 | class Classify(nn.Module): 204 | # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) 205 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 206 | super(Classify, self).__init__() 207 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 208 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1) 209 | self.flat = Flatten() 210 | 211 | def forward(self, x): 212 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 213 | return self.flat(self.conv(z)) # flatten to x(b,c2) 214 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import sys 4 | import math 5 | 6 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 7 | logger = logging.getLogger(__name__) 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from models.common import Conv, SPP, Focus, BottleneckCSP, Concat, NMS, autoShape 13 | from utils.general import check_anchor_order 14 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, copy_attr 15 | 16 | class Model(nn.Module): 17 | def __init__(self, ch=3, nc=None): # model, input channels, number of classes 18 | super(Model, self).__init__() 19 | 20 | self.ch = ch 21 | self.nc = nc 22 | self.depth_multiple = 1.0 23 | self.width_multiple = 1.0 24 | self.anchors = [[10,13, 16,30, 33,23], [30,61, 62,45, 59,119], [116,90, 156,198, 373,326]] 25 | 26 | # 定义模型 27 | self.backbone_self = backbone(ch) 28 | self.neck_self = neck() 29 | 30 | self.ch_head = [256, 512, 1024] 31 | #self.head_self = head(self.nc, self.anchors, self.ch_head) 32 | self.head_self = Detect(self.nc, self.anchors, self.ch_head) 33 | 34 | 35 | # 创建步长和anchor 36 | if isinstance(self.head_self, Detect): 37 | s = 128 # 2x min stride 38 | ch_temp = 3 39 | self.head_self.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch_temp, s, s))]) # forward 40 | self.head_self.anchors /= self.head_self.stride.view(-1, 1, 1) 41 | check_anchor_order(self.head_self) 42 | self.stride = self.head_self.stride 43 | self._initialize_biases() # only run once 44 | 45 | # Init weights, biases 46 | initialize_weights(self) 47 | 48 | def forward(self, x): 49 | x, x_6, x_4 = self.backbone_self(x) 50 | x_list = self.neck_self(x, x_6, x_4) 51 | out = self.head_self(x_list) 52 | return out 53 | 54 | def _print_biases(self): 55 | m = self.model[-1] # Detect() module 56 | for mi in m.m: # from 57 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 58 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 59 | 60 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 61 | m = self.head_self # Detect() module 62 | for mi, s in zip(m.m, m.stride): # from 63 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 64 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 65 | b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 66 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 67 | 68 | class backbone(nn.Module): 69 | def __init__(self, inp_ch): # model, input channels, number of classes 70 | super(backbone, self).__init__() 71 | self.focus = Focus(inp_ch, 64, 3) 72 | self.conv1 = Conv(64, 128, 3, 2) 73 | self.csp1 = BottleneckCSP(128, 128, 
3, shortcut=True) ############ 74 | self.conv2 = Conv(128, 256, 3, 2) 75 | self.csp2 = BottleneckCSP(256, 256, 9, shortcut=True) ############ 76 | self.conv3 = Conv(256, 512, 3, 2) 77 | self.csp3 = BottleneckCSP(512, 512, 9, shortcut=True) ############ 78 | self.conv4 = Conv(512, 1024, 3, 2) 79 | self.spp = SPP(1024, 1024, [5, 9, 13]) 80 | self.csp4 = BottleneckCSP(1024, 1024, 3, shortcut=False) #??????????????? 81 | 82 | def forward(self, x): 83 | # print('inp:', x.shape) 84 | x_0 = self.focus(x) #0 85 | # print('x_0:', x_0.shape) 86 | x_1 = self.conv1(x_0) #1 87 | # print('x_1:', x_1.shape) 88 | x_2 = self.csp1(x_1) #2 89 | # print('x_2:', x_2.shape) 90 | x_3 = self.conv2(x_2) #3 91 | #print('x_3:', x_3.shape) 92 | # print('x_3:', x_3.shape) 93 | x_4 = self.csp2(x_3) #4 94 | # print('x_4:', x_4.shape) 95 | x_5 = self.conv3(x_4) #5 96 | # print('x_5:', x_5.shape) 97 | x_6 = self.csp3(x_5) #6 98 | # print('x_6:', x_6.shape) 99 | x_7 = self.conv4(x_6) #7 100 | # print('x_7:', x_7.shape) 101 | x_8 = self.spp(x_7) #8 102 | # print('x_8:', x_8.shape) 103 | out = self.csp4(x_8) #9 104 | # print('out:', out.shape) 105 | return [out, x_6, x_4] 106 | 107 | class neck(nn.Module): 108 | def __init__(self): 109 | super(neck, self).__init__() 110 | self.conv1 = Conv(1024, 512, 1, 1) 111 | self.upsample1 = nn.Upsample(None, 2, 'nearest') 112 | self.cat1 = Concat(dimension=1) 113 | self.csp1 = BottleneckCSP(1024, 512, 3, shortcut=False) 114 | 115 | self.conv2 = Conv(512, 256, 1, 1) 116 | self.upsample2 = nn.Upsample(None, 2, 'nearest') 117 | self.cat2 = Concat(dimension=1) 118 | self.csp2 = BottleneckCSP(512, 256, 3, shortcut=False) 119 | 120 | self.conv3 = Conv(256, 256, 3, 2) 121 | self.cat3 = Concat(dimension=1) 122 | self.csp3 = BottleneckCSP(512, 512, 3, shortcut=False) 123 | 124 | self.conv4 = Conv(512, 512, 3, 2) 125 | self.cat4 = Concat(dimension=1) 126 | self.csp4 = BottleneckCSP(1024, 1024, 3, shortcut=False) 127 | 128 | def forward(self, x, x_6, x_4): 129 | x_10 = self.conv1(x) #10 512 130 | # print('x_10:', x_10.shape) 131 | x_11 = self.upsample1(x_10) #11 132 | # print('x_11:', x_11.shape) 133 | x_12 = self.cat1([x_11, x_6]) #12 512+512 134 | # print('x_12:', x_12.shape) 135 | x_13 = self.csp1(x_12) #13 136 | # print('x_13:', x_13.shape) 137 | 138 | x_14 = self.conv2(x_13) #14 139 | # print('x_14:', x_14.shape) 140 | x_15 = self.upsample2(x_14) #15 141 | # print('x_15:', x_15.shape) 142 | x_16 = self.cat2([x_15, x_4]) #16 256+256 143 | # print('x_16:', x_16.shape) 144 | x_17 = self.csp2(x_16) #17 145 | # print('x_17:', x_17.shape) 146 | 147 | x_18 = self.conv3(x_17) #18 148 | # print('x_18:', x_18.shape) 149 | x_19 = self.cat3([x_18, x_14]) #19 256+256 150 | # print('x_19:', x_19.shape) 151 | x_20 = self.csp3(x_19) #20 152 | # print('x_20:', x_20.shape) 153 | 154 | x_21 = self.conv4(x_20) #21 155 | # print('x_21:', x_21.shape) 156 | x_22 = self.cat4([x_21, x_10]) #22 512+512 157 | # print('x_22:', x_22.shape) 158 | x_23 = self.csp4(x_22) #23 159 | # print('x_23:', x_23.shape) 160 | 161 | return [x_17, x_20, x_23] 162 | 163 | class Detect(nn.Module): 164 | stride = None # strides computed during build 165 | export = False # onnx export 166 | 167 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 168 | super(Detect, self).__init__() 169 | self.nc = nc # number of classes 170 | self.no = nc + 5 # number of outputs per anchor 171 | self.nl = len(anchors) # number of detection layers 172 | self.na = len(anchors[0]) // 2 # number of anchors 173 | self.grid = [torch.zeros(1)] * 
self.nl # init grid 174 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 175 | self.register_buffer('anchors', a) # shape(nl,na,2) 176 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 177 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 178 | 179 | def forward(self, x): 180 | # x = x.copy() # for profiling 181 | z = [] # inference output 182 | self.training |= self.export 183 | for i in range(self.nl): 184 | x[i] = self.m[i](x[i]) # conv 185 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 186 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 187 | 188 | if not self.training: # inference 189 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 190 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 191 | 192 | y = x[i].sigmoid() 193 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 194 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 195 | z.append(y.view(bs, -1, self.no)) 196 | 197 | return x if self.training else (torch.cat(z, 1), x) 198 | # return x 199 | 200 | @staticmethod 201 | def _make_grid(nx=20, ny=20): 202 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 203 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 204 | 205 | if __name__ == '__main__': 206 | modell = Model(nc=20) 207 | x = torch.randn(1, 3, 640, 640) 208 | script_model = torch.jit.trace(modell, x) 209 | script_model.save("m.pt") 210 | # print(modell) 211 | 212 | # model_state_dict = modell.state_dict() 213 | # for index, [key, value] in enumerate(model_state_dict.items()): 214 | # print(index, key, value.shape) 215 | #mask_index = [] 216 | #for index, item in enumerate(modell.parameters()): 217 | #print(index, item.shape) 218 | # if len(item.shape) > 1 and index >= 3 and index <= 314: 219 | # mask_index.append(index) 220 | # print(mask_index) 221 | # 222 | # mask_index = [x for x in range(0, 159, 3)] 223 | # print(mask_index) 224 | -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | class FocalLoss(nn.Module): 8 | # Wraps focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 9 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 10 | super(FocalLoss, self).__init__() 11 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 12 | self.gamma = gamma 13 | self.alpha = alpha 14 | self.reduction = loss_fcn.reduction 15 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 16 | 17 | def forward(self, pred, true): 18 | loss = self.loss_fcn(pred, true) 19 | # p_t = torch.exp(-loss) 20 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 21 | 22 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 23 | pred_prob = torch.sigmoid(pred) # prob from logits 24 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 25 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 26 | modulating_factor = (1.0 - p_t) ** self.gamma 27 | loss *= alpha_factor * modulating_factor 28 | 29 | if self.reduction == 'mean': 30 | return loss.mean() 31 | elif self.reduction == 'sum': 32 | return loss.sum() 33 | else: # 'none' 34 | return loss 35 | 36 | class BCEBlurWithLogitsLoss(nn.Module): 37 | # BCEwithLogitLoss() with reduced missing label effects. 38 | def __init__(self, alpha=0.05): 39 | super(BCEBlurWithLogitsLoss, self).__init__() 40 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 41 | self.alpha = alpha 42 | 43 | def forward(self, pred, true): 44 | loss = self.loss_fcn(pred, true) 45 | pred = torch.sigmoid(pred) # prob from logits 46 | dx = pred - true # reduce only missing label effects 47 | # dx = (pred - true).abs() # reduce missing label and false label effects 48 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 49 | loss *= alpha_factor 50 | return loss.mean() 51 | 52 | def compute_loss(p, targets, model): # predictions, targets, model 53 | device = targets.device 54 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) 55 | tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets 56 | h = model.hyp # hyperparameters 57 | 58 | # Define criteria 59 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device) 60 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device) 61 | 62 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 63 | cp, cn = smooth_BCE(eps=0.0) 64 | 65 | # Focal loss 66 | g = h['fl_gamma'] # focal loss gamma 67 | if g > 0: 68 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 69 | 70 | # Losses 71 | nt = 0 # number of targets 72 | np = len(p) # number of outputs 73 | balance = [4.0, 1.0, 0.4] if np == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 74 | for i, pi in enumerate(p): # layer index, layer predictions 75 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 76 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 77 | 78 | n = b.shape[0] # number of targets 79 | if n: 80 | nt += n # cumulative targets 81 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 82 | 83 | # Regression 84 | pxy = ps[:, :2].sigmoid() * 2. 
- 0.5 85 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 86 | pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box 87 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 88 | lbox += (1.0 - iou).mean() # iou loss 89 | 90 | # Objectness 91 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 92 | 93 | # Classification 94 | if model.nc > 1: # cls loss (only if multiple classes) 95 | t = torch.full_like(ps[:, 5:], cn, device=device) # targets 96 | t[range(n), tcls[i]] = cp 97 | lcls += BCEcls(ps[:, 5:], t) # BCE 98 | 99 | # Append targets to text file 100 | # with open('targets.txt', 'a') as file: 101 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 102 | 103 | lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss 104 | 105 | s = 3 / np # output count scaling 106 | lbox *= h['box'] * s 107 | lobj *= h['obj'] * s * (1.4 if np == 4 else 1.) 108 | lcls *= h['cls'] * s 109 | bs = tobj.shape[0] # batch size 110 | 111 | loss = lbox + lobj + lcls 112 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 113 | 114 | ############################################################################################## 115 | def build_targets(p, targets, model): 116 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 117 | #det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 118 | 119 | det = model.head_self 120 | na, nt = det.na, targets.shape[0] # number of anchors, targets 121 | tcls, tbox, indices, anch = [], [], [], [] 122 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 123 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 124 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 125 | 126 | g = 0.5 # bias 127 | off = torch.tensor([[0, 0], 128 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 129 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 130 | ], device=targets.device).float() * g # offsets 131 | 132 | for i in range(det.nl): 133 | anchors = det.anchors[i] 134 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 135 | 136 | # Match targets to anchors 137 | t = targets * gain 138 | if nt: 139 | # Matches 140 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 141 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare 142 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 143 | t = t[j] # filter 144 | 145 | # Offsets 146 | gxy = t[:, 2:4] # grid xy 147 | gxi = gain[[2, 3]] - gxy # inverse 148 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 149 | l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T 150 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 151 | t = t.repeat((5, 1, 1))[j] 152 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 153 | else: 154 | t = targets[0] 155 | offsets = 0 156 | 157 | # Define 158 | b, c = t[:, :2].long().T # image, class 159 | gxy = t[:, 2:4] # grid xy 160 | gwh = t[:, 4:6] # grid wh 161 | gij = (gxy - offsets).long() 162 | gi, gj = gij.T # grid xy indices 163 | 164 | # Append 165 | a = t[:, 6].long() # anchor indices 166 | indices.append((b, a, gj, gi)) # image, anchor, grid indices 167 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 168 | anch.append(anchors[a]) # anchors 169 | tcls.append(c) # class 170 | 171 | return tcls, tbox, indices, anch 172 | 173 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 174 | # return positive, negative label smoothing BCE targets 175 | return 1.0 - 0.5 * eps, 0.5 * eps 176 | 177 | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9): 178 | # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 179 | box2 = box2.T 180 | 181 | # Get the coordinates of bounding boxes 182 | if x1y1x2y2: # x1, y1, x2, y2 = box1 183 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 184 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 185 | else: # transform from xywh to xyxy 186 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 187 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 188 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 189 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 190 | 191 | # Intersection area 192 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 193 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 194 | 195 | # Union Area 196 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 197 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 198 | union = w1 * h1 + w2 * h2 - inter + eps 199 | 200 | iou = inter / union 201 | if GIoU or DIoU or CIoU: 202 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 203 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 204 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 205 | c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 206 | rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + 207 | (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 208 | if DIoU: 209 | return iou - rho2 / c2 # DIoU 210 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 211 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 212 | with torch.no_grad(): 213 | alpha = v / ((1 + eps) - iou + v) 214 | return iou - (rho2 / c2 + v * alpha) # CIoU 215 | else: # GIoU https://arxiv.org/pdf/1902.09630.pdf 216 | c_area = cw * ch + eps # convex area 217 | return iou - (c_area - union) / c_area # GIoU 218 | else: 219 | return iou # IoU 220 | 221 | 222 | -------------------------------------------------------------------------------- /small_model_mod.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import sys 4 | import math 5 | 6 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 7 | logger = logging.getLogger(__name__) 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from models.common 
import Conv, SPP, Focus, BottleneckCSP, BottleneckCSP_index, Concat, NMS, autoShape 13 | from utils.general import check_anchor_order 14 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, copy_attr 15 | 16 | class Small_Model(nn.Module): 17 | def __init__(self, ch=3, prune_rate=0.7,nc=None, istrain=False): # model, input channels, number of classes 18 | super(Small_Model, self).__init__() 19 | 20 | self.ch = ch 21 | self.nc = nc 22 | self.depth_multiple = 1.0 23 | self.width_multiple = 1.0 24 | self.anchors = [[10,13, 16,30, 33,23], [30,61, 62,45, 59,119], [116,90, 156,198, 373,326]] 25 | self.prune_rate = prune_rate 26 | 27 | # 定义模型 28 | self.backbone_self = backbone(ch, self.prune_rate) 29 | self.neck_self = neck(self.prune_rate) 30 | 31 | self.ch_head = [int(256*self.prune_rate), int(512*self.prune_rate), int(1024*self.prune_rate)] 32 | # self.head_self = head(self.nc, self.anchors, self.ch_head) 33 | self.head_self = Detect(self.nc, self.anchors, self.ch_head, istrain) 34 | 35 | # 创建步长和anchor 36 | if isinstance(self.head_self, Detect): 37 | s = 128 # 2x min stride 38 | ch_temp = 3 39 | self.head_self.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch_temp, s, s))]) # forward 40 | self.head_self.anchors /= self.head_self.stride.view(-1, 1, 1) 41 | check_anchor_order(self.head_self) 42 | self.stride = self.head_self.stride 43 | self._initialize_biases() # only run once 44 | 45 | # Init weights, biases 46 | initialize_weights(self) 47 | 48 | def forward(self, x): 49 | x, x_6, x_4 = self.backbone_self(x) 50 | x_list = self.neck_self(x, x_6, x_4) 51 | out = self.head_self(x_list) 52 | return out 53 | 54 | def _print_biases(self): 55 | m = self.model[-1] # Detect() module 56 | for mi in m.m: # from 57 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 58 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 59 | 60 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 61 | m = self.head_self # Detect() module 62 | for mi, s in zip(m.m, m.stride): # from 63 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 64 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 65 | b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 66 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 67 | 68 | class backbone(nn.Module): 69 | def __init__(self, inp_ch, prune_rate): # model, input channels, number of classes 70 | super(backbone, self).__init__() 71 | self.focus = Focus(inp_ch, 64, 3) 72 | self.conv1 = Conv(64, int(128*prune_rate), 3, 2) #int(128*prune_rate) 73 | self.csp1 = BottleneckCSP_index(128, 128, prune_rate, 3, shortcut=True) ################################ 74 | self.conv2 = Conv(int(128*prune_rate), int(256*prune_rate), 3, 2) 75 | self.csp2 = BottleneckCSP_index(256, 256, prune_rate, 9, shortcut=True) #################################### 76 | self.conv3 = Conv(int(256*prune_rate), int(512*prune_rate), 3, 2) 77 | self.csp3 = BottleneckCSP_index(512, 512, prune_rate, 9, shortcut=True) ########################################## 78 | self.conv4 = Conv(int(512*prune_rate), int(1024*prune_rate), 3, 2) 79 | self.spp = SPP(int(1024*prune_rate), int(1024*prune_rate), [5, 9, 13]) 80 | self.csp4 = BottleneckCSP(int(1024*prune_rate), int(1024*prune_rate), 3, shortcut=False) 81 | 82 | def forward(self, x): 83 | # print('inp:', x.shape) 
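        # Rough shape sketch (assuming a 640x640 input and the default prune_rate=0.7,
        # so pruned widths are int(c * prune_rate)):
        #   x           -> (1, 3, 640, 640)
        #   x_4 (csp2)  -> 1/8  scale (80x80),  int(256  * 0.7) channels
        #   x_6 (csp3)  -> 1/16 scale (40x40),  int(512  * 0.7) channels
        #   out (csp4)  -> 1/32 scale (20x20),  int(1024 * 0.7) channels
        # These three maps feed neck() and then the pruned Detect head (see ch_head above).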
84 | x_0 = self.focus(x) #0 85 | # print('x_0:', x_0.shape) 86 | x_1 = self.conv1(x_0) #1 87 | # print('x_1:', x_1.shape) 88 | x_2 = self.csp1(x_1) #2 89 | # print('x_2:', x_2.shape) 90 | x_3 = self.conv2(x_2) #3 91 | #print('x_3:', x_3.shape) 92 | # print('happy') 93 | x_4 = self.csp2(x_3) #4 94 | # print('x_4:', x_4.shape) 95 | x_5 = self.conv3(x_4) #5 96 | # print('x_5:', x_5.shape) 97 | x_6 = self.csp3(x_5) #6 98 | # print('x_6:', x_6.shape) 99 | x_7 = self.conv4(x_6) #7 100 | # print('x_7:', x_7.shape) 101 | x_8 = self.spp(x_7) #8 102 | # print('x_8:', x_8.shape) 103 | out = self.csp4(x_8) #9 104 | # print('out:', out.shape) 105 | return [out, x_6, x_4] 106 | 107 | class neck(nn.Module): 108 | def __init__(self, prune_rate): 109 | super(neck, self).__init__() 110 | self.conv1 = Conv(int(1024*prune_rate), int(512*prune_rate), 1, 1) 111 | self.upsample1 = nn.Upsample(None, 2, 'nearest') 112 | self.cat1 = Concat(dimension=1) 113 | self.csp1 = BottleneckCSP(int(1024*prune_rate), int(512*prune_rate), 3, shortcut=False) 114 | 115 | self.conv2 = Conv(int(512*prune_rate), int(256*prune_rate), 1, 1) 116 | self.upsample2 = nn.Upsample(None, 2, 'nearest') 117 | self.cat2 = Concat(dimension=1) 118 | self.csp2 = BottleneckCSP(int(512*prune_rate), int(256*prune_rate), 3, shortcut=False) 119 | 120 | self.conv3 = Conv(int(256*prune_rate), int(256*prune_rate), 3, 2) 121 | self.cat3 = Concat(dimension=1) 122 | self.csp3 = BottleneckCSP(int(512*prune_rate), int(512*prune_rate), 3, shortcut=False) 123 | 124 | self.conv4 = Conv(int(512*prune_rate), int(512*prune_rate), 3, 2) 125 | self.cat4 = Concat(dimension=1) 126 | self.csp4 = BottleneckCSP(int(1024*prune_rate), int(1024*prune_rate), 3, shortcut=False) 127 | 128 | def forward(self, x, x_6, x_4): 129 | x_10 = self.conv1(x) #10 512 130 | # print('x_10:', x_10.shape) 131 | x_11 = self.upsample1(x_10) #11 132 | #print('x_11:', x_11.shape) 133 | x_12 = self.cat1([x_11, x_6]) #12 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 134 | #print('x_12:', x_12.shape) 135 | x_13 = self.csp1(x_12) #13 136 | #print('x_13:', x_13.shape) 137 | 138 | x_14 = self.conv2(x_13) #14 139 | # print('x_14:', x_14.shape) 140 | x_15 = self.upsample2(x_14) #15 141 | # print('x_15:', x_15.shape) 142 | x_16 = self.cat2([x_15, x_4]) #16 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 143 | # print('x_16:', x_16.shape) 144 | x_17 = self.csp2(x_16) #17 145 | # print('x_17:', x_17.shape) 146 | 147 | x_18 = self.conv3(x_17) #18 148 | # print('x_18:', x_18.shape) 149 | x_19 = self.cat3([x_18, x_14]) #19 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 150 | # print('x_19:', x_19.shape) 151 | x_20 = self.csp3(x_19) #20 152 | # print('x_20:', x_20.shape) 153 | 154 | x_21 = self.conv4(x_20) #21 155 | # print('x_21:', x_21.shape) 156 | x_22 = self.cat4([x_21, x_10]) #22 $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ 157 | # print('x_22:', x_22.shape) 158 | x_23 = self.csp4(x_22) #23 159 | # print('x_23:', x_23.shape) 160 | 161 | return [x_17, x_20, x_23] 162 | 163 | class Detect(nn.Module): 164 | stride = None # strides computed during build 165 | export = False # onnx export 166 | 167 | def __init__(self, nc=80, anchors=(), ch=(), istrain=False): # detection layer 168 | super(Detect, self).__init__() 169 | self.nc = nc # number of classes 170 | self.no = nc + 5 # number of outputs per anchor 171 | self.nl = len(anchors) # number of detection layers 172 | self.na = len(anchors[0]) // 2 # number of anchors 173 | self.grid = [torch.zeros(1)] * self.nl # init grid 174 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 175 | 
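        # Sketch of the buffer shapes registered below, using the default anchors from Small_Model:
        #   a = torch.tensor([[10,13, 16,30, 33,23],
        #                     [30,61, 62,45, 59,119],
        #                     [116,90, 156,198, 373,326]]).float().view(3, -1, 2)
        #   a.shape                            # (nl=3, na=3, 2), per-level wh anchors
        #   a.clone().view(3, 1, -1, 1, 1, 2)  # anchor_grid, broadcastable over (bs, na, ny, nx, 2)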
self.register_buffer('anchors', a) # shape(nl,na,2) 176 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 177 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 178 | self.istrain = istrain 179 | 180 | def forward(self, x): 181 | # x = x.copy() # for profiling 182 | z = [] # inference output 183 | self.training |= self.export 184 | #self.training = False 185 | # self.istrain = False 186 | # if self.istrain == True: 187 | # self.training |= self.export 188 | # else: 189 | # self.training = False 190 | for i in range(self.nl): 191 | x[i] = self.m[i](x[i]) # conv 192 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 193 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 194 | 195 | if not self.training: # inference 196 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 197 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 198 | 199 | y = x[i].sigmoid() 200 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 201 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 202 | z.append(y.view(bs, -1, self.no)) 203 | 204 | # return (torch.cat(z, 1), x) 205 | return x if self.training else (torch.cat(z, 1), x) 206 | # return x 207 | 208 | @staticmethod 209 | def _make_grid(nx=20, ny=20): 210 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 211 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 212 | 213 | if __name__ == '__main__': 214 | modell = Small_Model(nc=3, prune_rate=0.7) 215 | # print(modell) 216 | 217 | model_state_dict = modell.state_dict() 218 | for index, [key, value] in enumerate(model_state_dict.items()): 219 | key_list = key.split('.') 220 | print(index, key, value.shape) 221 | # if 'conv' in key_list and 'weight' in key_list: 222 | # print(index, key, value.shape) 223 | # if 'bn' in key_list and 'weight' in key_list: 224 | # print(index, key, value.shape) 225 | # if 'bn' in key_list and 'bias' in key_list: 226 | # print(index, key, value.shape) 227 | 228 | 229 | 230 | # mask_index = [] 231 | # for index, item in enumerate(modell.parameters()): 232 | # print(index, item.shape) 233 | # if len(item.shape) > 1 and index >= 3 and index <= 314: 234 | # mask_index.append(index) 235 | # print(mask_index) 236 | # 237 | # mask_index = [x for x in range(0, 159, 3)] 238 | # print(mask_index) -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | from copy import deepcopy 5 | from pathlib import Path 6 | 7 | import math 8 | 9 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 10 | logger = logging.getLogger(__name__) 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, NMS, autoShape 16 | from models.experimental import MixConv2d, CrossConv, C3 17 | from utils.general import check_anchor_order, make_divisible, check_file, set_logging 18 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 19 | select_device, copy_attr 20 | 21 | 22 | class Detect(nn.Module): 23 | stride = None # strides computed during build 24 | export = False # onnx export 25 | 26 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 27 | 
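        # Each anchor predicts no = nc + 5 values laid out as (x, y, w, h, obj, nc class scores);
        # e.g. for COCO (nc=80) each output conv produces na * no = 3 * 85 = 255 channels,
        # which forward() below reshapes to (bs, na, ny, nx, no).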
super(Detect, self).__init__() 28 | self.nc = nc # number of classes 29 | self.no = nc + 5 # number of outputs per anchor 30 | self.nl = len(anchors) # number of detection layers 31 | self.na = len(anchors[0]) // 2 # number of anchors 32 | self.grid = [torch.zeros(1)] * self.nl # init grid 33 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 34 | self.register_buffer('anchors', a) # shape(nl,na,2) 35 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 36 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 37 | 38 | def forward(self, x): 39 | # x = x.copy() # for profiling 40 | z = [] # inference output 41 | self.training |= self.export 42 | for i in range(self.nl): 43 | x[i] = self.m[i](x[i]) # conv 44 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 45 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 46 | 47 | if not self.training: # inference 48 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 49 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 50 | 51 | y = x[i].sigmoid() 52 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 53 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 54 | z.append(y.view(bs, -1, self.no)) 55 | 56 | return x if self.training else (torch.cat(z, 1), x) 57 | # return x 58 | 59 | @staticmethod 60 | def _make_grid(nx=20, ny=20): 61 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 62 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 63 | 64 | 65 | class Model(nn.Module): 66 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 67 | super(Model, self).__init__() 68 | # 读取yolov5.yaml,组成字典 69 | if isinstance(cfg, dict): 70 | self.yaml = cfg # model dict 71 | else: # is *.yaml 72 | import yaml # for torch hub 73 | self.yaml_file = Path(cfg).name 74 | with open(cfg) as f: 75 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 76 | 77 | # 在定义模型时可以重置类别数 78 | if nc and nc != self.yaml['nc']: 79 | print('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 80 | self.yaml['nc'] = nc # override yaml value 81 | 82 | # 定义模型 83 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out 84 | 85 | # 取出Detect层 86 | m = self.model[-1] # Detect() 87 | 88 | # 创建步长和anchor 89 | if isinstance(m, Detect): 90 | s = 128 # 2x min stride 91 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 92 | m.anchors /= m.stride.view(-1, 1, 1) 93 | check_anchor_order(m) 94 | self.stride = m.stride 95 | self._initialize_biases() # only run once 96 | # print('Strides: %s' % m.stride.tolist()) 97 | 98 | # Init weights, biases 99 | initialize_weights(self) 100 | 101 | # 在这里会以[1,3,64,64]作为输入tensor前向计算,打印出信息 102 | self.info() 103 | 104 | def forward(self, x, augment=False, profile=False): 105 | # 是否进行数据增强 106 | if augment: 107 | img_size = x.shape[-2:] # height, width 108 | s = [1, 0.83, 0.67] # scales 109 | f = [None, 3, None] # flips (2-ud, 3-lr) 110 | y = [] # outputs 111 | for si, fi in zip(s, f): 112 | xi = scale_img(x.flip(fi) if fi else x, si) 113 | yi = self.forward_once(xi)[0] # forward 114 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 115 | yi[..., :4] /= si # de-scale 116 | if fi == 2: 117 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 118 | elif fi == 3: 
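                    # For a left-right flipped input of width W, a predicted centre x maps back to
                    # the original frame as x' = W - x (the fi == 2 branch above uses the image
                    # height the same way to undo an up-down flip).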
119 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 120 | y.append(yi) 121 | return torch.cat(y, 1), None # augmented inference, train 122 | else: 123 | return self.forward_once(x, profile) # single-scale inference, train 124 | 125 | def forward_once(self, x, profile=False): 126 | # 输出,y保存每一层输出的特征图 127 | y, dt = [], [] # outputs 128 | for m in self.model: 129 | # 如果特征图不是上层特征图(即concat层) 130 | if m.f != -1: # if not from previous layer 131 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 132 | 133 | # 是否计算FLOPs 134 | if profile: 135 | try: 136 | import thop 137 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # FLOPS 138 | except: 139 | o = 0 140 | t = time_synchronized() 141 | for _ in range(10): 142 | _ = m(x) 143 | dt.append((time_synchronized() - t) * 100) 144 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 145 | 146 | # 当前层前向计算 147 | x = m(x) # run 148 | # 当前层计算结果(特征图)保存到y中 149 | y.append(x if m.i in self.save else None) # save output 150 | 151 | if profile: 152 | print('%.1fms total' % sum(dt)) 153 | return x 154 | 155 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 156 | # https://arxiv.org/abs/1708.02002 section 3.3 157 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 158 | m = self.model[-1] # Detect() module 159 | for mi, s in zip(m.m, m.stride): # from 160 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 161 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 162 | b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 163 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 164 | 165 | def _print_biases(self): 166 | m = self.model[-1] # Detect() module 167 | for mi in m.m: # from 168 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 169 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 170 | 171 | # def _print_weights(self): 172 | # for m in self.model.modules(): 173 | # if type(m) is Bottleneck: 174 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 175 | 176 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 177 | print('Fusing layers... ') 178 | for m in self.model.modules(): 179 | if type(m) is Conv and hasattr(m, 'bn'): 180 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 181 | delattr(m, 'bn') # remove batchnorm 182 | m.forward = m.fuseforward # update forward 183 | self.info() 184 | return self 185 | 186 | def nms(self, mode=True): # add or remove NMS module 187 | present = type(self.model[-1]) is NMS # last layer is NMS 188 | if mode and not present: 189 | print('Adding NMS... ') 190 | m = NMS() # module 191 | m.f = -1 # from 192 | m.i = self.model[-1].i + 1 # index 193 | self.model.add_module(name='%s' % m.i, module=m) # add 194 | self.eval() 195 | elif not mode and present: 196 | print('Removing NMS... ') 197 | self.model = self.model[:-1] # remove 198 | return self 199 | 200 | def autoshape(self): # add autoShape module 201 | print('Adding autoShape... 
') 202 | m = autoShape(self) # wrap model 203 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 204 | return m 205 | 206 | def info(self, verbose=False): # print model information 207 | model_info(self, verbose) 208 | 209 | 210 | def parse_model(d, ch): # model_dict, input_channels(3) 211 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 212 | # 获取 anchor, nc, gd, gw(‘depth_multiple’控制所有层的通道数,'width_multiple'控制BottleneckCSP里的卷积的个数) 213 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 214 | # anchor的个数 3 215 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 216 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 217 | 218 | # 网络结构列表,保存列表,输出特征图通道列表 219 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 220 | 221 | # 依据backbone和head来搭建网络 222 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 223 | # eval将字符串转为 基本操作 224 | m = eval(m) if isinstance(m, str) else m # eval strings 225 | 226 | for j, a in enumerate(args): 227 | try: 228 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 229 | except: 230 | pass 231 | 232 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 233 | if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 234 | c1, c2 = ch[f], args[0] 235 | 236 | # Normal 237 | # if i > 0 and args[0] != no: # channel expansion factor 238 | # ex = 1.75 # exponential (default 2.0) 239 | # e = math.log(c2 / ch[1]) / math.log(2) 240 | # c2 = int(ch[1] * ex ** e) 241 | # if m != Focus: 242 | 243 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 244 | 245 | # Experimental 246 | # if i > 0 and args[0] != no: # channel expansion factor 247 | # ex = 1 + gw # exponential (default 2.0) 248 | # ch1 = 32 # ch[1] 249 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 250 | # c2 = int(ch1 * ex ** e) 251 | # if m != Focus: 252 | # c2 = make_divisible(c2, 8) if c2 != no else c2 253 | 254 | args = [c1, c2, *args[1:]] 255 | if m in [BottleneckCSP, C3]: 256 | args.insert(2, n) 257 | n = 1 258 | elif m is nn.BatchNorm2d: 259 | args = [ch[f]] 260 | elif m is Concat: 261 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 262 | elif m is Detect: 263 | args.append([ch[x + 1] for x in f]) 264 | if isinstance(args[1], int): # number of anchors 265 | args[1] = [list(range(args[1] * 2))] * len(f) 266 | else: 267 | c2 = ch[f] 268 | 269 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 270 | t = str(m)[8:-2].replace('__main__.', '') # module type 271 | np = sum([x.numel() for x in m_.parameters()]) # number params 272 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 273 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 274 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 275 | layers.append(m_) 276 | ch.append(c2) 277 | return nn.Sequential(*layers), sorted(save) 278 | 279 | 280 | if __name__ == '__main__': 281 | parser = argparse.ArgumentParser() 282 | parser.add_argument('--cfg', type=str, default='yolov5l.yaml', help='model.yaml') 283 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 284 | opt = parser.parse_args() 285 | opt.cfg = check_file(opt.cfg) # check file 286 | set_logging() 287 | device = select_device(opt.device) 288 | 289 | # Create model 290 | model = Model(opt.cfg).to(device) 291 | #model.train() 292 | print(model) 293 | 294 | # Profile 295 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 296 | # y = model(img, profile=True) 297 | 298 | # Tensorboard 299 | # from torch.utils.tensorboard import SummaryWriter 300 | # tb_writer = SummaryWriter() 301 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 302 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 303 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 304 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import json 4 | import os 5 | import shutil 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | import torch 10 | import yaml 11 | from tqdm import tqdm 12 | 13 | from models.experimental import attempt_load 14 | from datasets import create_dataloader 15 | from utils.general import ( 16 | coco80_to_coco91_class, check_dataset, check_file, check_img_size, non_max_suppression, scale_coords, 17 | xyxy2xywh, clip_coords, plot_images, xywh2xyxy, box_iou, output_to_target, ap_per_class, set_logging) 18 | from utils.torch_utils import select_device, time_synchronized 19 | from loss import compute_loss 20 | 21 | 22 | def test(data, 23 | weights=None, 24 | batch_size=16, 25 | imgsz=640, 26 | conf_thres=0.001, 27 | iou_thres=0.6, # for NMS 28 | save_json=False, 29 | single_cls=False, 30 | augment=False, 31 | verbose=False, 32 | model=None, 33 | dataloader=None, 34 | save_dir=Path(''), # for saving images 35 | save_txt=False, # for auto-labelling 36 | save_conf=False, 37 | plots=True): 38 | # Initialize/load model and set device 39 | training = model is not None 40 | if training: # called by train.py 41 | device = next(model.parameters()).device # get model device 42 | 43 | else: # called directly 44 | set_logging() 45 | device = select_device(opt.device, batch_size=batch_size) 46 | save_txt = opt.save_txt # save *.txt labels 47 | 48 | # Remove previous 49 | if os.path.exists(save_dir): 50 | shutil.rmtree(save_dir) # delete dir 51 | os.makedirs(save_dir) # make new dir 52 | 53 | if save_txt: 54 | out = save_dir / 'autolabels' 55 | if os.path.exists(out): 56 | shutil.rmtree(out) # delete dir 57 | os.makedirs(out) # make new dir 58 | 59 | # Load model 60 | model = attempt_load(weights, map_location=device) # load FP32 model 61 | imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size 62 | 63 | # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 64 | # if device.type != 'cpu' and torch.cuda.device_count() > 1: 65 | # model = nn.DataParallel(model) 66 | 67 | # Half 68 | half = device.type != 'cpu' # half precision only supported on CUDA 69 | if half: 70 | model.half() 71 | 72 | # Configure 73 | model.eval() 74 | with open(data) as f: 75 | data = yaml.load(f, Loader=yaml.FullLoader) # model dict 76 | check_dataset(data) # check 77 | nc = 1 if single_cls else int(data['nc']) # number of classes 78 | iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 79 | niou = iouv.numel() 80 | 81 | # Dataloader 82 | if not 
training: 83 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 84 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once 85 | path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images 86 | dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, 87 | hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] 88 | 89 | seen = 0 90 | names = model.names if hasattr(model, 'names') else model.module.names 91 | coco91class = coco80_to_coco91_class() 92 | s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') 93 | p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 94 | loss = torch.zeros(3, device=device) 95 | jdict, stats, ap, ap_class = [], [], [], [] 96 | for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 97 | img = img.to(device, non_blocking=True) 98 | img = img.half() if half else img.float() # uint8 to fp16/32 99 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 100 | targets = targets.to(device) 101 | nb, _, height, width = img.shape # batch size, channels, height, width 102 | whwh = torch.Tensor([width, height, width, height]).to(device) 103 | 104 | # Disable gradients 105 | with torch.no_grad(): 106 | # Run model 107 | t = time_synchronized() 108 | inf_out, train_out = model(img) # inference and training outputs 109 | t0 += time_synchronized() - t 110 | 111 | # Compute loss 112 | if training: # if model has loss hyperparameters 113 | loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls 114 | 115 | # Run NMS 116 | t = time_synchronized() 117 | output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) 118 | t1 += time_synchronized() - t 119 | 120 | # Statistics per image 121 | for si, pred in enumerate(output): 122 | labels = targets[targets[:, 0] == si, 1:] 123 | nl = len(labels) 124 | tcls = labels[:, 0].tolist() if nl else [] # target class 125 | seen += 1 126 | 127 | if pred is None: 128 | if nl: 129 | stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) 130 | continue 131 | 132 | # Append to text file 133 | if save_txt: 134 | gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh 135 | x = pred.clone() 136 | x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1]) # to original 137 | for *xyxy, conf, cls in x: 138 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 139 | line = (cls, conf, *xywh) if save_conf else (cls, *xywh) # label format 140 | with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f: 141 | f.write(('%g ' * len(line) + '\n') % line) 142 | 143 | # Clip boxes to image bounds 144 | clip_coords(pred, (height, width)) 145 | 146 | # Append to pycocotools JSON dictionary 147 | if save_json: 148 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
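            # pycocotools expects bbox = [x_min, y_min, width, height]; the lines below convert
            # xyxy -> centre-based xywh and then shift the centre to the top-left corner,
            # e.g. a 100x40 box centred at (258, 41) is stored as [208.0, 21.0, 100.0, 40.0].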
149 | image_id = Path(paths[si]).stem 150 | box = pred[:, :4].clone() # xyxy 151 | scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape 152 | box = xyxy2xywh(box) # xywh 153 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 154 | for p, b in zip(pred.tolist(), box.tolist()): 155 | jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id, 156 | 'category_id': coco91class[int(p[5])], 157 | 'bbox': [round(x, 3) for x in b], 158 | 'score': round(p[4], 5)}) 159 | 160 | # Assign all predictions as incorrect 161 | correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) 162 | if nl: 163 | detected = [] # target indices 164 | tcls_tensor = labels[:, 0] 165 | 166 | # target boxes 167 | tbox = xywh2xyxy(labels[:, 1:5]) * whwh 168 | 169 | # Per target class 170 | for cls in torch.unique(tcls_tensor): 171 | ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices 172 | pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices 173 | 174 | # Search for detections 175 | if pi.shape[0]: 176 | # Prediction to target ious 177 | ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices 178 | 179 | # Append detections 180 | detected_set = set() 181 | for j in (ious > iouv[0]).nonzero(as_tuple=False): 182 | d = ti[i[j]] # detected target 183 | if d.item() not in detected_set: 184 | detected_set.add(d.item()) 185 | detected.append(d) 186 | correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn 187 | if len(detected) == nl: # all targets already located in image 188 | break 189 | 190 | # Append statistics (correct, conf, pcls, tcls) 191 | stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) 192 | 193 | # Plot images 194 | if plots and batch_i < 1: 195 | f = save_dir / f'test_batch{batch_i}_gt.jpg' # filename 196 | plot_images(img, targets, paths, str(f), names) # ground truth 197 | f = save_dir / f'test_batch{batch_i}_pred.jpg' 198 | plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions 199 | 200 | # Compute statistics 201 | stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy 202 | if len(stats) and stats[0].any(): 203 | p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png') 204 | p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95] 205 | mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() 206 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 207 | else: 208 | nt = torch.zeros(1) 209 | 210 | # Print results 211 | pf = '%20s' + '%12.3g' * 6 # print format 212 | print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) 213 | 214 | # Print results per class 215 | if verbose and nc > 1 and len(stats): 216 | for i, c in enumerate(ap_class): 217 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) 218 | 219 | # Print speeds 220 | t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple 221 | if not training: 222 | print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) 223 | 224 | # Save JSON 225 | if save_json and len(jdict): 226 | w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights 227 | file = save_dir / f"detections_val2017_{w}_results.json" # predicted annotations file 228 | print('\nCOCO mAP with pycocotools... saving %s...' 
% file) 229 | with open(file, 'w') as f: 230 | json.dump(jdict, f) 231 | 232 | try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 233 | from pycocotools.coco import COCO 234 | from pycocotools.cocoeval import COCOeval 235 | 236 | imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] 237 | cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api 238 | cocoDt = cocoGt.loadRes(str(file)) # initialize COCO pred api 239 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 240 | cocoEval.params.imgIds = imgIds # image IDs to evaluate 241 | cocoEval.evaluate() 242 | cocoEval.accumulate() 243 | cocoEval.summarize() 244 | map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) 245 | except Exception as e: 246 | print('ERROR: pycocotools unable to run: %s' % e) 247 | 248 | # Return results 249 | model.float() # for training 250 | maps = np.zeros(nc) + map 251 | for i, c in enumerate(ap_class): 252 | maps[c] = ap[i] 253 | return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t 254 | 255 | 256 | if __name__ == '__main__': 257 | parser = argparse.ArgumentParser(prog='test.py') 258 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') 259 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path') 260 | parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') 261 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 262 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 263 | parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS') 264 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 265 | parser.add_argument('--task', default='val', help="'val', 'test', 'study'") 266 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 267 | parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') 268 | parser.add_argument('--augment', action='store_true', help='augmented inference') 269 | parser.add_argument('--verbose', action='store_true', help='report mAP by class') 270 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 271 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 272 | parser.add_argument('--save-dir', type=str, default='runs/test', help='directory to save results') 273 | opt = parser.parse_args() 274 | opt.save_json |= opt.data.endswith('coco.yaml') 275 | opt.data = check_file(opt.data) # check file 276 | print(opt) 277 | 278 | if opt.task in ['val', 'test']: # run normally 279 | test(opt.data, 280 | opt.weights, 281 | opt.batch_size, 282 | opt.img_size, 283 | opt.conf_thres, 284 | opt.iou_thres, 285 | opt.save_json, 286 | opt.single_cls, 287 | opt.augment, 288 | opt.verbose, 289 | save_dir=Path(opt.save_dir), 290 | save_txt=opt.save_txt, 291 | save_conf=opt.save_conf, 292 | ) 293 | 294 | print('Results saved to %s' % opt.save_dir) 295 | 296 | elif opt.task == 'study': # run over a range of settings and save/plot 297 | for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: 298 | f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to 299 | x = list(range(320, 800, 64)) # x axis 300 | y = [] # y axis 301 | for i in x: # img-size 302 | print('\nRunning %s point %s...' % (f, i)) 303 | r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json) 304 | y.append(r + t) # results and times 305 | np.savetxt(f, y, fmt='%10.4g') # save 306 | os.system('zip -r study.zip study_*.txt') 307 | # utils.general.plot_study_txt(f, x) # plot 308 | -------------------------------------------------------------------------------- /get_small_script/get_small_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys,os 3 | sys.path.append('../') 4 | from model import Model 5 | from small_model_mod import Small_Model 6 | import torch 7 | import numpy as np 8 | 9 | # if item[:25] not in ['backbone_self.csp1.m.0.cv2', 'backbone_self.csp1.m.1.cv2', 'backbone_self.csp1.m.2.cv2', 10 | # 'backbone_self.csp2.m.0.cv2', 'backbone_self.csp2.m.1.cv2', 'backbone_self.csp2.m.2.cv2', 11 | # 'backbone_self.csp2.m.3.cv2', 'backbone_self.csp2.m.4.cv2', 'backbone_self.csp2.m.5.cv2', 12 | # 'backbone_self.csp2.m.6.cv2', 'backbone_self.csp2.m.7.cv2', 'backbone_self.csp2.m.8.cv2', 13 | # 'backbone_self.csp3.m.0.cv2', 'backbone_self.csp3.m.1.cv2', 'backbone_self.csp3.m.2.cv2', 14 | # 'backbone_self.csp3.m.3.cv2', 'backbone_self.csp3.m.4.cv2', 'backbone_self.csp3.m.5.cv2', 15 | # 'backbone_self.csp3.m.6.cv2', 'backbone_self.csp3.m.7.cv2', 'backbone_self.csp3.m.8.cv2']: 剪第1维(√),不剪第0维(√) 16 | # print(item) 17 | 18 | # if item[:25] not in ['backbone_self.csp1.m.0.cv1', 'backbone_self.csp1.m.1.cv1', 'backbone_self.csp1.m.2.cv1', 19 | # 'backbone_self.csp2.m.0.cv1', 'backbone_self.csp2.m.1.cv1', 'backbone_self.csp2.m.2.cv1', 20 | # 'backbone_self.csp2.m.3.cv1', 'backbone_self.csp2.m.4.cv1', 'backbone_self.csp2.m.5.cv1', 21 | # 'backbone_self.csp2.m.6.cv1', 'backbone_self.csp2.m.7.cv1', 'backbone_self.csp2.m.8.cv1', 22 | # 'backbone_self.csp3.m.0.cv1', 'backbone_self.csp3.m.1.cv1', 'backbone_self.csp3.m.2.cv1', 23 | # 'backbone_self.csp3.m.3.cv1', 
'backbone_self.csp3.m.4.cv1', 'backbone_self.csp3.m.5.cv1', 24 | # 'backbone_self.csp3.m.6.cv1', 'backbone_self.csp3.m.7.cv1', 'backbone_self.csp3.m.8.cv1']: 剪第0维(√),不剪第1维(√) 25 | # print(item) 26 | 27 | #backbone_self.csp1.cv1.conv.weight backbone_self.csp2.cv1.conv.weight backbone_self.csp3.cv1.conv.weight 剪第1维(√),不剪第0维(√) 28 | 29 | #backbone_self.csp1.cv3.weight backbone_self.csp2.cv3.weight backbone_self.csp2.cv3.weight 剪第0维(√),不减第1维(√) 30 | 31 | def get_small_model(big_model): 32 | indice_dict, small_model = extract_para(big_model) 33 | 34 | big_state_dict = big_model.state_dict() 35 | small_state_dict = {} 36 | 37 | bottleneck_not0 = ['backbone_self.csp1.m.0.cv2', 'backbone_self.csp1.m.1.cv2', 'backbone_self.csp1.m.2.cv2', 38 | 'backbone_self.csp2.m.0.cv2', 'backbone_self.csp2.m.1.cv2', 'backbone_self.csp2.m.2.cv2', 39 | 'backbone_self.csp2.m.3.cv2', 'backbone_self.csp2.m.4.cv2', 'backbone_self.csp2.m.5.cv2', 40 | 'backbone_self.csp2.m.6.cv2', 'backbone_self.csp2.m.7.cv2', 'backbone_self.csp2.m.8.cv2', 41 | 'backbone_self.csp3.m.0.cv2', 'backbone_self.csp3.m.1.cv2', 'backbone_self.csp3.m.2.cv2', 42 | 'backbone_self.csp3.m.3.cv2', 'backbone_self.csp3.m.4.cv2', 'backbone_self.csp3.m.5.cv2', 43 | 'backbone_self.csp3.m.6.cv2', 'backbone_self.csp3.m.7.cv2', 'backbone_self.csp3.m.8.cv2'] 44 | 45 | # csp模块中bn前 8个 46 | csp_bn = ['backbone_self.csp1.bn','backbone_self.csp2.bn', 'backbone_self.csp3.bn', 'backbone_self.csp4.bn', 47 | 'neck_self.csp1.bn', 'neck_self.csp2.bn', 'neck_self.csp3.bn', 'neck_self.csp4.bn'] 48 | 49 | # csp模块中bn以及最后的卷积 8个 50 | csp_after_bn = ['backbone_self.csp1.cv4.conv.weight', 'backbone_self.csp2.cv4.conv.weight', 51 | 'backbone_self.csp3.cv4.conv.weight', 'backbone_self.csp4.cv4.conv.weight', 52 | 'neck_self.csp1.cv4.conv.weight', 'neck_self.csp2.cv4.conv.weight', 53 | 'neck_self.csp3.cv4.conv.weight', 'neck_self.csp4.cv4.conv.weight'] 54 | 55 | csp_indict = [torch.cat((indice_dict['backbone_self.csp1.cv3.weight'], indice_dict['backbone_self.csp1.cv2.weight'] + 64)), 56 | torch.cat((indice_dict['backbone_self.csp2.cv3.weight'], indice_dict['backbone_self.csp2.cv2.weight'] + 128)), 57 | torch.cat((indice_dict['backbone_self.csp3.cv3.weight'], indice_dict['backbone_self.csp3.cv2.weight'] + 256)), 58 | torch.cat((indice_dict['backbone_self.csp4.cv3.weight'], indice_dict['backbone_self.csp4.cv2.weight'] + 512)), 59 | torch.cat((indice_dict['neck_self.csp1.cv3.weight'], indice_dict['neck_self.csp1.cv2.weight'] + 256)), 60 | torch.cat((indice_dict['neck_self.csp2.cv3.weight'], indice_dict['neck_self.csp2.cv2.weight'] + 128)), 61 | torch.cat((indice_dict['neck_self.csp3.cv3.weight'], indice_dict['neck_self.csp3.cv2.weight'] + 256)), 62 | torch.cat((indice_dict['neck_self.csp4.cv3.weight'], indice_dict['neck_self.csp4.cv2.weight'] + 512))] 63 | 64 | csp_after_bn_indict = [indice_dict['backbone_self.csp1.cv4.conv.weight'], indice_dict['backbone_self.csp2.cv4.conv.weight'], 65 | indice_dict['backbone_self.csp3.cv4.conv.weight'], indice_dict['backbone_self.csp4.cv4.conv.weight'], 66 | indice_dict['neck_self.csp1.cv4.conv.weight'], indice_dict['neck_self.csp2.cv4.conv.weight'], 67 | indice_dict['neck_self.csp3.cv4.conv.weight'], indice_dict['neck_self.csp4.cv4.conv.weight']] 68 | 69 | # csp模块后的卷积 7个 70 | csp_after_conv = ['backbone_self.conv2.conv.weight', 'backbone_self.conv3.conv.weight', 71 | 'backbone_self.conv4.conv.weight', 'neck_self.conv1.conv.weight', 72 | 'neck_self.conv2.conv.weight', 'neck_self.conv3.conv.weight', 'neck_self.conv4.conv.weight'] 73 | 74 | 
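    # Note on the Chinese comments in this file: "剪第0维" = prune dim 0 (output channels /
    # filters), "剪第1维" = prune dim 1 (input channels), "不剪" = keep that dim unchanged.
    # The comment below reads: "bottlenecks whose input is not the previous feature map (8)".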
#n个bottleneck输入不是上一个特征图 8个 75 | csp_bottleneck_not1 = ['backbone_self.csp1.m.0.cv1.conv.weight', 'backbone_self.csp1.m.1.cv1.conv.weight', 'backbone_self.csp1.m.2.cv1.conv.weight', 76 | 'backbone_self.csp2.m.0.cv1.conv.weight', 'backbone_self.csp2.m.1.cv1.conv.weight', 'backbone_self.csp2.m.2.cv1.conv.weight', 77 | 'backbone_self.csp2.m.3.cv1.conv.weight', 'backbone_self.csp2.m.4.cv1.conv.weight', 'backbone_self.csp2.m.5.cv1.conv.weight', 78 | 'backbone_self.csp2.m.6.cv1.conv.weight', 'backbone_self.csp2.m.7.cv1.conv.weight', 'backbone_self.csp2.m.8.cv1.conv.weight', 79 | 'backbone_self.csp3.m.0.cv1.conv.weight', 'backbone_self.csp3.m.1.cv1.conv.weight', 'backbone_self.csp3.m.2.cv1.conv.weight', 80 | 'backbone_self.csp3.m.3.cv1.conv.weight', 'backbone_self.csp3.m.4.cv1.conv.weight', 'backbone_self.csp3.m.5.cv1.conv.weight', 81 | 'backbone_self.csp3.m.6.cv1.conv.weight', 'backbone_self.csp3.m.7.cv1.conv.weight', 'backbone_self.csp3.m.8.cv1.conv.weight'] 82 | csp_bottleneck = ['backbone_self.csp4.m.0.cv1.conv.weight', 83 | 'neck_self.csp1.m.0.cv1.conv.weight', 'neck_self.csp2.m.0.cv1.conv.weight', 84 | 'neck_self.csp3.m.0.cv1.conv.weight', 'neck_self.csp4.m.0.cv1.conv.weight'] 85 | csp_bottleneck_indict = [indice_dict['backbone_self.csp4.cv1.conv.weight'], 86 | indice_dict['neck_self.csp1.cv1.conv.weight'], indice_dict['neck_self.csp2.cv1.conv.weight'], 87 | indice_dict['neck_self.csp3.cv1.conv.weight'], indice_dict['neck_self.csp4.cv1.conv.weight']] 88 | 89 | #backbone部分中csp模块中跳跃中的卷积 90 | csp_backbone_cv1 = ['backbone_self.csp1.cv1.conv.weight', 'backbone_self.csp2.cv1.conv.weight', 91 | 'backbone_self.csp3.cv1.conv.weight', 'backbone_self.csp4.cv1.conv.weight'] 92 | 93 | 94 | csp_backbone_cv2 = ['backbone_self.csp1.cv2.weight', 'backbone_self.csp2.cv2.weight', 95 | 'backbone_self.csp3.cv2.weight', 'backbone_self.csp4.cv2.weight'] 96 | csp_backbone_cv1_cv2_indict = [indice_dict['backbone_self.conv1.conv.weight'], indice_dict['backbone_self.conv2.conv.weight'], 97 | indice_dict['backbone_self.conv3.conv.weight'], indice_dict['backbone_self.spp.cv2.conv.weight']] 98 | 99 | csp_cv3_not1 = ['backbone_self.csp1.cv3.weight', 'backbone_self.csp2.cv3.weight', 'backbone_self.csp3.cv3.weight'] 100 | csp_cv3 = ['backbone_self.csp1.cv3.weight', 'backbone_self.csp2.cv3.weight', 101 | 'backbone_self.csp3.cv3.weight', 'backbone_self.csp4.cv3.weight', 102 | 'neck_self.csp1.cv3.weight', 'neck_self.csp2.cv3.weight', 103 | 'neck_self.csp3.cv3.weight', 'neck_self.csp4.cv3.weight'] 104 | csp_cv3_indict = [indice_dict['backbone_self.csp1.m.2.cv2.bn.weight'], indice_dict['backbone_self.csp2.m.8.cv2.conv.weight'], 105 | indice_dict['backbone_self.csp3.m.8.cv2.conv.weight'], indice_dict['backbone_self.csp4.m.2.cv2.conv.weight'], 106 | indice_dict['neck_self.csp1.m.2.cv2.conv.weight'], indice_dict['neck_self.csp2.m.2.cv2.conv.weight'], 107 | indice_dict['neck_self.csp3.m.2.cv2.conv.weight'], indice_dict['neck_self.csp4.m.2.cv2.conv.weight']] 108 | 109 | # 外面的concat 4个 110 | csp_neck_cv1 = ['neck_self.csp1.cv1.conv.weight', 'neck_self.csp2.cv1.conv.weight', 111 | 'neck_self.csp3.cv1.conv.weight', 'neck_self.csp4.cv1.conv.weight'] 112 | csp_neck_cv2 = ['neck_self.csp1.cv2.weight', 'neck_self.csp2.cv2.weight', 113 | 'neck_self.csp3.cv2.weight', 'neck_self.csp4.cv2.weight'] 114 | csp_neck_cv_indict = [torch.cat((indice_dict['neck_self.conv1.conv.weight'],indice_dict['backbone_self.csp3.cv4.conv.weight']+512)), 115 | 
torch.cat((indice_dict['neck_self.conv2.conv.weight'],indice_dict['backbone_self.csp2.cv4.conv.weight']+256)), 116 | torch.cat((indice_dict['neck_self.conv3.conv.weight'],indice_dict['neck_self.conv2.conv.weight']+256)), 117 | torch.cat((indice_dict['neck_self.conv4.conv.weight'],indice_dict['neck_self.conv1.conv.weight']+512))] 118 | 119 | # spp部分 1个 120 | spp_cat = ['backbone_self.spp.cv2.conv.weight'] 121 | spp_cat_indict = torch.cat((indice_dict['backbone_self.spp.cv1.conv.weight'], indice_dict['backbone_self.spp.cv1.conv.weight'] + 512, 122 | indice_dict['backbone_self.spp.cv1.conv.weight'] + 512*2, indice_dict['backbone_self.spp.cv1.conv.weight'] + 512*3)) 123 | 124 | # head部分 3个 125 | head_det = ['head_self.m.0.weight', 'head_self.m.1.weight', 'head_self.m.2.weight'] 126 | head_det_indict = [indice_dict['neck_self.csp2.cv4.conv.weight'], indice_dict['neck_self.csp3.cv4.conv.weight'], indice_dict['neck_self.csp4.cv4.conv.weight']] 127 | 128 | temp_list = [] 129 | for index, [key, value] in enumerate(big_state_dict.items()): 130 | if 'num_batches_tracked' not in key: 131 | # focus、head以及8个csp模块中的bn先完全赋值 132 | if indice_dict[key] == [] or \ 133 | csp_bn[0] in key or csp_bn[1] in key or csp_bn[2] in key or csp_bn[3] in key \ 134 | or csp_bn[4] in key or csp_bn[5] in key or csp_bn[6] in key or csp_bn[7] in key: 135 | small_state_dict[key] = value 136 | else: 137 | # 减去卷积核的个数, 即第0维度 138 | if key[:22] in ['backbone_self.csp1.cv1', 'backbone_self.csp2.cv1', 'backbone_self.csp3.cv1']: 139 | small_state_dict[key] = value 140 | 141 | elif key[:26] in bottleneck_not0: 142 | small_state_dict[key] = value 143 | else: 144 | small_state_dict[key] = torch.index_select(value, 0, indice_dict[key]) 145 | 146 | # 减去输入特征图的通道数(一般为上一层卷积核的个数),即第1维度 147 | if 'backbone_self.conv1' not in key and 'bn' not in key: 148 | if key in csp_after_bn: #CSP module中BN后的卷积 149 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_indict[csp_after_bn.index(key)]) 150 | elif key in csp_after_conv: #CSP module后的卷积 151 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_after_bn_indict[csp_after_conv.index(key)]) 152 | elif key in csp_bottleneck: #CSP module中的bottleneck 153 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_bottleneck_indict[csp_bottleneck.index(key)]) 154 | elif key in csp_backbone_cv1: #backbone CSP module中第一条支路 155 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_backbone_cv1_cv2_indict[csp_backbone_cv1.index(key)]) 156 | elif key in csp_backbone_cv2: #backbone CSP module中第二条支路(跳跃) 157 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_backbone_cv1_cv2_indict[csp_backbone_cv2.index(key)]) 158 | elif key in csp_neck_cv1: #neck CSP module中第一条支路 159 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_neck_cv_indict[csp_neck_cv1.index(key)]) 160 | elif key in csp_neck_cv2: #neck CSP module中第二条支路 161 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_neck_cv_indict[csp_neck_cv2.index(key)]) 162 | elif key in csp_cv3: #backbone CSP module中第一条支路中的卷积 163 | if key in csp_cv3_not1: 164 | small_state_dict[key] = small_state_dict[key] 165 | else: 166 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, csp_cv3_indict[csp_cv3.index(key)]) 167 | elif key in spp_cat: #spp module 168 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, spp_cat_indict) 169 | elif key in csp_bottleneck_not1: 170 | small_state_dict[key] = 
small_state_dict[key] 171 | else: 172 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, temp_list[-1]) 173 | temp_list.append(indice_dict[key]) 174 | 175 | # 直接剪去CSP module中的BN 176 | if 'num_batches_tracked' not in key: 177 | for ind, key_csp_bn in enumerate(csp_bn): 178 | if key_csp_bn in key: 179 | small_state_dict[key] = torch.index_select(small_state_dict[key], 0, csp_indict[ind]) 180 | 181 | # head部分,只剪去第1维度 182 | if key in head_det: 183 | small_state_dict[key] = torch.index_select(small_state_dict[key], 1, head_det_indict[head_det.index(key)]) 184 | 185 | small_model.load_state_dict(small_state_dict) 186 | return small_model 187 | 188 | def extract_para(big_model): 189 | kept_index_per_layer = {} 190 | big_model_state_dict = big_model.state_dict() 191 | temp = [] 192 | for ind, key in enumerate(big_model_state_dict.keys()): 193 | if 'conv.weight' in key or 'cv2.weight' in key or 'cv3.weight' in key: 194 | if 'focus' not in key: 195 | indices_zero, indices_nonzero = check_channel(big_model_state_dict[key]) 196 | kept_index_per_layer[key] = indices_nonzero #记下所有卷积该保留的索引 197 | temp = indices_nonzero 198 | else: 199 | kept_index_per_layer[key] = temp #focus为[] 200 | elif 'head_self' in key: 201 | kept_index_per_layer[key] = [] #head为[] 202 | else: 203 | if 'bn.num_batches_tracked' in key: 204 | kept_index_per_layer[key] = [] #bn.num_batches_tracked记为空 205 | else: 206 | kept_index_per_layer[key] = temp #记下所有卷积后的bn该保留的索引 207 | 208 | small_model = Small_Model(nc=20, prune_rate=0.7)######################################### 209 | 210 | return kept_index_per_layer, small_model 211 | 212 | def check_channel(tensor): 213 | 214 | size_0 = tensor.size()[0] 215 | size_1 = tensor.size()[1] * tensor.size()[2] * tensor.size()[3] 216 | tensor_resize = tensor.view(size_0, -1) 217 | # indicator: if the channel contain all zeros 218 | channel_if_zero = np.zeros(size_0) 219 | for x in range(0, size_0, 1): 220 | channel_if_zero[x] = np.count_nonzero(tensor_resize[x].cpu().numpy()) != 0 221 | 222 | indices_nonzero = torch.LongTensor((channel_if_zero != 0).nonzero()[0]) 223 | 224 | zeros = (channel_if_zero == 0).nonzero()[0] 225 | indices_zero = torch.LongTensor(zeros) if zeros != [] else [] 226 | 227 | return indices_zero, indices_nonzero 228 | 229 | if __name__ == '__main__': 230 | # model = torch.load('best_new.pt')['model'] 231 | # small_model = get_small_model(model) 232 | model_all_keys = torch.load('best.pt') 233 | 234 | model = torch.load('best.pt')['model'].to('cpu') 235 | model.float() 236 | big_path = 'big_model.pt' 237 | torch.save(model, big_path) 238 | 239 | small_model = get_small_model(model) 240 | 241 | small_path = 'small_model.pt' 242 | torch.save(small_model, small_path) 243 | 244 | model_all_keys['model'] = small_model 245 | torch.save(model_all_keys, 'small_model_all.pt') 246 | 247 | 248 | 249 | -------------------------------------------------------------------------------- /detector_cpu.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import cv2 4 | import torch 5 | from numpy import random 6 | import os 7 | import math 8 | 9 | import torchvision 10 | 11 | from typing import Dict, Tuple, Sequence, List, Union 12 | 13 | names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 14 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] 15 | class Detect_model(object): 16 | def __init__(self, 
weight:str='', imgsize:int=640, device:str='', conf:float=0.3, names=[]): 17 | self.weight = weight 18 | self.imgsize = imgsize 19 | self.conf = conf 20 | self.device = select_device(device) 21 | self.half = False 22 | #self.half = self.device.type != 'cpu' 23 | #print(self.half) 24 | # print(weight) 25 | #self.model = torch.load(self.weight, map_location=self.device)['model'].float().fuse().eval() 26 | self.model_jit = torch.jit.load(weight) 27 | if self.half: 28 | self.model_jit.half().to(self.device) # to FP16 29 | 30 | # self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names 31 | self.names = names 32 | # print(self.names) 33 | self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names] 34 | 35 | ### warm up 36 | img = torch.zeros((1, 3, self.imgsize, self.imgsize)).to(self.device) # init img 37 | _ = self.model_jit(img.half() if self.half else img) if self.device.type != 'cpu' else None # run once 38 | _ = self.model_jit(img.half() if self.half else img) if self.device.type != 'cpu' else None # run once 39 | 40 | def preprocess(self, img:np.ndarray=None): 41 | # img = letterbox(img, new_shape=self.imgsize)[0] 42 | img = image_pad(img, (self.imgsize, self.imgsize)) 43 | # Convert 44 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 45 | img = np.ascontiguousarray(img) 46 | 47 | img = torch.from_numpy(img).to(self.device) 48 | img = img.half() if self.half else img.float() # uint8 to fp16/32 49 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 50 | if img.ndimension() == 3: 51 | img = img.unsqueeze(0) 52 | return img 53 | 54 | def detect_frame(self, img0:np.ndarray=None, draw_img:bool=False): 55 | ori_shape = img0.shape 56 | res_boxs = [] 57 | res_labels = [] 58 | 59 | img = self.preprocess(img0) 60 | # Inference 61 | t0 = time.time() 62 | pred = self.model_jit(img)[0] 63 | tt = time.time()-t0 64 | # Apply NMS 65 | pred = non_max_suppression(pred, conf_thres=self.conf, iou_thres=0.5) 66 | 67 | #print(time.time()-t0) 68 | # Process detections 69 | for i, det in enumerate(pred): # detections per image 70 | if len(det): 71 | # Rescale boxes from img_size to im0 size 72 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], ori_shape).round() 73 | 74 | # Write results 75 | for *xyxy, conf, cls in reversed(det): 76 | #print(self.names[int(cls)]) 77 | if self.names[int(cls)] == "person": 78 | res_boxs.append([int(xyxy[0]), 79 | int(xyxy[1]), 80 | int(xyxy[2]), 81 | int(xyxy[3])]) 82 | #print(int(cls)) 83 | res_labels.append(self.names[int(cls)]) 84 | if draw_img: # Add bbox to image 85 | label = self.names[int(cls)]+' '+str(round(float(conf), 2)) 86 | plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)], line_thickness=3) 87 | 88 | label = self.names[int(cls)]+' '+str(round(float(conf), 2)) 89 | plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)], line_thickness=3) 90 | #cv2.imwrite('res.jpg', img0) 91 | 92 | 93 | 94 | 95 | #print(time.time()-t0) 96 | 97 | if draw_img: 98 | return res_boxs, res_labels, img0, tt 99 | else: 100 | return res_boxs, res_labels 101 | 102 | def select_device(device='', batch_size=None): 103 | # device = 'cpu' or '0' or '0,1,2,3' 104 | # s = f'Using torch {torch.__version__} ' # string 105 | cpu = device.lower() == 'cpu' 106 | if cpu: 107 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 108 | elif device: # non-cpu device requested 109 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 110 | assert 
torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 111 | 112 | cuda = torch.cuda.is_available() and not cpu 113 | if cuda: 114 | n = torch.cuda.device_count() 115 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 116 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 117 | # space = ' ' * len(s) 118 | for i, d in enumerate(device.split(',') if device else range(n)): 119 | p = torch.cuda.get_device_properties(i) 120 | # s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 121 | else: 122 | # s += 'CPU' 123 | pass 124 | return torch.device('cuda:0' if cuda else 'cpu') 125 | 126 | def image_pad(image, target_size): 127 | # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) 128 | # image = image.astype(np.int8) 129 | if isinstance(target_size, int): 130 | ih, iw = target_size, target_size 131 | else: 132 | ih, iw = target_size 133 | # print(ih, iw) 134 | h, w, _ = image.shape 135 | 136 | scale = min(iw / w, ih / h) 137 | nw, nh = int(scale * w), int(scale * h) 138 | image_resized = cv2.resize(image, (nw, nh)) 139 | 140 | image_paded = np.full(shape=[ih, iw, 3], fill_value=114) 141 | dw, dh = (iw - nw) // 2, (ih - nh) // 2 142 | image_paded[dh:nh + dh, dw:nw + dw, :] = image_resized 143 | 144 | return image_paded.astype(np.uint8) 145 | 146 | def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True): 147 | # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 148 | shape = img.shape[:2] # current shape [height, width] 149 | if isinstance(new_shape, int): 150 | new_shape = (new_shape, new_shape) 151 | 152 | # Scale ratio (new / old) 153 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 154 | if not scaleup: # only scale down, do not scale up (for better test mAP) 155 | r = min(r, 1.0) 156 | 157 | # Compute padding 158 | ratio = r, r # width, height ratios 159 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 160 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 161 | if auto: # minimum rectangle 162 | dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding 163 | elif scaleFill: # stretch 164 | dw, dh = 0.0, 0.0 165 | new_unpad = (new_shape[1], new_shape[0]) 166 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios 167 | 168 | dw /= 2 # divide padding into 2 sides 169 | dh /= 2 170 | 171 | if shape[::-1] != new_unpad: # resize 172 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 173 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 174 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 175 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 176 | return img, ratio, (dw, dh) 177 | 178 | def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()): 179 | """Performs Non-Maximum Suppression (NMS) on inference results 180 | 181 | Returns: 182 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 183 | """ 184 | 185 | nc = prediction.shape[2] - 5 # number of classes 186 | xc = prediction[..., 4] > conf_thres # candidates 187 | 188 | # Settings 189 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 190 | max_det = 300 # maximum number of detections per image 191 | max_nms = 30000 # maximum 
number of boxes into torchvision.ops.nms() 192 | time_limit = 10.0 # seconds to quit after 193 | redundant = True # require redundant detections 194 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) 195 | merge = False # use merge-NMS 196 | 197 | t = time.time() 198 | output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] 199 | for xi, x in enumerate(prediction): # image index, image inference 200 | # Apply constraints 201 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 202 | x = x[xc[xi]] # confidence 203 | 204 | # Cat apriori labels if autolabelling 205 | if labels and len(labels[xi]): 206 | l = labels[xi] 207 | v = torch.zeros((len(l), nc + 5), device=x.device) 208 | v[:, :4] = l[:, 1:5] # box 209 | v[:, 4] = 1.0 # conf 210 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 211 | x = torch.cat((x, v), 0) 212 | 213 | # If none remain process next image 214 | if not x.shape[0]: 215 | continue 216 | 217 | # Compute conf 218 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 219 | 220 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 221 | box = xywh2xyxy(x[:, :4]) 222 | 223 | # Detections matrix nx6 (xyxy, conf, cls) 224 | if multi_label: 225 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 226 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 227 | else: # best class only 228 | conf, j = x[:, 5:].max(1, keepdim=True) 229 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 230 | 231 | # Filter by class 232 | if classes is not None: 233 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 234 | 235 | # Apply finite constraint 236 | # if not torch.isfinite(x).all(): 237 | # x = x[torch.isfinite(x).all(1)] 238 | 239 | # Check shape 240 | n = x.shape[0] # number of boxes 241 | if not n: # no boxes 242 | continue 243 | elif n > max_nms: # excess boxes 244 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 245 | 246 | # Batched NMS 247 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 248 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 249 | # print(boxes.shape) 250 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 251 | # print(i.shape) 252 | if i.shape[0] > max_det: # limit detections 253 | i = i[:max_det] 254 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 255 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 256 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 257 | weights = iou * scores[None] # box weights 258 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 259 | if redundant: 260 | i = i[iou.sum(1) > 1] # require redundancy 261 | 262 | output[xi] = x[i] 263 | if (time.time() - t) > time_limit: 264 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 265 | break # time limit exceeded 266 | 267 | return output 268 | 269 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 270 | # Rescale coords (xyxy) from img1_shape to img0_shape 271 | if ratio_pad is None: # calculate from img0_shape 272 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 273 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 274 | else: 275 | gain = ratio_pad[0][0] 276 | pad = ratio_pad[1] 277 | 278 | coords[:, [0, 2]] -= pad[0] # x padding 279 | coords[:, [1, 3]] -= pad[1] # y padding 280 | 
coords[:, :4] /= gain 281 | clip_coords(coords, img0_shape) 282 | return coords 283 | 284 | def xyxy2xywh(x): 285 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 286 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 287 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 288 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 289 | y[:, 2] = x[:, 2] - x[:, 0] # width 290 | y[:, 3] = x[:, 3] - x[:, 1] # height 291 | return y 292 | 293 | def xywh2xyxy(x): 294 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 295 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 296 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 297 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 298 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 299 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 300 | return y 301 | 302 | def clip_coords(boxes, img_shape): 303 | # Clip bounding xyxy bounding boxes to image shape (height, width) 304 | boxes[:, 0].clamp_(0, img_shape[1]) # x1 305 | boxes[:, 1].clamp_(0, img_shape[0]) # y1 306 | boxes[:, 2].clamp_(0, img_shape[1]) # x2 307 | boxes[:, 3].clamp_(0, img_shape[0]) # y2 308 | 309 | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9): 310 | # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 311 | box2 = box2.T 312 | 313 | # Get the coordinates of bounding boxes 314 | if x1y1x2y2: # x1, y1, x2, y2 = box1 315 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 316 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 317 | else: # transform from xywh to xyxy 318 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 319 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 320 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 321 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 322 | 323 | # Intersection area 324 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 325 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 326 | 327 | # Union Area 328 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 329 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 330 | union = w1 * h1 + w2 * h2 - inter + eps 331 | 332 | iou = inter / union 333 | if GIoU or DIoU or CIoU: 334 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 335 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 336 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 337 | c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 338 | rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + 339 | (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 340 | if DIoU: 341 | return iou - rho2 / c2 # DIoU 342 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 343 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 344 | with torch.no_grad(): 345 | alpha = v / ((1 + eps) - iou + v) 346 | return iou - (rho2 / c2 + v * alpha) # CIoU 347 | else: # GIoU https://arxiv.org/pdf/1902.09630.pdf 348 | c_area = cw * ch + eps # convex area 349 | return iou - (c_area - union) / c_area # GIoU 350 | else: 351 | return iou # IoU 352 | 353 | 354 | def box_iou(box1, box2): 355 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 356 | """ 357 | Return intersection-over-union (Jaccard index) of boxes. 
358 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 359 | Arguments: 360 | box1 (Tensor[N, 4]) 361 | box2 (Tensor[M, 4]) 362 | Returns: 363 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 364 | IoU values for every element in boxes1 and boxes2 365 | """ 366 | #print(box1.size(), box2.size()) 367 | def box_area(box): 368 | # box = 4xn 369 | return (box[2] - box[0]) * (box[3] - box[1]) 370 | 371 | area1 = box_area(box1.T) 372 | area2 = box_area(box2.T) 373 | 374 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 375 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 376 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 377 | 378 | def wh_iou(wh1, wh2): 379 | # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 380 | wh1 = wh1[:, None] # [N,1,2] 381 | wh2 = wh2[None] # [1,M,2] 382 | inter = torch.min(wh1, wh2).prod(2) # [N,M] 383 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 384 | 385 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 386 | # Plots one bounding box on image img 387 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness 388 | color = color or [random.randint(0, 255) for _ in range(3)] 389 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 390 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 391 | if label: 392 | tf = max(tl - 1, 1) # font thickness 393 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 394 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 395 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 396 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 397 | 398 | if __name__ == '__main__': 399 | import copy 400 | model0 = Detect_model(weight=r'./get_small_script/best_cpu.torchscript.pt', imgsize=640, device='cpu', conf=0.4, names=names) #small_model_all 401 | 402 | input1 = torch.randn(1, 3, 640, 640)#.to('cuda').half() 403 | model0.model_jit(input1) 404 | model0.model_jit(input1) 405 | 406 | # input_list = [] 407 | # for i in range(300): 408 | # input_list.append(input1) 409 | # 410 | # tsum = 0 411 | # tsum0 = 0 412 | 413 | # for i in range(300): 414 | # #t00 = time.time() 415 | # #input1 = torch.randn(1, 3, 640, 640).to('cuda')#.half() 416 | # 417 | # #input2 = copy.deepcopy(input1) 418 | # 419 | # t0 = time.time() 420 | # pred = model0.model_jit(input_list[i])[0] 421 | # t1 = time.time()-t0 422 | # #t2 = t0 -t00 423 | # tsum += t1 424 | # #tsum0 += t2 425 | # 426 | # print(tsum/300) 427 | # #print(tsum0/300) 428 | 429 | tsum = 0 430 | image = cv2.imread(r'images/call3.jpg') 431 | image_list = [] 432 | for i in range(50): 433 | image_list.append(image) 434 | 435 | for i in range(50): 436 | res_boxs, res_labels, img0, ttt = model0.detect_frame(image_list[i], draw_img=True) 437 | tsum += ttt 438 | print('tsum:', tsum/50) 439 | ## cv2.imwrite('res.jpg', img0) 440 | # # cv2.imshow('aaa', image) 441 | # # cv2.waitKey(0) 442 | 443 | 444 | 445 | -------------------------------------------------------------------------------- /detector_gpu.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import cv2 4 | import torch 5 | from numpy import random 6 | import os 7 | import math 8 | 9 | import torchvision 10 | 11 | from typing 
import Dict, Tuple, Sequence, List, Union 12 | 13 | 14 | names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 15 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] 16 | 17 | ''' 18 | names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 19 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 20 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 21 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 22 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 23 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 24 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 25 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 26 | 'hair drier', 'toothbrush'] 27 | ''' 28 | class Detect_model(object): 29 | def __init__(self, weight:str='', imgsize:int=640, device:str='', conf:float=0.3, names=[]): 30 | self.weight = weight 31 | self.imgsize = imgsize 32 | self.conf = conf 33 | self.device = select_device(device) 34 | self.half = False 35 | #self.half = self.device.type != 'cpu' 36 | #print(self.half) 37 | # print(weight) 38 | #self.model = torch.load(self.weight, map_location=self.device)['model'].float().fuse().eval() 39 | self.model_jit = torch.jit.load(weight) 40 | if self.half: 41 | self.model_jit.half().to(self.device) # to FP16 42 | 43 | # self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names 44 | self.names = names 45 | # print(self.names) 46 | self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names] 47 | 48 | ### warm up 49 | img = torch.zeros((1, 3, self.imgsize, self.imgsize)).to(self.device) # init img 50 | _ = self.model_jit(img.half() if self.half else img) if self.device.type != 'cpu' else None # run once 51 | _ = self.model_jit(img.half() if self.half else img) if self.device.type != 'cpu' else None # run once 52 | 53 | def preprocess(self, img:np.ndarray=None): 54 | # img = letterbox(img, new_shape=self.imgsize)[0] 55 | img = image_pad(img, (self.imgsize, self.imgsize)) 56 | # Convert 57 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 58 | img = np.ascontiguousarray(img) 59 | 60 | img = torch.from_numpy(img).to(self.device) 61 | img = img.half() if self.half else img.float() # uint8 to fp16/32 62 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 63 | if img.ndimension() == 3: 64 | img = img.unsqueeze(0) 65 | return img 66 | 67 | def detect_frame(self, img0:np.ndarray=None, draw_img:bool=False): 68 | ori_shape = img0.shape 69 | res_boxs = [] 70 | res_labels = [] 71 | 72 | img = self.preprocess(img0) 73 | t0 = time.time() 74 | # Inference 75 | pred = self.model_jit(img)[0] 76 | tt = time.time()-t0 77 | # Apply NMS 78 | pred = non_max_suppression(pred, conf_thres=self.conf, iou_thres=0.5) 79 | 80 | 81 | #print(time.time()-t0) 82 | # Process detections 83 | for i, det in enumerate(pred): # detections per image 84 | if len(det): 85 | # Rescale boxes from img_size to im0 size 86 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], ori_shape).round() 87 | 88 | # Write results 89 | for *xyxy, 
conf, cls in reversed(det): 90 | #print(self.names[int(cls)]) 91 | if self.names[int(cls)] == "person": 92 | res_boxs.append([int(xyxy[0]), 93 | int(xyxy[1]), 94 | int(xyxy[2]), 95 | int(xyxy[3])]) 96 | #print(int(cls)) 97 | res_labels.append(self.names[int(cls)]) 98 | if draw_img: # Add bbox to image 99 | label = self.names[int(cls)]+' '+str(round(float(conf), 2)) 100 | plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)], line_thickness=3) 101 | 102 | label = self.names[int(cls)]+' '+str(round(float(conf), 2)) 103 | plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)], line_thickness=3) 104 | cv2.imwrite('res.jpg', img0) 105 | 106 | 107 | 108 | 109 | #print(time.time()-t0) 110 | 111 | if draw_img: 112 | return res_boxs, res_labels, img0, tt 113 | else: 114 | return res_boxs, res_labels 115 | 116 | def select_device(device='', batch_size=None): 117 | # device = 'cpu' or '0' or '0,1,2,3' 118 | # s = f'Using torch {torch.__version__} ' # string 119 | cpu = device.lower() == 'cpu' 120 | if cpu: 121 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 122 | elif device: # non-cpu device requested 123 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 124 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 125 | 126 | cuda = torch.cuda.is_available() and not cpu 127 | if cuda: 128 | n = torch.cuda.device_count() 129 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 130 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 131 | # space = ' ' * len(s) 132 | for i, d in enumerate(device.split(',') if device else range(n)): 133 | p = torch.cuda.get_device_properties(i) 134 | # s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 135 | else: 136 | # s += 'CPU' 137 | pass 138 | return torch.device('cuda:0' if cuda else 'cpu') 139 | 140 | def image_pad(image, target_size): 141 | # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) 142 | # image = image.astype(np.int8) 143 | if isinstance(target_size, int): 144 | ih, iw = target_size, target_size 145 | else: 146 | ih, iw = target_size 147 | # print(ih, iw) 148 | h, w, _ = image.shape 149 | 150 | scale = min(iw / w, ih / h) 151 | nw, nh = int(scale * w), int(scale * h) 152 | image_resized = cv2.resize(image, (nw, nh)) 153 | 154 | image_paded = np.full(shape=[ih, iw, 3], fill_value=114) 155 | dw, dh = (iw - nw) // 2, (ih - nh) // 2 156 | image_paded[dh:nh + dh, dw:nw + dw, :] = image_resized 157 | 158 | return image_paded.astype(np.uint8) 159 | 160 | def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True): 161 | # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 162 | shape = img.shape[:2] # current shape [height, width] 163 | if isinstance(new_shape, int): 164 | new_shape = (new_shape, new_shape) 165 | 166 | # Scale ratio (new / old) 167 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 168 | if not scaleup: # only scale down, do not scale up (for better test mAP) 169 | r = min(r, 1.0) 170 | 171 | # Compute padding 172 | ratio = r, r # width, height ratios 173 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 174 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 175 | if auto: # minimum rectangle 176 | dw, dh = np.mod(dw, 32), np.mod(dh, 
32) # wh padding 177 | elif scaleFill: # stretch 178 | dw, dh = 0.0, 0.0 179 | new_unpad = (new_shape[1], new_shape[0]) 180 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios 181 | 182 | dw /= 2 # divide padding into 2 sides 183 | dh /= 2 184 | 185 | if shape[::-1] != new_unpad: # resize 186 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 187 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 188 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 189 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 190 | return img, ratio, (dw, dh) 191 | 192 | def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()): 193 | """Performs Non-Maximum Suppression (NMS) on inference results 194 | 195 | Returns: 196 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 197 | """ 198 | 199 | nc = prediction.shape[2] - 5 # number of classes 200 | xc = prediction[..., 4] > conf_thres # candidates 201 | 202 | # Settings 203 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 204 | max_det = 300 # maximum number of detections per image 205 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 206 | time_limit = 10.0 # seconds to quit after 207 | redundant = True # require redundant detections 208 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) 209 | merge = False # use merge-NMS 210 | 211 | t = time.time() 212 | output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] 213 | for xi, x in enumerate(prediction): # image index, image inference 214 | # Apply constraints 215 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 216 | x = x[xc[xi]] # confidence 217 | 218 | # Cat apriori labels if autolabelling 219 | if labels and len(labels[xi]): 220 | l = labels[xi] 221 | v = torch.zeros((len(l), nc + 5), device=x.device) 222 | v[:, :4] = l[:, 1:5] # box 223 | v[:, 4] = 1.0 # conf 224 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 225 | x = torch.cat((x, v), 0) 226 | 227 | # If none remain process next image 228 | if not x.shape[0]: 229 | continue 230 | 231 | # Compute conf 232 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 233 | 234 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 235 | box = xywh2xyxy(x[:, :4]) 236 | 237 | # Detections matrix nx6 (xyxy, conf, cls) 238 | if multi_label: 239 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 240 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 241 | else: # best class only 242 | conf, j = x[:, 5:].max(1, keepdim=True) 243 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 244 | 245 | # Filter by class 246 | if classes is not None: 247 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 248 | 249 | # Apply finite constraint 250 | # if not torch.isfinite(x).all(): 251 | # x = x[torch.isfinite(x).all(1)] 252 | 253 | # Check shape 254 | n = x.shape[0] # number of boxes 255 | if not n: # no boxes 256 | continue 257 | elif n > max_nms: # excess boxes 258 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 259 | 260 | # Batched NMS 261 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 262 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 263 | # print(boxes.shape) 264 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 265 | # print(i.shape) 266 | 
if i.shape[0] > max_det: # limit detections 267 | i = i[:max_det] 268 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 269 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 270 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 271 | weights = iou * scores[None] # box weights 272 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 273 | if redundant: 274 | i = i[iou.sum(1) > 1] # require redundancy 275 | 276 | output[xi] = x[i] 277 | if (time.time() - t) > time_limit: 278 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 279 | break # time limit exceeded 280 | 281 | return output 282 | 283 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 284 | # Rescale coords (xyxy) from img1_shape to img0_shape 285 | if ratio_pad is None: # calculate from img0_shape 286 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 287 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 288 | else: 289 | gain = ratio_pad[0][0] 290 | pad = ratio_pad[1] 291 | 292 | coords[:, [0, 2]] -= pad[0] # x padding 293 | coords[:, [1, 3]] -= pad[1] # y padding 294 | coords[:, :4] /= gain 295 | clip_coords(coords, img0_shape) 296 | return coords 297 | 298 | def xyxy2xywh(x): 299 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 300 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 301 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 302 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 303 | y[:, 2] = x[:, 2] - x[:, 0] # width 304 | y[:, 3] = x[:, 3] - x[:, 1] # height 305 | return y 306 | 307 | def xywh2xyxy(x): 308 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 309 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 310 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 311 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 312 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 313 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 314 | return y 315 | 316 | def clip_coords(boxes, img_shape): 317 | # Clip bounding xyxy bounding boxes to image shape (height, width) 318 | boxes[:, 0].clamp_(0, img_shape[1]) # x1 319 | boxes[:, 1].clamp_(0, img_shape[0]) # y1 320 | boxes[:, 2].clamp_(0, img_shape[1]) # x2 321 | boxes[:, 3].clamp_(0, img_shape[0]) # y2 322 | 323 | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9): 324 | # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 325 | box2 = box2.T 326 | 327 | # Get the coordinates of bounding boxes 328 | if x1y1x2y2: # x1, y1, x2, y2 = box1 329 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 330 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 331 | else: # transform from xywh to xyxy 332 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 333 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 334 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 335 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 336 | 337 | # Intersection area 338 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 339 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 340 | 341 | # Union Area 342 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 343 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 344 | union = w1 * h1 + w2 * h2 - inter + eps 345 | 346 | iou = inter / union 347 | if GIoU or DIoU or CIoU: 348 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 349 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 350 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 351 | c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 352 | rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + 353 | (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 354 | if DIoU: 355 | return iou - rho2 / c2 # DIoU 356 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 357 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 358 | with torch.no_grad(): 359 | alpha = v / ((1 + eps) - iou + v) 360 | return iou - (rho2 / c2 + v * alpha) # CIoU 361 | else: # GIoU https://arxiv.org/pdf/1902.09630.pdf 362 | c_area = cw * ch + eps # convex area 363 | return iou - (c_area - union) / c_area # GIoU 364 | else: 365 | return iou # IoU 366 | 367 | 368 | def box_iou(box1, box2): 369 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 370 | """ 371 | Return intersection-over-union (Jaccard index) of boxes. 372 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 373 | Arguments: 374 | box1 (Tensor[N, 4]) 375 | box2 (Tensor[M, 4]) 376 | Returns: 377 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 378 | IoU values for every element in boxes1 and boxes2 379 | """ 380 | #print(box1.size(), box2.size()) 381 | def box_area(box): 382 | # box = 4xn 383 | return (box[2] - box[0]) * (box[3] - box[1]) 384 | 385 | area1 = box_area(box1.T) 386 | area2 = box_area(box2.T) 387 | 388 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 389 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 390 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 391 | 392 | def wh_iou(wh1, wh2): 393 | # Returns the nxm IoU matrix. 
wh1 is nx2, wh2 is mx2 394 | wh1 = wh1[:, None] # [N,1,2] 395 | wh2 = wh2[None] # [1,M,2] 396 | inter = torch.min(wh1, wh2).prod(2) # [N,M] 397 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 398 | 399 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 400 | # Plots one bounding box on image img 401 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness 402 | color = color or [random.randint(0, 255) for _ in range(3)] 403 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 404 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 405 | if label: 406 | tf = max(tl - 1, 1) # font thickness 407 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 408 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 409 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 410 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 411 | 412 | if __name__ == '__main__': 413 | import copy 414 | model0 = Detect_model(weight=r'./get_small_script/small_model_all.torchscript.pt', imgsize=640, device='0', conf=0.4, names=names) #small_model_all 415 | 416 | input1 = torch.randn(1, 3, 640, 640).to('cuda')#.half() 417 | model0.model_jit(input1) 418 | model0.model_jit(input1) 419 | 420 | # input_list = [] 421 | # for i in range(300): 422 | # input_list.append(input1) 423 | # 424 | # tsum = 0 425 | # tsum0 = 0 426 | # 427 | # for i in range(300): 428 | # #t00 = time.time() 429 | # #input1 = torch.randn(1, 3, 640, 640).to('cuda')#.half() 430 | # 431 | # #input2 = copy.deepcopy(input1) 432 | # 433 | # t0 = time.time() 434 | # pred = model0.model_jit(input_list[i])[0] 435 | # t1 = time.time()-t0 436 | # #t2 = t0 -t00 437 | # tsum += t1 438 | # #tsum0 += t2 439 | # 440 | # print(tsum/300) 441 | # #print(tsum0/300) 442 | 443 | tsum = 0 444 | image = cv2.imread(r'images/call3.jpg') 445 | image_list = [] 446 | for i in range(50): 447 | image_list.append(image) 448 | 449 | for i in range(50): 450 | res_boxs, res_labels, img0, ttt = model0.detect_frame(image_list[i], draw_img=True) 451 | tsum += ttt 452 | print('tsum:', tsum/50) 453 | 454 | ## cv2.imwrite('res.jpg', img0) 455 | # # cv2.imshow('aaa', image) 456 | # # cv2.waitKey(0) 457 | 458 | 459 | 460 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # signal gpu 3 | import argparse 4 | import logging 5 | import os 6 | import random 7 | import shutil 8 | import time 9 | from pathlib import Path 10 | from warnings import warn 11 | 12 | import math 13 | import numpy as np 14 | import torch.nn.functional as F 15 | import torch.optim as optim 16 | import torch.optim.lr_scheduler as lr_scheduler 17 | import torch.utils.data 18 | import yaml 19 | from torch.cuda import amp 20 | from tqdm import tqdm 21 | 22 | import test # import test.py to get mAP after each epoch 23 | from model import Model 24 | from datasets import create_dataloader 25 | from utils.general import ( 26 | torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights, 27 | plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file, 28 | check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution, set_logging, init_seeds) 29 | from utils.torch_utils 
import ModelEMA, select_device, intersect_dicts 30 | from loss import compute_loss 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | def train(hyp, opt, device): 35 | logger.info(f'Hyperparameters {hyp}') 36 | log_dir = Path(opt.logdir) / 'train' # logging directory 37 | wdir = log_dir / 'weights' # weights directory 38 | os.makedirs(wdir, exist_ok=True) 39 | last = wdir / 'last.pt' 40 | best = wdir / 'best.pt' 41 | epochs, batch_size, total_batch_size, weights, rank = \ 42 | opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank 43 | 44 | # Save run settings 45 | with open(log_dir / 'hyp.yaml', 'w') as f: 46 | yaml.dump(hyp, f, sort_keys=False) 47 | with open(log_dir / 'opt.yaml', 'w') as f: 48 | yaml.dump(vars(opt), f, sort_keys=False) 49 | 50 | # Configure 51 | cuda = device.type != 'cpu' 52 | init_seeds(2 + rank) 53 | with open(opt.data) as f: 54 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict 55 | with torch_distributed_zero_first(rank): 56 | check_dataset(data_dict) # check 57 | train_path = data_dict['train'] 58 | test_path = data_dict['val'] 59 | nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names 60 | assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check 61 | 62 | # Model 63 | pretrained = weights.endswith('.pt') 64 | if pretrained: 65 | ckpt = torch.load(weights, map_location=device) # load checkpoint 66 | if hyp.get('anchors'): 67 | ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor 68 | model = Model(nc=nc).to(device) 69 | exclude = ['anchor'] if hyp.get('anchors') else [] # exclude keys 70 | state_dict = ckpt['model'].float().state_dict() # to FP32 71 | state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect 72 | model.load_state_dict(state_dict, strict=False) # load 73 | logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report 74 | else: 75 | model = Model(nc=nc).to(device) 76 | 77 | # Optimizer 78 | nbs = 64 # nominal batch size 79 | accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing 80 | hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay 81 | 82 | pg0, pg1, pg2 = [], [], [] # optimizer parameter groups 83 | for k, v in model.named_parameters(): 84 | v.requires_grad = True 85 | if '.bias' in k: 86 | pg2.append(v) # biases 87 | elif '.weight' in k and '.bn' not in k: 88 | pg1.append(v) # apply weight decay 89 | else: 90 | pg0.append(v) # all else 91 | 92 | if opt.adam: 93 | optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum 94 | else: 95 | optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) 96 | 97 | optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay 98 | optimizer.add_param_group({'params': pg2}) # add pg2 (biases) 99 | logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) 100 | del pg0, pg1, pg2 101 | 102 | # cosine lr_scheduler 103 | lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine 104 | scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 105 | 106 | # Resume 107 | start_epoch, best_fitness = 0, 0.0 108 | if pretrained: 109 | # Optimizer 110 | if ckpt['optimizer'] is not None: 111 | 
optimizer.load_state_dict(ckpt['optimizer']) 112 | best_fitness = ckpt['best_fitness'] 113 | 114 | # Epochs 115 | start_epoch = ckpt['epoch'] + 1 116 | if opt.resume: 117 | assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) 118 | shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}') # save previous weights 119 | if epochs < start_epoch: 120 | logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % 121 | (weights, ckpt['epoch'], epochs)) 122 | epochs += ckpt['epoch'] # finetune additional epochs 123 | 124 | del ckpt, state_dict 125 | 126 | # Image sizes 127 | gs = int(max(model.stride)) # grid size (max stride) 128 | imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples 129 | 130 | # Exponential moving average 131 | ema = ModelEMA(model) if rank in [-1, 0] else None 132 | 133 | # Trainloader 134 | dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, 135 | hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, 136 | rank=rank, world_size=opt.world_size, workers=opt.workers) 137 | mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class 138 | nb = len(dataloader) # number of batches 139 | assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) 140 | 141 | # Process 0 142 | if rank in [-1, 0]: 143 | ema.updates = start_epoch * nb // accumulate # set EMA updates 144 | testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, 145 | hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, 146 | rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader 147 | 148 | if not opt.resume: 149 | labels = np.concatenate(dataset.labels, 0) 150 | c = torch.tensor(labels[:, 0]) # classes 151 | plot_labels(labels, save_dir=log_dir) 152 | 153 | # Anchors 154 | if not opt.noautoanchor: 155 | check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) 156 | 157 | # Model parameters 158 | hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset 159 | model.nc = nc # attach number of classes to model 160 | model.hyp = hyp # attach hyperparameters to model 161 | model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) 162 | model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights 163 | model.names = names 164 | 165 | # Start training 166 | t0 = time.time() 167 | nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) 168 | # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training 169 | maps = np.zeros(nc) # mAP per class 170 | results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) 171 | scheduler.last_epoch = start_epoch - 1 # do not move 172 | scaler = amp.GradScaler(enabled=cuda) 173 | logger.info('Image sizes %g train, %g test\n' 174 | 'Using %g dataloader workers\nLogging results to %s\n' 175 | 'Starting training for %g epochs...' 
% (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) 176 | 177 | for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ 178 | 179 | model.train() 180 | 181 | # Update image weights (optional) 182 | if opt.image_weights: 183 | # Generate indices 184 | if rank in [-1, 0]: 185 | cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights 186 | iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights 187 | dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx 188 | 189 | mloss = torch.zeros(4, device=device) # mean losses 190 | if rank != -1: 191 | dataloader.sampler.set_epoch(epoch) 192 | pbar = enumerate(dataloader) 193 | logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) 194 | if rank in [-1, 0]: 195 | pbar = tqdm(pbar, total=nb) # progress bar 196 | optimizer.zero_grad() 197 | for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- 198 | ni = i + nb * epoch # number integrated batches (since train start) 199 | imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 200 | 201 | # Warmup 202 | if ni <= nw: 203 | xi = [0, nw] # x interp 204 | accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) 205 | for j, x in enumerate(optimizer.param_groups): 206 | x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) 207 | if 'momentum' in x: 208 | x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) 209 | 210 | # Multi-scale 211 | if opt.multi_scale: 212 | sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size 213 | sf = sz / max(imgs.shape[2:]) # scale factor 214 | if sf != 1: 215 | ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) 216 | imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) 217 | 218 | # Forward 219 | with amp.autocast(enabled=cuda): 220 | pred = model(imgs) # forward 221 | loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size 222 | 223 | # Backward 224 | scaler.scale(loss).backward() 225 | 226 | # Optimize 227 | if ni % accumulate == 0: 228 | scaler.step(optimizer) # optimizer.step 229 | scaler.update() 230 | optimizer.zero_grad() 231 | if ema: 232 | ema.update(model) 233 | 234 | # Print 235 | if rank in [-1, 0]: 236 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses 237 | mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) 238 | s = ('%10s' * 2 + '%10.4g' * 6) % ( 239 | '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) 240 | pbar.set_description(s) 241 | 242 | # Plot 243 | if ni < 3: 244 | f = str(log_dir / f'train_batch{ni}.jpg') # filename 245 | plot_images(images=imgs, targets=targets, paths=paths, fname=f) 246 | 247 | # end batch ------------------------------------------------------------------------------------------------ 248 | 249 | # Scheduler 250 | lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard 251 | scheduler.step() 252 | 253 | # DDP process 0 or single-GPU 254 | if rank in [-1, 0]: 255 | # mAP 256 | if ema: 257 | ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) 258 | final_epoch = epoch + 1 == epochs 259 | if not opt.notest 
or final_epoch: # Calculate mAP 260 | results, maps, times = test.test(opt.data, 261 | batch_size=total_batch_size, 262 | imgsz=imgsz_test, 263 | model=ema.ema, 264 | single_cls=opt.single_cls, 265 | dataloader=testloader, 266 | save_dir=log_dir, 267 | plots=epoch == 0 or final_epoch) # plot first and last 268 | 269 | # Update best mAP 270 | fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] 271 | if fi > best_fitness: 272 | best_fitness = fi 273 | 274 | # Save model 275 | save = (not opt.nosave) or (final_epoch and not opt.evolve) 276 | if save: 277 | 278 | ckpt = {'epoch': epoch, 279 | 'best_fitness': best_fitness, 280 | 'model': ema.ema, 281 | 'optimizer': None if final_epoch else optimizer.state_dict()} 282 | 283 | # Save last, best and delete 284 | torch.save(ckpt, last) 285 | if best_fitness == fi: 286 | torch.save(ckpt, best) 287 | del ckpt 288 | # end epoch ---------------------------------------------------------------------------------------------------- 289 | # end training 290 | return results 291 | 292 | if __name__ == '__main__': 293 | parser = argparse.ArgumentParser() 294 | parser.add_argument('--weights', type=str, default='', help='initial weights path') 295 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 296 | parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path') 297 | parser.add_argument('--epochs', type=int, default=300) 298 | parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') 299 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') 300 | parser.add_argument('--rect', action='store_true', help='rectangular training') 301 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') 302 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 303 | parser.add_argument('--notest', action='store_true', help='only test final epoch') 304 | parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') 305 | parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') 306 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 307 | parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') 308 | parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') 309 | parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name} if supplied') 310 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 311 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') 312 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 313 | parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') 314 | parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') 315 | parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') 316 | parser.add_argument('--logdir', type=str, default='runs/', help='logging directory') 317 | parser.add_argument('--workers', type=int, default=1, help='maximum number of dataloader workers') 318 | opt = parser.parse_args() 319 | 320 | opt.total_batch_size = opt.batch_size 321 | opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 322 | opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 323 | 324 | # Resume training 325 | if opt.resume: # resume an interrupted run 326 | ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path 327 | log_dir = Path(ckpt).parent.parent # runs/exp0 328 | assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' 329 | with open(log_dir / 'opt.yaml') as f: 330 | opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace 331 | opt.weights, opt.resume = ckpt, True 332 | logger.info('Resuming training from %s' % ckpt) 333 | 334 | else: 335 | # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') 336 | opt.data, opt.hyp = check_file(opt.data), check_file(opt.hyp) # check files 337 | opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) 338 | log_dir = increment_dir(Path(opt.logdir) / 'exp', opt.name) # runs/exp1 339 | 340 | device = select_device(opt.device, batch_size=opt.batch_size) 341 | 342 | # Hyperparameters 343 | with open(opt.hyp) as f: 344 | hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps 345 | if 'box' not in hyp: 346 | warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' % 347 | (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120')) 348 | hyp['box'] = hyp.pop('giou') 349 | 350 | # Train 351 | logger.info(opt) 352 | if not opt.evolve: 353 | if opt.global_rank in [-1, 0]: 354 | logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/') 355 | train(hyp, opt, device) 356 | 357 | # Evolve hyperparameters (optional) 358 | else: 359 | # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) 360 | meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 361 | 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 362 | 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 363 | 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 364 | 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 365 | 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 366 | 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 367 | 'box': (1, 0.02, 0.2), # box loss gain 368 | 'cls': (1, 0.2, 4.0), # cls loss gain 369 | 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 370 | 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 371 | 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 372 | 'iou_t': (0, 0.1, 0.7), # IoU training threshold 373 | 'anchor_t': (1, 2.0,
8.0), # anchor-multiple threshold 374 | 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 375 | 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 376 | 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 377 | 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 378 | 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 379 | 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 380 | 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 381 | 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 382 | 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 383 | 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 384 | 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 385 | 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 386 | 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 387 | 'mixup': (1, 0.0, 1.0)} # image mixup (probability) 388 | 389 | opt.notest, opt.nosave = True, True # only test/save final epoch 390 | yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml' # save best result here 391 | if opt.bucket: 392 | os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists 393 | 394 | for _ in range(300): # generations to evolve 395 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate 396 | # Select parent(s) 397 | parent = 'single' # parent selection method: 'single' or 'weighted' 398 | x = np.loadtxt('evolve.txt', ndmin=2) 399 | n = min(5, len(x)) # number of previous results to consider 400 | x = x[np.argsort(-fitness(x))][:n] # top n mutations 401 | w = fitness(x) - fitness(x).min() # weights 402 | if parent == 'single' or len(x) == 1: 403 | # x = x[random.randint(0, n - 1)] # random selection 404 | x = x[random.choices(range(n), weights=w)[0]] # weighted selection 405 | elif parent == 'weighted': 406 | x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination 407 | 408 | # Mutate 409 | mp, s = 0.8, 0.2 # mutation probability, sigma 410 | npr = np.random 411 | npr.seed(int(time.time())) 412 | g = np.array([x[0] for x in meta.values()]) # gains 0-1 413 | ng = len(meta) 414 | v = np.ones(ng) 415 | while all(v == 1): # mutate until a change occurs (prevent duplicates) 416 | v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) 417 | for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) 418 | hyp[k] = float(x[i + 7] * v[i]) # mutate 419 | 420 | # Constrain to limits 421 | for k, v in meta.items(): 422 | hyp[k] = max(hyp[k], v[1]) # lower limit 423 | hyp[k] = min(hyp[k], v[2]) # upper limit 424 | hyp[k] = round(hyp[k], 5) # significant digits 425 | 426 | # Train mutation 427 | results = train(hyp.copy(), opt, device) 428 | 429 | # Write mutation results 430 | print_mutation(hyp.copy(), results, yaml_file, opt.bucket) 431 | 432 | # Plot results 433 | plot_evolution(yaml_file) 434 | print(f'Hyperparameter evolution complete. 
Best results saved as: {yaml_file}\n' 435 | f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') 436 | -------------------------------------------------------------------------------- /train_prune_sfp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import shutil 6 | import time 7 | from pathlib import Path 8 | from warnings import warn 9 | 10 | import math 11 | import numpy as np 12 | import torch.distributed as dist 13 | import torch.nn.functional as F 14 | import torch.optim as optim 15 | import torch.optim.lr_scheduler as lr_scheduler 16 | import torch.utils.data 17 | import yaml 18 | from torch.cuda import amp 19 | from torch.nn.parallel import DistributedDataParallel as DDP 20 | from torch.utils.tensorboard import SummaryWriter 21 | from tqdm import tqdm 22 | 23 | import test # import test.py to get mAP after each epoch 24 | from model import Model 25 | from datasets import create_dataloader 26 | from utils.general import ( 27 | torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights, 28 | plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file, 29 | check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution, set_logging, init_seeds) 30 | from utils.torch_utils import ModelEMA, select_device, intersect_dicts 31 | from loss import compute_loss 32 | 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | 37 | def train(hyp, opt, device, tb_writer=None): 38 | logger.info(f'Hyperparameters {hyp}') 39 | log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve' # logging directory 40 | wdir = log_dir / 'weights' # weights directory 41 | os.makedirs(wdir, exist_ok=True) 42 | last = wdir / 'last.pt' 43 | best = wdir / 'best.pt' 44 | results_file = str(log_dir / 'results.txt') 45 | epochs, batch_size, total_batch_size, weights, rank = \ 46 | opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank 47 | 48 | # Save run settings 49 | with open(log_dir / 'hyp.yaml', 'w') as f: 50 | yaml.dump(hyp, f, sort_keys=False) 51 | with open(log_dir / 'opt.yaml', 'w') as f: 52 | yaml.dump(vars(opt), f, sort_keys=False) 53 | 54 | # Configure 55 | cuda = device.type != 'cpu' 56 | init_seeds(2 + rank) 57 | with open(opt.data) as f: 58 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict 59 | with torch_distributed_zero_first(rank): 60 | check_dataset(data_dict) # check 61 | train_path = data_dict['train'] 62 | test_path = data_dict['val'] 63 | nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names']) # number classes, names 64 | assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check 65 | 66 | # Model 67 | pretrained = weights.endswith('.pt') 68 | if pretrained: 69 | # with torch_distributed_zero_first(rank): 70 | # attempt_download(weights) # download if not found locally 71 | ckpt = torch.load(weights, map_location=device) # load checkpoint 72 | if hyp.get('anchors'): 73 | ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor 74 | #model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create 75 | model = Model(nc=nc).to(device) # create 76 | exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys 77 | state_dict = ckpt['model'].float().state_dict() # to 
FP32 78 | state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect 79 | model.load_state_dict(state_dict, strict=False) # load 80 | logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report 81 | else: 82 | model = Model(nc=nc).to(device) # create 83 | 84 | # Freeze 85 | freeze = ['', ] # parameter names to freeze (full or partial) 86 | if any(freeze): 87 | for k, v in model.named_parameters(): 88 | if any(x in k for x in freeze): 89 | print('freezing %s' % k) 90 | v.requires_grad = False 91 | 92 | # Optimizer 93 | nbs = 64 # nominal batch size 94 | accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing 95 | hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay 96 | 97 | pg0, pg1, pg2 = [], [], [] # optimizer parameter groups 98 | for k, v in model.named_parameters(): 99 | v.requires_grad = True 100 | if '.bias' in k: 101 | pg2.append(v) # biases 102 | elif '.weight' in k and '.bn' not in k: 103 | pg1.append(v) # apply weight decay 104 | else: 105 | pg0.append(v) # all else 106 | 107 | if opt.adam: 108 | optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum 109 | else: 110 | optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) 111 | 112 | optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay 'weight_decay': hyp['weight_decay'] 113 | optimizer.add_param_group({'params': pg2}) # add pg2 (biases) 114 | logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) 115 | del pg0, pg1, pg2 116 | 117 | # Scheduler https://arxiv.org/pdf/1812.01187.pdf 118 | # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR 119 | lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine 120 | scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 121 | # plot_lr_scheduler(optimizer, scheduler, epochs) 122 | 123 | # Resume 124 | start_epoch, best_fitness = 0, 0.0 125 | if pretrained: 126 | # Optimizer 127 | if ckpt['optimizer'] is not None: 128 | optimizer.load_state_dict(ckpt['optimizer']) 129 | best_fitness = ckpt['best_fitness'] 130 | 131 | # Results 132 | if ckpt.get('training_results') is not None: 133 | with open(results_file, 'w') as file: 134 | file.write(ckpt['training_results']) # write results.txt 135 | 136 | # Epochs 137 | start_epoch = ckpt['epoch'] + 1 138 | if opt.resume: 139 | assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) 140 | shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}') # save previous weights 141 | if epochs < start_epoch: 142 | logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
% 143 | (weights, ckpt['epoch'], epochs)) 144 | epochs += ckpt['epoch'] # finetune additional epochs 145 | 146 | del ckpt, state_dict 147 | 148 | # Image sizes 149 | gs = int(max(model.stride)) # grid size (max stride) 150 | imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples 151 | 152 | # DP mode 153 | if cuda and rank == -1 and torch.cuda.device_count() > 1: 154 | model = torch.nn.DataParallel(model) 155 | 156 | # SyncBatchNorm 157 | if opt.sync_bn and cuda and rank != -1: 158 | model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) 159 | logger.info('Using SyncBatchNorm()') 160 | 161 | # Exponential moving average 162 | #print('@@@@@@@@@@@@@@@@@', 'True' if rank in [-1, 0] else 'False') 163 | #ema = ModelEMA(model) if rank in [-1, 0] else None 164 | #ema = None 165 | 166 | # DDP mode 167 | if cuda and rank != -1: 168 | model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) 169 | 170 | # Trainloader 171 | dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, 172 | hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, 173 | rank=rank, world_size=opt.world_size, workers=opt.workers) 174 | mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class 175 | nb = len(dataloader) # number of batches 176 | assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) 177 | 178 | # Process 0 179 | if rank in [-1, 0]: 180 | #ema.updates = start_epoch * nb // accumulate # set EMA updates 181 | testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, 182 | hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, 183 | rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader 184 | 185 | if not opt.resume: 186 | labels = np.concatenate(dataset.labels, 0) 187 | c = torch.tensor(labels[:, 0]) # classes 188 | # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency 189 | # model._initialize_biases(cf.to(device)) 190 | plot_labels(labels, save_dir=log_dir) 191 | if tb_writer: 192 | # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 193 | tb_writer.add_histogram('classes', c, 0) 194 | 195 | # Anchors 196 | if not opt.noautoanchor: 197 | check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) 198 | 199 | # Model parameters 200 | hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset 201 | model.nc = nc # attach number of classes to model 202 | model.hyp = hyp # attach hyperparameters to model 203 | model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) 204 | model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights 205 | model.names = names 206 | 207 | # Start training 208 | t0 = time.time() 209 | nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) 210 | # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training 211 | maps = np.zeros(nc) # mAP per class 212 | results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) 213 | scheduler.last_epoch = start_epoch - 1 # do not move 214 | scaler = amp.GradScaler(enabled=cuda) 215 | logger.info('Image sizes %g train, %g test\n' 216 | 'Using %g dataloader workers\nLogging results to %s\n' 217 | 'Starting training for %g epochs...' 
% (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) 218 | # Initialize the Mask object used for soft filter pruning (SFP) 219 | m = Mask(model) ################################################# 220 | m.init_length() ################################################# 221 | for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ 222 | model.train() 223 | 224 | # Update image weights (optional) 225 | if opt.image_weights: 226 | # Generate indices 227 | if rank in [-1, 0]: 228 | cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights 229 | iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights 230 | dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx 231 | # Broadcast if DDP 232 | if rank != -1: 233 | indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() 234 | dist.broadcast(indices, 0) 235 | if rank != 0: 236 | dataset.indices = indices.cpu().numpy() 237 | 238 | # Update mosaic border 239 | # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) 240 | # dataset.mosaic_border = [b - imgsz, -b] # height, width borders 241 | 242 | mloss = torch.zeros(4, device=device) # mean losses 243 | if rank != -1: 244 | dataloader.sampler.set_epoch(epoch) 245 | pbar = enumerate(dataloader) 246 | logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) 247 | if rank in [-1, 0]: 248 | pbar = tqdm(pbar, total=nb) # progress bar 249 | optimizer.zero_grad() 250 | for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- 251 | ni = i + nb * epoch # number integrated batches (since train start) 252 | imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 253 | 254 | # Warmup 255 | if ni <= nw: 256 | xi = [0, nw] # x interp 257 | # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) 258 | accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) 259 | for j, x in enumerate(optimizer.param_groups): 260 | # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 261 | x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) 262 | if 'momentum' in x: 263 | x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) 264 | 265 | # Multi-scale 266 | if opt.multi_scale: 267 | sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size 268 | sf = sz / max(imgs.shape[2:]) # scale factor 269 | if sf != 1: 270 | ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) 271 | imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) 272 | 273 | # Forward 274 | with amp.autocast(enabled=cuda): 275 | pred = model(imgs) # forward 276 | loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size 277 | if rank != -1: 278 | loss *= opt.world_size # gradient averaged between devices in DDP mode 279 | 280 | # Backward 281 | scaler.scale(loss).backward() 282 | 283 | # Optimize 284 | if ni % accumulate == 0: 285 | scaler.step(optimizer) # optimizer.step 286 | scaler.update() 287 | optimizer.zero_grad() 288 | #if ema: 289 | #ema.update(model) 290 | 291 | # Print 292 | if rank in [-1, 0]: 293 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses 294 | mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if
torch.cuda.is_available() else 0) # (GB) 295 | s = ('%10s' * 2 + '%10.4g' * 6) % ( 296 | '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) 297 | pbar.set_description(s) 298 | 299 | # Plot 300 | if ni < 3: 301 | f = str(log_dir / f'train_batch{ni}.jpg') # filename 302 | result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) 303 | # if tb_writer and result is not None: 304 | # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) 305 | # tb_writer.add_graph(model, imgs) # add model to tensorboard 306 | 307 | # end batch ------------------------------------------------------------------------------------------------ 308 | 309 | # Scheduler 310 | lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard 311 | scheduler.step() 312 | 313 | if opt.prunebody == 1: 314 | prune_indexnum = 202 # only backbone 315 | elif opt.prunebody == 2: 316 | prune_indexnum = 326 # both backbone and neck 317 | else: 318 | break 319 | # Soft-prune the model via the Mask object: zero the lowest-L2-norm conv filters among parameter indices 3..prune_indexnum; the zeroed filters keep receiving gradient updates (SFP) 320 | m.model = model #################################### 321 | m.if_zero() #################################### 322 | m.init_mask(0.7, 3, prune_indexnum, True) #################################### 323 | m.do_mask() #################################### 324 | m.if_zero() #################################### 325 | model = m.model #################################### 326 | 327 | 328 | # DDP process 0 or single-GPU 329 | if rank in [-1, 0]: 330 | # mAP 331 | #if ema: 332 | #ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) 333 | final_epoch = epoch + 1 == epochs 334 | if not opt.notest or final_epoch: # Calculate mAP 335 | results, maps, times = test.test(opt.data, 336 | batch_size=total_batch_size, 337 | imgsz=imgsz_test, 338 | model=model, #ema.ema 339 | single_cls=opt.single_cls, 340 | dataloader=testloader, 341 | save_dir=log_dir, 342 | plots=epoch == 0 or final_epoch) # plot first and last 343 | 344 | # Write 345 | with open(results_file, 'a') as f: 346 | f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) 347 | if len(opt.name) and opt.bucket: 348 | os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) 349 | 350 | # Tensorboard 351 | if tb_writer: 352 | tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss 353 | 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 354 | 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss 355 | 'x/lr0', 'x/lr1', 'x/lr2'] # params 356 | for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): 357 | tb_writer.add_scalar(tag, x, epoch) 358 | 359 | # Update best mAP 360 | fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] 361 | if fi > best_fitness: 362 | best_fitness = fi 363 | 364 | # Save model 365 | save = (not opt.nosave) or (final_epoch and not opt.evolve) 366 | if save: 367 | with open(results_file, 'r') as f: # create checkpoint 368 | ckpt = {'epoch': epoch, 369 | 'best_fitness': best_fitness, 370 | 'training_results': f.read(), 371 | 'model': model,#ema.ema 372 | 'optimizer': None if final_epoch else optimizer.state_dict()} 373 | 374 | # Save last, best and delete 375 | torch.save(ckpt, last) 376 | if best_fitness == fi: 377 | torch.save(ckpt, best) 378 | del ckpt 379 | # end epoch ---------------------------------------------------------------------------------------------------- 380 | # end training 381 | 382 | if rank in [-1, 0]: 383 |
# Strip optimizers 384 | n = opt.name if opt.name.isnumeric() else '' 385 | fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt' 386 | for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]): 387 | if os.path.exists(f1): 388 | os.rename(f1, f2) # rename 389 | if str(f2).endswith('.pt'): # is *.pt 390 | strip_optimizer(f2) # strip optimizer 391 | os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload 392 | # Finish 393 | if not opt.evolve: 394 | plot_results(save_dir=log_dir) # save as results.png 395 | logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) 396 | 397 | dist.destroy_process_group() if rank not in [-1, 0] else None 398 | torch.cuda.empty_cache() 399 | return results 400 | 401 | #get_filter_codebook, convert2tensor, init_length, init_rate, init_mask, do_mask, if_zero 402 | class Mask: 403 | def __init__(self, model): 404 | self.model_size = {} 405 | self.model_length = {} 406 | self.compress_rate = {} 407 | self.mat = {} 408 | self.model = model 409 | self.mask_index = [] 410 | 411 | def get_filter_codebook(self, weight_torch, compress_rate, length): 412 | codebook = np.ones(length) 413 | if len(weight_torch.size()) == 4: ######################################################### 414 | filter_pruned_num = int(weight_torch.size()[0] * (1 - compress_rate)) + 1 # +1 to match the yolov5 small model 415 | weight_vec = weight_torch.view(weight_torch.size()[0], -1) 416 | norm2 = torch.norm(weight_vec, 2, 1) 417 | norm2_np = norm2.cpu().numpy() 418 | filter_index = norm2_np.argsort()[:filter_pruned_num] 419 | kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3] 420 | for x in range(0, len(filter_index)): 421 | codebook[filter_index[x] * kernel_length: (filter_index[x] + 1) * kernel_length] = 0 422 | #print("filter codebook done") 423 | else: 424 | pass 425 | return codebook 426 | 427 | def convert2tensor(self, x): 428 | x = torch.FloatTensor(x) 429 | return x 430 | 431 | def init_length(self): 432 | for index, item in enumerate(self.model.parameters()): 433 | self.model_size[index] = item.size() 434 | 435 | for index1 in self.model_size: 436 | for index2 in range(0, len(self.model_size[index1])): 437 | if index2 == 0: 438 | self.model_length[index1] = self.model_size[index1][0] 439 | else: 440 | self.model_length[index1] *= self.model_size[index1][index2] 441 | 442 | def init_rate(self, layer_rate, layer_begin, layer_end): 443 | 444 | for index, item in enumerate(self.model.parameters()): 445 | self.compress_rate[index] = 1 446 | 447 | self.mask_index = [] 448 | for index, item in enumerate(self.model.parameters()): 449 | if len(item.shape) > 1 and index >= layer_begin and index <= layer_end: # select weight tensors with parameter index in [layer_begin, layer_end] 450 | self.mask_index.append(index) 451 | #print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$', len(self.mask_index)) 452 | 453 | for key in self.mask_index: 454 | self.compress_rate[key] = layer_rate 455 | 456 | def init_mask(self, layer_rate, layer_begin, layer_end, use_cuda): 457 | self.init_rate(layer_rate, layer_begin, layer_end) 458 | for index, item in enumerate(self.model.parameters()): 459 | 460 | if (index in self.mask_index): 461 | self.mat[index] = self.get_filter_codebook(item.data, self.compress_rate[index], 462 | self.model_length[index]) 463 | self.mat[index] = self.convert2tensor(self.mat[index]) 464 | if use_cuda: 465 |
self.mat[index] = self.mat[index].cuda() 466 | #print("mask Ready") 467 | 468 | def do_mask(self): 469 | for index, item in enumerate(self.model.parameters()): 470 | if (index in self.mask_index): 471 | a = item.data.view(self.model_length[index]) 472 | b = a * self.mat[index] 473 | item.data = b.view(self.model_size[index]) 474 | #print("mask Done") 475 | 476 | def if_zero(self):#layer_begin, layer_end 477 | for index, item in enumerate(self.model.parameters()): 478 | if index in self.mask_index: 479 | a = item.data.view(self.model_length[index]) 480 | b = a.cpu().numpy() 481 | #print("layer: %d, number of nonzero weight is %d, zero is %d" % ( 482 | #index, np.count_nonzero(b), len(b) - np.count_nonzero(b))) 483 | 484 | 485 | 486 | if __name__ == '__main__': 487 | parser = argparse.ArgumentParser() 488 | parser.add_argument('--weights', type=str, default='', help='initial weights path') 489 | parser.add_argument('--cfg', type=str, default='', help='model.yaml path') 490 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') 491 | parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path') 492 | parser.add_argument('--epochs', type=int, default=300) 493 | parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') 494 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') 495 | parser.add_argument('--rect', action='store_true', help='rectangular training') 496 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') 497 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 498 | parser.add_argument('--notest', action='store_true', help='only test final epoch') 499 | parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') 500 | parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') 501 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 502 | parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') 503 | parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') 504 | parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name} if supplied') 505 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 506 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') 507 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') 508 | parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') 509 | parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') 510 | parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') 511 | parser.add_argument('--logdir', type=str, default='runs/', help='logging directory') 512 | parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') 513 | 514 | parser.add_argument('--prunebody', type=int, default=1, help='1 means only prune backbone, 2 means prune both backbone and neck') 515 | opt = parser.parse_args() 516 | 517 | # Set DDP variables 518 | opt.total_batch_size = opt.batch_size 519 | opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 520 | opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 521 | set_logging(opt.global_rank) 522 | if opt.global_rank in [-1, 0]: 523 | check_git_status() 524 | 525 | # Resume 526 | if opt.resume: # resume an interrupted run 527 | ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path 528 | log_dir = Path(ckpt).parent.parent # runs/exp0 529 | assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' 530 | with open(log_dir / 'opt.yaml') as f: 531 | opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace 532 | opt.cfg, opt.weights, opt.resume = '', ckpt, True 533 | logger.info('Resuming training from %s' % ckpt) 534 | 535 | else: 536 | # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') 537 | opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files 538 | opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) 539 | log_dir = increment_dir(Path(opt.logdir) / 'exp', opt.name) # runs/exp1 540 | 541 | # DDP mode 542 | device = select_device(opt.device, batch_size=opt.batch_size) 543 | if opt.local_rank != -1: 544 | assert torch.cuda.device_count() > opt.local_rank 545 | torch.cuda.set_device(opt.local_rank) 546 | device = torch.device('cuda', opt.local_rank) 547 | dist.init_process_group(backend='nccl', init_method='env://') # distributed backend 548 | assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' 549 | opt.batch_size = opt.total_batch_size // opt.world_size 550 | 551 | # Hyperparameters 552 | with open(opt.hyp) as f: 553 | hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps 554 | if 'box' not in hyp: 555 | warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' % 556 | (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120')) 557 | hyp['box'] = hyp.pop('giou') 558 | 559 | # Train 560 | logger.info(opt) 561 | if not opt.evolve: 562 | tb_writer = None 563 | if opt.global_rank in [-1, 0]: 564 | logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/') 565 | tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0 566 | 567 | train(hyp, opt, device, tb_writer) 568 | 569 | # Evolve hyperparameters (optional) 570 | else: 571 | # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, 
upper_limit) 572 | meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 573 | 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 574 | 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 575 | 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 576 | 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 577 | 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 578 | 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 579 | 'box': (1, 0.02, 0.2), # box loss gain 580 | 'cls': (1, 0.2, 4.0), # cls loss gain 581 | 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 582 | 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 583 | 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 584 | 'iou_t': (0, 0.1, 0.7), # IoU training threshold 585 | 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold 586 | 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 587 | 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 588 | 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 589 | 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 590 | 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 591 | 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 592 | 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 593 | 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 594 | 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 595 | 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 596 | 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 597 | 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 598 | 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 599 | 'mixup': (1, 0.0, 1.0)} # image mixup (probability) 600 | 601 | assert opt.local_rank == -1, 'DDP mode not implemented for --evolve' 602 | opt.notest, opt.nosave = True, True # only test/save final epoch 603 | # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices 604 | yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml' # save best result here 605 | if opt.bucket: 606 | os.system('gsutil cp gs://%s/evolve.txt .' 
% opt.bucket) # download evolve.txt if exists 607 | 608 | for _ in range(300): # generations to evolve 609 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate 610 | # Select parent(s) 611 | parent = 'single' # parent selection method: 'single' or 'weighted' 612 | x = np.loadtxt('evolve.txt', ndmin=2) 613 | n = min(5, len(x)) # number of previous results to consider 614 | x = x[np.argsort(-fitness(x))][:n] # top n mutations 615 | w = fitness(x) - fitness(x).min() # weights 616 | if parent == 'single' or len(x) == 1: 617 | # x = x[random.randint(0, n - 1)] # random selection 618 | x = x[random.choices(range(n), weights=w)[0]] # weighted selection 619 | elif parent == 'weighted': 620 | x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination 621 | 622 | # Mutate 623 | mp, s = 0.8, 0.2 # mutation probability, sigma 624 | npr = np.random 625 | npr.seed(int(time.time())) 626 | g = np.array([x[0] for x in meta.values()]) # gains 0-1 627 | ng = len(meta) 628 | v = np.ones(ng) 629 | while all(v == 1): # mutate until a change occurs (prevent duplicates) 630 | v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) 631 | for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) 632 | hyp[k] = float(x[i + 7] * v[i]) # mutate 633 | 634 | # Constrain to limits 635 | for k, v in meta.items(): 636 | hyp[k] = max(hyp[k], v[1]) # lower limit 637 | hyp[k] = min(hyp[k], v[2]) # upper limit 638 | hyp[k] = round(hyp[k], 5) # significant digits 639 | 640 | # Train mutation 641 | results = train(hyp.copy(), opt, device) 642 | 643 | # Write mutation results 644 | print_mutation(hyp.copy(), results, yaml_file, opt.bucket) 645 | 646 | # Plot results 647 | plot_evolution(yaml_file) 648 | print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n' 649 | f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') 650 | --------------------------------------------------------------------------------
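The Mask class in train_prune_sfp.py is the core of the soft filter pruning (SFP) step that runs at the end of every training epoch. The minimal sketch below shows how those same calls can be exercised on a standalone model; it is illustrative only, assuming the repository's modules import cleanly from the repo root and a CUDA device is available, and the class count (nc=20 for VOC) and pruning arguments simply mirror the m.init_mask(0.7, 3, prune_indexnum, True) call used in training.

# sfp_mask_demo.py -- illustrative sketch, not a file in this repository
import torch
from model import Model                 # model.py in the repository root
from train_prune_sfp import Mask        # the Mask class shown above

model = Model(nc=20).cuda()             # 20 classes, as in data/voc.yaml
m = Mask(model)
m.init_length()                         # record each parameter's shape and flattened length
m.init_mask(0.7, 3, 202, True)          # rate 0.7: zero the ~30% lowest-L2-norm filters; indices 3..202 cover the backbone only
m.do_mask()                             # apply the 0/1 codebooks to the weights in place (soft pruning)
m.if_zero()                             # walks the masked layers; its per-layer zero-count print is commented out in the class

Because the masked filters are only zeroed, not removed, they continue to receive gradient updates on subsequent epochs; the hard removal into a physically smaller network is done later by the scripts under get_small_script/.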