├── model_data
│   ├── new_class.txt
│   ├── yolo_anchors.txt
│   ├── readme.md
│   ├── voc_classes.txt
│   └── coco_classes.txt
├── img
│   ├── 1.jpg
│   ├── 1.png
│   ├── 2.jpg
│   ├── 2.png
│   ├── 3.png
│   ├── 4.jpg
│   ├── 4.png
│   ├── 5.jpg
│   └── 6.jpg
├── scripts
│   ├── 1_vitisAI_tf_printNode.sh
│   ├── readme.md
│   ├── 3_vitisAI_tf_compile.sh
│   └── 2_vitisAI_tf_quantize.sh
├── compile_result
│   └── readme.md
├── nets
│   ├── __pycache__
│   │   ├── ious.cpython-36.pyc
│   │   ├── loss.cpython-36.pyc
│   │   ├── yolo4_tiny.cpython-36.pyc
│   │   └── CSPdarknet53_tiny.cpython-36.pyc
│   ├── ious.py
│   ├── CSPdarknet53_tiny.py
│   ├── loss.py
│   └── yolo4_tiny.py
├── quantize_result
│   └── readme.md
├── utils
│   ├── __pycache__
│   │   └── utils.cpython-36.pyc
│   └── utils.py
├── yolo_fastest_tensorflow2
│   ├── weights
│   │   └── readme.md
│   ├── nets
│   │   ├── yolo_fastest_backbone.py
│   │   └── yolo_fastest.py
│   └── train.py
├── edge
│   └── readme.md
├── test.py
├── VOCdevkit
│   ├── readme.md
│   ├── voc_data_migrate.py
│   ├── ImageSets_Convert.py
│   └── DETRAC_xmlParser.py
├── frozon_result
│   └── readme.md
├── predict.py
├── video.py
├── voc_annotation.py
├── get_gt_txt.py
├── kmeans_for_anchors.py
├── README.md
├── input_fn.py
├── get_dr_txt.py
├── core
│   ├── tf_prediction.py
│   ├── yolo3_predictor.py
│   └── evaluation.py
├── yolo.py
├── keras_to_tensorflow.py
├── train.py
└── Model_pruning
    ├── compressor.py
    └── train_purn.py
--------------------------------------------------------------------------------
/model_data/new_class.txt:
--------------------------------------------------------------------------------
car
--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
13,18, 19,26, 28,40, 43,39, 53,63, 83,104
--------------------------------------------------------------------------------
/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/1.jpg
--------------------------------------------------------------------------------
/img/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/1.png
--------------------------------------------------------------------------------
/img/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/2.jpg
--------------------------------------------------------------------------------
/img/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/2.png
--------------------------------------------------------------------------------
/img/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/3.png
--------------------------------------------------------------------------------
/img/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/4.jpg
--------------------------------------------------------------------------------
/img/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/4.png
--------------------------------------------------------------------------------
/img/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/5.jpg
--------------------------------------------------------------------------------
/img/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/6.jpg
--------------------------------------------------------------------------------
/scripts/1_vitisAI_tf_printNode.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# main process
vai_q_tensorflow inspect --input_frozen_graph=model_data/model.pb
--------------------------------------------------------------------------------
/compile_result/readme.md:
--------------------------------------------------------------------------------
The compiled files will be here.

My files:

Link: https://pan.baidu.com/s/1ZaXH9lgg3r4U6YR3NDkf7w
Extraction code: 1byo
--------------------------------------------------------------------------------
/nets/__pycache__/ious.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/ious.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/loss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/loss.cpython-36.pyc
--------------------------------------------------------------------------------
/quantize_result/readme.md:
--------------------------------------------------------------------------------
The quantization result is here.

My file:

Link: https://pan.baidu.com/s/1CZXitu0Rh7HkTt6PyNLxLg
Extraction code: hk81
--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/utils/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/yolo4_tiny.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/yolo4_tiny.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/__pycache__/CSPdarknet53_tiny.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/CSPdarknet53_tiny.cpython-36.pyc
--------------------------------------------------------------------------------
/scripts/readme.md:
--------------------------------------------------------------------------------
The dpu.json and dpu.dcf files are here:

Link: https://pan.baidu.com/s/1Nl7y9-WkWOp1vyd9SuwYQw
Extraction code: 01ra

We can use

dlet -f dpu.hwh

to generate the .dcf file.
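
For reference, a minimal sketch of how these files fit together (the dpu.json layout below is an assumption based on the DNNDK-style arch files of Vitis-AI 1.x, not a copy of the file behind the link above):

dlet -f dpu.hwh        # parses the Vivado hardware handoff and writes a timestamped .dcf
mv dpu*.dcf dpu.dcf    # rename it to the name referenced by dpu.json

dpu.json then points vai_c_tensorflow at that .dcf, for example:

{
    "target": "DPUCZDX8G",
    "dcf": "./dpu.dcf",
    "cpu_arch": "arm64"
}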
--------------------------------------------------------------------------------
/yolo_fastest_tensorflow2/weights/readme.md:
--------------------------------------------------------------------------------
My weights and model structure of Yolo-Fastest implemented with tensorflow2:

Link: https://pan.baidu.com/s/1PRbR1SHPd6r5gFIa_gg_OQ
Extraction code: dmvo
--------------------------------------------------------------------------------
/model_data/readme.md:
--------------------------------------------------------------------------------
The Yolov4-tiny-voc weights are here:

Link: https://pan.baidu.com/s/1MAnXMgzkxK8zvTNOmqu12w
Extraction code: 2095

My reference:

https://github.com/bubbliiiing/yolov4-tiny-tf2
--------------------------------------------------------------------------------
/edge/readme.md:
--------------------------------------------------------------------------------
The .bit and .hwh files are here. We can rebuild them from the official DPU-PYNQ project (https://github.com/Xilinx/DPU-PYNQ/tree/master/boards).

My files:

Link: https://pan.baidu.com/s/1D4TiPUSjwU2tWVFq2EHUcw
Extraction code: lcic
--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
from nets.yolo4_tiny import yolo_body
from tensorflow.keras.layers import Input

# The input image is 416x416 with 3 channels
image_input = Input(shape=(416, 416, 3))
model = yolo_body(image_input,3,20)
model.summary()

for i,layer in enumerate(model.layers):
    print(i,layer.name)
--------------------------------------------------------------------------------
/VOCdevkit/readme.md:
--------------------------------------------------------------------------------
These files are used to convert the dataset format. We use VOC format.

We can refer to https://blog.csdn.net/weixin_38106878/article/details/88684280?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control
--------------------------------------------------------------------------------
/scripts/3_vitisAI_tf_compile.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Compile

vai_c_tensorflow --arch /workspace/ministNumber/dnndk/dnndk/dpu.json -f quantize_results/deploy_model.pb --output_dir compile_result -n yolo_car


echo "#####################################"
echo "COMPILATION COMPLETED"
echo "#####################################"
--------------------------------------------------------------------------------
/frozon_result/readme.md:
--------------------------------------------------------------------------------
We can use keras_to_tensorflow.py to generate our .pb file. We will use this file to quantize the model.
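
A typical invocation looks like this (a sketch only: the flag names assume the widely used keras_to_tensorflow.py interface, and the file names follow the ones saved by train.py):

python keras_to_tensorflow.py \
    --input_model model_data/model.h5 \
    --output_model model_data/model.pb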

Requirement: TensorFlow 1.15.2

My pb file:

Link: https://pan.baidu.com/s/1jrBh0l2umt_mENZf9RJA6w
Extraction code: sy0j

My weights and model structure:

Link: https://pan.baidu.com/s/1RyHs3Fzf0V46y2h3YVZr1g
Extraction code: 86e4
--------------------------------------------------------------------------------
/scripts/2_vitisAI_tf_quantize.sh:
--------------------------------------------------------------------------------
#!/bin/bash


# run quantization

vai_q_tensorflow quantize \
    --input_frozen_graph ./model_data/model.pb \
    --input_nodes input_1 \
    --input_shapes ?,320,320,3 \
    --output_nodes conv2d_20/BiasAdd,conv2d_23/BiasAdd \
    --method 1 \
    --input_fn input_fn.calib_input \
    --gpu 0 \
    --calib_iter 100

echo "#####################################"
echo "QUANTIZATION COMPLETED"
echo "#####################################"
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
from yolo import YOLO
from PIL import Image
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

yolo = YOLO()

while True:
    img = input('Input image filename:')
    try:
        image = Image.open(img)
    except:
        print('Open Error! Try again!')
        continue
    else:
        r_image = yolo.detect_image(image)
        r_image.show()
--------------------------------------------------------------------------------
/VOCdevkit/voc_data_migrate.py:
--------------------------------------------------------------------------------

import os
import random
import shutil

# Directory of the xml files
XmlPath=r'xml_test'
# Directory of the original images
pictureBasePath=r"Insight-MVT_Annotation_Train"
# Directory where the selected images are saved
saveBasePath=r"picture_test"

total_xml = os.listdir(XmlPath)
num=len(total_xml)
list=range(num)
if os.path.exists(saveBasePath)==False:   # create the output folder if it does not exist
    os.makedirs(saveBasePath)


for xml in total_xml:
    xml_temp=xml.split("__")
    folder=xml_temp[0]
    filename=xml_temp[1].split(".")[0]+".jpg"
    # print(folder)
    # print(filename)
    temp_pictureBasePath=os.path.join(pictureBasePath,folder)
    filePath=os.path.join(temp_pictureBasePath,filename)
    # print(filePath)
    newfile=xml.split(".")[0]+".jpg"
    newfile_path=os.path.join(saveBasePath,newfile)
    print(newfile_path)
    shutil.copyfile(filePath, newfile_path)
print("xml file total number",num)
--------------------------------------------------------------------------------
/model_data/coco_classes.txt:
--------------------------------------------------------------------------------
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
--------------------------------------------------------------------------------
/VOCdevkit/ImageSets_Convert.py:
--------------------------------------------------------------------------------
import os
import random
import time

xmlfilepath=r'./VOC2020/Annotations'
saveBasePath=r"./"

trainval_percent=0.8
train_percent=0.85
total_xml = os.listdir(xmlfilepath)
num=len(total_xml)
list=range(num)
tv=int(num*trainval_percent)
tr=int(tv*train_percent)
trainval= random.sample(list,tv)
train=random.sample(trainval,tr)

print("train and val size",tv)
print("train size",tr)
ftrainval = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/trainval.txt'), 'w')
ftest = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/test.txt'), 'w')
ftrain = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/train.txt'), 'w')
fval = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/val.txt'), 'w')
# Start time
start = time.time()
for i in list:
    name=total_xml[i][:-4]+'\n'
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)
# End time
end = time.time()
seconds=end-start
print( "Time taken : {0} seconds".format(seconds))

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
--------------------------------------------------------------------------------
/video.py:
--------------------------------------------------------------------------------
#-------------------------------------#
#   Run detection on the camera feed
#-------------------------------------#
from yolo import YOLO
from PIL import Image
import numpy as np
import cv2
import time

import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

yolo = YOLO()
# Open the camera
capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")

fps = 0.0
t1 = time.time()
while(True):
    t1 = time.time()
    # Read one frame
    ref,frame=capture.read()
    # Convert BGR to RGB
    frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
    # Convert to a PIL Image
    frame = Image.fromarray(np.uint8(frame))

    # Run detection
    frame = np.array(yolo.detect_image(frame))

    # Convert RGB back to BGR for OpenCV display
    frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)

    fps = ( fps + (1./(time.time()-t1)) ) / 2
    print("fps= %.2f"%(fps))
    frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    t1 = time.time()
    cv2.imshow("video",frame)
    c= cv2.waitKey(1) & 0xff
    if c==27:
        capture.release()
        break

yolo.close_session()
--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
import xml.etree.ElementTree as ET
from os import getcwd

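# For reference, each line this script writes to the generated %s_%s.txt files
# lists the image path followed by one xmin,ymin,xmax,ymax,class_id group per
# ground-truth box. The values below are purely illustrative:
#   /home/user/VOCdevkit/VOC2007/JPEGImages/000001.jpg 48,240,195,371,0 8,12,352,498,0
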
sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')]

classes = ["car"]

def convert_annotation(year, image_id, list_file):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
    tree=ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult')!=None:
            difficult = obj.find('difficult').text

        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

wd = getcwd()

for year, image_set in sets:
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
    list_file = open('%s_%s.txt'%(year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
        convert_annotation(year, image_id, list_file)
        list_file.write('\n')
    list_file.close()
--------------------------------------------------------------------------------
/get_gt_txt.py:
--------------------------------------------------------------------------------
#----------------------------------------------------#
#   Get the ground-truth of the test set
#   Video tutorial:
#   https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
import sys
import os
import glob
import xml.etree.ElementTree as ET

image_ids = open('VOCdevkit/VOC2007/ImageSets/Main/test.txt').read().strip().split()

if not os.path.exists("./input"):
    os.makedirs("./input")
if not os.path.exists("./input/ground-truth"):
    os.makedirs("./input/ground-truth")

for image_id in image_ids:
    with open("./input/ground-truth/"+image_id+".txt", "w") as new_f:
        root = ET.parse("VOCdevkit/VOC2007/Annotations/"+image_id+".xml").getroot()
        for obj in root.findall('object'):
            difficult_flag = False
            if obj.find('difficult')!=None:
                difficult = obj.find('difficult').text
                if int(difficult)==1:
                    difficult_flag = True
            obj_name = obj.find('name').text
            bndbox = obj.find('bndbox')
            left = bndbox.find('xmin').text
            top = bndbox.find('ymin').text
            right = bndbox.find('xmax').text
            bottom = bndbox.find('ymax').text
            if difficult_flag:
                new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
            else:
                new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))

print("Conversion completed!")
--------------------------------------------------------------------------------
/nets/ious.py:
--------------------------------------------------------------------------------
from tensorflow.keras import backend as K
import tensorflow as tf
import math
def box_ciou(b1, b2):
    """
    Input:
    ----------
    b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

    Returns:
    -------
    ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    # Top-left and bottom-right corners of the predicted box
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half
    # Top-left and bottom-right corners of the ground-truth box
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # IoU between the ground-truth and predicted boxes
    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / K.maximum(union_area,K.epsilon())

    # Distance between the box centers
    center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1)
    # Top-left and bottom-right corners of the smallest enclosing box
    enclose_mins = K.minimum(b1_mins, b2_mins)
    enclose_maxes = K.maximum(b1_maxes, b2_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
    # Diagonal distance of the enclosing box
    enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
    ciou = iou - 1.0 * (center_distance) / K.maximum(enclose_diagonal ,K.epsilon())

    v = 4*K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1],K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi)
    alpha = v / K.maximum((1.0 - iou + v), K.epsilon())
    ciou = ciou - alpha * v

    ciou = K.expand_dims(ciou, -1)
    return ciou
--------------------------------------------------------------------------------
/kmeans_for_anchors.py:
--------------------------------------------------------------------------------
import numpy as np
import xml.etree.ElementTree as ET
import glob
import random

def cas_iou(box,cluster):
    x = np.minimum(cluster[:,0],box[0])
    y = np.minimum(cluster[:,1],box[1])

    intersection = x * y
    area1 = box[0] * box[1]

    area2 = cluster[:,0] * cluster[:,1]
    iou = intersection / (area1 + area2 -intersection)

    return iou

def avg_iou(box,cluster):
    return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])])


def kmeans(box,k):
    # Number of boxes in total
    row = box.shape[0]

    # Distance from each box to each cluster center
    distance = np.empty((row,k))

    # Cluster assignment from the previous iteration
    last_clu = np.zeros((row,))

    np.random.seed()

    # Randomly pick k boxes as the initial cluster centers
    cluster = box[np.random.choice(row,k,replace = False)]
    # cluster = random.sample(row, k)
    while True:
        # Compute 1 - IoU from each box to the k cluster centers
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i],cluster)

        # Assign each box to the nearest center
        near = np.argmin(distance,axis=1)

        if (last_clu == near).all():
            break

        # Update each center to the median of its cluster
        for j in range(k):
            cluster[j] = np.median(
                box[near == j],axis=0)

        last_clu = near

    return cluster

def load_data(path):
    data = []
    # Look for boxes in every xml file
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        # Get the width and height of every object
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            xmin = np.float64(xmin)
            ymin = np.float64(ymin)
            xmax = np.float64(xmax)
            ymax = np.float64(ymax)
            # Record the normalized width and height
            data.append([xmax-xmin,ymax-ymin])
    return np.array(data)


if __name__ == '__main__':
    # Running this script processes the xml files in './VOCdevkit/VOC2007/Annotations'
    # and generates yolo_anchors.txt
    SIZE = 416
    anchors_num = 6
    # Load the dataset; VOC-style xml files can be used
    path = r'./VOCdevkit/VOC2007/Annotations'

    # Load all xml files
    # Boxes are stored as width,height converted to ratios
    data = load_data(path)

    # Run the k-means clustering algorithm
    out = kmeans(data,anchors_num)
    out = out[np.argsort(out[:,0])]
    print('acc:{:.2f}%'.format(avg_iou(data,out) * 100))
    print(out*SIZE)
    data = out*SIZE
    f = open("yolo_anchors.txt", 'w')
    row = np.shape(data)[0]
    for i in range(row):
        if i == 0:
            x_y = "%d,%d" % (data[i][0], data[i][1])
        else:
            x_y = ", %d,%d" % (data[i][0], data[i][1])
        f.write(x_y)
    f.close()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Ultra96 Yolov4-tiny and Yolo-Fastest

1. We convert the dataset to VOC format. I use the UA-DETRAC dataset, and the scripts under ./VOCdevkit/ can be used for the conversion.

2. In the official Yolov4-tiny, a slice operation implements the CSPNet channel split, but the quantization tool does not support that operation, so I replace it with a 1x1 convolution (see the sketch after this list).

3. Then we can use train.py to train the model and save the model structure and weights as model.json and model.h5. I use TensorFlow-GPU 2.2.0.

4. Then we can generate a .pb file that is suitable for the deployment tools. See ./frozon_result/readme.md for details.

5. Then we use Vitis-AI to quantize our model. We can use ./scripts/1_vitisAI_tf_printNode.sh to find the input and output nodes, and ./scripts/2_vitisAI_tf_quantize.sh to quantize the model.

6. We can then compile the model with ./scripts/3_vitisAI_tf_compile.sh.

7. We should use Vivado and Vitis to build the hardware platform (./edge/readme.md).

8. Finally, we can run our model on the Ultra96-V2 board. There is an example that uses the yolo model to detect vehicles (./edge/dpu_yolo_v4_tiny.ipynb). The results are shown below; it runs at 25 fps with 320x320 images.

![1](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/1.png)

![2](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/2.png)

9. To achieve a faster detection speed, I also implement Yolo-Fastest ([Yolo-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest)) with tensorflow and deploy it to the Ultra96-V2 board. The results are shown below; it achieves 30+ fps.

![3](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/3.png)

![4](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/4.png)

10. Model pruning is now supported. We use [keras-surgeon](https://github.com/BenWhetton/keras-surgeon) 0.2.0 and [nni](https://github.com/microsoft/nni) 1.5 to prune the model; see ./Model_pruning. I modified the source code of nni (compressor.py) and fixed some bugs so that we can choose the layers we want to prune, and I provide a demo that uses FPGM to prune the model.
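
A minimal sketch of the slice-vs-convolution substitution from step 2 (the actual code is `route_group` and `resblock_body` in ./nets/CSPdarknet53_tiny.py; the helper names below are illustrative):

```python
from tensorflow.keras.layers import Conv2D

def csp_split_slice(x):
    # Official Yolov4-tiny: keep the second half of the channels via a slice,
    # which the quantization tool cannot map to the DPU.
    in_channels = x.get_shape().as_list()[3]
    return x[:, :, :, in_channels // 2:]

def csp_split_conv(x):
    # DPU-friendly replacement: a trainable 1x1 convolution that produces the
    # same number of output channels as the slice.
    in_channels = x.get_shape().as_list()[3]
    return Conv2D(in_channels // 2, (1, 1), use_bias=False)(x)
```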



References:

[Yolov4-tiny-tf2](https://github.com/bubbliiiing/yolov4-tiny-tf2)

[Yolo-v3-Xilinx](https://github.com/Xilinx/Vitis-AI-Tutorials/tree/ML-at-Edge-yolov3)

[Yolo-v4-tutorial-Xilinx](https://github.com/Xilinx/Vitis-Tutorials/tree/33d6cf9686398ef1179778dc0da163291c68b465/Machine_Learning/Design_Tutorials/07-yolov4-tutorial)

[Yolo-v3-dnndk](https://github.com/Xilinx/Vitis-AI/blob/v1.1/mpsoc/vitis_ai_dnndk_samples/tf_yolov3_voc_py/tf_yolov3_voc.py)

[UA-DETRAC to VOC](https://blog.csdn.net/weixin_38106878/article/details/88684280?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control)

[Vitis-AI 1.1](https://www.xilinx.com/html_docs/vitis_ai/1_1/zkj1576857115470.html)

[Yolo-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest)

[keras-surgeon](https://github.com/BenWhetton/keras-surgeon)

[nni](https://github.com/microsoft/nni)
--------------------------------------------------------------------------------
/nets/CSPdarknet53_tiny.py:
--------------------------------------------------------------------------------
from functools import wraps
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, Lambda, Layer, LeakyReLU, BatchNormalization
from tensorflow.keras.regularizers import l2
from utils.utils import compose
import tensorflow as tf

def route_group(input_layer, groups, group_id):
    # Split the channels evenly and take the second half
    #convs = tf.split(input_layer, num_or_size_splits=groups, axis=-1)
    #return convs[group_id]
    in_channels = input_layer.get_shape().as_list()[3]
    convs = input_layer[:, :, :, in_channels//2:]
    return convs

#--------------------------------------------------#
#   Single convolution
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    # Adds an L2 kernel regularizer
    # with coefficient 5e-4
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)

#---------------------------------------------------#
#   Convolution block:
#   DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))

#---------------------------------------------------#
#   CSPdarknet block
#   It has a large residual edge
#   that bypasses several residual structures
#---------------------------------------------------#
def resblock_body(x, num_filters):
    # Feature integration
    x = DarknetConv2D_BN_Leaky(num_filters, (3,3))(x)
    # Large residual edge: route
    route = x
    # Channel split (replaced by a 1x1 convolution for the DPU)
    #x = Lambda(route_group,arguments={'groups':2, 'group_id':1})(x)
    x = DarknetConv2D_BN_Leaky(int(num_filters/2), (1,1))(x)
    x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x)

    # Small residual edge: route_1
    route_1 = x
    x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x)
    # Stack
    x = Concatenate()([x, route_1])

    x = DarknetConv2D_BN_Leaky(num_filters, (1,1))(x)
    # The third resblock_body exposes an effective feature-layer branch
    feat = x
    # Concatenate with the large residual edge
    x = Concatenate()([route, x])
    x = MaxPooling2D(pool_size=[2,2],)(x)

    # Finally the channels are integrated
    return x, feat

#---------------------------------------------------#
#   Main body of darknet53
#---------------------------------------------------#
def darknet_body(x):
    # Compress height and width
    x = ZeroPadding2D(((1,0),(1,0)))(x)
    # 416,416,3 -> 208,208,32
    x = DarknetConv2D_BN_Leaky(32, (3,3), strides=(2,2))(x)

    # Compress height and width
    x = ZeroPadding2D(((1,0),(1,0)))(x)
    # 208,208,32 -> 104,104,64
    x = DarknetConv2D_BN_Leaky(64, (3,3), strides=(2,2))(x)
    # 104,104,64 -> 52,52,128
    x, _ = resblock_body(x,num_filters = 64)
    # 52,52,128 -> 26,26,256
    x, _ = resblock_body(x,num_filters = 128)
    # 26,26,256 -> 13,13,512
    # feat1 has shape 26,26,256
    x, feat1 = resblock_body(x,num_filters = 256)

    x = DarknetConv2D_BN_Leaky(512, (3,3))(x)

    feat2 = x
    return feat1, feat2

--------------------------------------------------------------------------------
/yolo_fastest_tensorflow2/nets/yolo_fastest_backbone.py:
--------------------------------------------------------------------------------
from functools import wraps
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, Lambda, Layer, LeakyReLU, BatchNormalization, SeparableConv2D
from tensorflow.keras.regularizers import l2
from utils.utils import compose
import tensorflow as tf

#--------------------------------------------------#
#   Single convolution
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    # Adds an L2 kernel regularizer
    # with coefficient 5e-4
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)

def DepthwiseConv2D(*args, **kwargs):
    # Adds an L2 kernel regularizer
    # with coefficient 5e-4
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    darknet_conv_kwargs.update(kwargs)
    return SeparableConv2D(*args, **darknet_conv_kwargs)

#---------------------------------------------------#
#   Convolution block:
#   DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#

def DarknetConv2D_BN_Leaky(*args, **kwargs):
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))

def DepthwiseConv2D_BN_Leaky(*args, **kwargs):
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DepthwiseConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))
#---------------------------------------------------#
#   CSPdarknet block
#   It has a large residual edge
#   that bypasses several residual structures
#---------------------------------------------------#
def resblock_body(x, num_filters, num_filters_1):
    route = x
    x = DarknetConv2D_BN_Leaky(num_filters, (1, 1))(x)
    #x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DepthwiseConv2D_BN_Leaky(num_filters_1, (3, 3))(x)
    #x = DarknetConv2D_BN_Leaky(num_filters_1, (1, 1))(x)
    #x = route + x
    x = Concatenate()([route, x])
    return x

#---------------------------------------------------#
#   Main body of darknet53
#---------------------------------------------------#
def darknet_body(x):
    # Compress height and width; the next convolution has stride 2
    x = ZeroPadding2D(((1, 0),(1, 0)))(x)
    # 416,416,3 -> 208,208,32
    x = DepthwiseConv2D_BN_Leaky(8, (3, 3), strides=(2, 2))(x)
    #x = DarknetConv2D_BN_Leaky(8, (1, 1), strides=(1, 1))(x)
    #x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DepthwiseConv2D_BN_Leaky(4, (3, 3), strides=(1, 1))(x)
    #x = DarknetConv2D_BN_Leaky(4, (1, 1), strides=(1, 1))(x)
    x = resblock_body(x, 8, 4)
    x = DarknetConv2D_BN_Leaky(24, (1, 1), strides=(1, 1))(x)
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DepthwiseConv2D_BN_Leaky(8, (3, 3), strides=(2, 2))(x)
    #x = DarknetConv2D_BN_Leaky(8, (1, 1), strides=(1, 1))(x)
    x = resblock_body(x, 32, 8)
    x = resblock_body(x, 32, 8)
    x = DarknetConv2D_BN_Leaky(32, (1, 1), strides=(1, 1))(x)
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DepthwiseConv2D_BN_Leaky(8, (3, 3), strides=(2, 2))(x)
    #x = DarknetConv2D_BN_Leaky(8, (1, 1), strides=(1, 1))(x)
    x = resblock_body(x, 48, 8)
    x = resblock_body(x, 48, 8)
    x = DarknetConv2D_BN_Leaky(48, (1, 1), strides=(1, 1))(x)
    x = DepthwiseConv2D_BN_Leaky(16, (3, 3), strides=(1, 1))(x)
    #x = DarknetConv2D_BN_Leaky(16, (1, 1), strides=(1, 1))(x)
    x = resblock_body(x, 96, 16)
    x = resblock_body(x, 96, 16)
    x = resblock_body(x, 96, 16)
    x = resblock_body(x, 96, 16)
    x = DarknetConv2D_BN_Leaky(96, (1, 1), strides=(1, 1))(x)
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DepthwiseConv2D_BN_Leaky(24, (3, 3), strides=(2, 2))(x)
    #x = DarknetConv2D_BN_Leaky(24, (1, 1), strides=(1, 1))(x)
    x = resblock_body(x, 136, 24)
    x = resblock_body(x, 136, 24)
    x = resblock_body(x, 136, 24)
    x = resblock_body(x, 136, 24)
    x = DarknetConv2D_BN_Leaky(136, (1, 1), strides=(1, 1))(x)
    feat1 = x
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DepthwiseConv2D_BN_Leaky(48, (3, 3), strides=(2, 2))(x)
    #x = DarknetConv2D_BN_Leaky(48, (1, 1), strides=(1, 1))(x)
    x = resblock_body(x, 224, 48)
    x = resblock_body(x, 224, 48)
    x = resblock_body(x, 224, 48)
    x = resblock_body(x, 224, 48)
    x = resblock_body(x, 224, 48)
    x = DarknetConv2D_BN_Leaky(96, (1, 1), strides=(1, 1))(x)
    feat2 = x
    return feat1, feat2
--------------------------------------------------------------------------------
/input_fn.py:
--------------------------------------------------------------------------------
#MIT License

#Copyright (c) 2018 qqwweee

#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:

#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.

#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.

# Modification made by Xilinx, Inc.
# Copyright (c) 2019, Xilinx, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Origin code:https://github.com/lji72/inference/blob/master/others/cloud/single_stage_detector/tensorflow/train/eval_ssd_large.py

# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
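
# How this file is used (see scripts/2_vitisAI_tf_quantize.sh): vai_q_tensorflow
# calls calib_input(iter) once per calibration iteration (--calib_iter 100) and
# feeds the returned {"input_1": images} dict to the frozen graph. list.txt
# holds one image file name per line, resolved relative to calib_image_dir.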


from PIL import Image
import numpy as np


def letterbox_image(image, size):
    '''resize image with unchanged aspect ratio using padding'''
    iw, ih = image.size
    w, h = size
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    image = image.resize((nw,nh), Image.BICUBIC)
    new_image = Image.new('RGB', size, (128,128,128))
    new_image.paste(image, ((w-nw)//2, (h-nh)//2))
    return new_image

#image = Image.open(img_path)

def preprocessing_fn(image, model_image_size=(416,416)):
    if model_image_size != (None, None):
        assert model_image_size[0]%32 == 0, 'Multiples of 32 required'
        assert model_image_size[1]%32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
    else:
        new_image_size = (image.width - (image.width % 32), image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    return image_data

calib_image_dir = "./images/"
calib_image_list = "./list.txt"
calib_batch_size = 1
def calib_input(iter):
    images = []
    line = open(calib_image_list).readlines()
    for index in range(0, calib_batch_size):
        curline = line[iter * calib_batch_size + index]
        image_name = curline.strip()
        image = Image.open(calib_image_dir + image_name)
        image = preprocessing_fn(image)
        images.append(image)
    return {"input_1": images}
--------------------------------------------------------------------------------
/get_dr_txt.py:
--------------------------------------------------------------------------------
#----------------------------------------------------#
#   Get the detection-result and images-optional
#   files for the test set. Video tutorial:
#   https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
from yolo import YOLO
import os
import numpy as np
import copy
import colorsys
import tensorflow as tf
from timeit import default_timer as timer
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model
from PIL import Image, ImageFont, ImageDraw
from nets.yolo4_tiny import yolo_body,yolo_eval
from utils.utils import letterbox_image
from tqdm import tqdm

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

class mAP_YOLO(YOLO):
    #---------------------------------------------------#
    #   Get all the classes
    #---------------------------------------------------#
    def generate(self):
        self.score = 0.01
        self.iou = 0.5
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Count the anchors
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)

        # Load the model; if the file already contains the model structure, it is loaded directly.
        # Otherwise the model is built first and the weights are loaded afterwards.
        self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
        self.yolo_model.load_weights(self.model_path,by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))


        # Assign a different colour to each class for the bounding boxes
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

        # Shuffle the colours
        np.random.seed(10101)
        np.random.shuffle(self.colors)
        np.random.seed(None)

        if self.eager:
            self.input_image_shape = Input([2,],batch_size=1)
            inputs = [*self.yolo_model.output, self.input_image_shape]
            outputs = Lambda(yolo_eval, output_shape=(1,), name='yolo_eval',
                arguments={'anchors': self.anchors, 'num_classes': len(self.class_names), 'image_shape': self.model_image_size,
                'score_threshold': self.score, 'eager': True, 'max_boxes': self.max_boxes})(inputs)
            self.yolo_model = Model([self.yolo_model.input, self.input_image_shape], outputs)
        else:
            self.input_image_shape = K.placeholder(shape=(2, ))

            self.boxes, self.scores, self.classes = yolo_eval(self.yolo_model.output, self.anchors,
                    num_classes, self.input_image_shape, max_boxes=self.max_boxes,
                    score_threshold=self.score, iou_threshold=self.iou)


    #---------------------------------------------------#
    #   Detect a single image
    #---------------------------------------------------#
    def detect_image(self, image_id, image):
        f = open("./input/detection-results/"+image_id+".txt","w")

        # Resize the image to match the model input
        new_image_size = (self.model_image_size[1],self.model_image_size[0])
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        if self.eager:
            # Run the prediction
            input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
            out_boxes, out_scores, out_classes = self.yolo_model.predict([image_data, input_image_shape])
        else:
            # Run the prediction
            out_boxes, out_scores, out_classes = self.sess.run(
                [self.boxes, self.scores, self.classes],
                feed_dict={
                    self.yolo_model.input: image_data,
                    self.input_image_shape: [image.size[1], image.size[0]],
                    K.learning_phase(): 0
                })

        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[int(c)]
            score = str(out_scores[i])

            top, left, bottom, right = out_boxes[i]
            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))

        f.close()
        return

yolo = mAP_YOLO()

image_ids = open('VOCdevkit/VOC2007/ImageSets/Main/test.txt').read().strip().split()

if not os.path.exists("./input"):
    os.makedirs("./input")
if not os.path.exists("./input/detection-results"):
    os.makedirs("./input/detection-results")
if not os.path.exists("./input/images-optional"):
    os.makedirs("./input/images-optional")

for image_id in tqdm(image_ids):
    image_path = "./VOCdevkit/VOC2007/JPEGImages/"+image_id+".jpg"
    image = Image.open(image_path)
    # Enable this to visualize the images when computing mAP later
    # image.save("./input/images-optional/"+image_id+".jpg")
    yolo.detect_image(image_id,image)

print("Conversion completed!")
--------------------------------------------------------------------------------
/core/tf_prediction.py:
--------------------------------------------------------------------------------
# Copyright 2020 Xilinx Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
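
# Example usage (the file names are illustrative; the flags are defined in the
# argparse section at the bottom of this file):
#   python tf_prediction.py --pb_file model_data/yolov3_voc.pb \
#       --test_list voc_test_list.txt --result_file pred_results.txt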

import tensorflow as tf
import os
import cv2
from tensorflow.python.platform import gfile
import numpy as np
from PIL import Image
from yolo3_predictor import yolo_predictor
from tqdm import tqdm
import argparse

from tensorflow.contrib import decent_q

def letterbox_image(image, size):
    '''resize image with unchanged aspect ratio using padding'''
    ih, iw, _ = image.shape
    w, h = size
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    image = cv2.resize(image, (nw,nh), interpolation=cv2.INTER_LINEAR)
    new_image = np.ones((h,w,3), np.uint8) * 128
    h_start = (h-nh)//2
    w_start = (w-nw)//2
    new_image[h_start:h_start+nh, w_start:w_start+nw, :] = image
    return new_image


def get_class(classes_path):
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names


def get_anchors(anchors_path):
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape(-1, 2)


def write_items_to_file(image_id, items, fw):
    for item in items:
        fw.write(image_id + " " + " ".join([str(comp) for comp in item]) + "\n")


def pred_img(img_path, model_image_size):
    image = cv2.imread(img_path)
    image = image[...,::-1]
    image_h, image_w, _ = image.shape

    # image preprocessing
    if model_image_size != (None, None):
        assert model_image_size[0]%32 == 0, 'Multiples of 32 required'
        assert model_image_size[1]%32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
    else:
        new_image_size = (image_w - (image_w % 32), image_h - (image_h % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    out_boxes, out_scores, out_classes, out_y = sess.run(
        [pred_boxes, pred_scores, pred_classes, output_y],
        feed_dict={input_x: image_data, input_image_shape: (image_h, image_w)})

    # convert the result to label format
    items = []
    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image_h, np.floor(bottom + 0.5).astype('int32'))
        right = min(image_w, np.floor(right + 0.5).astype('int32'))
        item = [predicted_class, score, left, top, right, bottom]
        items.append(item)

    return items


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--pb_file', type=str, default="./model_data/yolov3_voc.pb" , help='path of frozon pb file')
    parser.add_argument('--test_list', type=str, help='path of voc test list')
    parser.add_argument('--result_file', type=str, help='path of voc prediction result')
    FLAGS = parser.parse_args()

    classes_path = "model_data/voc_classes.txt"
    anchors_path = "model_data/yolo_anchors.txt"
    pb_file_path = FLAGS.pb_file
    score_thresh = 0.005
    nms_thresh = 0.45

    class_names = get_class(classes_path)
    predictor = yolo_predictor(score_thresh, nms_thresh, classes_path, anchors_path)

    sess = tf.Session()
    with gfile.FastGFile(pb_file_path, 'rb') as f:  # file I/O
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())  # get graph_def from file
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')  # import graph
    sess.run(tf.global_variables_initializer())

    input_x = sess.graph.get_tensor_by_name('input_1:0')
    output_y1 = sess.graph.get_tensor_by_name('conv2d_17/BiasAdd:0')
    output_y2 = sess.graph.get_tensor_by_name('conv2d_20/BiasAdd:0')
    #output_y3 = sess.graph.get_tensor_by_name('output:0')
    output_y = [output_y1, output_y2]
    input_image_shape = tf.placeholder(tf.int32, shape=(2))
    pred_boxes, pred_scores, pred_classes = predictor.predict(output_y, input_image_shape)

    with open(FLAGS.test_list) as fr:
        lines = fr.readlines()
    fw = open(FLAGS.result_file, "w")
    for line in tqdm(lines):
        img_path = line.strip().split(" ")[0]
        fname = os.path.split(img_path)[-1]
        image_id = os.path.splitext(fname)[0]

        items = pred_img(img_path, (416, 416))
        write_items_to_file(image_id, items, fw)

    fw.close()
--------------------------------------------------------------------------------
/nets/loss.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.keras import backend as K
from nets.ious import box_ciou

#---------------------------------------------------#
#   Label smoothing
#---------------------------------------------------#
def _smooth_labels(y_true, label_smoothing):
    num_classes = tf.cast(K.shape(y_true)[-1], dtype=K.floatx())
    label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
    return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes
#---------------------------------------------------#
#   Convert each feature-layer prediction into real values
#---------------------------------------------------#
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    num_anchors = len(anchors)
    # [1, 1, 1, num_anchors, 2]
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the x,y grid
    # (13, 13, 1, 2)
    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # (batch_size,13,13,3,85)
    feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Convert the predictions into real values
    # box_xy is the center of the box
    # box_wh is the width and height of the box
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[...,::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[...,::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Return these parameters when computing the loss
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

#---------------------------------------------------#
#   Compute the IoU of each predicted box against the ground-truth boxes
#---------------------------------------------------#
def box_iou(b1, b2):
    # 13,13,3,1,4
    # Compute the top-left and bottom-right corners
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # 1,n,4
    # Compute the top-left and bottom-right corners
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # Compute the overlap area
    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou


#---------------------------------------------------#
#   Loss computation
#---------------------------------------------------#
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False):

    # There are 2 layers in total
    num_layers = len(anchors)//3

    # Separate the predictions from the ground truth; args is [*model_body.output, *y_true]
    # y_true is a list of two feature layers with shapes (m,13,13,3,85) and (m,26,26,3,85).
    # yolo_outputs is a list of two feature layers with shapes (m,13,13,255) and (m,26,26,255).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # Anchor masks
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]

    # input_shape is e.g. 608,608
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0

    # Process each image in the batch
    # m is the batch_size
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # y_true is a list of two feature layers with shapes (m,13,13,3,85) and (m,26,26,3,85).
    # yolo_outputs is a list of two feature layers with shapes (m,13,13,255) and (m,26,26,255).
    for l in range(num_layers):
        # Taking the first feature layer (m,13,13,3,85) as an example:
        # positions in the layer that contain an object, (m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]
        # and the corresponding classes, (m,13,13,3,80)
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        # Process the yolo_outputs feature layer:
        # grid is the grid (13,13,1,2), raw_pred is the raw prediction (m,13,13,3,85),
        # plus the decoded xy and wh, each (m,13,13,3,2)
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)

        # Decoded position of the predicted box
        # (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Build the group of negative samples, starting from an empty array []
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # Compute the ignore_mask for each image
        def loop_body(b, ignore_mask):
            # Take all ground-truth boxes of image b
            # n,4
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            # IoU between the predictions and the ground truth
            # pred_box is 13,13,3,4
            # the result is the IoU of each pred_box with every ground-truth box
            # 13,13,3,n
            iou = box_iou(pred_box[b], true_box)

            # 13,13,3
            best_iou = K.max(iou, axis=-1)

            # If a predicted box overlaps a ground-truth box by more than the threshold, ignore it
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
--------------------------------------------------------------------------------
/yolo.py:
--------------------------------------------------------------------------------
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
        del draw

        end = timer()
        print(end - start)
        return image
--------------------------------------------------------------------------------
/nets/yolo4_tiny.py:
--------------------------------------------------------------------------------
from functools import wraps

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, LeakyReLU, \
    BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from nets.CSPdarknet53_tiny import darknet_body
from utils.utils import compose



#--------------------------------------------------#
#   Single convolution
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)

#---------------------------------------------------#
#   Convolution block:
#   DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))

#---------------------------------------------------#
#   Feature layers -> final outputs
#---------------------------------------------------#
def yolo_body(inputs, num_anchors, num_classes):
    # Build the darknet53 backbone
    # and obtain two effective feature layers:
    # feat1 26x26x256
    # feat2 13x13x512
    feat1,feat2 = darknet_body(inputs)

    # 13x13x512 -> 13x13x256
    P5 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2)

    P5_output = DarknetConv2D_BN_Leaky(512, (3,3))(P5)
    P5_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P5_output)

    # Conv+UpSampling2D 13x13x256 -> 26x26x128
    P5_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(P5)

    # 26x26x(128+256) = 26x26x384
    P4 = Concatenate()([feat1, P5_upsample])

    P4_output = DarknetConv2D_BN_Leaky(256, (3,3))(P4)
    P4_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P4_output)

    return Model(inputs, [P5_output, P4_output])

#---------------------------------------------------#
#   Convert each feature-layer prediction into real values
#---------------------------------------------------#
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    num_anchors = len(anchors)
    # [1, 1, 1, num_anchors, 2]
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the x,y grid
    # (13,13, 1, 2)
    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # (batch_size,13,13,3,85)
    feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Convert the predictions into real values
    # box_xy is the center of the box
    # box_wh is the width and height of the box
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[...,::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[...,::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Return these parameters when computing the loss
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
| return box_xy, box_wh, box_confidence, box_class_probs 97 | 98 | #---------------------------------------------------# 99 | # 对box进行调整,使其符合真实图片的样子 100 | #---------------------------------------------------# 101 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 102 | box_yx = box_xy[..., ::-1] 103 | box_hw = box_wh[..., ::-1] 104 | 105 | input_shape = K.cast(input_shape, K.dtype(box_yx)) 106 | image_shape = K.cast(image_shape, K.dtype(box_yx)) 107 | 108 | new_shape = K.round(image_shape * K.min(input_shape/image_shape)) 109 | offset = (input_shape-new_shape)/2./input_shape 110 | scale = input_shape/new_shape 111 | 112 | box_yx = (box_yx - offset) * scale 113 | box_hw *= scale 114 | 115 | box_mins = box_yx - (box_hw / 2.) 116 | box_maxes = box_yx + (box_hw / 2.) 117 | boxes = K.concatenate([ 118 | box_mins[..., 0:1], # y_min 119 | box_mins[..., 1:2], # x_min 120 | box_maxes[..., 0:1], # y_max 121 | box_maxes[..., 1:2] # x_max 122 | ]) 123 | 124 | boxes *= K.concatenate([image_shape, image_shape]) 125 | return boxes 126 | 127 | #---------------------------------------------------# 128 | # 获取每个box和它的得分 129 | #---------------------------------------------------# 130 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): 131 | # 将预测值调成真实值 132 | # box_xy对应框的中心点 133 | # box_wh对应框的宽和高 134 | # -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80 135 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape) 136 | # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax 137 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 138 | # 获得得分和box 139 | boxes = K.reshape(boxes, [-1, 4]) 140 | box_scores = box_confidence * box_class_probs 141 | box_scores = K.reshape(box_scores, [-1, num_classes]) 142 | return boxes, box_scores 143 | 144 | # ---------------------------------------------------# 145 | # 图片预测 146 | # ---------------------------------------------------# 147 | def yolo_eval(yolo_outputs, 148 | anchors, 149 | num_classes, 150 | image_shape, 151 | max_boxes=20, 152 | score_threshold=.6, 153 | iou_threshold=.5, 154 | eager = False): 155 | if eager: 156 | image_shape = K.reshape(yolo_outputs[-1],[-1]) 157 | num_layers = len(yolo_outputs)-1 158 | else: 159 | # 获得特征层的数量 160 | num_layers = len(yolo_outputs) 161 | # 特征层1对应的anchor是678 162 | # 特征层2对应的anchor是345 163 | # 特征层3对应的anchor是012 164 | anchor_mask = [[3, 4, 5], [0, 1, 2]] 165 | 166 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 167 | boxes = [] 168 | box_scores = [] 169 | # 对每个特征层进行处理 170 | for l in range(num_layers): 171 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, 172 | image_shape) 173 | boxes.append(_boxes) 174 | box_scores.append(_box_scores) 175 | # 将每个特征层的结果进行堆叠 176 | boxes = K.concatenate(boxes, axis=0) 177 | box_scores = K.concatenate(box_scores, axis=0) 178 | 179 | mask = box_scores >= score_threshold 180 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 181 | boxes_ = [] 182 | scores_ = [] 183 | classes_ = [] 184 | for c in range(num_classes): 185 | # 取出所有box_scores >= score_threshold的框,和成绩 186 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 187 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 188 | 189 | # 非极大抑制,去掉box重合程度高的那一些 190 | nms_index = tf.image.non_max_suppression( 191 | class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) 192 | 193 | # 获取非极大抑制后的结果 194 | # 下列三个分别是 195 | # 框的位置,得分与种类 196 | class_boxes = 
K.gather(class_boxes, nms_index) 197 | class_box_scores = K.gather(class_box_scores, nms_index) 198 | classes = K.ones_like(class_box_scores, 'int32') * c 199 | boxes_.append(class_boxes) 200 | scores_.append(class_box_scores) 201 | classes_.append(classes) 202 | boxes_ = K.concatenate(boxes_, axis=0) 203 | scores_ = K.concatenate(scores_, axis=0) 204 | classes_ = K.concatenate(classes_, axis=0) 205 | 206 | return boxes_, scores_, classes_ 207 | -------------------------------------------------------------------------------- /VOCdevkit/DETRAC_xmlParser.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import xml.etree.ElementTree as ET 4 | from xml.dom.minidom import Document 5 | import os 6 | import cv2 7 | import time 8 | 9 | def ConvertVOCXml(file_path="",file_name=""): 10 | tree = ET.parse(file_name) 11 | root = tree.getroot() 12 | # print(root.tag) 13 | 14 | num=0 #计数 15 | #读xml操作 16 | 17 | frame_lists=[] 18 | output_file_name="" 19 | for child in root: 20 | 21 | if(child.tag=="frame"): 22 | # 创建dom文档 23 | doc = Document() 24 | # 创建根节点 25 | annotation = doc.createElement('annotation') 26 | # 根节点插入dom树 27 | doc.appendChild(annotation) 28 | 29 | #print(child.tag, child.attrib["num"]) 30 | pic_id= child.attrib["num"].zfill(5) 31 | #print(pic_id) 32 | output_file_name=root.attrib["name"]+"__img"+pic_id+".xml" 33 | # print(output_file_name) 34 | 35 | folder = doc.createElement("folder") 36 | folder.appendChild(doc.createTextNode("VOC2007")) 37 | annotation.appendChild(folder) 38 | 39 | filename = doc.createElement("filename") 40 | pic_name=root.attrib["name"]+"__img"+pic_id+".jpg" 41 | filename.appendChild(doc.createTextNode(pic_name)) 42 | annotation.appendChild(filename) 43 | 44 | sizeimage = doc.createElement("size") 45 | imagewidth = doc.createElement("width") 46 | imageheight = doc.createElement("height") 47 | imagedepth = doc.createElement("depth") 48 | 49 | imagewidth.appendChild(doc.createTextNode("960")) 50 | imageheight.appendChild(doc.createTextNode("540")) 51 | imagedepth.appendChild(doc.createTextNode("3")) 52 | 53 | sizeimage.appendChild(imagedepth) 54 | sizeimage.appendChild(imagewidth) 55 | sizeimage.appendChild(imageheight) 56 | annotation.appendChild(sizeimage) 57 | 58 | target_list=child.getchildren()[0] #获取target_list 59 | #print(target_list.tag) 60 | object=None 61 | for target in target_list: 62 | if(target.tag=="target"): 63 | #print(target.tag) 64 | object = doc.createElement('object') 65 | bndbox = doc.createElement("bndbox") 66 | 67 | for target_child in target: 68 | if(target_child.tag=="box"): 69 | xmin = doc.createElement("xmin") 70 | ymin = doc.createElement("ymin") 71 | xmax = doc.createElement("xmax") 72 | ymax = doc.createElement("ymax") 73 | xmin_value=int(float(target_child.attrib["left"])) 74 | ymin_value=int(float(target_child.attrib["top"])) 75 | box_width_value=int(float(target_child.attrib["width"])) 76 | box_height_value=int(float(target_child.attrib["height"])) 77 | xmin.appendChild(doc.createTextNode(str(xmin_value))) 78 | ymin.appendChild(doc.createTextNode(str(ymin_value))) 79 | if(xmin_value+box_width_value>960): 80 | xmax.appendChild(doc.createTextNode(str(960))) 81 | else: 82 | xmax.appendChild(doc.createTextNode(str(xmin_value+box_width_value))) 83 | if(ymin_value+box_height_value>540): 84 | ymax.appendChild(doc.createTextNode(str(540))) 85 | else: 86 | ymax.appendChild(doc.createTextNode(str(ymin_value+box_height_value))) 87 | 88 | if(target_child.tag=="attribute"): 89 | name = 
doc.createElement('name') 90 | pose=doc.createElement('pose') 91 | truncated=doc.createElement('truncated') 92 | difficult=doc.createElement('difficult') 93 | 94 | name.appendChild(doc.createTextNode("car")) 95 | pose.appendChild(doc.createTextNode("Left")) #随意指定 96 | truncated.appendChild(doc.createTextNode("0")) #随意指定 97 | difficult.appendChild(doc.createTextNode("0")) #随意指定 98 | 99 | 100 | object.appendChild(name) 101 | object.appendChild(pose) 102 | object.appendChild(truncated) 103 | object.appendChild(difficult) 104 | 105 | bndbox.appendChild(xmin) 106 | bndbox.appendChild(ymin) 107 | bndbox.appendChild(xmax) 108 | bndbox.appendChild(ymax) 109 | object.appendChild(bndbox) 110 | annotation.appendChild(object) 111 | 112 | 113 | file_path_out=os.path.join(file_path,output_file_name) 114 | f = open(file_path_out, 'w') 115 | f.write(doc.toprettyxml(indent=' ' * 4)) 116 | f.close() 117 | num=num+1 118 | return num 119 | 120 | 121 | 122 | 123 | ''' 124 | 画方框 125 | ''' 126 | def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2): 127 | 128 | # Draw bounding box... 129 | print(bbox) 130 | p1 = (int(float(bbox["xmin"])), int(float(bbox["ymin"]))) 131 | p2 = (int(float(bbox["xmax"])), int(float(bbox["ymax"]))) 132 | cv2.rectangle(img, p1, p2, color, thickness) 133 | 134 | 135 | def visualization_image(image_name,xml_file_name): 136 | tree = ET.parse(xml_file_name) 137 | root = tree.getroot() 138 | 139 | object_lists=[] 140 | for child in root: 141 | if(child.tag=="folder"): 142 | print(child.tag, child.text) 143 | elif (child.tag == "filename"): 144 | print(child.tag, child.text) 145 | elif (child.tag == "size"): #解析size 146 | for size_child in child: 147 | if(size_child.tag=="width"): 148 | print(size_child.tag,size_child.text) 149 | elif (size_child.tag == "height"): 150 | print(size_child.tag, size_child.text) 151 | elif (size_child.tag == "depth"): 152 | print(size_child.tag, size_child.text) 153 | elif (child.tag == "object"): #解析object 154 | singleObject={} 155 | for object_child in child: 156 | if (object_child.tag == "name"): 157 | # print(object_child.tag,object_child.text) 158 | singleObject["name"] = object_child.text 159 | elif (object_child.tag == "bndbox"): 160 | for bndbox_child in object_child: 161 | if (bndbox_child.tag == "xmin"): 162 | singleObject["xmin"] = bndbox_child.text 163 | # print(bndbox_child.tag, bndbox_child.text) 164 | elif (bndbox_child.tag == "ymin"): 165 | # print(bndbox_child.tag, bndbox_child.text) 166 | singleObject["ymin"] = bndbox_child.text 167 | elif (bndbox_child.tag == "xmax"): 168 | singleObject["xmax"] = bndbox_child.text 169 | elif (bndbox_child.tag == "ymax"): 170 | singleObject["ymax"] = bndbox_child.text 171 | object_length=len(singleObject) 172 | if(object_length>0): 173 | object_lists.append(singleObject) 174 | img = cv2.imread(image_name) 175 | for object_coordinate in object_lists: 176 | bboxes_draw_on_img(img,object_coordinate) 177 | cv2.imshow("capture", img) 178 | cv2.waitKey (0) 179 | cv2.destroyAllWindows() 180 | 181 | 182 | if ( __name__ == "__main__"): 183 | #print("main") 184 | basePath="DETRAC-Train-Annotations-XML" 185 | totalxml=os.listdir(basePath) 186 | total_num=0 187 | flag=False 188 | print("正在转换") 189 | saveBasePath="xml_test" 190 | if os.path.exists(saveBasePath)==False: #判断文件夹是否存在 191 | os.makedirs(saveBasePath) 192 | 193 | #ConvertVOCXml(file_path="samplexml",file_name="000009.xml") 194 | # Start time 195 | start = time.time() 196 | log=open("xml_statistical.txt","w") #分析日志,进行排错 197 | for xml in totalxml: 198 | 
file_name=os.path.join(basePath,xml) 199 | print(file_name) 200 | num=ConvertVOCXml(file_path=saveBasePath,file_name=file_name) 201 | print(num) 202 | total_num=total_num+num 203 | log.write(file_name+" "+str(num)+"\n") 204 | # End time 205 | end = time.time() 206 | seconds=end-start 207 | print( "Time taken : {0} seconds".format(seconds)) 208 | print(total_num) 209 | log.write(str(total_num)+"\n") 210 | visualization_image("Insight-MVT_Annotation_Train/MVI_40212/img00396.jpg","xml_test/MVI_40212__img00396.xml") 211 | -------------------------------------------------------------------------------- /keras_to_tensorflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Copyright (c) 2019, by the Authors: Amir H. Abdi 4 | This script is freely available under the MIT Public License. 5 | Please see the License file in the root for details. 6 | The following code snippet will convert the keras model files 7 | to the freezed .pb tensorflow weight file. The resultant TensorFlow model 8 | holds both the model architecture and its associated weights. 9 | """ 10 | 11 | import tensorflow as tf 12 | from tensorflow.python.framework import graph_util 13 | from tensorflow.python.framework import graph_io 14 | from pathlib import Path 15 | from absl import app 16 | from absl import flags 17 | from absl import logging 18 | import tensorflow.keras as keras 19 | from tensorflow.keras import backend as K 20 | from tensorflow.keras.models import model_from_json, model_from_yaml 21 | 22 | K.set_learning_phase(0) 23 | FLAGS = flags.FLAGS 24 | 25 | flags.DEFINE_string('input_model', None, 'Path to the input model.') 26 | flags.DEFINE_string('input_model_json', None, 'Path to the input model ' 27 | 'architecture in json format.') 28 | flags.DEFINE_string('input_model_yaml', None, 'Path to the input model ' 29 | 'architecture in yaml format.') 30 | flags.DEFINE_string('output_model', None, 'Path where the converted model will ' 31 | 'be stored.') 32 | flags.DEFINE_boolean('save_graph_def', False, 33 | 'Whether to save the graphdef.pbtxt file which contains ' 34 | 'the graph definition in ASCII format.') 35 | flags.DEFINE_string('output_nodes_prefix', None, 36 | 'If set, the output nodes will be renamed to ' 37 | '`output_nodes_prefix`+i, where `i` will numerate the ' 38 | 'number of of output nodes of the network.') 39 | flags.DEFINE_boolean('quantize', False, 40 | 'If set, the resultant TensorFlow graph weights will be ' 41 | 'converted from float into eight-bit equivalents. See ' 42 | 'documentation here: ' 43 | 'https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms') 44 | flags.DEFINE_boolean('channels_first', False, 45 | 'Whether channels are the first dimension of a tensor. ' 46 | 'The default is TensorFlow behaviour where channels are ' 47 | 'the last dimension.') 48 | flags.DEFINE_boolean('output_meta_ckpt', False, 49 | 'If set to True, exports the model as .meta, .index, and ' 50 | '.data files, with a checkpoint file. 
These can be later ' 51 | 'loaded in TensorFlow to continue training.') 52 | 53 | flags.mark_flag_as_required('input_model') 54 | flags.mark_flag_as_required('output_model') 55 | 56 | 57 | def load_model(input_model_path, input_json_path=None, input_yaml_path=None): 58 | if not Path(input_model_path).exists(): 59 | raise FileNotFoundError('Model file `{}` does not exist.'.format(input_model_path)) 60 | try: 61 | # model = keras.models.load_model(input_model_path, compile=False) 62 | with open(input_json_path) as json_file: 63 | json_config = json_file.read() 64 | model = tf.keras.models.model_from_json(json_config, custom_objects={'tf': tf}) 65 | 66 | # Load weights 67 | model.load_weights(input_model_path) 68 | return model 69 | except FileNotFoundError as err: 70 | logging.error('Input mode file (%s) does not exist.', FLAGS.input_model) 71 | raise err 72 | except ValueError as wrong_file_err: 73 | if input_json_path: 74 | if not Path(input_json_path).exists(): 75 | raise FileNotFoundError( 76 | 'Model description json file `{}` does not exist.'.format( 77 | input_json_path)) 78 | try: 79 | model = model_from_json(open(str(input_json_path)).read()) 80 | model.load_weights(input_model_path) 81 | return model 82 | except Exception as err: 83 | logging.error("Couldn't load model from json.") 84 | raise err 85 | elif input_yaml_path: 86 | if not Path(input_yaml_path).exists(): 87 | raise FileNotFoundError( 88 | 'Model description yaml file `{}` does not exist.'.format( 89 | input_yaml_path)) 90 | try: 91 | model = model_from_yaml(open(str(input_yaml_path)).read()) 92 | model.load_weights(input_model_path) 93 | return model 94 | except Exception as err: 95 | logging.error("Couldn't load model from yaml.") 96 | raise err 97 | else: 98 | logging.error( 99 | 'Input file specified only holds the weights, and not ' 100 | 'the model definition. Save the model using ' 101 | 'model.save(filename.h5) which will contain the network ' 102 | 'architecture as well as its weights. ' 103 | 'If the model is saved using the ' 104 | 'model.save_weights(filename) function, either ' 105 | 'input_model_json or input_model_yaml flags should be set to ' 106 | 'to import the network architecture prior to loading the ' 107 | 'weights. 
\n' 108 | 'Check the keras documentation for more details ' 109 | '(https://keras.io/getting-started/faq/)') 110 | raise wrong_file_err 111 | 112 | 113 | def main(args): 114 | # If output_model path is relative and in cwd, make it absolute from root 115 | output_model = FLAGS.output_model 116 | if str(Path(output_model).parent) == '.': 117 | output_model = str((Path.cwd() / output_model)) 118 | 119 | output_fld = Path(output_model).parent 120 | output_model_name = Path(output_model).name 121 | output_model_stem = Path(output_model).stem 122 | output_model_pbtxt_name = output_model_stem + '.pbtxt' 123 | 124 | # Create output directory if it does not exist 125 | #Path(output_model).parent.mkdir(parents=True, exist_ok=True) 126 | #Path(output_model).parent.mkdir(parents=True) 127 | 128 | if FLAGS.channels_first: 129 | K.set_image_data_format('channels_first') 130 | else: 131 | K.set_image_data_format('channels_last') 132 | 133 | model = load_model(FLAGS.input_model, FLAGS.input_model_json, FLAGS.input_model_yaml) 134 | 135 | # TODO(amirabdi): Support networks with multiple inputs 136 | orig_output_node_names = [node.op.name for node in model.outputs] 137 | if FLAGS.output_nodes_prefix: 138 | num_output = len(orig_output_node_names) 139 | pred = [None] * num_output 140 | converted_output_node_names = [None] * num_output 141 | 142 | # Create dummy tf nodes to rename output 143 | for i in range(num_output): 144 | converted_output_node_names[i] = '{}{}'.format( 145 | FLAGS.output_nodes_prefix, i) 146 | pred[i] = tf.identity(model.outputs[i], 147 | name=converted_output_node_names[i]) 148 | else: 149 | converted_output_node_names = orig_output_node_names 150 | logging.info('Converted output node names are: %s', 151 | str(converted_output_node_names)) 152 | 153 | sess = K.get_session() 154 | if FLAGS.output_meta_ckpt: 155 | saver = tf.train.Saver() 156 | saver.save(sess, str(output_fld / output_model_stem)) 157 | 158 | if FLAGS.save_graph_def: 159 | tf.train.write_graph(sess.graph.as_graph_def(), str(output_fld), 160 | output_model_pbtxt_name, as_text=True) 161 | logging.info('Saved the graph definition in ascii format at %s', 162 | str(Path(output_fld) / output_model_pbtxt_name)) 163 | 164 | if FLAGS.quantize: 165 | from tensorflow.tools.graph_transforms import TransformGraph 166 | transforms = ["quantize_weights", "quantize_nodes"] 167 | transformed_graph_def = TransformGraph(sess.graph.as_graph_def(), [], 168 | converted_output_node_names, 169 | transforms) 170 | constant_graph = graph_util.convert_variables_to_constants( 171 | sess, 172 | transformed_graph_def, 173 | converted_output_node_names) 174 | else: 175 | constant_graph = graph_util.convert_variables_to_constants( 176 | sess, 177 | sess.graph.as_graph_def(), 178 | converted_output_node_names) 179 | 180 | graph_io.write_graph(constant_graph, str(output_fld), output_model_name, 181 | as_text=False) 182 | logging.info('Saved the freezed graph at %s', 183 | str(Path(output_fld) / output_model_name)) 184 | 185 | 186 | if __name__ == "__main__": 187 | app.run(main) 188 | -------------------------------------------------------------------------------- /yolo_fastest_tensorflow2/nets/yolo_fastest.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.keras import backend as K 6 | from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, LeakyReLU, \ 7 | 
BatchNormalization, SeparableConv2D 8 | from tensorflow.keras.models import Model 9 | from tensorflow.keras.regularizers import l2 10 | from nets.yolo_fastest_backbone import darknet_body 11 | from utils.utils import compose 12 | 13 | 14 | # --------------------------------------------------# 15 | # 单次卷积 16 | # --------------------------------------------------# 17 | @wraps(Conv2D) 18 | def DarknetConv2D(*args, **kwargs): 19 | # 多了一个正则化的项 20 | # 正则化系数5e-4 21 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 22 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' 23 | darknet_conv_kwargs.update(kwargs) 24 | return Conv2D(*args, **darknet_conv_kwargs) 25 | 26 | def DepthwiseConv2D(*args, **kwargs): 27 | # 多了一个正则化的项 28 | # 正则化系数5e-4 29 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 30 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' 31 | darknet_conv_kwargs.update(kwargs) 32 | return SeparableConv2D(*args, **darknet_conv_kwargs) 33 | 34 | #---------------------------------------------------# 35 | # 卷积块 36 | # DarknetConv2D + BatchNormalization + LeakyReLU 37 | #---------------------------------------------------# 38 | 39 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 40 | no_bias_kwargs = {'use_bias': False} 41 | no_bias_kwargs.update(kwargs) 42 | return compose( 43 | DarknetConv2D(*args, **no_bias_kwargs), 44 | BatchNormalization(), 45 | LeakyReLU(alpha=0.1)) 46 | 47 | def DarknetConv2D_Bias(*args, **kwargs): 48 | no_bias_kwargs = {'use_bias': True} 49 | no_bias_kwargs.update(kwargs) 50 | return DarknetConv2D(*args, **no_bias_kwargs) 51 | 52 | def DepthwiseConv2D_BN_Leaky(*args, **kwargs): 53 | no_bias_kwargs = {'use_bias': False} 54 | no_bias_kwargs.update(kwargs) 55 | return compose( 56 | DepthwiseConv2D(*args, **no_bias_kwargs), 57 | BatchNormalization(), 58 | LeakyReLU(alpha=0.1)) 59 | 60 | 61 | # ---------------------------------------------------# 62 | # 特征层->最后的输出 63 | # ---------------------------------------------------# 64 | def yolo_body(inputs, num_anchors, num_classes): 65 | # 生成darknet53的主干模型 66 | # 首先我们会获取到两个有效特征层 67 | # feat1 26x26x256 68 | # feat2 13x13x512 69 | feat1, feat2 = darknet_body(inputs) 70 | 71 | P5 = UpSampling2D(2)(feat2) 72 | P5 = Concatenate()([P5, feat1]) 73 | P5_output = DarknetConv2D_BN_Leaky(96, (1, 1))(P5) 74 | P5_output = DepthwiseConv2D_BN_Leaky(96, (5, 5))(P5_output) 75 | #P5_output = DarknetConv2D_BN_Leaky(96, (1, 1))(P5_output) 76 | P5_output = DepthwiseConv2D_BN_Leaky(96, (5, 5))(P5_output) 77 | #P5_output = DarknetConv2D_BN_Leaky(96, (1, 1))(P5_output) 78 | P5_output = DarknetConv2D_Bias(num_anchors * (num_classes + 5), (1, 1))(P5_output) 79 | 80 | P4 = DepthwiseConv2D_BN_Leaky(128, (5, 5))(feat2) 81 | #P4 = DarknetConv2D_BN_Leaky(128, (1, 1))(P4) 82 | P4 = DepthwiseConv2D_BN_Leaky(128, (5, 5))(P4) 83 | #P4 = DarknetConv2D_BN_Leaky(128, (1, 1))(P4) 84 | P4_output = DarknetConv2D_Bias(num_anchors * (num_classes + 5), (1, 1))(P4) 85 | 86 | return Model(inputs, [P4_output, P5_output]) 87 | 88 | 89 | # ---------------------------------------------------# 90 | # 将预测值的每个特征层调成真实值 91 | # ---------------------------------------------------# 92 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): 93 | num_anchors = len(anchors) 94 | # [1, 1, 1, num_anchors, 2] 95 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) 96 | 97 | # 获得x,y的网格 98 | # (13,13, 1, 2) 99 | grid_shape = K.shape(feats)[1:3] # height, width 100 | grid_y = 
K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), 101 | [1, grid_shape[1], 1, 1]) 102 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), 103 | [grid_shape[0], 1, 1, 1]) 104 | grid = K.concatenate([grid_x, grid_y]) 105 | grid = K.cast(grid, K.dtype(feats)) 106 | 107 | # (batch_size,13,13,3,85) 108 | feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 109 | 110 | # 将预测值调成真实值 111 | # box_xy对应框的中心点 112 | # box_wh对应框的宽和高 113 | box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[..., ::-1], K.dtype(feats)) 114 | box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[..., ::-1], K.dtype(feats)) 115 | box_confidence = K.sigmoid(feats[..., 4:5]) 116 | box_class_probs = K.sigmoid(feats[..., 5:]) 117 | 118 | # 在计算loss的时候返回如下参数 119 | if calc_loss == True: 120 | return grid, feats, box_xy, box_wh 121 | return box_xy, box_wh, box_confidence, box_class_probs 122 | 123 | 124 | # ---------------------------------------------------# 125 | # 对box进行调整,使其符合真实图片的样子 126 | # ---------------------------------------------------# 127 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 128 | box_yx = box_xy[..., ::-1] 129 | box_hw = box_wh[..., ::-1] 130 | 131 | input_shape = K.cast(input_shape, K.dtype(box_yx)) 132 | image_shape = K.cast(image_shape, K.dtype(box_yx)) 133 | 134 | new_shape = K.round(image_shape * K.min(input_shape / image_shape)) 135 | offset = (input_shape - new_shape) / 2. / input_shape 136 | scale = input_shape / new_shape 137 | 138 | box_yx = (box_yx - offset) * scale 139 | box_hw *= scale 140 | 141 | box_mins = box_yx - (box_hw / 2.) 142 | box_maxes = box_yx + (box_hw / 2.) 143 | boxes = K.concatenate([ 144 | box_mins[..., 0:1], # y_min 145 | box_mins[..., 1:2], # x_min 146 | box_maxes[..., 0:1], # y_max 147 | box_maxes[..., 1:2] # x_max 148 | ]) 149 | 150 | boxes *= K.concatenate([image_shape, image_shape]) 151 | return boxes 152 | 153 | 154 | # ---------------------------------------------------# 155 | # 获取每个box和它的得分 156 | # ---------------------------------------------------# 157 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): 158 | # 将预测值调成真实值 159 | # box_xy对应框的中心点 160 | # box_wh对应框的宽和高 161 | # -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80 162 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape) 163 | # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax 164 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 165 | # 获得得分和box 166 | boxes = K.reshape(boxes, [-1, 4]) 167 | box_scores = box_confidence * box_class_probs 168 | box_scores = K.reshape(box_scores, [-1, num_classes]) 169 | return boxes, box_scores 170 | 171 | 172 | # ---------------------------------------------------# 173 | # 图片预测 174 | # ---------------------------------------------------# 175 | def yolo_eval(yolo_outputs, 176 | anchors, 177 | num_classes, 178 | image_shape, 179 | max_boxes=20, 180 | score_threshold=.6, 181 | iou_threshold=.5, 182 | eager=False): 183 | if eager: 184 | image_shape = K.reshape(yolo_outputs[-1], [-1]) 185 | num_layers = len(yolo_outputs) - 1 186 | else: 187 | # 获得特征层的数量 188 | num_layers = len(yolo_outputs) 189 | # 特征层1对应的anchor是678 190 | # 特征层2对应的anchor是345 191 | # 特征层3对应的anchor是012 192 | anchor_mask = [[3, 4, 5], [0, 1, 2]] 193 | 194 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 195 | boxes = [] 196 | box_scores = [] 197 | # 对每个特征层进行处理 198 | for l in range(num_layers): 
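        # Shape sketch (illustrative): with a 320x320 input and a batch of one,
        # the two grids are 10x10 and 20x20 with 3 anchors per cell, so after
        # this loop `boxes` stacks to (10*10*3 + 20*20*3, 4) = (1500, 4) and
        # `box_scores` to (1500, num_classes).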
199 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, 200 | image_shape) 201 | boxes.append(_boxes) 202 | box_scores.append(_box_scores) 203 | # 将每个特征层的结果进行堆叠 204 | boxes = K.concatenate(boxes, axis=0) 205 | box_scores = K.concatenate(box_scores, axis=0) 206 | 207 | mask = box_scores >= score_threshold 208 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 209 | boxes_ = [] 210 | scores_ = [] 211 | classes_ = [] 212 | for c in range(num_classes): 213 | # 取出所有box_scores >= score_threshold的框,和成绩 214 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 215 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 216 | 217 | # 非极大抑制,去掉box重合程度高的那一些 218 | nms_index = tf.image.non_max_suppression( 219 | class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) 220 | 221 | # 获取非极大抑制后的结果 222 | # 下列三个分别是 223 | # 框的位置,得分与种类 224 | class_boxes = K.gather(class_boxes, nms_index) 225 | class_box_scores = K.gather(class_box_scores, nms_index) 226 | classes = K.ones_like(class_box_scores, 'int32') * c 227 | boxes_.append(class_boxes) 228 | scores_.append(class_box_scores) 229 | classes_.append(classes) 230 | boxes_ = K.concatenate(boxes_, axis=0) 231 | scores_ = K.concatenate(scores_, axis=0) 232 | classes_ = K.concatenate(classes_, axis=0) 233 | 234 | return boxes_, scores_, classes_ -------------------------------------------------------------------------------- /core/yolo3_predictor.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright 2020 Xilinx Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # This file is modified from other's code from github. 
17 | # For more details, please refer to https://github.com/aloyschen/tensorflow-yolo3.git 18 | 19 | 20 | import os 21 | import random 22 | import colorsys 23 | import numpy as np 24 | import tensorflow as tf 25 | 26 | 27 | class yolo_predictor: 28 | def __init__(self, obj_threshold, nms_threshold, classes_file, anchors_file): 29 | """ 30 | Introduction 31 | ------------ 32 | 初始化函数 33 | Parameters 34 | ---------- 35 | obj_threshold: 目标检测为物体的阈值 36 | nms_threshold: nms阈值 37 | """ 38 | self.obj_threshold = obj_threshold 39 | self.nms_threshold = nms_threshold 40 | self.classes_path = classes_file 41 | self.anchors_path = anchors_file 42 | self.class_names = self._get_class() 43 | self.anchors = self._get_anchors() 44 | hsv_tuples = [(x / len(self.class_names), 1., 1.)for x in range(len(self.class_names))] 45 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 46 | self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors)) 47 | random.seed(10101) 48 | random.shuffle(self.colors) 49 | random.seed(None) 50 | 51 | 52 | def _get_class(self): 53 | """ 54 | Introduction 55 | ------------ 56 | 读取类别名称 57 | """ 58 | classes_path = os.path.expanduser(self.classes_path) 59 | with open(classes_path) as f: 60 | class_names = f.readlines() 61 | class_names = [c.strip() for c in class_names] 62 | return class_names 63 | 64 | def _get_anchors(self): 65 | """ 66 | Introduction 67 | ------------ 68 | 读取anchors数据 69 | """ 70 | anchors_path = os.path.expanduser(self.anchors_path) 71 | with open(anchors_path) as f: 72 | anchors = f.readline() 73 | anchors = [float(x) for x in anchors.split(',')] 74 | anchors = np.array(anchors).reshape(-1, 2) 75 | return anchors 76 | 77 | 78 | 79 | def eval(self, yolo_outputs, image_shape, max_boxes = 20): 80 | """ 81 | Introduction 82 | ------------ 83 | 根据Yolo模型的输出进行非极大值抑制,获取最后的物体检测框和物体检测类别 84 | Parameters 85 | ---------- 86 | yolo_outputs: yolo模型输出 87 | image_shape: 图片的大小 88 | max_boxes: 最大box数量 89 | Returns 90 | ------- 91 | boxes_: 物体框的位置 92 | scores_: 物体类别的概率 93 | classes_: 物体类别 94 | """ 95 | anchor_mask = [[3, 4, 5], [0, 1, 2]] 96 | boxes = [] 97 | box_scores = [] 98 | input_shape = tf.shape(yolo_outputs[0])[1 : 3] * 32 99 | # 对三个尺度的输出获取每个预测box坐标和box的分数,score计算为置信度x类别概率 100 | for i in range(len(yolo_outputs)): 101 | _boxes, _box_scores = self.boxes_and_scores(yolo_outputs[i], self.anchors[anchor_mask[i]], len(self.class_names), input_shape, image_shape) 102 | boxes.append(_boxes) 103 | box_scores.append(_box_scores) 104 | boxes = tf.concat(boxes, axis = 0) 105 | box_scores = tf.concat(box_scores, axis = 0) 106 | 107 | mask = box_scores >= self.obj_threshold 108 | max_boxes_tensor = tf.constant(max_boxes, dtype = tf.int32) 109 | boxes_ = [] 110 | scores_ = [] 111 | classes_ = [] 112 | for c in range(len(self.class_names)): 113 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 114 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 115 | nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor, iou_threshold = self.nms_threshold) 116 | class_boxes = tf.gather(class_boxes, nms_index) 117 | class_box_scores = tf.gather(class_box_scores, nms_index) 118 | classes = tf.ones_like(class_box_scores, 'int32') * c 119 | boxes_.append(class_boxes) 120 | scores_.append(class_box_scores) 121 | classes_.append(classes) 122 | boxes_ = tf.concat(boxes_, axis = 0) 123 | scores_ = tf.concat(scores_, axis = 0) 124 | classes_ = tf.concat(classes_, axis = 0) 125 | return boxes_, scores_, 
classes_ 126 | 127 | 128 | def boxes_and_scores(self, feats, anchors, classes_num, input_shape, image_shape): 129 | """ 130 | Introduction 131 | ------------ 132 | 将预测出的box坐标转换为对应原图的坐标,然后计算每个box的分数 133 | Parameters 134 | ---------- 135 | feats: yolo输出的feature map 136 | anchors: anchor的位置 137 | class_num: 类别数目 138 | input_shape: 输入大小 139 | image_shape: 图片大小 140 | Returns 141 | ------- 142 | boxes: 物体框的位置 143 | boxes_scores: 物体框的分数,为置信度和类别概率的乘积 144 | """ 145 | box_xy, box_wh, box_confidence, box_class_probs = self._get_feats(feats, anchors, classes_num, input_shape) 146 | boxes = self.correct_boxes(box_xy, box_wh, input_shape, image_shape) 147 | boxes = tf.reshape(boxes, [-1, 4]) 148 | box_scores = box_confidence * box_class_probs 149 | box_scores = tf.reshape(box_scores, [-1, classes_num]) 150 | return boxes, box_scores 151 | 152 | 153 | def correct_boxes(self, box_xy, box_wh, input_shape, image_shape): 154 | """ 155 | Introduction 156 | ------------ 157 | 计算物体框预测坐标在原图中的位置坐标 158 | Parameters 159 | ---------- 160 | box_xy: 物体框左上角坐标 161 | box_wh: 物体框的宽高 162 | input_shape: 输入的大小 163 | image_shape: 图片的大小 164 | Returns 165 | ------- 166 | boxes: 物体框的位置 167 | """ 168 | box_yx = box_xy[..., ::-1] 169 | box_hw = box_wh[..., ::-1] 170 | input_shape = tf.cast(input_shape, dtype = tf.float32) 171 | image_shape = tf.cast(image_shape, dtype = tf.float32) 172 | new_shape = tf.round(image_shape * tf.reduce_min(input_shape / image_shape)) 173 | offset = (input_shape - new_shape) / 2. / input_shape 174 | scale = input_shape / new_shape 175 | box_yx = (box_yx - offset) * scale 176 | box_hw *= scale 177 | 178 | box_mins = box_yx - (box_hw / 2.) 179 | box_maxes = box_yx + (box_hw / 2.) 180 | boxes = tf.concat([ 181 | box_mins[..., 0:1], 182 | box_mins[..., 1:2], 183 | box_maxes[..., 0:1], 184 | box_maxes[..., 1:2] 185 | ], axis = -1) 186 | boxes *= tf.concat([image_shape, image_shape], axis = -1) 187 | return boxes 188 | 189 | 190 | 191 | def _get_feats(self, feats, anchors, num_classes, input_shape): 192 | """ 193 | Introduction 194 | ------------ 195 | 根据yolo最后一层的输出确定bounding box 196 | Parameters 197 | ---------- 198 | feats: yolo模型最后一层输出 199 | anchors: anchors的位置 200 | num_classes: 类别数量 201 | input_shape: 输入大小 202 | Returns 203 | ------- 204 | box_xy, box_wh, box_confidence, box_class_probs 205 | """ 206 | num_anchors = len(anchors) 207 | anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2]) 208 | grid_size = tf.shape(feats)[1:3] 209 | predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5]) 210 | # 这里构建13*13*1*2的矩阵,对应每个格子加上对应的坐标 211 | grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1]) 212 | grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1]) 213 | grid = tf.concat([grid_x, grid_y], axis = -1) 214 | grid = tf.cast(grid, tf.float32) 215 | # 将x,y坐标归一化为占416的比例 216 | box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32) 217 | # 将w,h也归一化为占416的比例 218 | box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / tf.cast(input_shape[::-1], tf.float32) 219 | box_confidence = tf.sigmoid(predictions[..., 4:5]) 220 | box_class_probs = tf.sigmoid(predictions[..., 5:]) 221 | return box_xy, box_wh, box_confidence, box_class_probs 222 | 223 | 224 | def predict(self, output, image_shape): 225 | """ 226 | Introduction 227 | ------------ 228 | 构建预测模型 229 | Parameters 230 | ---------- 231 | inputs: 处理之后的输入图片 232 | image_shape: 图像原始大小 
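        Example
        -------
        An illustrative call (names are placeholders): given the two raw
        network output tensors `output` and the original picture size
        `image_shape`,
            boxes, scores, classes = predictor.predict(output, image_shape)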
233 |         Returns
234 |         -------
235 |             boxes: box coordinates
236 |             scores: object probabilities
237 |             classes: object classes
238 |         """
239 |         boxes, scores, classes = self.eval(output, image_shape, max_boxes = 20)
240 |         return boxes, scores, classes
241 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | import tensorflow.keras.backend as K
6 | from tensorflow.keras.layers import Input, Lambda
7 | from tensorflow.keras.models import Model
8 | from tensorflow.keras.optimizers import Adam
9 | from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping
10 | from nets.yolo4_tiny import yolo_body
11 | from nets.loss import yolo_loss
12 | from utils.utils import get_random_data, get_random_data_with_Mosaic, rand, WarmUpCosineDecayScheduler, ModelCheckpoint
13 | import os
14 | 
15 | 
16 | #---------------------------------------------------#
17 | #   Load the classes and anchor boxes
18 | #---------------------------------------------------#
19 | def get_classes(classes_path):
20 |     '''loads the classes'''
21 |     with open(classes_path) as f:
22 |         class_names = f.readlines()
23 |     class_names = [c.strip() for c in class_names]
24 |     return class_names
25 | 
26 | def get_anchors(anchors_path):
27 |     '''loads the anchors from a file'''
28 |     with open(anchors_path) as f:
29 |         anchors = f.readline()
30 |     anchors = [float(x) for x in anchors.split(',')]
31 |     return np.array(anchors).reshape(-1, 2)
32 | 
33 | #---------------------------------------------------#
34 | #   Training data generator
35 | #---------------------------------------------------#
36 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
37 |     '''data generator for fit_generator'''
38 |     n = len(annotation_lines)
39 |     i = 0
40 |     flag = True
41 |     while True:
42 |         image_data = []
43 |         box_data = []
44 |         for b in range(batch_size):
45 |             if i==0:
46 |                 np.random.shuffle(annotation_lines)
47 |             if mosaic:
48 |                 if flag and (i+4) < n:
49 |                     image, box = get_random_data_with_Mosaic(annotation_lines[i:i+4], input_shape)
50 |                     i = (i+4) % n
51 |                 else:
52 |                     image, box = get_random_data(annotation_lines[i], input_shape)
53 |                     i = (i+1) % n
54 |                 flag = bool(1-flag)
55 |             else:
56 |                 image, box = get_random_data(annotation_lines[i], input_shape)
57 |                 i = (i+1) % n
58 |             image_data.append(image)
59 |             box_data.append(box)
60 |         image_data = np.array(image_data)
61 |         box_data = np.array(box_data)
62 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
63 |         yield [image_data, *y_true], np.zeros(batch_size)
64 | 
65 | 
66 | #---------------------------------------------------#
67 | #   Read the annotation boxes and produce y_true
68 | #---------------------------------------------------#
69 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
70 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
71 |     # There are two feature layers in total
72 |     num_layers = len(anchors)//3
73 |     # The anchor indices assigned to each feature layer
74 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
75 | 
76 |     true_boxes = np.array(true_boxes, dtype='float32')
77 |     input_shape = np.array(input_shape, dtype='int32')
78 |     # Compute the center point and the width/height of every ground-truth box
79 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
80 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
81 |     # Normalize them to fractions of the input size
82 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
83 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
84 | 
85 |     # m is the batch size; the grid shapes are input_shape/32 and input_shape/16
86 |     m = true_boxes.shape[0]
87 |     grid_shapes = [input_shape//{0:32, 1:16}[l] for l in range(num_layers)]
88 |     # Allocate the empty y_true targets, one per feature layer
89 |     y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5+num_classes),
90 |         dtype='float32') for l in range(num_layers)]
91 | 
92 |     # [1,n,2]
93 |     anchors = np.expand_dims(anchors, 0)
94 |     anchor_maxes = anchors / 2.
95 |     anchor_mins = -anchor_maxes
96 | 
97 |     # Only boxes with positive width are valid
98 |     valid_mask = boxes_wh[..., 0]>0
99 | 
100 |     for b in range(m):
101 |         # Process each image
102 |         wh = boxes_wh[b, valid_mask[b]]
103 |         if len(wh)==0: continue
104 |         # [n,1,2]
105 |         wh = np.expand_dims(wh, -2)
106 |         box_maxes = wh / 2.
107 |         box_mins = -box_maxes
108 | 
109 |         # Find which anchor box fits each ground-truth box best
110 |         intersect_mins = np.maximum(box_mins, anchor_mins)
111 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
112 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
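        # Worked example (hypothetical numbers, added for illustration): a 40x38
        # ground-truth box scored against a 43x39 anchor, both centered at the
        # origin, intersects in min(40,43)*min(38,39) = 1520; the union is
        # 1520 + 1677 - 1520 = 1677, so iou = 1520/1677 ≈ 0.91 and that anchor
        # wins the argmax below.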
113 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
114 |         box_area = wh[..., 0] * wh[..., 1]
115 |         anchor_area = anchors[..., 0] * anchors[..., 1]
116 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
117 |         # The result has shape (n). Thanks to 消尽不死鸟 for the reminder
118 |         best_anchor = np.argmax(iou, axis=-1)
119 | 
120 |         for t, n in enumerate(best_anchor):
121 |             for l in range(num_layers):
122 |                 if n in anchor_mask[l]:
123 |                     # floor rounds down
124 |                     i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
125 |                     j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
126 |                     # Locate the cell of feature layer l in image b that owns this ground-truth box
127 |                     k = anchor_mask[l].index(n)
128 |                     c = true_boxes[b,t, 4].astype('int32')
129 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
130 |                     y_true[l][b, j, i, k, 4] = 1
131 |                     y_true[l][b, j, i, k, 5+c] = 1
132 | 
133 |     return y_true
134 | 
135 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
136 | for gpu in gpus:
137 |     tf.config.experimental.set_memory_growth(gpu, True)
138 | 
139 | 
140 | #----------------------------------------------------#
141 | #   Reference video for computing detection mAP and PR curves:
142 | #   https://www.bilibili.com/video/BV1zE411u7Vw
143 | #----------------------------------------------------#
144 | if __name__ == "__main__":
145 |     # Path to the annotation file
146 |     annotation_path = '2007_train.txt'
147 |     # Paths to the classes and anchors files
148 |     classes_path = 'model_data/new_class.txt'
149 |     anchors_path = 'model_data/yolo_anchors.txt'
150 |     # Path to the pretrained weights
151 |     weights_path = 'logs_1/last1.h5'
152 |     # Load the classes and anchors
153 |     class_names = get_classes(classes_path)
154 |     anchors = get_anchors(anchors_path)
155 |     # Total number of classes
156 |     num_classes = len(class_names)
157 |     num_anchors = len(anchors)
158 |     # Directory where the trained model is saved
159 |     log_dir = 'logs_2/'
160 |     #----------------------------------------------#
161 |     #   Input shape
162 |     #   Use 416x416 if GPU memory is small
163 |     #   Use 608x608 if GPU memory is large
164 |     #----------------------------------------------#
165 |     input_shape = (320,320)
166 |     mosaic = False
167 |     Cosine_scheduler = False
168 |     label_smoothing = 0
169 | 
170 |     # Clear the session
171 |     K.clear_session()
172 | 
173 |     # The input image placeholder
174 |     image_input = Input(shape=(None, None, 3))
175 |     h, w = input_shape
176 | 
177 |     # Build the yolo model
178 |     print('Create YOLOv4-tiny model with {} anchors and {} classes.'.format(num_anchors, num_classes))
179 |     model_body = yolo_body(image_input, num_anchors//2, num_classes)
180 | 
181 |     model_body.summary()
182 | 
183 |     if not os.path.exists(log_dir):
184 |         os.makedirs(log_dir)
185 |     json_config = model_body.to_json()
186 |     with open(log_dir + 'model_config.json', 'w') as json_file:
187 |         json_file.write(json_config)
188 |     #-------------------------------------------#
189 |     #   See the README for the weight file download
190 |     #-------------------------------------------#
191 |     print('Load weights {}.'.format(weights_path))
192 |     model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
193 | 
194 |     # y_true is 13,13,3,85
195 |     # and 26,26,3,85
196 |     y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)]
197 | 
198 |     # The inputs are *model_body.input, *y_true
199 |     # The output is model_loss
200 |     loss_input = [*model_body.output, *y_true]
201 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
202 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing})(loss_input)
203 | 
204 |     model = Model([model_body.input, *y_true], model_loss)
205 | 
206 |     # Training settings
207 |     logging = TensorBoard(log_dir=log_dir)
208 |     checkpoint = ModelCheckpoint(log_dir +
'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', 209 | monitor='val_loss', save_weights_only=True, save_best_only=False, period=1) 210 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) 211 | 212 | # 0.1用于验证,0.9用于训练 213 | val_split = 0.1 214 | with open(annotation_path) as f: 215 | lines = f.readlines() 216 | np.random.seed(10101) 217 | np.random.shuffle(lines) 218 | np.random.seed(None) 219 | num_val = int(len(lines)*val_split) 220 | num_train = len(lines) - num_val 221 | 222 | freeze_layers = 60 223 | for i in range(freeze_layers): model_body.layers[i].trainable = False 224 | print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers))) 225 | 226 | #------------------------------------------------------# 227 | # 主干特征提取网络特征通用,冻结训练可以加快训练速度 228 | # 也可以在训练初期防止权值被破坏。 229 | # Init_Epoch为起始世代 230 | # Freeze_Epoch为冻结训练的世代 231 | # Epoch总训练世代 232 | # 提示OOM或者显存不足请调小Batch_size 233 | #------------------------------------------------------# 234 | if True: 235 | Init_epoch = 0 236 | Freeze_epoch = 1 237 | # batch_size大小,每次喂入多少数据 238 | batch_size = 16 239 | # 最大学习率 240 | learning_rate_base = 1e-3 241 | if Cosine_scheduler: 242 | # 预热期 243 | warmup_epoch = int((Freeze_epoch-Init_epoch)*0.2) 244 | # 总共的步长 245 | total_steps = int((Freeze_epoch-Init_epoch) * num_train / batch_size) 246 | # 预热步长 247 | warmup_steps = int(warmup_epoch * num_train / batch_size) 248 | # 学习率 249 | reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base, 250 | total_steps=total_steps, 251 | warmup_learning_rate=1e-4, 252 | warmup_steps=warmup_steps, 253 | hold_base_rate_steps=num_train, 254 | min_learn_rate=1e-6 255 | ) 256 | model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) 257 | else: 258 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1) 259 | model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) 260 | 261 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 262 | model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic), 263 | steps_per_epoch=max(1, num_train//batch_size), 264 | validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False), 265 | validation_steps=max(1, num_val//batch_size), 266 | epochs=Freeze_epoch, 267 | initial_epoch=Init_epoch, 268 | callbacks=[logging, checkpoint, reduce_lr, early_stopping]) 269 | model.save_weights(log_dir + 'trained_weights_stage_1.h5') 270 | 271 | for i in range(freeze_layers): model_body.layers[i].trainable = True 272 | 273 | # 解冻后训练 274 | if True: 275 | Freeze_epoch = 1 276 | Epoch = 11 277 | # batch_size大小,每次喂入多少数据 278 | batch_size = 16 279 | 280 | # 最大学习率 281 | learning_rate_base = 1e-4 282 | if Cosine_scheduler: 283 | # 预热期 284 | warmup_epoch = int((Epoch-Freeze_epoch)*0.2) 285 | # 总共的步长 286 | total_steps = int((Epoch-Freeze_epoch) * num_train / batch_size) 287 | # 预热步长 288 | warmup_steps = int(warmup_epoch * num_train / batch_size) 289 | # 学习率 290 | reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base, 291 | total_steps=total_steps, 292 | warmup_learning_rate=1e-5, 293 | warmup_steps=warmup_steps, 294 | hold_base_rate_steps=num_train//2, 295 | min_learn_rate=1e-6 296 | ) 297 | model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) 298 | else: 299 | 
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1) 300 | model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) 301 | 302 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 303 | model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic), 304 | steps_per_epoch=max(1, num_train//batch_size), 305 | validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False), 306 | validation_steps=max(1, num_val//batch_size), 307 | epochs=Epoch, 308 | initial_epoch=Freeze_epoch, 309 | callbacks=[logging, checkpoint, reduce_lr, early_stopping]) 310 | model.save_weights(log_dir + 'last1.h5') 311 | #json_config = model.to_json() 312 | #with open(log_dir + 'model_config.json', 'w') as json_file: 313 | # json_file.write(json_config) 314 | 315 | -------------------------------------------------------------------------------- /yolo_fastest_tensorflow2/train.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import tensorflow.keras.backend as K 6 | from tensorflow.keras.layers import Input, Lambda 7 | from tensorflow.keras.models import Model 8 | from tensorflow.keras.optimizers import Adam 9 | from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping 10 | from nets.yolo_fastest import yolo_body 11 | from nets.loss import yolo_loss 12 | from utils.utils import get_random_data, get_random_data_with_Mosaic, rand, WarmUpCosineDecayScheduler, ModelCheckpoint 13 | import os 14 | 15 | 16 | #---------------------------------------------------# 17 | # 获得类和先验框 18 | #---------------------------------------------------# 19 | def get_classes(classes_path): 20 | '''loads the classes''' 21 | with open(classes_path) as f: 22 | class_names = f.readlines() 23 | class_names = [c.strip() for c in class_names] 24 | return class_names 25 | 26 | def get_anchors(anchors_path): 27 | '''loads the anchors from a file''' 28 | with open(anchors_path) as f: 29 | anchors = f.readline() 30 | anchors = [float(x) for x in anchors.split(',')] 31 | return np.array(anchors).reshape(-1, 2) 32 | 33 | #---------------------------------------------------# 34 | # 训练数据生成器 35 | #---------------------------------------------------# 36 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False): 37 | '''data generator for fit_generator''' 38 | n = len(annotation_lines) 39 | i = 0 40 | flag = True 41 | while True: 42 | image_data = [] 43 | box_data = [] 44 | for b in range(batch_size): 45 | if i==0: 46 | np.random.shuffle(annotation_lines) 47 | if mosaic: 48 | if flag and (i+4) < n: 49 | image, box = get_random_data_with_Mosaic(annotation_lines[i:i+4], input_shape) 50 | i = (i+4) % n 51 | else: 52 | image, box = get_random_data(annotation_lines[i], input_shape) 53 | i = (i+1) % n 54 | flag = bool(1-flag) 55 | else: 56 | image, box = get_random_data(annotation_lines[i], input_shape) 57 | i = (i+1) % n 58 | image_data.append(image) 59 | box_data.append(box) 60 | image_data = np.array(image_data) 61 | box_data = np.array(box_data) 62 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 63 | yield [image_data, *y_true], np.zeros(batch_size) 64 | 65 | 66 | #---------------------------------------------------# 67 | # 
Read the annotation boxes and produce y_true
68 | #---------------------------------------------------#
69 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
70 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
71 |     # There are two feature layers in total
72 |     num_layers = len(anchors)//3
73 |     # The anchor indices assigned to each feature layer
74 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
75 | 
76 |     true_boxes = np.array(true_boxes, dtype='float32')
77 |     input_shape = np.array(input_shape, dtype='int32')
78 |     # Compute the center point and the width/height of every ground-truth box
79 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
80 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
81 |     # Normalize them to fractions of the input size
82 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
83 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
84 | 
85 |     # m is the batch size; the grid shapes are input_shape/32 and input_shape/16
86 |     m = true_boxes.shape[0]
87 |     grid_shapes = [input_shape//{0:32, 1:16}[l] for l in range(num_layers)]
88 |     # Allocate the empty y_true targets, one per feature layer
89 |     y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5+num_classes),
90 |         dtype='float32') for l in range(num_layers)]
91 | 
92 |     # [1,n,2]
93 |     anchors = np.expand_dims(anchors, 0)
94 |     anchor_maxes = anchors / 2.
95 |     anchor_mins = -anchor_maxes
96 | 
97 |     # Only boxes with positive width are valid
98 |     valid_mask = boxes_wh[..., 0]>0
99 | 
100 |     for b in range(m):
101 |         # Process each image
102 |         wh = boxes_wh[b, valid_mask[b]]
103 |         if len(wh)==0: continue
104 |         # [n,1,2]
105 |         wh = np.expand_dims(wh, -2)
106 |         box_maxes = wh / 2.
107 |         box_mins = -box_maxes
108 | 
109 |         # Find which anchor box fits each ground-truth box best
110 |         intersect_mins = np.maximum(box_mins, anchor_mins)
111 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
112 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
113 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
114 |         box_area = wh[..., 0] * wh[..., 1]
115 |         anchor_area = anchors[..., 0] * anchors[..., 1]
116 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
117 |         # The result has shape (n). Thanks to 消尽不死鸟 for the reminder
118 |         best_anchor = np.argmax(iou, axis=-1)
119 | 
120 |         for t, n in enumerate(best_anchor):
121 |             for l in range(num_layers):
122 |                 if n in anchor_mask[l]:
123 |                     # floor rounds down
124 |                     i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
125 |                     j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
126 |                     # Locate the cell of feature layer l in image b that owns this ground-truth box
127 |                     k = anchor_mask[l].index(n)
128 |                     c = true_boxes[b,t, 4].astype('int32')
129 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
130 |                     y_true[l][b, j, i, k, 4] = 1
131 |                     y_true[l][b, j, i, k, 5+c] = 1
132 | 
133 |     return y_true
134 | 
135 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
136 | for gpu in gpus:
137 |     tf.config.experimental.set_memory_growth(gpu, True)
138 | 
139 | 
140 | #----------------------------------------------------#
141 | #   Reference video for computing detection mAP and PR curves:
142 | #   https://www.bilibili.com/video/BV1zE411u7Vw
143 | #----------------------------------------------------#
144 | if __name__ == "__main__":
145 |     # Path to the annotation file
146 |     annotation_path = '2007_train.txt'
147 |     # Paths to the classes and anchors files
148 |     classes_path = 'model_data/new_class.txt'
149 |     anchors_path = 'model_data/yolo_anchors.txt'
150 |     # Path to the pretrained weights
151 |     weights_path = 'logs_6/ep003-loss14.097-val_loss13.660.h5'
152 |     # Load the classes and anchors
153 |     class_names = get_classes(classes_path)
154 |     anchors = get_anchors(anchors_path)
155 |     # Total number of classes
156 |     num_classes = len(class_names)
157 |     num_anchors = len(anchors)
158 |     # Directory where the trained model is saved
159 |     log_dir = 'logs_7/'
160 |     #----------------------------------------------#
161 |     #   Input shape
162 |     #   Use 416x416 if GPU memory is small
163 |     #   Use 608x608 if GPU memory is large
164 |     #----------------------------------------------#
165 |     input_shape = (320,320)
166 |     mosaic = False
167 |     Cosine_scheduler = False
168 |     label_smoothing = 0
169 | 
170 |     # Clear the session
171 |     K.clear_session()
172 | 
173 |     # The input image placeholder
174 |     image_input = Input(shape=(None, None, 3))
175 |     h, w = input_shape
176 | 
177 |     # Build the yolo model
178 |     print('Create Yolo-Fastest model with {} anchors and {} classes.'.format(num_anchors, num_classes))
179 |     model_body = yolo_body(image_input, num_anchors//2, num_classes)
180 | 
181 |     model_body.summary()
182 | 
183 |     if not os.path.exists(log_dir):
184 |         os.makedirs(log_dir)
185 |     json_config = model_body.to_json()
186 |     with open(log_dir + 'model_config.json', 'w') as json_file:
187 |         json_file.write(json_config)
188 |     #-------------------------------------------#
189 |     #   See the README for the weight file download
190 |     #-------------------------------------------#
191 |     print('Load weights {}.'.format(weights_path))
192 |     model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
193 | 
194 |     # y_true is 13,13,3,85
195 | 
196 | y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)]
197 | 
198 | # inputs are *model_body.input, *y_true
199 | # output is model_loss
200 | loss_input = [*model_body.output, *y_true]
201 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
202 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing})(loss_input)
203 | 
204 | model = Model([model_body.input, *y_true], model_loss)
205 | 
206 | # training callbacks
207 | logging = TensorBoard(log_dir=log_dir)
208 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
209 | monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
210 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
211 | 
212 | # 0.1 for validation, 0.9 for training
213 | val_split = 0.1
214 | with open(annotation_path) as f:
215 | lines = f.readlines()
216 | np.random.seed(10101)
217 | np.random.shuffle(lines)
218 | np.random.seed(None)
219 | num_val = int(len(lines)*val_split)
220 | num_train = len(lines) - num_val
221 | 
222 | freeze_layers = 60
223 | for i in range(freeze_layers): model_body.layers[i].trainable = False
224 | print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
225 | 
226 | #------------------------------------------------------#
227 | # the backbone features are generic; freezing them speeds up training
228 | # and protects the weights from being destroyed early on.
229 | # Init_Epoch is the starting epoch
230 | # Freeze_Epoch is the number of epochs trained with the backbone frozen
231 | # Epoch is the total number of training epochs
232 | # if you hit OOM or run out of GPU memory, reduce Batch_size
233 | #------------------------------------------------------#
234 | if True:
235 | Init_epoch = 0
236 | Freeze_epoch = 0
237 | # batch_size: how many samples are fed per step
238 | batch_size = 32
239 | # maximum learning rate
240 | learning_rate_base = 1e-3
241 | if Cosine_scheduler:
242 | # warm-up epochs
243 | warmup_epoch = int((Freeze_epoch-Init_epoch)*0.2)
244 | # total number of steps
245 | total_steps = int((Freeze_epoch-Init_epoch) * num_train / batch_size)
246 | # warm-up steps
247 | warmup_steps = int(warmup_epoch * num_train / batch_size)
248 | # learning-rate schedule
249 | reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
250 | total_steps=total_steps,
251 | warmup_learning_rate=1e-4,
252 | warmup_steps=warmup_steps,
253 | hold_base_rate_steps=num_train,
254 | min_learn_rate=1e-6
255 | )
256 | model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
257 | else:
258 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
259 | model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
260 | 
261 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
262 | model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
263 | steps_per_epoch=max(1, num_train//batch_size),
264 | validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
265 | validation_steps=max(1, num_val//batch_size),
266 | epochs=Freeze_epoch,
267 | initial_epoch=Init_epoch,
268 | callbacks=[logging, checkpoint, reduce_lr, early_stopping])
269 | model.save_weights(log_dir + 'trained_weights_stage_1.h5')
270 | 
271 | for i in range(freeze_layers): model_body.layers[i].trainable = True
272 | 
273 | # training after unfreezing
274 | if True:
275 | Freeze_epoch = 0
276 | Epoch = 100
277 | # batch_size: how many samples are fed per step
278 | batch_size = 16
279 | 
280 | # maximum learning rate
281 | learning_rate_base = 1e-4
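# a 10x smaller base rate than the frozen stage, since every layer now receives updates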
282 | if Cosine_scheduler:
283 | # warm-up epochs
284 | warmup_epoch = int((Epoch-Freeze_epoch)*0.2)
285 | # total number of steps
286 | total_steps = int((Epoch-Freeze_epoch) * num_train / batch_size)
287 | # warm-up steps
288 | warmup_steps = int(warmup_epoch * num_train / batch_size)
289 | # learning-rate schedule
290 | reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
291 | total_steps=total_steps,
292 | warmup_learning_rate=1e-5,
293 | warmup_steps=warmup_steps,
294 | hold_base_rate_steps=num_train//2,
295 | min_learn_rate=1e-6
296 | )
297 | model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
298 | else:
299 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
300 | model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
301 | 
302 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
303 | model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
304 | steps_per_epoch=max(1, num_train//batch_size),
305 | validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
306 | validation_steps=max(1, num_val//batch_size),
307 | epochs=Epoch,
308 | initial_epoch=Freeze_epoch,
309 | callbacks=[logging, checkpoint, reduce_lr, early_stopping])
310 | model.save_weights(log_dir + 'last1.h5')
311 | #json_config = model.to_json()
312 | #with open(log_dir + 'model_config.json', 'w') as json_file:
313 | # json_file.write(json_config)
314 | 
315 | 
-------------------------------------------------------------------------------- /Model_pruning/compressor.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | 
4 | import logging
5 | import tensorflow as tf
6 | #tf.compat.v1.disable_eager_execution()
7 | 
8 | import numpy as np
9 | from . import default_layers
10 | from kerassurgeon import Surgeon
11 | tf.config.experimental_run_functions_eagerly(True)
12 | 
13 | _logger = logging.getLogger(__name__)
14 | 
15 | 
16 | class LayerInfo:
17 | def __init__(self, keras_layer):
18 | self.keras_layer = keras_layer
19 | self.name = keras_layer.name
20 | self.type = default_layers.get_op_type(type(keras_layer))
21 | self.weight_index = default_layers.get_weight_index(self.type)
22 | if self.weight_index is not None:
23 | self.weight = keras_layer.weights[self.weight_index]
24 | self._call = None
25 | 
26 | class Compressor:
27 | """
28 | Abstract base TensorFlow compressor
29 | """
30 | 
31 | def __init__(self, model, config_list):
32 | """
33 | Record necessary info in class members
34 | 
35 | Parameters
36 | ----------
37 | model : keras model
38 | the model the user wants to compress
39 | config_list : list
40 | the configurations that users specify for compression
41 | """
42 | self.bound_model = model
43 | self.config_list = config_list
44 | self.modules_to_compress = []
45 | self.modules_to_delete = []
46 | 
47 | def detect_modules_to_compress(self):
48 | """
49 | detect all modules that should be compressed, and save the result in `self.modules_to_compress`.
50 | 
51 | The model will be instrumented and the user should never edit it after calling this method.
52 | """
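# NOTE: the name check below hard-codes the single layer to prune;
# change 'conv2d_50' to match a layer name printed by model.summary()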
53 | if self.modules_to_compress is not None:
54 | self.modules_to_compress = []
55 | for keras_layer in self.bound_model.layers:
56 | layer = LayerInfo(keras_layer)
57 | config = self.select_config(layer)
58 | if (config is not None) and (layer.name == 'conv2d_50'): # choose which layer to prune
59 | self.modules_to_compress.append((layer, config))
60 | self.modules_to_delete.append((keras_layer, config))
61 | return self.modules_to_compress, self.modules_to_delete
62 | 
63 | def compress(self):
64 | """
65 | Compress the model with the algorithm implemented by the subclass.
66 | 
67 | The model will be instrumented and the user should never edit it after calling this method.
68 | `self.modules_to_compress` records all the to-be-compressed layers.
69 | """
70 | modules_to_compress, ignore_ = self.detect_modules_to_compress()
71 | for layer, config in modules_to_compress:
72 | self._instrument_layer(layer, config)
73 | return self.bound_model
74 | 
75 | def compress_model(self):
76 | """
77 | Compress the model with the algorithm implemented by the subclass.
78 | 
79 | The model will be instrumented and the user should never edit it after calling this method.
80 | `self.modules_to_compress` records all the to-be-compressed layers.
81 | """
82 | ignore_, modules_to_compress = self.detect_modules_to_compress()
83 | for layer, config in modules_to_compress:
84 | layer_1 = LayerInfo(layer)
85 | self.bound_model = self.Prun_channel(layer_1, layer, config)
86 | #a_list = self.Prun_channel(layer_1, layer, config)
87 | return self.bound_model
88 | 
89 | def compress_model_1(self, channels_p):
90 | """
91 | Compress the model with the algorithm implemented by the subclass.
92 | 
93 | The model will be instrumented and the user should never edit it after calling this method.
94 | `self.modules_to_compress` records all the to-be-compressed layers.
95 | """
96 | ignore_, modules_to_compress = self.detect_modules_to_compress()
97 | for layer, config in modules_to_compress:
98 | layer_1 = LayerInfo(layer)
99 | #self.bound_model = self.Prun_channel(layer_1, layer, config)
100 | self.bound_model = self.Prun_channel_1(layer_1, layer, config, channels_p)
101 | return self.bound_model
102 | 
103 | def get_modules_to_compress(self):
104 | """
105 | To obtain all the to-be-compressed layers.
106 | 
107 | Returns
108 | -------
109 | self.modules_to_compress : list
110 | a list of the layers, each of which is a tuple (`layer`, `config`),
111 | `layer` is `LayerInfo`, `config` is a `dict`
112 | """
113 | return self.modules_to_compress
114 | 
115 | def select_config(self, layer):
116 | """
117 | Find the configuration for `layer` by parsing `self.config_list`
118 | 
119 | Parameters
120 | ----------
121 | layer: LayerInfo
122 | one layer
123 | 
124 | Returns
125 | -------
126 | ret : config or None
127 | the retrieved configuration for this layer; if None, this layer should
128 | not be compressed
129 | """
130 | ret = None
131 | if layer.type is None:
132 | return None
133 | for config in self.config_list:
134 | config = config.copy()
135 | config['op_types'] = self._expand_config_op_types(config)
136 | if layer.type not in config['op_types']:
137 | continue
138 | if config.get('op_names') and layer.name not in config['op_names']:
139 | continue
140 | ret = config
141 | if ret is None or ret.get('exclude'):
142 | return None
143 | return ret
144 | 
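# select_config scans config_list in order and keeps overwriting ret, so when
# several entries match the same layer, the last matching entry wins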
145 | def update_epoch(self, epoch):
146 | """
147 | If the user wants to update the model every epoch, they can override this method.
148 | This method should be called at the beginning of each epoch.
149 | 
150 | Parameters
151 | ----------
152 | epoch : num
153 | the current epoch number
154 | """
155 | 
156 | def step(self):
157 | """
158 | If the user wants to update the mask every step, they can override this method.
159 | """
160 | 
161 | 
162 | def _instrument_layer(self, layer, config):
163 | """
164 | This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
165 | 
166 | Parameters
167 | ----------
168 | layer : LayerInfo
169 | the layer to instrument the compression operation
170 | config : dict
171 | the configuration for compressing this layer
172 | """
173 | raise NotImplementedError()
174 | 
175 | def Prun_channel(self, layer, layer_1, config):
176 | """
177 | This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
178 | 
179 | Parameters
180 | ----------
181 | layer : LayerInfo
182 | the layer to compress; `layer_1` is the raw keras layer handed to kerassurgeon
183 | config : dict
184 | the configuration for compressing this layer
185 | """
186 | raise NotImplementedError()
187 | 
188 | def Prun_channel_1(self, layer, layer_1, config, channels_p):
189 | """
190 | This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
191 | 
192 | Parameters
193 | ----------
194 | layer : LayerInfo
195 | the layer to compress; `layer_1` is the raw keras layer handed to kerassurgeon
196 | config : dict
197 | the configuration for compressing this layer
198 | """
199 | raise NotImplementedError()
200 | 
201 | def _expand_config_op_types(self, config):
202 | if config is None:
203 | return []
204 | op_types = []
205 | 
206 | for op_type in config.get('op_types', []):
207 | if op_type == 'default':
208 | op_types.extend(default_layers.default_layers)
209 | else:
210 | op_types.append(op_type)
211 | return op_types
212 | 
213 | 
214 | class Pruner(Compressor):
215 | """
216 | Abstract base TensorFlow pruner
217 | """
218 | 
219 | def calc_mask(self, layer, config):
220 | """
221 | Pruners should overload this method to provide a mask for weight tensors.
222 | The mask must have the same shape and type as the weight.
223 | It will be applied with a `mul()` operation on the weight.
224 | This method is effectively hooked to the `forward()` method of the model.
225 | 
226 | Parameters
227 | ----------
228 | layer : LayerInfo
229 | calculate mask for `layer`'s weight
230 | config : dict
231 | the configuration for generating the mask
232 | """
233 | raise NotImplementedError("Pruners must overload calc_mask()")
234 | 
235 | def _instrument_layer(self, layer, config):
236 | """
237 | Create a wrapper forward function to replace the original one.
238 | 239 | Parameters 240 | ---------- 241 | layer : LayerInfo 242 | the layer to instrument the mask 243 | config : dict 244 | the configuration for generating the mask 245 | """ 246 | layer._call = layer.keras_layer.call 247 | 248 | def new_call(*inputs): 249 | weights = [x.numpy() for x in layer.keras_layer.weights] 250 | mask = self.calc_mask(layer, config) 251 | weights[layer.weight_index] = weights[layer.weight_index] * mask 252 | layer.keras_layer.set_weights(weights) 253 | ret = layer._call(*inputs) 254 | return ret 255 | 256 | layer.keras_layer.call = new_call 257 | 258 | def Prun_channel(self, layer, layer_1, config): 259 | weight = layer.weight 260 | op_type = layer.type 261 | op_name = layer.name 262 | assert 0 <= config.get('sparsity') < 1 263 | assert op_type in ['Conv1D', 'Conv2D'] 264 | assert op_type in config['op_types'] 265 | 266 | # op_name = layer.name 267 | # assert 0 <= config.get('sparsity') < 1 268 | # assert op_type in ['Conv1D', 'Conv2D'] 269 | # assert op_type in config['op_types'] 270 | 271 | if layer.name in self.epoch_pruned_layers: 272 | assert layer.name in self.mask_dict 273 | return self.mask_dict.get(layer.name) 274 | 275 | try: 276 | w = tf.stop_gradient(tf.transpose(tf.reshape(weight, (-1, weight.shape[-1])), [1, 0])) 277 | masks = np.ones(w.shape) 278 | num_filters = w.shape[0] 279 | num_prune = int(num_filters * config.get('sparsity')) 280 | if num_filters < 2 or num_prune < 1: 281 | return masks 282 | min_gm_idx = self._get_min_gm_kernel_idx_m(w, num_prune) 283 | 284 | surgeon = Surgeon(self.bound_model, copy=False) 285 | channels = min_gm_idx 286 | surgeon.add_job('delete_channels', layer_1, channels=channels) 287 | 288 | #for idx in min_gm_idx: 289 | # masks[idx] = 0. 290 | finally: 291 | masks = tf.reshape(tf.transpose(masks, [1, 0]), weight.shape) 292 | masks = tf.Variable(masks) 293 | self.mask_dict.update({op_name: masks}) 294 | self.epoch_pruned_layers.add(layer.name) 295 | 296 | return surgeon.operate() 297 | #return min_gm_idx 298 | 299 | 300 | def Prun_channel_1(self, layer, layer_1, config, channels_p): 301 | weight = layer.weight 302 | op_type = layer.type 303 | op_name = layer.name 304 | assert 0 <= config.get('sparsity') < 1 305 | assert op_type in ['Conv1D', 'Conv2D'] 306 | assert op_type in config['op_types'] 307 | 308 | # op_name = layer.name 309 | # assert 0 <= config.get('sparsity') < 1 310 | # assert op_type in ['Conv1D', 'Conv2D'] 311 | # assert op_type in config['op_types'] 312 | 313 | #if layer.name in self.epoch_pruned_layers: 314 | # assert layer.name in self.mask_dict 315 | # return self.mask_dict.get(layer.name) 316 | 317 | try: 318 | w = tf.stop_gradient(tf.transpose(tf.reshape(weight, (-1, weight.shape[-1])), [1, 0])) 319 | masks = np.ones(w.shape) 320 | num_filters = w.shape[0] 321 | num_prune = int(num_filters * config.get('sparsity')) 322 | if num_filters < 2 or num_prune < 1: 323 | return masks 324 | #min_gm_idx = self._get_min_gm_kernel_idx_m(w, num_prune) 325 | 326 | surgeon = Surgeon(self.bound_model, copy=False) 327 | channels = channels_p 328 | surgeon.add_job('delete_channels', layer_1, channels=channels) 329 | 330 | #for idx in min_gm_idx: 331 | # masks[idx] = 0. 
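# NOTE: the mask bookkeeping in the finally-block below is kept for compatibility;
# the effective pruning is the Surgeon 'delete_channels' job added above, which
# physically removes the selected channels when surgeon.operate() runs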
332 | finally:
333 | masks = tf.reshape(tf.transpose(masks, [1, 0]), weight.shape)
334 | masks = tf.Variable(masks)
335 | self.mask_dict.update({op_name: masks})
336 | self.epoch_pruned_layers.add(layer.name)
337 | 
338 | return surgeon.operate()
339 | #return min_gm_idx
340 | 
341 | 
342 | def _get_min_gm_kernel_idx_m(self, weight, n):
343 | # rank filters by their summed distance to all other filters (FPGM criterion)
344 | dist_list = []
345 | 
346 | for out_i in range(weight.shape[0]):
347 | dist_sum = self._get_distance_sum_m(weight, out_i)
348 | dist_list.append((dist_sum, out_i))
349 | #dist_list.append(dist_sum)
350 | 
351 | # keep the n filters closest to the geometric median
352 | min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n]
353 | 
354 | #min_gm_kernels = dist_list[:n]
355 | #size_a = tf.size(dist_list)
356 | #min_gm_kernels = tf.sort(dist_list)
357 | return [x[1] for x in min_gm_kernels]
358 | #return dist_list
359 | 
360 | def _get_distance_sum_m(self, weight, out_idx):
361 | anchor_w = tf.tile(tf.expand_dims(weight[out_idx], 0), [weight.shape[0], 1])
362 | x = weight - anchor_w
363 | x = tf.math.reduce_sum((x*x), -1)
364 | x = tf.math.sqrt(x)
365 | return tf.math.reduce_sum(x)
366 | 
367 | 
368 | class Quantizer(Compressor):
369 | """
370 | Abstract base TensorFlow quantizer
371 | """
372 | 
373 | def quantize_weight(self, weight, config, op, op_type, op_name):
374 | raise NotImplementedError("Quantizer must overload quantize_weight()")
375 | 
-------------------------------------------------------------------------------- /core/evaluation.py: --------------------------------------------------------------------------------
1 | # Copyright 2020 Xilinx Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # --------------------------------------------------------
16 | # Code of "Evaluate classification or detection performance"
17 | # python version
18 | # Written by Lu Tian
19 | # --------------------------------------------------------
20 | 
21 | import argparse
22 | import numpy as np
23 | import os
24 | 
25 | 
26 | def compute_classification_accuracy(results, gts):
27 | """
28 | Evaluate classification results
29 | :param results: predicted results
30 | :param gts: ground truth
31 | :return: accuracy
32 | """
33 | num_label = len(gts[0].split(' ')) - 1
34 | image_label_gt = {}
35 | for gt in gts:
36 | gt_info = gt.split(' ')
37 | if len(gt_info) != (num_label + 1):
38 | print ('label number does not match: ' + gt_info[0])
39 | return 0
40 | image_label_gt[gt_info[0]] = np.array(gt_info[1:])
41 | 
42 | accuracy = np.zeros(num_label)
43 | count = 0
44 | image_names = set()
45 | for result in results:
46 | result_info = result.split(' ')
47 | if result_info[0] not in image_label_gt.keys():
48 | print ('could not find ground truth of image: ' + result_info[0])
49 | return 0
50 | if result_info[0] in image_names:
51 | print ('duplicate results of image: ' + result_info[0])
52 | return 0
53 | if len(result_info) != (num_label + 1):
54 | print ('wrong predicted label number of image: ' + result_info[0])
55 | return 0
56 | prediction = np.array(result_info[1:])
57 | accuracy += prediction == image_label_gt[result_info[0]]
58 | count += 1
59 | image_names.add(result_info[0])
60 | accuracy /= max(1, count)
61 | print ('evaluate ' + str(count) + ' images')
62 | return accuracy
63 | 
64 | 
65 | def voc_ap(rec, prec, use_07_metric=False):
66 | """
67 | Compute VOC AP given precision and recall.
68 | :param rec: recall
69 | :param prec: precision
70 | :param use_07_metric: uses the VOC 07 11 point method to compute VOC AP given precision and recall
71 | :return: ap
72 | """
73 | if use_07_metric:
74 | # 11 point metric
75 | ap = 0.
76 | for t in np.arange(0., 1.1, 0.1):
77 | if np.sum(rec >= t) == 0:
78 | p = 0
79 | else:
80 | p = np.max(prec[rec >= t])
81 | ap = ap + p / 11.
82 | else:
83 | # correct AP calculation
84 | # first append sentinel values at the end
85 | mrec = np.concatenate(([0.], rec, [1.]))
86 | mpre = np.concatenate(([0.], prec, [0.]))
87 | 
88 | # compute the precision envelope
89 | for i in range(mpre.size - 1, 0, -1):
90 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
91 | 
92 | # to calculate area under PR curve, look for points
93 | # where X axis (recall) changes value
94 | i = np.where(mrec[1:] != mrec[:-1])[0]
95 | 
96 | # and sum (\Delta recall) * prec
97 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
98 | return ap
99 | 
100 | 
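# Example: with rec = [0.5, 1.0] and prec = [1.0, 0.5] (use_07_metric=False),
# the precision envelope is [1.0, 1.0, 0.5] over recall [0, 0.5, 1.0], so
# voc_ap = 0.5*1.0 + 0.5*0.5 = 0.75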
101 | def compute_detection_ap(results, gts, thresh, overlap_thresh, use_07_metric=False):
102 | """
103 | Evaluate detection results
104 | :param results: image_name class_label score xmin ymin xmax ymax
105 | :param gts: image_name class_label xmin ymin xmax ymax
106 | :param thresh: only bboxes whose confidence score is above thresh are used
107 | :param overlap_thresh: threshold of IOU ratio to determine a matched bbox
108 | :param use_07_metric: uses the VOC 07 11 point method to compute VOC AP given precision and recall
109 | :return: recall, precision, ap
110 | """
111 | # load gt
112 | class_gts = {}
113 | class_num_positive = {}
114 | image_names = set()
115 | for gt in gts:
116 | gt_info = gt.split(' ')
117 | if len(gt_info) != 6 and len(gt_info) != 7:
118 | print('wrong ground truth info: ' + gt_info[0])
119 | return 0
120 | image_name = gt_info[0]
121 | class_name = gt_info[1]
122 | bbox = [float(item) for item in gt_info[2:6]]
123 | if len(gt_info) == 6:
124 | difficult = False
125 | else:
126 | difficult = bool(int(gt_info[-1]))
127 | 
128 | if class_name not in class_gts.keys():
129 | class_gts[class_name] = {}
130 | class_num_positive[class_name] = 0
131 | if image_name not in class_gts[class_name].keys():
132 | class_gts[class_name][image_name] = {'bbox': np.array([bbox]),
133 | 'hit': [False],
134 | 'difficult': [difficult]}
135 | else:
136 | class_gts[class_name][image_name]['bbox'] = np.vstack((class_gts[class_name][image_name]['bbox'],
137 | np.array(bbox)))
138 | class_gts[class_name][image_name]['hit'].append(False)
139 | class_gts[class_name][image_name]['difficult'].append(difficult)
140 | class_num_positive[class_name] += int(True ^ difficult)
141 | image_names.add(image_name)
142 | class_names = class_gts.keys()
143 | 
144 | # read dets
145 | class_dets = {}
146 | for result in results:
147 | result_info = result.split(' ')
148 | if len(result_info) != 7:
149 | print ('wrong detections info: ' + result_info[0])
150 | return 0
151 | image_name = result_info[0]
152 | class_name = result_info[1]
153 | bbox = [float(item) for item in result_info[2:]]
154 | if bbox[0] <= thresh:
155 | continue
156 | if class_name not in class_names:
157 | continue
158 | if class_name not in class_dets.keys():
159 | class_dets[class_name] = {'images': [],
160 | 'scores': [],
161 | 'bboxes': []}
162 | class_dets[class_name]['images'].append(image_name)
163 | class_dets[class_name]['scores'].append(bbox[0])
164 | class_dets[class_name]['bboxes'].append(bbox[1:])
165 | 
166 | ap = {}
167 | precision = {}
168 | recall = {}
169 | for class_name in class_names:
170 | if class_name not in class_dets.keys():
171 | ap[class_name] = 0
172 | recall[class_name] = 0
173 | precision[class_name] = 0
174 | continue
175 | 
176 | gt_images = class_gts[class_name]
177 | num_positive = class_num_positive[class_name]
178 | 
179 | det_images = class_dets[class_name]['images']
180 | det_scores = np.array(class_dets[class_name]['scores'])
181 | det_bboxes = np.array(class_dets[class_name]['bboxes'])
182 | 
183 | # sort by confidence
184 | sorted_index = np.argsort(-det_scores)
185 | det_bboxes = det_bboxes[sorted_index, :]
186 | det_images = [det_images[x] for x in sorted_index]
187 | 
188 | # go down dets and mark TPs and FPs
189 | num_dets = len(det_images)
190 | true_positive = np.zeros(num_dets)
191 | false_positive = np.zeros(num_dets)
192 | for idx in range(num_dets):
193 | if det_images[idx] not in gt_images.keys():
194 | false_positive[idx] = 1
195 | continue
196 | 
197 | gt_bboxes = gt_images[det_images[idx]]['bbox'].astype(float)
198 | gt_hit = gt_images[det_images[idx]]['hit']
199 | gt_difficult = gt_images[det_images[idx]]['difficult']
200 | det_bbox = det_bboxes[idx, :].astype(float)
201 | overlaps_max = -np.inf
202 | 
203 | if gt_bboxes.size > 0:
204 | # compute overlaps
205 | # intersection
206 | inter_xmin = np.maximum(gt_bboxes[:, 0], det_bbox[0])
207 | inter_ymin = np.maximum(gt_bboxes[:, 1], det_bbox[1])
208 | inter_xmax = np.minimum(gt_bboxes[:, 2], det_bbox[2])
209 | inter_ymax = np.minimum(gt_bboxes[:, 3], det_bbox[3])
210 | inter_width = np.maximum(inter_xmax - inter_xmin + 1., 0.)
211 | inter_height = np.maximum(inter_ymax - inter_ymin + 1., 0.)
212 | inters = inter_width * inter_height
213 | 
214 | # union
215 | unions = ((det_bbox[2] - det_bbox[0] + 1.) * (det_bbox[3] - det_bbox[1] + 1.) +
216 | (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1.) * (gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1.) - inters)
217 | 
218 | overlaps = inters / unions
219 | overlaps_max = np.max(overlaps)
220 | jmax = np.argmax(overlaps)
221 | 
222 | if overlaps_max > overlap_thresh:
223 | if not gt_difficult[jmax]:
224 | if not gt_hit[jmax]:
225 | true_positive[idx] = 1.
226 | gt_hit[jmax] = 1
227 | else:
228 | false_positive[idx] = 1.
229 | else:
230 | false_positive[idx] = 1.
231 | 
232 | # compute precision recall
233 | false_positive = np.cumsum(false_positive)
234 | true_positive = np.cumsum(true_positive)
235 | recall[class_name] = true_positive / float(num_positive)
236 | precision[class_name] = true_positive / np.maximum(true_positive + false_positive, np.finfo(np.float64).eps)
237 | ap[class_name] = voc_ap(recall[class_name], precision[class_name], use_07_metric)
238 | print ('evaluate ' + str(len(image_names)) + ' images')
239 | return recall, precision, ap
240 | 
241 | 
242 | if __name__ == '__main__':
243 | parser = argparse.ArgumentParser(description='evaluate classification or detection performance')
244 | parser.add_argument('-mode', default='detection',
245 | help='mode, detection or classification, default detection')
246 | parser.add_argument('-result_file', default='',
247 | help="""Result file in space-separated text format.
248 | For classification, each row is: image_id label [label ...].
249 | For detection, each row is: image_id label score xmin ymin xmax ymax.""")
250 | parser.add_argument('-gt_file', default='',
251 | help="""Ground truth file in space-separated text format.
252 | For classification, each row is: image_id label [label ...].
253 | For detection, each row is: image_id label xmin ymin xmax ymax [difficult_bool].""")
254 | parser.add_argument('-detection_metric', default='map',
255 | help="""Evaluation metric for detection, default map.
256 | Options are map (mean average precision), precision (given recall), recall (given precision),
257 | pr (precision and recall given threshold of confidence score).""")
258 | parser.add_argument('-detection_iou', default='0.5',
259 | help="""Threshold of IOU ratio to
260 | determine a matched bbox.""")
261 | parser.add_argument('-detection_thresh', default='0.005',
262 | help="""Threshold of confidence score for calculating evaluation metric, default 0.005.
263 | For metric = pr, detection_thresh should be the confidence score to determine a positive bbox.
264 | For other detection metrics, detection_thresh should be a very small value.""")
265 | parser.add_argument('-detection_fix_recall', default='0.8',
266 | help="""Used when detection_metric is precision, default 0.8.""")
267 | parser.add_argument('-detection_fix_precision', default='0.8',
268 | help="""Used when detection_metric is recall, default 0.8.""")
269 | parser.add_argument('-detection_use_07_metric', default='False',
270 | help="""Uses the VOC 07 11 point method to compute VOC AP given precision and recall.""")
271 | 
272 | args = parser.parse_args()
273 | 
274 | results_file = open(args.result_file, 'r')
275 | #fix for python3 JimH
276 | #results_lines = filter(None, [item.strip() for item in results_file.readlines()])
277 | results_lines = list(filter(None, [item.strip() for item in results_file.readlines()]))
278 | gts_file = open(args.gt_file, 'r')
279 | #fix for python3 JimH
280 | #gts_lines = filter(None, [item.strip() for item in gts_file.readlines()])
281 | gts_lines = list(filter(None, [item.strip() for item in gts_file.readlines()]))
282 | if len(gts_lines) < 1:
283 | print ('ground truth file is empty!')
284 | if len(results_lines) < 1:
285 | print ('result file is empty!')
286 | 
287 | if args.mode == 'classification':
288 | accuracy = compute_classification_accuracy(results_lines, gts_lines)
289 | print ('classification accuracy of each class: ' + str(accuracy))
290 | print ('mean classification accuracy: ' + str(np.mean(accuracy)))
291 | elif args.mode == 'detection':
292 | detection_thresh = float(args.detection_thresh)
293 | detection_iou = float(args.detection_iou)
294 | use_07_metric = False
295 | if args.detection_use_07_metric == 'True':
296 | use_07_metric = True
297 | recall, precision, ap = compute_detection_ap(results_lines, gts_lines, detection_thresh, detection_iou,
298 | use_07_metric)
299 | if args.detection_metric == 'map':
300 | for class_name in ap.keys():
301 | print (class_name + ' AP: ' + str(ap[class_name]))
302 | print ('mAP: ' + str((float(sum(ap.values()))) / max(1, len(ap))))
303 | elif args.detection_metric == 'precision':
304 | fix_recall = float(args.detection_fix_recall)
305 | for class_name in ap.keys():
306 | if np.sum(recall[class_name] >= fix_recall) == 0:
307 | output_precision = 0
308 | else:
309 | output_precision = np.max(precision[class_name][recall[class_name] >= fix_recall])
310 | print (class_name + ', set recall is ' + str(fix_recall) + ', precision: ' + str(output_precision))
311 | elif args.detection_metric == 'recall':
312 | fix_precision = float(args.detection_fix_precision)
313 | for class_name in ap.keys():
314 | if np.sum(precision[class_name] >= fix_precision) == 0:
315 | output_recall = 0
316 | else:
317 | output_recall = np.max(recall[class_name][precision[class_name] >= fix_precision])
318 | print (class_name + ', set precision is ' + str(fix_precision) + ', recall: ' + str(output_recall))
319 | elif args.detection_metric == 'pr':
320 | for class_name in 
ap.keys(): 321 | if len(recall[class_name]) > 0: 322 | output_recall = recall[class_name][-1] 323 | else: 324 | output_recall = 0 325 | if np.sum(recall[class_name] >= output_recall) == 0: 326 | output_precision = 0 327 | else: 328 | output_precision = np.max(precision[class_name][recall[class_name] >= output_recall]) 329 | print (class_name + ', set confidence score is ' + str(detection_thresh) + \ 330 | ', precision: ' + str(output_precision) + ', recall: ' + str(output_recall)) 331 | else: 332 | print ('wrong evaluation metric!') 333 | results_file.close() 334 | gts_file.close() 335 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import numpy as np 3 | from PIL import Image 4 | from functools import reduce 5 | from tensorflow import keras 6 | from tensorflow.keras import backend as K 7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb 8 | import cv2 9 | def compose(*funcs): 10 | if funcs: 11 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 12 | else: 13 | raise ValueError('Composition of empty sequence not supported.') 14 | 15 | def letterbox_image(image, size): 16 | iw, ih = image.size 17 | w, h = size 18 | scale = min(w/iw, h/ih) 19 | nw = int(iw*scale) 20 | nh = int(ih*scale) 21 | 22 | image = image.resize((nw,nh), Image.BICUBIC) 23 | new_image = Image.new('RGB', size, (128,128,128)) 24 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 25 | return new_image 26 | 27 | def rand(a=0, b=1): 28 | return np.random.rand()*(b-a) + a 29 | 30 | def merge_bboxes(bboxes, cutx, cuty): 31 | merge_bbox = [] 32 | for i in range(len(bboxes)): 33 | for box in bboxes[i]: 34 | tmp_box = [] 35 | x1,y1,x2,y2 = box[0], box[1], box[2], box[3] 36 | 37 | if i == 0: 38 | if y1 > cuty or x1 > cutx: 39 | continue 40 | if y2 >= cuty and y1 <= cuty: 41 | y2 = cuty 42 | if y2-y1 < 5: 43 | continue 44 | if x2 >= cutx and x1 <= cutx: 45 | x2 = cutx 46 | if x2-x1 < 5: 47 | continue 48 | 49 | if i == 1: 50 | if y2 < cuty or x1 > cutx: 51 | continue 52 | 53 | if y2 >= cuty and y1 <= cuty: 54 | y1 = cuty 55 | if y2-y1 < 5: 56 | continue 57 | 58 | if x2 >= cutx and x1 <= cutx: 59 | x2 = cutx 60 | if x2-x1 < 5: 61 | continue 62 | 63 | if i == 2: 64 | if y2 < cuty or x2 < cutx: 65 | continue 66 | 67 | if y2 >= cuty and y1 <= cuty: 68 | y1 = cuty 69 | if y2-y1 < 5: 70 | continue 71 | 72 | if x2 >= cutx and x1 <= cutx: 73 | x1 = cutx 74 | if x2-x1 < 5: 75 | continue 76 | 77 | if i == 3: 78 | if y1 > cuty or x2 < cutx: 79 | continue 80 | 81 | if y2 >= cuty and y1 <= cuty: 82 | y2 = cuty 83 | if y2-y1 < 5: 84 | continue 85 | 86 | if x2 >= cutx and x1 <= cutx: 87 | x1 = cutx 88 | if x2-x1 < 5: 89 | continue 90 | 91 | tmp_box.append(x1) 92 | tmp_box.append(y1) 93 | tmp_box.append(x2) 94 | tmp_box.append(y2) 95 | tmp_box.append(box[-1]) 96 | merge_bbox.append(tmp_box) 97 | return merge_bbox 98 | 99 | def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5): 100 | '''random preprocessing for real-time data augmentation''' 101 | h, w = input_shape 102 | min_offset_x = 0.4 103 | min_offset_y = 0.4 104 | scale_low = 1-min(min_offset_x,min_offset_y) 105 | scale_high = scale_low+0.2 106 | 107 | image_datas = [] 108 | box_datas = [] 109 | index = 0 110 | 111 | place_x = [0,0,int(w*min_offset_x),int(w*min_offset_x)] 112 | place_y = [0,int(h*min_offset_y),int(h*min_offset_y),0] 113 | for line in annotation_line: 114 
| # split each annotation line
115 | line_content = line.split()
116 | # open the image
117 | image = Image.open(line_content[0])
118 | image = image.convert("RGB")
119 | # image size
120 | iw, ih = image.size
121 | # box coordinates
122 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
123 | 
124 | # randomly flip the image
125 | flip = rand()<.5
126 | if flip and len(box)>0:
127 | image = image.transpose(Image.FLIP_LEFT_RIGHT)
128 | box[:, [0,2]] = iw - box[:, [2,0]]
129 | 
130 | # rescale the input image
131 | new_ar = w/h
132 | scale = rand(scale_low, scale_high)
133 | if new_ar < 1:
134 | nh = int(scale*h)
135 | nw = int(nh*new_ar)
136 | else:
137 | nw = int(scale*w)
138 | nh = int(nw/new_ar)
139 | image = image.resize((nw,nh), Image.BICUBIC)
140 | 
141 | # HSV color jitter
142 | hue = rand(-hue, hue)
143 | sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
144 | val = rand(1, val) if rand()<.5 else 1/rand(1, val)
145 | x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
146 | x[..., 0] += hue*360 # OpenCV float-HSV hue lives in [0, 360]
147 | x[..., 0][x[..., 0]>360] -= 360
148 | x[..., 0][x[..., 0]<0] += 360
149 | x[..., 1] *= sat
150 | x[..., 2] *= val
151 | x[x[:,:, 0]>360, 0] = 360
152 | x[:, :, 1:][x[:, :, 1:]>1] = 1
153 | x[x<0] = 0
154 | image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) # numpy array, 0 to 1
155 | 
156 | image = Image.fromarray((image*255).astype(np.uint8))
157 | # place the image in its mosaic quadrant (one of four positions)
158 | dx = place_x[index]
159 | dy = place_y[index]
160 | new_image = Image.new('RGB', (w,h), (128,128,128))
161 | new_image.paste(image, (dx, dy))
162 | image_data = np.array(new_image)/255
163 | 
164 | 
165 | index = index + 1
166 | box_data = []
167 | # re-map the boxes to the transformed image
168 | if len(box)>0:
169 | np.random.shuffle(box)
170 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
171 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
172 | box[:, 0:2][box[:, 0:2]<0] = 0
173 | box[:, 2][box[:, 2]>w] = w
174 | box[:, 3][box[:, 3]>h] = h
175 | box_w = box[:, 2] - box[:, 0]
176 | box_h = box[:, 3] - box[:, 1]
177 | box = box[np.logical_and(box_w>1, box_h>1)]
178 | box_data = np.zeros((len(box),5))
179 | box_data[:len(box)] = box
180 | 
181 | image_datas.append(image_data)
182 | box_datas.append(box_data)
183 | 
184 | # cut the four images and stitch them together
185 | cutx = np.random.randint(int(w*min_offset_x), int(w*(1 - min_offset_x)))
186 | cuty = np.random.randint(int(h*min_offset_y), int(h*(1 - min_offset_y)))
187 | 
188 | new_image = np.zeros([h,w,3])
189 | new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
190 | new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
191 | new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
192 | new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
193 | 
194 | # post-process the merged boxes
195 | new_boxes = merge_bboxes(box_datas, cutx, cuty)
196 | 
197 | # pad/clip the boxes to max_boxes
198 | box_data = np.zeros((max_boxes,5))
199 | if len(new_boxes)>0:
200 | if len(new_boxes)>max_boxes: new_boxes = new_boxes[:max_boxes]
201 | box_data[:len(new_boxes)] = new_boxes
202 | return new_image, box_data
203 | 
204 | 
205 | def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=1.5, val=1.5):
206 | '''random preprocessing for real-time data augmentation'''
207 | line = annotation_line.split()
208 | image = Image.open(line[0])
209 | iw, ih = image.size
210 | h, w = input_shape
211 | box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
212 | 
213 | # rescale the image and jitter its aspect ratio
214 | new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
215 | scale = rand(.25,2)
216 | if new_ar < 1:
217 | nh = int(scale*h)
218 | nw = int(nh*new_ar)
219 | else:
220 | nw = int(scale*w)
221 | nh = int(nw/new_ar)
222 | image = image.resize((nw,nh), Image.BICUBIC)
223 | 
224 | # pad the leftover area with gray bars
225 | dx = int(rand(0, w-nw))
226 | dy = int(rand(0, h-nh))
227 | new_image = Image.new('RGB', (w,h), (128,128,128))
228 | new_image.paste(image, (dx, dy))
229 | image = new_image
230 | 
231 | # flip the image
232 | flip = rand()<.5
233 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
234 | 
235 | # HSV color jitter
236 | hue = rand(-hue, hue)
237 | sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
238 | val = rand(1, val) if rand()<.5 else 1/rand(1, val)
239 | x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
240 | x[..., 0] += hue*360 # OpenCV float-HSV hue lives in [0, 360]
241 | x[..., 0][x[..., 0]>360] -= 360
242 | x[..., 0][x[..., 0]<0] += 360
243 | x[..., 1] *= sat
244 | x[..., 2] *= val
245 | x[x[:,:, 0]>360, 0] = 360
246 | x[:, :, 1:][x[:, :, 1:]>1] = 1
247 | x[x<0] = 0
248 | image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) # numpy array, 0 to 1
249 | 
250 | # adjust the boxes to the transformed image
251 | box_data = np.zeros((max_boxes,5))
252 | if len(box)>0:
253 | np.random.shuffle(box)
254 | box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
255 | box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
256 | if flip: box[:, [0,2]] = w - box[:, [2,0]]
257 | box[:, 0:2][box[:, 0:2]<0] = 0
258 | box[:, 2][box[:, 2]>w] = w
259 | box[:, 3][box[:, 3]>h] = h
260 | box_w = box[:, 2] - box[:, 0]
261 | box_h = box[:, 3] - box[:, 1]
262 | box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid boxes
263 | if len(box)>max_boxes: box = box[:max_boxes]
264 | box_data[:len(box)] = box
265 | 
266 | return image_data, box_data
267 | 
268 | 
269 | def cosine_decay_with_warmup(global_step,
270 | learning_rate_base,
271 | total_steps,
272 | warmup_learning_rate=0.0,
273 | warmup_steps=0,
274 | hold_base_rate_steps=0,
275 | min_learn_rate=0,
276 | ):
277 | """
278 | Arguments:
279 | global_step: the Tcur defined above; counts the steps executed so far.
280 | learning_rate_base: the preset peak learning rate; once warm-up has raised the rate to learning_rate_base, decay begins.
281 | total_steps: total number of training steps, equal to epoch*sample_count/batch_size (sample_count is the number of samples, epoch the number of passes).
282 | warmup_learning_rate: the initial value of the linear warm-up.
283 | warmup_steps: how many steps the warm-up lasts.
284 | hold_base_rate_steps: optional; after warm-up the rate is held constant until hold_base_rate_steps have passed, and only then starts to decay.
285 | """
286 | if total_steps < warmup_steps:
287 | raise ValueError('total_steps must be larger or equal to '
288 | 'warmup_steps.')
289 | # cosine annealing; the minimum rate is taken to be 0, which simplifies the expression
290 | learning_rate = 0.5 * learning_rate_base * (1 + np.cos(np.pi *
291 | (global_step - warmup_steps - hold_base_rate_steps) / float(total_steps - warmup_steps - hold_base_rate_steps)))
292 | # if hold_base_rate_steps > 0, the rate is held constant for that many steps after warm-up
293 | if hold_base_rate_steps > 0:
294 | learning_rate = np.where(global_step > warmup_steps + hold_base_rate_steps,
295 | learning_rate, learning_rate_base)
296 | if warmup_steps > 0:
297 | if learning_rate_base < warmup_learning_rate:
298 | raise ValueError('learning_rate_base must be larger or equal to '
299 | 'warmup_learning_rate.')
300 | # linear warm-up
301 | slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
302 | warmup_rate = slope * global_step + warmup_learning_rate
303 | # use the linear warm-up rate while global_step is still in the warm-up phase, otherwise the cosine-annealed rate
304 | learning_rate = np.where(global_step < warmup_steps, warmup_rate,
305 | learning_rate)
306 | 
307 | learning_rate = max(learning_rate,min_learn_rate)
308 | return learning_rate
309 | 
310 | 
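# Example values for cosine_decay_with_warmup, assuming warmup_steps=100,
# hold_base_rate_steps=0, total_steps=1000, learning_rate_base=1e-3,
# warmup_learning_rate=1e-5:
#   step 0    -> 1e-5   (start of the linear warm-up)
#   step 100  -> 1e-3   (base rate reached)
#   step 550  -> 5e-4   (halfway down the cosine)
#   step 1000 -> ~0     (cosine floor, clipped by min_learn_rate)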
311 | class WarmUpCosineDecayScheduler(keras.callbacks.Callback):
312 | """
313 | Callback that schedules the learning rate (warm-up plus cosine annealing with restarts)
314 | """
315 | def __init__(self,
316 | learning_rate_base,
317 | total_steps,
318 | global_step_init=0,
319 | warmup_learning_rate=0.0,
320 | warmup_steps=0,
321 | hold_base_rate_steps=0,
322 | min_learn_rate=0,
323 | # interval_epoch marks the low points between cosine-annealing restarts
324 | interval_epoch=[0.05, 0.15, 0.30, 0.50],
325 | verbose=0):
326 | super(WarmUpCosineDecayScheduler, self).__init__()
327 | # base learning rate
328 | self.learning_rate_base = learning_rate_base
329 | # warm-up parameters
330 | self.warmup_learning_rate = warmup_learning_rate
331 | # verbosity
332 | self.verbose = verbose
333 | # learning_rates records the rate after every update, handy for plotting
334 | self.min_learn_rate = min_learn_rate
335 | self.learning_rates = []
336 | 
337 | self.interval_epoch = interval_epoch
338 | # step counter that runs across all restarts
339 | self.global_step_for_interval = global_step_init
340 | # total warm-up steps
341 | self.warmup_steps_for_interval = warmup_steps
342 | # total steps held at the peak rate
343 | self.hold_steps_for_interval = hold_base_rate_steps
344 | # total steps of the whole training run
345 | self.total_steps_for_interval = total_steps
346 | 
347 | self.interval_index = 0
348 | # spacing between consecutive low points
349 | self.interval_reset = [self.interval_epoch[0]]
350 | for i in range(len(self.interval_epoch)-1):
351 | self.interval_reset.append(self.interval_epoch[i+1]-self.interval_epoch[i])
352 | self.interval_reset.append(1-self.interval_epoch[-1])
353 | 
354 | # advance global_step and record the current learning rate
355 | def on_batch_end(self, batch, logs=None):
356 | self.global_step = self.global_step + 1
357 | self.global_step_for_interval = self.global_step_for_interval + 1
358 | lr = K.get_value(self.model.optimizer.lr)
359 | self.learning_rates.append(lr)
360 | 
361 | # update the learning rate
362 | def on_batch_begin(self, batch, logs=None):
363 | # at every low point, reset the schedule parameters
364 | if self.global_step_for_interval in [0]+[int(i*self.total_steps_for_interval) for i in self.interval_epoch]:
365 | self.total_steps = self.total_steps_for_interval * self.interval_reset[self.interval_index]
366 | self.warmup_steps = self.warmup_steps_for_interval * self.interval_reset[self.interval_index]
367 | self.hold_base_rate_steps = self.hold_steps_for_interval * self.interval_reset[self.interval_index]
368 | self.global_step = 0
369 | self.interval_index += 1
370 | 
371 | lr = cosine_decay_with_warmup(global_step=self.global_step,
372 | learning_rate_base=self.learning_rate_base,
373 | total_steps=self.total_steps,
374 | warmup_learning_rate=self.warmup_learning_rate,
375 | warmup_steps=self.warmup_steps,
376 | hold_base_rate_steps=self.hold_base_rate_steps,
377 | min_learn_rate = self.min_learn_rate)
378 | K.set_value(self.model.optimizer.lr, lr)
379 | if self.verbose > 0:
380 | print('\nBatch %05d: setting learning '
381 | 'rate to %s.' % (self.global_step + 1, lr))
382 | 
383 | 
384 | class ModelCheckpoint(keras.callbacks.Callback):
385 | def __init__(self, filepath, monitor='val_loss', verbose=0,
386 | save_best_only=False, save_weights_only=False,
387 | mode='auto', period=1):
388 | super(ModelCheckpoint, self).__init__()
389 | self.monitor = monitor
390 | self.verbose = verbose
391 | self.filepath = filepath
392 | self.save_best_only = save_best_only
393 | self.save_weights_only = save_weights_only
394 | self.period = period
395 | self.epochs_since_last_save = 0
396 | 
397 | if mode not in ['auto', 'min', 'max']:
398 | warnings.warn('ModelCheckpoint mode %s is unknown, '
399 | 'fallback to auto mode.' % (mode),
400 | RuntimeWarning)
401 | mode = 'auto'
402 | 
403 | if mode == 'min':
404 | self.monitor_op = np.less
405 | self.best = np.Inf
406 | elif mode == 'max':
407 | self.monitor_op = np.greater
408 | self.best = -np.Inf
409 | else:
410 | if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
411 | self.monitor_op = np.greater
412 | self.best = -np.Inf
413 | else:
414 | self.monitor_op = np.less
415 | self.best = np.Inf
416 | 
417 | def on_epoch_end(self, epoch, logs=None):
418 | logs = logs or {}
419 | self.epochs_since_last_save += 1
420 | if self.epochs_since_last_save >= self.period:
421 | self.epochs_since_last_save = 0
422 | filepath = self.filepath.format(epoch=epoch + 1, **logs)
423 | if self.save_best_only:
424 | current = logs.get(self.monitor)
425 | if current is None:
426 | warnings.warn('Can save best model only with %s available, '
427 | 'skipping.' % (self.monitor), RuntimeWarning)
428 | else:
429 | if self.monitor_op(current, self.best):
430 | if self.verbose > 0:
431 | print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
432 | ' saving model to %s'
433 | % (epoch + 1, self.monitor, self.best,
434 | current, filepath))
435 | self.best = current
436 | if self.save_weights_only:
437 | self.model.save_weights(filepath, overwrite=True)
438 | else:
439 | self.model.save(filepath, overwrite=True)
440 | else:
441 | if self.verbose > 0:
442 | print('\nEpoch %05d: %s did not improve' %
443 | (epoch + 1, self.monitor))
444 | else:
445 | if self.verbose > 0:
446 | print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
447 | if self.save_weights_only:
448 | self.model.save_weights(filepath, overwrite=True)
449 | else:
450 | self.model.save(filepath, overwrite=True)
451 | 
452 | 
-------------------------------------------------------------------------------- /Model_pruning/train_purn.py: --------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | import tensorflow.keras.backend as K
6 | from tensorflow.keras.layers import Input, Lambda
7 | from tensorflow.keras.models import Model
8 | from tensorflow.keras.optimizers import Adam
9 | from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping
10 | from nets.yolo_fastest import yolo_body
11 | from nets.loss import yolo_loss
12 | from utils.utils import get_random_data, get_random_data_with_Mosaic, rand, WarmUpCosineDecayScheduler, ModelCheckpoint
13 | import os
14 | 
15 | from kerassurgeon.identify import get_apoz
16 | from kerassurgeon import Surgeon
17 | import math
18 | import pandas as pd
19 | from nni.compression.tensorflow import FPGMPruner
20 | tf.compat.v1.enable_eager_execution()
21 | 
22 | from tensorflow.python.framework import ops
23 | from tensorflow.keras.models import load_model, model_from_json
24 | 
25 | 
26 | 
27 | 
28 | # ---------------------------------------------------#
29 | # load the classes and anchor boxes
30 | # ---------------------------------------------------#
31 | def get_classes(classes_path):
32 | '''loads the classes'''
33 | with open(classes_path) as f:
34 | class_names = f.readlines()
35 | class_names = [c.strip() for c in class_names]
36 | return class_names
37 | 
38 | 
39 | def get_anchors(anchors_path):
40 | '''loads the anchors from a file'''
41 | with open(anchors_path) as f:
42 | anchors = f.readline()
43 | anchors = [float(x) for x in anchors.split(',')]
44 | return np.array(anchors).reshape(-1, 2)
45 | 
46 | 
47 | # ---------------------------------------------------#
48 | # training data generator
49 | # ---------------------------------------------------#
50 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
51 | '''data generator for fit_generator'''
52 | n = len(annotation_lines)
53 | i = 0
54 | flag = True
55 | while True:
56 | image_data = []
57 | box_data = []
58 | for b in range(batch_size):
59 | if i == 0:
60 | np.random.shuffle(annotation_lines)
61 | if mosaic:
62 | if flag and (i + 4) < n:
63 | image, box = get_random_data_with_Mosaic(annotation_lines[i:i + 4], input_shape)
64 | i = (i + 4) % n
65 | else:
66 | image, box = get_random_data(annotation_lines[i], input_shape)
67 | i = (i + 1) % n
68 | flag = bool(1 - flag)
69 | else:
70 | image, box = get_random_data(annotation_lines[i], input_shape)
71 | i = (i + 1) % n
72 | image_data.append(image)
73 | box_data.append(box)
74 | image_data = np.array(image_data)
75 | box_data = np.array(box_data)
76 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
77 | yield [image_data, *y_true], np.zeros(batch_size)
78 | 
79 | 
80 | # ---------------------------------------------------#
81 | # read in the xml annotations and output y_true
82 | # ---------------------------------------------------#
83 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
84 | assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
85 | # number of feature layers
86 | num_layers = len(anchors) // 3
87 | # anchor boxes
88 | anchor_mask: List[List[int]] = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]
89 | 
90 | true_boxes = np.array(true_boxes, dtype='float32')
91 | input_shape = np.array(input_shape, dtype='int32') # 416,416
92 | # read out the xy centers and the widths/heights
93 | # centers (m,n,2)
94 | boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
95 | boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
96 | # normalize to fractions of the input size
97 | true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
98 | true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
99 | 
100 | # m images
101 | m = true_boxes.shape[0]
102 | # grid shapes: 13,13; 26,26
103 | grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
104 | # y_true has shape (m,13,13,3,85), (m,26,26,3,85)
105 | y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
106 | dtype='float32') for l in range(num_layers)]
107 | # [1,9,2]
108 | anchors = np.expand_dims(anchors, 0)
109 | anchor_maxes = anchors / 2.
110 | anchor_mins = -anchor_maxes
111 | # a box is valid only if its width and height are positive
112 | valid_mask = boxes_wh[..., 0] > 0
113 | 
114 | for b in range(m):
115 | # process each image
116 | wh = boxes_wh[b, valid_mask[b]]
117 | if len(wh) == 0: continue
118 | # [n,1,2]
119 | wh = np.expand_dims(wh, -2)
120 | box_maxes = wh / 2.
121 | box_mins = -box_maxes
122 | 
123 | # find which anchor fits each ground-truth box best
124 | intersect_mins = np.maximum(box_mins, anchor_mins)
125 | intersect_maxes = np.minimum(box_maxes, anchor_maxes)
126 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
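# broadcast: wh is [n,1,2] and anchors is [1,k,2], so the quantities above
# yield an (n, k) comparison of each valid box against every anchor,
# both centered at the origin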
127 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
128 | box_area = wh[..., 0] * wh[..., 1]
129 | anchor_area = anchors[..., 0] * anchors[..., 1]
130 | iou = intersect_area / (box_area + anchor_area - intersect_area)
131 | # shape is (n); thanks to 消尽不死鸟 for pointing this out
132 | best_anchor = np.argmax(iou, axis=-1)
133 | 
134 | for t, n in enumerate(best_anchor):
135 | for l in range(num_layers):
136 | if n in anchor_mask[l]:
137 | # floor rounds down
138 | i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
139 | j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
140 | # locate the cell in feature layer l of image b that owns this ground-truth box
141 | k = anchor_mask[l].index(n)
142 | c = true_boxes[b, t, 4].astype('int32')
143 | y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
144 | y_true[l][b, j, i, k, 4] = 1
145 | y_true[l][b, j, i, k, 5 + c] = 1
146 | 
147 | return y_true
148 | 
149 | 
150 | #-----------------------------------------------------#
151 | # prune
152 | #-----------------------------------------------------#
153 | 
154 | def prune_model(model, apoz_df, n_channels_delete):
155 | # Identify the n_channels_delete channels with the highest APoZ in the model
156 | sorted_apoz_df = apoz_df.sort_values('apoz', ascending=False)
157 | high_apoz_index = sorted_apoz_df.iloc[0:n_channels_delete, :]
158 | 
159 | # Create the Surgeon and add a 'delete_channels' job for each layer
160 | # whose channels are to be deleted.
161 | surgeon = Surgeon(model, copy=True)
162 | for name in high_apoz_index.index.unique().values:
163 | channels = list(pd.Series(high_apoz_index.loc[name, 'index'],
164 | dtype=np.int64).values)
165 | surgeon.add_job('delete_channels', model.get_layer(name),
166 | channels=channels)
167 | # Delete channels
168 | return surgeon.operate()
169 | 
170 | 
171 | def get_total_channels(model):
172 | start = None
173 | end = None
174 | channels = 0
175 | for layer in model.layers[start:end]:
176 | if layer.__class__.__name__ == 'Conv2D':
177 | channels += layer.filters
178 | return channels
179 | 
180 | 
181 | def get_model_apoz(model, generator):
182 | # Get APoZ
183 | start = None
184 | end = None
185 | apoz = []
186 | for layer in model.layers[start:end]:
187 | if layer.__class__.__name__ == 'Conv2D':
188 | print(layer.name)
189 | apoz.extend([(layer.name, i, value) for (i, value)
190 | in enumerate(get_apoz(model, layer, generator))])
191 | 
192 | layer_name, index, apoz_value = zip(*apoz)
193 | apoz_df = pd.DataFrame({'layer': layer_name, 'index': index,
194 | 'apoz': apoz_value})
195 | apoz_df = apoz_df.set_index('layer')
196 | return apoz_df
197 | 
198 | #--------------------------------------------------------------------#
199 | 
200 | 
201 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
202 | for gpu in gpus:
203 | tf.config.experimental.set_memory_growth(gpu, True)
204 | 
205 | # ----------------------------------------------------#
206 | # reference video for computing detection mAP and PR curves:
207 | # https://www.bilibili.com/video/BV1zE411u7Vw
208 | # ----------------------------------------------------#
209 | if __name__ == "__main__":
210 | # path to the annotation file
211 | annotation_path = '2007_train.txt'
212 | # paths to the classes and anchors files
213 | classes_path = 'model_data/new_class.txt'
214 | anchors_path = 'model_data/yolo_anchors_320.txt'
215 | # path to the pretrained weights
216 | weights_path = 'logs_12/ep075-loss8.043-val_loss7.786.h5'
217 | # load the classes and anchors
218 | class_names = get_classes(classes_path)
219 | anchors = get_anchors(anchors_path)
220 | # number of classes
221 | num_classes = len(class_names)
222 | num_anchors = len(anchors)
223 | # where the trained models are saved
224 | log_dir = 'logs_14/'
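# Overall flow below: build the model and its loss graph, load the pretrained
# weights, (optionally) run the frozen/unfrozen training stages, then prune
# with FPGMPruner + kerassurgeon and fine-tune the pruned model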
225 | # ----------------------------------------------#
226 | # input shape
227 | # use 416x416 if GPU memory is small
228 | # use 608x608 if GPU memory is large
229 | # ----------------------------------------------#
230 | input_shape = (320, 320)
231 | mosaic = False
232 | Cosine_scheduler = False
233 | label_smoothing = 0
234 | 
235 | # clear the Keras session
236 | K.clear_session()
237 | 
238 | # model input image
239 | #image_input = Input(shape=(None, None, 3))
240 | image_input = Input(shape=(320, 320, 3))
241 | h, w = input_shape
242 | 
243 | # build the yolo model
244 | print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
245 | model_body = yolo_body(image_input, num_anchors // 2, num_classes)
246 | 
247 | model_body.summary()
248 | 
249 | if not os.path.exists(log_dir):
250 | os.makedirs(log_dir)
251 | json_config = model_body.to_json()
252 | with open(log_dir + 'model_config.json', 'w') as json_file:
253 | json_file.write(json_config)
254 | # -------------------------------------------#
255 | # see the README for downloading the weight files
256 | # -------------------------------------------#
257 | print('Load weights {}.'.format(weights_path))
258 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
259 | 
260 | # y_true is 13,13,3,85
261 | # 26,26,3,85
262 | y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5)) for l in
263 | range(2)]
264 | 
265 | # inputs are *model_body.input, *y_true
266 | # output is model_loss
267 | loss_input = [*model_body.output, *y_true]
268 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
269 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
270 | 'label_smoothing': label_smoothing})(loss_input)
271 | 
272 | model = Model([model_body.input, *y_true], model_loss)
273 | 
274 | # training callbacks
275 | logging = TensorBoard(log_dir=log_dir)
276 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
277 | monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
278 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
279 | 
280 | #model_complexity_param = model_complexity.ModelParametersCallback(log_dir, verbose=1)
281 | 
282 | # 0.1 for validation, 0.9 for training
283 | val_split = 0.1
284 | with open(annotation_path) as f:
285 | lines = f.readlines()
286 | np.random.seed(10101)
287 | np.random.shuffle(lines)
288 | np.random.seed(None)
289 | num_val = int(len(lines) * val_split)
290 | num_train = len(lines) - num_val
291 | 
292 | freeze_layers = 60
293 | for i in range(freeze_layers): model_body.layers[i].trainable = False
294 | print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
295 | 
296 | # ------------------------------------------------------#
297 | # the backbone features are generic; freezing them speeds up training
298 | # and protects the weights from being destroyed early on.
299 | # Init_Epoch is the starting epoch
300 | # Freeze_Epoch is the number of epochs trained with the backbone frozen
301 | # Epoch is the total number of training epochs
302 | # if you hit OOM or run out of GPU memory, reduce Batch_size
303 | # ------------------------------------------------------#
304 | if False:
305 | Init_epoch = 0
306 | Freeze_epoch = 0
307 | # batch_size: how many samples are fed per step
308 | batch_size = 16
309 | # maximum learning rate
310 | learning_rate_base = 1e-3
311 | if Cosine_scheduler:
312 | # warm-up epochs
313 | warmup_epoch = int((Freeze_epoch - Init_epoch) * 0.2)
314 | # total number of steps
315 | total_steps = int((Freeze_epoch - Init_epoch) * num_train / batch_size)
316 | # warm-up steps
317 | warmup_steps = int(warmup_epoch * num_train / batch_size)
318 | # learning-rate schedule
319 | reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
295 | 
296 |     # ------------------------------------------------------#
297 |     #   The backbone features are generic; freezing them speeds up training
298 |     #   and keeps the pretrained weights from being destroyed early on.
299 |     #   Init_epoch is the starting epoch
300 |     #   Freeze_epoch is the last epoch of frozen training
301 |     #   Epoch is the total number of training epochs
302 |     #   If you hit OOM or run out of GPU memory, reduce batch_size
303 |     # ------------------------------------------------------#
304 |     if False:
305 |         Init_epoch = 0
306 |         Freeze_epoch = 0
307 |         # batch_size: how many samples are fed per step
308 |         batch_size = 16
309 |         # maximum learning rate
310 |         learning_rate_base = 1e-3
311 |         if Cosine_scheduler:
312 |             # warm-up phase
313 |             warmup_epoch = int((Freeze_epoch - Init_epoch) * 0.2)
314 |             # total number of steps
315 |             total_steps = int((Freeze_epoch - Init_epoch) * num_train / batch_size)
316 |             # number of warm-up steps
317 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
318 |             # learning-rate schedule
319 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
320 |                                                    total_steps=total_steps,
321 |                                                    warmup_learning_rate=1e-4,
322 |                                                    warmup_steps=warmup_steps,
323 |                                                    hold_base_rate_steps=num_train,
324 |                                                    min_learn_rate=1e-6
325 |                                                    )
326 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
327 |         else:
328 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
329 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
330 | 
331 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
332 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
333 |                   steps_per_epoch=max(1, num_train // batch_size),
334 |                   validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
335 |                                                  mosaic=False),
336 |                   validation_steps=max(1, num_val // batch_size),
337 |                   epochs=Freeze_epoch,
338 |                   initial_epoch=Init_epoch,
339 |                   callbacks=[logging, checkpoint, reduce_lr, early_stopping])
340 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
341 | 
342 |     for i in range(freeze_layers): model_body.layers[i].trainable = True
343 | 
344 |     # training after unfreezing
345 |     if False:
346 |         Freeze_epoch = 0
347 |         Epoch = 1
348 |         # batch_size: how many samples are fed per step
349 |         batch_size = 16
350 | 
351 |         # maximum learning rate
352 |         learning_rate_base = 1e-5
353 |         if Cosine_scheduler:
354 |             # warm-up phase
355 |             warmup_epoch = int((Epoch - Freeze_epoch) * 0.2)
356 |             # total number of steps
357 |             total_steps = int((Epoch - Freeze_epoch) * num_train / batch_size)
358 |             # number of warm-up steps
359 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
360 |             # learning-rate schedule
361 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
362 |                                                    total_steps=total_steps,
363 |                                                    warmup_learning_rate=1e-5,
364 |                                                    warmup_steps=warmup_steps,
365 |                                                    hold_base_rate_steps=num_train // 2,
366 |                                                    min_learn_rate=1e-6
367 |                                                    )
368 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
369 |         else:
370 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
371 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
372 | 
373 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
374 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
375 |                   steps_per_epoch=max(1, num_train // batch_size),
376 |                   validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
377 |                                                  mosaic=False),
378 |                   validation_steps=max(1, num_val // batch_size),
379 |                   epochs=Epoch,
380 |                   initial_epoch=Freeze_epoch,
381 |                   callbacks=[logging, checkpoint, reduce_lr, early_stopping])  # model_complexity_param dropped: its creation is commented out above
382 |         model.save_weights(log_dir + 'last1.h5')
383 |         # pruner.export_model(model_path='pruned_vgg19_cifar10.h5', mask_path='mask_vgg19_cifar10.h5')
384 |         json_config = model.to_json()
385 |         with open(log_dir + 'model_config.json', 'w') as json_file:
386 |             json_file.write(json_config)
387 | 
388 |     # maximum learning rate
389 |     learning_rate_base = 1e-3
390 |     batch_size = 16
391 |     output_dir = 'inception_flowers/'
392 |     train_data_dir = output_dir + 'data/train/'
393 |     validation_data_dir = output_dir + 'data/validation/'
394 |     tuned_weights_path = output_dir + 'tuned_weights.h5'
395 |     Epoch = 2
396 |     val_batch_size = 16
397 |     percent_pruning = 2
398 |     total_percent_pruning = 50
399 | 
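    #------------------------------------------------------#
    #   Illustrative sketch (added note) of the FPGM criterion behind the
    #   FPGMPruner used below; an assumption about what
    #   Model_pruning/compressor.py implements: filters closest to the
    #   geometric median of their layer carry redundant information and
    #   are pruned first. Each filter's distance to the geometric median
    #   is approximated by its summed distance to all other filters.
    #------------------------------------------------------#
    def fpgm_redundant_filters_demo(kernel, n_prune):
        # kernel: Conv2D weights of shape (k, k, c_in, c_out)
        filters = kernel.reshape(-1, kernel.shape[-1]).T  # (c_out, k*k*c_in)
        dists = np.linalg.norm(filters[:, None, :] - filters[None, :, :], axis=-1).sum(axis=1)
        # indices of the n_prune filters nearest the geometric median
        return np.argsort(dists)[:n_prune]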
400 |     validation_generator = data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False)
401 | 
402 |     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
403 |     model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
404 | 
405 |     configure_list = [{
406 |         'sparsity': 0.5,
407 |         'op_types': ['Conv2D']
408 |     }]
409 | 
410 |     configure_dict = {'sparsity': 0.5}
411 |     pruner = FPGMPruner(model, configure_list)
412 | 
413 |     start = None
414 |     end = None
415 |     for layer in model.layers[start:end]:
416 |         if layer.__class__.__name__ == 'Conv2D':
417 |             print(layer.name)
418 |             # if layer.name == 'conv2d_17':
419 |             #     a = pruner.calc_mask(layer, configure_dict)
420 |     model = pruner.compress_model()
421 |     #prun_a = pruner.compress_model()
422 |     #min_gm_kernels = sorted(prun_a, key=lambda x: x[0])[:10]
423 |     #min_gm_kernels_1 = [x[1] for x in min_gm_kernels]
424 |     #surgeon = Surgeon(model, copy=False)
425 |     #model = pruner.compress_model_1(channels_p=min_gm_kernels_1)
426 |     #a = 1
427 | 
428 |     model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
429 |     model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
430 |               steps_per_epoch=max(1, num_train // batch_size),
431 |               validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
432 |                                              mosaic=False),
433 |               validation_steps=max(1, num_val // batch_size),
434 |               epochs=Epoch,
435 |               callbacks=[logging, checkpoint, reduce_lr, early_stopping])
436 | 
437 |     model.save_weights(output_dir + '1' + '.h5')
438 | 
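    #------------------------------------------------------#
    #   Illustrative sketch (added note): after channel pruning the
    #   topology itself changes, so persisting only the weights is not
    #   enough; the disabled block below reloads via model_from_json for
    #   the same reason. `prefix` is a hypothetical output path.
    #------------------------------------------------------#
    def save_pruned_model_demo(pruned_model, prefix):
        with open(prefix + '_config.json', 'w') as json_file:
            json_file.write(pruned_model.to_json())   # pruned architecture
        pruned_model.save_weights(prefix + '.h5')     # matching weights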
439 |     if False:
440 |         del model
441 |         K.clear_session()  # was tf.python.keras.backend.clear_session(), which is not a public API
442 |         ops.reset_default_graph()
443 | 
444 |         with open(r'F:\yolo\yolo_fastest_tf2\inception_flowers\model_config.json', 'r') as file:
445 |             model_json1 = file.read()
446 |         new_model = model_from_json(model_json1)
447 |         new_model.load_weights(output_dir + '1' + '.h5', by_name=True, skip_mismatch=True)
448 | 
449 |         y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5)) for l in
450 |                   range(2)]
451 | 
452 |         # inputs are *new_model.input, *y_true
453 |         # output is model_loss
454 |         loss_input = [*new_model.output, *y_true]
455 |         model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
456 |                             arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
457 |                                        'label_smoothing': label_smoothing})(loss_input)
458 | 
459 |         model = Model([new_model.input, *y_true], model_loss)
460 | 
461 |         # training callbacks
462 |         logging = TensorBoard(log_dir=log_dir)
463 |         checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
464 |                                      monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
465 |         early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
466 | 
467 |         # model_complexity_param = model_complexity.ModelParametersCallback(log_dir, verbose=1)
468 | 
469 |         # 0.1 of the data for validation, 0.9 for training
470 |         val_split = 0.1
471 |         with open(annotation_path) as f:
472 |             lines = f.readlines()
473 |         np.random.seed(10101)
474 |         np.random.shuffle(lines)
475 |         np.random.seed(None)
476 |         num_val = int(len(lines) * val_split)
477 |         num_train = len(lines) - num_val
478 | 
479 |         #model.save_weights(output_dir + '1' + '.h5')
480 |         #json_config = model.to_json()
481 |         #with open(output_dir + 'model_config.json', 'w') as json_file:
482 |         #    json_file.write(json_config)
483 |         #model.save(output_dir + '1' + '.h5')
484 |         #del model
485 |         #tf.python.keras.backend.clear_session()
486 |         #ops.reset_default_graph()
487 |         #model = load_model(output_dir + '1' + '.h5')
488 |         #with open(r'F:\yolo\yolo_fastest_tf2\inception_flowers\model_config.json', 'r') as file:
489 |         #    model_json1 = file.read()
490 |         #new_model = model_from_json(model_json1)
491 |         #new_model.load_weights(output_dir + '1' + '.h5')
492 | 
493 |     model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
494 | 
495 |     if False:
496 |         total_channels = get_total_channels(model)
497 |         n_channels_delete = int(math.floor(percent_pruning / 100 * total_channels))
498 | 
499 |         # Incrementally prune the network, retraining it each time
500 |         percent_pruned = 0
501 |         # If percent_pruned > 0, continue pruning from the previous checkpoint
502 |         if percent_pruned > 0:
503 |             checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned)
504 |                                + 'percent')
505 |             model = load_model(output_dir + checkpoint_name + '.h5')
506 | 
507 |         while percent_pruned <= total_percent_pruning:
508 |             # Prune the model
509 |             apoz_df = get_model_apoz(model, validation_generator)
510 |             percent_pruned += percent_pruning
511 |             print('pruning up to ', str(percent_pruned),
512 |                   '% of the original model weights')
513 |             model = prune_model(model, apoz_df, n_channels_delete)
514 | 
515 |             # Clean up the TensorFlow session after pruning and re-load the model
516 |             checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned)
517 |                                + 'percent')
518 |             model.save(output_dir + checkpoint_name + '.h5')
519 |             del model
520 |             K.clear_session()  # was tensorflow.python.keras.backend.clear_session(); `tensorflow` is never imported under that name
521 |             tf.compat.v1.reset_default_graph()  # TF2 replacement for the TF1-only tf.reset_default_graph()
522 |             model = load_model(output_dir + checkpoint_name + '.h5')
523 | 
524 |             # Re-train the model
525 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
526 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
527 |             checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned)
528 |                                + 'percent')
529 |             #csv_logger = CSVLogger(output_dir + checkpoint_name + '.csv')
530 |             model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
531 |                       steps_per_epoch=max(1, num_train // batch_size),
532 |                       validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
533 |                                                      mosaic=False),
534 |                       validation_steps=max(1, num_val // batch_size),
535 |                       epochs=Epoch,
536 |                       callbacks=[logging, checkpoint, reduce_lr, early_stopping])
537 | 
538 |     model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
539 |               steps_per_epoch=max(1, num_train // batch_size),
540 |               validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
541 |                                              mosaic=False),
542 |               validation_steps=max(1, num_val // batch_size),
543 |               epochs=Epoch,
544 |               callbacks=[logging, checkpoint, reduce_lr, early_stopping])
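    #------------------------------------------------------#
    #   Illustrative sketch (added note): the schedule arithmetic behind
    #   the disabled incremental-pruning loop above. With percent_pruning = 2
    #   and total_percent_pruning = 50 it would run 25 prune/retrain rounds,
    #   deleting int(2 / 100 * total_channels) channels per round. Assumes
    #   `math` is imported at the top of this file, as the math.floor call
    #   above already requires.
    #------------------------------------------------------#
    def pruning_schedule_demo(total_channels, percent_pruning=2, total_percent_pruning=50):
        # channels removed in each round
        n_channels_delete = int(math.floor(percent_pruning / 100 * total_channels))
        # prune/retrain rounds needed to reach the target overall sparsity
        n_rounds = total_percent_pruning // percent_pruning
        return n_channels_delete, n_rounds
--------------------------------------------------------------------------------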